zstd-ruby 1.5.2.2 → 1.5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -3
- data/ext/zstdruby/common.h +7 -0
- data/ext/zstdruby/libzstd/common/bits.h +175 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +18 -59
- data/ext/zstdruby/libzstd/common/compiler.h +22 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +1 -1
- data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
- data/ext/zstdruby/libzstd/common/error_private.c +9 -2
- data/ext/zstdruby/libzstd/common/error_private.h +1 -1
- data/ext/zstdruby/libzstd/common/fse.h +5 -83
- data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
- data/ext/zstdruby/libzstd/common/huf.h +65 -156
- data/ext/zstdruby/libzstd/common/mem.h +39 -46
- data/ext/zstdruby/libzstd/common/pool.c +26 -10
- data/ext/zstdruby/libzstd/common/pool.h +7 -1
- data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
- data/ext/zstdruby/libzstd/common/threading.c +68 -14
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
- data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -113
- data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
- data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
- data/ext/zstdruby/libzstd/compress/hist.c +1 -1
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1055 -455
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +165 -145
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +5 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +306 -283
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +104 -80
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +12 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +3 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +164 -42
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +186 -65
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -87
- data/ext/zstdruby/libzstd/zdict.h +53 -31
- data/ext/zstdruby/libzstd/zstd.h +489 -90
- data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
- data/ext/zstdruby/main.c +4 -0
- data/ext/zstdruby/streaming_compress.c +1 -7
- data/ext/zstdruby/zstdruby.c +110 -26
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- metadata +7 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
#include "hist.h" /* HIST_countFast_wksp */
|
|
17
17
|
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
|
18
18
|
#include "../common/fse.h"
|
|
19
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
20
19
|
#include "../common/huf.h"
|
|
21
20
|
#include "zstd_compress_internal.h"
|
|
22
21
|
#include "zstd_compress_sequences.h"
|
|
@@ -27,6 +26,7 @@
|
|
|
27
26
|
#include "zstd_opt.h"
|
|
28
27
|
#include "zstd_ldm.h"
|
|
29
28
|
#include "zstd_compress_superblock.h"
|
|
29
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
30
30
|
|
|
31
31
|
/* ***************************************************************
|
|
32
32
|
* Tuning parameters
|
|
@@ -58,14 +58,17 @@
|
|
|
58
58
|
* Helper functions
|
|
59
59
|
***************************************/
|
|
60
60
|
/* ZSTD_compressBound()
|
|
61
|
-
* Note that the result from this function is only
|
|
62
|
-
*
|
|
63
|
-
* When
|
|
64
|
-
*
|
|
65
|
-
*
|
|
61
|
+
* Note that the result from this function is only valid for
|
|
62
|
+
* the one-pass compression functions.
|
|
63
|
+
* When employing the streaming mode,
|
|
64
|
+
* if flushes are frequently altering the size of blocks,
|
|
65
|
+
* the overhead from block headers can make the compressed data larger
|
|
66
|
+
* than the return value of ZSTD_compressBound().
|
|
66
67
|
*/
|
|
67
68
|
size_t ZSTD_compressBound(size_t srcSize) {
|
|
68
|
-
|
|
69
|
+
size_t const r = ZSTD_COMPRESSBOUND(srcSize);
|
|
70
|
+
if (r==0) return ERROR(srcSize_wrong);
|
|
71
|
+
return r;
|
|
69
72
|
}
|
|
70
73
|
|
|
71
74
|
|
|
@@ -177,12 +180,9 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
|
|
|
177
180
|
if (cctx==NULL) return 0; /* support free on NULL */
|
|
178
181
|
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
|
|
179
182
|
"not compatible with static CCtx");
|
|
180
|
-
{
|
|
181
|
-
int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
|
183
|
+
{ int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
|
|
182
184
|
ZSTD_freeCCtxContent(cctx);
|
|
183
|
-
if (!cctxInWorkspace)
|
|
184
|
-
ZSTD_customFree(cctx, cctx->customMem);
|
|
185
|
-
}
|
|
185
|
+
if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem);
|
|
186
186
|
}
|
|
187
187
|
return 0;
|
|
188
188
|
}
|
|
@@ -267,9 +267,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
|
|
|
267
267
|
return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
|
|
268
268
|
}
|
|
269
269
|
|
|
270
|
-
/* Returns
|
|
270
|
+
/* Returns ZSTD_ps_enable if compression parameters are such that we should
|
|
271
271
|
* enable long distance matching (wlog >= 27, strategy >= btopt).
|
|
272
|
-
* Returns
|
|
272
|
+
* Returns ZSTD_ps_disable otherwise.
|
|
273
273
|
*/
|
|
274
274
|
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
|
275
275
|
const ZSTD_compressionParameters* const cParams) {
|
|
@@ -277,6 +277,34 @@ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
|
|
277
277
|
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
|
|
278
278
|
}
|
|
279
279
|
|
|
280
|
+
static int ZSTD_resolveExternalSequenceValidation(int mode) {
|
|
281
|
+
return mode;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/* Resolves maxBlockSize to the default if no value is present. */
|
|
285
|
+
static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) {
|
|
286
|
+
if (maxBlockSize == 0) {
|
|
287
|
+
return ZSTD_BLOCKSIZE_MAX;
|
|
288
|
+
} else {
|
|
289
|
+
return maxBlockSize;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) {
|
|
294
|
+
if (value != ZSTD_ps_auto) return value;
|
|
295
|
+
if (cLevel < 10) {
|
|
296
|
+
return ZSTD_ps_disable;
|
|
297
|
+
} else {
|
|
298
|
+
return ZSTD_ps_enable;
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
|
|
303
|
+
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
|
|
304
|
+
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
|
|
305
|
+
return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast;
|
|
306
|
+
}
|
|
307
|
+
|
|
280
308
|
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
281
309
|
ZSTD_compressionParameters cParams)
|
|
282
310
|
{
|
|
@@ -294,6 +322,10 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
|
294
322
|
}
|
|
295
323
|
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
|
|
296
324
|
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
|
|
325
|
+
cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
|
|
326
|
+
cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize);
|
|
327
|
+
cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes,
|
|
328
|
+
cctxParams.compressionLevel);
|
|
297
329
|
assert(!ZSTD_checkCParams(cParams));
|
|
298
330
|
return cctxParams;
|
|
299
331
|
}
|
|
@@ -339,10 +371,13 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
|
|
|
339
371
|
#define ZSTD_NO_CLEVEL 0
|
|
340
372
|
|
|
341
373
|
/**
|
|
342
|
-
* Initializes
|
|
374
|
+
* Initializes `cctxParams` from `params` and `compressionLevel`.
|
|
343
375
|
* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
|
|
344
376
|
*/
|
|
345
|
-
static void
|
|
377
|
+
static void
|
|
378
|
+
ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
|
|
379
|
+
const ZSTD_parameters* params,
|
|
380
|
+
int compressionLevel)
|
|
346
381
|
{
|
|
347
382
|
assert(!ZSTD_checkCParams(params->cParams));
|
|
348
383
|
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
|
|
@@ -355,6 +390,9 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
|
|
|
355
390
|
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
|
|
356
391
|
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
|
|
357
392
|
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
|
|
393
|
+
cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
|
|
394
|
+
cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
|
|
395
|
+
cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
|
|
358
396
|
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
|
|
359
397
|
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
|
|
360
398
|
}
|
|
@@ -369,7 +407,7 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete
|
|
|
369
407
|
|
|
370
408
|
/**
|
|
371
409
|
* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
|
|
372
|
-
* @param
|
|
410
|
+
* @param params Validated zstd parameters.
|
|
373
411
|
*/
|
|
374
412
|
static void ZSTD_CCtxParams_setZstdParams(
|
|
375
413
|
ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
|
|
@@ -478,8 +516,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
|
478
516
|
return bounds;
|
|
479
517
|
|
|
480
518
|
case ZSTD_c_enableLongDistanceMatching:
|
|
481
|
-
bounds.lowerBound =
|
|
482
|
-
bounds.upperBound =
|
|
519
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
520
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
483
521
|
return bounds;
|
|
484
522
|
|
|
485
523
|
case ZSTD_c_ldmHashLog:
|
|
@@ -572,6 +610,26 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
|
572
610
|
bounds.upperBound = 1;
|
|
573
611
|
return bounds;
|
|
574
612
|
|
|
613
|
+
case ZSTD_c_prefetchCDictTables:
|
|
614
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
615
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
616
|
+
return bounds;
|
|
617
|
+
|
|
618
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
619
|
+
bounds.lowerBound = 0;
|
|
620
|
+
bounds.upperBound = 1;
|
|
621
|
+
return bounds;
|
|
622
|
+
|
|
623
|
+
case ZSTD_c_maxBlockSize:
|
|
624
|
+
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
|
|
625
|
+
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
|
|
626
|
+
return bounds;
|
|
627
|
+
|
|
628
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
629
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
|
630
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
|
631
|
+
return bounds;
|
|
632
|
+
|
|
575
633
|
default:
|
|
576
634
|
bounds.error = ERROR(parameter_unsupported);
|
|
577
635
|
return bounds;
|
|
@@ -636,6 +694,10 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
|
636
694
|
case ZSTD_c_useBlockSplitter:
|
|
637
695
|
case ZSTD_c_useRowMatchFinder:
|
|
638
696
|
case ZSTD_c_deterministicRefPrefix:
|
|
697
|
+
case ZSTD_c_prefetchCDictTables:
|
|
698
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
699
|
+
case ZSTD_c_maxBlockSize:
|
|
700
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
639
701
|
default:
|
|
640
702
|
return 0;
|
|
641
703
|
}
|
|
@@ -648,7 +710,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
|
648
710
|
if (ZSTD_isUpdateAuthorized(param)) {
|
|
649
711
|
cctx->cParamsChanged = 1;
|
|
650
712
|
} else {
|
|
651
|
-
RETURN_ERROR(stage_wrong, "can only set params in
|
|
713
|
+
RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
|
|
652
714
|
} }
|
|
653
715
|
|
|
654
716
|
switch(param)
|
|
@@ -691,6 +753,10 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
|
691
753
|
case ZSTD_c_useBlockSplitter:
|
|
692
754
|
case ZSTD_c_useRowMatchFinder:
|
|
693
755
|
case ZSTD_c_deterministicRefPrefix:
|
|
756
|
+
case ZSTD_c_prefetchCDictTables:
|
|
757
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
758
|
+
case ZSTD_c_maxBlockSize:
|
|
759
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
694
760
|
break;
|
|
695
761
|
|
|
696
762
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
@@ -746,12 +812,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
746
812
|
case ZSTD_c_minMatch :
|
|
747
813
|
if (value!=0) /* 0 => use default */
|
|
748
814
|
BOUNDCHECK(ZSTD_c_minMatch, value);
|
|
749
|
-
CCtxParams->cParams.minMatch = value;
|
|
815
|
+
CCtxParams->cParams.minMatch = (U32)value;
|
|
750
816
|
return CCtxParams->cParams.minMatch;
|
|
751
817
|
|
|
752
818
|
case ZSTD_c_targetLength :
|
|
753
819
|
BOUNDCHECK(ZSTD_c_targetLength, value);
|
|
754
|
-
CCtxParams->cParams.targetLength = value;
|
|
820
|
+
CCtxParams->cParams.targetLength = (U32)value;
|
|
755
821
|
return CCtxParams->cParams.targetLength;
|
|
756
822
|
|
|
757
823
|
case ZSTD_c_strategy :
|
|
@@ -764,12 +830,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
764
830
|
/* Content size written in frame header _when known_ (default:1) */
|
|
765
831
|
DEBUGLOG(4, "set content size flag = %u", (value!=0));
|
|
766
832
|
CCtxParams->fParams.contentSizeFlag = value != 0;
|
|
767
|
-
return CCtxParams->fParams.contentSizeFlag;
|
|
833
|
+
return (size_t)CCtxParams->fParams.contentSizeFlag;
|
|
768
834
|
|
|
769
835
|
case ZSTD_c_checksumFlag :
|
|
770
836
|
/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
|
|
771
837
|
CCtxParams->fParams.checksumFlag = value != 0;
|
|
772
|
-
return CCtxParams->fParams.checksumFlag;
|
|
838
|
+
return (size_t)CCtxParams->fParams.checksumFlag;
|
|
773
839
|
|
|
774
840
|
case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
|
|
775
841
|
DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
|
|
@@ -778,18 +844,18 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
778
844
|
|
|
779
845
|
case ZSTD_c_forceMaxWindow :
|
|
780
846
|
CCtxParams->forceWindow = (value != 0);
|
|
781
|
-
return CCtxParams->forceWindow;
|
|
847
|
+
return (size_t)CCtxParams->forceWindow;
|
|
782
848
|
|
|
783
849
|
case ZSTD_c_forceAttachDict : {
|
|
784
850
|
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
|
|
785
|
-
BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
|
|
851
|
+
BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
|
|
786
852
|
CCtxParams->attachDictPref = pref;
|
|
787
853
|
return CCtxParams->attachDictPref;
|
|
788
854
|
}
|
|
789
855
|
|
|
790
856
|
case ZSTD_c_literalCompressionMode : {
|
|
791
857
|
const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
|
|
792
|
-
BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
|
|
858
|
+
BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
|
|
793
859
|
CCtxParams->literalCompressionMode = lcm;
|
|
794
860
|
return CCtxParams->literalCompressionMode;
|
|
795
861
|
}
|
|
@@ -840,47 +906,48 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
840
906
|
|
|
841
907
|
case ZSTD_c_enableDedicatedDictSearch :
|
|
842
908
|
CCtxParams->enableDedicatedDictSearch = (value!=0);
|
|
843
|
-
return CCtxParams->enableDedicatedDictSearch;
|
|
909
|
+
return (size_t)CCtxParams->enableDedicatedDictSearch;
|
|
844
910
|
|
|
845
911
|
case ZSTD_c_enableLongDistanceMatching :
|
|
912
|
+
BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value);
|
|
846
913
|
CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
|
|
847
914
|
return CCtxParams->ldmParams.enableLdm;
|
|
848
915
|
|
|
849
916
|
case ZSTD_c_ldmHashLog :
|
|
850
917
|
if (value!=0) /* 0 ==> auto */
|
|
851
918
|
BOUNDCHECK(ZSTD_c_ldmHashLog, value);
|
|
852
|
-
CCtxParams->ldmParams.hashLog = value;
|
|
919
|
+
CCtxParams->ldmParams.hashLog = (U32)value;
|
|
853
920
|
return CCtxParams->ldmParams.hashLog;
|
|
854
921
|
|
|
855
922
|
case ZSTD_c_ldmMinMatch :
|
|
856
923
|
if (value!=0) /* 0 ==> default */
|
|
857
924
|
BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
|
|
858
|
-
CCtxParams->ldmParams.minMatchLength = value;
|
|
925
|
+
CCtxParams->ldmParams.minMatchLength = (U32)value;
|
|
859
926
|
return CCtxParams->ldmParams.minMatchLength;
|
|
860
927
|
|
|
861
928
|
case ZSTD_c_ldmBucketSizeLog :
|
|
862
929
|
if (value!=0) /* 0 ==> default */
|
|
863
930
|
BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
|
|
864
|
-
CCtxParams->ldmParams.bucketSizeLog = value;
|
|
931
|
+
CCtxParams->ldmParams.bucketSizeLog = (U32)value;
|
|
865
932
|
return CCtxParams->ldmParams.bucketSizeLog;
|
|
866
933
|
|
|
867
934
|
case ZSTD_c_ldmHashRateLog :
|
|
868
935
|
if (value!=0) /* 0 ==> default */
|
|
869
936
|
BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
|
|
870
|
-
CCtxParams->ldmParams.hashRateLog = value;
|
|
937
|
+
CCtxParams->ldmParams.hashRateLog = (U32)value;
|
|
871
938
|
return CCtxParams->ldmParams.hashRateLog;
|
|
872
939
|
|
|
873
940
|
case ZSTD_c_targetCBlockSize :
|
|
874
941
|
if (value!=0) /* 0 ==> default */
|
|
875
942
|
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
|
|
876
|
-
CCtxParams->targetCBlockSize = value;
|
|
943
|
+
CCtxParams->targetCBlockSize = (U32)value;
|
|
877
944
|
return CCtxParams->targetCBlockSize;
|
|
878
945
|
|
|
879
946
|
case ZSTD_c_srcSizeHint :
|
|
880
947
|
if (value!=0) /* 0 ==> default */
|
|
881
948
|
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
|
|
882
949
|
CCtxParams->srcSizeHint = value;
|
|
883
|
-
return CCtxParams->srcSizeHint;
|
|
950
|
+
return (size_t)CCtxParams->srcSizeHint;
|
|
884
951
|
|
|
885
952
|
case ZSTD_c_stableInBuffer:
|
|
886
953
|
BOUNDCHECK(ZSTD_c_stableInBuffer, value);
|
|
@@ -917,6 +984,27 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
|
917
984
|
CCtxParams->deterministicRefPrefix = !!value;
|
|
918
985
|
return CCtxParams->deterministicRefPrefix;
|
|
919
986
|
|
|
987
|
+
case ZSTD_c_prefetchCDictTables:
|
|
988
|
+
BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
|
|
989
|
+
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
|
|
990
|
+
return CCtxParams->prefetchCDictTables;
|
|
991
|
+
|
|
992
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
993
|
+
BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
|
|
994
|
+
CCtxParams->enableMatchFinderFallback = value;
|
|
995
|
+
return CCtxParams->enableMatchFinderFallback;
|
|
996
|
+
|
|
997
|
+
case ZSTD_c_maxBlockSize:
|
|
998
|
+
if (value!=0) /* 0 ==> default */
|
|
999
|
+
BOUNDCHECK(ZSTD_c_maxBlockSize, value);
|
|
1000
|
+
CCtxParams->maxBlockSize = value;
|
|
1001
|
+
return CCtxParams->maxBlockSize;
|
|
1002
|
+
|
|
1003
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
1004
|
+
BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value);
|
|
1005
|
+
CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value;
|
|
1006
|
+
return CCtxParams->searchForExternalRepcodes;
|
|
1007
|
+
|
|
920
1008
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
921
1009
|
}
|
|
922
1010
|
}
|
|
@@ -1049,6 +1137,18 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
|
1049
1137
|
case ZSTD_c_deterministicRefPrefix:
|
|
1050
1138
|
*value = (int)CCtxParams->deterministicRefPrefix;
|
|
1051
1139
|
break;
|
|
1140
|
+
case ZSTD_c_prefetchCDictTables:
|
|
1141
|
+
*value = (int)CCtxParams->prefetchCDictTables;
|
|
1142
|
+
break;
|
|
1143
|
+
case ZSTD_c_enableSeqProducerFallback:
|
|
1144
|
+
*value = CCtxParams->enableMatchFinderFallback;
|
|
1145
|
+
break;
|
|
1146
|
+
case ZSTD_c_maxBlockSize:
|
|
1147
|
+
*value = (int)CCtxParams->maxBlockSize;
|
|
1148
|
+
break;
|
|
1149
|
+
case ZSTD_c_searchForExternalRepcodes:
|
|
1150
|
+
*value = (int)CCtxParams->searchForExternalRepcodes;
|
|
1151
|
+
break;
|
|
1052
1152
|
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
|
1053
1153
|
}
|
|
1054
1154
|
return 0;
|
|
@@ -1075,9 +1175,24 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
|
|
|
1075
1175
|
return 0;
|
|
1076
1176
|
}
|
|
1077
1177
|
|
|
1178
|
+
size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
|
|
1179
|
+
{
|
|
1180
|
+
DEBUGLOG(4, "ZSTD_CCtx_setCParams");
|
|
1181
|
+
assert(cctx != NULL);
|
|
1182
|
+
if (cctx->streamStage != zcss_init) {
|
|
1183
|
+
/* All parameters in @cparams are allowed to be updated during MT compression.
|
|
1184
|
+
* This must be signaled, so that MT compression picks up the changes */
|
|
1185
|
+
cctx->cParamsChanged = 1;
|
|
1186
|
+
}
|
|
1187
|
+
/* only update if parameters are valid */
|
|
1188
|
+
FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
|
|
1189
|
+
cctx->requestedParams.cParams = cparams;
|
|
1190
|
+
return 0;
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1078
1193
|
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
|
|
1079
1194
|
{
|
|
1080
|
-
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %
|
|
1195
|
+
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
|
|
1081
1196
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1082
1197
|
"Can't set pledgedSrcSize when not in init stage.");
|
|
1083
1198
|
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
|
@@ -1220,6 +1335,7 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
|
|
|
1220
1335
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
|
1221
1336
|
"Can't reset parameters only when not in init stage.");
|
|
1222
1337
|
ZSTD_clearAllDicts(cctx);
|
|
1338
|
+
ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
|
|
1223
1339
|
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
|
|
1224
1340
|
}
|
|
1225
1341
|
return 0;
|
|
@@ -1316,7 +1432,8 @@ static ZSTD_compressionParameters
|
|
|
1316
1432
|
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1317
1433
|
unsigned long long srcSize,
|
|
1318
1434
|
size_t dictSize,
|
|
1319
|
-
ZSTD_cParamMode_e mode
|
|
1435
|
+
ZSTD_cParamMode_e mode,
|
|
1436
|
+
ZSTD_paramSwitch_e useRowMatchFinder)
|
|
1320
1437
|
{
|
|
1321
1438
|
const U64 minSrcSize = 513; /* (1<<9) + 1 */
|
|
1322
1439
|
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
|
@@ -1350,8 +1467,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
|
1350
1467
|
}
|
|
1351
1468
|
|
|
1352
1469
|
/* resize windowLog if input is small enough, to use less memory */
|
|
1353
|
-
if ( (srcSize
|
|
1354
|
-
&& (dictSize
|
|
1470
|
+
if ( (srcSize <= maxWindowResize)
|
|
1471
|
+
&& (dictSize <= maxWindowResize) ) {
|
|
1355
1472
|
U32 const tSize = (U32)(srcSize + dictSize);
|
|
1356
1473
|
static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
|
|
1357
1474
|
U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
|
|
@@ -1369,6 +1486,42 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
|
1369
1486
|
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
|
|
1370
1487
|
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
|
|
1371
1488
|
|
|
1489
|
+
/* We can't use more than 32 bits of hash in total, so that means that we require:
|
|
1490
|
+
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
|
|
1491
|
+
*/
|
|
1492
|
+
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
|
|
1493
|
+
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
|
|
1494
|
+
if (cPar.hashLog > maxShortCacheHashLog) {
|
|
1495
|
+
cPar.hashLog = maxShortCacheHashLog;
|
|
1496
|
+
}
|
|
1497
|
+
if (cPar.chainLog > maxShortCacheHashLog) {
|
|
1498
|
+
cPar.chainLog = maxShortCacheHashLog;
|
|
1499
|
+
}
|
|
1500
|
+
}
|
|
1501
|
+
|
|
1502
|
+
|
|
1503
|
+
/* At this point, we aren't 100% sure if we are using the row match finder.
|
|
1504
|
+
* Unless it is explicitly disabled, conservatively assume that it is enabled.
|
|
1505
|
+
* In this case it will only be disabled for small sources, so shrinking the
|
|
1506
|
+
* hash log a little bit shouldn't result in any ratio loss.
|
|
1507
|
+
*/
|
|
1508
|
+
if (useRowMatchFinder == ZSTD_ps_auto)
|
|
1509
|
+
useRowMatchFinder = ZSTD_ps_enable;
|
|
1510
|
+
|
|
1511
|
+
/* We can't hash more than 32-bits in total. So that means that we require:
|
|
1512
|
+
* (hashLog - rowLog + 8) <= 32
|
|
1513
|
+
*/
|
|
1514
|
+
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
|
|
1515
|
+
/* Switch to 32-entry rows if searchLog is 5 (or more) */
|
|
1516
|
+
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
|
|
1517
|
+
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
|
|
1518
|
+
U32 const maxHashLog = maxRowHashLog + rowLog;
|
|
1519
|
+
assert(cPar.hashLog >= rowLog);
|
|
1520
|
+
if (cPar.hashLog > maxHashLog) {
|
|
1521
|
+
cPar.hashLog = maxHashLog;
|
|
1522
|
+
}
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1372
1525
|
return cPar;
|
|
1373
1526
|
}
|
|
1374
1527
|
|
|
@@ -1379,7 +1532,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
|
|
1379
1532
|
{
|
|
1380
1533
|
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
|
1381
1534
|
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
|
1382
|
-
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
|
|
1535
|
+
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
|
|
1383
1536
|
}
|
|
1384
1537
|
|
|
1385
1538
|
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
|
@@ -1410,7 +1563,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
|
1410
1563
|
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
|
1411
1564
|
assert(!ZSTD_checkCParams(cParams));
|
|
1412
1565
|
/* srcSizeHint == 0 means 0 */
|
|
1413
|
-
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
|
|
1566
|
+
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
|
|
1414
1567
|
}
|
|
1415
1568
|
|
|
1416
1569
|
static size_t
|
|
@@ -1455,6 +1608,13 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
|
1455
1608
|
return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
|
|
1456
1609
|
}
|
|
1457
1610
|
|
|
1611
|
+
/* Helper function for calculating memory requirements.
|
|
1612
|
+
* Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
|
|
1613
|
+
static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
|
|
1614
|
+
U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4;
|
|
1615
|
+
return blockSize / divider;
|
|
1616
|
+
}
|
|
1617
|
+
|
|
1458
1618
|
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1459
1619
|
const ZSTD_compressionParameters* cParams,
|
|
1460
1620
|
const ldmParams_t* ldmParams,
|
|
@@ -1462,12 +1622,13 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1462
1622
|
const ZSTD_paramSwitch_e useRowMatchFinder,
|
|
1463
1623
|
const size_t buffInSize,
|
|
1464
1624
|
const size_t buffOutSize,
|
|
1465
|
-
const U64 pledgedSrcSize
|
|
1625
|
+
const U64 pledgedSrcSize,
|
|
1626
|
+
int useSequenceProducer,
|
|
1627
|
+
size_t maxBlockSize)
|
|
1466
1628
|
{
|
|
1467
1629
|
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
|
|
1468
|
-
size_t const blockSize = MIN(
|
|
1469
|
-
|
|
1470
|
-
size_t const maxNbSeq = blockSize / divider;
|
|
1630
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize);
|
|
1631
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer);
|
|
1471
1632
|
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
|
1472
1633
|
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
|
1473
1634
|
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
|
@@ -1486,6 +1647,11 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1486
1647
|
|
|
1487
1648
|
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
|
|
1488
1649
|
|
|
1650
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
|
1651
|
+
size_t const externalSeqSpace = useSequenceProducer
|
|
1652
|
+
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
|
|
1653
|
+
: 0;
|
|
1654
|
+
|
|
1489
1655
|
size_t const neededSpace =
|
|
1490
1656
|
cctxSpace +
|
|
1491
1657
|
entropySpace +
|
|
@@ -1494,7 +1660,8 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
|
1494
1660
|
ldmSeqSpace +
|
|
1495
1661
|
matchStateSize +
|
|
1496
1662
|
tokenSpace +
|
|
1497
|
-
bufferSpace
|
|
1663
|
+
bufferSpace +
|
|
1664
|
+
externalSeqSpace;
|
|
1498
1665
|
|
|
1499
1666
|
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
|
|
1500
1667
|
return neededSpace;
|
|
@@ -1512,7 +1679,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1512
1679
|
* be needed. However, we still allocate two 0-sized buffers, which can
|
|
1513
1680
|
* take space under ASAN. */
|
|
1514
1681
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1515
|
-
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
|
|
1682
|
+
&cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
|
1516
1683
|
}
|
|
1517
1684
|
|
|
1518
1685
|
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
@@ -1562,7 +1729,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1562
1729
|
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
|
1563
1730
|
{ ZSTD_compressionParameters const cParams =
|
|
1564
1731
|
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
|
1565
|
-
size_t const blockSize = MIN(
|
|
1732
|
+
size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
|
|
1566
1733
|
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
|
|
1567
1734
|
? ((size_t)1 << cParams.windowLog) + blockSize
|
|
1568
1735
|
: 0;
|
|
@@ -1573,7 +1740,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
|
1573
1740
|
|
|
1574
1741
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1575
1742
|
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
|
|
1576
|
-
ZSTD_CONTENTSIZE_UNKNOWN);
|
|
1743
|
+
ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
|
|
1577
1744
|
}
|
|
1578
1745
|
}
|
|
1579
1746
|
|
|
@@ -1847,6 +2014,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1847
2014
|
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
|
1848
2015
|
assert(params->useBlockSplitter != ZSTD_ps_auto);
|
|
1849
2016
|
assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
|
|
2017
|
+
assert(params->maxBlockSize != 0);
|
|
1850
2018
|
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
1851
2019
|
/* Adjust long distance matching parameters */
|
|
1852
2020
|
ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
|
|
@@ -1855,9 +2023,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1855
2023
|
}
|
|
1856
2024
|
|
|
1857
2025
|
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
|
|
1858
|
-
size_t const blockSize = MIN(
|
|
1859
|
-
|
|
1860
|
-
size_t const maxNbSeq = blockSize / divider;
|
|
2026
|
+
size_t const blockSize = MIN(params->maxBlockSize, windowSize);
|
|
2027
|
+
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
|
|
1861
2028
|
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
|
|
1862
2029
|
? ZSTD_compressBound(blockSize) + 1
|
|
1863
2030
|
: 0;
|
|
@@ -1874,7 +2041,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1874
2041
|
size_t const neededSpace =
|
|
1875
2042
|
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1876
2043
|
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
|
|
1877
|
-
buffInSize, buffOutSize, pledgedSrcSize);
|
|
2044
|
+
buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
|
|
1878
2045
|
int resizeWorkspace;
|
|
1879
2046
|
|
|
1880
2047
|
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
|
|
@@ -1917,6 +2084,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1917
2084
|
|
|
1918
2085
|
/* init params */
|
|
1919
2086
|
zc->blockState.matchState.cParams = params->cParams;
|
|
2087
|
+
zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable;
|
|
1920
2088
|
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
|
|
1921
2089
|
zc->consumedSrcSize = 0;
|
|
1922
2090
|
zc->producedCSize = 0;
|
|
@@ -1986,6 +2154,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
|
1986
2154
|
zc->ldmState.loadedDictEnd = 0;
|
|
1987
2155
|
}
|
|
1988
2156
|
|
|
2157
|
+
/* reserve space for block-level external sequences */
|
|
2158
|
+
if (params->useSequenceProducer) {
|
|
2159
|
+
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
|
|
2160
|
+
zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
|
|
2161
|
+
zc->externalMatchCtx.seqBuffer =
|
|
2162
|
+
(ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
|
|
2163
|
+
}
|
|
2164
|
+
|
|
1989
2165
|
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
|
1990
2166
|
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
|
|
1991
2167
|
|
|
@@ -2059,7 +2235,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
|
2059
2235
|
}
|
|
2060
2236
|
|
|
2061
2237
|
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
|
|
2062
|
-
cdict->dictContentSize, ZSTD_cpm_attachDict
|
|
2238
|
+
cdict->dictContentSize, ZSTD_cpm_attachDict,
|
|
2239
|
+
params.useRowMatchFinder);
|
|
2063
2240
|
params.cParams.windowLog = windowLog;
|
|
2064
2241
|
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
|
|
2065
2242
|
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
|
|
@@ -2098,6 +2275,22 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
|
|
2098
2275
|
return 0;
|
|
2099
2276
|
}
|
|
2100
2277
|
|
|
2278
|
+
static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize,
|
|
2279
|
+
ZSTD_compressionParameters const* cParams) {
|
|
2280
|
+
if (ZSTD_CDictIndicesAreTagged(cParams)){
|
|
2281
|
+
/* Remove tags from the CDict table if they are present.
|
|
2282
|
+
* See docs on "short cache" in zstd_compress_internal.h for context. */
|
|
2283
|
+
size_t i;
|
|
2284
|
+
for (i = 0; i < tableSize; i++) {
|
|
2285
|
+
U32 const taggedIndex = src[i];
|
|
2286
|
+
U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS;
|
|
2287
|
+
dst[i] = index;
|
|
2288
|
+
}
|
|
2289
|
+
} else {
|
|
2290
|
+
ZSTD_memcpy(dst, src, tableSize * sizeof(U32));
|
|
2291
|
+
}
|
|
2292
|
+
}
|
|
2293
|
+
|
|
2101
2294
|
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
2102
2295
|
const ZSTD_CDict* cdict,
|
|
2103
2296
|
ZSTD_CCtx_params params,
|
|
@@ -2133,14 +2326,15 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
|
2133
2326
|
: 0;
|
|
2134
2327
|
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
|
|
2135
2328
|
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2329
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable,
|
|
2330
|
+
cdict->matchState.hashTable,
|
|
2331
|
+
hSize, cdict_cParams);
|
|
2332
|
+
|
|
2139
2333
|
/* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
|
|
2140
2334
|
if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2335
|
+
ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable,
|
|
2336
|
+
cdict->matchState.chainTable,
|
|
2337
|
+
chainSize, cdict_cParams);
|
|
2144
2338
|
}
|
|
2145
2339
|
/* copy tag table */
|
|
2146
2340
|
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
|
|
@@ -2226,6 +2420,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
|
2226
2420
|
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
|
|
2227
2421
|
params.ldmParams = srcCCtx->appliedParams.ldmParams;
|
|
2228
2422
|
params.fParams = fParams;
|
|
2423
|
+
params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize;
|
|
2229
2424
|
ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
|
|
2230
2425
|
/* loadedDictSize */ 0,
|
|
2231
2426
|
ZSTDcrp_leaveDirty, zbuff);
|
|
@@ -2385,7 +2580,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
|
|
|
2385
2580
|
|
|
2386
2581
|
/* See doc/zstd_compression_format.md for detailed format description */
|
|
2387
2582
|
|
|
2388
|
-
|
|
2583
|
+
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
2389
2584
|
{
|
|
2390
2585
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
|
2391
2586
|
BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
@@ -2393,18 +2588,24 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
|
2393
2588
|
BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
2394
2589
|
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2395
2590
|
U32 u;
|
|
2591
|
+
int longOffsets = 0;
|
|
2396
2592
|
assert(nbSeq <= seqStorePtr->maxNbSeq);
|
|
2397
2593
|
for (u=0; u<nbSeq; u++) {
|
|
2398
2594
|
U32 const llv = sequences[u].litLength;
|
|
2595
|
+
U32 const ofCode = ZSTD_highbit32(sequences[u].offBase);
|
|
2399
2596
|
U32 const mlv = sequences[u].mlBase;
|
|
2400
2597
|
llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
|
|
2401
|
-
ofCodeTable[u] = (BYTE)
|
|
2598
|
+
ofCodeTable[u] = (BYTE)ofCode;
|
|
2402
2599
|
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
|
|
2600
|
+
assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN));
|
|
2601
|
+
if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN)
|
|
2602
|
+
longOffsets = 1;
|
|
2403
2603
|
}
|
|
2404
2604
|
if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
|
|
2405
2605
|
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
|
|
2406
2606
|
if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
|
|
2407
2607
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
|
2608
|
+
return longOffsets;
|
|
2408
2609
|
}
|
|
2409
2610
|
|
|
2410
2611
|
/* ZSTD_useTargetCBlockSize():
|
|
@@ -2438,6 +2639,7 @@ typedef struct {
|
|
|
2438
2639
|
U32 MLtype;
|
|
2439
2640
|
size_t size;
|
|
2440
2641
|
size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
|
2642
|
+
int longOffsets;
|
|
2441
2643
|
} ZSTD_symbolEncodingTypeStats_t;
|
|
2442
2644
|
|
|
2443
2645
|
/* ZSTD_buildSequencesStatistics():
|
|
@@ -2448,11 +2650,13 @@ typedef struct {
|
|
|
2448
2650
|
* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
|
|
2449
2651
|
*/
|
|
2450
2652
|
static ZSTD_symbolEncodingTypeStats_t
|
|
2451
|
-
ZSTD_buildSequencesStatistics(
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2653
|
+
ZSTD_buildSequencesStatistics(
|
|
2654
|
+
const seqStore_t* seqStorePtr, size_t nbSeq,
|
|
2655
|
+
const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
|
|
2656
|
+
BYTE* dst, const BYTE* const dstEnd,
|
|
2657
|
+
ZSTD_strategy strategy, unsigned* countWorkspace,
|
|
2658
|
+
void* entropyWorkspace, size_t entropyWkspSize)
|
|
2659
|
+
{
|
|
2456
2660
|
BYTE* const ostart = dst;
|
|
2457
2661
|
const BYTE* const oend = dstEnd;
|
|
2458
2662
|
BYTE* op = ostart;
|
|
@@ -2466,7 +2670,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
|
2466
2670
|
|
|
2467
2671
|
stats.lastCountSize = 0;
|
|
2468
2672
|
/* convert length/distances into codes */
|
|
2469
|
-
ZSTD_seqToCodes(seqStorePtr);
|
|
2673
|
+
stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
|
|
2470
2674
|
assert(op <= oend);
|
|
2471
2675
|
assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
|
|
2472
2676
|
/* build CTable for Literal Lengths */
|
|
@@ -2571,22 +2775,22 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
|
2571
2775
|
*/
|
|
2572
2776
|
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
|
|
2573
2777
|
MEM_STATIC size_t
|
|
2574
|
-
ZSTD_entropyCompressSeqStore_internal(
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2580
|
-
|
|
2778
|
+
ZSTD_entropyCompressSeqStore_internal(
|
|
2779
|
+
const seqStore_t* seqStorePtr,
|
|
2780
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
2781
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
2782
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
2783
|
+
void* dst, size_t dstCapacity,
|
|
2784
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
|
2785
|
+
const int bmi2)
|
|
2581
2786
|
{
|
|
2582
|
-
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
|
2583
2787
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
2584
2788
|
unsigned* count = (unsigned*)entropyWorkspace;
|
|
2585
2789
|
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
|
|
2586
2790
|
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
|
|
2587
2791
|
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
|
|
2588
2792
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
|
2589
|
-
const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
2793
|
+
const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2590
2794
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
2591
2795
|
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
2592
2796
|
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
@@ -2594,29 +2798,31 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2594
2798
|
BYTE* const oend = ostart + dstCapacity;
|
|
2595
2799
|
BYTE* op = ostart;
|
|
2596
2800
|
size_t lastCountSize;
|
|
2801
|
+
int longOffsets = 0;
|
|
2597
2802
|
|
|
2598
2803
|
entropyWorkspace = count + (MaxSeq + 1);
|
|
2599
2804
|
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
|
2600
2805
|
|
|
2601
|
-
DEBUGLOG(
|
|
2806
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity);
|
|
2602
2807
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
|
2603
2808
|
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
|
|
2604
2809
|
|
|
2605
2810
|
/* Compress literals */
|
|
2606
2811
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
|
2607
|
-
size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
2608
|
-
size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
|
|
2812
|
+
size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
2813
|
+
size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
|
2609
2814
|
/* Base suspicion of uncompressibility on ratio of literals to sequences */
|
|
2610
2815
|
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
|
|
2611
2816
|
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
|
|
2817
|
+
|
|
2612
2818
|
size_t const cSize = ZSTD_compressLiterals(
|
|
2613
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
|
2614
|
-
cctxParams->cParams.strategy,
|
|
2615
|
-
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
2616
2819
|
op, dstCapacity,
|
|
2617
2820
|
literals, litSize,
|
|
2618
2821
|
entropyWorkspace, entropyWkspSize,
|
|
2619
|
-
|
|
2822
|
+
&prevEntropy->huf, &nextEntropy->huf,
|
|
2823
|
+
cctxParams->cParams.strategy,
|
|
2824
|
+
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
2825
|
+
suspectUncompressible, bmi2);
|
|
2620
2826
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
|
2621
2827
|
assert(cSize <= dstCapacity);
|
|
2622
2828
|
op += cSize;
|
|
@@ -2642,11 +2848,10 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2642
2848
|
ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
|
|
2643
2849
|
return (size_t)(op - ostart);
|
|
2644
2850
|
}
|
|
2645
|
-
{
|
|
2646
|
-
ZSTD_symbolEncodingTypeStats_t stats;
|
|
2647
|
-
BYTE* seqHead = op++;
|
|
2851
|
+
{ BYTE* const seqHead = op++;
|
|
2648
2852
|
/* build stats for sequences */
|
|
2649
|
-
stats =
|
|
2853
|
+
const ZSTD_symbolEncodingTypeStats_t stats =
|
|
2854
|
+
ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
|
|
2650
2855
|
&prevEntropy->fse, &nextEntropy->fse,
|
|
2651
2856
|
op, oend,
|
|
2652
2857
|
strategy, count,
|
|
@@ -2655,6 +2860,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2655
2860
|
*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
|
|
2656
2861
|
lastCountSize = stats.lastCountSize;
|
|
2657
2862
|
op += stats.size;
|
|
2863
|
+
longOffsets = stats.longOffsets;
|
|
2658
2864
|
}
|
|
2659
2865
|
|
|
2660
2866
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
|
@@ -2689,14 +2895,15 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
|
2689
2895
|
}
|
|
2690
2896
|
|
|
2691
2897
|
MEM_STATIC size_t
|
|
2692
|
-
ZSTD_entropyCompressSeqStore(
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2898
|
+
ZSTD_entropyCompressSeqStore(
|
|
2899
|
+
const seqStore_t* seqStorePtr,
|
|
2900
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
2901
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
2902
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
2903
|
+
void* dst, size_t dstCapacity,
|
|
2904
|
+
size_t srcSize,
|
|
2905
|
+
void* entropyWorkspace, size_t entropyWkspSize,
|
|
2906
|
+
int bmi2)
|
|
2700
2907
|
{
|
|
2701
2908
|
size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
|
|
2702
2909
|
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
|
|
@@ -2706,15 +2913,21 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
|
|
|
2706
2913
|
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
|
|
2707
2914
|
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
|
|
2708
2915
|
*/
|
|
2709
|
-
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
|
|
2916
|
+
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) {
|
|
2917
|
+
DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity);
|
|
2710
2918
|
return 0; /* block not compressed */
|
|
2919
|
+
}
|
|
2711
2920
|
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
|
|
2712
2921
|
|
|
2713
2922
|
/* Check compressibility */
|
|
2714
2923
|
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
|
|
2715
2924
|
if (cSize >= maxCSize) return 0; /* block not compressed */
|
|
2716
2925
|
}
|
|
2717
|
-
DEBUGLOG(
|
|
2926
|
+
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
|
|
2927
|
+
/* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
|
|
2928
|
+
* This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
|
|
2929
|
+
*/
|
|
2930
|
+
assert(cSize < ZSTD_BLOCKSIZE_MAX);
|
|
2718
2931
|
return cSize;
|
|
2719
2932
|
}
|
|
2720
2933
|
|
|
@@ -2809,6 +3022,72 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
|
|
|
2809
3022
|
ssPtr->longLengthType = ZSTD_llt_none;
|
|
2810
3023
|
}
|
|
2811
3024
|
|
|
3025
|
+
/* ZSTD_postProcessSequenceProducerResult() :
|
|
3026
|
+
* Validates and post-processes sequences obtained through the external matchfinder API:
|
|
3027
|
+
* - Checks whether nbExternalSeqs represents an error condition.
|
|
3028
|
+
* - Appends a block delimiter to outSeqs if one is not already present.
|
|
3029
|
+
* See zstd.h for context regarding block delimiters.
|
|
3030
|
+
* Returns the number of sequences after post-processing, or an error code. */
|
|
3031
|
+
static size_t ZSTD_postProcessSequenceProducerResult(
|
|
3032
|
+
ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
|
|
3033
|
+
) {
|
|
3034
|
+
RETURN_ERROR_IF(
|
|
3035
|
+
nbExternalSeqs > outSeqsCapacity,
|
|
3036
|
+
sequenceProducer_failed,
|
|
3037
|
+
"External sequence producer returned error code %lu",
|
|
3038
|
+
(unsigned long)nbExternalSeqs
|
|
3039
|
+
);
|
|
3040
|
+
|
|
3041
|
+
RETURN_ERROR_IF(
|
|
3042
|
+
nbExternalSeqs == 0 && srcSize > 0,
|
|
3043
|
+
sequenceProducer_failed,
|
|
3044
|
+
"Got zero sequences from external sequence producer for a non-empty src buffer!"
|
|
3045
|
+
);
|
|
3046
|
+
|
|
3047
|
+
if (srcSize == 0) {
|
|
3048
|
+
ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
|
|
3049
|
+
return 1;
|
|
3050
|
+
}
|
|
3051
|
+
|
|
3052
|
+
{
|
|
3053
|
+
ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
|
|
3054
|
+
|
|
3055
|
+
/* We can return early if lastSeq is already a block delimiter. */
|
|
3056
|
+
if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
|
|
3057
|
+
return nbExternalSeqs;
|
|
3058
|
+
}
|
|
3059
|
+
|
|
3060
|
+
/* This error condition is only possible if the external matchfinder
|
|
3061
|
+
* produced an invalid parse, by definition of ZSTD_sequenceBound(). */
|
|
3062
|
+
RETURN_ERROR_IF(
|
|
3063
|
+
nbExternalSeqs == outSeqsCapacity,
|
|
3064
|
+
sequenceProducer_failed,
|
|
3065
|
+
"nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
|
|
3066
|
+
);
|
|
3067
|
+
|
|
3068
|
+
/* lastSeq is not a block delimiter, so we need to append one. */
|
|
3069
|
+
ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
|
|
3070
|
+
return nbExternalSeqs + 1;
|
|
3071
|
+
}
|
|
3072
|
+
}
|
|
3073
|
+
|
|
3074
|
+
/* ZSTD_fastSequenceLengthSum() :
|
|
3075
|
+
* Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*.
|
|
3076
|
+
* Similar to another function in zstd_compress.c (determine_blockSize),
|
|
3077
|
+
* except it doesn't check for a block delimiter to end summation.
|
|
3078
|
+
* Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P).
|
|
3079
|
+
* This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */
|
|
3080
|
+
static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) {
|
|
3081
|
+
size_t matchLenSum, litLenSum, i;
|
|
3082
|
+
matchLenSum = 0;
|
|
3083
|
+
litLenSum = 0;
|
|
3084
|
+
for (i = 0; i < seqBufSize; i++) {
|
|
3085
|
+
litLenSum += seqBuf[i].litLength;
|
|
3086
|
+
matchLenSum += seqBuf[i].matchLength;
|
|
3087
|
+
}
|
|
3088
|
+
return litLenSum + matchLenSum;
|
|
3089
|
+
}
|
|
3090
|
+
|
|
2812
3091
|
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
|
|
2813
3092
|
|
|
2814
3093
|
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
@@ -2818,7 +3097,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2818
3097
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
|
2819
3098
|
/* Assert that we have correctly flushed the ctx params into the ms's copy */
|
|
2820
3099
|
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
|
|
2821
|
-
|
|
3100
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
3101
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
3102
|
+
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
|
2822
3103
|
if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
|
|
2823
3104
|
ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
|
|
2824
3105
|
} else {
|
|
@@ -2854,6 +3135,15 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2854
3135
|
}
|
|
2855
3136
|
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
|
|
2856
3137
|
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
|
|
3138
|
+
|
|
3139
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
|
3140
|
+
* We need to revisit soon and implement it. */
|
|
3141
|
+
RETURN_ERROR_IF(
|
|
3142
|
+
zc->appliedParams.useSequenceProducer,
|
|
3143
|
+
parameter_combination_unsupported,
|
|
3144
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
|
3145
|
+
);
|
|
3146
|
+
|
|
2857
3147
|
/* Updates ldmSeqStore.pos */
|
|
2858
3148
|
lastLLSize =
|
|
2859
3149
|
ZSTD_ldm_blockCompress(&zc->externSeqStore,
|
|
@@ -2865,6 +3155,14 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2865
3155
|
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
|
2866
3156
|
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
|
|
2867
3157
|
|
|
3158
|
+
/* External matchfinder + LDM is technically possible, just not implemented yet.
|
|
3159
|
+
* We need to revisit soon and implement it. */
|
|
3160
|
+
RETURN_ERROR_IF(
|
|
3161
|
+
zc->appliedParams.useSequenceProducer,
|
|
3162
|
+
parameter_combination_unsupported,
|
|
3163
|
+
"Long-distance matching with external sequence producer enabled is not currently supported."
|
|
3164
|
+
);
|
|
3165
|
+
|
|
2868
3166
|
ldmSeqStore.seq = zc->ldmSequences;
|
|
2869
3167
|
ldmSeqStore.capacity = zc->maxNbLdmSequences;
|
|
2870
3168
|
/* Updates ldmSeqStore.size */
|
|
@@ -2879,7 +3177,68 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
|
2879
3177
|
zc->appliedParams.useRowMatchFinder,
|
|
2880
3178
|
src, srcSize);
|
|
2881
3179
|
assert(ldmSeqStore.pos == ldmSeqStore.size);
|
|
2882
|
-
} else
|
|
3180
|
+
} else if (zc->appliedParams.useSequenceProducer) {
|
|
3181
|
+
assert(
|
|
3182
|
+
zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
|
|
3183
|
+
);
|
|
3184
|
+
assert(zc->externalMatchCtx.mFinder != NULL);
|
|
3185
|
+
|
|
3186
|
+
{ U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
|
|
3187
|
+
|
|
3188
|
+
size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
|
|
3189
|
+
zc->externalMatchCtx.mState,
|
|
3190
|
+
zc->externalMatchCtx.seqBuffer,
|
|
3191
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
|
3192
|
+
src, srcSize,
|
|
3193
|
+
NULL, 0, /* dict and dictSize, currently not supported */
|
|
3194
|
+
zc->appliedParams.compressionLevel,
|
|
3195
|
+
windowSize
|
|
3196
|
+
);
|
|
3197
|
+
|
|
3198
|
+
size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
|
|
3199
|
+
zc->externalMatchCtx.seqBuffer,
|
|
3200
|
+
nbExternalSeqs,
|
|
3201
|
+
zc->externalMatchCtx.seqBufferCapacity,
|
|
3202
|
+
srcSize
|
|
3203
|
+
);
|
|
3204
|
+
|
|
3205
|
+
/* Return early if there is no error, since we don't need to worry about last literals */
|
|
3206
|
+
if (!ZSTD_isError(nbPostProcessedSeqs)) {
|
|
3207
|
+
ZSTD_sequencePosition seqPos = {0,0,0};
|
|
3208
|
+
size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
|
|
3209
|
+
RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
|
|
3210
|
+
FORWARD_IF_ERROR(
|
|
3211
|
+
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
|
|
3212
|
+
zc, &seqPos,
|
|
3213
|
+
zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
|
|
3214
|
+
src, srcSize,
|
|
3215
|
+
zc->appliedParams.searchForExternalRepcodes
|
|
3216
|
+
),
|
|
3217
|
+
"Failed to copy external sequences to seqStore!"
|
|
3218
|
+
);
|
|
3219
|
+
ms->ldmSeqStore = NULL;
|
|
3220
|
+
DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs);
|
|
3221
|
+
return ZSTDbss_compress;
|
|
3222
|
+
}
|
|
3223
|
+
|
|
3224
|
+
/* Propagate the error if fallback is disabled */
|
|
3225
|
+
if (!zc->appliedParams.enableMatchFinderFallback) {
|
|
3226
|
+
return nbPostProcessedSeqs;
|
|
3227
|
+
}
|
|
3228
|
+
|
|
3229
|
+
/* Fallback to software matchfinder */
|
|
3230
|
+
{ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
|
3231
|
+
zc->appliedParams.useRowMatchFinder,
|
|
3232
|
+
dictMode);
|
|
3233
|
+
ms->ldmSeqStore = NULL;
|
|
3234
|
+
DEBUGLOG(
|
|
3235
|
+
5,
|
|
3236
|
+
"External sequence producer returned error code %lu. Falling back to internal parser.",
|
|
3237
|
+
(unsigned long)nbExternalSeqs
|
|
3238
|
+
);
|
|
3239
|
+
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
|
|
3240
|
+
} }
|
|
3241
|
+
} else { /* not long range mode and no external matchfinder */
|
|
2883
3242
|
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
|
|
2884
3243
|
zc->appliedParams.useRowMatchFinder,
|
|
2885
3244
|
dictMode);
|
|
@@ -2940,7 +3299,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2940
3299
|
/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
|
|
2941
3300
|
so we provide seqStoreSeqs[i].offset - 1 */
|
|
2942
3301
|
ZSTD_updateRep(updatedRepcodes.rep,
|
|
2943
|
-
seqStoreSeqs[i].offBase
|
|
3302
|
+
seqStoreSeqs[i].offBase,
|
|
2944
3303
|
seqStoreSeqs[i].litLength == 0);
|
|
2945
3304
|
literalsRead += outSeqs[i].litLength;
|
|
2946
3305
|
}
|
|
@@ -2956,6 +3315,10 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
|
|
2956
3315
|
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
|
2957
3316
|
}
|
|
2958
3317
|
|
|
3318
|
+
size_t ZSTD_sequenceBound(size_t srcSize) {
|
|
3319
|
+
return (srcSize / ZSTD_MINMATCH_MIN) + 1;
|
|
3320
|
+
}
|
|
3321
|
+
|
|
2959
3322
|
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
|
2960
3323
|
size_t outSeqsSize, const void* src, size_t srcSize)
|
|
2961
3324
|
{
|
|
@@ -3001,19 +3364,17 @@ static int ZSTD_isRLE(const BYTE* src, size_t length) {
|
|
|
3001
3364
|
const size_t unrollMask = unrollSize - 1;
|
|
3002
3365
|
const size_t prefixLength = length & unrollMask;
|
|
3003
3366
|
size_t i;
|
|
3004
|
-
size_t u;
|
|
3005
3367
|
if (length == 1) return 1;
|
|
3006
3368
|
/* Check if prefix is RLE first before using unrolled loop */
|
|
3007
3369
|
if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
|
|
3008
3370
|
return 0;
|
|
3009
3371
|
}
|
|
3010
3372
|
for (i = prefixLength; i != length; i += unrollSize) {
|
|
3373
|
+
size_t u;
|
|
3011
3374
|
for (u = 0; u < unrollSize; u += sizeof(size_t)) {
|
|
3012
3375
|
if (MEM_readST(ip + i + u) != valueST) {
|
|
3013
3376
|
return 0;
|
|
3014
|
-
|
|
3015
|
-
}
|
|
3016
|
-
}
|
|
3377
|
+
} } }
|
|
3017
3378
|
return 1;
|
|
3018
3379
|
}
|
|
3019
3380
|
|
|
@@ -3029,7 +3390,8 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
|
|
|
3029
3390
|
return nbSeqs < 4 && nbLits < 10;
|
|
3030
3391
|
}
|
|
3031
3392
|
|
|
3032
|
-
static void
|
|
3393
|
+
static void
|
|
3394
|
+
ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
|
|
3033
3395
|
{
|
|
3034
3396
|
ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
|
|
3035
3397
|
bs->prevCBlock = bs->nextCBlock;
|
|
@@ -3037,7 +3399,9 @@ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* c
|
|
|
3037
3399
|
}
|
|
3038
3400
|
|
|
3039
3401
|
/* Writes the block header */
|
|
3040
|
-
static void
|
|
3402
|
+
static void
|
|
3403
|
+
writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
|
|
3404
|
+
{
|
|
3041
3405
|
U32 const cBlockHeader = cSize == 1 ?
|
|
3042
3406
|
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
|
|
3043
3407
|
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
|
@@ -3050,13 +3414,16 @@ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastB
|
|
|
3050
3414
|
* Stores literals block type (raw, rle, compressed, repeat) and
|
|
3051
3415
|
* huffman description table to hufMetadata.
|
|
3052
3416
|
* Requires ENTROPY_WORKSPACE_SIZE workspace
|
|
3053
|
-
*
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3417
|
+
* @return : size of huffman description table, or an error code
|
|
3418
|
+
*/
|
|
3419
|
+
static size_t
|
|
3420
|
+
ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
|
|
3421
|
+
const ZSTD_hufCTables_t* prevHuf,
|
|
3422
|
+
ZSTD_hufCTables_t* nextHuf,
|
|
3423
|
+
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
3424
|
+
const int literalsCompressionIsDisabled,
|
|
3425
|
+
void* workspace, size_t wkspSize,
|
|
3426
|
+
int hufFlags)
|
|
3060
3427
|
{
|
|
3061
3428
|
BYTE* const wkspStart = (BYTE*)workspace;
|
|
3062
3429
|
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
@@ -3064,9 +3431,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3064
3431
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3065
3432
|
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
|
3066
3433
|
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
|
3067
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
|
3434
|
+
const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp);
|
|
3068
3435
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
|
3069
|
-
unsigned huffLog =
|
|
3436
|
+
unsigned huffLog = LitHufLog;
|
|
3070
3437
|
HUF_repeat repeat = prevHuf->repeatMode;
|
|
3071
3438
|
DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
|
|
3072
3439
|
|
|
@@ -3081,73 +3448,77 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3081
3448
|
|
|
3082
3449
|
/* small ? don't even attempt compression (speed opt) */
|
|
3083
3450
|
#ifndef COMPRESS_LITERALS_SIZE_MIN
|
|
3084
|
-
#define COMPRESS_LITERALS_SIZE_MIN 63
|
|
3451
|
+
# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */
|
|
3085
3452
|
#endif
|
|
3086
3453
|
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
|
3087
3454
|
if (srcSize <= minLitSize) {
|
|
3088
3455
|
DEBUGLOG(5, "set_basic - too small");
|
|
3089
3456
|
hufMetadata->hType = set_basic;
|
|
3090
3457
|
return 0;
|
|
3091
|
-
|
|
3092
|
-
}
|
|
3458
|
+
} }
|
|
3093
3459
|
|
|
3094
3460
|
/* Scan input and build symbol stats */
|
|
3095
|
-
{ size_t const largest =
|
|
3461
|
+
{ size_t const largest =
|
|
3462
|
+
HIST_count_wksp (countWksp, &maxSymbolValue,
|
|
3463
|
+
(const BYTE*)src, srcSize,
|
|
3464
|
+
workspace, wkspSize);
|
|
3096
3465
|
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
|
3097
3466
|
if (largest == srcSize) {
|
|
3467
|
+
/* only one literal symbol */
|
|
3098
3468
|
DEBUGLOG(5, "set_rle");
|
|
3099
3469
|
hufMetadata->hType = set_rle;
|
|
3100
3470
|
return 0;
|
|
3101
3471
|
}
|
|
3102
3472
|
if (largest <= (srcSize >> 7)+4) {
|
|
3473
|
+
/* heuristic: likely not compressible */
|
|
3103
3474
|
DEBUGLOG(5, "set_basic - no gain");
|
|
3104
3475
|
hufMetadata->hType = set_basic;
|
|
3105
3476
|
return 0;
|
|
3106
|
-
|
|
3107
|
-
}
|
|
3477
|
+
} }
|
|
3108
3478
|
|
|
3109
3479
|
/* Validate the previous Huffman table */
|
|
3110
|
-
if (repeat == HUF_repeat_check
|
|
3480
|
+
if (repeat == HUF_repeat_check
|
|
3481
|
+
&& !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
|
3111
3482
|
repeat = HUF_repeat_none;
|
|
3112
3483
|
}
|
|
3113
3484
|
|
|
3114
3485
|
/* Build Huffman Tree */
|
|
3115
3486
|
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
|
3116
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
3487
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags);
|
|
3488
|
+
assert(huffLog <= LitHufLog);
|
|
3117
3489
|
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
|
3118
3490
|
maxSymbolValue, huffLog,
|
|
3119
3491
|
nodeWksp, nodeWkspSize);
|
|
3120
3492
|
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
|
3121
3493
|
huffLog = (U32)maxBits;
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3136
|
-
hufMetadata->hType = set_repeat;
|
|
3137
|
-
return 0;
|
|
3138
|
-
}
|
|
3139
|
-
}
|
|
3140
|
-
if (newCSize + hSize >= srcSize) {
|
|
3141
|
-
DEBUGLOG(5, "set_basic - no gains");
|
|
3494
|
+
}
|
|
3495
|
+
{ /* Build and write the CTable */
|
|
3496
|
+
size_t const newCSize = HUF_estimateCompressedSize(
|
|
3497
|
+
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
|
3498
|
+
size_t const hSize = HUF_writeCTable_wksp(
|
|
3499
|
+
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
|
3500
|
+
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
|
|
3501
|
+
nodeWksp, nodeWkspSize);
|
|
3502
|
+
/* Check against repeating the previous CTable */
|
|
3503
|
+
if (repeat != HUF_repeat_none) {
|
|
3504
|
+
size_t const oldCSize = HUF_estimateCompressedSize(
|
|
3505
|
+
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
|
3506
|
+
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
|
3507
|
+
DEBUGLOG(5, "set_repeat - smaller");
|
|
3142
3508
|
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
3143
|
-
hufMetadata->hType =
|
|
3509
|
+
hufMetadata->hType = set_repeat;
|
|
3144
3510
|
return 0;
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
nextHuf
|
|
3149
|
-
|
|
3511
|
+
} }
|
|
3512
|
+
if (newCSize + hSize >= srcSize) {
|
|
3513
|
+
DEBUGLOG(5, "set_basic - no gains");
|
|
3514
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
3515
|
+
hufMetadata->hType = set_basic;
|
|
3516
|
+
return 0;
|
|
3150
3517
|
}
|
|
3518
|
+
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
|
3519
|
+
hufMetadata->hType = set_compressed;
|
|
3520
|
+
nextHuf->repeatMode = HUF_repeat_check;
|
|
3521
|
+
return hSize;
|
|
3151
3522
|
}
|
|
3152
3523
|
}
|
|
3153
3524
|
|
|
@@ -3157,8 +3528,9 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
|
3157
3528
|
* and updates nextEntropy to the appropriate repeatMode.
|
|
3158
3529
|
*/
|
|
3159
3530
|
static ZSTD_symbolEncodingTypeStats_t
|
|
3160
|
-
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
|
3161
|
-
|
|
3531
|
+
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
|
3532
|
+
{
|
|
3533
|
+
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
|
|
3162
3534
|
nextEntropy->litlength_repeatMode = FSE_repeat_none;
|
|
3163
3535
|
nextEntropy->offcode_repeatMode = FSE_repeat_none;
|
|
3164
3536
|
nextEntropy->matchlength_repeatMode = FSE_repeat_none;
|
|
@@ -3169,16 +3541,18 @@ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
|
|
|
3169
3541
|
* Builds entropy for the sequences.
|
|
3170
3542
|
* Stores symbol compression modes and fse table to fseMetadata.
|
|
3171
3543
|
* Requires ENTROPY_WORKSPACE_SIZE wksp.
|
|
3172
|
-
*
|
|
3173
|
-
static size_t
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3544
|
+
* @return : size of fse tables or error code */
|
|
3545
|
+
static size_t
|
|
3546
|
+
ZSTD_buildBlockEntropyStats_sequences(
|
|
3547
|
+
const seqStore_t* seqStorePtr,
|
|
3548
|
+
const ZSTD_fseCTables_t* prevEntropy,
|
|
3549
|
+
ZSTD_fseCTables_t* nextEntropy,
|
|
3550
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
3551
|
+
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
3552
|
+
void* workspace, size_t wkspSize)
|
|
3179
3553
|
{
|
|
3180
3554
|
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
3181
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
3555
|
+
size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
|
3182
3556
|
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
|
3183
3557
|
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
|
3184
3558
|
BYTE* op = ostart;
|
|
@@ -3205,23 +3579,28 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
|
|
|
3205
3579
|
/** ZSTD_buildBlockEntropyStats() :
|
|
3206
3580
|
* Builds entropy for the block.
|
|
3207
3581
|
* Requires workspace size ENTROPY_WORKSPACE_SIZE
|
|
3208
|
-
*
|
|
3209
|
-
*
|
|
3582
|
+
* @return : 0 on success, or an error code
|
|
3583
|
+
* Note : also employed in superblock
|
|
3210
3584
|
*/
|
|
3211
|
-
size_t ZSTD_buildBlockEntropyStats(
|
|
3212
|
-
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3585
|
+
size_t ZSTD_buildBlockEntropyStats(
|
|
3586
|
+
const seqStore_t* seqStorePtr,
|
|
3587
|
+
const ZSTD_entropyCTables_t* prevEntropy,
|
|
3588
|
+
ZSTD_entropyCTables_t* nextEntropy,
|
|
3589
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
3590
|
+
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
3591
|
+
void* workspace, size_t wkspSize)
|
|
3592
|
+
{
|
|
3593
|
+
size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
|
|
3594
|
+
int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
|
|
3595
|
+
int const hufFlags = huf_useOptDepth ? HUF_flags_optimalDepth : 0;
|
|
3596
|
+
|
|
3219
3597
|
entropyMetadata->hufMetadata.hufDesSize =
|
|
3220
3598
|
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
|
|
3221
3599
|
&prevEntropy->huf, &nextEntropy->huf,
|
|
3222
3600
|
&entropyMetadata->hufMetadata,
|
|
3223
3601
|
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
|
3224
|
-
workspace, wkspSize);
|
|
3602
|
+
workspace, wkspSize, hufFlags);
|
|
3603
|
+
|
|
3225
3604
|
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
|
|
3226
3605
|
entropyMetadata->fseMetadata.fseTablesSize =
|
|
3227
3606
|
ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
|
|
@@ -3234,11 +3613,12 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|
|
3234
3613
|
}
|
|
3235
3614
|
|
|
3236
3615
|
/* Returns the size estimate for the literals section (header + content) of a block */
|
|
3237
|
-
static size_t
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3616
|
+
static size_t
|
|
3617
|
+
ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
|
|
3618
|
+
const ZSTD_hufCTables_t* huf,
|
|
3619
|
+
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
3620
|
+
void* workspace, size_t wkspSize,
|
|
3621
|
+
int writeEntropy)
|
|
3242
3622
|
{
|
|
3243
3623
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3244
3624
|
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
|
@@ -3260,12 +3640,13 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
|
|
|
3260
3640
|
}
|
|
3261
3641
|
|
|
3262
3642
|
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
|
|
3263
|
-
static size_t
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3643
|
+
static size_t
|
|
3644
|
+
ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
|
3645
|
+
const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
|
|
3646
|
+
const FSE_CTable* fseCTable,
|
|
3647
|
+
const U8* additionalBits,
|
|
3648
|
+
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
3649
|
+
void* workspace, size_t wkspSize)
|
|
3269
3650
|
{
|
|
3270
3651
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
3271
3652
|
const BYTE* ctp = codeTable;
|
|
@@ -3297,99 +3678,107 @@ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
|
|
3297
3678
|
}
|
|
3298
3679
|
|
|
3299
3680
|
/* Returns the size estimate for the sequences section (header + content) of a block */
|
|
3300
|
-
static size_t
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3681
|
+
static size_t
|
|
3682
|
+
ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
|
|
3683
|
+
const BYTE* llCodeTable,
|
|
3684
|
+
const BYTE* mlCodeTable,
|
|
3685
|
+
size_t nbSeq,
|
|
3686
|
+
const ZSTD_fseCTables_t* fseTables,
|
|
3687
|
+
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
3688
|
+
void* workspace, size_t wkspSize,
|
|
3689
|
+
int writeEntropy)
|
|
3308
3690
|
{
|
|
3309
3691
|
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
|
|
3310
3692
|
size_t cSeqSizeEstimate = 0;
|
|
3311
3693
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3694
|
+
fseTables->offcodeCTable, NULL,
|
|
3695
|
+
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
3696
|
+
workspace, wkspSize);
|
|
3315
3697
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3698
|
+
fseTables->litlengthCTable, LL_bits,
|
|
3699
|
+
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
3700
|
+
workspace, wkspSize);
|
|
3319
3701
|
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3702
|
+
fseTables->matchlengthCTable, ML_bits,
|
|
3703
|
+
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
3704
|
+
workspace, wkspSize);
|
|
3323
3705
|
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
|
3324
3706
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
|
3325
3707
|
}
|
|
3326
3708
|
|
|
3327
3709
|
/* Returns the size estimate for a given stream of literals, of, ll, ml */
|
|
3328
|
-
static size_t
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3710
|
+
static size_t
|
|
3711
|
+
ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
|
|
3712
|
+
const BYTE* ofCodeTable,
|
|
3713
|
+
const BYTE* llCodeTable,
|
|
3714
|
+
const BYTE* mlCodeTable,
|
|
3715
|
+
size_t nbSeq,
|
|
3716
|
+
const ZSTD_entropyCTables_t* entropy,
|
|
3717
|
+
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
3718
|
+
void* workspace, size_t wkspSize,
|
|
3719
|
+
int writeLitEntropy, int writeSeqEntropy)
|
|
3720
|
+
{
|
|
3337
3721
|
size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
|
|
3338
|
-
|
|
3339
|
-
|
|
3722
|
+
&entropy->huf, &entropyMetadata->hufMetadata,
|
|
3723
|
+
workspace, wkspSize, writeLitEntropy);
|
|
3340
3724
|
size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
|
3341
|
-
|
|
3342
|
-
|
|
3725
|
+
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
|
3726
|
+
workspace, wkspSize, writeSeqEntropy);
|
|
3343
3727
|
return seqSize + literalsSize + ZSTD_blockHeaderSize;
|
|
3344
3728
|
}
|
|
3345
3729
|
|
|
3346
3730
|
/* Builds entropy statistics and uses them for blocksize estimation.
|
|
3347
3731
|
*
|
|
3348
|
-
*
|
|
3732
|
+
* @return: estimated compressed size of the seqStore, or a zstd error.
|
|
3349
3733
|
*/
|
|
3350
|
-
static size_t
|
|
3351
|
-
|
|
3734
|
+
static size_t
|
|
3735
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
|
|
3736
|
+
{
|
|
3737
|
+
ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
|
|
3352
3738
|
DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
|
|
3353
3739
|
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
|
|
3354
3740
|
&zc->blockState.prevCBlock->entropy,
|
|
3355
3741
|
&zc->blockState.nextCBlock->entropy,
|
|
3356
3742
|
&zc->appliedParams,
|
|
3357
3743
|
entropyMetadata,
|
|
3358
|
-
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE
|
|
3359
|
-
return ZSTD_estimateBlockSize(
|
|
3744
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
|
|
3745
|
+
return ZSTD_estimateBlockSize(
|
|
3746
|
+
seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
|
|
3360
3747
|
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
|
|
3361
3748
|
(size_t)(seqStore->sequences - seqStore->sequencesStart),
|
|
3362
|
-
&zc->blockState.nextCBlock->entropy,
|
|
3749
|
+
&zc->blockState.nextCBlock->entropy,
|
|
3750
|
+
entropyMetadata,
|
|
3751
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
|
|
3363
3752
|
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
|
|
3364
3753
|
}
|
|
3365
3754
|
|
|
3366
3755
|
/* Returns literals bytes represented in a seqStore */
|
|
3367
|
-
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
|
3756
|
+
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
|
|
3757
|
+
{
|
|
3368
3758
|
size_t literalsBytes = 0;
|
|
3369
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
|
3759
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
|
3370
3760
|
size_t i;
|
|
3371
3761
|
for (i = 0; i < nbSeqs; ++i) {
|
|
3372
|
-
seqDef seq = seqStore->sequencesStart[i];
|
|
3762
|
+
seqDef const seq = seqStore->sequencesStart[i];
|
|
3373
3763
|
literalsBytes += seq.litLength;
|
|
3374
3764
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
|
|
3375
3765
|
literalsBytes += 0x10000;
|
|
3376
|
-
|
|
3377
|
-
}
|
|
3766
|
+
} }
|
|
3378
3767
|
return literalsBytes;
|
|
3379
3768
|
}
|
|
3380
3769
|
|
|
3381
3770
|
/* Returns match bytes represented in a seqStore */
|
|
3382
|
-
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
|
3771
|
+
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
|
|
3772
|
+
{
|
|
3383
3773
|
size_t matchBytes = 0;
|
|
3384
|
-
size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
|
|
3774
|
+
size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
|
|
3385
3775
|
size_t i;
|
|
3386
3776
|
for (i = 0; i < nbSeqs; ++i) {
|
|
3387
3777
|
seqDef seq = seqStore->sequencesStart[i];
|
|
3388
3778
|
matchBytes += seq.mlBase + MINMATCH;
|
|
3389
3779
|
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
|
|
3390
3780
|
matchBytes += 0x10000;
|
|
3391
|
-
|
|
3392
|
-
}
|
|
3781
|
+
} }
|
|
3393
3782
|
return matchBytes;
|
|
3394
3783
|
}
|
|
3395
3784
|
|
|
@@ -3398,15 +3787,12 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
|
|
|
3398
3787
|
*/
|
|
3399
3788
|
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
3400
3789
|
const seqStore_t* originalSeqStore,
|
|
3401
|
-
size_t startIdx, size_t endIdx)
|
|
3402
|
-
|
|
3403
|
-
size_t literalsBytes;
|
|
3404
|
-
size_t literalsBytesPreceding = 0;
|
|
3405
|
-
|
|
3790
|
+
size_t startIdx, size_t endIdx)
|
|
3791
|
+
{
|
|
3406
3792
|
*resultSeqStore = *originalSeqStore;
|
|
3407
3793
|
if (startIdx > 0) {
|
|
3408
3794
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
|
|
3409
|
-
|
|
3795
|
+
resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3410
3796
|
}
|
|
3411
3797
|
|
|
3412
3798
|
/* Move longLengthPos into the correct position if necessary */
|
|
@@ -3419,13 +3805,12 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
|
3419
3805
|
}
|
|
3420
3806
|
resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
|
|
3421
3807
|
resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
|
|
3422
|
-
literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3423
|
-
resultSeqStore->litStart += literalsBytesPreceding;
|
|
3424
3808
|
if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
|
|
3425
3809
|
/* This accounts for possible last literals if the derived chunk reaches the end of the block */
|
|
3426
|
-
resultSeqStore->lit
|
|
3810
|
+
assert(resultSeqStore->lit == originalSeqStore->lit);
|
|
3427
3811
|
} else {
|
|
3428
|
-
|
|
3812
|
+
size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
|
|
3813
|
+
resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
|
|
3429
3814
|
}
|
|
3430
3815
|
resultSeqStore->llCode += startIdx;
|
|
3431
3816
|
resultSeqStore->mlCode += startIdx;
|
|
@@ -3433,20 +3818,26 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
|
|
|
3433
3818
|
}
|
|
3434
3819
|
|
|
3435
3820
|
/**
|
|
3436
|
-
* Returns the raw offset represented by the combination of
|
|
3437
|
-
*
|
|
3821
|
+
* Returns the raw offset represented by the combination of offBase, ll0, and repcode history.
|
|
3822
|
+
* offBase must represent a repcode in the numeric representation of ZSTD_storeSeq().
|
|
3438
3823
|
*/
|
|
3439
3824
|
static U32
|
|
3440
|
-
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32
|
|
3441
|
-
{
|
|
3442
|
-
U32 const
|
|
3443
|
-
assert(
|
|
3444
|
-
if (
|
|
3445
|
-
|
|
3446
|
-
|
|
3825
|
+
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0)
|
|
3826
|
+
{
|
|
3827
|
+
U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */
|
|
3828
|
+
assert(OFFBASE_IS_REPCODE(offBase));
|
|
3829
|
+
if (adjustedRepCode == ZSTD_REP_NUM) {
|
|
3830
|
+
assert(ll0);
|
|
3831
|
+
/* litlength == 0 and offCode == 2 implies selection of first repcode - 1
|
|
3832
|
+
* This is only valid if it results in a valid offset value, aka > 0.
|
|
3833
|
+
* Note : it may happen that `rep[0]==1` in exceptional circumstances.
|
|
3834
|
+
* In which case this function will return 0, which is an invalid offset.
|
|
3835
|
+
* It's not an issue though, since this value will be
|
|
3836
|
+
* compared and discarded within ZSTD_seqStore_resolveOffCodes().
|
|
3837
|
+
*/
|
|
3447
3838
|
return rep[0] - 1;
|
|
3448
3839
|
}
|
|
3449
|
-
return rep[
|
|
3840
|
+
return rep[adjustedRepCode];
|
|
3450
3841
|
}
|
|
3451
3842
|
|
|
3452
3843
|
/**
|
|
@@ -3462,30 +3853,32 @@ ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, c
|
|
|
3462
3853
|
* 1-3 : repcode 1-3
|
|
3463
3854
|
* 4+ : real_offset+3
|
|
3464
3855
|
*/
|
|
3465
|
-
static void
|
|
3466
|
-
|
|
3856
|
+
static void
|
|
3857
|
+
ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
|
|
3858
|
+
const seqStore_t* const seqStore, U32 const nbSeq)
|
|
3859
|
+
{
|
|
3467
3860
|
U32 idx = 0;
|
|
3468
3861
|
for (; idx < nbSeq; ++idx) {
|
|
3469
3862
|
seqDef* const seq = seqStore->sequencesStart + idx;
|
|
3470
3863
|
U32 const ll0 = (seq->litLength == 0);
|
|
3471
|
-
U32 const
|
|
3472
|
-
assert(
|
|
3473
|
-
if (
|
|
3474
|
-
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep,
|
|
3475
|
-
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep,
|
|
3864
|
+
U32 const offBase = seq->offBase;
|
|
3865
|
+
assert(offBase > 0);
|
|
3866
|
+
if (OFFBASE_IS_REPCODE(offBase)) {
|
|
3867
|
+
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
|
|
3868
|
+
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
|
|
3476
3869
|
/* Adjust simulated decompression repcode history if we come across a mismatch. Replace
|
|
3477
3870
|
* the repcode with the offset it actually references, determined by the compression
|
|
3478
3871
|
* repcode history.
|
|
3479
3872
|
*/
|
|
3480
3873
|
if (dRawOffset != cRawOffset) {
|
|
3481
|
-
seq->offBase = cRawOffset
|
|
3874
|
+
seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
|
|
3482
3875
|
}
|
|
3483
3876
|
}
|
|
3484
3877
|
/* Compression repcode history is always updated with values directly from the unmodified seqStore.
|
|
3485
3878
|
* Decompression repcode history may use modified seq->offset value taken from compression repcode history.
|
|
3486
3879
|
*/
|
|
3487
|
-
ZSTD_updateRep(dRepcodes->rep,
|
|
3488
|
-
ZSTD_updateRep(cRepcodes->rep,
|
|
3880
|
+
ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0);
|
|
3881
|
+
ZSTD_updateRep(cRepcodes->rep, offBase, ll0);
|
|
3489
3882
|
}
|
|
3490
3883
|
}
|
|
3491
3884
|
|
|
@@ -3495,10 +3888,11 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
|
|
|
3495
3888
|
* Returns the total size of that block (including header) or a ZSTD error code.
|
|
3496
3889
|
*/
|
|
3497
3890
|
static size_t
|
|
3498
|
-
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
|
|
3891
|
+
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
|
|
3892
|
+
const seqStore_t* const seqStore,
|
|
3499
3893
|
repcodes_t* const dRep, repcodes_t* const cRep,
|
|
3500
3894
|
void* dst, size_t dstCapacity,
|
|
3501
|
-
|
|
3895
|
+
const void* src, size_t srcSize,
|
|
3502
3896
|
U32 lastBlock, U32 isPartition)
|
|
3503
3897
|
{
|
|
3504
3898
|
const U32 rleMaxLength = 25;
|
|
@@ -3572,45 +3966,49 @@ typedef struct {
|
|
|
3572
3966
|
|
|
3573
3967
|
/* Helper function to perform the recursive search for block splits.
|
|
3574
3968
|
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
|
|
3575
|
-
* If advantageous to split, then we recurse down the two sub-blocks.
|
|
3576
|
-
* we do not recurse.
|
|
3969
|
+
* If advantageous to split, then we recurse down the two sub-blocks.
|
|
3970
|
+
* If not, or if an error occurred in estimation, then we do not recurse.
|
|
3577
3971
|
*
|
|
3578
|
-
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
|
3972
|
+
* Note: The recursion depth is capped by a heuristic minimum number of sequences,
|
|
3973
|
+
* defined by MIN_SEQUENCES_BLOCK_SPLITTING.
|
|
3579
3974
|
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
|
3580
3975
|
* In practice, recursion depth usually doesn't go beyond 4.
|
|
3581
3976
|
*
|
|
3582
|
-
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
|
3977
|
+
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS.
|
|
3978
|
+
* At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
|
|
3583
3979
|
* maximum of 128 KB, this value is actually impossible to reach.
|
|
3584
3980
|
*/
|
|
3585
3981
|
static void
|
|
3586
3982
|
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
|
|
3587
3983
|
ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
|
|
3588
3984
|
{
|
|
3589
|
-
seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
|
|
3590
|
-
seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
|
|
3591
|
-
seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
|
|
3985
|
+
seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
|
|
3986
|
+
seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
|
|
3987
|
+
seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
|
|
3592
3988
|
size_t estimatedOriginalSize;
|
|
3593
3989
|
size_t estimatedFirstHalfSize;
|
|
3594
3990
|
size_t estimatedSecondHalfSize;
|
|
3595
3991
|
size_t midIdx = (startIdx + endIdx)/2;
|
|
3596
3992
|
|
|
3993
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
|
3994
|
+
assert(endIdx >= startIdx);
|
|
3597
3995
|
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
|
|
3598
|
-
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
|
|
3996
|
+
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
|
|
3599
3997
|
return;
|
|
3600
3998
|
}
|
|
3601
|
-
DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
|
3602
3999
|
ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
|
3603
4000
|
ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
|
|
3604
4001
|
ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
|
|
3605
4002
|
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
|
|
3606
4003
|
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
|
|
3607
4004
|
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
|
|
3608
|
-
DEBUGLOG(
|
|
4005
|
+
DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
|
|
3609
4006
|
estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
|
|
3610
4007
|
if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
|
|
3611
4008
|
return;
|
|
3612
4009
|
}
|
|
3613
4010
|
if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
|
|
4011
|
+
DEBUGLOG(5, "split decided at seqNb:%zu", midIdx);
|
|
3614
4012
|
ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
|
|
3615
4013
|
splits->splitLocations[splits->idx] = (U32)midIdx;
|
|
3616
4014
|
splits->idx++;
|
|
@@ -3618,14 +4016,18 @@ ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t end
|
|
|
3618
4016
|
}
|
|
3619
4017
|
}
|
|
3620
4018
|
|
|
3621
|
-
/* Base recursive function.
|
|
4019
|
+
/* Base recursive function.
|
|
4020
|
+
* Populates a table with intra-block partition indices that can improve compression ratio.
|
|
3622
4021
|
*
|
|
3623
|
-
*
|
|
4022
|
+
* @return: number of splits made (which equals the size of the partition table - 1).
|
|
3624
4023
|
*/
|
|
3625
|
-
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
3626
|
-
|
|
4024
|
+
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
4025
|
+
{
|
|
4026
|
+
seqStoreSplits splits;
|
|
4027
|
+
splits.splitLocations = partitions;
|
|
4028
|
+
splits.idx = 0;
|
|
3627
4029
|
if (nbSeq <= 4) {
|
|
3628
|
-
DEBUGLOG(
|
|
4030
|
+
DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
|
|
3629
4031
|
/* Refuse to try and split anything with less than 4 sequences */
|
|
3630
4032
|
return 0;
|
|
3631
4033
|
}
|
|
@@ -3641,18 +4043,20 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
|
|
|
3641
4043
|
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
|
|
3642
4044
|
*/
|
|
3643
4045
|
static size_t
|
|
3644
|
-
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
|
3645
|
-
|
|
4046
|
+
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
|
4047
|
+
void* dst, size_t dstCapacity,
|
|
4048
|
+
const void* src, size_t blockSize,
|
|
4049
|
+
U32 lastBlock, U32 nbSeq)
|
|
3646
4050
|
{
|
|
3647
4051
|
size_t cSize = 0;
|
|
3648
4052
|
const BYTE* ip = (const BYTE*)src;
|
|
3649
4053
|
BYTE* op = (BYTE*)dst;
|
|
3650
4054
|
size_t i = 0;
|
|
3651
4055
|
size_t srcBytesTotal = 0;
|
|
3652
|
-
U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
|
3653
|
-
seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
|
3654
|
-
seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
|
3655
|
-
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
|
4056
|
+
U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
|
4057
|
+
seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
|
4058
|
+
seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
|
4059
|
+
size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
|
3656
4060
|
|
|
3657
4061
|
/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
|
|
3658
4062
|
* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
|
|
@@ -3674,30 +4078,31 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
|
|
|
3674
4078
|
ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
3675
4079
|
ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
|
|
3676
4080
|
|
|
3677
|
-
DEBUGLOG(
|
|
4081
|
+
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
|
3678
4082
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
|
3679
4083
|
(unsigned)zc->blockState.matchState.nextToUpdate);
|
|
3680
4084
|
|
|
3681
4085
|
if (numSplits == 0) {
|
|
3682
|
-
size_t cSizeSingleBlock =
|
|
3683
|
-
|
|
3684
|
-
|
|
3685
|
-
|
|
3686
|
-
|
|
4086
|
+
size_t cSizeSingleBlock =
|
|
4087
|
+
ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
|
|
4088
|
+
&dRep, &cRep,
|
|
4089
|
+
op, dstCapacity,
|
|
4090
|
+
ip, blockSize,
|
|
4091
|
+
lastBlock, 0 /* isPartition */);
|
|
3687
4092
|
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
|
|
3688
4093
|
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
|
|
3689
|
-
assert(
|
|
4094
|
+
assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
|
|
4095
|
+
assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
|
|
3690
4096
|
return cSizeSingleBlock;
|
|
3691
4097
|
}
|
|
3692
4098
|
|
|
3693
4099
|
ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
|
|
3694
4100
|
for (i = 0; i <= numSplits; ++i) {
|
|
3695
|
-
size_t srcBytes;
|
|
3696
4101
|
size_t cSizeChunk;
|
|
3697
4102
|
U32 const lastPartition = (i == numSplits);
|
|
3698
4103
|
U32 lastBlockEntireSrc = 0;
|
|
3699
4104
|
|
|
3700
|
-
srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
|
4105
|
+
size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
|
3701
4106
|
srcBytesTotal += srcBytes;
|
|
3702
4107
|
if (lastPartition) {
|
|
3703
4108
|
/* This is the final partition, need to account for possible last literals */
|
|
@@ -3712,7 +4117,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
|
|
|
3712
4117
|
op, dstCapacity,
|
|
3713
4118
|
ip, srcBytes,
|
|
3714
4119
|
lastBlockEntireSrc, 1 /* isPartition */);
|
|
3715
|
-
DEBUGLOG(5, "Estimated size: %zu
|
|
4120
|
+
DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
|
|
4121
|
+
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
|
|
3716
4122
|
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
|
|
3717
4123
|
|
|
3718
4124
|
ip += srcBytes;
|
|
@@ -3720,10 +4126,10 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapac
|
|
|
3720
4126
|
dstCapacity -= cSizeChunk;
|
|
3721
4127
|
cSize += cSizeChunk;
|
|
3722
4128
|
*currSeqStore = *nextSeqStore;
|
|
3723
|
-
assert(cSizeChunk <=
|
|
4129
|
+
assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
|
|
3724
4130
|
}
|
|
3725
|
-
/* cRep and dRep may have diverged during the compression.
|
|
3726
|
-
* for the next block.
|
|
4131
|
+
/* cRep and dRep may have diverged during the compression.
|
|
4132
|
+
* If so, we use the dRep repcodes for the next block.
|
|
3727
4133
|
*/
|
|
3728
4134
|
ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
|
|
3729
4135
|
return cSize;
|
|
@@ -3734,8 +4140,6 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
|
3734
4140
|
void* dst, size_t dstCapacity,
|
|
3735
4141
|
const void* src, size_t srcSize, U32 lastBlock)
|
|
3736
4142
|
{
|
|
3737
|
-
const BYTE* ip = (const BYTE*)src;
|
|
3738
|
-
BYTE* op = (BYTE*)dst;
|
|
3739
4143
|
U32 nbSeq;
|
|
3740
4144
|
size_t cSize;
|
|
3741
4145
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
|
@@ -3746,7 +4150,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
|
3746
4150
|
if (bss == ZSTDbss_noCompress) {
|
|
3747
4151
|
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
|
3748
4152
|
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
|
3749
|
-
cSize = ZSTD_noCompressBlock(
|
|
4153
|
+
cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
|
|
3750
4154
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
|
3751
4155
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
|
|
3752
4156
|
return cSize;
|
|
@@ -3764,9 +4168,9 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|
|
3764
4168
|
void* dst, size_t dstCapacity,
|
|
3765
4169
|
const void* src, size_t srcSize, U32 frame)
|
|
3766
4170
|
{
|
|
3767
|
-
/* This
|
|
3768
|
-
* This isn't the actual upper bound.
|
|
3769
|
-
* needs further investigation.
|
|
4171
|
+
/* This is an estimated upper bound for the length of an rle block.
|
|
4172
|
+
* This isn't the actual upper bound.
|
|
4173
|
+
* Finding the real threshold needs further investigation.
|
|
3770
4174
|
*/
|
|
3771
4175
|
const U32 rleMaxLength = 25;
|
|
3772
4176
|
size_t cSize;
|
|
@@ -3858,10 +4262,11 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
|
3858
4262
|
* * cSize >= blockBound(srcSize): We have expanded the block too much so
|
|
3859
4263
|
* emit an uncompressed block.
|
|
3860
4264
|
*/
|
|
3861
|
-
{
|
|
3862
|
-
|
|
4265
|
+
{ size_t const cSize =
|
|
4266
|
+
ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
|
|
3863
4267
|
if (cSize != ERROR(dstSize_tooSmall)) {
|
|
3864
|
-
size_t const maxCSize =
|
|
4268
|
+
size_t const maxCSize =
|
|
4269
|
+
srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
|
|
3865
4270
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
|
|
3866
4271
|
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
|
|
3867
4272
|
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
|
|
@@ -3869,7 +4274,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
|
|
|
3869
4274
|
}
|
|
3870
4275
|
}
|
|
3871
4276
|
}
|
|
3872
|
-
}
|
|
4277
|
+
} /* if (bss == ZSTDbss_compress)*/
|
|
3873
4278
|
|
|
3874
4279
|
DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
|
|
3875
4280
|
/* Superblock compression failed, attempt to emit a single no compress block.
|
|
@@ -3927,7 +4332,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
|
|
|
3927
4332
|
* All blocks will be terminated, all input will be consumed.
|
|
3928
4333
|
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
|
|
3929
4334
|
* Frame is supposed already started (header already produced)
|
|
3930
|
-
*
|
|
4335
|
+
* @return : compressed size, or an error code
|
|
3931
4336
|
*/
|
|
3932
4337
|
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
3933
4338
|
void* dst, size_t dstCapacity,
|
|
@@ -3951,7 +4356,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
|
3951
4356
|
ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
|
|
3952
4357
|
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
|
|
3953
4358
|
|
|
3954
|
-
|
|
4359
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
4360
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
4361
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1,
|
|
3955
4362
|
dstSize_tooSmall,
|
|
3956
4363
|
"not enough space to store compressed block");
|
|
3957
4364
|
if (remaining < blockSize) blockSize = remaining;
|
|
@@ -3990,7 +4397,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
|
3990
4397
|
MEM_writeLE24(op, cBlockHeader);
|
|
3991
4398
|
cSize += ZSTD_blockHeaderSize;
|
|
3992
4399
|
}
|
|
3993
|
-
}
|
|
4400
|
+
} /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
|
|
3994
4401
|
|
|
3995
4402
|
|
|
3996
4403
|
ip += blockSize;
|
|
@@ -4182,7 +4589,7 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
|
|
|
4182
4589
|
{
|
|
4183
4590
|
ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
|
|
4184
4591
|
assert(!ZSTD_checkCParams(cParams));
|
|
4185
|
-
return MIN
|
|
4592
|
+
return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog);
|
|
4186
4593
|
}
|
|
4187
4594
|
|
|
4188
4595
|
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
@@ -4202,31 +4609,47 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
4202
4609
|
ZSTD_cwksp* ws,
|
|
4203
4610
|
ZSTD_CCtx_params const* params,
|
|
4204
4611
|
const void* src, size_t srcSize,
|
|
4205
|
-
ZSTD_dictTableLoadMethod_e dtlm
|
|
4612
|
+
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4613
|
+
ZSTD_tableFillPurpose_e tfp)
|
|
4206
4614
|
{
|
|
4207
4615
|
const BYTE* ip = (const BYTE*) src;
|
|
4208
4616
|
const BYTE* const iend = ip + srcSize;
|
|
4209
4617
|
int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
|
|
4210
4618
|
|
|
4211
|
-
/* Assert that
|
|
4619
|
+
/* Assert that the ms params match the params we're being given */
|
|
4212
4620
|
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
|
|
4213
4621
|
|
|
4214
|
-
|
|
4622
|
+
{ /* Ensure large dictionaries can't cause index overflow */
|
|
4623
|
+
|
|
4215
4624
|
/* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
|
|
4216
4625
|
* Dictionaries right at the edge will immediately trigger overflow
|
|
4217
4626
|
* correction, but I don't want to insert extra constraints here.
|
|
4218
4627
|
*/
|
|
4219
|
-
U32
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
if (
|
|
4223
|
-
|
|
4628
|
+
U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX;
|
|
4629
|
+
|
|
4630
|
+
int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(¶ms->cParams);
|
|
4631
|
+
if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) {
|
|
4632
|
+
/* Some dictionary matchfinders in zstd use "short cache",
|
|
4633
|
+
* which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each
|
|
4634
|
+
* CDict hashtable entry as a tag rather than as part of an index.
|
|
4635
|
+
* When short cache is used, we need to truncate the dictionary
|
|
4636
|
+
* so that its indices don't overlap with the tag. */
|
|
4637
|
+
U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX;
|
|
4638
|
+
maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize);
|
|
4639
|
+
assert(!loadLdmDict);
|
|
4640
|
+
}
|
|
4641
|
+
|
|
4224
4642
|
/* If the dictionary is too large, only load the suffix of the dictionary. */
|
|
4225
4643
|
if (srcSize > maxDictSize) {
|
|
4226
4644
|
ip = iend - maxDictSize;
|
|
4227
4645
|
src = ip;
|
|
4228
4646
|
srcSize = maxDictSize;
|
|
4229
|
-
|
|
4647
|
+
} }
|
|
4648
|
+
|
|
4649
|
+
if (srcSize > ZSTD_CHUNKSIZE_MAX) {
|
|
4650
|
+
/* We must have cleared our windows when our source is this large. */
|
|
4651
|
+
assert(ZSTD_window_isEmpty(ms->window));
|
|
4652
|
+
if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window));
|
|
4230
4653
|
}
|
|
4231
4654
|
|
|
4232
4655
|
DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
|
|
@@ -4249,10 +4672,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
|
4249
4672
|
switch(params->cParams.strategy)
|
|
4250
4673
|
{
|
|
4251
4674
|
case ZSTD_fast:
|
|
4252
|
-
ZSTD_fillHashTable(ms, iend, dtlm);
|
|
4675
|
+
ZSTD_fillHashTable(ms, iend, dtlm, tfp);
|
|
4253
4676
|
break;
|
|
4254
4677
|
case ZSTD_dfast:
|
|
4255
|
-
ZSTD_fillDoubleHashTable(ms, iend, dtlm);
|
|
4678
|
+
ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
|
|
4256
4679
|
break;
|
|
4257
4680
|
|
|
4258
4681
|
case ZSTD_greedy:
|
|
@@ -4418,6 +4841,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4418
4841
|
ZSTD_CCtx_params const* params,
|
|
4419
4842
|
const void* dict, size_t dictSize,
|
|
4420
4843
|
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4844
|
+
ZSTD_tableFillPurpose_e tfp,
|
|
4421
4845
|
void* workspace)
|
|
4422
4846
|
{
|
|
4423
4847
|
const BYTE* dictPtr = (const BYTE*)dict;
|
|
@@ -4436,7 +4860,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4436
4860
|
{
|
|
4437
4861
|
size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
|
|
4438
4862
|
FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
|
|
4439
|
-
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
|
|
4863
|
+
ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
|
|
4440
4864
|
}
|
|
4441
4865
|
return dictID;
|
|
4442
4866
|
}
|
|
@@ -4452,6 +4876,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4452
4876
|
const void* dict, size_t dictSize,
|
|
4453
4877
|
ZSTD_dictContentType_e dictContentType,
|
|
4454
4878
|
ZSTD_dictTableLoadMethod_e dtlm,
|
|
4879
|
+
ZSTD_tableFillPurpose_e tfp,
|
|
4455
4880
|
void* workspace)
|
|
4456
4881
|
{
|
|
4457
4882
|
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
|
|
@@ -4464,13 +4889,13 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4464
4889
|
|
|
4465
4890
|
/* dict restricted modes */
|
|
4466
4891
|
if (dictContentType == ZSTD_dct_rawContent)
|
|
4467
|
-
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
|
|
4892
|
+
return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
|
4468
4893
|
|
|
4469
4894
|
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
|
|
4470
4895
|
if (dictContentType == ZSTD_dct_auto) {
|
|
4471
4896
|
DEBUGLOG(4, "raw content dictionary detected");
|
|
4472
4897
|
return ZSTD_loadDictionaryContent(
|
|
4473
|
-
ms, ls, ws, params, dict, dictSize, dtlm);
|
|
4898
|
+
ms, ls, ws, params, dict, dictSize, dtlm, tfp);
|
|
4474
4899
|
}
|
|
4475
4900
|
RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
|
|
4476
4901
|
assert(0); /* impossible */
|
|
@@ -4478,13 +4903,14 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
|
|
|
4478
4903
|
|
|
4479
4904
|
/* dict as full zstd dictionary */
|
|
4480
4905
|
return ZSTD_loadZstdDictionary(
|
|
4481
|
-
bs, ms, ws, params, dict, dictSize, dtlm, workspace);
|
|
4906
|
+
bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
|
|
4482
4907
|
}
|
|
4483
4908
|
|
|
4484
4909
|
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
|
|
4485
4910
|
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
|
|
4486
4911
|
|
|
4487
4912
|
/*! ZSTD_compressBegin_internal() :
|
|
4913
|
+
* Assumption : either @dict OR @cdict (or none) is non-NULL, never both
|
|
4488
4914
|
* @return : 0, or an error code */
|
|
4489
4915
|
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
4490
4916
|
const void* dict, size_t dictSize,
|
|
@@ -4520,11 +4946,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
|
|
|
4520
4946
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
|
4521
4947
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
|
|
4522
4948
|
cdict->dictContentSize, cdict->dictContentType, dtlm,
|
|
4523
|
-
cctx->entropyWorkspace)
|
|
4949
|
+
ZSTD_tfp_forCCtx, cctx->entropyWorkspace)
|
|
4524
4950
|
: ZSTD_compress_insertDictionary(
|
|
4525
4951
|
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
|
|
4526
4952
|
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
|
|
4527
|
-
dictContentType, dtlm, cctx->entropyWorkspace);
|
|
4953
|
+
dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace);
|
|
4528
4954
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
|
4529
4955
|
assert(dictID <= UINT_MAX);
|
|
4530
4956
|
cctx->dictID = (U32)dictID;
|
|
@@ -4565,11 +4991,11 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
|
|
|
4565
4991
|
&cctxParams, pledgedSrcSize);
|
|
4566
4992
|
}
|
|
4567
4993
|
|
|
4568
|
-
size_t
|
|
4994
|
+
size_t
|
|
4995
|
+
ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
|
|
4569
4996
|
{
|
|
4570
4997
|
ZSTD_CCtx_params cctxParams;
|
|
4571
|
-
{
|
|
4572
|
-
ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
|
4998
|
+
{ ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
|
|
4573
4999
|
ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
|
|
4574
5000
|
}
|
|
4575
5001
|
DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
|
|
@@ -4828,7 +5254,7 @@ static size_t ZSTD_initCDict_internal(
|
|
|
4828
5254
|
{ size_t const dictID = ZSTD_compress_insertDictionary(
|
|
4829
5255
|
&cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
|
|
4830
5256
|
&params, cdict->dictContent, cdict->dictContentSize,
|
|
4831
|
-
dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
|
|
5257
|
+
dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace);
|
|
4832
5258
|
FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
|
|
4833
5259
|
assert(dictID <= (size_t)(U32)-1);
|
|
4834
5260
|
cdict->dictID = (U32)dictID;
|
|
@@ -5316,30 +5742,41 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
|
|
|
5316
5742
|
|
|
5317
5743
|
static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
|
|
5318
5744
|
{
|
|
5319
|
-
|
|
5320
|
-
|
|
5321
|
-
|
|
5745
|
+
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5746
|
+
return cctx->blockSize - cctx->stableIn_notConsumed;
|
|
5747
|
+
}
|
|
5748
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
|
|
5749
|
+
{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
|
|
5750
|
+
if (hintInSize==0) hintInSize = cctx->blockSize;
|
|
5751
|
+
return hintInSize;
|
|
5752
|
+
}
|
|
5322
5753
|
}
|
|
5323
5754
|
|
|
5324
5755
|
/** ZSTD_compressStream_generic():
|
|
5325
5756
|
* internal function for all *compressStream*() variants
|
|
5326
|
-
*
|
|
5327
|
-
* @return : hint size for next input */
|
|
5757
|
+
* @return : hint size for next input to complete ongoing block */
|
|
5328
5758
|
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5329
5759
|
ZSTD_outBuffer* output,
|
|
5330
5760
|
ZSTD_inBuffer* input,
|
|
5331
5761
|
ZSTD_EndDirective const flushMode)
|
|
5332
5762
|
{
|
|
5333
|
-
const char* const istart = (const char*)input->src;
|
|
5334
|
-
const char* const iend =
|
|
5335
|
-
const char* ip =
|
|
5336
|
-
char* const ostart = (char*)output->dst;
|
|
5337
|
-
char* const oend =
|
|
5338
|
-
char* op =
|
|
5763
|
+
const char* const istart = (assert(input != NULL), (const char*)input->src);
|
|
5764
|
+
const char* const iend = (istart != NULL) ? istart + input->size : istart;
|
|
5765
|
+
const char* ip = (istart != NULL) ? istart + input->pos : istart;
|
|
5766
|
+
char* const ostart = (assert(output != NULL), (char*)output->dst);
|
|
5767
|
+
char* const oend = (ostart != NULL) ? ostart + output->size : ostart;
|
|
5768
|
+
char* op = (ostart != NULL) ? ostart + output->pos : ostart;
|
|
5339
5769
|
U32 someMoreWork = 1;
|
|
5340
5770
|
|
|
5341
5771
|
/* check expectations */
|
|
5342
|
-
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%
|
|
5772
|
+
DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos);
|
|
5773
|
+
assert(zcs != NULL);
|
|
5774
|
+
if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5775
|
+
assert(input->pos >= zcs->stableIn_notConsumed);
|
|
5776
|
+
input->pos -= zcs->stableIn_notConsumed;
|
|
5777
|
+
ip -= zcs->stableIn_notConsumed;
|
|
5778
|
+
zcs->stableIn_notConsumed = 0;
|
|
5779
|
+
}
|
|
5343
5780
|
if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
|
|
5344
5781
|
assert(zcs->inBuff != NULL);
|
|
5345
5782
|
assert(zcs->inBuffSize > 0);
|
|
@@ -5348,8 +5785,10 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5348
5785
|
assert(zcs->outBuff != NULL);
|
|
5349
5786
|
assert(zcs->outBuffSize > 0);
|
|
5350
5787
|
}
|
|
5351
|
-
|
|
5788
|
+
if (input->src == NULL) assert(input->size == 0);
|
|
5352
5789
|
assert(input->pos <= input->size);
|
|
5790
|
+
if (output->dst == NULL) assert(output->size == 0);
|
|
5791
|
+
assert(output->pos <= output->size);
|
|
5353
5792
|
assert((U32)flushMode <= (U32)ZSTD_e_end);
|
|
5354
5793
|
|
|
5355
5794
|
while (someMoreWork) {
|
|
@@ -5381,8 +5820,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5381
5820
|
zcs->inBuff + zcs->inBuffPos, toLoad,
|
|
5382
5821
|
ip, iend-ip);
|
|
5383
5822
|
zcs->inBuffPos += loaded;
|
|
5384
|
-
if (
|
|
5385
|
-
ip += loaded;
|
|
5823
|
+
if (ip) ip += loaded;
|
|
5386
5824
|
if ( (flushMode == ZSTD_e_continue)
|
|
5387
5825
|
&& (zcs->inBuffPos < zcs->inBuffTarget) ) {
|
|
5388
5826
|
/* not enough input to fill full block : stop here */
|
|
@@ -5393,6 +5831,20 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5393
5831
|
/* empty */
|
|
5394
5832
|
someMoreWork = 0; break;
|
|
5395
5833
|
}
|
|
5834
|
+
} else {
|
|
5835
|
+
assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
5836
|
+
if ( (flushMode == ZSTD_e_continue)
|
|
5837
|
+
&& ( (size_t)(iend - ip) < zcs->blockSize) ) {
|
|
5838
|
+
/* can't compress a full block : stop here */
|
|
5839
|
+
zcs->stableIn_notConsumed = (size_t)(iend - ip);
|
|
5840
|
+
ip = iend; /* pretend to have consumed input */
|
|
5841
|
+
someMoreWork = 0; break;
|
|
5842
|
+
}
|
|
5843
|
+
if ( (flushMode == ZSTD_e_flush)
|
|
5844
|
+
&& (ip == iend) ) {
|
|
5845
|
+
/* empty */
|
|
5846
|
+
someMoreWork = 0; break;
|
|
5847
|
+
}
|
|
5396
5848
|
}
|
|
5397
5849
|
/* compress current block (note : this stage cannot be stopped in the middle) */
|
|
5398
5850
|
DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
|
|
@@ -5400,9 +5852,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5400
5852
|
void* cDst;
|
|
5401
5853
|
size_t cSize;
|
|
5402
5854
|
size_t oSize = oend-op;
|
|
5403
|
-
size_t const iSize = inputBuffered
|
|
5404
|
-
|
|
5405
|
-
: MIN((size_t)(iend - ip), zcs->blockSize);
|
|
5855
|
+
size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
|
|
5856
|
+
: MIN((size_t)(iend - ip), zcs->blockSize);
|
|
5406
5857
|
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
|
|
5407
5858
|
cDst = op; /* compress into output buffer, to skip flush stage */
|
|
5408
5859
|
else
|
|
@@ -5425,19 +5876,16 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
|
5425
5876
|
if (!lastBlock)
|
|
5426
5877
|
assert(zcs->inBuffTarget <= zcs->inBuffSize);
|
|
5427
5878
|
zcs->inToCompress = zcs->inBuffPos;
|
|
5428
|
-
} else {
|
|
5429
|
-
unsigned const lastBlock = (ip + iSize == iend);
|
|
5430
|
-
assert(flushMode == ZSTD_e_end /* Already validated */);
|
|
5879
|
+
} else { /* !inputBuffered, hence ZSTD_bm_stable */
|
|
5880
|
+
unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend);
|
|
5431
5881
|
cSize = lastBlock ?
|
|
5432
5882
|
ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
|
|
5433
5883
|
ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
|
|
5434
5884
|
/* Consume the input prior to error checking to mirror buffered mode. */
|
|
5435
|
-
if (
|
|
5436
|
-
ip += iSize;
|
|
5885
|
+
if (ip) ip += iSize;
|
|
5437
5886
|
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
|
5438
5887
|
zcs->frameEnded = lastBlock;
|
|
5439
|
-
if (lastBlock)
|
|
5440
|
-
assert(ip == iend);
|
|
5888
|
+
if (lastBlock) assert(ip == iend);
|
|
5441
5889
|
}
|
|
5442
5890
|
if (cDst == op) { /* no need to flush */
|
|
5443
5891
|
op += cSize;
|
|
@@ -5513,8 +5961,10 @@ size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuf
|
|
|
5513
5961
|
/* After a compression call set the expected input/output buffer.
|
|
5514
5962
|
* This is validated at the start of the next compression call.
|
|
5515
5963
|
*/
|
|
5516
|
-
static void
|
|
5964
|
+
static void
|
|
5965
|
+
ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input)
|
|
5517
5966
|
{
|
|
5967
|
+
DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)");
|
|
5518
5968
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5519
5969
|
cctx->expectedInBuffer = *input;
|
|
5520
5970
|
}
|
|
@@ -5533,22 +5983,22 @@ static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
|
|
|
5533
5983
|
{
|
|
5534
5984
|
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
|
5535
5985
|
ZSTD_inBuffer const expect = cctx->expectedInBuffer;
|
|
5536
|
-
if (expect.src != input->src || expect.pos != input->pos
|
|
5537
|
-
RETURN_ERROR(
|
|
5538
|
-
if (endOp != ZSTD_e_end)
|
|
5539
|
-
RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
|
|
5986
|
+
if (expect.src != input->src || expect.pos != input->pos)
|
|
5987
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!");
|
|
5540
5988
|
}
|
|
5989
|
+
(void)endOp;
|
|
5541
5990
|
if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
|
|
5542
5991
|
size_t const outBufferSize = output->size - output->pos;
|
|
5543
5992
|
if (cctx->expectedOutBufferSize != outBufferSize)
|
|
5544
|
-
RETURN_ERROR(
|
|
5993
|
+
RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!");
|
|
5545
5994
|
}
|
|
5546
5995
|
return 0;
|
|
5547
5996
|
}
|
|
5548
5997
|
|
|
5549
5998
|
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5550
5999
|
ZSTD_EndDirective endOp,
|
|
5551
|
-
size_t inSize)
|
|
6000
|
+
size_t inSize)
|
|
6001
|
+
{
|
|
5552
6002
|
ZSTD_CCtx_params params = cctx->requestedParams;
|
|
5553
6003
|
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
|
|
5554
6004
|
FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
|
|
@@ -5562,9 +6012,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5562
6012
|
params.compressionLevel = cctx->cdict->compressionLevel;
|
|
5563
6013
|
}
|
|
5564
6014
|
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
|
|
5565
|
-
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-
|
|
5566
|
-
|
|
5567
|
-
|
|
6015
|
+
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */
|
|
6016
|
+
|
|
6017
|
+
{ size_t const dictSize = prefixDict.dict
|
|
5568
6018
|
? prefixDict.dictSize
|
|
5569
6019
|
: (cctx->cdict ? cctx->cdict->dictContentSize : 0);
|
|
5570
6020
|
ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
|
|
@@ -5576,8 +6026,18 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5576
6026
|
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
|
|
5577
6027
|
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
|
|
5578
6028
|
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
|
|
6029
|
+
params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
|
|
6030
|
+
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
|
|
6031
|
+
params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel);
|
|
5579
6032
|
|
|
5580
6033
|
#ifdef ZSTD_MULTITHREAD
|
|
6034
|
+
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
|
|
6035
|
+
RETURN_ERROR_IF(
|
|
6036
|
+
params.useSequenceProducer == 1 && params.nbWorkers >= 1,
|
|
6037
|
+
parameter_combination_unsupported,
|
|
6038
|
+
"External sequence producer isn't supported with nbWorkers >= 1"
|
|
6039
|
+
);
|
|
6040
|
+
|
|
5581
6041
|
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
|
|
5582
6042
|
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
|
|
5583
6043
|
}
|
|
@@ -5605,7 +6065,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5605
6065
|
cctx->streamStage = zcss_load;
|
|
5606
6066
|
cctx->appliedParams = params;
|
|
5607
6067
|
} else
|
|
5608
|
-
#endif
|
|
6068
|
+
#endif /* ZSTD_MULTITHREAD */
|
|
5609
6069
|
{ U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
|
|
5610
6070
|
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
|
5611
6071
|
FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
|
|
@@ -5631,6 +6091,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
|
5631
6091
|
return 0;
|
|
5632
6092
|
}
|
|
5633
6093
|
|
|
6094
|
+
/* @return provides a minimum amount of data remaining to be flushed from internal buffers
|
|
6095
|
+
*/
|
|
5634
6096
|
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
5635
6097
|
ZSTD_outBuffer* output,
|
|
5636
6098
|
ZSTD_inBuffer* input,
|
|
@@ -5645,8 +6107,27 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5645
6107
|
|
|
5646
6108
|
/* transparent initialization stage */
|
|
5647
6109
|
if (cctx->streamStage == zcss_init) {
|
|
5648
|
-
|
|
5649
|
-
|
|
6110
|
+
size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */
|
|
6111
|
+
size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed;
|
|
6112
|
+
if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */
|
|
6113
|
+
&& (endOp == ZSTD_e_continue) /* no flush requested, more input to come */
|
|
6114
|
+
&& (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */
|
|
6115
|
+
if (cctx->stableIn_notConsumed) { /* not the first time */
|
|
6116
|
+
/* check stable source guarantees */
|
|
6117
|
+
RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer");
|
|
6118
|
+
RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos");
|
|
6119
|
+
}
|
|
6120
|
+
/* pretend input was consumed, to give a sense forward progress */
|
|
6121
|
+
input->pos = input->size;
|
|
6122
|
+
/* save stable inBuffer, for later control, and flush/end */
|
|
6123
|
+
cctx->expectedInBuffer = *input;
|
|
6124
|
+
/* but actually input wasn't consumed, so keep track of position from where compression shall resume */
|
|
6125
|
+
cctx->stableIn_notConsumed += inputSize;
|
|
6126
|
+
/* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */
|
|
6127
|
+
return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */
|
|
6128
|
+
}
|
|
6129
|
+
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed");
|
|
6130
|
+
ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */
|
|
5650
6131
|
}
|
|
5651
6132
|
/* end of transparent initialization stage */
|
|
5652
6133
|
|
|
@@ -5659,6 +6140,13 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5659
6140
|
ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
|
|
5660
6141
|
cctx->cParamsChanged = 0;
|
|
5661
6142
|
}
|
|
6143
|
+
if (cctx->stableIn_notConsumed) {
|
|
6144
|
+
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
6145
|
+
/* some early data was skipped - make it available for consumption */
|
|
6146
|
+
assert(input->pos >= cctx->stableIn_notConsumed);
|
|
6147
|
+
input->pos -= cctx->stableIn_notConsumed;
|
|
6148
|
+
cctx->stableIn_notConsumed = 0;
|
|
6149
|
+
}
|
|
5662
6150
|
for (;;) {
|
|
5663
6151
|
size_t const ipos = input->pos;
|
|
5664
6152
|
size_t const opos = output->pos;
|
|
@@ -5697,7 +6185,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
|
|
5697
6185
|
ZSTD_setBufferExpectations(cctx, output, input);
|
|
5698
6186
|
return flushMin;
|
|
5699
6187
|
}
|
|
5700
|
-
#endif
|
|
6188
|
+
#endif /* ZSTD_MULTITHREAD */
|
|
5701
6189
|
FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
|
|
5702
6190
|
DEBUGLOG(5, "completed ZSTD_compressStream2");
|
|
5703
6191
|
ZSTD_setBufferExpectations(cctx, output, input);
|
|
@@ -5710,13 +6198,20 @@ size_t ZSTD_compressStream2_simpleArgs (
|
|
|
5710
6198
|
const void* src, size_t srcSize, size_t* srcPos,
|
|
5711
6199
|
ZSTD_EndDirective endOp)
|
|
5712
6200
|
{
|
|
5713
|
-
ZSTD_outBuffer output
|
|
5714
|
-
ZSTD_inBuffer input
|
|
6201
|
+
ZSTD_outBuffer output;
|
|
6202
|
+
ZSTD_inBuffer input;
|
|
6203
|
+
output.dst = dst;
|
|
6204
|
+
output.size = dstCapacity;
|
|
6205
|
+
output.pos = *dstPos;
|
|
6206
|
+
input.src = src;
|
|
6207
|
+
input.size = srcSize;
|
|
6208
|
+
input.pos = *srcPos;
|
|
5715
6209
|
/* ZSTD_compressStream2() will check validity of dstPos and srcPos */
|
|
5716
|
-
size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
|
5717
|
-
|
|
5718
|
-
|
|
5719
|
-
|
|
6210
|
+
{ size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
|
|
6211
|
+
*dstPos = output.pos;
|
|
6212
|
+
*srcPos = input.pos;
|
|
6213
|
+
return cErr;
|
|
6214
|
+
}
|
|
5720
6215
|
}
|
|
5721
6216
|
|
|
5722
6217
|
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
@@ -5739,6 +6234,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
|
5739
6234
|
/* Reset to the original values. */
|
|
5740
6235
|
cctx->requestedParams.inBufferMode = originalInBufferMode;
|
|
5741
6236
|
cctx->requestedParams.outBufferMode = originalOutBufferMode;
|
|
6237
|
+
|
|
5742
6238
|
FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
|
|
5743
6239
|
if (result != 0) { /* compression not completed, due to lack of output space */
|
|
5744
6240
|
assert(oPos == dstCapacity);
|
|
@@ -5749,64 +6245,61 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
|
|
|
5749
6245
|
}
|
|
5750
6246
|
}
|
|
5751
6247
|
|
|
5752
|
-
typedef struct {
|
|
5753
|
-
U32 idx; /* Index in array of ZSTD_Sequence */
|
|
5754
|
-
U32 posInSequence; /* Position within sequence at idx */
|
|
5755
|
-
size_t posInSrc; /* Number of bytes given by sequences provided so far */
|
|
5756
|
-
} ZSTD_sequencePosition;
|
|
5757
|
-
|
|
5758
6248
|
/* ZSTD_validateSequence() :
|
|
5759
6249
|
* @offCode : is presumed to follow format required by ZSTD_storeSeq()
|
|
5760
6250
|
* @returns a ZSTD error code if sequence is not valid
|
|
5761
6251
|
*/
|
|
5762
6252
|
static size_t
|
|
5763
|
-
ZSTD_validateSequence(U32 offCode, U32 matchLength,
|
|
5764
|
-
size_t posInSrc, U32 windowLog, size_t dictSize)
|
|
6253
|
+
ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch,
|
|
6254
|
+
size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer)
|
|
5765
6255
|
{
|
|
5766
|
-
U32 const windowSize =
|
|
5767
|
-
/* posInSrc represents the amount of data the
|
|
6256
|
+
U32 const windowSize = 1u << windowLog;
|
|
6257
|
+
/* posInSrc represents the amount of data the decoder would decode up to this point.
|
|
5768
6258
|
* As long as the amount of data decoded is less than or equal to window size, offsets may be
|
|
5769
6259
|
* larger than the total length of output decoded in order to reference the dict, even larger than
|
|
5770
6260
|
* window size. After output surpasses windowSize, we're limited to windowSize offsets again.
|
|
5771
6261
|
*/
|
|
5772
6262
|
size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
|
|
5773
|
-
|
|
5774
|
-
RETURN_ERROR_IF(
|
|
6263
|
+
size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4;
|
|
6264
|
+
RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!");
|
|
6265
|
+
/* Validate maxNbSeq is large enough for the given matchLength and minMatch */
|
|
6266
|
+
RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch");
|
|
5775
6267
|
return 0;
|
|
5776
6268
|
}
|
|
5777
6269
|
|
|
5778
6270
|
/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
|
|
5779
|
-
static U32
|
|
6271
|
+
static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
|
|
5780
6272
|
{
|
|
5781
|
-
U32
|
|
6273
|
+
U32 offBase = OFFSET_TO_OFFBASE(rawOffset);
|
|
5782
6274
|
|
|
5783
6275
|
if (!ll0 && rawOffset == rep[0]) {
|
|
5784
|
-
|
|
6276
|
+
offBase = REPCODE1_TO_OFFBASE;
|
|
5785
6277
|
} else if (rawOffset == rep[1]) {
|
|
5786
|
-
|
|
6278
|
+
offBase = REPCODE_TO_OFFBASE(2 - ll0);
|
|
5787
6279
|
} else if (rawOffset == rep[2]) {
|
|
5788
|
-
|
|
6280
|
+
offBase = REPCODE_TO_OFFBASE(3 - ll0);
|
|
5789
6281
|
} else if (ll0 && rawOffset == rep[0] - 1) {
|
|
5790
|
-
|
|
6282
|
+
offBase = REPCODE3_TO_OFFBASE;
|
|
5791
6283
|
}
|
|
5792
|
-
return
|
|
6284
|
+
return offBase;
|
|
5793
6285
|
}
|
|
5794
6286
|
|
|
5795
|
-
|
|
5796
|
-
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
|
|
5797
|
-
*/
|
|
5798
|
-
static size_t
|
|
6287
|
+
size_t
|
|
5799
6288
|
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
|
5800
6289
|
ZSTD_sequencePosition* seqPos,
|
|
5801
6290
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
5802
|
-
const void* src, size_t blockSize
|
|
6291
|
+
const void* src, size_t blockSize,
|
|
6292
|
+
ZSTD_paramSwitch_e externalRepSearch)
|
|
5803
6293
|
{
|
|
5804
6294
|
U32 idx = seqPos->idx;
|
|
6295
|
+
U32 const startIdx = idx;
|
|
5805
6296
|
BYTE const* ip = (BYTE const*)(src);
|
|
5806
6297
|
const BYTE* const iend = ip + blockSize;
|
|
5807
6298
|
repcodes_t updatedRepcodes;
|
|
5808
6299
|
U32 dictSize;
|
|
5809
6300
|
|
|
6301
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);
|
|
6302
|
+
|
|
5810
6303
|
if (cctx->cdict) {
|
|
5811
6304
|
dictSize = (U32)cctx->cdict->dictContentSize;
|
|
5812
6305
|
} else if (cctx->prefixDict.dict) {
|
|
@@ -5815,25 +6308,55 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
|
|
5815
6308
|
dictSize = 0;
|
|
5816
6309
|
}
|
|
5817
6310
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
5818
|
-
for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0)
|
|
6311
|
+
for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
|
|
5819
6312
|
U32 const litLength = inSeqs[idx].litLength;
|
|
5820
|
-
U32 const ll0 = (litLength == 0);
|
|
5821
6313
|
U32 const matchLength = inSeqs[idx].matchLength;
|
|
5822
|
-
U32
|
|
5823
|
-
|
|
6314
|
+
U32 offBase;
|
|
6315
|
+
|
|
6316
|
+
if (externalRepSearch == ZSTD_ps_disable) {
|
|
6317
|
+
offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
|
|
6318
|
+
} else {
|
|
6319
|
+
U32 const ll0 = (litLength == 0);
|
|
6320
|
+
offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
|
|
6321
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
|
6322
|
+
}
|
|
5824
6323
|
|
|
5825
|
-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)",
|
|
6324
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
|
5826
6325
|
if (cctx->appliedParams.validateSequences) {
|
|
5827
6326
|
seqPos->posInSrc += litLength + matchLength;
|
|
5828
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
|
5829
|
-
cctx->appliedParams.cParams.windowLog, dictSize),
|
|
6327
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
|
6328
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
|
5830
6329
|
"Sequence validation failed");
|
|
5831
6330
|
}
|
|
5832
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
|
6331
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
|
5833
6332
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
|
5834
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
|
6333
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
|
5835
6334
|
ip += matchLength + litLength;
|
|
5836
6335
|
}
|
|
6336
|
+
|
|
6337
|
+
/* If we skipped repcode search while parsing, we need to update repcodes now */
|
|
6338
|
+
assert(externalRepSearch != ZSTD_ps_auto);
|
|
6339
|
+
assert(idx >= startIdx);
|
|
6340
|
+
if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) {
|
|
6341
|
+
U32* const rep = updatedRepcodes.rep;
|
|
6342
|
+
U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
|
|
6343
|
+
|
|
6344
|
+
if (lastSeqIdx >= startIdx + 2) {
|
|
6345
|
+
rep[2] = inSeqs[lastSeqIdx - 2].offset;
|
|
6346
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
|
6347
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6348
|
+
} else if (lastSeqIdx == startIdx + 1) {
|
|
6349
|
+
rep[2] = rep[0];
|
|
6350
|
+
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
|
6351
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6352
|
+
} else {
|
|
6353
|
+
assert(lastSeqIdx == startIdx);
|
|
6354
|
+
rep[2] = rep[1];
|
|
6355
|
+
rep[1] = rep[0];
|
|
6356
|
+
rep[0] = inSeqs[lastSeqIdx].offset;
|
|
6357
|
+
}
|
|
6358
|
+
}
|
|
6359
|
+
|
|
5837
6360
|
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
|
|
5838
6361
|
|
|
5839
6362
|
if (inSeqs[idx].litLength) {
|
|
@@ -5842,26 +6365,15 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
|
|
|
5842
6365
|
ip += inSeqs[idx].litLength;
|
|
5843
6366
|
seqPos->posInSrc += inSeqs[idx].litLength;
|
|
5844
6367
|
}
|
|
5845
|
-
RETURN_ERROR_IF(ip != iend,
|
|
6368
|
+
RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!");
|
|
5846
6369
|
seqPos->idx = idx+1;
|
|
5847
6370
|
return 0;
|
|
5848
6371
|
}
|
|
5849
6372
|
|
|
5850
|
-
|
|
5851
|
-
* if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
|
|
5852
|
-
* went wrong.
|
|
5853
|
-
*
|
|
5854
|
-
* This function will attempt to scan through blockSize bytes represented by the sequences
|
|
5855
|
-
* in inSeqs, storing any (partial) sequences.
|
|
5856
|
-
*
|
|
5857
|
-
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
|
|
5858
|
-
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
|
|
5859
|
-
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
|
|
5860
|
-
*/
|
|
5861
|
-
static size_t
|
|
6373
|
+
size_t
|
|
5862
6374
|
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
|
5863
6375
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
5864
|
-
const void* src, size_t blockSize)
|
|
6376
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch)
|
|
5865
6377
|
{
|
|
5866
6378
|
U32 idx = seqPos->idx;
|
|
5867
6379
|
U32 startPosInSequence = seqPos->posInSequence;
|
|
@@ -5873,6 +6385,9 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5873
6385
|
U32 bytesAdjustment = 0;
|
|
5874
6386
|
U32 finalMatchSplit = 0;
|
|
5875
6387
|
|
|
6388
|
+
/* TODO(embg) support fast parsing mode in noBlockDelim mode */
|
|
6389
|
+
(void)externalRepSearch;
|
|
6390
|
+
|
|
5876
6391
|
if (cctx->cdict) {
|
|
5877
6392
|
dictSize = cctx->cdict->dictContentSize;
|
|
5878
6393
|
} else if (cctx->prefixDict.dict) {
|
|
@@ -5880,7 +6395,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5880
6395
|
} else {
|
|
5881
6396
|
dictSize = 0;
|
|
5882
6397
|
}
|
|
5883
|
-
DEBUGLOG(5, "
|
|
6398
|
+
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
|
|
5884
6399
|
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
|
5885
6400
|
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
|
5886
6401
|
while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
|
|
@@ -5888,7 +6403,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5888
6403
|
U32 litLength = currSeq.litLength;
|
|
5889
6404
|
U32 matchLength = currSeq.matchLength;
|
|
5890
6405
|
U32 const rawOffset = currSeq.offset;
|
|
5891
|
-
U32
|
|
6406
|
+
U32 offBase;
|
|
5892
6407
|
|
|
5893
6408
|
/* Modify the sequence depending on where endPosInSequence lies */
|
|
5894
6409
|
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
|
|
@@ -5902,7 +6417,6 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5902
6417
|
/* Move to the next sequence */
|
|
5903
6418
|
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
|
|
5904
6419
|
startPosInSequence = 0;
|
|
5905
|
-
idx++;
|
|
5906
6420
|
} else {
|
|
5907
6421
|
/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
|
|
5908
6422
|
does not reach the end of the match. So, we have to split the sequence */
|
|
@@ -5942,21 +6456,23 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5942
6456
|
}
|
|
5943
6457
|
/* Check if this offset can be represented with a repcode */
|
|
5944
6458
|
{ U32 const ll0 = (litLength == 0);
|
|
5945
|
-
|
|
5946
|
-
ZSTD_updateRep(updatedRepcodes.rep,
|
|
6459
|
+
offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
|
|
6460
|
+
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
|
5947
6461
|
}
|
|
5948
6462
|
|
|
5949
6463
|
if (cctx->appliedParams.validateSequences) {
|
|
5950
6464
|
seqPos->posInSrc += litLength + matchLength;
|
|
5951
|
-
FORWARD_IF_ERROR(ZSTD_validateSequence(
|
|
5952
|
-
cctx->appliedParams.cParams.windowLog, dictSize),
|
|
6465
|
+
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
|
|
6466
|
+
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
|
|
5953
6467
|
"Sequence validation failed");
|
|
5954
6468
|
}
|
|
5955
|
-
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)",
|
|
5956
|
-
RETURN_ERROR_IF(idx - seqPos->idx
|
|
6469
|
+
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
|
6470
|
+
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
|
5957
6471
|
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
|
5958
|
-
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend,
|
|
6472
|
+
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
|
|
5959
6473
|
ip += matchLength + litLength;
|
|
6474
|
+
if (!finalMatchSplit)
|
|
6475
|
+
idx++; /* Next Sequence */
|
|
5960
6476
|
}
|
|
5961
6477
|
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
|
|
5962
6478
|
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
|
|
@@ -5979,7 +6495,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
|
5979
6495
|
|
|
5980
6496
|
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
|
|
5981
6497
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
|
5982
|
-
const void* src, size_t blockSize);
|
|
6498
|
+
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
|
5983
6499
|
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
5984
6500
|
{
|
|
5985
6501
|
ZSTD_sequenceCopier sequenceCopier = NULL;
|
|
@@ -5993,6 +6509,57 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
|
|
|
5993
6509
|
return sequenceCopier;
|
|
5994
6510
|
}
|
|
5995
6511
|
|
|
6512
|
+
/* Discover the size of next block by searching for the delimiter.
|
|
6513
|
+
* Note that a block delimiter **must** exist in this mode,
|
|
6514
|
+
* otherwise it's an input error.
|
|
6515
|
+
* The block size retrieved will be later compared to ensure it remains within bounds */
|
|
6516
|
+
static size_t
|
|
6517
|
+
blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
|
6518
|
+
{
|
|
6519
|
+
int end = 0;
|
|
6520
|
+
size_t blockSize = 0;
|
|
6521
|
+
size_t spos = seqPos.idx;
|
|
6522
|
+
DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize);
|
|
6523
|
+
assert(spos <= inSeqsSize);
|
|
6524
|
+
while (spos < inSeqsSize) {
|
|
6525
|
+
end = (inSeqs[spos].offset == 0);
|
|
6526
|
+
blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength;
|
|
6527
|
+
if (end) {
|
|
6528
|
+
if (inSeqs[spos].matchLength != 0)
|
|
6529
|
+
RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0");
|
|
6530
|
+
break;
|
|
6531
|
+
}
|
|
6532
|
+
spos++;
|
|
6533
|
+
}
|
|
6534
|
+
if (!end)
|
|
6535
|
+
RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter");
|
|
6536
|
+
return blockSize;
|
|
6537
|
+
}
|
|
6538
|
+
|
|
6539
|
+
/* More a "target" block size */
|
|
6540
|
+
static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
|
|
6541
|
+
{
|
|
6542
|
+
int const lastBlock = (remaining <= blockSize);
|
|
6543
|
+
return lastBlock ? remaining : blockSize;
|
|
6544
|
+
}
|
|
6545
|
+
|
|
6546
|
+
static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
|
|
6547
|
+
size_t blockSize, size_t remaining,
|
|
6548
|
+
const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
|
|
6549
|
+
{
|
|
6550
|
+
DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);
|
|
6551
|
+
if (mode == ZSTD_sf_noBlockDelimiters)
|
|
6552
|
+
return blockSize_noDelimiter(blockSize, remaining);
|
|
6553
|
+
{ size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
|
|
6554
|
+
FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters");
|
|
6555
|
+
if (explicitBlockSize > blockSize)
|
|
6556
|
+
RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block");
|
|
6557
|
+
if (explicitBlockSize > remaining)
|
|
6558
|
+
RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source");
|
|
6559
|
+
return explicitBlockSize;
|
|
6560
|
+
}
|
|
6561
|
+
}
|
|
6562
|
+
|
|
5996
6563
|
/* Compress, block-by-block, all of the sequences given.
|
|
5997
6564
|
*
|
|
5998
6565
|
* Returns the cumulative size of all compressed blocks (including their headers),
|
|
@@ -6005,9 +6572,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6005
6572
|
const void* src, size_t srcSize)
|
|
6006
6573
|
{
|
|
6007
6574
|
size_t cSize = 0;
|
|
6008
|
-
U32 lastBlock;
|
|
6009
|
-
size_t blockSize;
|
|
6010
|
-
size_t compressedSeqsSize;
|
|
6011
6575
|
size_t remaining = srcSize;
|
|
6012
6576
|
ZSTD_sequencePosition seqPos = {0, 0, 0};
|
|
6013
6577
|
|
|
@@ -6027,22 +6591,29 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6027
6591
|
}
|
|
6028
6592
|
|
|
6029
6593
|
while (remaining) {
|
|
6594
|
+
size_t compressedSeqsSize;
|
|
6030
6595
|
size_t cBlockSize;
|
|
6031
6596
|
size_t additionalByteAdjustment;
|
|
6032
|
-
|
|
6033
|
-
|
|
6597
|
+
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
|
6598
|
+
cctx->blockSize, remaining,
|
|
6599
|
+
inSeqs, inSeqsSize, seqPos);
|
|
6600
|
+
U32 const lastBlock = (blockSize == remaining);
|
|
6601
|
+
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
|
6602
|
+
assert(blockSize <= remaining);
|
|
6034
6603
|
ZSTD_resetSeqStore(&cctx->seqStore);
|
|
6035
|
-
DEBUGLOG(
|
|
6604
|
+
DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize);
|
|
6036
6605
|
|
|
6037
|
-
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
|
|
6606
|
+
additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes);
|
|
6038
6607
|
FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
|
|
6039
6608
|
blockSize -= additionalByteAdjustment;
|
|
6040
6609
|
|
|
6041
6610
|
/* If blocks are too small, emit as a nocompress block */
|
|
6042
|
-
|
|
6611
|
+
/* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding
|
|
6612
|
+
* additional 1. We need to revisit and change this logic to be more consistent */
|
|
6613
|
+
if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) {
|
|
6043
6614
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
|
6044
6615
|
FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
|
|
6045
|
-
DEBUGLOG(
|
|
6616
|
+
DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
|
|
6046
6617
|
cSize += cBlockSize;
|
|
6047
6618
|
ip += blockSize;
|
|
6048
6619
|
op += cBlockSize;
|
|
@@ -6051,6 +6622,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6051
6622
|
continue;
|
|
6052
6623
|
}
|
|
6053
6624
|
|
|
6625
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
|
6054
6626
|
compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
|
|
6055
6627
|
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
|
6056
6628
|
&cctx->appliedParams,
|
|
@@ -6059,11 +6631,11 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6059
6631
|
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
|
6060
6632
|
cctx->bmi2);
|
|
6061
6633
|
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
|
6062
|
-
DEBUGLOG(
|
|
6634
|
+
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
|
6063
6635
|
|
|
6064
6636
|
if (!cctx->isFirstBlock &&
|
|
6065
6637
|
ZSTD_maybeRLE(&cctx->seqStore) &&
|
|
6066
|
-
ZSTD_isRLE(
|
|
6638
|
+
ZSTD_isRLE(ip, blockSize)) {
|
|
6067
6639
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
|
6068
6640
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
|
6069
6641
|
* This is only an issue for zstd <= v1.4.3
|
|
@@ -6074,12 +6646,12 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6074
6646
|
if (compressedSeqsSize == 0) {
|
|
6075
6647
|
/* ZSTD_noCompressBlock writes the block header as well */
|
|
6076
6648
|
cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
|
|
6077
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
|
6078
|
-
DEBUGLOG(
|
|
6649
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed");
|
|
6650
|
+
DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize);
|
|
6079
6651
|
} else if (compressedSeqsSize == 1) {
|
|
6080
6652
|
cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
|
|
6081
|
-
FORWARD_IF_ERROR(cBlockSize, "
|
|
6082
|
-
DEBUGLOG(
|
|
6653
|
+
FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed");
|
|
6654
|
+
DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize);
|
|
6083
6655
|
} else {
|
|
6084
6656
|
U32 cBlockHeader;
|
|
6085
6657
|
/* Error checking and repcodes update */
|
|
@@ -6091,11 +6663,10 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6091
6663
|
cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
|
|
6092
6664
|
MEM_writeLE24(op, cBlockHeader);
|
|
6093
6665
|
cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
|
|
6094
|
-
DEBUGLOG(
|
|
6666
|
+
DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize);
|
|
6095
6667
|
}
|
|
6096
6668
|
|
|
6097
6669
|
cSize += cBlockSize;
|
|
6098
|
-
DEBUGLOG(4, "cSize running total: %zu", cSize);
|
|
6099
6670
|
|
|
6100
6671
|
if (lastBlock) {
|
|
6101
6672
|
break;
|
|
@@ -6106,12 +6677,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
|
|
6106
6677
|
dstCapacity -= cBlockSize;
|
|
6107
6678
|
cctx->isFirstBlock = 0;
|
|
6108
6679
|
}
|
|
6680
|
+
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
|
|
6109
6681
|
}
|
|
6110
6682
|
|
|
6683
|
+
DEBUGLOG(4, "cSize final total: %zu", cSize);
|
|
6111
6684
|
return cSize;
|
|
6112
6685
|
}
|
|
6113
6686
|
|
|
6114
|
-
size_t ZSTD_compressSequences(ZSTD_CCtx*
|
|
6687
|
+
size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
|
6688
|
+
void* dst, size_t dstCapacity,
|
|
6115
6689
|
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
|
6116
6690
|
const void* src, size_t srcSize)
|
|
6117
6691
|
{
|
|
@@ -6121,7 +6695,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
|
|
|
6121
6695
|
size_t frameHeaderSize = 0;
|
|
6122
6696
|
|
|
6123
6697
|
/* Transparent initialization stage, same as compressStream2() */
|
|
6124
|
-
DEBUGLOG(
|
|
6698
|
+
DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity);
|
|
6125
6699
|
assert(cctx != NULL);
|
|
6126
6700
|
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
|
6127
6701
|
/* Begin writing output, starting with frame header */
|
|
@@ -6149,26 +6723,34 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapaci
|
|
|
6149
6723
|
cSize += 4;
|
|
6150
6724
|
}
|
|
6151
6725
|
|
|
6152
|
-
DEBUGLOG(
|
|
6726
|
+
DEBUGLOG(4, "Final compressed size: %zu", cSize);
|
|
6153
6727
|
return cSize;
|
|
6154
6728
|
}
|
|
6155
6729
|
|
|
6156
6730
|
/*====== Finalize ======*/
|
|
6157
6731
|
|
|
6732
|
+
static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs)
|
|
6733
|
+
{
|
|
6734
|
+
const ZSTD_inBuffer nullInput = { NULL, 0, 0 };
|
|
6735
|
+
const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
|
6736
|
+
return stableInput ? zcs->expectedInBuffer : nullInput;
|
|
6737
|
+
}
|
|
6738
|
+
|
|
6158
6739
|
/*! ZSTD_flushStream() :
|
|
6159
6740
|
* @return : amount of data remaining to flush */
|
|
6160
6741
|
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
|
6161
6742
|
{
|
|
6162
|
-
ZSTD_inBuffer input =
|
|
6743
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
|
6744
|
+
input.size = input.pos; /* do not ingest more input during flush */
|
|
6163
6745
|
return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
|
|
6164
6746
|
}
|
|
6165
6747
|
|
|
6166
6748
|
|
|
6167
6749
|
size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
|
6168
6750
|
{
|
|
6169
|
-
ZSTD_inBuffer input =
|
|
6751
|
+
ZSTD_inBuffer input = inBuffer_forEndFlush(zcs);
|
|
6170
6752
|
size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
|
|
6171
|
-
FORWARD_IF_ERROR(
|
|
6753
|
+
FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed");
|
|
6172
6754
|
if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
|
|
6173
6755
|
/* single thread mode : attempt to calculate remaining to flush more precisely */
|
|
6174
6756
|
{ size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
|
|
@@ -6290,7 +6872,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
|
|
|
6290
6872
|
cp.targetLength = (unsigned)(-clampedCompressionLevel);
|
|
6291
6873
|
}
|
|
6292
6874
|
/* refine parameters based on srcSize & dictSize */
|
|
6293
|
-
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
|
|
6875
|
+
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
|
|
6294
6876
|
}
|
|
6295
6877
|
}
|
|
6296
6878
|
|
|
@@ -6325,3 +6907,21 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
|
|
|
6325
6907
|
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
|
|
6326
6908
|
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
|
|
6327
6909
|
}
|
|
6910
|
+
|
|
6911
|
+
void ZSTD_registerSequenceProducer(
|
|
6912
|
+
ZSTD_CCtx* zc, void* mState,
|
|
6913
|
+
ZSTD_sequenceProducer_F* mFinder
|
|
6914
|
+
) {
|
|
6915
|
+
if (mFinder != NULL) {
|
|
6916
|
+
ZSTD_externalMatchCtx emctx;
|
|
6917
|
+
emctx.mState = mState;
|
|
6918
|
+
emctx.mFinder = mFinder;
|
|
6919
|
+
emctx.seqBuffer = NULL;
|
|
6920
|
+
emctx.seqBufferCapacity = 0;
|
|
6921
|
+
zc->externalMatchCtx = emctx;
|
|
6922
|
+
zc->requestedParams.useSequenceProducer = 1;
|
|
6923
|
+
} else {
|
|
6924
|
+
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
|
|
6925
|
+
zc->requestedParams.useSequenceProducer = 0;
|
|
6926
|
+
}
|
|
6927
|
+
}
|