zstd-ruby 1.5.0.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/README.md +1 -1
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/Makefile +50 -175
- data/ext/zstdruby/libzstd/README.md +7 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
- data/ext/zstdruby/libzstd/common/compiler.h +89 -43
- data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
- data/ext/zstdruby/libzstd/common/error_private.h +79 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +24 -22
- data/ext/zstdruby/libzstd/common/mem.h +18 -0
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
- data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
- data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/zdict.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +179 -136
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +8 -3
@@ -12,7 +12,6 @@
|
|
12
12
|
* Dependencies
|
13
13
|
***************************************/
|
14
14
|
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
|
15
|
-
#include "../common/cpu.h"
|
16
15
|
#include "../common/mem.h"
|
17
16
|
#include "hist.h" /* HIST_countFast_wksp */
|
18
17
|
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
@@ -42,6 +41,18 @@
|
|
42
41
|
# define ZSTD_COMPRESS_HEAPMODE 0
|
43
42
|
#endif
|
44
43
|
|
44
|
+
/*!
|
45
|
+
* ZSTD_HASHLOG3_MAX :
|
46
|
+
* Maximum size of the hash table dedicated to find 3-bytes matches,
|
47
|
+
* in log format, aka 17 => 1 << 17 == 128Ki positions.
|
48
|
+
* This structure is only used in zstd_opt.
|
49
|
+
* Since allocation is centralized for all strategies, it has to be known here.
|
50
|
+
* The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
|
51
|
+
* so that zstd_opt.c doesn't need to know about this constant.
|
52
|
+
*/
|
53
|
+
#ifndef ZSTD_HASHLOG3_MAX
|
54
|
+
# define ZSTD_HASHLOG3_MAX 17
|
55
|
+
#endif
|
45
56
|
|
46
57
|
/*-*************************************
|
47
58
|
* Helper functions
|
@@ -72,10 +83,10 @@ struct ZSTD_CDict_s {
|
|
72
83
|
ZSTD_customMem customMem;
|
73
84
|
U32 dictID;
|
74
85
|
int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
86
|
+
ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
|
87
|
+
* row-based matchfinder. Unless the cdict is reloaded, we will use
|
88
|
+
* the same greedy/lazy matchfinder at compression time.
|
89
|
+
*/
|
79
90
|
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
|
80
91
|
|
81
92
|
ZSTD_CCtx* ZSTD_createCCtx(void)
|
@@ -88,7 +99,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
|
|
88
99
|
assert(cctx != NULL);
|
89
100
|
ZSTD_memset(cctx, 0, sizeof(*cctx));
|
90
101
|
cctx->customMem = memManager;
|
91
|
-
cctx->bmi2 =
|
102
|
+
cctx->bmi2 = ZSTD_cpuSupportsBmi2();
|
92
103
|
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
|
93
104
|
assert(!ZSTD_isError(err));
|
94
105
|
(void)err;
|
@@ -214,35 +225,42 @@ static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
|
|
214
225
|
/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
|
215
226
|
* for this compression.
|
216
227
|
*/
|
217
|
-
static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const
|
218
|
-
assert(mode !=
|
219
|
-
return ZSTD_rowMatchFinderSupported(strategy) && (mode ==
|
228
|
+
static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
|
229
|
+
assert(mode != ZSTD_ps_auto);
|
230
|
+
return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
|
220
231
|
}
|
221
232
|
|
222
|
-
/* Returns row matchfinder usage
|
223
|
-
static
|
224
|
-
|
225
|
-
#if
|
233
|
+
/* Returns row matchfinder usage given an initial mode and cParams */
|
234
|
+
static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
|
235
|
+
const ZSTD_compressionParameters* const cParams) {
|
236
|
+
#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
|
226
237
|
int const kHasSIMD128 = 1;
|
227
238
|
#else
|
228
239
|
int const kHasSIMD128 = 0;
|
229
240
|
#endif
|
230
|
-
if (mode !=
|
231
|
-
mode =
|
241
|
+
if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
|
242
|
+
mode = ZSTD_ps_disable;
|
232
243
|
if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
|
233
244
|
if (kHasSIMD128) {
|
234
|
-
if (cParams->windowLog > 14) mode =
|
245
|
+
if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
|
235
246
|
} else {
|
236
|
-
if (cParams->windowLog > 17) mode =
|
247
|
+
if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
|
237
248
|
}
|
238
249
|
return mode;
|
239
250
|
}
|
240
251
|
|
252
|
+
/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
|
253
|
+
static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
|
254
|
+
const ZSTD_compressionParameters* const cParams) {
|
255
|
+
if (mode != ZSTD_ps_auto) return mode;
|
256
|
+
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
|
257
|
+
}
|
258
|
+
|
241
259
|
/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
|
242
260
|
static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
|
243
|
-
const
|
261
|
+
const ZSTD_paramSwitch_e useRowMatchFinder,
|
244
262
|
const U32 forDDSDict) {
|
245
|
-
assert(useRowMatchFinder !=
|
263
|
+
assert(useRowMatchFinder != ZSTD_ps_auto);
|
246
264
|
/* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
|
247
265
|
* We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
|
248
266
|
*/
|
@@ -253,16 +271,10 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
|
|
253
271
|
* enable long distance matching (wlog >= 27, strategy >= btopt).
|
254
272
|
* Returns 0 otherwise.
|
255
273
|
*/
|
256
|
-
static
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
/* Returns 1 if compression parameters are such that we should
|
261
|
-
* enable blockSplitter (wlog >= 17, strategy >= btopt).
|
262
|
-
* Returns 0 otherwise.
|
263
|
-
*/
|
264
|
-
static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) {
|
265
|
-
return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17;
|
274
|
+
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
|
275
|
+
const ZSTD_compressionParameters* const cParams) {
|
276
|
+
if (mode != ZSTD_ps_auto) return mode;
|
277
|
+
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
|
266
278
|
}
|
267
279
|
|
268
280
|
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
@@ -274,20 +286,13 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
|
|
274
286
|
cctxParams.cParams = cParams;
|
275
287
|
|
276
288
|
/* Adjust advanced params according to cParams */
|
277
|
-
|
278
|
-
|
279
|
-
cctxParams.ldmParams.enableLdm = 1;
|
280
|
-
/* LDM is enabled by default for optimal parser and window size >= 128MB */
|
289
|
+
cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
|
290
|
+
if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
281
291
|
ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
|
282
292
|
assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
|
283
293
|
assert(cctxParams.ldmParams.hashRateLog < 32);
|
284
294
|
}
|
285
|
-
|
286
|
-
if (ZSTD_CParams_useBlockSplitter(&cParams)) {
|
287
|
-
DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
|
288
|
-
cctxParams.splitBlocks = 1;
|
289
|
-
}
|
290
|
-
|
295
|
+
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
|
291
296
|
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
|
292
297
|
assert(!ZSTD_checkCParams(cParams));
|
293
298
|
return cctxParams;
|
@@ -348,7 +353,10 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
|
|
348
353
|
*/
|
349
354
|
cctxParams->compressionLevel = compressionLevel;
|
350
355
|
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
|
351
|
-
|
356
|
+
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
|
357
|
+
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
|
358
|
+
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
|
359
|
+
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
|
352
360
|
}
|
353
361
|
|
354
362
|
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
|
@@ -518,9 +526,9 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
518
526
|
return bounds;
|
519
527
|
|
520
528
|
case ZSTD_c_literalCompressionMode:
|
521
|
-
ZSTD_STATIC_ASSERT(
|
522
|
-
bounds.lowerBound =
|
523
|
-
bounds.upperBound =
|
529
|
+
ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
|
530
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
531
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
524
532
|
return bounds;
|
525
533
|
|
526
534
|
case ZSTD_c_targetCBlockSize:
|
@@ -549,14 +557,14 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
549
557
|
bounds.upperBound = 1;
|
550
558
|
return bounds;
|
551
559
|
|
552
|
-
case
|
553
|
-
bounds.lowerBound =
|
554
|
-
bounds.upperBound =
|
560
|
+
case ZSTD_c_useBlockSplitter:
|
561
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
562
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
555
563
|
return bounds;
|
556
564
|
|
557
565
|
case ZSTD_c_useRowMatchFinder:
|
558
|
-
bounds.lowerBound = (int)
|
559
|
-
bounds.upperBound = (int)
|
566
|
+
bounds.lowerBound = (int)ZSTD_ps_auto;
|
567
|
+
bounds.upperBound = (int)ZSTD_ps_disable;
|
560
568
|
return bounds;
|
561
569
|
|
562
570
|
case ZSTD_c_deterministicRefPrefix:
|
@@ -625,7 +633,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
|
625
633
|
case ZSTD_c_stableOutBuffer:
|
626
634
|
case ZSTD_c_blockDelimiters:
|
627
635
|
case ZSTD_c_validateSequences:
|
628
|
-
case
|
636
|
+
case ZSTD_c_useBlockSplitter:
|
629
637
|
case ZSTD_c_useRowMatchFinder:
|
630
638
|
case ZSTD_c_deterministicRefPrefix:
|
631
639
|
default:
|
@@ -680,7 +688,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
|
680
688
|
case ZSTD_c_stableOutBuffer:
|
681
689
|
case ZSTD_c_blockDelimiters:
|
682
690
|
case ZSTD_c_validateSequences:
|
683
|
-
case
|
691
|
+
case ZSTD_c_useBlockSplitter:
|
684
692
|
case ZSTD_c_useRowMatchFinder:
|
685
693
|
case ZSTD_c_deterministicRefPrefix:
|
686
694
|
break;
|
@@ -780,7 +788,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
780
788
|
}
|
781
789
|
|
782
790
|
case ZSTD_c_literalCompressionMode : {
|
783
|
-
const
|
791
|
+
const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
|
784
792
|
BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
|
785
793
|
CCtxParams->literalCompressionMode = lcm;
|
786
794
|
return CCtxParams->literalCompressionMode;
|
@@ -835,7 +843,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
835
843
|
return CCtxParams->enableDedicatedDictSearch;
|
836
844
|
|
837
845
|
case ZSTD_c_enableLongDistanceMatching :
|
838
|
-
CCtxParams->ldmParams.enableLdm = (value
|
846
|
+
CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
|
839
847
|
return CCtxParams->ldmParams.enableLdm;
|
840
848
|
|
841
849
|
case ZSTD_c_ldmHashLog :
|
@@ -857,8 +865,8 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
857
865
|
return CCtxParams->ldmParams.bucketSizeLog;
|
858
866
|
|
859
867
|
case ZSTD_c_ldmHashRateLog :
|
860
|
-
|
861
|
-
|
868
|
+
if (value!=0) /* 0 ==> default */
|
869
|
+
BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
|
862
870
|
CCtxParams->ldmParams.hashRateLog = value;
|
863
871
|
return CCtxParams->ldmParams.hashRateLog;
|
864
872
|
|
@@ -894,14 +902,14 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
|
894
902
|
CCtxParams->validateSequences = value;
|
895
903
|
return CCtxParams->validateSequences;
|
896
904
|
|
897
|
-
case
|
898
|
-
BOUNDCHECK(
|
899
|
-
CCtxParams->
|
900
|
-
return CCtxParams->
|
905
|
+
case ZSTD_c_useBlockSplitter:
|
906
|
+
BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
|
907
|
+
CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
|
908
|
+
return CCtxParams->useBlockSplitter;
|
901
909
|
|
902
910
|
case ZSTD_c_useRowMatchFinder:
|
903
911
|
BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
|
904
|
-
CCtxParams->useRowMatchFinder = (
|
912
|
+
CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
|
905
913
|
return CCtxParams->useRowMatchFinder;
|
906
914
|
|
907
915
|
case ZSTD_c_deterministicRefPrefix:
|
@@ -1032,8 +1040,8 @@ size_t ZSTD_CCtxParams_getParameter(
|
|
1032
1040
|
case ZSTD_c_validateSequences :
|
1033
1041
|
*value = (int)CCtxParams->validateSequences;
|
1034
1042
|
break;
|
1035
|
-
case
|
1036
|
-
*value = (int)CCtxParams->
|
1043
|
+
case ZSTD_c_useBlockSplitter :
|
1044
|
+
*value = (int)CCtxParams->useBlockSplitter;
|
1037
1045
|
break;
|
1038
1046
|
case ZSTD_c_useRowMatchFinder :
|
1039
1047
|
*value = (int)CCtxParams->useRowMatchFinder;
|
@@ -1067,7 +1075,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
|
|
1067
1075
|
return 0;
|
1068
1076
|
}
|
1069
1077
|
|
1070
|
-
|
1078
|
+
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
|
1071
1079
|
{
|
1072
1080
|
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
|
1073
1081
|
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
|
@@ -1147,14 +1155,14 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
|
|
1147
1155
|
return 0;
|
1148
1156
|
}
|
1149
1157
|
|
1150
|
-
|
1158
|
+
size_t ZSTD_CCtx_loadDictionary_byReference(
|
1151
1159
|
ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
|
1152
1160
|
{
|
1153
1161
|
return ZSTD_CCtx_loadDictionary_advanced(
|
1154
1162
|
cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
|
1155
1163
|
}
|
1156
1164
|
|
1157
|
-
|
1165
|
+
size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
|
1158
1166
|
{
|
1159
1167
|
return ZSTD_CCtx_loadDictionary_advanced(
|
1160
1168
|
cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
|
@@ -1324,7 +1332,7 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
|
1324
1332
|
break;
|
1325
1333
|
case ZSTD_cpm_createCDict:
|
1326
1334
|
/* Assume a small source size when creating a dictionary
|
1327
|
-
* with an
|
1335
|
+
* with an unknown source size.
|
1328
1336
|
*/
|
1329
1337
|
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
|
1330
1338
|
srcSize = minSrcSize;
|
@@ -1398,7 +1406,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
1398
1406
|
srcSizeHint = CCtxParams->srcSizeHint;
|
1399
1407
|
}
|
1400
1408
|
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
|
1401
|
-
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
1409
|
+
if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
1402
1410
|
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
1403
1411
|
assert(!ZSTD_checkCParams(cParams));
|
1404
1412
|
/* srcSizeHint == 0 means 0 */
|
@@ -1407,7 +1415,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
|
1407
1415
|
|
1408
1416
|
static size_t
|
1409
1417
|
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
1410
|
-
const
|
1418
|
+
const ZSTD_paramSwitch_e useRowMatchFinder,
|
1411
1419
|
const U32 enableDedicatedDictSearch,
|
1412
1420
|
const U32 forCCtx)
|
1413
1421
|
{
|
@@ -1440,7 +1448,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
|
|
1440
1448
|
|
1441
1449
|
/* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
|
1442
1450
|
ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
|
1443
|
-
assert(useRowMatchFinder !=
|
1451
|
+
assert(useRowMatchFinder != ZSTD_ps_auto);
|
1444
1452
|
|
1445
1453
|
DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
|
1446
1454
|
(U32)chainSize, (U32)hSize, (U32)h3Size);
|
@@ -1451,12 +1459,12 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1451
1459
|
const ZSTD_compressionParameters* cParams,
|
1452
1460
|
const ldmParams_t* ldmParams,
|
1453
1461
|
const int isStatic,
|
1454
|
-
const
|
1462
|
+
const ZSTD_paramSwitch_e useRowMatchFinder,
|
1455
1463
|
const size_t buffInSize,
|
1456
1464
|
const size_t buffOutSize,
|
1457
1465
|
const U64 pledgedSrcSize)
|
1458
1466
|
{
|
1459
|
-
size_t const windowSize =
|
1467
|
+
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
|
1460
1468
|
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
|
1461
1469
|
U32 const divider = (cParams->minMatch==3) ? 3 : 4;
|
1462
1470
|
size_t const maxNbSeq = blockSize / divider;
|
@@ -1469,7 +1477,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|
1469
1477
|
|
1470
1478
|
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
|
1471
1479
|
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
|
1472
|
-
size_t const ldmSeqSpace = ldmParams->enableLdm ?
|
1480
|
+
size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
|
1473
1481
|
ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
|
1474
1482
|
|
1475
1483
|
|
@@ -1496,8 +1504,8 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1496
1504
|
{
|
1497
1505
|
ZSTD_compressionParameters const cParams =
|
1498
1506
|
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
|
1499
|
-
|
1500
|
-
|
1507
|
+
ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
|
1508
|
+
&cParams);
|
1501
1509
|
|
1502
1510
|
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
|
1503
1511
|
/* estimateCCtxSize is for one-shot compression. So no buffers should
|
@@ -1514,9 +1522,9 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
1514
1522
|
/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
|
1515
1523
|
size_t noRowCCtxSize;
|
1516
1524
|
size_t rowCCtxSize;
|
1517
|
-
initialParams.useRowMatchFinder =
|
1525
|
+
initialParams.useRowMatchFinder = ZSTD_ps_disable;
|
1518
1526
|
noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
|
1519
|
-
initialParams.useRowMatchFinder =
|
1527
|
+
initialParams.useRowMatchFinder = ZSTD_ps_enable;
|
1520
1528
|
rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
|
1521
1529
|
return MAX(noRowCCtxSize, rowCCtxSize);
|
1522
1530
|
} else {
|
@@ -1561,7 +1569,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
|
|
1561
1569
|
size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
|
1562
1570
|
? ZSTD_compressBound(blockSize) + 1
|
1563
1571
|
: 0;
|
1564
|
-
|
1572
|
+
ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
|
1565
1573
|
|
1566
1574
|
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
1567
1575
|
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
|
@@ -1576,9 +1584,9 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
|
|
1576
1584
|
/* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
|
1577
1585
|
size_t noRowCCtxSize;
|
1578
1586
|
size_t rowCCtxSize;
|
1579
|
-
initialParams.useRowMatchFinder =
|
1587
|
+
initialParams.useRowMatchFinder = ZSTD_ps_disable;
|
1580
1588
|
noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
|
1581
|
-
initialParams.useRowMatchFinder =
|
1589
|
+
initialParams.useRowMatchFinder = ZSTD_ps_enable;
|
1582
1590
|
rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
|
1583
1591
|
return MAX(noRowCCtxSize, rowCCtxSize);
|
1584
1592
|
} else {
|
@@ -1713,7 +1721,7 @@ static size_t
|
|
1713
1721
|
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
1714
1722
|
ZSTD_cwksp* ws,
|
1715
1723
|
const ZSTD_compressionParameters* cParams,
|
1716
|
-
const
|
1724
|
+
const ZSTD_paramSwitch_e useRowMatchFinder,
|
1717
1725
|
const ZSTD_compResetPolicy_e crp,
|
1718
1726
|
const ZSTD_indexResetPolicy_e forceResetIndex,
|
1719
1727
|
const ZSTD_resetTarget_e forWho)
|
@@ -1728,7 +1736,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
1728
1736
|
size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
|
1729
1737
|
|
1730
1738
|
DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
|
1731
|
-
assert(useRowMatchFinder !=
|
1739
|
+
assert(useRowMatchFinder != ZSTD_ps_auto);
|
1732
1740
|
if (forceResetIndex == ZSTDirp_reset) {
|
1733
1741
|
ZSTD_window_init(&ms->window);
|
1734
1742
|
ZSTD_cwksp_mark_tables_dirty(ws);
|
@@ -1774,8 +1782,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
|
|
1774
1782
|
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
1775
1783
|
}
|
1776
1784
|
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
|
1777
|
-
U32 const rowLog = cParams->searchLog
|
1778
|
-
assert(cParams->hashLog
|
1785
|
+
U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
|
1786
|
+
assert(cParams->hashLog >= rowLog);
|
1779
1787
|
ms->rowHashLog = cParams->hashLog - rowLog;
|
1780
1788
|
}
|
1781
1789
|
}
|
@@ -1824,8 +1832,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1824
1832
|
ZSTD_buffered_policy_e const zbuff)
|
1825
1833
|
{
|
1826
1834
|
ZSTD_cwksp* const ws = &zc->workspace;
|
1827
|
-
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
|
1828
|
-
(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
|
1835
|
+
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
|
1836
|
+
(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
|
1829
1837
|
assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
|
1830
1838
|
|
1831
1839
|
zc->isFirstBlock = 1;
|
@@ -1836,8 +1844,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1836
1844
|
zc->appliedParams = *params;
|
1837
1845
|
params = &zc->appliedParams;
|
1838
1846
|
|
1839
|
-
assert(params->useRowMatchFinder !=
|
1840
|
-
|
1847
|
+
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
1848
|
+
assert(params->useBlockSplitter != ZSTD_ps_auto);
|
1849
|
+
assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
|
1850
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
1841
1851
|
/* Adjust long distance matching parameters */
|
1842
1852
|
ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
|
1843
1853
|
assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
|
@@ -1900,7 +1910,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1900
1910
|
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
|
1901
1911
|
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
|
1902
1912
|
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
|
1903
|
-
RETURN_ERROR_IF(zc->
|
1913
|
+
RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
|
1904
1914
|
} }
|
1905
1915
|
|
1906
1916
|
ZSTD_cwksp_clear(ws);
|
@@ -1937,7 +1947,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1937
1947
|
zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
|
1938
1948
|
|
1939
1949
|
/* ldm bucketOffsets table */
|
1940
|
-
if (params->ldmParams.enableLdm) {
|
1950
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
1941
1951
|
/* TODO: avoid memset? */
|
1942
1952
|
size_t const numBuckets =
|
1943
1953
|
((size_t)1) << (params->ldmParams.hashLog -
|
@@ -1964,7 +1974,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1964
1974
|
ZSTD_resetTarget_CCtx), "");
|
1965
1975
|
|
1966
1976
|
/* ldm hash table */
|
1967
|
-
if (params->ldmParams.enableLdm) {
|
1977
|
+
if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
|
1968
1978
|
/* TODO: avoid memset? */
|
1969
1979
|
size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
|
1970
1980
|
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
|
@@ -1976,8 +1986,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|
1976
1986
|
zc->ldmState.loadedDictEnd = 0;
|
1977
1987
|
}
|
1978
1988
|
|
1979
|
-
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
|
1980
1989
|
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
1990
|
+
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
|
1981
1991
|
|
1982
1992
|
zc->initialized = 1;
|
1983
1993
|
|
@@ -2115,7 +2125,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
|
|
2115
2125
|
}
|
2116
2126
|
|
2117
2127
|
ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
|
2118
|
-
assert(params.useRowMatchFinder !=
|
2128
|
+
assert(params.useRowMatchFinder != ZSTD_ps_auto);
|
2119
2129
|
|
2120
2130
|
/* copy tables */
|
2121
2131
|
{ size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
|
@@ -2209,8 +2219,12 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
|
|
2209
2219
|
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
|
2210
2220
|
/* Copy only compression parameters related to tables. */
|
2211
2221
|
params.cParams = srcCCtx->appliedParams.cParams;
|
2212
|
-
assert(srcCCtx->appliedParams.useRowMatchFinder !=
|
2222
|
+
assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
|
2223
|
+
assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
|
2224
|
+
assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
|
2213
2225
|
params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
|
2226
|
+
params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
|
2227
|
+
params.ldmParams = srcCCtx->appliedParams.ldmParams;
|
2214
2228
|
params.fParams = fParams;
|
2215
2229
|
ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
|
2216
2230
|
/* loadedDictSize */ 0,
|
@@ -2296,6 +2310,8 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
|
|
2296
2310
|
int const nbRows = (int)size / ZSTD_ROWSIZE;
|
2297
2311
|
int cellNb = 0;
|
2298
2312
|
int rowNb;
|
2313
|
+
/* Protect special index values < ZSTD_WINDOW_START_INDEX. */
|
2314
|
+
U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
|
2299
2315
|
assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
|
2300
2316
|
assert(size < (1U<<31)); /* can be casted to int */
|
2301
2317
|
|
@@ -2315,12 +2331,17 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
|
|
2315
2331
|
for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
|
2316
2332
|
int column;
|
2317
2333
|
for (column=0; column<ZSTD_ROWSIZE; column++) {
|
2318
|
-
|
2319
|
-
|
2320
|
-
|
2334
|
+
U32 newVal;
|
2335
|
+
if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
|
2336
|
+
/* This write is pointless, but is required(?) for the compiler
|
2337
|
+
* to auto-vectorize the loop. */
|
2338
|
+
newVal = ZSTD_DUBT_UNSORTED_MARK;
|
2339
|
+
} else if (table[cellNb] < reducerThreshold) {
|
2340
|
+
newVal = 0;
|
2341
|
+
} else {
|
2342
|
+
newVal = table[cellNb] - reducerValue;
|
2321
2343
|
}
|
2322
|
-
|
2323
|
-
else table[cellNb] -= reducerValue;
|
2344
|
+
table[cellNb] = newVal;
|
2324
2345
|
cellNb++;
|
2325
2346
|
} }
|
2326
2347
|
}
|
@@ -2399,11 +2420,13 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
|
|
2399
2420
|
/* ZSTD_blockSplitterEnabled():
|
2400
2421
|
* Returns if block splitting param is being used
|
2401
2422
|
* If used, compression will do best effort to split a block in order to improve compression ratio.
|
2423
|
+
* At the time this function is called, the parameter must be finalized.
|
2402
2424
|
* Returns 1 if true, 0 otherwise. */
|
2403
2425
|
static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
|
2404
2426
|
{
|
2405
|
-
DEBUGLOG(5, "ZSTD_blockSplitterEnabled(
|
2406
|
-
|
2427
|
+
DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
|
2428
|
+
assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
|
2429
|
+
return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
|
2407
2430
|
}
|
2408
2431
|
|
2409
2432
|
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
|
@@ -2546,6 +2569,7 @@ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
|
|
2546
2569
|
* compresses both literals and sequences
|
2547
2570
|
* Returns compressed size of block, or a zstd error.
|
2548
2571
|
*/
|
2572
|
+
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
|
2549
2573
|
MEM_STATIC size_t
|
2550
2574
|
ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
2551
2575
|
const ZSTD_entropyCTables_t* prevEntropy,
|
@@ -2580,15 +2604,19 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
|
|
2580
2604
|
|
2581
2605
|
/* Compress literals */
|
2582
2606
|
{ const BYTE* const literals = seqStorePtr->litStart;
|
2607
|
+
size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
2608
|
+
size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
|
2609
|
+
/* Base suspicion of uncompressibility on ratio of literals to sequences */
|
2610
|
+
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
|
2583
2611
|
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
|
2584
2612
|
size_t const cSize = ZSTD_compressLiterals(
|
2585
2613
|
&prevEntropy->huf, &nextEntropy->huf,
|
2586
2614
|
cctxParams->cParams.strategy,
|
2587
|
-
|
2615
|
+
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
2588
2616
|
op, dstCapacity,
|
2589
2617
|
literals, litSize,
|
2590
2618
|
entropyWorkspace, entropyWkspSize,
|
2591
|
-
bmi2);
|
2619
|
+
bmi2, suspectUncompressible);
|
2592
2620
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
|
2593
2621
|
assert(cSize <= dstCapacity);
|
2594
2622
|
op += cSize;
|
@@ -2693,7 +2721,7 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
|
|
2693
2721
|
/* ZSTD_selectBlockCompressor() :
|
2694
2722
|
* Not static, but internal use only (used by long distance matcher)
|
2695
2723
|
* assumption : strat is a valid strategy */
|
2696
|
-
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat,
|
2724
|
+
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
|
2697
2725
|
{
|
2698
2726
|
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
|
2699
2727
|
{ ZSTD_compressBlock_fast /* default for 0 */,
|
@@ -2758,7 +2786,7 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRow
|
|
2758
2786
|
ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
|
2759
2787
|
};
|
2760
2788
|
DEBUGLOG(4, "Selecting a row-based matchfinder");
|
2761
|
-
assert(useRowMatchFinder !=
|
2789
|
+
assert(useRowMatchFinder != ZSTD_ps_auto);
|
2762
2790
|
selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
|
2763
2791
|
} else {
|
2764
2792
|
selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
|
@@ -2825,7 +2853,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2825
2853
|
zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
|
2826
2854
|
}
|
2827
2855
|
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
|
2828
|
-
assert(
|
2856
|
+
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
|
2829
2857
|
/* Updates ldmSeqStore.pos */
|
2830
2858
|
lastLLSize =
|
2831
2859
|
ZSTD_ldm_blockCompress(&zc->externSeqStore,
|
@@ -2834,7 +2862,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
|
2834
2862
|
zc->appliedParams.useRowMatchFinder,
|
2835
2863
|
src, srcSize);
|
2836
2864
|
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
|
2837
|
-
} else if (zc->appliedParams.ldmParams.enableLdm) {
|
2865
|
+
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
2838
2866
|
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
|
2839
2867
|
|
2840
2868
|
ldmSeqStore.seq = zc->ldmSequences;
|
@@ -3027,7 +3055,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
3027
3055
|
const ZSTD_hufCTables_t* prevHuf,
|
3028
3056
|
ZSTD_hufCTables_t* nextHuf,
|
3029
3057
|
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
3030
|
-
const int
|
3058
|
+
const int literalsCompressionIsDisabled,
|
3031
3059
|
void* workspace, size_t wkspSize)
|
3032
3060
|
{
|
3033
3061
|
BYTE* const wkspStart = (BYTE*)workspace;
|
@@ -3045,7 +3073,7 @@ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSi
|
|
3045
3073
|
/* Prepare nextEntropy assuming reusing the existing table */
|
3046
3074
|
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
3047
3075
|
|
3048
|
-
if (
|
3076
|
+
if (literalsCompressionIsDisabled) {
|
3049
3077
|
DEBUGLOG(5, "set_basic - disabled");
|
3050
3078
|
hufMetadata->hType = set_basic;
|
3051
3079
|
return 0;
|
@@ -3192,7 +3220,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|
3192
3220
|
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
|
3193
3221
|
&prevEntropy->huf, &nextEntropy->huf,
|
3194
3222
|
&entropyMetadata->hufMetadata,
|
3195
|
-
|
3223
|
+
ZSTD_literalsCompressionIsDisabled(cctxParams),
|
3196
3224
|
workspace, wkspSize);
|
3197
3225
|
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
|
3198
3226
|
entropyMetadata->fseMetadata.fseTablesSize =
|
@@ -3235,7 +3263,7 @@ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSiz
|
|
3235
3263
|
static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
|
3236
3264
|
const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
|
3237
3265
|
const FSE_CTable* fseCTable,
|
3238
|
-
const
|
3266
|
+
const U8* additionalBits,
|
3239
3267
|
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
3240
3268
|
void* workspace, size_t wkspSize)
|
3241
3269
|
{
|
@@ -3319,19 +3347,20 @@ static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
|
|
3319
3347
|
*
|
3320
3348
|
* Returns the estimated compressed size of the seqStore, or a zstd error.
|
3321
3349
|
*/
|
3322
|
-
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore,
|
3323
|
-
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
3350
|
+
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
|
3351
|
+
ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
|
3352
|
+
DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
|
3324
3353
|
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
|
3325
3354
|
&zc->blockState.prevCBlock->entropy,
|
3326
3355
|
&zc->blockState.nextCBlock->entropy,
|
3327
3356
|
&zc->appliedParams,
|
3328
|
-
|
3357
|
+
entropyMetadata,
|
3329
3358
|
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
3330
3359
|
return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
|
3331
3360
|
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
|
3332
3361
|
(size_t)(seqStore->sequences - seqStore->sequencesStart),
|
3333
|
-
&zc->blockState.nextCBlock->entropy,
|
3334
|
-
(int)(entropyMetadata
|
3362
|
+
&zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
|
3363
|
+
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
|
3335
3364
|
}
|
3336
3365
|
|
3337
3366
|
/* Returns literals bytes represented in a seqStore */
|
@@ -3474,6 +3503,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
|
|
3474
3503
|
if (isPartition)
|
3475
3504
|
ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
|
3476
3505
|
|
3506
|
+
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
|
3477
3507
|
cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
|
3478
3508
|
&zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
|
3479
3509
|
&zc->appliedParams,
|
@@ -3499,9 +3529,6 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
|
|
3499
3529
|
return 0;
|
3500
3530
|
}
|
3501
3531
|
|
3502
|
-
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
3503
|
-
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
3504
|
-
|
3505
3532
|
if (cSeqsSize == 0) {
|
3506
3533
|
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
|
3507
3534
|
FORWARD_IF_ERROR(cSize, "Nocompress block failed");
|
@@ -3518,6 +3545,10 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
|
|
3518
3545
|
cSize = ZSTD_blockHeaderSize + cSeqsSize;
|
3519
3546
|
DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
|
3520
3547
|
}
|
3548
|
+
|
3549
|
+
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
|
3550
|
+
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
|
3551
|
+
|
3521
3552
|
return cSize;
|
3522
3553
|
}
|
3523
3554
|
|
@@ -3528,7 +3559,6 @@ typedef struct {
|
|
3528
3559
|
} seqStoreSplits;
|
3529
3560
|
|
3530
3561
|
#define MIN_SEQUENCES_BLOCK_SPLITTING 300
|
3531
|
-
#define MAX_NB_SPLITS 196
|
3532
3562
|
|
3533
3563
|
/* Helper function to perform the recursive search for block splits.
|
3534
3564
|
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
|
@@ -3539,29 +3569,31 @@ typedef struct {
|
|
3539
3569
|
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
|
3540
3570
|
* In practice, recursion depth usually doesn't go beyond 4.
|
3541
3571
|
*
|
3542
|
-
* Furthermore, the number of splits is capped by
|
3572
|
+
* Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
|
3543
3573
|
* maximum of 128 KB, this value is actually impossible to reach.
|
3544
3574
|
*/
|
3545
3575
|
static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
|
3546
|
-
|
3547
|
-
seqStore_t fullSeqStoreChunk;
|
3548
|
-
seqStore_t firstHalfSeqStore;
|
3549
|
-
seqStore_t secondHalfSeqStore;
|
3576
|
+
ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
|
3577
|
+
seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
|
3578
|
+
seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
|
3579
|
+
seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
|
3550
3580
|
size_t estimatedOriginalSize;
|
3551
3581
|
size_t estimatedFirstHalfSize;
|
3552
3582
|
size_t estimatedSecondHalfSize;
|
3553
3583
|
size_t midIdx = (startIdx + endIdx)/2;
|
3554
3584
|
|
3555
|
-
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >=
|
3585
|
+
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
|
3586
|
+
DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
|
3556
3587
|
return;
|
3557
3588
|
}
|
3558
|
-
|
3559
|
-
ZSTD_deriveSeqStoreChunk(
|
3560
|
-
ZSTD_deriveSeqStoreChunk(
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3564
|
-
|
3589
|
+
DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
|
3590
|
+
ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
|
3591
|
+
ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
|
3592
|
+
ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
|
3593
|
+
estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
|
3594
|
+
estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
|
3595
|
+
estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
|
3596
|
+
DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
|
3565
3597
|
estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
|
3566
3598
|
if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
|
3567
3599
|
return;
|
@@ -3601,12 +3633,12 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3601
3633
|
size_t cSize = 0;
|
3602
3634
|
const BYTE* ip = (const BYTE*)src;
|
3603
3635
|
BYTE* op = (BYTE*)dst;
|
3604
|
-
U32 partitions[MAX_NB_SPLITS];
|
3605
3636
|
size_t i = 0;
|
3606
3637
|
size_t srcBytesTotal = 0;
|
3638
|
+
U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
|
3639
|
+
seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
|
3640
|
+
seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
|
3607
3641
|
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
|
3608
|
-
seqStore_t nextSeqStore;
|
3609
|
-
seqStore_t currSeqStore;
|
3610
3642
|
|
3611
3643
|
/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
|
3612
3644
|
* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
|
@@ -3626,6 +3658,7 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3626
3658
|
repcodes_t cRep;
|
3627
3659
|
ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
3628
3660
|
ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
|
3661
|
+
ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
|
3629
3662
|
|
3630
3663
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
3631
3664
|
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
|
@@ -3643,36 +3676,36 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
|
|
3643
3676
|
return cSizeSingleBlock;
|
3644
3677
|
}
|
3645
3678
|
|
3646
|
-
ZSTD_deriveSeqStoreChunk(
|
3679
|
+
ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
|
3647
3680
|
for (i = 0; i <= numSplits; ++i) {
|
3648
3681
|
size_t srcBytes;
|
3649
3682
|
size_t cSizeChunk;
|
3650
3683
|
U32 const lastPartition = (i == numSplits);
|
3651
3684
|
U32 lastBlockEntireSrc = 0;
|
3652
3685
|
|
3653
|
-
srcBytes = ZSTD_countSeqStoreLiteralsBytes(
|
3686
|
+
srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
|
3654
3687
|
srcBytesTotal += srcBytes;
|
3655
3688
|
if (lastPartition) {
|
3656
3689
|
/* This is the final partition, need to account for possible last literals */
|
3657
3690
|
srcBytes += blockSize - srcBytesTotal;
|
3658
3691
|
lastBlockEntireSrc = lastBlock;
|
3659
3692
|
} else {
|
3660
|
-
ZSTD_deriveSeqStoreChunk(
|
3693
|
+
ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
|
3661
3694
|
}
|
3662
3695
|
|
3663
|
-
cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc,
|
3696
|
+
cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
|
3664
3697
|
&dRep, &cRep,
|
3665
3698
|
op, dstCapacity,
|
3666
3699
|
ip, srcBytes,
|
3667
3700
|
lastBlockEntireSrc, 1 /* isPartition */);
|
3668
|
-
DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(
|
3701
|
+
DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
|
3669
3702
|
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
|
3670
3703
|
|
3671
3704
|
ip += srcBytes;
|
3672
3705
|
op += cSizeChunk;
|
3673
3706
|
dstCapacity -= cSizeChunk;
|
3674
3707
|
cSize += cSizeChunk;
|
3675
|
-
currSeqStore = nextSeqStore;
|
3708
|
+
*currSeqStore = *nextSeqStore;
|
3676
3709
|
assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
|
3677
3710
|
}
|
3678
3711
|
/* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
|
@@ -3690,6 +3723,7 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
3690
3723
|
U32 nbSeq;
|
3691
3724
|
size_t cSize;
|
3692
3725
|
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
|
3726
|
+
assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);
|
3693
3727
|
|
3694
3728
|
{ const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
|
3695
3729
|
FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
|
@@ -3704,7 +3738,6 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
|
|
3704
3738
|
nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
|
3705
3739
|
}
|
3706
3740
|
|
3707
|
-
assert(zc->appliedParams.splitBlocks == 1);
|
3708
3741
|
cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
|
3709
3742
|
FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
|
3710
3743
|
return cSize;
|
@@ -3746,12 +3779,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|
3746
3779
|
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
|
3747
3780
|
zc->bmi2);
|
3748
3781
|
|
3749
|
-
if (zc->seqCollector.collectSequences) {
|
3750
|
-
ZSTD_copyBlockSequences(zc);
|
3751
|
-
return 0;
|
3752
|
-
}
|
3753
|
-
|
3754
|
-
|
3755
3782
|
if (frame &&
|
3756
3783
|
/* We don't want to emit our first block as a RLE even if it qualifies because
|
3757
3784
|
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
|
@@ -3915,6 +3942,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
|
3915
3942
|
ZSTD_overflowCorrectIfNeeded(
|
3916
3943
|
ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
|
3917
3944
|
ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
3945
|
+
ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
|
3918
3946
|
|
3919
3947
|
/* Ensure hash/chain table insertion resumes no sooner than lowlimit */
|
3920
3948
|
if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
|
@@ -3991,7 +4019,9 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
|
3991
4019
|
if (!singleSegment) op[pos++] = windowLogByte;
|
3992
4020
|
switch(dictIDSizeCode)
|
3993
4021
|
{
|
3994
|
-
default:
|
4022
|
+
default:
|
4023
|
+
assert(0); /* impossible */
|
4024
|
+
ZSTD_FALLTHROUGH;
|
3995
4025
|
case 0 : break;
|
3996
4026
|
case 1 : op[pos] = (BYTE)(dictID); pos++; break;
|
3997
4027
|
case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
|
@@ -3999,7 +4029,9 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
|
3999
4029
|
}
|
4000
4030
|
switch(fcsCode)
|
4001
4031
|
{
|
4002
|
-
default:
|
4032
|
+
default:
|
4033
|
+
assert(0); /* impossible */
|
4034
|
+
ZSTD_FALLTHROUGH;
|
4003
4035
|
case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
|
4004
4036
|
case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
|
4005
4037
|
case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
|
@@ -4047,7 +4079,7 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
|
|
4047
4079
|
{
|
4048
4080
|
RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
|
4049
4081
|
"wrong cctx stage");
|
4050
|
-
RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
|
4082
|
+
RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
|
4051
4083
|
parameter_unsupported,
|
4052
4084
|
"incompatible with ldm");
|
4053
4085
|
cctx->externSeqStore.seq = seq;
|
@@ -4088,7 +4120,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
|
4088
4120
|
ms->forceNonContiguous = 0;
|
4089
4121
|
ms->nextToUpdate = ms->window.dictLimit;
|
4090
4122
|
}
|
4091
|
-
if (cctx->appliedParams.ldmParams.enableLdm) {
|
4123
|
+
if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
|
4092
4124
|
ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
|
4093
4125
|
}
|
4094
4126
|
|
@@ -4157,7 +4189,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
4157
4189
|
{
|
4158
4190
|
const BYTE* ip = (const BYTE*) src;
|
4159
4191
|
const BYTE* const iend = ip + srcSize;
|
4160
|
-
int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;
|
4192
|
+
int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
|
4161
4193
|
|
4162
4194
|
/* Assert that we the ms params match the params we're being given */
|
4163
4195
|
ZSTD_assertEqualCParams(params->cParams, ms->cParams);
|
@@ -4214,8 +4246,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
|
|
4214
4246
|
assert(ms->chainTable != NULL);
|
4215
4247
|
ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
|
4216
4248
|
} else {
|
4217
|
-
assert(params->useRowMatchFinder !=
|
4218
|
-
if (params->useRowMatchFinder ==
|
4249
|
+
assert(params->useRowMatchFinder != ZSTD_ps_auto);
|
4250
|
+
if (params->useRowMatchFinder == ZSTD_ps_enable) {
|
4219
4251
|
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
|
4220
4252
|
ZSTD_memset(ms->tagTable, 0, tagTableSize);
|
4221
4253
|
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
|
@@ -4715,7 +4747,7 @@ size_t ZSTD_estimateCDictSize_advanced(
|
|
4715
4747
|
+ ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
|
4716
4748
|
/* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
|
4717
4749
|
* in case we are using DDS with row-hash. */
|
4718
|
-
+ ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(
|
4750
|
+
+ ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
|
4719
4751
|
/* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
|
4720
4752
|
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0
|
4721
4753
|
: ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
|
@@ -4792,7 +4824,7 @@ static size_t ZSTD_initCDict_internal(
|
|
4792
4824
|
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
|
4793
4825
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
4794
4826
|
ZSTD_compressionParameters cParams,
|
4795
|
-
|
4827
|
+
ZSTD_paramSwitch_e useRowMatchFinder,
|
4796
4828
|
U32 enableDedicatedDictSearch,
|
4797
4829
|
ZSTD_customMem customMem)
|
4798
4830
|
{
|
@@ -4842,7 +4874,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
|
|
4842
4874
|
&cctxParams, customMem);
|
4843
4875
|
}
|
4844
4876
|
|
4845
|
-
|
4877
|
+
ZSTD_CDict* ZSTD_createCDict_advanced2(
|
4846
4878
|
const void* dict, size_t dictSize,
|
4847
4879
|
ZSTD_dictLoadMethod_e dictLoadMethod,
|
4848
4880
|
ZSTD_dictContentType_e dictContentType,
|
@@ -4947,7 +4979,7 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
|
|
4947
4979
|
ZSTD_dictContentType_e dictContentType,
|
4948
4980
|
ZSTD_compressionParameters cParams)
|
4949
4981
|
{
|
4950
|
-
|
4982
|
+
ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
|
4951
4983
|
/* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
|
4952
4984
|
size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
|
4953
4985
|
size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
|
@@ -5403,7 +5435,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
|
5403
5435
|
zcs->outBuffFlushedSize = 0;
|
5404
5436
|
zcs->streamStage = zcss_flush; /* pass-through to flush stage */
|
5405
5437
|
}
|
5406
|
-
|
5438
|
+
ZSTD_FALLTHROUGH;
|
5407
5439
|
case zcss_flush:
|
5408
5440
|
DEBUGLOG(5, "flush stage");
|
5409
5441
|
assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
|
@@ -5524,17 +5556,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
|
5524
5556
|
dictSize, mode);
|
5525
5557
|
}
|
5526
5558
|
|
5527
|
-
|
5528
|
-
|
5529
|
-
DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
|
5530
|
-
params.ldmParams.enableLdm = 1;
|
5531
|
-
}
|
5532
|
-
|
5533
|
-
if (ZSTD_CParams_useBlockSplitter(&params.cParams)) {
|
5534
|
-
DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
|
5535
|
-
params.splitBlocks = 1;
|
5536
|
-
}
|
5537
|
-
|
5559
|
+
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
|
5560
|
+
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
|
5538
5561
|
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
|
5539
5562
|
|
5540
5563
|
#ifdef ZSTD_MULTITHREAD
|
@@ -6140,119 +6163,12 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
|
|
6140
6163
|
|
6141
6164
|
|
6142
6165
|
/*-===== Pre-defined compression levels =====-*/
|
6166
|
+
#include "clevels.h"
|
6143
6167
|
|
6144
|
-
#define ZSTD_MAX_CLEVEL 22
|
6145
6168
|
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
|
6146
6169
|
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
|
6147
6170
|
int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
|
6148
6171
|
|
6149
|
-
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
|
6150
|
-
{ /* "default" - for any srcSize > 256 KB */
|
6151
|
-
/* W, C, H, S, L, TL, strat */
|
6152
|
-
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
|
6153
|
-
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
|
6154
|
-
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
|
6155
|
-
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
|
6156
|
-
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
|
6157
|
-
{ 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
|
6158
|
-
{ 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
|
6159
|
-
{ 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
|
6160
|
-
{ 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
|
6161
|
-
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
|
6162
|
-
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
|
6163
|
-
{ 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
|
6164
|
-
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
|
6165
|
-
{ 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */
|
6166
|
-
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
|
6167
|
-
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
|
6168
|
-
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
|
6169
|
-
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
|
6170
|
-
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
|
6171
|
-
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
|
6172
|
-
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
|
6173
|
-
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
|
6174
|
-
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
|
6175
|
-
},
|
6176
|
-
{ /* for srcSize <= 256 KB */
|
6177
|
-
/* W, C, H, S, L, T, strat */
|
6178
|
-
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
6179
|
-
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
6180
|
-
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
|
6181
|
-
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
|
6182
|
-
{ 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
|
6183
|
-
{ 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
|
6184
|
-
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
|
6185
|
-
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
|
6186
|
-
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
6187
|
-
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
6188
|
-
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
6189
|
-
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
|
6190
|
-
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
|
6191
|
-
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
|
6192
|
-
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
6193
|
-
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
|
6194
|
-
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
6195
|
-
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
6196
|
-
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
|
6197
|
-
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
6198
|
-
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
|
6199
|
-
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
|
6200
|
-
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
|
6201
|
-
},
|
6202
|
-
{ /* for srcSize <= 128 KB */
|
6203
|
-
/* W, C, H, S, L, T, strat */
|
6204
|
-
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
6205
|
-
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
6206
|
-
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
|
6207
|
-
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
|
6208
|
-
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
|
6209
|
-
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
|
6210
|
-
{ 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
|
6211
|
-
{ 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
6212
|
-
{ 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
6213
|
-
{ 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
6214
|
-
{ 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
6215
|
-
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
|
6216
|
-
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
|
6217
|
-
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
|
6218
|
-
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
6219
|
-
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
|
6220
|
-
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
6221
|
-
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
6222
|
-
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
|
6223
|
-
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
|
6224
|
-
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
|
6225
|
-
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
6226
|
-
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
|
6227
|
-
},
|
6228
|
-
{ /* for srcSize <= 16 KB */
|
6229
|
-
/* W, C, H, S, L, T, strat */
|
6230
|
-
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
6231
|
-
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
|
6232
|
-
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
|
6233
|
-
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
|
6234
|
-
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
|
6235
|
-
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
|
6236
|
-
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
|
6237
|
-
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
6238
|
-
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
|
6239
|
-
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
|
6240
|
-
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
|
6241
|
-
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
|
6242
|
-
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
|
6243
|
-
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
|
6244
|
-
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
|
6245
|
-
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
|
6246
|
-
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
|
6247
|
-
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
|
6248
|
-
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
|
6249
|
-
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
6250
|
-
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
|
6251
|
-
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
6252
|
-
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
|
6253
|
-
},
|
6254
|
-
};
|
6255
|
-
|
6256
6172
|
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
|
6257
6173
|
{
|
6258
6174
|
ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
|