zstdlib 0.12.0-x86_64-darwin → 0.13.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +7 -0
- data/Rakefile +1 -1
- data/ext/zstdlib_c/extconf.rb +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/allocations.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bitstream.h +49 -29
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/compiler.h +114 -22
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/cpu.h +36 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.c +6 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/debug.h +20 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.h +45 -36
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse.h +3 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/fse_decompress.c +19 -17
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/huf.h +14 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/mem.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/pool.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/portability_macros.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.c +5 -11
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/xxhash.h +2341 -1007
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_internal.h +5 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/fse_compress.c +8 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/huf_compress.c +54 -25
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress.c +282 -161
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_internal.h +29 -27
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.c +224 -113
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_cwksp.h +19 -13
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.c +17 -5
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_double_fast.h +11 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.c +14 -6
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.c +129 -87
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_lazy.h +103 -28
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.c +8 -2
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.c +216 -112
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_opt.h +31 -7
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.c +94 -79
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress.c +188 -126
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/huf_decompress_amd64.S +38 -19
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress.c +84 -32
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.c +231 -208
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_decompress_internal.h +2 -0
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd.h +129 -60
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzclose.c +1 -3
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzlib.c +20 -73
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzread.c +17 -58
- data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzwrite.c +18 -58
- data/lib/2.4/zstdlib_c.bundle +0 -0
- data/lib/2.5/zstdlib_c.bundle +0 -0
- data/lib/2.6/zstdlib_c.bundle +0 -0
- data/lib/2.7/zstdlib_c.bundle +0 -0
- data/lib/3.0/zstdlib_c.bundle +0 -0
- data/lib/3.1/zstdlib_c.bundle +0 -0
- data/lib/3.2/zstdlib_c.bundle +0 -0
- data/lib/3.3/zstdlib_c.bundle +0 -0
- metadata +75 -75
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/bits.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/entropy_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/error_private.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/threading.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_common.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_deps.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/common/zstd_trace.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/clevels.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/hist.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_literals.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_sequences.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_compress_superblock.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_fast.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstd_ldm_geartab.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/compress/zstdmt_compress.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/decompress/zstd_ddict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zdict.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/lib/zstd_errors.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzcompatibility.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/gzguts.h +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.c +0 -0
- /data/ext/zstdlib_c/{zstd-1.5.5 → zstd-1.5.6}/zlibWrapper/zstd_zlibwrapper.h +0 -0
@@ -39,7 +39,7 @@ extern "C" {
|
|
39
39
|
It's not a big deal though : candidate will just be sorted again.
|
40
40
|
Additionally, candidate position 1 will be lost.
|
41
41
|
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
42
|
-
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table
|
42
|
+
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
|
43
43
|
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
44
44
|
|
45
45
|
|
@@ -159,23 +159,24 @@ typedef struct {
|
|
159
159
|
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
|
160
160
|
|
161
161
|
typedef struct {
|
162
|
-
int price;
|
163
|
-
U32 off;
|
164
|
-
U32 mlen;
|
165
|
-
U32 litlen;
|
166
|
-
U32 rep[ZSTD_REP_NUM];
|
162
|
+
int price; /* price from beginning of segment to this position */
|
163
|
+
U32 off; /* offset of previous match */
|
164
|
+
U32 mlen; /* length of previous match */
|
165
|
+
U32 litlen; /* nb of literals since previous match */
|
166
|
+
U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
|
167
167
|
} ZSTD_optimal_t;
|
168
168
|
|
169
169
|
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
|
170
170
|
|
171
|
+
#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
|
171
172
|
typedef struct {
|
172
173
|
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
|
173
174
|
unsigned* litFreq; /* table of literals statistics, of size 256 */
|
174
175
|
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
|
175
176
|
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
|
176
177
|
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
|
177
|
-
ZSTD_match_t* matchTable; /* list of found matches, of size
|
178
|
-
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size
|
178
|
+
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
|
179
|
+
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
|
179
180
|
|
180
181
|
U32 litSum; /* nb of literals */
|
181
182
|
U32 litLengthSum; /* nb of litLength codes */
|
@@ -228,7 +229,7 @@ struct ZSTD_matchState_t {
|
|
228
229
|
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
|
229
230
|
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
|
230
231
|
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
|
231
|
-
U64 hashSalt; /* For row-based matchFinder: salts the hash for
|
232
|
+
U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */
|
232
233
|
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
|
233
234
|
|
234
235
|
U32* hashTable;
|
@@ -360,10 +361,11 @@ struct ZSTD_CCtx_params_s {
|
|
360
361
|
* if the external matchfinder returns an error code. */
|
361
362
|
int enableMatchFinderFallback;
|
362
363
|
|
363
|
-
/*
|
364
|
-
* Users
|
365
|
-
* It is set
|
366
|
-
|
364
|
+
/* Parameters for the external sequence producer API.
|
365
|
+
* Users set these parameters through ZSTD_registerSequenceProducer().
|
366
|
+
* It is not possible to set these parameters individually through the public API. */
|
367
|
+
void* extSeqProdState;
|
368
|
+
ZSTD_sequenceProducer_F extSeqProdFunc;
|
367
369
|
|
368
370
|
/* Adjust the max block size*/
|
369
371
|
size_t maxBlockSize;
|
@@ -401,14 +403,6 @@ typedef struct {
|
|
401
403
|
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
402
404
|
} ZSTD_blockSplitCtx;
|
403
405
|
|
404
|
-
/* Context for block-level external matchfinder API */
|
405
|
-
typedef struct {
|
406
|
-
void* mState;
|
407
|
-
ZSTD_sequenceProducer_F* mFinder;
|
408
|
-
ZSTD_Sequence* seqBuffer;
|
409
|
-
size_t seqBufferCapacity;
|
410
|
-
} ZSTD_externalMatchCtx;
|
411
|
-
|
412
406
|
struct ZSTD_CCtx_s {
|
413
407
|
ZSTD_compressionStage_e stage;
|
414
408
|
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
@@ -479,8 +473,9 @@ struct ZSTD_CCtx_s {
|
|
479
473
|
/* Workspace for block splitter */
|
480
474
|
ZSTD_blockSplitCtx blockSplitCtx;
|
481
475
|
|
482
|
-
/*
|
483
|
-
|
476
|
+
/* Buffer for output from external sequence producer */
|
477
|
+
ZSTD_Sequence* extSeqBuf;
|
478
|
+
size_t extSeqBufCapacity;
|
484
479
|
};
|
485
480
|
|
486
481
|
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
@@ -1053,7 +1048,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
|
|
1053
1048
|
* The least significant cycleLog bits of the indices must remain the same,
|
1054
1049
|
* which may be 0. Every index up to maxDist in the past must be valid.
|
1055
1050
|
*/
|
1056
|
-
MEM_STATIC
|
1051
|
+
MEM_STATIC
|
1052
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1053
|
+
U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
1057
1054
|
U32 maxDist, void const* src)
|
1058
1055
|
{
|
1059
1056
|
/* preemptive overflow correction:
|
@@ -1246,7 +1243,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
|
1246
1243
|
* forget about the extDict. Handles overlap of the prefix and extDict.
|
1247
1244
|
* Returns non-zero if the segment is contiguous.
|
1248
1245
|
*/
|
1249
|
-
MEM_STATIC
|
1246
|
+
MEM_STATIC
|
1247
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1248
|
+
U32 ZSTD_window_update(ZSTD_window_t* window,
|
1250
1249
|
void const* src, size_t srcSize,
|
1251
1250
|
int forceNonContiguous)
|
1252
1251
|
{
|
@@ -1467,11 +1466,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
|
|
1467
1466
|
* This cannot be used when long range matching is enabled.
|
1468
1467
|
* Zstd will use these sequences, and pass the literals to a secondary block
|
1469
1468
|
* compressor.
|
1470
|
-
* @return : An error code on failure.
|
1471
1469
|
* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
|
1472
1470
|
* access and data corruption.
|
1473
1471
|
*/
|
1474
|
-
|
1472
|
+
void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
|
1475
1473
|
|
1476
1474
|
/** ZSTD_cycleLog() :
|
1477
1475
|
* condition for correct operation : hashLog > 1 */
|
@@ -1509,6 +1507,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
|
|
1509
1507
|
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
|
1510
1508
|
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
|
1511
1509
|
|
1510
|
+
/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
|
1511
|
+
MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
|
1512
|
+
return params->extSeqProdFunc != NULL;
|
1513
|
+
}
|
1512
1514
|
|
1513
1515
|
/* ===============================================================
|
1514
1516
|
* Deprecated definitions that are still used internally to avoid
|
@@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
76
76
|
}
|
77
77
|
|
78
78
|
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
|
79
|
-
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
|
80
|
-
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
|
79
|
+
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
|
80
|
+
: HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
|
81
81
|
op += cSize;
|
82
82
|
cLitSize += cSize;
|
83
83
|
if (cSize == 0 || ERR_isError(cSize)) {
|
@@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
102
102
|
switch(lhSize)
|
103
103
|
{
|
104
104
|
case 3: /* 2 - 2 - 10 - 10 */
|
105
|
-
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
105
|
+
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
106
106
|
MEM_writeLE24(ostart, lhc);
|
107
107
|
break;
|
108
108
|
}
|
@@ -122,30 +122,30 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
122
122
|
}
|
123
123
|
*entropyWritten = 1;
|
124
124
|
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
|
125
|
-
return op-ostart;
|
125
|
+
return (size_t)(op-ostart);
|
126
126
|
}
|
127
127
|
|
128
128
|
static size_t
|
129
129
|
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
|
130
|
-
const seqDef* sequences, size_t
|
131
|
-
size_t litSize, int
|
130
|
+
const seqDef* sequences, size_t nbSeqs,
|
131
|
+
size_t litSize, int lastSubBlock)
|
132
132
|
{
|
133
|
-
const seqDef* const sstart = sequences;
|
134
|
-
const seqDef* const send = sequences + nbSeq;
|
135
|
-
const seqDef* sp = sstart;
|
136
133
|
size_t matchLengthSum = 0;
|
137
134
|
size_t litLengthSum = 0;
|
138
|
-
|
139
|
-
|
140
|
-
ZSTD_sequenceLength
|
135
|
+
size_t n;
|
136
|
+
for (n=0; n<nbSeqs; n++) {
|
137
|
+
const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
|
141
138
|
litLengthSum += seqLen.litLength;
|
142
139
|
matchLengthSum += seqLen.matchLength;
|
143
|
-
sp++;
|
144
140
|
}
|
145
|
-
|
146
|
-
|
141
|
+
DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
|
142
|
+
(unsigned)nbSeqs, (const void*)sequences,
|
143
|
+
(unsigned)litLengthSum, (unsigned)matchLengthSum);
|
144
|
+
if (!lastSubBlock)
|
147
145
|
assert(litLengthSum == litSize);
|
148
|
-
|
146
|
+
else
|
147
|
+
assert(litLengthSum <= litSize);
|
148
|
+
(void)litLengthSum;
|
149
149
|
return matchLengthSum + litSize;
|
150
150
|
}
|
151
151
|
|
@@ -180,14 +180,14 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|
180
180
|
/* Sequences Header */
|
181
181
|
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
|
182
182
|
dstSize_tooSmall, "");
|
183
|
-
if (nbSeq <
|
183
|
+
if (nbSeq < 128)
|
184
184
|
*op++ = (BYTE)nbSeq;
|
185
185
|
else if (nbSeq < LONGNBSEQ)
|
186
186
|
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
|
187
187
|
else
|
188
188
|
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
189
189
|
if (nbSeq==0) {
|
190
|
-
return op - ostart;
|
190
|
+
return (size_t)(op - ostart);
|
191
191
|
}
|
192
192
|
|
193
193
|
/* seqHead : flags for FSE encoding type */
|
@@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|
209
209
|
}
|
210
210
|
|
211
211
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
212
|
-
op, oend - op,
|
212
|
+
op, (size_t)(oend - op),
|
213
213
|
fseTables->matchlengthCTable, mlCode,
|
214
214
|
fseTables->offcodeCTable, ofCode,
|
215
215
|
fseTables->litlengthCTable, llCode,
|
@@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|
253
253
|
#endif
|
254
254
|
|
255
255
|
*entropyWritten = 1;
|
256
|
-
return op - ostart;
|
256
|
+
return (size_t)(op - ostart);
|
257
257
|
}
|
258
258
|
|
259
259
|
/** ZSTD_compressSubBlock() :
|
@@ -279,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|
279
279
|
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
|
280
280
|
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
|
281
281
|
&entropyMetadata->hufMetadata, literals, litSize,
|
282
|
-
op, oend-op,
|
282
|
+
op, (size_t)(oend-op),
|
283
|
+
bmi2, writeLitEntropy, litEntropyWritten);
|
283
284
|
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
|
284
285
|
if (cLitSize == 0) return 0;
|
285
286
|
op += cLitSize;
|
@@ -289,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|
289
290
|
sequences, nbSeq,
|
290
291
|
llCode, mlCode, ofCode,
|
291
292
|
cctxParams,
|
292
|
-
op, oend-op,
|
293
|
+
op, (size_t)(oend-op),
|
293
294
|
bmi2, writeSeqEntropy, seqEntropyWritten);
|
294
295
|
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
|
295
296
|
if (cSeqSize == 0) return 0;
|
296
297
|
op += cSeqSize;
|
297
298
|
}
|
298
299
|
/* Write block header */
|
299
|
-
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
|
300
|
+
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
|
300
301
|
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
301
302
|
MEM_writeLE24(ostart, cBlockHeader24);
|
302
303
|
}
|
303
|
-
return op-ostart;
|
304
|
+
return (size_t)(op-ostart);
|
304
305
|
}
|
305
306
|
|
306
307
|
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
@@ -389,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|
389
390
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
390
391
|
}
|
391
392
|
|
392
|
-
|
393
|
+
typedef struct {
|
394
|
+
size_t estLitSize;
|
395
|
+
size_t estBlockSize;
|
396
|
+
} EstimatedBlockSize;
|
397
|
+
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
393
398
|
const BYTE* ofCodeTable,
|
394
399
|
const BYTE* llCodeTable,
|
395
400
|
const BYTE* mlCodeTable,
|
@@ -397,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
|
397
402
|
const ZSTD_entropyCTables_t* entropy,
|
398
403
|
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
399
404
|
void* workspace, size_t wkspSize,
|
400
|
-
int writeLitEntropy, int writeSeqEntropy)
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
405
|
+
int writeLitEntropy, int writeSeqEntropy)
|
406
|
+
{
|
407
|
+
EstimatedBlockSize ebs;
|
408
|
+
ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
|
409
|
+
&entropy->huf, &entropyMetadata->hufMetadata,
|
410
|
+
workspace, wkspSize, writeLitEntropy);
|
411
|
+
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
406
412
|
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
407
413
|
workspace, wkspSize, writeSeqEntropy);
|
408
|
-
|
414
|
+
ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
|
415
|
+
return ebs;
|
409
416
|
}
|
410
417
|
|
411
418
|
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
|
@@ -419,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
|
|
419
426
|
return 0;
|
420
427
|
}
|
421
428
|
|
429
|
+
static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
|
430
|
+
{
|
431
|
+
size_t n, total = 0;
|
432
|
+
assert(sp != NULL);
|
433
|
+
for (n=0; n<seqCount; n++) {
|
434
|
+
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
|
435
|
+
}
|
436
|
+
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
|
437
|
+
return total;
|
438
|
+
}
|
439
|
+
|
440
|
+
#define BYTESCALE 256
|
441
|
+
|
442
|
+
static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
|
443
|
+
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
|
444
|
+
int firstSubBlock)
|
445
|
+
{
|
446
|
+
size_t n, budget = 0, inSize=0;
|
447
|
+
/* entropy headers */
|
448
|
+
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
|
449
|
+
assert(firstSubBlock==0 || firstSubBlock==1);
|
450
|
+
budget += headerSize;
|
451
|
+
|
452
|
+
/* first sequence => at least one sequence*/
|
453
|
+
budget += sp[0].litLength * avgLitCost + avgSeqCost;
|
454
|
+
if (budget > targetBudget) return 1;
|
455
|
+
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
|
456
|
+
|
457
|
+
/* loop over sequences */
|
458
|
+
for (n=1; n<nbSeqs; n++) {
|
459
|
+
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
|
460
|
+
budget += currentCost;
|
461
|
+
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
|
462
|
+
/* stop when sub-block budget is reached */
|
463
|
+
if ( (budget > targetBudget)
|
464
|
+
/* though continue to expand until the sub-block is deemed compressible */
|
465
|
+
&& (budget < inSize * BYTESCALE) )
|
466
|
+
break;
|
467
|
+
}
|
468
|
+
|
469
|
+
return n;
|
470
|
+
}
|
471
|
+
|
422
472
|
/** ZSTD_compressSubBlock_multi() :
|
423
473
|
* Breaks super-block into multiple sub-blocks and compresses them.
|
424
|
-
* Entropy will be written
|
425
|
-
* The following blocks
|
426
|
-
*
|
427
|
-
* @return : compressed size of the super block (which
|
428
|
-
*
|
474
|
+
* Entropy will be written into the first block.
|
475
|
+
* The following blocks use repeat_mode to compress.
|
476
|
+
* Sub-blocks are all compressed, except the last one when beneficial.
|
477
|
+
* @return : compressed size of the super block (which features multiple ZSTD blocks)
|
478
|
+
* or 0 if it failed to compress. */
|
429
479
|
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
430
480
|
const ZSTD_compressedBlockState_t* prevCBlock,
|
431
481
|
ZSTD_compressedBlockState_t* nextCBlock,
|
@@ -438,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
438
488
|
{
|
439
489
|
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
440
490
|
const seqDef* const send = seqStorePtr->sequences;
|
441
|
-
const seqDef* sp = sstart;
|
491
|
+
const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
|
492
|
+
size_t const nbSeqs = (size_t)(send - sstart);
|
442
493
|
const BYTE* const lstart = seqStorePtr->litStart;
|
443
494
|
const BYTE* const lend = seqStorePtr->lit;
|
444
495
|
const BYTE* lp = lstart;
|
496
|
+
size_t const nbLiterals = (size_t)(lend - lstart);
|
445
497
|
BYTE const* ip = (BYTE const*)src;
|
446
498
|
BYTE const* const iend = ip + srcSize;
|
447
499
|
BYTE* const ostart = (BYTE*)dst;
|
@@ -450,96 +502,152 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
450
502
|
const BYTE* llCodePtr = seqStorePtr->llCode;
|
451
503
|
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
452
504
|
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
453
|
-
size_t
|
454
|
-
size_t
|
455
|
-
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
|
505
|
+
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
|
506
|
+
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
|
507
|
+
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
|
456
508
|
int writeSeqEntropy = 1;
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
509
|
+
|
510
|
+
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
|
511
|
+
(unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
|
512
|
+
|
513
|
+
/* let's start by a general estimation for the full block */
|
514
|
+
if (nbSeqs > 0) {
|
515
|
+
EstimatedBlockSize const ebs =
|
516
|
+
ZSTD_estimateSubBlockSize(lp, nbLiterals,
|
517
|
+
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
|
518
|
+
&nextCBlock->entropy, entropyMetadata,
|
519
|
+
workspace, wkspSize,
|
520
|
+
writeLitEntropy, writeSeqEntropy);
|
521
|
+
/* quick estimation */
|
522
|
+
size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
|
523
|
+
size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
|
524
|
+
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
|
525
|
+
size_t n, avgBlockBudget, blockBudgetSupp=0;
|
526
|
+
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
|
527
|
+
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
|
528
|
+
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
|
529
|
+
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
|
530
|
+
/* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
|
531
|
+
* this will result in the production of a single uncompressed block covering @srcSize.*/
|
532
|
+
if (ebs.estBlockSize > srcSize) return 0;
|
533
|
+
|
534
|
+
/* compress and write sub-blocks */
|
535
|
+
assert(nbSubBlocks>0);
|
536
|
+
for (n=0; n < nbSubBlocks-1; n++) {
|
537
|
+
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
|
538
|
+
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
|
539
|
+
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
|
540
|
+
/* if reached last sequence : break to last sub-block (simplification) */
|
541
|
+
assert(seqCount <= (size_t)(send-sp));
|
542
|
+
if (sp + seqCount == send) break;
|
543
|
+
assert(seqCount > 0);
|
544
|
+
/* compress sub-block */
|
545
|
+
{ int litEntropyWritten = 0;
|
546
|
+
int seqEntropyWritten = 0;
|
547
|
+
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
|
548
|
+
const size_t decompressedSize =
|
549
|
+
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
|
550
|
+
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
551
|
+
sp, seqCount,
|
552
|
+
lp, litSize,
|
553
|
+
llCodePtr, mlCodePtr, ofCodePtr,
|
554
|
+
cctxParams,
|
555
|
+
op, (size_t)(oend-op),
|
556
|
+
bmi2, writeLitEntropy, writeSeqEntropy,
|
557
|
+
&litEntropyWritten, &seqEntropyWritten,
|
558
|
+
0);
|
559
|
+
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
560
|
+
|
561
|
+
/* check compressibility, update state components */
|
562
|
+
if (cSize > 0 && cSize < decompressedSize) {
|
563
|
+
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
|
564
|
+
(unsigned)decompressedSize, (unsigned)cSize);
|
565
|
+
assert(ip + decompressedSize <= iend);
|
566
|
+
ip += decompressedSize;
|
567
|
+
lp += litSize;
|
568
|
+
op += cSize;
|
569
|
+
llCodePtr += seqCount;
|
570
|
+
mlCodePtr += seqCount;
|
571
|
+
ofCodePtr += seqCount;
|
572
|
+
/* Entropy only needs to be written once */
|
573
|
+
if (litEntropyWritten) {
|
574
|
+
writeLitEntropy = 0;
|
575
|
+
}
|
576
|
+
if (seqEntropyWritten) {
|
577
|
+
writeSeqEntropy = 0;
|
578
|
+
}
|
579
|
+
sp += seqCount;
|
580
|
+
blockBudgetSupp = 0;
|
581
|
+
} }
|
582
|
+
/* otherwise : do not compress yet, coalesce current sub-block with following one */
|
478
583
|
}
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
if (seqEntropyWritten) {
|
518
|
-
writeSeqEntropy = 0;
|
519
|
-
}
|
584
|
+
} /* if (nbSeqs > 0) */
|
585
|
+
|
586
|
+
/* write last block */
|
587
|
+
DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
|
588
|
+
{ int litEntropyWritten = 0;
|
589
|
+
int seqEntropyWritten = 0;
|
590
|
+
size_t litSize = (size_t)(lend - lp);
|
591
|
+
size_t seqCount = (size_t)(send - sp);
|
592
|
+
const size_t decompressedSize =
|
593
|
+
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
|
594
|
+
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
595
|
+
sp, seqCount,
|
596
|
+
lp, litSize,
|
597
|
+
llCodePtr, mlCodePtr, ofCodePtr,
|
598
|
+
cctxParams,
|
599
|
+
op, (size_t)(oend-op),
|
600
|
+
bmi2, writeLitEntropy, writeSeqEntropy,
|
601
|
+
&litEntropyWritten, &seqEntropyWritten,
|
602
|
+
lastBlock);
|
603
|
+
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
604
|
+
|
605
|
+
/* update pointers, the nb of literals borrowed from next sequence must be preserved */
|
606
|
+
if (cSize > 0 && cSize < decompressedSize) {
|
607
|
+
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
|
608
|
+
(unsigned)decompressedSize, (unsigned)cSize);
|
609
|
+
assert(ip + decompressedSize <= iend);
|
610
|
+
ip += decompressedSize;
|
611
|
+
lp += litSize;
|
612
|
+
op += cSize;
|
613
|
+
llCodePtr += seqCount;
|
614
|
+
mlCodePtr += seqCount;
|
615
|
+
ofCodePtr += seqCount;
|
616
|
+
/* Entropy only needs to be written once */
|
617
|
+
if (litEntropyWritten) {
|
618
|
+
writeLitEntropy = 0;
|
619
|
+
}
|
620
|
+
if (seqEntropyWritten) {
|
621
|
+
writeSeqEntropy = 0;
|
520
622
|
}
|
623
|
+
sp += seqCount;
|
521
624
|
}
|
522
|
-
}
|
625
|
+
}
|
626
|
+
|
627
|
+
|
523
628
|
if (writeLitEntropy) {
|
524
|
-
DEBUGLOG(5, "
|
629
|
+
DEBUGLOG(5, "Literal entropy tables were never written");
|
525
630
|
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
526
631
|
}
|
527
632
|
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
528
633
|
/* If we haven't written our entropy tables, then we've violated our contract and
|
529
634
|
* must emit an uncompressed block.
|
530
635
|
*/
|
531
|
-
DEBUGLOG(5, "
|
636
|
+
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
|
532
637
|
return 0;
|
533
638
|
}
|
639
|
+
|
534
640
|
if (ip < iend) {
|
535
|
-
|
536
|
-
|
641
|
+
/* some data left : last part of the block sent uncompressed */
|
642
|
+
size_t const rSize = (size_t)((iend - ip));
|
643
|
+
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
|
644
|
+
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
|
537
645
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
538
646
|
assert(cSize != 0);
|
539
647
|
op += cSize;
|
540
648
|
/* We have to regenerate the repcodes because we've skipped some sequences */
|
541
649
|
if (sp < send) {
|
542
|
-
seqDef
|
650
|
+
const seqDef* seq;
|
543
651
|
repcodes_t rep;
|
544
652
|
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
545
653
|
for (seq = sstart; seq < sp; ++seq) {
|
@@ -548,14 +656,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
548
656
|
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
549
657
|
}
|
550
658
|
}
|
551
|
-
|
552
|
-
|
659
|
+
|
660
|
+
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
|
661
|
+
(unsigned)(op-ostart));
|
662
|
+
return (size_t)(op-ostart);
|
553
663
|
}
|
554
664
|
|
555
665
|
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
556
666
|
void* dst, size_t dstCapacity,
|
557
|
-
void
|
558
|
-
unsigned lastBlock)
|
667
|
+
const void* src, size_t srcSize,
|
668
|
+
unsigned lastBlock)
|
669
|
+
{
|
559
670
|
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
560
671
|
|
561
672
|
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|