zstd-ruby 1.1.3.0 → 1.1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +9 -6
- data/ext/zstdruby/libzstd/common/bitstream.h +3 -0
- data/ext/zstdruby/libzstd/common/entropy_common.c +13 -19
- data/ext/zstdruby/libzstd/common/fse.h +48 -22
- data/ext/zstdruby/libzstd/common/fse_decompress.c +0 -1
- data/ext/zstdruby/libzstd/common/huf.h +27 -5
- data/ext/zstdruby/libzstd/common/mem.h +14 -12
- data/ext/zstdruby/libzstd/common/threading.c +5 -4
- data/ext/zstdruby/libzstd/common/threading.h +1 -1
- data/ext/zstdruby/libzstd/common/xxhash.c +3 -1
- data/ext/zstdruby/libzstd/common/xxhash.h +11 -15
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +16 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +103 -28
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +90 -37
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +7 -8
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +20 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +429 -120
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +1 -1
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
- data/ext/zstdruby/libzstd/dll/libzstd.def +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +122 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +31 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +37 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +37 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +33 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +7 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +32 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +7 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +44 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +8 -0
- data/ext/zstdruby/libzstd/zstd.h +87 -13
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -127,7 +127,7 @@ struct HUF_CElt_s {
|
|
127
127
|
}; /* typedef'd to HUF_CElt within "huf.h" */
|
128
128
|
|
129
129
|
/*! HUF_writeCTable() :
|
130
|
-
`CTable` :
|
130
|
+
`CTable` : Huffman tree to save, using huf representation.
|
131
131
|
@return : size of saved CTable */
|
132
132
|
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
133
133
|
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
|
@@ -409,6 +409,25 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
|
|
409
409
|
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
|
410
410
|
}
|
411
411
|
|
412
|
+
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
413
|
+
{
|
414
|
+
size_t nbBits = 0;
|
415
|
+
int s;
|
416
|
+
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
417
|
+
nbBits += CTable[s].nbBits * count[s];
|
418
|
+
}
|
419
|
+
return nbBits >> 3;
|
420
|
+
}
|
421
|
+
|
422
|
+
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
423
|
+
int bad = 0;
|
424
|
+
int s;
|
425
|
+
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
426
|
+
bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
|
427
|
+
}
|
428
|
+
return !bad;
|
429
|
+
}
|
430
|
+
|
412
431
|
static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
|
413
432
|
{
|
414
433
|
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
|
@@ -510,25 +529,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
|
|
510
529
|
}
|
511
530
|
|
512
531
|
|
532
|
+
static size_t HUF_compressCTable_internal(
|
533
|
+
BYTE* const ostart, BYTE* op, BYTE* const oend,
|
534
|
+
const void* src, size_t srcSize,
|
535
|
+
unsigned singleStream, const HUF_CElt* CTable)
|
536
|
+
{
|
537
|
+
size_t const cSize = singleStream ?
|
538
|
+
HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
|
539
|
+
HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
|
540
|
+
if (HUF_isError(cSize)) { return cSize; }
|
541
|
+
if (cSize==0) { return 0; } /* uncompressible */
|
542
|
+
op += cSize;
|
543
|
+
/* check compressibility */
|
544
|
+
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
|
545
|
+
return op-ostart;
|
546
|
+
}
|
547
|
+
|
548
|
+
|
513
549
|
/* `workSpace` must be a table of at least 1024 unsigned */
|
514
550
|
static size_t HUF_compress_internal (
|
515
551
|
void* dst, size_t dstSize,
|
516
552
|
const void* src, size_t srcSize,
|
517
553
|
unsigned maxSymbolValue, unsigned huffLog,
|
518
554
|
unsigned singleStream,
|
519
|
-
void* workSpace, size_t wkspSize
|
555
|
+
void* workSpace, size_t wkspSize,
|
556
|
+
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
|
520
557
|
{
|
521
558
|
BYTE* const ostart = (BYTE*)dst;
|
522
559
|
BYTE* const oend = ostart + dstSize;
|
523
560
|
BYTE* op = ostart;
|
524
561
|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
562
|
+
U32* count;
|
563
|
+
size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
|
564
|
+
HUF_CElt* CTable;
|
565
|
+
size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
|
529
566
|
|
530
567
|
/* checks & inits */
|
531
|
-
if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
|
568
|
+
if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
|
532
569
|
if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
|
533
570
|
if (!dstSize) return 0; /* cannot fit within dst budget */
|
534
571
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
@@ -536,38 +573,58 @@ static size_t HUF_compress_internal (
|
|
536
573
|
if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
|
537
574
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
538
575
|
|
576
|
+
count = (U32*)workSpace;
|
577
|
+
workSpace = (BYTE*)workSpace + countSize;
|
578
|
+
wkspSize -= countSize;
|
579
|
+
CTable = (HUF_CElt*)workSpace;
|
580
|
+
workSpace = (BYTE*)workSpace + CTableSize;
|
581
|
+
wkspSize -= CTableSize;
|
582
|
+
|
583
|
+
/* Heuristic : If we don't need to check the validity of the old table, use the old table for small inputs */
|
584
|
+
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
|
585
|
+
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
|
586
|
+
}
|
587
|
+
|
539
588
|
/* Scan input and build symbol stats */
|
540
|
-
{ CHECK_V_F(largest, FSE_count_wksp (
|
589
|
+
{ CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
|
541
590
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
542
591
|
if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
|
543
592
|
}
|
544
593
|
|
594
|
+
/* Check validity of previous table */
|
595
|
+
if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
|
596
|
+
*repeat = HUF_repeat_none;
|
597
|
+
}
|
598
|
+
/* Heuristic : use existing table for small inputs */
|
599
|
+
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
|
600
|
+
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
|
601
|
+
}
|
602
|
+
|
545
603
|
/* Build Huffman Tree */
|
546
604
|
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
547
|
-
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (
|
605
|
+
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
|
548
606
|
huffLog = (U32)maxBits;
|
607
|
+
/* Zero the unused symbols so we can check it for validity */
|
608
|
+
memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
|
549
609
|
}
|
550
610
|
|
551
611
|
/* Write table description header */
|
552
|
-
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize,
|
553
|
-
|
612
|
+
{ CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
|
613
|
+
/* Check if using the previous table will be beneficial */
|
614
|
+
if (repeat && *repeat != HUF_repeat_none) {
|
615
|
+
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
|
616
|
+
size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
|
617
|
+
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
618
|
+
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
|
619
|
+
}
|
620
|
+
}
|
621
|
+
/* Use the new table */
|
622
|
+
if (hSize + 12ul >= srcSize) { return 0; }
|
554
623
|
op += hSize;
|
624
|
+
if (repeat) { *repeat = HUF_repeat_none; }
|
625
|
+
if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
|
555
626
|
}
|
556
|
-
|
557
|
-
/* Compress */
|
558
|
-
{ size_t const cSize = (singleStream) ?
|
559
|
-
HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) : /* single segment */
|
560
|
-
HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
|
561
|
-
if (HUF_isError(cSize)) return cSize;
|
562
|
-
if (cSize==0) return 0; /* uncompressible */
|
563
|
-
op += cSize;
|
564
|
-
}
|
565
|
-
|
566
|
-
/* check compressibility */
|
567
|
-
if ((size_t)(op-ostart) >= srcSize-1)
|
568
|
-
return 0;
|
569
|
-
|
570
|
-
return op-ostart;
|
627
|
+
return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
|
571
628
|
}
|
572
629
|
|
573
630
|
|
@@ -576,7 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
|
576
633
|
unsigned maxSymbolValue, unsigned huffLog,
|
577
634
|
void* workSpace, size_t wkspSize)
|
578
635
|
{
|
579
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
|
636
|
+
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
|
637
|
+
}
|
638
|
+
|
639
|
+
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
640
|
+
const void* src, size_t srcSize,
|
641
|
+
unsigned maxSymbolValue, unsigned huffLog,
|
642
|
+
void* workSpace, size_t wkspSize,
|
643
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
|
644
|
+
{
|
645
|
+
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
|
580
646
|
}
|
581
647
|
|
582
648
|
size_t HUF_compress1X (void* dst, size_t dstSize,
|
@@ -592,7 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
|
592
658
|
unsigned maxSymbolValue, unsigned huffLog,
|
593
659
|
void* workSpace, size_t wkspSize)
|
594
660
|
{
|
595
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
|
661
|
+
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
|
662
|
+
}
|
663
|
+
|
664
|
+
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
665
|
+
const void* src, size_t srcSize,
|
666
|
+
unsigned maxSymbolValue, unsigned huffLog,
|
667
|
+
void* workSpace, size_t wkspSize,
|
668
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
|
669
|
+
{
|
670
|
+
return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
|
596
671
|
}
|
597
672
|
|
598
673
|
size_t HUF_compress2 (void* dst, size_t dstSize,
|
@@ -13,8 +13,6 @@
|
|
13
13
|
***************************************/
|
14
14
|
#include <string.h> /* memset */
|
15
15
|
#include "mem.h"
|
16
|
-
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
17
|
-
#include "xxhash.h" /* XXH_reset, update, digest */
|
18
16
|
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
19
17
|
#include "fse.h"
|
20
18
|
#define HUF_STATIC_LINKING_ONLY
|
@@ -62,6 +60,7 @@ struct ZSTD_CCtx_s {
|
|
62
60
|
U32 hashLog3; /* dispatch table : larger == faster, more memory */
|
63
61
|
U32 loadedDictEnd; /* index of end of dictionary */
|
64
62
|
U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
|
63
|
+
U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
|
65
64
|
ZSTD_compressionStage_e stage;
|
66
65
|
U32 rep[ZSTD_REP_NUM];
|
67
66
|
U32 repToConfirm[ZSTD_REP_NUM];
|
@@ -80,10 +79,11 @@ struct ZSTD_CCtx_s {
|
|
80
79
|
U32* chainTable;
|
81
80
|
HUF_CElt* hufTable;
|
82
81
|
U32 flagStaticTables;
|
82
|
+
HUF_repeat flagStaticHufTable;
|
83
83
|
FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
|
84
84
|
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
|
85
85
|
FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
|
86
|
-
unsigned tmpCounters[
|
86
|
+
unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
|
87
87
|
};
|
88
88
|
|
89
89
|
ZSTD_CCtx* ZSTD_createCCtx(void)
|
@@ -124,6 +124,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
|
|
124
124
|
switch(param)
|
125
125
|
{
|
126
126
|
case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
|
127
|
+
case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
|
127
128
|
default: return ERROR(parameter_unknown);
|
128
129
|
}
|
129
130
|
}
|
@@ -246,14 +247,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
|
|
246
247
|
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
|
247
248
|
|
248
249
|
/*! ZSTD_resetCCtx_advanced() :
|
249
|
-
note :
|
250
|
+
note : `params` must be validated */
|
250
251
|
static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
|
251
252
|
ZSTD_parameters params, U64 frameContentSize,
|
252
253
|
ZSTD_compResetPolicy_e const crp)
|
253
254
|
{
|
254
255
|
if (crp == ZSTDcrp_continue)
|
255
|
-
if (ZSTD_equivalentParams(params, zc->params))
|
256
|
+
if (ZSTD_equivalentParams(params, zc->params)) {
|
257
|
+
zc->flagStaticTables = 0;
|
258
|
+
zc->flagStaticHufTable = HUF_repeat_none;
|
256
259
|
return ZSTD_continueCCtx(zc, params, frameContentSize);
|
260
|
+
}
|
257
261
|
|
258
262
|
{ size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
|
259
263
|
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
|
@@ -287,6 +291,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
|
|
287
291
|
ptr = zc->hashTable3 + h3Size;
|
288
292
|
zc->hufTable = (HUF_CElt*)ptr;
|
289
293
|
zc->flagStaticTables = 0;
|
294
|
+
zc->flagStaticHufTable = HUF_repeat_none;
|
290
295
|
ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
|
291
296
|
|
292
297
|
zc->nextToUpdate = 1;
|
@@ -344,8 +349,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
|
|
344
349
|
{
|
345
350
|
if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
|
346
351
|
|
352
|
+
|
347
353
|
memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
|
348
|
-
|
354
|
+
{ ZSTD_parameters params = srcCCtx->params;
|
355
|
+
params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
|
356
|
+
ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
|
357
|
+
}
|
349
358
|
|
350
359
|
/* copy tables */
|
351
360
|
{ size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
|
@@ -368,12 +377,15 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
|
|
368
377
|
|
369
378
|
/* copy entropy tables */
|
370
379
|
dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
|
380
|
+
dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
|
371
381
|
if (srcCCtx->flagStaticTables) {
|
372
|
-
memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
|
373
382
|
memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
|
374
383
|
memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
|
375
384
|
memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
|
376
385
|
}
|
386
|
+
if (srcCCtx->flagStaticHufTable) {
|
387
|
+
memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
|
388
|
+
}
|
377
389
|
|
378
390
|
return 0;
|
379
391
|
}
|
@@ -487,24 +499,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
|
|
487
499
|
|
488
500
|
/* small ? don't even attempt compression (speed opt) */
|
489
501
|
# define LITERAL_NOENTROPY 63
|
490
|
-
{ size_t const minLitSize = zc->
|
502
|
+
{ size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
|
491
503
|
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
492
504
|
}
|
493
505
|
|
494
506
|
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
|
495
|
-
|
496
|
-
|
497
|
-
singleStream = 1;
|
498
|
-
cLitSize =
|
499
|
-
|
500
|
-
|
501
|
-
|
507
|
+
{ HUF_repeat repeat = zc->flagStaticHufTable;
|
508
|
+
int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
509
|
+
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
510
|
+
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
|
511
|
+
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
|
512
|
+
if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
|
513
|
+
else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
|
502
514
|
}
|
503
515
|
|
504
|
-
if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
|
516
|
+
if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
|
517
|
+
zc->flagStaticHufTable = HUF_repeat_none;
|
505
518
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
506
|
-
|
519
|
+
}
|
520
|
+
if (cLitSize==1) {
|
521
|
+
zc->flagStaticHufTable = HUF_repeat_none;
|
507
522
|
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
523
|
+
}
|
508
524
|
|
509
525
|
/* Build header */
|
510
526
|
switch(lhSize)
|
@@ -572,11 +588,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
572
588
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
573
589
|
}
|
574
590
|
|
575
|
-
|
576
|
-
size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
591
|
+
MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
|
577
592
|
void* dst, size_t dstCapacity,
|
578
593
|
size_t srcSize)
|
579
594
|
{
|
595
|
+
const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
580
596
|
const seqStore_t* seqStorePtr = &(zc->seqStore);
|
581
597
|
U32 count[MaxSeq+1];
|
582
598
|
S16 norm[MaxSeq+1];
|
@@ -710,7 +726,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|
710
726
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
711
727
|
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
712
728
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
713
|
-
|
729
|
+
if (longOffsets) {
|
730
|
+
U32 const ofBits = ofCodeTable[nbSeq-1];
|
731
|
+
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
732
|
+
if (extraBits) {
|
733
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
734
|
+
BIT_flushBits(&blockStream);
|
735
|
+
}
|
736
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
737
|
+
ofBits - extraBits);
|
738
|
+
} else {
|
739
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
740
|
+
}
|
714
741
|
BIT_flushBits(&blockStream);
|
715
742
|
|
716
743
|
{ size_t n;
|
@@ -732,7 +759,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|
732
759
|
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
733
760
|
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
734
761
|
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
735
|
-
|
762
|
+
if (longOffsets) {
|
763
|
+
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
764
|
+
if (extraBits) {
|
765
|
+
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
766
|
+
BIT_flushBits(&blockStream); /* (7)*/
|
767
|
+
}
|
768
|
+
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
769
|
+
ofBits - extraBits); /* 31 */
|
770
|
+
} else {
|
771
|
+
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
772
|
+
}
|
736
773
|
BIT_flushBits(&blockStream); /* (7)*/
|
737
774
|
} }
|
738
775
|
|
@@ -747,9 +784,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
|
|
747
784
|
|
748
785
|
/* check compressibility */
|
749
786
|
_check_compressibility:
|
750
|
-
{
|
751
|
-
|
752
|
-
|
787
|
+
{ size_t const minGain = ZSTD_minGain(srcSize);
|
788
|
+
size_t const maxCSize = srcSize - minGain;
|
789
|
+
if ((size_t)(op-ostart) >= maxCSize) {
|
790
|
+
zc->flagStaticHufTable = HUF_repeat_none;
|
791
|
+
return 0;
|
792
|
+
} }
|
753
793
|
|
754
794
|
/* confirm repcodes */
|
755
795
|
{ int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
|
@@ -757,7 +797,6 @@ _check_compressibility:
|
|
757
797
|
return op - ostart;
|
758
798
|
}
|
759
799
|
|
760
|
-
|
761
800
|
#if 0 /* for debug */
|
762
801
|
# define STORESEQ_DEBUG
|
763
802
|
#include <stdio.h> /* fprintf */
|
@@ -1748,7 +1787,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
|
|
1748
1787
|
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
|
1749
1788
|
|
1750
1789
|
/* Update chains up to ip (excluded)
|
1751
|
-
Assumption : always within prefix (
|
1790
|
+
Assumption : always within prefix (i.e. not within extDict) */
|
1752
1791
|
FORCE_INLINE
|
1753
1792
|
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
|
1754
1793
|
{
|
@@ -2308,7 +2347,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
|
|
2308
2347
|
if (remaining < blockSize) blockSize = remaining;
|
2309
2348
|
|
2310
2349
|
/* preemptive overflow correction */
|
2311
|
-
if (cctx->lowLimit > (
|
2350
|
+
if (cctx->lowLimit > (3U<<29)) {
|
2312
2351
|
U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
|
2313
2352
|
U32 const current = (U32)(ip - cctx->base);
|
2314
2353
|
U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
|
@@ -2362,7 +2401,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
|
|
2362
2401
|
U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
|
2363
2402
|
U32 const checksumFlag = params.fParams.checksumFlag>0;
|
2364
2403
|
U32 const windowSize = 1U << params.cParams.windowLog;
|
2365
|
-
U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize
|
2404
|
+
U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
|
2366
2405
|
BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
|
2367
2406
|
U32 const fcsCode = params.fParams.contentSizeFlag ?
|
2368
2407
|
(pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
|
@@ -2508,7 +2547,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
|
|
2508
2547
|
return ERROR(GENERIC); /* strategy doesn't exist; impossible */
|
2509
2548
|
}
|
2510
2549
|
|
2511
|
-
zc->nextToUpdate = zc->
|
2550
|
+
zc->nextToUpdate = (U32)(iend - zc->base);
|
2512
2551
|
return 0;
|
2513
2552
|
}
|
2514
2553
|
|
@@ -2600,6 +2639,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
|
|
2600
2639
|
}
|
2601
2640
|
|
2602
2641
|
cctx->flagStaticTables = 1;
|
2642
|
+
cctx->flagStaticHufTable = HUF_repeat_valid;
|
2603
2643
|
return dictPtr - (const BYTE*)dict;
|
2604
2644
|
}
|
2605
2645
|
|
@@ -2609,8 +2649,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
|
|
2609
2649
|
{
|
2610
2650
|
if ((dict==NULL) || (dictSize<=8)) return 0;
|
2611
2651
|
|
2612
|
-
/*
|
2613
|
-
if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC)
|
2652
|
+
/* dict as pure content */
|
2653
|
+
if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
|
2654
|
+
return ZSTD_loadDictionaryContent(zc, dict, dictSize);
|
2614
2655
|
zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
|
2615
2656
|
|
2616
2657
|
/* known magic number : dict is parsed for entropy stats and content */
|
@@ -2782,7 +2823,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
|
|
2782
2823
|
|
2783
2824
|
if (!cdict || !cctx) {
|
2784
2825
|
ZSTD_free(cdict, customMem);
|
2785
|
-
|
2826
|
+
ZSTD_freeCCtx(cctx);
|
2786
2827
|
return NULL;
|
2787
2828
|
}
|
2788
2829
|
|
@@ -2800,8 +2841,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
|
|
2800
2841
|
{ size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
|
2801
2842
|
if (ZSTD_isError(errorCode)) {
|
2802
2843
|
ZSTD_free(cdict->dictBuffer, customMem);
|
2803
|
-
ZSTD_free(cctx, customMem);
|
2804
2844
|
ZSTD_free(cdict, customMem);
|
2845
|
+
ZSTD_freeCCtx(cctx);
|
2805
2846
|
return NULL;
|
2806
2847
|
} }
|
2807
2848
|
|
@@ -2845,7 +2886,11 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
|
|
2845
2886
|
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
|
2846
2887
|
{
|
2847
2888
|
if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
|
2848
|
-
else
|
2889
|
+
else {
|
2890
|
+
ZSTD_parameters params = cdict->refContext->params;
|
2891
|
+
params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
|
2892
|
+
CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
|
2893
|
+
}
|
2849
2894
|
return 0;
|
2850
2895
|
}
|
2851
2896
|
|
@@ -2939,7 +2984,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
|
|
2939
2984
|
size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
|
2940
2985
|
size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
|
2941
2986
|
|
2942
|
-
size_t
|
2987
|
+
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
|
2943
2988
|
{
|
2944
2989
|
if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
|
2945
2990
|
|
@@ -2957,6 +3002,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
|
|
2957
3002
|
return 0; /* ready to go */
|
2958
3003
|
}
|
2959
3004
|
|
3005
|
+
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
|
3006
|
+
{
|
3007
|
+
|
3008
|
+
zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
|
3009
|
+
|
3010
|
+
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
|
3011
|
+
}
|
3012
|
+
|
2960
3013
|
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
|
2961
3014
|
const void* dict, size_t dictSize,
|
2962
3015
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
@@ -2988,7 +3041,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
|
|
2988
3041
|
zcs->checksum = params.fParams.checksumFlag > 0;
|
2989
3042
|
zcs->params = params;
|
2990
3043
|
|
2991
|
-
return
|
3044
|
+
return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
|
2992
3045
|
}
|
2993
3046
|
|
2994
3047
|
/* note : cdict must outlive compression session */
|
@@ -3022,7 +3075,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
|
|
3022
3075
|
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
|
3023
3076
|
{
|
3024
3077
|
if (zcs==NULL) return 0; /* support sizeof on NULL */
|
3025
|
-
return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
|
3078
|
+
return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
|
3026
3079
|
}
|
3027
3080
|
|
3028
3081
|
/*====== Compression ======*/
|
@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
|
|
203
203
|
|
204
204
|
|
205
205
|
/* Update hashTable3 up to ip (excluded)
|
206
|
-
Assumption : always within prefix (
|
206
|
+
Assumption : always within prefix (i.e. not within extDict) */
|
207
207
|
FORCE_INLINE
|
208
208
|
U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
|
209
209
|
{
|
@@ -25,8 +25,6 @@
|
|
25
25
|
#include "threading.h" /* mutex */
|
26
26
|
#include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
27
27
|
#include "zstdmt_compress.h"
|
28
|
-
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
29
|
-
#include "xxhash.h"
|
30
28
|
|
31
29
|
|
32
30
|
/* ====== Debug ====== */
|
@@ -231,16 +229,17 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
231
229
|
const void* const src = (const char*)job->srcStart + job->dictSize;
|
232
230
|
buffer_t const dstBuff = job->dstBuff;
|
233
231
|
DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
|
234
|
-
if (job->cdict) {
|
232
|
+
if (job->cdict) { /* should only happen for first segment */
|
235
233
|
size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
|
236
234
|
if (job->cdict) DEBUGLOG(3, "using CDict ");
|
237
235
|
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
|
238
|
-
} else {
|
239
|
-
size_t const
|
240
|
-
|
236
|
+
} else { /* srcStart points at reloaded section */
|
237
|
+
size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
|
238
|
+
size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
|
239
|
+
if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
|
241
240
|
ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
|
242
241
|
}
|
243
|
-
if (!job->firstChunk) { /* flush frame header */
|
242
|
+
if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
|
244
243
|
size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
|
245
244
|
if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
|
246
245
|
ZSTD_invalidateRepCodes(job->cctx);
|
@@ -248,7 +247,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
|
|
248
247
|
|
249
248
|
DEBUGLOG(4, "Compressing : ");
|
250
249
|
DEBUG_PRINTHEX(4, job->srcStart, 12);
|
251
|
-
job->cSize = (job->lastChunk) ?
|
250
|
+
job->cSize = (job->lastChunk) ?
|
252
251
|
ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
|
253
252
|
ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
|
254
253
|
DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
|