zstd-ruby 1.1.3.0 → 1.1.4.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +9 -6
- data/ext/zstdruby/libzstd/common/bitstream.h +3 -0
- data/ext/zstdruby/libzstd/common/entropy_common.c +13 -19
- data/ext/zstdruby/libzstd/common/fse.h +48 -22
- data/ext/zstdruby/libzstd/common/fse_decompress.c +0 -1
- data/ext/zstdruby/libzstd/common/huf.h +27 -5
- data/ext/zstdruby/libzstd/common/mem.h +14 -12
- data/ext/zstdruby/libzstd/common/threading.c +5 -4
- data/ext/zstdruby/libzstd/common/threading.h +1 -1
- data/ext/zstdruby/libzstd/common/xxhash.c +3 -1
- data/ext/zstdruby/libzstd/common/xxhash.h +11 -15
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +16 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +103 -28
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +90 -37
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +7 -8
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +20 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +429 -120
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +1 -1
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
- data/ext/zstdruby/libzstd/dll/libzstd.def +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +122 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +31 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +37 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +37 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +33 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +8 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +7 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +32 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +7 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +44 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +8 -0
- data/ext/zstdruby/libzstd/zstd.h +87 -13
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
data/ext/zstdruby/libzstd/compress/huf_compress.c:

@@ -127,7 +127,7 @@ struct HUF_CElt_s {
 };  /* typedef'd to HUF_CElt within "huf.h" */
 
 /*! HUF_writeCTable() :
-    `CTable` :
+    `CTable` : Huffman tree to save, using huf representation.
     @return : size of saved CTable */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                         const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
@@ -409,6 +409,25 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
     return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
 }
 
+static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+    int bad = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+    }
+    return !bad;
+}
+
 static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
 {
     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
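The two helpers above drive the new table-reuse heuristic: HUF_estimateCompressedSize prices a block against a given Huffman table (sum of nbBits * count, converted to bytes), and HUF_validateCTable rejects a previous table that lacks a code for some symbol present in the new block. A minimal standalone sketch of the same arithmetic, using a hypothetical `SymbolCode` struct instead of the library's (incomplete) HUF_CElt type:

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for HUF_CElt: only the code length matters here. */
typedef struct { unsigned char nbBits; } SymbolCode;

/* Price a histogram against a code table: sum of nbBits*count, in bytes. */
static size_t estimate_compressed_size(const SymbolCode* table, const unsigned* count, unsigned maxSymbol)
{
    size_t nbBits = 0;
    for (unsigned s = 0; s <= maxSymbol; ++s) nbBits += (size_t)table[s].nbBits * count[s];
    return nbBits >> 3;
}

/* A table is reusable only if every symbol that occurs has a code assigned. */
static int validate_table(const SymbolCode* table, const unsigned* count, unsigned maxSymbol)
{
    for (unsigned s = 0; s <= maxSymbol; ++s)
        if (count[s] != 0 && table[s].nbBits == 0) return 0;
    return 1;
}

int main(void)
{
    SymbolCode table[4] = { {1}, {2}, {3}, {0} };   /* symbol 3 has no code */
    unsigned   hist[4]  = { 100, 50, 25, 0 };       /* ...but symbol 3 never occurs */
    printf("estimated size: %zu bytes, valid: %d\n",
           estimate_compressed_size(table, hist, 3), validate_table(table, hist, 3));
    return 0;
}
```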
@@ -510,25 +529,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
 }
 
 
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                unsigned singleStream, const HUF_CElt* CTable)
+{
+    size_t const cSize = singleStream ?
+                         HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
+                         HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return op-ostart;
+}
+
+
 /* `workSpace` must a table of at least 1024 unsigned */
 static size_t HUF_compress_internal (
                 void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 unsigned singleStream,
-                void* workSpace, size_t wkspSize
+                void* workSpace, size_t wkspSize,
+                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-
-
-
-
+    U32* count;
+    size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
+    HUF_CElt* CTable;
+    size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
 
     /* checks & inits */
-    if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
+    if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
     if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
     if (!dstSize) return 0;  /* cannot fit within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@@ -536,38 +573,58 @@ static size_t HUF_compress_internal (
     if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
     if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
 
+    count = (U32*)workSpace;
+    workSpace = (BYTE*)workSpace + countSize;
+    wkspSize -= countSize;
+    CTable = (HUF_CElt*)workSpace;
+    workSpace = (BYTE*)workSpace + CTableSize;
+    wkspSize -= CTableSize;
+
+    /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, FSE_count_wksp (
+    {   CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
     }
 
+    /* Check validity of previous table */
+    if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+    }
+
     /* Build Huffman Tree */
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp (
+    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
         huffLog = (U32)maxBits;
+        /* Zero the unused symbols so we can check it for validity */
+        memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
     }
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize,
-
+    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
+        /* Check if using the previous table will be beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+            }
+        }
+        /* Use the new table */
+        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); }   /* Save the new table */
     }
-
-    /* Compress */
-    {   size_t const cSize = (singleStream) ?
-            HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) :   /* single segment */
-            HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
-        if (HUF_isError(cSize)) return cSize;
-        if (cSize==0) return 0;   /* uncompressible */
-        op += cSize;
-    }
-
-    /* check compressibility */
-    if ((size_t)(op-ostart) >= srcSize-1)
-        return 0;
-
-    return op-ostart;
+    return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
 }
 
 
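HUF_compress_internal now carves a symbol-count array and a candidate code table out of the caller-supplied workspace before handing the remainder to the tree builder, and the minimum-workspace check grows accordingly. A hedged, standalone sketch of that carve-up arithmetic; the constants and the `CEltLike` struct are illustrative stand-ins, not the library's definitions:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative constants; the library defines HUF_SYMBOLVALUE_MAX and its own
   HUF_CElt / huffNodeTable types with their exact sizes. */
#define MAX_SYMBOL_VALUE 255
typedef struct { uint16_t val; uint8_t nbBits; } CEltLike;   /* stand-in for HUF_CElt */

int main(void)
{
    unsigned char workspace[8192];                 /* caller-provided scratch buffer (size is an assumption) */
    size_t wkspSize = sizeof(workspace);
    unsigned char* ptr = workspace;

    /* carve the symbol-count table */
    size_t const countSize  = sizeof(uint32_t) * (MAX_SYMBOL_VALUE + 1);
    uint32_t* const count   = (uint32_t*)(void*)ptr;
    ptr += countSize;  wkspSize -= countSize;

    /* carve the candidate code table */
    size_t const ctableSize = sizeof(CEltLike) * (MAX_SYMBOL_VALUE + 1);
    CEltLike* const ctable  = (CEltLike*)(void*)ptr;
    ptr += ctableSize; wkspSize -= ctableSize;

    (void)count; (void)ctable;
    printf("remaining workspace for counting and tree building: %zu bytes\n", wkspSize);
    return 0;
}
```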
@@ -576,7 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
 }
 
 size_t HUF_compress1X (void* dst, size_t dstSize,
@@ -592,7 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+}
+
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
 }
 
 size_t HUF_compress2 (void* dst, size_t dstSize,
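HUF_compress1X_repeat and HUF_compress4X_repeat are the new entry points that expose the table-reuse machinery: the caller owns a persistent Huffman table plus a HUF_repeat flag, and the function decides per block whether to reuse the old table or rebuild and save a new one. A hedged sketch of the call pattern, modeled on what zstd_compress.c in this diff does; `lit_ctx` and `compress_literals_block` are hypothetical names, and the workspace size is only illustrative (zstd_compress.c sizes its own scratch with HUF_WORKSPACE_SIZE_U32):

```c
#define HUF_STATIC_LINKING_ONLY   /* exposes HUF_compress4X_repeat and HUF_repeat */
#include "huf.h"

/* Illustrative per-context state, patterned after ZSTD_CCtx_s in this diff. */
typedef struct {
    unsigned   hufTableU32[256];   /* backing storage; HUF_CElt is an incomplete type, simulated with U32s */
    HUF_repeat hufRepeat;          /* HUF_repeat_none until a table has been produced or loaded */
    unsigned   workspace[4096];    /* scratch handed to the compressor; sized generously for the sketch */
} lit_ctx;                         /* hypothetical helper type, not a zstd API */

static size_t compress_literals_block(lit_ctx* ctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      int preferRepeat)
{
    HUF_repeat repeat = ctx->hufRepeat;
    HUF_CElt* const table = (HUF_CElt*)ctx->hufTableU32;
    size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
                                               255, 11,   /* maxSymbolValue, huffLog */
                                               ctx->workspace, sizeof(ctx->workspace),
                                               table, &repeat, preferRepeat);
    if (HUF_isError(cSize)) return cSize;
    if (cSize == 0) { ctx->hufRepeat = HUF_repeat_none; return 0; }   /* stored raw: nothing to reuse */
    /* repeat != none means the previous table was reused; otherwise a fresh one was saved into hufTableU32 */
    ctx->hufRepeat = (repeat != HUF_repeat_none) ? repeat : HUF_repeat_check;
    return cSize;
}
```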
data/ext/zstdruby/libzstd/compress/zstd_compress.c:

@@ -13,8 +13,6 @@
 ***************************************/
 #include <string.h>         /* memset */
 #include "mem.h"
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"               /* XXH_reset, update, digest */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
 #include "fse.h"
 #define HUF_STATIC_LINKING_ONLY
@@ -62,6 +60,7 @@ struct ZSTD_CCtx_s {
     U32   hashLog3;        /* dispatch table : larger == faster, more memory */
     U32   loadedDictEnd;   /* index of end of dictionary */
     U32   forceWindow;     /* force back-references to respect limit of 1<<wLog, even for dictionary */
+    U32   forceRawDict;    /* Force loading dictionary in "content-only" mode (no header analysis) */
     ZSTD_compressionStage_e stage;
     U32   rep[ZSTD_REP_NUM];
     U32   repToConfirm[ZSTD_REP_NUM];
@@ -80,10 +79,11 @@ struct ZSTD_CCtx_s {
     U32* chainTable;
     HUF_CElt* hufTable;
     U32 flagStaticTables;
+    HUF_repeat flagStaticHufTable;
     FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
     FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-    unsigned tmpCounters[
+    unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
 };
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -124,6 +124,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
     switch(param)
     {
     case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
+    case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
     default: return ERROR(parameter_unknown);
     }
 }
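ZSTD_p_forceRawDict is the advanced parameter behind the new forceRawDict field: when set, ZSTD_compress_insertDictionary skips the magic-number / entropy-stats parsing and loads the dictionary as raw content, which is how zstdmt_compress.c reuses the previous segment as a prefix (see further below). A hedged usage sketch; ZSTD_setCCtxParameter sits behind ZSTD_STATIC_LINKING_ONLY, and error handling here is deliberately minimal:

```c
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_setCCtxParameter / ZSTD_p_forceRawDict are advanced APIs */
#include "zstd.h"

/* Compress `src` using `dictBuf` as pure content (no dictionary header analysis),
   even if dictBuf happens to start with the dictionary magic number. */
static size_t compress_with_raw_dict(void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dictBuf, size_t dictSize,
                                     int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t cSize;
    if (cctx == NULL) return 0;
    ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1);   /* must be set before the dictionary is loaded */
    cSize = ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, dictBuf, dictSize, level);
    ZSTD_freeCCtx(cctx);
    return cSize;   /* may be an error code; check with ZSTD_isError() in real use */
}
```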
@@ -246,14 +247,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
 
 /*! ZSTD_resetCCtx_advanced() :
-    note :
+    note : `params` must be validated */
 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
                                        ZSTD_parameters params, U64 frameContentSize,
                                        ZSTD_compResetPolicy_e const crp)
 {
     if (crp == ZSTDcrp_continue)
-        if (ZSTD_equivalentParams(params, zc->params))
+        if (ZSTD_equivalentParams(params, zc->params)) {
+            zc->flagStaticTables = 0;
+            zc->flagStaticHufTable = HUF_repeat_none;
             return ZSTD_continueCCtx(zc, params, frameContentSize);
+        }
 
     {   size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
         U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
@@ -287,6 +291,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
         ptr = zc->hashTable3 + h3Size;
         zc->hufTable = (HUF_CElt*)ptr;
         zc->flagStaticTables = 0;
+        zc->flagStaticHufTable = HUF_repeat_none;
         ptr = ((U32*)ptr) + 256;   /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
 
         zc->nextToUpdate = 1;
@@ -344,8 +349,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 {
     if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
 
+
     memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-
+    {   ZSTD_parameters params = srcCCtx->params;
+        params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+        ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
+    }
 
     /* copy tables */
     {   size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
@@ -368,12 +377,15 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
 
     /* copy entropy tables */
     dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
+    dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
     if (srcCCtx->flagStaticTables) {
-        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
         memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
         memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
         memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
     }
+    if (srcCCtx->flagStaticHufTable) {
+        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
+    }
 
     return 0;
 }
@@ -487,24 +499,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
 
     /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = zc->
+    {   size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
 
     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-
-
-    singleStream = 1;
-    cLitSize =
-
-
-
+    {   HUF_repeat repeat = zc->flagStaticHufTable;
+        int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
+                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
+        if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
+        else { zc->flagStaticHufTable = HUF_repeat_check; }       /* now have a table to reuse */
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
+        zc->flagStaticHufTable = HUF_repeat_none;
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-
+    }
+    if (cLitSize==1) {
+        zc->flagStaticHufTable = HUF_repeat_none;
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
 
     /* Build header */
     switch(lhSize)
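The literal-compression path now carries a small state machine in zc->flagStaticHufTable: HUF_repeat_valid after a dictionary's table is loaded (trusted as-is), HUF_repeat_check after a block writes a fresh table (it must be validated against the next block's histogram), and HUF_repeat_none whenever literals end up raw, RLE, or the block is uncompressible. A hedged restatement of those transitions; every name other than the HUF_repeat_* values it mirrors is made up for the sketch:

```c
/* Hypothetical sketch of the zc->flagStaticHufTable transitions in this diff.
   rep_* mirrors HUF_repeat_none / HUF_repeat_check / HUF_repeat_valid. */
typedef enum { rep_none, rep_check, rep_valid } repeat_state;
typedef enum { LIT_RAW, LIT_RLE, LIT_NEW_TABLE, LIT_REUSED_TABLE } literal_outcome;

static repeat_state next_state(literal_outcome out, repeat_state cur)
{
    switch (out) {
    case LIT_REUSED_TABLE: return cur;        /* block header gets set_repeat, state unchanged */
    case LIT_NEW_TABLE:    return rep_check;  /* fresh table saved in zc->hufTable, re-validate next block */
    case LIT_RAW:                             /* fallthrough */
    case LIT_RLE:
    default:               return rep_none;   /* raw/RLE literals leave no table worth reusing */
    }
}
```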
@@ -572,11 +588,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-
-size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
+MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               size_t srcSize)
 {
+    const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
     const seqStore_t* seqStorePtr = &(zc->seqStore);
     U32 count[MaxSeq+1];
     S16 norm[MaxSeq+1];
@@ -710,7 +726,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
     BIT_flushBits(&blockStream);
 
     {   size_t n;
@@ -732,7 +759,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
             BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
             if (MEM_32bits()) BIT_flushBits(&blockStream);                /* (7)*/
-
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                          /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                          /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);   /* 31 */
+            }
             BIT_flushBits(&blockStream);                                  /* (7)*/
         }   }
 
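The longOffsets path exists because an offset code can carry more bits than the bit container can safely accept in one BIT_addBits call once windowLog exceeds STREAM_ACCUMULATOR_MIN (a platform-dependent accumulator limit defined in zstd_internal.h). The value is therefore split: the low extraBits are written and flushed first, then the remaining high bits. A small standalone illustration of that split; it performs only the arithmetic and does not touch the zstd bitstream API:

```c
#include <stdint.h>
#include <stdio.h>

#define MIN(a,b) ((a) < (b) ? (a) : (b))

/* Split an `ofBits`-wide offset value into (low, high) chunks the way the
   longOffsets branch does, given an accumulator limit `accMin` (illustrative). */
static void split_offset(uint32_t offset, unsigned ofBits, unsigned accMin)
{
    unsigned const extraBits = ofBits - MIN(ofBits, accMin - 1);
    uint32_t const low  = extraBits ? (offset & ((1u << extraBits) - 1)) : 0;
    uint32_t const high = offset >> extraBits;
    printf("ofBits=%u -> write %u low bits (0x%x), flush, then %u high bits (0x%x)\n",
           ofBits, extraBits, low, ofBits - extraBits, high);
}

int main(void)
{
    split_offset(0x12345678u, 30, 25);   /* wide offset against a 32-bit-style limit: needs the split */
    split_offset(0x1234u, 16, 25);       /* narrow offset: extraBits == 0, a single write suffices */
    return 0;
}
```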
@@ -747,9 +784,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
 
     /* check compressibility */
 _check_compressibility:
-    {
-
-
+    {   size_t const minGain = ZSTD_minGain(srcSize);
+        size_t const maxCSize = srcSize - minGain;
+        if ((size_t)(op-ostart) >= maxCSize) {
+            zc->flagStaticHufTable = HUF_repeat_none;
+            return 0;
+    }   }
 
     /* confirm repcodes */
     { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
@@ -757,7 +797,6 @@ _check_compressibility:
     return op - ostart;
 }
 
-
 #if 0 /* for debug */
 #  define STORESEQ_DEBUG
 #include <stdio.h>   /* fprintf */
@@ -1748,7 +1787,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
 
 /* Update chains up to ip (excluded)
-   Assumption : always within prefix (
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
@@ -2308,7 +2347,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
         if (remaining < blockSize) blockSize = remaining;
 
         /* preemptive overflow correction */
-        if (cctx->lowLimit > (
+        if (cctx->lowLimit > (3U<<29)) {
             U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
             U32 const current = (U32)(ip - cctx->base);
             U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
@@ -2362,7 +2401,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
     U32   const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
     U32   const checksumFlag = params.fParams.checksumFlag>0;
     U32   const windowSize = 1U << params.cParams.windowLog;
-    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize
+    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
     BYTE  const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
     U32   const fcsCode = params.fParams.contentSizeFlag ?
                      (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) :   /* 0-3 */
@@ -2508,7 +2547,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
         return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
     }
 
-    zc->nextToUpdate = zc->
+    zc->nextToUpdate = (U32)(iend - zc->base);
     return 0;
 }
 
@@ -2600,6 +2639,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
     }
 
     cctx->flagStaticTables = 1;
+    cctx->flagStaticHufTable = HUF_repeat_valid;
     return dictPtr - (const BYTE*)dict;
 }
 
@@ -2609,8 +2649,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
 {
     if ((dict==NULL) || (dictSize<=8)) return 0;
 
-    /*
-    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC)
+    /* dict as pure content */
+    if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
+        return ZSTD_loadDictionaryContent(zc, dict, dictSize);
     zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
 
     /* known magic number : dict is parsed for entropy stats and content */
@@ -2782,7 +2823,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
 
     if (!cdict || !cctx) {
         ZSTD_free(cdict, customMem);
-
+        ZSTD_freeCCtx(cctx);
         return NULL;
     }
 
@@ -2800,8 +2841,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
     {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
         if (ZSTD_isError(errorCode)) {
             ZSTD_free(cdict->dictBuffer, customMem);
-            ZSTD_free(cctx, customMem);
             ZSTD_free(cdict, customMem);
+            ZSTD_freeCCtx(cctx);
             return NULL;
     }   }
 
@@ -2845,7 +2886,11 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
 {
     if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-    else
+    else {
+        ZSTD_parameters params = cdict->refContext->params;
+        params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+        CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
+    }
     return 0;
 }
 
@@ -2939,7 +2984,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
 size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
 size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
 
-size_t
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
 {
     if (zcs->inBuffSize==0) return ERROR(stage_wrong);   /* zcs has not been init at least once => can't reset */
 
@@ -2957,6 +3002,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
     return 0;   /* ready to go */
 }
 
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
+{
+
+    zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
+
+    return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
+}
+
 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                  const void* dict, size_t dictSize,
                                  ZSTD_parameters params, unsigned long long pledgedSrcSize)
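ZSTD_resetCStream now also raises the frame's contentSizeFlag whenever a pledged source size is supplied, so reusing one ZSTD_CStream across many inputs produces frames that advertise their decompressed size. A hedged sketch of that reuse pattern (an advanced API, hence the ZSTD_STATIC_LINKING_ONLY define; error handling and output management are trimmed, and buffer sizing is the caller's problem):

```c
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_resetCStream is in the advanced section of zstd.h */
#include "zstd.h"

/* Compress one buffer per frame, reusing the same ZSTD_CStream.
   Returns the compressed size of the last frame, or 0 on failure (simplified). */
static size_t compress_frames(const void* const srcs[], const size_t srcSizes[], int nbFrames,
                              void* dst, size_t dstCapacity)
{
    ZSTD_CStream* const zcs = ZSTD_createCStream();
    size_t lastCSize = 0;
    int i;
    if (zcs == NULL) return 0;
    if (ZSTD_isError(ZSTD_initCStream(zcs, 3))) { ZSTD_freeCStream(zcs); return 0; }

    for (i = 0; i < nbFrames; i++) {
        ZSTD_inBuffer  input  = { srcs[i], srcSizes[i], 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t remaining;
        /* announce the exact size of this frame; the content size lands in the frame header */
        ZSTD_resetCStream(zcs, (unsigned long long)srcSizes[i]);
        while (input.pos < input.size)
            if (ZSTD_isError(ZSTD_compressStream(zcs, &output, &input))) goto _error;
        do {   /* flush the frame epilogue until nothing is left */
            remaining = ZSTD_endStream(zcs, &output);
            if (ZSTD_isError(remaining)) goto _error;
        } while (remaining != 0);
        lastCSize = output.pos;   /* a real caller would store each frame somewhere */
    }
    ZSTD_freeCStream(zcs);
    return lastCSize;
_error:
    ZSTD_freeCStream(zcs);
    return 0;
}
```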
@@ -2988,7 +3041,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
     zcs->checksum = params.fParams.checksumFlag > 0;
     zcs->params = params;
 
-    return
+    return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
 }
 
 /* note : cdict must outlive compression session */
@@ -3022,7 +3075,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 {
     if (zcs==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
+    return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
 }
 
 /*====== Compression ======*/
data/ext/zstdruby/libzstd/compress/zstd_opt.h:

@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 
 /* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (
+   Assumption : always within prefix (i.e. not within extDict) */
 FORCE_INLINE
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c:

@@ -25,8 +25,6 @@
 #include "threading.h"       /* mutex */
 #include "zstd_internal.h"   /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstdmt_compress.h"
-#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#include "xxhash.h"
 
 
 /* ====== Debug ====== */
@@ -231,16 +229,17 @@ void ZSTDMT_compressChunk(void* jobDescription)
     const void* const src = (const char*)job->srcStart + job->dictSize;
     buffer_t const dstBuff = job->dstBuff;
     DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
-    if (job->cdict) {
+    if (job->cdict) {   /* should only happen for first segment */
         size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
         if (job->cdict) DEBUGLOG(3, "using CDict ");
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
-    } else {
-        size_t const
-
+    } else {   /* srcStart points at reloaded section */
+        size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1);   /* Force loading dictionary in "content-only" mode (no header analysis) */
+        size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
+        if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
         ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
     }
-    if (!job->firstChunk) {   /* flush frame header */
+    if (!job->firstChunk) {   /* flush and overwrite frame header when it's not first segment */
         size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
         if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
         ZSTD_invalidateRepCodes(job->cctx);
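Every worker chunk after the first is primed with the tail of the preceding chunk as a raw prefix: ZSTD_p_forceRawDict guarantees that prefix is loaded as plain content even if it happens to start with the dictionary magic number, and ZSTD_p_forceWindow keeps back-references within the declared window. A hedged, standalone sketch of the prefix layout idea only; real zstdmt derives its overlap from the compression parameters and manages its own job structures, and the names here (chunk_layout, plan_chunks) are hypothetical:

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical illustration of feeding each job the previous chunk's tail as a
   raw-content prefix, in the spirit of ZSTDMT_compressChunk above. */
typedef struct { size_t dictStart, dictSize, srcStart, srcSize; } chunk_layout;

static void plan_chunks(size_t totalSize, size_t chunkSize, size_t overlapSize)
{
    size_t pos = 0;
    int idx = 0;
    while (pos < totalSize) {
        chunk_layout c;
        c.srcStart  = pos;
        c.srcSize   = (totalSize - pos < chunkSize) ? totalSize - pos : chunkSize;
        c.dictSize  = (idx == 0) ? 0 : (overlapSize < pos ? overlapSize : pos);
        c.dictStart = c.srcStart - c.dictSize;   /* prefix = tail of the previous chunk */
        printf("job %d : prefix [%zu,%zu) + payload [%zu,%zu)\n",
               idx, c.dictStart, c.dictStart + c.dictSize, c.srcStart, c.srcStart + c.srcSize);
        pos += c.srcSize;
        idx++;
    }
}

int main(void) { plan_chunks(10u << 20, 2u << 20, 512u << 10); return 0; }
```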
@@ -248,7 +247,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
 
     DEBUGLOG(4, "Compressing : ");
     DEBUG_PRINTHEX(4, job->srcStart, 12);
-    job->cSize = (job->lastChunk) ?   
+    job->cSize = (job->lastChunk) ?
         ZSTD_compressEnd     (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
         ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
     DEBUGLOG(3, "compressed %u bytes into %u bytes  (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);