zstd-ruby 1.5.0.0 → 1.5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/README.md +1 -1
- data/ext/zstdruby/extconf.rb +2 -1
- data/ext/zstdruby/libzstd/Makefile +50 -175
- data/ext/zstdruby/libzstd/README.md +7 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
- data/ext/zstdruby/libzstd/common/compiler.h +89 -43
- data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
- data/ext/zstdruby/libzstd/common/error_private.h +79 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +24 -22
- data/ext/zstdruby/libzstd/common/mem.h +18 -0
- data/ext/zstdruby/libzstd/common/pool.c +11 -6
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/portability_macros.h +137 -0
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_internal.h +95 -92
- data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
- data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +307 -373
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +174 -83
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +15 -14
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +41 -27
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +295 -120
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +309 -130
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +482 -562
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +249 -148
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +76 -38
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +4 -1
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +585 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +101 -30
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
- data/ext/zstdruby/libzstd/libzstd.mk +203 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
- data/ext/zstdruby/libzstd/module.modulemap +25 -0
- data/ext/zstdruby/libzstd/zdict.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +179 -136
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +11 -6
@@ -63,7 +63,7 @@ typedef struct {
|
|
63
63
|
} ZSTD_localDict;
|
64
64
|
|
65
65
|
typedef struct {
|
66
|
-
HUF_CElt CTable[
|
66
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
|
67
67
|
HUF_repeat repeatMode;
|
68
68
|
} ZSTD_hufCTables_t;
|
69
69
|
|
@@ -129,7 +129,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
|
129
129
|
*********************************/
|
130
130
|
|
131
131
|
typedef struct {
|
132
|
-
U32 off; /* Offset code
|
132
|
+
U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
|
133
133
|
U32 len; /* Raw length of match */
|
134
134
|
} ZSTD_match_t;
|
135
135
|
|
@@ -179,7 +179,7 @@ typedef struct {
|
|
179
179
|
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
|
180
180
|
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
|
181
181
|
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
|
182
|
-
|
182
|
+
ZSTD_paramSwitch_e literalCompressionMode;
|
183
183
|
} optState_t;
|
184
184
|
|
185
185
|
typedef struct {
|
@@ -199,6 +199,8 @@ typedef struct {
|
|
199
199
|
*/
|
200
200
|
} ZSTD_window_t;
|
201
201
|
|
202
|
+
#define ZSTD_WINDOW_START_INDEX 2
|
203
|
+
|
202
204
|
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
203
205
|
|
204
206
|
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
|
@@ -264,7 +266,7 @@ typedef struct {
|
|
264
266
|
} ldmState_t;
|
265
267
|
|
266
268
|
typedef struct {
|
267
|
-
|
269
|
+
ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
|
268
270
|
U32 hashLog; /* Log size of hashTable */
|
269
271
|
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
|
270
272
|
U32 minMatchLength; /* Minimum match length */
|
@@ -295,7 +297,7 @@ struct ZSTD_CCtx_params_s {
|
|
295
297
|
* There is no guarantee that hint is close to actual source size */
|
296
298
|
|
297
299
|
ZSTD_dictAttachPref_e attachDictPref;
|
298
|
-
|
300
|
+
ZSTD_paramSwitch_e literalCompressionMode;
|
299
301
|
|
300
302
|
/* Multithreading: used to pass parameters to mtctx */
|
301
303
|
int nbWorkers;
|
@@ -318,10 +320,10 @@ struct ZSTD_CCtx_params_s {
|
|
318
320
|
int validateSequences;
|
319
321
|
|
320
322
|
/* Block splitting */
|
321
|
-
|
323
|
+
ZSTD_paramSwitch_e useBlockSplitter;
|
322
324
|
|
323
325
|
/* Param for deciding whether to use row-based matchfinder */
|
324
|
-
|
326
|
+
ZSTD_paramSwitch_e useRowMatchFinder;
|
325
327
|
|
326
328
|
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
|
327
329
|
int deterministicRefPrefix;
|
@@ -343,6 +345,22 @@ typedef enum {
|
|
343
345
|
ZSTDb_buffered
|
344
346
|
} ZSTD_buffered_policy_e;
|
345
347
|
|
348
|
+
/**
|
349
|
+
* Struct that contains all elements of block splitter that should be allocated
|
350
|
+
* in a wksp.
|
351
|
+
*/
|
352
|
+
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
|
353
|
+
typedef struct {
|
354
|
+
seqStore_t fullSeqStoreChunk;
|
355
|
+
seqStore_t firstHalfSeqStore;
|
356
|
+
seqStore_t secondHalfSeqStore;
|
357
|
+
seqStore_t currSeqStore;
|
358
|
+
seqStore_t nextSeqStore;
|
359
|
+
|
360
|
+
U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
|
361
|
+
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
362
|
+
} ZSTD_blockSplitCtx;
|
363
|
+
|
346
364
|
struct ZSTD_CCtx_s {
|
347
365
|
ZSTD_compressionStage_e stage;
|
348
366
|
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
@@ -374,7 +392,7 @@ struct ZSTD_CCtx_s {
|
|
374
392
|
ZSTD_blockState_t blockState;
|
375
393
|
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
376
394
|
|
377
|
-
/*
|
395
|
+
/* Whether we are streaming or not */
|
378
396
|
ZSTD_buffered_policy_e bufferedPolicy;
|
379
397
|
|
380
398
|
/* streaming */
|
@@ -408,6 +426,9 @@ struct ZSTD_CCtx_s {
|
|
408
426
|
#if ZSTD_TRACE
|
409
427
|
ZSTD_TraceCtx traceCtx;
|
410
428
|
#endif
|
429
|
+
|
430
|
+
/* Workspace for block splitter */
|
431
|
+
ZSTD_blockSplitCtx blockSplitCtx;
|
411
432
|
};
|
412
433
|
|
413
434
|
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
@@ -442,7 +463,7 @@ typedef enum {
|
|
442
463
|
typedef size_t (*ZSTD_blockCompressor) (
|
443
464
|
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
444
465
|
void const* src, size_t srcSize);
|
445
|
-
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat,
|
466
|
+
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
|
446
467
|
|
447
468
|
|
448
469
|
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
@@ -476,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
|
476
497
|
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
477
498
|
}
|
478
499
|
|
479
|
-
typedef struct repcodes_s {
|
480
|
-
U32 rep[3];
|
481
|
-
} repcodes_t;
|
482
|
-
|
483
|
-
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
484
|
-
{
|
485
|
-
repcodes_t newReps;
|
486
|
-
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
487
|
-
newReps.rep[2] = rep[1];
|
488
|
-
newReps.rep[1] = rep[0];
|
489
|
-
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
490
|
-
} else { /* repcode */
|
491
|
-
U32 const repCode = offset + ll0;
|
492
|
-
if (repCode > 0) { /* note : if repCode==0, no change */
|
493
|
-
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
494
|
-
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
495
|
-
newReps.rep[1] = rep[0];
|
496
|
-
newReps.rep[0] = currentOffset;
|
497
|
-
} else { /* repCode == 0 */
|
498
|
-
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
499
|
-
}
|
500
|
-
}
|
501
|
-
return newReps;
|
502
|
-
}
|
503
|
-
|
504
500
|
/* ZSTD_cParam_withinBounds:
|
505
501
|
* @return 1 if value is within cParam bounds,
|
506
502
|
* 0 otherwise */
|
@@ -549,17 +545,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
|
549
545
|
return (srcSize >> minlog) + 2;
|
550
546
|
}
|
551
547
|
|
552
|
-
MEM_STATIC int
|
548
|
+
MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
|
553
549
|
{
|
554
550
|
switch (cctxParams->literalCompressionMode) {
|
555
|
-
case
|
551
|
+
case ZSTD_ps_enable:
|
556
552
|
return 0;
|
557
|
-
case
|
553
|
+
case ZSTD_ps_disable:
|
558
554
|
return 1;
|
559
555
|
default:
|
560
556
|
assert(0 /* impossible: pre-validated */);
|
561
|
-
|
562
|
-
case
|
557
|
+
ZSTD_FALLTHROUGH;
|
558
|
+
case ZSTD_ps_auto:
|
563
559
|
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
|
564
560
|
}
|
565
561
|
}
|
@@ -569,7 +565,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
|
|
569
565
|
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
|
570
566
|
* large copies.
|
571
567
|
*/
|
572
|
-
static void
|
568
|
+
static void
|
569
|
+
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
|
570
|
+
{
|
573
571
|
assert(iend > ilimit_w);
|
574
572
|
if (ip <= ilimit_w) {
|
575
573
|
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
|
@@ -579,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
|
|
579
577
|
while (ip < iend) *op++ = *ip++;
|
580
578
|
}
|
581
579
|
|
580
|
+
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
581
|
+
#define STORE_REPCODE_1 STORE_REPCODE(1)
|
582
|
+
#define STORE_REPCODE_2 STORE_REPCODE(2)
|
583
|
+
#define STORE_REPCODE_3 STORE_REPCODE(3)
|
584
|
+
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
|
585
|
+
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
|
586
|
+
#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
|
587
|
+
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
|
588
|
+
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
|
589
|
+
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
|
590
|
+
#define STORED_TO_OFFBASE(o) ((o)+1)
|
591
|
+
#define OFFBASE_TO_STORED(o) ((o)-1)
|
592
|
+
|
582
593
|
/*! ZSTD_storeSeq() :
|
583
|
-
* Store a sequence (litlen, litPtr, offCode and
|
584
|
-
*
|
585
|
-
*
|
594
|
+
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
|
595
|
+
* @offBase_minus1 : Users should employ macros STORE_REPCODE_X and STORE_OFFSET().
|
596
|
+
* @matchLength : must be >= MINMATCH
|
586
597
|
* Allowed to overread literals up to litLimit.
|
587
598
|
*/
|
588
|
-
HINT_INLINE UNUSED_ATTR
|
589
|
-
|
599
|
+
HINT_INLINE UNUSED_ATTR void
|
600
|
+
ZSTD_storeSeq(seqStore_t* seqStorePtr,
|
601
|
+
size_t litLength, const BYTE* literals, const BYTE* litLimit,
|
602
|
+
U32 offBase_minus1,
|
603
|
+
size_t matchLength)
|
590
604
|
{
|
591
605
|
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
|
592
606
|
BYTE const* const litEnd = literals + litLength;
|
@@ -595,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
|
595
609
|
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
|
596
610
|
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
|
597
611
|
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
|
598
|
-
pos, (U32)litLength, (U32)
|
612
|
+
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
|
599
613
|
}
|
600
614
|
#endif
|
601
615
|
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
|
@@ -626,19 +640,59 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
|
626
640
|
seqStorePtr->sequences[0].litLength = (U16)litLength;
|
627
641
|
|
628
642
|
/* match offset */
|
629
|
-
seqStorePtr->sequences[0].
|
643
|
+
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
|
630
644
|
|
631
645
|
/* match Length */
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
646
|
+
assert(matchLength >= MINMATCH);
|
647
|
+
{ size_t const mlBase = matchLength - MINMATCH;
|
648
|
+
if (mlBase>0xFFFF) {
|
649
|
+
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
650
|
+
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
|
651
|
+
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
652
|
+
}
|
653
|
+
seqStorePtr->sequences[0].mlBase = (U16)mlBase;
|
636
654
|
}
|
637
|
-
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
|
638
655
|
|
639
656
|
seqStorePtr->sequences++;
|
640
657
|
}
|
641
658
|
|
659
|
+
/* ZSTD_updateRep() :
|
660
|
+
* updates in-place @rep (array of repeat offsets)
|
661
|
+
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
|
662
|
+
*/
|
663
|
+
MEM_STATIC void
|
664
|
+
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
|
665
|
+
{
|
666
|
+
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
|
667
|
+
rep[2] = rep[1];
|
668
|
+
rep[1] = rep[0];
|
669
|
+
rep[0] = STORED_OFFSET(offBase_minus1);
|
670
|
+
} else { /* repcode */
|
671
|
+
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
|
672
|
+
if (repCode > 0) { /* note : if repCode==0, no change */
|
673
|
+
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
674
|
+
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
675
|
+
rep[1] = rep[0];
|
676
|
+
rep[0] = currentOffset;
|
677
|
+
} else { /* repCode == 0 */
|
678
|
+
/* nothing to do */
|
679
|
+
}
|
680
|
+
}
|
681
|
+
}
|
682
|
+
|
683
|
+
typedef struct repcodes_s {
|
684
|
+
U32 rep[3];
|
685
|
+
} repcodes_t;
|
686
|
+
|
687
|
+
MEM_STATIC repcodes_t
|
688
|
+
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
|
689
|
+
{
|
690
|
+
repcodes_t newReps;
|
691
|
+
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
692
|
+
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
|
693
|
+
return newReps;
|
694
|
+
}
|
695
|
+
|
642
696
|
|
643
697
|
/*-*************************************
|
644
698
|
* Match length counter
|
@@ -651,8 +705,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
651
705
|
# if STATIC_BMI2
|
652
706
|
return _tzcnt_u64(val) >> 3;
|
653
707
|
# else
|
654
|
-
|
655
|
-
|
708
|
+
if (val != 0) {
|
709
|
+
unsigned long r;
|
710
|
+
_BitScanForward64(&r, (U64)val);
|
711
|
+
return (unsigned)(r >> 3);
|
712
|
+
} else {
|
713
|
+
/* Should not reach this code path */
|
714
|
+
__assume(0);
|
715
|
+
}
|
656
716
|
# endif
|
657
717
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
658
718
|
return (__builtin_ctzll((U64)val) >> 3);
|
@@ -669,8 +729,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
669
729
|
# endif
|
670
730
|
} else { /* 32 bits */
|
671
731
|
# if defined(_MSC_VER)
|
672
|
-
|
673
|
-
|
732
|
+
if (val != 0) {
|
733
|
+
unsigned long r;
|
734
|
+
_BitScanForward(&r, (U32)val);
|
735
|
+
return (unsigned)(r >> 3);
|
736
|
+
} else {
|
737
|
+
/* Should not reach this code path */
|
738
|
+
__assume(0);
|
739
|
+
}
|
674
740
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
675
741
|
return (__builtin_ctz((U32)val) >> 3);
|
676
742
|
# else
|
@@ -687,8 +753,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
687
753
|
# if STATIC_BMI2
|
688
754
|
return _lzcnt_u64(val) >> 3;
|
689
755
|
# else
|
690
|
-
|
691
|
-
|
756
|
+
if (val != 0) {
|
757
|
+
unsigned long r;
|
758
|
+
_BitScanReverse64(&r, (U64)val);
|
759
|
+
return (unsigned)(r >> 3);
|
760
|
+
} else {
|
761
|
+
/* Should not reach this code path */
|
762
|
+
__assume(0);
|
763
|
+
}
|
692
764
|
# endif
|
693
765
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
694
766
|
return (__builtin_clzll(val) >> 3);
|
@@ -702,8 +774,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|
702
774
|
# endif
|
703
775
|
} else { /* 32 bits */
|
704
776
|
# if defined(_MSC_VER)
|
705
|
-
|
706
|
-
|
777
|
+
if (val != 0) {
|
778
|
+
unsigned long r;
|
779
|
+
_BitScanReverse(&r, (unsigned long)val);
|
780
|
+
return (unsigned)(r >> 3);
|
781
|
+
} else {
|
782
|
+
/* Should not reach this code path */
|
783
|
+
__assume(0);
|
784
|
+
}
|
707
785
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
708
786
|
return (__builtin_clz((U32)val) >> 3);
|
709
787
|
# else
|
@@ -884,9 +962,9 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
|
|
884
962
|
|
885
963
|
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
|
886
964
|
{
|
887
|
-
return window.dictLimit ==
|
888
|
-
window.lowLimit ==
|
889
|
-
(window.nextSrc - window.base) ==
|
965
|
+
return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
|
966
|
+
window.lowLimit == ZSTD_WINDOW_START_INDEX &&
|
967
|
+
(window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
|
890
968
|
}
|
891
969
|
|
892
970
|
/**
|
@@ -937,7 +1015,9 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
|
|
937
1015
|
{
|
938
1016
|
U32 const cycleSize = 1u << cycleLog;
|
939
1017
|
U32 const curr = (U32)((BYTE const*)src - window.base);
|
940
|
-
U32 const minIndexToOverflowCorrect = cycleSize
|
1018
|
+
U32 const minIndexToOverflowCorrect = cycleSize
|
1019
|
+
+ MAX(maxDist, cycleSize)
|
1020
|
+
+ ZSTD_WINDOW_START_INDEX;
|
941
1021
|
|
942
1022
|
/* Adjust the min index to back off the overflow correction frequency,
|
943
1023
|
* so we don't waste too much CPU in overflow correction. If this
|
@@ -1012,10 +1092,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|
1012
1092
|
U32 const cycleSize = 1u << cycleLog;
|
1013
1093
|
U32 const cycleMask = cycleSize - 1;
|
1014
1094
|
U32 const curr = (U32)((BYTE const*)src - window->base);
|
1015
|
-
U32 const
|
1016
|
-
/*
|
1017
|
-
U32 const
|
1018
|
-
|
1095
|
+
U32 const currentCycle = curr & cycleMask;
|
1096
|
+
/* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
|
1097
|
+
U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
|
1098
|
+
? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
|
1099
|
+
: 0;
|
1100
|
+
U32 const newCurrent = currentCycle
|
1101
|
+
+ currentCycleCorrection
|
1102
|
+
+ MAX(maxDist, cycleSize);
|
1019
1103
|
U32 const correction = curr - newCurrent;
|
1020
1104
|
/* maxDist must be a power of two so that:
|
1021
1105
|
* (newCurrent & cycleMask) == (curr & cycleMask)
|
@@ -1031,14 +1115,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
|
1031
1115
|
|
1032
1116
|
window->base += correction;
|
1033
1117
|
window->dictBase += correction;
|
1034
|
-
if (window->lowLimit
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1118
|
+
if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
|
1119
|
+
window->lowLimit = ZSTD_WINDOW_START_INDEX;
|
1120
|
+
} else {
|
1121
|
+
window->lowLimit -= correction;
|
1122
|
+
}
|
1123
|
+
if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
|
1124
|
+
window->dictLimit = ZSTD_WINDOW_START_INDEX;
|
1125
|
+
} else {
|
1126
|
+
window->dictLimit -= correction;
|
1127
|
+
}
|
1038
1128
|
|
1039
1129
|
/* Ensure we can still reference the full window. */
|
1040
1130
|
assert(newCurrent >= maxDist);
|
1041
|
-
assert(newCurrent - maxDist >=
|
1131
|
+
assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
|
1042
1132
|
/* Ensure that lowLimit and dictLimit didn't underflow. */
|
1043
1133
|
assert(window->lowLimit <= newCurrent);
|
1044
1134
|
assert(window->dictLimit <= newCurrent);
|
@@ -1149,11 +1239,12 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
|
|
1149
1239
|
|
1150
1240
|
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
|
1151
1241
|
ZSTD_memset(window, 0, sizeof(*window));
|
1152
|
-
window->base = (BYTE const*)"";
|
1153
|
-
window->dictBase = (BYTE const*)"";
|
1154
|
-
|
1155
|
-
window->
|
1156
|
-
window->
|
1242
|
+
window->base = (BYTE const*)" ";
|
1243
|
+
window->dictBase = (BYTE const*)" ";
|
1244
|
+
ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
|
1245
|
+
window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
|
1246
|
+
window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
|
1247
|
+
window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
|
1157
1248
|
window->nbOverflowCorrections = 0;
|
1158
1249
|
}
|
1159
1250
|
|
@@ -1206,15 +1297,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
|
1206
1297
|
*/
|
1207
1298
|
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
|
1208
1299
|
{
|
1209
|
-
U32
|
1210
|
-
U32
|
1211
|
-
U32
|
1212
|
-
U32
|
1300
|
+
U32 const maxDistance = 1U << windowLog;
|
1301
|
+
U32 const lowestValid = ms->window.lowLimit;
|
1302
|
+
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
|
1303
|
+
U32 const isDictionary = (ms->loadedDictEnd != 0);
|
1213
1304
|
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
|
1214
1305
|
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
|
1215
1306
|
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
|
1216
1307
|
*/
|
1217
|
-
U32
|
1308
|
+
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
|
1218
1309
|
return matchLowest;
|
1219
1310
|
}
|
1220
1311
|
|
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
73
73
|
void* dst, size_t dstCapacity,
|
74
74
|
const void* src, size_t srcSize,
|
75
75
|
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
76
|
-
const int bmi2
|
76
|
+
const int bmi2,
|
77
|
+
unsigned suspectUncompressible)
|
77
78
|
{
|
78
79
|
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
79
80
|
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
105
106
|
HUF_compress1X_repeat(
|
106
107
|
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
107
108
|
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
108
|
-
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
109
|
+
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
|
109
110
|
HUF_compress4X_repeat(
|
110
111
|
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
111
112
|
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
112
|
-
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
113
|
+
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
|
113
114
|
if (repeat != HUF_repeat_none) {
|
114
115
|
/* reused the existing table */
|
115
116
|
DEBUGLOG(5, "Reusing previous huffman table");
|
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
|
18
18
|
|
19
19
|
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
20
20
|
|
21
|
+
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
21
22
|
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
22
23
|
ZSTD_hufCTables_t* nextHuf,
|
23
24
|
ZSTD_strategy strategy, int disableLiteralCompression,
|
24
25
|
void* dst, size_t dstCapacity,
|
25
26
|
const void* src, size_t srcSize,
|
26
27
|
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
27
|
-
const int bmi2
|
28
|
+
const int bmi2,
|
29
|
+
unsigned suspectUncompressible);
|
28
30
|
|
29
31
|
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
@@ -275,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
275
275
|
assert(nbSeq_1 > 1);
|
276
276
|
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
277
277
|
(void)entropyWorkspaceSize;
|
278
|
-
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
279
|
-
|
278
|
+
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
279
|
+
assert(oend >= op);
|
280
|
+
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
280
281
|
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
281
|
-
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
|
282
|
+
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
282
283
|
return NCountSize;
|
283
284
|
}
|
284
285
|
}
|
@@ -312,19 +313,19 @@ ZSTD_encodeSequences_body(
|
|
312
313
|
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
313
314
|
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
314
315
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
315
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].
|
316
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
|
316
317
|
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
317
318
|
if (longOffsets) {
|
318
319
|
U32 const ofBits = ofCodeTable[nbSeq-1];
|
319
320
|
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
320
321
|
if (extraBits) {
|
321
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].
|
322
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
|
322
323
|
BIT_flushBits(&blockStream);
|
323
324
|
}
|
324
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].
|
325
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
|
325
326
|
ofBits - extraBits);
|
326
327
|
} else {
|
327
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].
|
328
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
|
328
329
|
}
|
329
330
|
BIT_flushBits(&blockStream);
|
330
331
|
|
@@ -338,8 +339,8 @@ ZSTD_encodeSequences_body(
|
|
338
339
|
U32 const mlBits = ML_bits[mlCode];
|
339
340
|
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
340
341
|
(unsigned)sequences[n].litLength,
|
341
|
-
(unsigned)sequences[n].
|
342
|
-
(unsigned)sequences[n].
|
342
|
+
(unsigned)sequences[n].mlBase + MINMATCH,
|
343
|
+
(unsigned)sequences[n].offBase);
|
343
344
|
/* 32b*/ /* 64b*/
|
344
345
|
/* (7)*/ /* (7)*/
|
345
346
|
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
@@ -350,18 +351,18 @@ ZSTD_encodeSequences_body(
|
|
350
351
|
BIT_flushBits(&blockStream); /* (7)*/
|
351
352
|
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
352
353
|
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
353
|
-
BIT_addBits(&blockStream, sequences[n].
|
354
|
+
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
|
354
355
|
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
355
356
|
if (longOffsets) {
|
356
357
|
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
357
358
|
if (extraBits) {
|
358
|
-
BIT_addBits(&blockStream, sequences[n].
|
359
|
+
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
|
359
360
|
BIT_flushBits(&blockStream); /* (7)*/
|
360
361
|
}
|
361
|
-
BIT_addBits(&blockStream, sequences[n].
|
362
|
+
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
|
362
363
|
ofBits - extraBits); /* 31 */
|
363
364
|
} else {
|
364
|
-
BIT_addBits(&blockStream, sequences[n].
|
365
|
+
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
|
365
366
|
}
|
366
367
|
BIT_flushBits(&blockStream); /* (7)*/
|
367
368
|
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
@@ -398,7 +399,7 @@ ZSTD_encodeSequences_default(
|
|
398
399
|
|
399
400
|
#if DYNAMIC_BMI2
|
400
401
|
|
401
|
-
static
|
402
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
402
403
|
ZSTD_encodeSequences_bmi2(
|
403
404
|
void* dst, size_t dstCapacity,
|
404
405
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
@@ -132,6 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
|
132
132
|
const seqDef* sp = sstart;
|
133
133
|
size_t matchLengthSum = 0;
|
134
134
|
size_t litLengthSum = 0;
|
135
|
+
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
135
136
|
while (send-sp > 0) {
|
136
137
|
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
137
138
|
litLengthSum += seqLen.litLength;
|
@@ -324,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|
324
325
|
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
325
326
|
const BYTE* codeTable, unsigned maxCode,
|
326
327
|
size_t nbSeq, const FSE_CTable* fseCTable,
|
327
|
-
const
|
328
|
+
const U8* additionalBits,
|
328
329
|
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
329
330
|
void* workspace, size_t wkspSize)
|
330
331
|
{
|
@@ -474,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
474
475
|
/* I think there is an optimization opportunity here.
|
475
476
|
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
476
477
|
* since it recalculates estimate from scratch.
|
477
|
-
* For example, it would recount literal distribution and symbol codes
|
478
|
+
* For example, it would recount literal distribution and symbol codes every time.
|
478
479
|
*/
|
479
480
|
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
480
481
|
&nextCBlock->entropy, entropyMetadata,
|
@@ -538,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
538
539
|
repcodes_t rep;
|
539
540
|
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
540
541
|
for (seq = sstart; seq < sp; ++seq) {
|
541
|
-
|
542
|
+
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
542
543
|
}
|
543
544
|
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
544
545
|
}
|