zstd-ruby 1.3.8.0 → 1.4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +133 -61
- data/ext/zstdruby/libzstd/README.md +51 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
- data/ext/zstdruby/libzstd/common/compiler.h +41 -6
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +11 -31
- data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +6 -2
- data/ext/zstdruby/libzstd/common/fse.h +13 -33
- data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
- data/ext/zstdruby/libzstd/common/huf.h +15 -33
- data/ext/zstdruby/libzstd/common/mem.h +75 -2
- data/ext/zstdruby/libzstd/common/pool.c +8 -4
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/threading.c +52 -6
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
- data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
- data/ext/zstdruby/libzstd/compress/hist.c +15 -35
- data/ext/zstdruby/libzstd/compress/hist.h +12 -32
- data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
- data/ext/zstdruby/libzstd/zstd.h +921 -597
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +2 -2
- metadata +19 -14
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
extern "C" {
|
|
16
16
|
#endif
|
|
17
17
|
|
|
18
|
-
#include "mem.h" /* U32 */
|
|
18
|
+
#include "../common/mem.h" /* U32 */
|
|
19
19
|
#include "zstd_compress_internal.h"
|
|
20
20
|
|
|
21
21
|
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
|
83
83
|
U32* largerPtr = smallerPtr + 1;
|
|
84
84
|
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
|
85
85
|
U32 dummy32; /* to be nullified at the end */
|
|
86
|
-
U32 const
|
|
86
|
+
U32 const windowValid = ms->window.lowLimit;
|
|
87
|
+
U32 const maxDistance = 1U << cParams->windowLog;
|
|
88
|
+
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
|
|
89
|
+
|
|
87
90
|
|
|
88
91
|
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
|
89
92
|
current, dictLimit, windowLow);
|
|
@@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
|
239
242
|
|
|
240
243
|
const BYTE* const base = ms->window.base;
|
|
241
244
|
U32 const current = (U32)(ip-base);
|
|
242
|
-
U32 const windowLow = ms->
|
|
245
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
|
|
243
246
|
|
|
244
247
|
U32* const bt = ms->chainTable;
|
|
245
248
|
U32 const btLog = cParams->chainLog - 1;
|
|
@@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
|
490
493
|
const U32 dictLimit = ms->window.dictLimit;
|
|
491
494
|
const BYTE* const prefixStart = base + dictLimit;
|
|
492
495
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
493
|
-
const U32 lowLimit = ms->window.lowLimit;
|
|
494
496
|
const U32 current = (U32)(ip-base);
|
|
497
|
+
const U32 maxDistance = 1U << cParams->windowLog;
|
|
498
|
+
const U32 lowestValid = ms->window.lowLimit;
|
|
499
|
+
const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
|
|
500
|
+
const U32 isDictionary = (ms->loadedDictEnd != 0);
|
|
501
|
+
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
|
|
495
502
|
const U32 minChain = current > chainSize ? current - chainSize : 0;
|
|
496
503
|
U32 nbAttempts = 1U << cParams->searchLog;
|
|
497
504
|
size_t ml=4-1;
|
|
@@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
|
|
|
612
619
|
/* *******************************
|
|
613
620
|
* Common parser - lazy strategy
|
|
614
621
|
*********************************/
|
|
615
|
-
|
|
616
|
-
|
|
622
|
+
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
|
|
623
|
+
|
|
624
|
+
FORCE_INLINE_TEMPLATE size_t
|
|
625
|
+
ZSTD_compressBlock_lazy_generic(
|
|
617
626
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
|
618
627
|
U32 rep[ZSTD_REP_NUM],
|
|
619
628
|
const void* src, size_t srcSize,
|
|
620
|
-
const
|
|
629
|
+
const searchMethod_e searchMethod, const U32 depth,
|
|
621
630
|
ZSTD_dictMode_e const dictMode)
|
|
622
631
|
{
|
|
623
632
|
const BYTE* const istart = (const BYTE*)src;
|
|
@@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
|
633
642
|
ZSTD_matchState_t* ms,
|
|
634
643
|
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
|
635
644
|
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
|
|
636
|
-
(searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
|
637
|
-
|
|
645
|
+
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
|
646
|
+
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
|
|
647
|
+
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
|
|
648
|
+
: ZSTD_HcFindBestMatch_selectMLS);
|
|
638
649
|
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
|
|
639
650
|
|
|
640
651
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
|
@@ -649,13 +660,16 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
|
649
660
|
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
|
650
661
|
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
|
651
662
|
0;
|
|
652
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
|
|
663
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
|
664
|
+
|
|
665
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
|
653
666
|
|
|
654
667
|
/* init */
|
|
655
668
|
ip += (dictAndPrefixLength == 0);
|
|
656
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
|
657
669
|
if (dictMode == ZSTD_noDict) {
|
|
658
|
-
U32 const
|
|
670
|
+
U32 const current = (U32)(ip - base);
|
|
671
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
|
|
672
|
+
U32 const maxRep = current - windowLow;
|
|
659
673
|
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
|
660
674
|
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
|
661
675
|
}
|
|
@@ -667,6 +681,12 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
|
667
681
|
}
|
|
668
682
|
|
|
669
683
|
/* Match Loop */
|
|
684
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
685
|
+
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
|
|
686
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
|
687
|
+
*/
|
|
688
|
+
__asm__(".p2align 5");
|
|
689
|
+
#endif
|
|
670
690
|
while (ip < ilimit) {
|
|
671
691
|
size_t matchLength=0;
|
|
672
692
|
size_t offset=0;
|
|
@@ -800,7 +820,7 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
|
800
820
|
/* store sequence */
|
|
801
821
|
_storeSequence:
|
|
802
822
|
{ size_t const litLength = start - anchor;
|
|
803
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
|
823
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
|
|
804
824
|
anchor = ip = start + matchLength;
|
|
805
825
|
}
|
|
806
826
|
|
|
@@ -818,7 +838,7 @@ _storeSequence:
|
|
|
818
838
|
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
|
|
819
839
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
|
|
820
840
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
|
|
821
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
|
841
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
|
822
842
|
ip += matchLength;
|
|
823
843
|
anchor = ip;
|
|
824
844
|
continue;
|
|
@@ -833,7 +853,7 @@ _storeSequence:
|
|
|
833
853
|
/* store sequence */
|
|
834
854
|
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
|
835
855
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
|
|
836
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
|
856
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
|
837
857
|
ip += matchLength;
|
|
838
858
|
anchor = ip;
|
|
839
859
|
continue; /* faster when present ... (?) */
|
|
@@ -844,7 +864,7 @@ _storeSequence:
|
|
|
844
864
|
rep[1] = offset_2 ? offset_2 : savedOffset;
|
|
845
865
|
|
|
846
866
|
/* Return the last literals size */
|
|
847
|
-
return iend - anchor;
|
|
867
|
+
return (size_t)(iend - anchor);
|
|
848
868
|
}
|
|
849
869
|
|
|
850
870
|
|
|
@@ -852,56 +872,56 @@ size_t ZSTD_compressBlock_btlazy2(
|
|
|
852
872
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
853
873
|
void const* src, size_t srcSize)
|
|
854
874
|
{
|
|
855
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
875
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
|
|
856
876
|
}
|
|
857
877
|
|
|
858
878
|
size_t ZSTD_compressBlock_lazy2(
|
|
859
879
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
860
880
|
void const* src, size_t srcSize)
|
|
861
881
|
{
|
|
862
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
882
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
|
|
863
883
|
}
|
|
864
884
|
|
|
865
885
|
size_t ZSTD_compressBlock_lazy(
|
|
866
886
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
867
887
|
void const* src, size_t srcSize)
|
|
868
888
|
{
|
|
869
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
889
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
|
|
870
890
|
}
|
|
871
891
|
|
|
872
892
|
size_t ZSTD_compressBlock_greedy(
|
|
873
893
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
874
894
|
void const* src, size_t srcSize)
|
|
875
895
|
{
|
|
876
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
896
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
|
|
877
897
|
}
|
|
878
898
|
|
|
879
899
|
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
|
880
900
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
881
901
|
void const* src, size_t srcSize)
|
|
882
902
|
{
|
|
883
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
903
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
|
|
884
904
|
}
|
|
885
905
|
|
|
886
906
|
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
|
887
907
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
888
908
|
void const* src, size_t srcSize)
|
|
889
909
|
{
|
|
890
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
910
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
|
|
891
911
|
}
|
|
892
912
|
|
|
893
913
|
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
|
894
914
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
895
915
|
void const* src, size_t srcSize)
|
|
896
916
|
{
|
|
897
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
917
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
|
|
898
918
|
}
|
|
899
919
|
|
|
900
920
|
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
|
901
921
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
902
922
|
void const* src, size_t srcSize)
|
|
903
923
|
{
|
|
904
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
|
924
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
|
|
905
925
|
}
|
|
906
926
|
|
|
907
927
|
|
|
@@ -910,7 +930,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
910
930
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
|
911
931
|
U32 rep[ZSTD_REP_NUM],
|
|
912
932
|
const void* src, size_t srcSize,
|
|
913
|
-
const
|
|
933
|
+
const searchMethod_e searchMethod, const U32 depth)
|
|
914
934
|
{
|
|
915
935
|
const BYTE* const istart = (const BYTE*)src;
|
|
916
936
|
const BYTE* ip = istart;
|
|
@@ -919,24 +939,31 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
919
939
|
const BYTE* const ilimit = iend - 8;
|
|
920
940
|
const BYTE* const base = ms->window.base;
|
|
921
941
|
const U32 dictLimit = ms->window.dictLimit;
|
|
922
|
-
const U32 lowestIndex = ms->window.lowLimit;
|
|
923
942
|
const BYTE* const prefixStart = base + dictLimit;
|
|
924
943
|
const BYTE* const dictBase = ms->window.dictBase;
|
|
925
944
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
926
|
-
const BYTE* const dictStart = dictBase +
|
|
945
|
+
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
|
|
946
|
+
const U32 windowLog = ms->cParams.windowLog;
|
|
927
947
|
|
|
928
948
|
typedef size_t (*searchMax_f)(
|
|
929
949
|
ZSTD_matchState_t* ms,
|
|
930
950
|
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
|
931
|
-
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
|
951
|
+
searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
|
932
952
|
|
|
933
953
|
U32 offset_1 = rep[0], offset_2 = rep[1];
|
|
934
954
|
|
|
955
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
|
|
956
|
+
|
|
935
957
|
/* init */
|
|
936
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
|
937
958
|
ip += (ip == prefixStart);
|
|
938
959
|
|
|
939
960
|
/* Match Loop */
|
|
961
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
|
962
|
+
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
|
|
963
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
|
964
|
+
*/
|
|
965
|
+
__asm__(".p2align 5");
|
|
966
|
+
#endif
|
|
940
967
|
while (ip < ilimit) {
|
|
941
968
|
size_t matchLength=0;
|
|
942
969
|
size_t offset=0;
|
|
@@ -944,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
944
971
|
U32 current = (U32)(ip-base);
|
|
945
972
|
|
|
946
973
|
/* check repCode */
|
|
947
|
-
{ const U32
|
|
974
|
+
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
|
|
975
|
+
const U32 repIndex = (U32)(current+1 - offset_1);
|
|
948
976
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
|
949
977
|
const BYTE* const repMatch = repBase + repIndex;
|
|
950
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
|
978
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
|
951
979
|
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
|
|
952
980
|
/* repcode detected we should take it */
|
|
953
981
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
|
@@ -974,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
974
1002
|
current++;
|
|
975
1003
|
/* check repCode */
|
|
976
1004
|
if (offset) {
|
|
1005
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
|
977
1006
|
const U32 repIndex = (U32)(current - offset_1);
|
|
978
1007
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
|
979
1008
|
const BYTE* const repMatch = repBase + repIndex;
|
|
980
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
|
1009
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
|
981
1010
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
|
982
1011
|
/* repcode detected */
|
|
983
1012
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
|
@@ -1004,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
1004
1033
|
current++;
|
|
1005
1034
|
/* check repCode */
|
|
1006
1035
|
if (offset) {
|
|
1036
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
|
1007
1037
|
const U32 repIndex = (U32)(current - offset_1);
|
|
1008
1038
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
|
1009
1039
|
const BYTE* const repMatch = repBase + repIndex;
|
|
1010
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
|
1040
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
|
1011
1041
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
|
1012
1042
|
/* repcode detected */
|
|
1013
1043
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
|
@@ -1042,22 +1072,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
|
1042
1072
|
/* store sequence */
|
|
1043
1073
|
_storeSequence:
|
|
1044
1074
|
{ size_t const litLength = start - anchor;
|
|
1045
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
|
1075
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
|
|
1046
1076
|
anchor = ip = start + matchLength;
|
|
1047
1077
|
}
|
|
1048
1078
|
|
|
1049
1079
|
/* check immediate repcode */
|
|
1050
1080
|
while (ip <= ilimit) {
|
|
1051
|
-
const U32
|
|
1081
|
+
const U32 repCurrent = (U32)(ip-base);
|
|
1082
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
|
|
1083
|
+
const U32 repIndex = repCurrent - offset_2;
|
|
1052
1084
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
|
1053
1085
|
const BYTE* const repMatch = repBase + repIndex;
|
|
1054
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
|
1086
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
|
1055
1087
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
|
1056
1088
|
/* repcode detected we should take it */
|
|
1057
1089
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
|
1058
1090
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
|
|
1059
1091
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
|
|
1060
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
|
1092
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
|
1061
1093
|
ip += matchLength;
|
|
1062
1094
|
anchor = ip;
|
|
1063
1095
|
continue; /* faster when present ... (?) */
|
|
@@ -1070,7 +1102,7 @@ _storeSequence:
|
|
|
1070
1102
|
rep[1] = offset_2;
|
|
1071
1103
|
|
|
1072
1104
|
/* Return the last literals size */
|
|
1073
|
-
return iend - anchor;
|
|
1105
|
+
return (size_t)(iend - anchor);
|
|
1074
1106
|
}
|
|
1075
1107
|
|
|
1076
1108
|
|
|
@@ -1078,7 +1110,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
|
|
|
1078
1110
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1079
1111
|
void const* src, size_t srcSize)
|
|
1080
1112
|
{
|
|
1081
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
|
1113
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
|
|
1082
1114
|
}
|
|
1083
1115
|
|
|
1084
1116
|
size_t ZSTD_compressBlock_lazy_extDict(
|
|
@@ -1086,7 +1118,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
|
|
|
1086
1118
|
void const* src, size_t srcSize)
|
|
1087
1119
|
|
|
1088
1120
|
{
|
|
1089
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
|
1121
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
|
|
1090
1122
|
}
|
|
1091
1123
|
|
|
1092
1124
|
size_t ZSTD_compressBlock_lazy2_extDict(
|
|
@@ -1094,7 +1126,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
|
|
|
1094
1126
|
void const* src, size_t srcSize)
|
|
1095
1127
|
|
|
1096
1128
|
{
|
|
1097
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
|
1129
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
|
|
1098
1130
|
}
|
|
1099
1131
|
|
|
1100
1132
|
size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
@@ -1102,5 +1134,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
|
1102
1134
|
void const* src, size_t srcSize)
|
|
1103
1135
|
|
|
1104
1136
|
{
|
|
1105
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
|
1137
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
|
|
1106
1138
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -19,7 +19,7 @@ extern "C" {
|
|
|
19
19
|
|
|
20
20
|
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
|
21
21
|
|
|
22
|
-
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex().
|
|
22
|
+
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
|
|
23
23
|
|
|
24
24
|
size_t ZSTD_compressBlock_btlazy2(
|
|
25
25
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
6
6
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
7
|
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
#include "zstd_ldm.h"
|
|
11
12
|
|
|
12
|
-
#include "debug.h"
|
|
13
|
+
#include "../common/debug.h"
|
|
13
14
|
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
|
|
14
15
|
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
|
15
16
|
|
|
@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
|
|
49
50
|
{
|
|
50
51
|
size_t const ldmHSize = ((size_t)1) << params.hashLog;
|
|
51
52
|
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
|
|
52
|
-
size_t const ldmBucketSize =
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
|
54
|
+
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
|
|
55
|
+
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
|
|
55
56
|
return params.enableLdm ? totalSize : 0;
|
|
56
57
|
}
|
|
57
58
|
|
|
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
|
|
223
224
|
return rollingHash;
|
|
224
225
|
}
|
|
225
226
|
|
|
227
|
+
void ZSTD_ldm_fillHashTable(
|
|
228
|
+
ldmState_t* state, const BYTE* ip,
|
|
229
|
+
const BYTE* iend, ldmParams_t const* params)
|
|
230
|
+
{
|
|
231
|
+
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
|
|
232
|
+
if ((size_t)(iend - ip) >= params->minMatchLength) {
|
|
233
|
+
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
|
|
234
|
+
ZSTD_ldm_fillLdmHashTable(
|
|
235
|
+
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
|
|
236
|
+
params->hashLog - params->bucketSizeLog,
|
|
237
|
+
*params);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
226
241
|
|
|
227
242
|
/** ZSTD_ldm_limitTableUpdate() :
|
|
228
243
|
*
|
|
@@ -429,7 +444,7 @@ size_t ZSTD_ldm_generateSequences(
|
|
|
429
444
|
*/
|
|
430
445
|
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
|
|
431
446
|
/* The input could be very large (in zstdmt), so it must be broken up into
|
|
432
|
-
* chunks to enforce the
|
|
447
|
+
* chunks to enforce the maximum distance and handle overflow correction.
|
|
433
448
|
*/
|
|
434
449
|
assert(sequences->pos <= sequences->size);
|
|
435
450
|
assert(sequences->size <= sequences->capacity);
|
|
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
|
|
|
447
462
|
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
|
448
463
|
U32 const ldmHSize = 1U << params->hashLog;
|
|
449
464
|
U32 const correction = ZSTD_window_correctOverflow(
|
|
450
|
-
&ldmState->window, /* cycleLog */ 0, maxDist,
|
|
465
|
+
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
|
451
466
|
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
|
467
|
+
/* invalidate dictionaries on overflow correction */
|
|
468
|
+
ldmState->loadedDictEnd = 0;
|
|
452
469
|
}
|
|
453
470
|
/* 2. We enforce the maximum offset allowed.
|
|
454
471
|
*
|
|
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
|
|
|
457
474
|
* TODO: * Test the chunk size.
|
|
458
475
|
* * Try invalidation after the sequence generation and test the
|
|
459
476
|
* the offset against maxDist directly.
|
|
477
|
+
*
|
|
478
|
+
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
|
479
|
+
* that any offset used is valid at the END of the sequence, since it may
|
|
480
|
+
* be split into two sequences. This condition holds when using
|
|
481
|
+
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
|
|
482
|
+
* against maxDist directly, we'll have to carefully handle that case.
|
|
460
483
|
*/
|
|
461
|
-
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist,
|
|
484
|
+
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
|
|
462
485
|
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
|
463
486
|
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
|
464
487
|
ldmState, sequences, params, chunkStart, chunkSize);
|
|
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
|
566
589
|
if (sequence.offset == 0)
|
|
567
590
|
break;
|
|
568
591
|
|
|
569
|
-
assert(sequence.offset <= (1U << cParams->windowLog));
|
|
570
592
|
assert(ip + sequence.litLength + sequence.matchLength <= iend);
|
|
571
593
|
|
|
572
594
|
/* Fill tables for block compressor */
|
|
573
595
|
ZSTD_ldm_limitTableUpdate(ms, ip);
|
|
574
596
|
ZSTD_ldm_fillFastTables(ms, ip);
|
|
575
597
|
/* Run the block compressor */
|
|
576
|
-
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
|
|
598
|
+
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
|
|
577
599
|
{
|
|
578
600
|
size_t const newLitLength =
|
|
579
601
|
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
|
|
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
|
583
605
|
rep[i] = rep[i-1];
|
|
584
606
|
rep[0] = sequence.offset;
|
|
585
607
|
/* Store the sequence */
|
|
586
|
-
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
|
|
608
|
+
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
|
587
609
|
sequence.offset + ZSTD_REP_MOVE,
|
|
588
610
|
sequence.matchLength - MINMATCH);
|
|
589
611
|
ip += sequence.matchLength;
|