extzstd 0.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +8 -0
- data/README.md +1 -1
- data/contrib/zstd/CHANGELOG +94 -0
- data/contrib/zstd/CONTRIBUTING.md +351 -1
- data/contrib/zstd/Makefile +32 -10
- data/contrib/zstd/README.md +33 -10
- data/contrib/zstd/TESTING.md +2 -2
- data/contrib/zstd/appveyor.yml +42 -4
- data/contrib/zstd/lib/Makefile +128 -60
- data/contrib/zstd/lib/README.md +47 -16
- data/contrib/zstd/lib/common/bitstream.h +38 -39
- data/contrib/zstd/lib/common/compiler.h +40 -5
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +11 -31
- data/contrib/zstd/lib/common/debug.h +11 -31
- data/contrib/zstd/lib/common/entropy_common.c +13 -33
- data/contrib/zstd/lib/common/error_private.c +2 -1
- data/contrib/zstd/lib/common/error_private.h +6 -2
- data/contrib/zstd/lib/common/fse.h +12 -32
- data/contrib/zstd/lib/common/fse_decompress.c +12 -35
- data/contrib/zstd/lib/common/huf.h +15 -33
- data/contrib/zstd/lib/common/mem.h +75 -2
- data/contrib/zstd/lib/common/pool.c +8 -4
- data/contrib/zstd/lib/common/pool.h +2 -2
- data/contrib/zstd/lib/common/threading.c +50 -4
- data/contrib/zstd/lib/common/threading.h +36 -4
- data/contrib/zstd/lib/common/xxhash.c +23 -35
- data/contrib/zstd/lib/common/xxhash.h +11 -31
- data/contrib/zstd/lib/common/zstd_common.c +1 -1
- data/contrib/zstd/lib/common/zstd_errors.h +2 -1
- data/contrib/zstd/lib/common/zstd_internal.h +154 -26
- data/contrib/zstd/lib/compress/fse_compress.c +17 -40
- data/contrib/zstd/lib/compress/hist.c +15 -35
- data/contrib/zstd/lib/compress/hist.h +12 -32
- data/contrib/zstd/lib/compress/huf_compress.c +92 -92
- data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
- data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
- data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
- data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
- data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
- data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
- data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
- data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
- data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
- data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
- data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
- data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
- data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
- data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
- data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
- data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -2
- data/contrib/zstd/lib/zstd.h +265 -88
- data/ext/extzstd.h +1 -1
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +1 -3
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +1 -5
- data/ext/zstd_dictbuilder.c +2 -3
- data/ext/zstd_dictbuilder_fastcover.c +1 -3
- data/ext/zstd_legacy_v01.c +2 -0
- data/ext/zstd_legacy_v02.c +2 -0
- data/ext/zstd_legacy_v03.c +2 -0
- data/ext/zstd_legacy_v04.c +2 -0
- data/ext/zstd_legacy_v05.c +2 -0
- data/ext/zstd_legacy_v06.c +2 -0
- data/ext/zstd_legacy_v07.c +2 -0
- data/lib/extzstd.rb +18 -10
- data/lib/extzstd/version.rb +1 -1
- metadata +15 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,7 +15,7 @@
|
|
15
15
|
extern "C" {
|
16
16
|
#endif
|
17
17
|
|
18
|
-
#include "mem.h" /* U32 */
|
18
|
+
#include "../common/mem.h" /* U32 */
|
19
19
|
#include "zstd_compress_internal.h"
|
20
20
|
|
21
21
|
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
83
83
|
U32* largerPtr = smallerPtr + 1;
|
84
84
|
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
85
85
|
U32 dummy32; /* to be nullified at the end */
|
86
|
-
U32 const
|
86
|
+
U32 const windowValid = ms->window.lowLimit;
|
87
|
+
U32 const maxDistance = 1U << cParams->windowLog;
|
88
|
+
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
|
89
|
+
|
87
90
|
|
88
91
|
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
89
92
|
current, dictLimit, windowLow);
|
@@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
239
242
|
|
240
243
|
const BYTE* const base = ms->window.base;
|
241
244
|
U32 const current = (U32)(ip-base);
|
242
|
-
U32 const windowLow = ms->
|
245
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
|
243
246
|
|
244
247
|
U32* const bt = ms->chainTable;
|
245
248
|
U32 const btLog = cParams->chainLog - 1;
|
@@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
490
493
|
const U32 dictLimit = ms->window.dictLimit;
|
491
494
|
const BYTE* const prefixStart = base + dictLimit;
|
492
495
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
493
|
-
const U32 lowLimit = ms->window.lowLimit;
|
494
496
|
const U32 current = (U32)(ip-base);
|
497
|
+
const U32 maxDistance = 1U << cParams->windowLog;
|
498
|
+
const U32 lowestValid = ms->window.lowLimit;
|
499
|
+
const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
|
500
|
+
const U32 isDictionary = (ms->loadedDictEnd != 0);
|
501
|
+
const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
|
495
502
|
const U32 minChain = current > chainSize ? current - chainSize : 0;
|
496
503
|
U32 nbAttempts = 1U << cParams->searchLog;
|
497
504
|
size_t ml=4-1;
|
@@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
|
|
612
619
|
/* *******************************
|
613
620
|
* Common parser - lazy strategy
|
614
621
|
*********************************/
|
615
|
-
|
616
|
-
|
622
|
+
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
|
623
|
+
|
624
|
+
FORCE_INLINE_TEMPLATE size_t
|
625
|
+
ZSTD_compressBlock_lazy_generic(
|
617
626
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
618
627
|
U32 rep[ZSTD_REP_NUM],
|
619
628
|
const void* src, size_t srcSize,
|
620
|
-
const
|
629
|
+
const searchMethod_e searchMethod, const U32 depth,
|
621
630
|
ZSTD_dictMode_e const dictMode)
|
622
631
|
{
|
623
632
|
const BYTE* const istart = (const BYTE*)src;
|
@@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
633
642
|
ZSTD_matchState_t* ms,
|
634
643
|
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
635
644
|
searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
|
636
|
-
(searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
637
|
-
|
645
|
+
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
|
646
|
+
: ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
|
647
|
+
(searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
|
648
|
+
: ZSTD_HcFindBestMatch_selectMLS);
|
638
649
|
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
|
639
650
|
|
640
651
|
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
@@ -649,13 +660,16 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
649
660
|
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
650
661
|
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
651
662
|
0;
|
652
|
-
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
|
663
|
+
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
|
664
|
+
|
665
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
|
653
666
|
|
654
667
|
/* init */
|
655
668
|
ip += (dictAndPrefixLength == 0);
|
656
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
657
669
|
if (dictMode == ZSTD_noDict) {
|
658
|
-
U32 const
|
670
|
+
U32 const current = (U32)(ip - base);
|
671
|
+
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
|
672
|
+
U32 const maxRep = current - windowLow;
|
659
673
|
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
660
674
|
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
|
661
675
|
}
|
@@ -667,6 +681,12 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
667
681
|
}
|
668
682
|
|
669
683
|
/* Match Loop */
|
684
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
685
|
+
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
|
686
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
687
|
+
*/
|
688
|
+
__asm__(".p2align 5");
|
689
|
+
#endif
|
670
690
|
while (ip < ilimit) {
|
671
691
|
size_t matchLength=0;
|
672
692
|
size_t offset=0;
|
@@ -800,7 +820,7 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
800
820
|
/* store sequence */
|
801
821
|
_storeSequence:
|
802
822
|
{ size_t const litLength = start - anchor;
|
803
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
823
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
|
804
824
|
anchor = ip = start + matchLength;
|
805
825
|
}
|
806
826
|
|
@@ -818,7 +838,7 @@ _storeSequence:
|
|
818
838
|
const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
|
819
839
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
|
820
840
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
|
821
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
841
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
822
842
|
ip += matchLength;
|
823
843
|
anchor = ip;
|
824
844
|
continue;
|
@@ -833,7 +853,7 @@ _storeSequence:
|
|
833
853
|
/* store sequence */
|
834
854
|
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
835
855
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
|
836
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
856
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
837
857
|
ip += matchLength;
|
838
858
|
anchor = ip;
|
839
859
|
continue; /* faster when present ... (?) */
|
@@ -844,7 +864,7 @@ _storeSequence:
|
|
844
864
|
rep[1] = offset_2 ? offset_2 : savedOffset;
|
845
865
|
|
846
866
|
/* Return the last literals size */
|
847
|
-
return iend - anchor;
|
867
|
+
return (size_t)(iend - anchor);
|
848
868
|
}
|
849
869
|
|
850
870
|
|
@@ -852,56 +872,56 @@ size_t ZSTD_compressBlock_btlazy2(
|
|
852
872
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
853
873
|
void const* src, size_t srcSize)
|
854
874
|
{
|
855
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
875
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
|
856
876
|
}
|
857
877
|
|
858
878
|
size_t ZSTD_compressBlock_lazy2(
|
859
879
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
860
880
|
void const* src, size_t srcSize)
|
861
881
|
{
|
862
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
882
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
|
863
883
|
}
|
864
884
|
|
865
885
|
size_t ZSTD_compressBlock_lazy(
|
866
886
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
867
887
|
void const* src, size_t srcSize)
|
868
888
|
{
|
869
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
889
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
|
870
890
|
}
|
871
891
|
|
872
892
|
size_t ZSTD_compressBlock_greedy(
|
873
893
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
874
894
|
void const* src, size_t srcSize)
|
875
895
|
{
|
876
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
896
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
|
877
897
|
}
|
878
898
|
|
879
899
|
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
880
900
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
881
901
|
void const* src, size_t srcSize)
|
882
902
|
{
|
883
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
903
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
|
884
904
|
}
|
885
905
|
|
886
906
|
size_t ZSTD_compressBlock_lazy2_dictMatchState(
|
887
907
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
888
908
|
void const* src, size_t srcSize)
|
889
909
|
{
|
890
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
910
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
|
891
911
|
}
|
892
912
|
|
893
913
|
size_t ZSTD_compressBlock_lazy_dictMatchState(
|
894
914
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
895
915
|
void const* src, size_t srcSize)
|
896
916
|
{
|
897
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
917
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
|
898
918
|
}
|
899
919
|
|
900
920
|
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
901
921
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
902
922
|
void const* src, size_t srcSize)
|
903
923
|
{
|
904
|
-
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize,
|
924
|
+
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
|
905
925
|
}
|
906
926
|
|
907
927
|
|
@@ -910,7 +930,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
910
930
|
ZSTD_matchState_t* ms, seqStore_t* seqStore,
|
911
931
|
U32 rep[ZSTD_REP_NUM],
|
912
932
|
const void* src, size_t srcSize,
|
913
|
-
const
|
933
|
+
const searchMethod_e searchMethod, const U32 depth)
|
914
934
|
{
|
915
935
|
const BYTE* const istart = (const BYTE*)src;
|
916
936
|
const BYTE* ip = istart;
|
@@ -919,24 +939,31 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
919
939
|
const BYTE* const ilimit = iend - 8;
|
920
940
|
const BYTE* const base = ms->window.base;
|
921
941
|
const U32 dictLimit = ms->window.dictLimit;
|
922
|
-
const U32 lowestIndex = ms->window.lowLimit;
|
923
942
|
const BYTE* const prefixStart = base + dictLimit;
|
924
943
|
const BYTE* const dictBase = ms->window.dictBase;
|
925
944
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
926
|
-
const BYTE* const dictStart = dictBase +
|
945
|
+
const BYTE* const dictStart = dictBase + ms->window.lowLimit;
|
946
|
+
const U32 windowLog = ms->cParams.windowLog;
|
927
947
|
|
928
948
|
typedef size_t (*searchMax_f)(
|
929
949
|
ZSTD_matchState_t* ms,
|
930
950
|
const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
|
931
|
-
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
951
|
+
searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
|
932
952
|
|
933
953
|
U32 offset_1 = rep[0], offset_2 = rep[1];
|
934
954
|
|
955
|
+
DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
|
956
|
+
|
935
957
|
/* init */
|
936
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
937
958
|
ip += (ip == prefixStart);
|
938
959
|
|
939
960
|
/* Match Loop */
|
961
|
+
#if defined(__GNUC__) && defined(__x86_64__)
|
962
|
+
/* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
|
963
|
+
* code alignment is perturbed. To fix the instability align the loop on 32-bytes.
|
964
|
+
*/
|
965
|
+
__asm__(".p2align 5");
|
966
|
+
#endif
|
940
967
|
while (ip < ilimit) {
|
941
968
|
size_t matchLength=0;
|
942
969
|
size_t offset=0;
|
@@ -944,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
944
971
|
U32 current = (U32)(ip-base);
|
945
972
|
|
946
973
|
/* check repCode */
|
947
|
-
{ const U32
|
974
|
+
{ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
|
975
|
+
const U32 repIndex = (U32)(current+1 - offset_1);
|
948
976
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
949
977
|
const BYTE* const repMatch = repBase + repIndex;
|
950
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
978
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
951
979
|
if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
|
952
980
|
/* repcode detected we should take it */
|
953
981
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -974,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
974
1002
|
current++;
|
975
1003
|
/* check repCode */
|
976
1004
|
if (offset) {
|
1005
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
977
1006
|
const U32 repIndex = (U32)(current - offset_1);
|
978
1007
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
979
1008
|
const BYTE* const repMatch = repBase + repIndex;
|
980
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
1009
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
981
1010
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
982
1011
|
/* repcode detected */
|
983
1012
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -1004,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
1004
1033
|
current++;
|
1005
1034
|
/* check repCode */
|
1006
1035
|
if (offset) {
|
1036
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
|
1007
1037
|
const U32 repIndex = (U32)(current - offset_1);
|
1008
1038
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1009
1039
|
const BYTE* const repMatch = repBase + repIndex;
|
1010
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
1040
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
1011
1041
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1012
1042
|
/* repcode detected */
|
1013
1043
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
@@ -1042,22 +1072,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
1042
1072
|
/* store sequence */
|
1043
1073
|
_storeSequence:
|
1044
1074
|
{ size_t const litLength = start - anchor;
|
1045
|
-
ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
|
1075
|
+
ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
|
1046
1076
|
anchor = ip = start + matchLength;
|
1047
1077
|
}
|
1048
1078
|
|
1049
1079
|
/* check immediate repcode */
|
1050
1080
|
while (ip <= ilimit) {
|
1051
|
-
const U32
|
1081
|
+
const U32 repCurrent = (U32)(ip-base);
|
1082
|
+
const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
|
1083
|
+
const U32 repIndex = repCurrent - offset_2;
|
1052
1084
|
const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
|
1053
1085
|
const BYTE* const repMatch = repBase + repIndex;
|
1054
|
-
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex >
|
1086
|
+
if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
|
1055
1087
|
if (MEM_read32(ip) == MEM_read32(repMatch)) {
|
1056
1088
|
/* repcode detected we should take it */
|
1057
1089
|
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
|
1058
1090
|
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
|
1059
1091
|
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
|
1060
|
-
ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
|
1092
|
+
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
|
1061
1093
|
ip += matchLength;
|
1062
1094
|
anchor = ip;
|
1063
1095
|
continue; /* faster when present ... (?) */
|
@@ -1070,7 +1102,7 @@ _storeSequence:
|
|
1070
1102
|
rep[1] = offset_2;
|
1071
1103
|
|
1072
1104
|
/* Return the last literals size */
|
1073
|
-
return iend - anchor;
|
1105
|
+
return (size_t)(iend - anchor);
|
1074
1106
|
}
|
1075
1107
|
|
1076
1108
|
|
@@ -1078,7 +1110,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
|
|
1078
1110
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1079
1111
|
void const* src, size_t srcSize)
|
1080
1112
|
{
|
1081
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
1113
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
|
1082
1114
|
}
|
1083
1115
|
|
1084
1116
|
size_t ZSTD_compressBlock_lazy_extDict(
|
@@ -1086,7 +1118,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
|
|
1086
1118
|
void const* src, size_t srcSize)
|
1087
1119
|
|
1088
1120
|
{
|
1089
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
1121
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
|
1090
1122
|
}
|
1091
1123
|
|
1092
1124
|
size_t ZSTD_compressBlock_lazy2_extDict(
|
@@ -1094,7 +1126,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
|
|
1094
1126
|
void const* src, size_t srcSize)
|
1095
1127
|
|
1096
1128
|
{
|
1097
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
1129
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
|
1098
1130
|
}
|
1099
1131
|
|
1100
1132
|
size_t ZSTD_compressBlock_btlazy2_extDict(
|
@@ -1102,5 +1134,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
1102
1134
|
void const* src, size_t srcSize)
|
1103
1135
|
|
1104
1136
|
{
|
1105
|
-
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize,
|
1137
|
+
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
|
1106
1138
|
}
|
@@ -1,15 +1,16 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
6
6
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
7
|
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
#include "zstd_ldm.h"
|
11
12
|
|
12
|
-
#include "debug.h"
|
13
|
+
#include "../common/debug.h"
|
13
14
|
#include "zstd_fast.h" /* ZSTD_fillHashTable() */
|
14
15
|
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
15
16
|
|
@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
|
49
50
|
{
|
50
51
|
size_t const ldmHSize = ((size_t)1) << params.hashLog;
|
51
52
|
size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
|
52
|
-
size_t const ldmBucketSize =
|
53
|
-
|
54
|
-
|
53
|
+
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
54
|
+
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
|
55
|
+
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
|
55
56
|
return params.enableLdm ? totalSize : 0;
|
56
57
|
}
|
57
58
|
|
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
|
223
224
|
return rollingHash;
|
224
225
|
}
|
225
226
|
|
227
|
+
void ZSTD_ldm_fillHashTable(
|
228
|
+
ldmState_t* state, const BYTE* ip,
|
229
|
+
const BYTE* iend, ldmParams_t const* params)
|
230
|
+
{
|
231
|
+
DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
|
232
|
+
if ((size_t)(iend - ip) >= params->minMatchLength) {
|
233
|
+
U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
|
234
|
+
ZSTD_ldm_fillLdmHashTable(
|
235
|
+
state, startingHash, ip, iend - params->minMatchLength, state->window.base,
|
236
|
+
params->hashLog - params->bucketSizeLog,
|
237
|
+
*params);
|
238
|
+
}
|
239
|
+
}
|
240
|
+
|
226
241
|
|
227
242
|
/** ZSTD_ldm_limitTableUpdate() :
|
228
243
|
*
|
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
|
|
447
462
|
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
448
463
|
U32 const ldmHSize = 1U << params->hashLog;
|
449
464
|
U32 const correction = ZSTD_window_correctOverflow(
|
450
|
-
&ldmState->window, /* cycleLog */ 0, maxDist,
|
465
|
+
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
451
466
|
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
467
|
+
/* invalidate dictionaries on overflow correction */
|
468
|
+
ldmState->loadedDictEnd = 0;
|
452
469
|
}
|
453
470
|
/* 2. We enforce the maximum offset allowed.
|
454
471
|
*
|
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
|
|
457
474
|
* TODO: * Test the chunk size.
|
458
475
|
* * Try invalidation after the sequence generation and test the
|
459
476
|
* the offset against maxDist directly.
|
477
|
+
*
|
478
|
+
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
479
|
+
* that any offset used is valid at the END of the sequence, since it may
|
480
|
+
* be split into two sequences. This condition holds when using
|
481
|
+
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
|
482
|
+
* against maxDist directly, we'll have to carefully handle that case.
|
460
483
|
*/
|
461
|
-
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist,
|
484
|
+
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
|
462
485
|
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
463
486
|
newLeftoverSize = ZSTD_ldm_generateSequences_internal(
|
464
487
|
ldmState, sequences, params, chunkStart, chunkSize);
|
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
566
589
|
if (sequence.offset == 0)
|
567
590
|
break;
|
568
591
|
|
569
|
-
assert(sequence.offset <= (1U << cParams->windowLog));
|
570
592
|
assert(ip + sequence.litLength + sequence.matchLength <= iend);
|
571
593
|
|
572
594
|
/* Fill tables for block compressor */
|
573
595
|
ZSTD_ldm_limitTableUpdate(ms, ip);
|
574
596
|
ZSTD_ldm_fillFastTables(ms, ip);
|
575
597
|
/* Run the block compressor */
|
576
|
-
DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
|
598
|
+
DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
|
577
599
|
{
|
578
600
|
size_t const newLitLength =
|
579
601
|
blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
|
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
583
605
|
rep[i] = rep[i-1];
|
584
606
|
rep[0] = sequence.offset;
|
585
607
|
/* Store the sequence */
|
586
|
-
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
|
608
|
+
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
587
609
|
sequence.offset + ZSTD_REP_MOVE,
|
588
610
|
sequence.matchLength - MINMATCH);
|
589
611
|
ip += sequence.matchLength;
|