extzstd 0.3 → 0.3.1

Files changed (107)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +8 -0
  3. data/README.md +1 -1
  4. data/contrib/zstd/CHANGELOG +94 -0
  5. data/contrib/zstd/CONTRIBUTING.md +351 -1
  6. data/contrib/zstd/Makefile +32 -10
  7. data/contrib/zstd/README.md +33 -10
  8. data/contrib/zstd/TESTING.md +2 -2
  9. data/contrib/zstd/appveyor.yml +42 -4
  10. data/contrib/zstd/lib/Makefile +128 -60
  11. data/contrib/zstd/lib/README.md +47 -16
  12. data/contrib/zstd/lib/common/bitstream.h +38 -39
  13. data/contrib/zstd/lib/common/compiler.h +40 -5
  14. data/contrib/zstd/lib/common/cpu.h +1 -1
  15. data/contrib/zstd/lib/common/debug.c +11 -31
  16. data/contrib/zstd/lib/common/debug.h +11 -31
  17. data/contrib/zstd/lib/common/entropy_common.c +13 -33
  18. data/contrib/zstd/lib/common/error_private.c +2 -1
  19. data/contrib/zstd/lib/common/error_private.h +6 -2
  20. data/contrib/zstd/lib/common/fse.h +12 -32
  21. data/contrib/zstd/lib/common/fse_decompress.c +12 -35
  22. data/contrib/zstd/lib/common/huf.h +15 -33
  23. data/contrib/zstd/lib/common/mem.h +75 -2
  24. data/contrib/zstd/lib/common/pool.c +8 -4
  25. data/contrib/zstd/lib/common/pool.h +2 -2
  26. data/contrib/zstd/lib/common/threading.c +50 -4
  27. data/contrib/zstd/lib/common/threading.h +36 -4
  28. data/contrib/zstd/lib/common/xxhash.c +23 -35
  29. data/contrib/zstd/lib/common/xxhash.h +11 -31
  30. data/contrib/zstd/lib/common/zstd_common.c +1 -1
  31. data/contrib/zstd/lib/common/zstd_errors.h +2 -1
  32. data/contrib/zstd/lib/common/zstd_internal.h +154 -26
  33. data/contrib/zstd/lib/compress/fse_compress.c +17 -40
  34. data/contrib/zstd/lib/compress/hist.c +15 -35
  35. data/contrib/zstd/lib/compress/hist.h +12 -32
  36. data/contrib/zstd/lib/compress/huf_compress.c +92 -92
  37. data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
  38. data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
  39. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  40. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  41. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  45. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  46. data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
  47. data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
  48. data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
  49. data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
  50. data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
  51. data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
  53. data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
  54. data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
  55. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  56. data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
  57. data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
  58. data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
  59. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
  60. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  61. data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
  62. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
  63. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
  64. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
  65. data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
  66. data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
  67. data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
  68. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
  69. data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
  70. data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
  71. data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
  72. data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
  73. data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
  74. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
  75. data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
  76. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  77. data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
  78. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  79. data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
  80. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  81. data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
  82. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  83. data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
  84. data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
  85. data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
  86. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  87. data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
  88. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  89. data/contrib/zstd/lib/libzstd.pc.in +3 -2
  90. data/contrib/zstd/lib/zstd.h +265 -88
  91. data/ext/extzstd.h +1 -1
  92. data/ext/libzstd_conf.h +8 -0
  93. data/ext/zstd_common.c +1 -3
  94. data/ext/zstd_compress.c +3 -3
  95. data/ext/zstd_decompress.c +1 -5
  96. data/ext/zstd_dictbuilder.c +2 -3
  97. data/ext/zstd_dictbuilder_fastcover.c +1 -3
  98. data/ext/zstd_legacy_v01.c +2 -0
  99. data/ext/zstd_legacy_v02.c +2 -0
  100. data/ext/zstd_legacy_v03.c +2 -0
  101. data/ext/zstd_legacy_v04.c +2 -0
  102. data/ext/zstd_legacy_v05.c +2 -0
  103. data/ext/zstd_legacy_v06.c +2 -0
  104. data/ext/zstd_legacy_v07.c +2 -0
  105. data/lib/extzstd.rb +18 -10
  106. data/lib/extzstd/version.rb +1 -1
  107. metadata +15 -6
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
  extern "C" {
  #endif

- #include "mem.h" /* U32 */
+ #include "../common/mem.h" /* U32 */
  #include "zstd_compress_internal.h"

  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
  U32* largerPtr = smallerPtr + 1;
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
  U32 dummy32; /* to be nullified at the end */
- U32 const windowLow = ms->window.lowLimit;
+ U32 const windowValid = ms->window.lowLimit;
+ U32 const maxDistance = 1U << cParams->windowLog;
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+

  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
  current, dictLimit, windowLow);
@@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,

  const BYTE* const base = ms->window.base;
  U32 const current = (U32)(ip-base);
- U32 const windowLow = ms->window.lowLimit;
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);

  U32* const bt = ms->chainTable;
  U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
  const U32 dictLimit = ms->window.dictLimit;
  const BYTE* const prefixStart = base + dictLimit;
  const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 lowLimit = ms->window.lowLimit;
  const U32 current = (U32)(ip-base);
+ const U32 maxDistance = 1U << cParams->windowLog;
+ const U32 lowestValid = ms->window.lowLimit;
+ const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
  const U32 minChain = current > chainSize ? current - chainSize : 0;
  U32 nbAttempts = 1U << cParams->searchLog;
  size_t ml=4-1;
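The clamped windowLow computed above means the binary-tree and hash-chain searchers never look further back than one full window, even when the window structure still references older data. A minimal standalone sketch of that clamping rule, mirroring the expression added in this hunk (the helper name and the simplified window struct are illustrative assumptions, not zstd's actual definitions):

    #include <stdint.h>

    typedef uint32_t U32;

    /* Simplified stand-in for the match-state window; illustration only. */
    typedef struct { U32 lowLimit; } window_t;

    /* Lowest index worth searching: never below the window's lowest valid index,
     * and never more than (1 << windowLog) bytes behind the current position. */
    static U32 lowest_match_index(const window_t* w, U32 current, unsigned windowLog)
    {
        U32 const lowestValid = w->lowLimit;
        U32 const maxDistance = 1U << windowLog;
        return (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
    }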
@@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
  /* *******************************
  * Common parser - lazy strategy
  *********************************/
- FORCE_INLINE_TEMPLATE
- size_t ZSTD_compressBlock_lazy_generic(
+ typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+ FORCE_INLINE_TEMPLATE size_t
+ ZSTD_compressBlock_lazy_generic(
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
  U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth,
+ const searchMethod_e searchMethod, const U32 depth,
  ZSTD_dictMode_e const dictMode)
  {
  const BYTE* const istart = (const BYTE*)src;
@@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
  ZSTD_matchState_t* ms,
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
  searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+ : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
+ : ZSTD_HcFindBestMatch_selectMLS);
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

  const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -649,13 +660,16 @@ size_t ZSTD_compressBlock_lazy_generic(
  const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
  0;
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

  /* init */
  ip += (dictAndPrefixLength == 0);
- ms->nextToUpdate3 = ms->nextToUpdate;
  if (dictMode == ZSTD_noDict) {
- U32 const maxRep = (U32)(ip - prefixLowest);
+ U32 const current = (U32)(ip - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
+ U32 const maxRep = current - windowLow;
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
  }
@@ -667,6 +681,12 @@ size_t ZSTD_compressBlock_lazy_generic(
  }

  /* Match Loop */
+ #if defined(__GNUC__) && defined(__x86_64__)
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+ */
+ __asm__(".p2align 5");
+ #endif
  while (ip < ilimit) {
  size_t matchLength=0;
  size_t offset=0;
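The .p2align 5 directive added above asks the assembler to align the top of the match loop to a 32-byte boundary on GCC-style x86-64 toolchains, and the preprocessor guard keeps it out of other compilers. A standalone illustration of the same pattern (not zstd code; the macro and function names here are made up):

    #include <stddef.h>

    /* Align the head of a hot loop to 32 bytes on GCC/Clang x86-64; a no-op elsewhere. */
    #if defined(__GNUC__) && defined(__x86_64__)
    #  define ALIGN_LOOP_32() __asm__(".p2align 5")
    #else
    #  define ALIGN_LOOP_32() do {} while (0)
    #endif

    size_t count_nonzero(const unsigned char* p, const unsigned char* end)
    {
        size_t n = 0;
        ALIGN_LOOP_32();            /* aligns the loop that follows */
        while (p < end) { n += (*p++ != 0); }
        return n;
    }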
@@ -800,7 +820,7 @@ size_t ZSTD_compressBlock_lazy_generic(
  /* store sequence */
  _storeSequence:
  { size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  anchor = ip = start + matchLength;
  }

@@ -818,7 +838,7 @@ _storeSequence:
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  ip += matchLength;
  anchor = ip;
  continue;
@@ -833,7 +853,7 @@ _storeSequence:
  /* store sequence */
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  ip += matchLength;
  anchor = ip;
  continue; /* faster when present ... (?) */
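Every ZSTD_storeSeq() call in this file gains an extra iend argument between the literals pointer and the offset code. Judging only from the call sites visible in this diff, the change amounts to the following shape (the name and exact role of the new parameter live in zstd_compress_internal.h and are not shown here):

    /* Call shape before/after, as seen at the call sites above:
     *   old: ZSTD_storeSeq(seqStore, litLength, anchor,       (U32)offset, matchLength-MINMATCH);
     *   new: ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
     * The added argument is the end of the input buffer, presumably so the sequence
     * store can bound its copy of the literals.
     */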
@@ -844,7 +864,7 @@ _storeSequence:
  rep[1] = offset_2 ? offset_2 : savedOffset;

  /* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
  }


@@ -852,56 +872,56 @@ size_t ZSTD_compressBlock_btlazy2(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
  }

  size_t ZSTD_compressBlock_lazy2(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
  }

  size_t ZSTD_compressBlock_lazy(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
  }

  size_t ZSTD_compressBlock_greedy(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
  }

  size_t ZSTD_compressBlock_btlazy2_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
  }

  size_t ZSTD_compressBlock_lazy2_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
  }

  size_t ZSTD_compressBlock_lazy_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
  }

  size_t ZSTD_compressBlock_greedy_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
  }
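With the searchMethod_e change, the block compressors above now state their search strategy explicitly instead of passing a bare 0/1 flag. The mapping they encode, restated from the calls above (the _dictMatchState variants here, and the _extDict variants further down, use the same pairs):

    /* strategy     searchMethod        depth
     * greedy       search_hashChain      0
     * lazy         search_hashChain      1
     * lazy2        search_hashChain      2
     * btlazy2      search_binaryTree     2
     */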
@@ -910,7 +930,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
  U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+ const searchMethod_e searchMethod, const U32 depth)
  {
  const BYTE* const istart = (const BYTE*)src;
  const BYTE* ip = istart;
@@ -919,24 +939,31 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  const BYTE* const ilimit = iend - 8;
  const BYTE* const base = ms->window.base;
  const U32 dictLimit = ms->window.dictLimit;
- const U32 lowestIndex = ms->window.lowLimit;
  const BYTE* const prefixStart = base + dictLimit;
  const BYTE* const dictBase = ms->window.dictBase;
  const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const dictStart = dictBase + lowestIndex;
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
+ const U32 windowLog = ms->cParams.windowLog;

  typedef size_t (*searchMax_f)(
  ZSTD_matchState_t* ms,
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;

  U32 offset_1 = rep[0], offset_2 = rep[1];

+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
  /* init */
- ms->nextToUpdate3 = ms->nextToUpdate;
  ip += (ip == prefixStart);

  /* Match Loop */
+ #if defined(__GNUC__) && defined(__x86_64__)
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+ */
+ __asm__(".p2align 5");
+ #endif
  while (ip < ilimit) {
  size_t matchLength=0;
  size_t offset=0;
@@ -944,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  U32 current = (U32)(ip-base);

  /* check repCode */
- { const U32 repIndex = (U32)(current+1 - offset_1);
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
+ const U32 repIndex = (U32)(current+1 - offset_1);
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
  /* repcode detected we should take it */
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -974,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  current++;
  /* check repCode */
  if (offset) {
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
  const U32 repIndex = (U32)(current - offset_1);
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
  /* repcode detected */
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1004,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  current++;
  /* check repCode */
  if (offset) {
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
  const U32 repIndex = (U32)(current - offset_1);
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
  /* repcode detected */
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1042,22 +1072,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
  /* store sequence */
  _storeSequence:
  { size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
  anchor = ip = start + matchLength;
  }

  /* check immediate repcode */
  while (ip <= ilimit) {
- const U32 repIndex = (U32)((ip-base) - offset_2);
+ const U32 repCurrent = (U32)(ip-base);
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+ const U32 repIndex = repCurrent - offset_2;
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
  const BYTE* const repMatch = repBase + repIndex;
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
  /* repcode detected we should take it */
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
  ip += matchLength;
  anchor = ip;
  continue; /* faster when present ... (?) */
@@ -1070,7 +1102,7 @@ _storeSequence:
  rep[1] = offset_2;

  /* Return the last literals size */
- return iend - anchor;
+ return (size_t)(iend - anchor);
  }


@@ -1078,7 +1110,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  void const* src, size_t srcSize)
  {
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
  }

  size_t ZSTD_compressBlock_lazy_extDict(
@@ -1086,7 +1118,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
  void const* src, size_t srcSize)

  {
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
  }

  size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1094,7 +1126,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
  void const* src, size_t srcSize)

  {
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
  }

  size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1102,5 +1134,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
  void const* src, size_t srcSize)

  {
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
  }
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,15 +1,16 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */

  #include "zstd_ldm.h"

- #include "debug.h"
+ #include "../common/debug.h"
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */

@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
  {
  size_t const ldmHSize = ((size_t)1) << params.hashLog;
  size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
- size_t const ldmBucketSize =
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
  return params.enableLdm ? totalSize : 0;
  }
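ZSTD_ldm_getTableSize() now reports the two LDM allocations through ZSTD_cwksp_alloc_size(), the sizing helper of the new workspace module zstd_cwksp.h added in this release (file 45 in the list above). A rough worked example of the quantity being computed, assuming ZSTD_cwksp_alloc_size() returns its argument unchanged in a regular build and that an ldmEntry_t is 8 bytes (both are assumptions; neither definition appears in this diff):

    /* Example: hashLog = 20, bucketSizeLog = 3
     *   ldmHSize      = 1 << 20             = 1,048,576 entries
     *   ldmBucketSize = 1 << (20 - 3)       =   131,072 bytes
     *   hash table    = 1,048,576 * 8       = 8,388,608 bytes
     *   totalSize     = 131,072 + 8,388,608 = 8,519,680 bytes (~8.1 MiB)
     */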
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  return rollingHash;
  }

+ void ZSTD_ldm_fillHashTable(
+ ldmState_t* state, const BYTE* ip,
+ const BYTE* iend, ldmParams_t const* params)
+ {
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
+ ZSTD_ldm_fillLdmHashTable(
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
+ params->hashLog - params->bucketSizeLog,
+ *params);
+ }
+ }
+

  /** ZSTD_ldm_limitTableUpdate() :
  *
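The newly added ZSTD_ldm_fillHashTable() exposes an explicit way to seed the long-distance matcher's hash table over a byte range (inputs shorter than minMatchLength are simply skipped). A minimal usage sketch, assuming the surrounding zstd internal headers for the ldmState_t and ldmParams_t types; the caller function itself is hypothetical, only the call shape comes from the definition above:

    /* Hypothetical caller: seed the LDM table over [dictStart, dictEnd). */
    static void seed_ldm_from_dict(ldmState_t* ldmState, ldmParams_t const* ldmParams,
                                   const BYTE* dictStart, const BYTE* dictEnd)
    {
        if (ldmParams->enableLdm)
            ZSTD_ldm_fillHashTable(ldmState, dictStart, dictEnd, ldmParams);
    }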
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
  if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
  U32 const ldmHSize = 1U << params->hashLog;
  U32 const correction = ZSTD_window_correctOverflow(
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+ /* invalidate dictionaries on overflow correction */
+ ldmState->loadedDictEnd = 0;
  }
  /* 2. We enforce the maximum offset allowed.
  *
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
  * TODO: * Test the chunk size.
  * * Try invalidation after the sequence generation and test the
  * the offset against maxDist directly.
+ *
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+ * that any offset used is valid at the END of the sequence, since it may
+ * be split into two sequences. This condition holds when using
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+ * against maxDist directly, we'll have to carefully handle that case.
  */
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
  if (sequence.offset == 0)
  break;

- assert(sequence.offset <= (1U << cParams->windowLog));
  assert(ip + sequence.litLength + sequence.matchLength <= iend);

  /* Fill tables for block compressor */
  ZSTD_ldm_limitTableUpdate(ms, ip);
  ZSTD_ldm_fillFastTables(ms, ip);
  /* Run the block compressor */
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
  {
  size_t const newLitLength =
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
  rep[i] = rep[i-1];
  rep[0] = sequence.offset;
  /* Store the sequence */
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
  sequence.offset + ZSTD_REP_MOVE,
  sequence.matchLength - MINMATCH);
  ip += sequence.matchLength;