zstd-ruby 1.3.8.0 → 1.4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +133 -61
  5. data/ext/zstdruby/libzstd/README.md +51 -18
  6. data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
  7. data/ext/zstdruby/libzstd/common/compiler.h +41 -6
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +13 -33
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
  16. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  17. data/ext/zstdruby/libzstd/common/mem.h +75 -2
  18. data/ext/zstdruby/libzstd/common/pool.c +8 -4
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +52 -6
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
  23. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
  28. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  29. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
  52. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
  59. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  69. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  70. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
  72. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
  74. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
  76. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
  78. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
  80. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
  82. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
  84. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
  85. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  86. data/ext/zstdruby/libzstd/zstd.h +921 -597
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. data/zstd-ruby.gemspec +2 -2
  89. metadata +19 -14
  90. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h"
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
- U32 const windowLow = ms->window.lowLimit;
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
89
+
87
90
 
88
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
89
92
  current, dictLimit, windowLow);
@@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
239
242
 
240
243
  const BYTE* const base = ms->window.base;
241
244
  U32 const current = (U32)(ip-base);
242
- U32 const windowLow = ms->window.lowLimit;
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
243
246
 
244
247
  U32* const bt = ms->chainTable;
245
248
  U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
490
493
  const U32 dictLimit = ms->window.dictLimit;
491
494
  const BYTE* const prefixStart = base + dictLimit;
492
495
  const BYTE* const dictEnd = dictBase + dictLimit;
493
- const U32 lowLimit = ms->window.lowLimit;
494
496
  const U32 current = (U32)(ip-base);
497
+ const U32 maxDistance = 1U << cParams->windowLog;
498
+ const U32 lowestValid = ms->window.lowLimit;
499
+ const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
500
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
501
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
495
502
  const U32 minChain = current > chainSize ? current - chainSize : 0;
496
503
  U32 nbAttempts = 1U << cParams->searchLog;
497
504
  size_t ml=4-1;
@@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
612
619
  /* *******************************
613
620
  * Common parser - lazy strategy
614
621
  *********************************/
615
- FORCE_INLINE_TEMPLATE
616
- size_t ZSTD_compressBlock_lazy_generic(
622
+ typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
623
+
624
+ FORCE_INLINE_TEMPLATE size_t
625
+ ZSTD_compressBlock_lazy_generic(
617
626
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
618
627
  U32 rep[ZSTD_REP_NUM],
619
628
  const void* src, size_t srcSize,
620
- const U32 searchMethod, const U32 depth,
629
+ const searchMethod_e searchMethod, const U32 depth,
621
630
  ZSTD_dictMode_e const dictMode)
622
631
  {
623
632
  const BYTE* const istart = (const BYTE*)src;
@@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
633
642
  ZSTD_matchState_t* ms,
634
643
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
635
644
  searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
636
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
637
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
645
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
646
+ : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
647
+ (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
648
+ : ZSTD_HcFindBestMatch_selectMLS);
638
649
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
639
650
 
640
651
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
@@ -649,13 +660,16 @@ size_t ZSTD_compressBlock_lazy_generic(
649
660
  const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
650
661
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
651
662
  0;
652
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
663
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
664
+
665
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
653
666
 
654
667
  /* init */
655
668
  ip += (dictAndPrefixLength == 0);
656
- ms->nextToUpdate3 = ms->nextToUpdate;
657
669
  if (dictMode == ZSTD_noDict) {
658
- U32 const maxRep = (U32)(ip - prefixLowest);
670
+ U32 const current = (U32)(ip - base);
671
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
672
+ U32 const maxRep = current - windowLow;
659
673
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
660
674
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
661
675
  }
@@ -667,6 +681,12 @@ size_t ZSTD_compressBlock_lazy_generic(
667
681
  }
668
682
 
669
683
  /* Match Loop */
684
+ #if defined(__GNUC__) && defined(__x86_64__)
685
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
686
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
687
+ */
688
+ __asm__(".p2align 5");
689
+ #endif
670
690
  while (ip < ilimit) {
671
691
  size_t matchLength=0;
672
692
  size_t offset=0;
@@ -800,7 +820,7 @@ size_t ZSTD_compressBlock_lazy_generic(
800
820
  /* store sequence */
801
821
  _storeSequence:
802
822
  { size_t const litLength = start - anchor;
803
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
823
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
804
824
  anchor = ip = start + matchLength;
805
825
  }
806
826
 
@@ -818,7 +838,7 @@ _storeSequence:
818
838
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
819
839
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
820
840
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
821
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
841
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
822
842
  ip += matchLength;
823
843
  anchor = ip;
824
844
  continue;
@@ -833,7 +853,7 @@ _storeSequence:
833
853
  /* store sequence */
834
854
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
835
855
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
836
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
856
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
837
857
  ip += matchLength;
838
858
  anchor = ip;
839
859
  continue; /* faster when present ... (?) */
@@ -844,7 +864,7 @@ _storeSequence:
844
864
  rep[1] = offset_2 ? offset_2 : savedOffset;
845
865
 
846
866
  /* Return the last literals size */
847
- return iend - anchor;
867
+ return (size_t)(iend - anchor);
848
868
  }
849
869
 
850
870
 
@@ -852,56 +872,56 @@ size_t ZSTD_compressBlock_btlazy2(
852
872
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
853
873
  void const* src, size_t srcSize)
854
874
  {
855
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
875
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
856
876
  }
857
877
 
858
878
  size_t ZSTD_compressBlock_lazy2(
859
879
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
860
880
  void const* src, size_t srcSize)
861
881
  {
862
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
882
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
863
883
  }
864
884
 
865
885
  size_t ZSTD_compressBlock_lazy(
866
886
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
867
887
  void const* src, size_t srcSize)
868
888
  {
869
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
889
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
870
890
  }
871
891
 
872
892
  size_t ZSTD_compressBlock_greedy(
873
893
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
874
894
  void const* src, size_t srcSize)
875
895
  {
876
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
896
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
877
897
  }
878
898
 
879
899
  size_t ZSTD_compressBlock_btlazy2_dictMatchState(
880
900
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
881
901
  void const* src, size_t srcSize)
882
902
  {
883
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
903
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
884
904
  }
885
905
 
886
906
  size_t ZSTD_compressBlock_lazy2_dictMatchState(
887
907
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
888
908
  void const* src, size_t srcSize)
889
909
  {
890
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
910
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
891
911
  }
892
912
 
893
913
  size_t ZSTD_compressBlock_lazy_dictMatchState(
894
914
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
895
915
  void const* src, size_t srcSize)
896
916
  {
897
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
917
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
898
918
  }
899
919
 
900
920
  size_t ZSTD_compressBlock_greedy_dictMatchState(
901
921
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
902
922
  void const* src, size_t srcSize)
903
923
  {
904
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
924
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
905
925
  }
906
926
 
907
927
 
@@ -910,7 +930,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
910
930
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
911
931
  U32 rep[ZSTD_REP_NUM],
912
932
  const void* src, size_t srcSize,
913
- const U32 searchMethod, const U32 depth)
933
+ const searchMethod_e searchMethod, const U32 depth)
914
934
  {
915
935
  const BYTE* const istart = (const BYTE*)src;
916
936
  const BYTE* ip = istart;
@@ -919,24 +939,31 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
919
939
  const BYTE* const ilimit = iend - 8;
920
940
  const BYTE* const base = ms->window.base;
921
941
  const U32 dictLimit = ms->window.dictLimit;
922
- const U32 lowestIndex = ms->window.lowLimit;
923
942
  const BYTE* const prefixStart = base + dictLimit;
924
943
  const BYTE* const dictBase = ms->window.dictBase;
925
944
  const BYTE* const dictEnd = dictBase + dictLimit;
926
- const BYTE* const dictStart = dictBase + lowestIndex;
945
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
946
+ const U32 windowLog = ms->cParams.windowLog;
927
947
 
928
948
  typedef size_t (*searchMax_f)(
929
949
  ZSTD_matchState_t* ms,
930
950
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
931
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
951
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
932
952
 
933
953
  U32 offset_1 = rep[0], offset_2 = rep[1];
934
954
 
955
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
956
+
935
957
  /* init */
936
- ms->nextToUpdate3 = ms->nextToUpdate;
937
958
  ip += (ip == prefixStart);
938
959
 
939
960
  /* Match Loop */
961
+ #if defined(__GNUC__) && defined(__x86_64__)
962
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
963
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
964
+ */
965
+ __asm__(".p2align 5");
966
+ #endif
940
967
  while (ip < ilimit) {
941
968
  size_t matchLength=0;
942
969
  size_t offset=0;
@@ -944,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
944
971
  U32 current = (U32)(ip-base);
945
972
 
946
973
  /* check repCode */
947
- { const U32 repIndex = (U32)(current+1 - offset_1);
974
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
975
+ const U32 repIndex = (U32)(current+1 - offset_1);
948
976
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
949
977
  const BYTE* const repMatch = repBase + repIndex;
950
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
978
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
951
979
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
952
980
  /* repcode detected we should take it */
953
981
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -974,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
974
1002
  current++;
975
1003
  /* check repCode */
976
1004
  if (offset) {
1005
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
977
1006
  const U32 repIndex = (U32)(current - offset_1);
978
1007
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
979
1008
  const BYTE* const repMatch = repBase + repIndex;
980
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1009
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
981
1010
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
982
1011
  /* repcode detected */
983
1012
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1004,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1004
1033
  current++;
1005
1034
  /* check repCode */
1006
1035
  if (offset) {
1036
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1007
1037
  const U32 repIndex = (U32)(current - offset_1);
1008
1038
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1009
1039
  const BYTE* const repMatch = repBase + repIndex;
1010
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1040
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1011
1041
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1012
1042
  /* repcode detected */
1013
1043
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1042,22 +1072,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1042
1072
  /* store sequence */
1043
1073
  _storeSequence:
1044
1074
  { size_t const litLength = start - anchor;
1045
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1075
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1046
1076
  anchor = ip = start + matchLength;
1047
1077
  }
1048
1078
 
1049
1079
  /* check immediate repcode */
1050
1080
  while (ip <= ilimit) {
1051
- const U32 repIndex = (U32)((ip-base) - offset_2);
1081
+ const U32 repCurrent = (U32)(ip-base);
1082
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1083
+ const U32 repIndex = repCurrent - offset_2;
1052
1084
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1053
1085
  const BYTE* const repMatch = repBase + repIndex;
1054
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1086
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1055
1087
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1056
1088
  /* repcode detected we should take it */
1057
1089
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1058
1090
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1059
1091
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1060
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1092
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1061
1093
  ip += matchLength;
1062
1094
  anchor = ip;
1063
1095
  continue; /* faster when present ... (?) */
@@ -1070,7 +1102,7 @@ _storeSequence:
1070
1102
  rep[1] = offset_2;
1071
1103
 
1072
1104
  /* Return the last literals size */
1073
- return iend - anchor;
1105
+ return (size_t)(iend - anchor);
1074
1106
  }
1075
1107
 
1076
1108
 
@@ -1078,7 +1110,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
1078
1110
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1079
1111
  void const* src, size_t srcSize)
1080
1112
  {
1081
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
1113
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1082
1114
  }
1083
1115
 
1084
1116
  size_t ZSTD_compressBlock_lazy_extDict(
@@ -1086,7 +1118,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
1086
1118
  void const* src, size_t srcSize)
1087
1119
 
1088
1120
  {
1089
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
1121
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1090
1122
  }
1091
1123
 
1092
1124
  size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1094,7 +1126,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
1094
1126
  void const* src, size_t srcSize)
1095
1127
 
1096
1128
  {
1097
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
1129
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1098
1130
  }
1099
1131
 
1100
1132
  size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1102,5 +1134,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
1102
1134
  void const* src, size_t srcSize)
1103
1135
 
1104
1136
  {
1105
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
1137
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1106
1138
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@ extern "C" {
19
19
 
20
20
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
21
21
 
22
- void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
22
+ void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
23
23
 
24
24
  size_t ZSTD_compressBlock_btlazy2(
25
25
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -1,15 +1,16 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #include "zstd_ldm.h"
11
12
 
12
- #include "debug.h"
13
+ #include "../common/debug.h"
13
14
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
15
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
15
16
 
@@ -49,9 +50,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
49
50
  {
50
51
  size_t const ldmHSize = ((size_t)1) << params.hashLog;
51
52
  size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
52
- size_t const ldmBucketSize =
53
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
53
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
55
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
55
56
  return params.enableLdm ? totalSize : 0;
56
57
  }
57
58
 
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
223
224
  return rollingHash;
224
225
  }
225
226
 
227
+ void ZSTD_ldm_fillHashTable(
228
+ ldmState_t* state, const BYTE* ip,
229
+ const BYTE* iend, ldmParams_t const* params)
230
+ {
231
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
232
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
233
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
234
+ ZSTD_ldm_fillLdmHashTable(
235
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
236
+ params->hashLog - params->bucketSizeLog,
237
+ *params);
238
+ }
239
+ }
240
+
226
241
 
227
242
  /** ZSTD_ldm_limitTableUpdate() :
228
243
  *
@@ -429,7 +444,7 @@ size_t ZSTD_ldm_generateSequences(
429
444
  */
430
445
  assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
431
446
  /* The input could be very large (in zstdmt), so it must be broken up into
432
- * chunks to enforce the maximmum distance and handle overflow correction.
447
+ * chunks to enforce the maximum distance and handle overflow correction.
433
448
  */
434
449
  assert(sequences->pos <= sequences->size);
435
450
  assert(sequences->size <= sequences->capacity);
@@ -447,8 +462,10 @@ size_t ZSTD_ldm_generateSequences(
447
462
  if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
448
463
  U32 const ldmHSize = 1U << params->hashLog;
449
464
  U32 const correction = ZSTD_window_correctOverflow(
450
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
465
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
466
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
467
+ /* invalidate dictionaries on overflow correction */
468
+ ldmState->loadedDictEnd = 0;
452
469
  }
453
470
  /* 2. We enforce the maximum offset allowed.
454
471
  *
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
457
474
  * TODO: * Test the chunk size.
458
475
  * * Try invalidation after the sequence generation and test the
459
476
  * the offset against maxDist directly.
477
+ *
478
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
479
+ * that any offset used is valid at the END of the sequence, since it may
480
+ * be split into two sequences. This condition holds when using
481
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
482
+ * against maxDist directly, we'll have to carefully handle that case.
460
483
  */
461
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
484
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
462
485
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
486
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
487
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
566
589
  if (sequence.offset == 0)
567
590
  break;
568
591
 
569
- assert(sequence.offset <= (1U << cParams->windowLog));
570
592
  assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
593
 
572
594
  /* Fill tables for block compressor */
573
595
  ZSTD_ldm_limitTableUpdate(ms, ip);
574
596
  ZSTD_ldm_fillFastTables(ms, ip);
575
597
  /* Run the block compressor */
576
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
598
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
577
599
  {
578
600
  size_t const newLitLength =
579
601
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +605,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
583
605
  rep[i] = rep[i-1];
584
606
  rep[0] = sequence.offset;
585
607
  /* Store the sequence */
586
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
608
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
587
609
  sequence.offset + ZSTD_REP_MOVE,
588
610
  sequence.matchLength - MINMATCH);
589
611
  ip += sequence.matchLength;