zstd-ruby 1.4.0.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,8 +17,18 @@ extern "C" {
17
17
 
18
18
  #include "zstd_compress_internal.h"
19
19
 
20
+ /**
21
+ * Dedicated Dictionary Search Structure bucket log. In the
22
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
23
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
24
+ * one.
25
+ */
26
+ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
27
+
20
28
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
21
29
 
30
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
31
+
22
32
  void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
23
33
 
24
34
  size_t ZSTD_compressBlock_btlazy2(
@@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
47
57
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
48
58
  void const* src, size_t srcSize);
49
59
 
60
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
61
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
62
+ void const* src, size_t srcSize);
63
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
64
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
65
+ void const* src, size_t srcSize);
66
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
67
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
68
+ void const* src, size_t srcSize);
69
+
50
70
  size_t ZSTD_compressBlock_greedy_extDict(
51
71
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
52
72
  void const* src, size_t srcSize);
@@ -1,22 +1,109 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #include "zstd_ldm.h"
11
12
 
12
- #include "debug.h"
13
+ #include "../common/debug.h"
14
+ #include "../common/xxhash.h"
13
15
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
16
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
17
+ #include "zstd_ldm_geartab.h"
15
18
 
16
19
  #define LDM_BUCKET_SIZE_LOG 3
17
20
  #define LDM_MIN_MATCH_LENGTH 64
18
21
  #define LDM_HASH_RLOG 7
19
- #define LDM_HASH_CHAR_OFFSET 10
22
+
23
+ typedef struct {
24
+ U64 rolling;
25
+ U64 stopMask;
26
+ } ldmRollingHashState_t;
27
+
28
+ /** ZSTD_ldm_gear_init():
29
+ *
30
+ * Initializes the rolling hash state such that it will honor the
31
+ * settings in params. */
32
+ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
33
+ {
34
+ unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
35
+ unsigned hashRateLog = params->hashRateLog;
36
+
37
+ state->rolling = ~(U32)0;
38
+
39
+ /* The choice of the splitting criterion is subject to two conditions:
40
+ * 1. it has to trigger on average every 2^(hashRateLog) bytes;
41
+ * 2. ideally, it has to depend on a window of minMatchLength bytes.
42
+ *
43
+ * In the gear hash algorithm, bit n depends on the last n bytes;
44
+ * so in order to obtain a good quality splitting criterion it is
45
+ * preferable to use bits with high weight.
46
+ *
47
+ * To match condition 1 we use a mask with hashRateLog bits set
48
+ * and, because of the previous remark, we make sure these bits
49
+ * have the highest possible weight while still respecting
50
+ * condition 2.
51
+ */
52
+ if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
53
+ state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
54
+ } else {
55
+ /* In this degenerate case we simply honor the hash rate. */
56
+ state->stopMask = ((U64)1 << hashRateLog) - 1;
57
+ }
58
+ }
59
+
60
+ /** ZSTD_ldm_gear_feed():
61
+ *
62
+ * Registers in the splits array all the split points found in the first
63
+ * size bytes following the data pointer. This function terminates when
64
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
65
+ * present in the splits array.
66
+ *
67
+ * Precondition: The splits array must not be full.
68
+ * Returns: The number of bytes processed. */
69
+ static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
70
+ BYTE const* data, size_t size,
71
+ size_t* splits, unsigned* numSplits)
72
+ {
73
+ size_t n;
74
+ U64 hash, mask;
75
+
76
+ hash = state->rolling;
77
+ mask = state->stopMask;
78
+ n = 0;
79
+
80
+ #define GEAR_ITER_ONCE() do { \
81
+ hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
82
+ n += 1; \
83
+ if (UNLIKELY((hash & mask) == 0)) { \
84
+ splits[*numSplits] = n; \
85
+ *numSplits += 1; \
86
+ if (*numSplits == LDM_BATCH_SIZE) \
87
+ goto done; \
88
+ } \
89
+ } while (0)
90
+
91
+ while (n + 3 < size) {
92
+ GEAR_ITER_ONCE();
93
+ GEAR_ITER_ONCE();
94
+ GEAR_ITER_ONCE();
95
+ GEAR_ITER_ONCE();
96
+ }
97
+ while (n < size) {
98
+ GEAR_ITER_ONCE();
99
+ }
100
+
101
+ #undef GEAR_ITER_ONCE
102
+
103
+ done:
104
+ state->rolling = hash;
105
+ return n;
106
+ }
20
107
 
21
108
  void ZSTD_ldm_adjustParameters(ldmParams_t* params,
22
109
  ZSTD_compressionParameters const* cParams)
@@ -26,13 +113,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
26
113
  DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
27
114
  if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
28
115
  if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
29
- if (cParams->strategy >= ZSTD_btopt) {
30
- /* Get out of the way of the optimal parser */
31
- U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
32
- assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
33
- assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
34
- params->minMatchLength = minMatch;
35
- }
36
116
  if (params->hashLog == 0) {
37
117
  params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
38
118
  assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -49,9 +129,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
49
129
  {
50
130
  size_t const ldmHSize = ((size_t)1) << params.hashLog;
51
131
  size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
52
- size_t const ldmBucketSize =
53
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
132
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
133
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
134
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
55
135
  return params.enableLdm ? totalSize : 0;
56
136
  }
57
137
 
@@ -60,41 +140,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
60
140
  return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
61
141
  }
62
142
 
63
- /** ZSTD_ldm_getSmallHash() :
64
- * numBits should be <= 32
65
- * If numBits==0, returns 0.
66
- * @return : the most significant numBits of value. */
67
- static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
68
- {
69
- assert(numBits <= 32);
70
- return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
71
- }
72
-
73
- /** ZSTD_ldm_getChecksum() :
74
- * numBitsToDiscard should be <= 32
75
- * @return : the next most significant 32 bits after numBitsToDiscard */
76
- static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
77
- {
78
- assert(numBitsToDiscard <= 32);
79
- return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
80
- }
81
-
82
- /** ZSTD_ldm_getTag() ;
83
- * Given the hash, returns the most significant numTagBits bits
84
- * after (32 + hbits) bits.
85
- *
86
- * If there are not enough bits remaining, return the last
87
- * numTagBits bits. */
88
- static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
89
- {
90
- assert(numTagBits < 32 && hbits <= 32);
91
- if (32 - hbits < numTagBits) {
92
- return hash & (((U32)1 << numTagBits) - 1);
93
- } else {
94
- return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
95
- }
96
- }
97
-
98
143
  /** ZSTD_ldm_getBucket() :
99
144
  * Returns a pointer to the start of the bucket associated with hash. */
100
145
  static ldmEntry_t* ZSTD_ldm_getBucket(
@@ -109,38 +154,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
109
154
  size_t const hash, const ldmEntry_t entry,
110
155
  ldmParams_t const ldmParams)
111
156
  {
112
- BYTE* const bucketOffsets = ldmState->bucketOffsets;
113
- *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
114
- bucketOffsets[hash]++;
115
- bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
116
- }
157
+ BYTE* const pOffset = ldmState->bucketOffsets + hash;
158
+ unsigned const offset = *pOffset;
159
+
160
+ *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
161
+ *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
117
162
 
118
- /** ZSTD_ldm_makeEntryAndInsertByTag() :
119
- *
120
- * Gets the small hash, checksum, and tag from the rollingHash.
121
- *
122
- * If the tag matches (1 << ldmParams.hashRateLog)-1, then
123
- * creates an ldmEntry from the offset, and inserts it into the hash table.
124
- *
125
- * hBits is the length of the small hash, which is the most significant hBits
126
- * of rollingHash. The checksum is the next 32 most significant bits, followed
127
- * by ldmParams.hashRateLog bits that make up the tag. */
128
- static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
129
- U64 const rollingHash,
130
- U32 const hBits,
131
- U32 const offset,
132
- ldmParams_t const ldmParams)
133
- {
134
- U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
135
- U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
136
- if (tag == tagMask) {
137
- U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
138
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
139
- ldmEntry_t entry;
140
- entry.offset = offset;
141
- entry.checksum = checksum;
142
- ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
143
- }
144
163
  }
145
164
 
146
165
  /** ZSTD_ldm_countBackwardsMatch() :
@@ -149,10 +168,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
149
168
  * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
150
169
  static size_t ZSTD_ldm_countBackwardsMatch(
151
170
  const BYTE* pIn, const BYTE* pAnchor,
152
- const BYTE* pMatch, const BYTE* pBase)
171
+ const BYTE* pMatch, const BYTE* pMatchBase)
153
172
  {
154
173
  size_t matchLength = 0;
155
- while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
174
+ while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
156
175
  pIn--;
157
176
  pMatch--;
158
177
  matchLength++;
@@ -160,6 +179,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
160
179
  return matchLength;
161
180
  }
162
181
 
182
+ /** ZSTD_ldm_countBackwardsMatch_2segments() :
183
+ * Returns the number of bytes that match backwards from pMatch,
184
+ * even with the backwards match spanning 2 different segments.
185
+ *
186
+ * On reaching `pMatchBase`, continue counting backwards from `pExtDictEnd` */
187
+ static size_t ZSTD_ldm_countBackwardsMatch_2segments(
188
+ const BYTE* pIn, const BYTE* pAnchor,
189
+ const BYTE* pMatch, const BYTE* pMatchBase,
190
+ const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
191
+ {
192
+ size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
193
+ if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
194
+ /* If backwards match is entirely in the extDict or prefix, immediately return */
195
+ return matchLength;
196
+ }
197
+ DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
198
+ matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
199
+ DEBUGLOG(7, "final backwards match length = %zu", matchLength);
200
+ return matchLength;
201
+ }
202
+
163
203
  /** ZSTD_ldm_fillFastTables() :
164
204
  *
165
205
  * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@@ -197,30 +237,43 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
197
237
  return 0;
198
238
  }
199
239
 
200
- /** ZSTD_ldm_fillLdmHashTable() :
201
- *
202
- * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
203
- * lastHash is the rolling hash that corresponds to lastHashed.
204
- *
205
- * Returns the rolling hash corresponding to position iend-1. */
206
- static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
207
- U64 lastHash, const BYTE* lastHashed,
208
- const BYTE* iend, const BYTE* base,
209
- U32 hBits, ldmParams_t const ldmParams)
240
+ void ZSTD_ldm_fillHashTable(
241
+ ldmState_t* ldmState, const BYTE* ip,
242
+ const BYTE* iend, ldmParams_t const* params)
210
243
  {
211
- U64 rollingHash = lastHash;
212
- const BYTE* cur = lastHashed + 1;
213
-
214
- while (cur < iend) {
215
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
216
- cur[ldmParams.minMatchLength-1],
217
- state->hashPower);
218
- ZSTD_ldm_makeEntryAndInsertByTag(state,
219
- rollingHash, hBits,
220
- (U32)(cur - base), ldmParams);
221
- ++cur;
244
+ U32 const minMatchLength = params->minMatchLength;
245
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
246
+ BYTE const* const base = ldmState->window.base;
247
+ BYTE const* const istart = ip;
248
+ ldmRollingHashState_t hashState;
249
+ size_t* const splits = ldmState->splitIndices;
250
+ unsigned numSplits;
251
+
252
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
253
+
254
+ ZSTD_ldm_gear_init(&hashState, params);
255
+ while (ip < iend) {
256
+ size_t hashed;
257
+ unsigned n;
258
+
259
+ numSplits = 0;
260
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
261
+
262
+ for (n = 0; n < numSplits; n++) {
263
+ if (ip + splits[n] >= istart + minMatchLength) {
264
+ BYTE const* const split = ip + splits[n] - minMatchLength;
265
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
266
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
267
+ ldmEntry_t entry;
268
+
269
+ entry.offset = (U32)(split - base);
270
+ entry.checksum = (U32)(xxhash >> 32);
271
+ ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
272
+ }
273
+ }
274
+
275
+ ip += hashed;
222
276
  }
223
- return rollingHash;
224
277
  }
225
278
 
226
279
 
@@ -231,10 +284,10 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
231
284
  * (after a long match, only update tables a limited amount). */
232
285
  static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
233
286
  {
234
- U32 const current = (U32)(anchor - ms->window.base);
235
- if (current > ms->nextToUpdate + 1024) {
287
+ U32 const curr = (U32)(anchor - ms->window.base);
288
+ if (curr > ms->nextToUpdate + 1024) {
236
289
  ms->nextToUpdate =
237
- current - MIN(512, current - ms->nextToUpdate - 1024);
290
+ curr - MIN(512, curr - ms->nextToUpdate - 1024);
238
291
  }
239
292
  }
240
293
 
@@ -245,11 +298,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
245
298
  /* LDM parameters */
246
299
  int const extDict = ZSTD_window_hasExtDict(ldmState->window);
247
300
  U32 const minMatchLength = params->minMatchLength;
248
- U64 const hashPower = ldmState->hashPower;
301
+ U32 const entsPerBucket = 1U << params->bucketSizeLog;
249
302
  U32 const hBits = params->hashLog - params->bucketSizeLog;
250
- U32 const ldmBucketSize = 1U << params->bucketSizeLog;
251
- U32 const hashRateLog = params->hashRateLog;
252
- U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
253
303
  /* Prefix and extDict parameters */
254
304
  U32 const dictLimit = ldmState->window.dictLimit;
255
305
  U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -261,45 +311,76 @@ static size_t ZSTD_ldm_generateSequences_internal(
261
311
  /* Input bounds */
262
312
  BYTE const* const istart = (BYTE const*)src;
263
313
  BYTE const* const iend = istart + srcSize;
264
- BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
314
+ BYTE const* const ilimit = iend - HASH_READ_SIZE;
265
315
  /* Input positions */
266
316
  BYTE const* anchor = istart;
267
317
  BYTE const* ip = istart;
268
- /* Rolling hash */
269
- BYTE const* lastHashed = NULL;
270
- U64 rollingHash = 0;
271
-
272
- while (ip <= ilimit) {
273
- size_t mLength;
274
- U32 const current = (U32)(ip - base);
275
- size_t forwardMatchLength = 0, backwardMatchLength = 0;
276
- ldmEntry_t* bestEntry = NULL;
277
- if (ip != istart) {
278
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
279
- lastHashed[minMatchLength],
280
- hashPower);
281
- } else {
282
- rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
318
+ /* Rolling hash state */
319
+ ldmRollingHashState_t hashState;
320
+ /* Arrays for staged-processing */
321
+ size_t* const splits = ldmState->splitIndices;
322
+ ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
323
+ unsigned numSplits;
324
+
325
+ if (srcSize < minMatchLength)
326
+ return iend - anchor;
327
+
328
+ /* Initialize the rolling hash state with the first minMatchLength bytes */
329
+ ZSTD_ldm_gear_init(&hashState, params);
330
+ {
331
+ size_t n = 0;
332
+
333
+ while (n < minMatchLength) {
334
+ numSplits = 0;
335
+ n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
336
+ splits, &numSplits);
283
337
  }
284
- lastHashed = ip;
338
+ ip += minMatchLength;
339
+ }
340
+
341
+ while (ip < ilimit) {
342
+ size_t hashed;
343
+ unsigned n;
344
+
345
+ numSplits = 0;
346
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
347
+ splits, &numSplits);
348
+
349
+ for (n = 0; n < numSplits; n++) {
350
+ BYTE const* const split = ip + splits[n] - minMatchLength;
351
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
352
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
285
353
 
286
- /* Do not insert and do not look for a match */
287
- if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
288
- ip++;
289
- continue;
354
+ candidates[n].split = split;
355
+ candidates[n].hash = hash;
356
+ candidates[n].checksum = (U32)(xxhash >> 32);
357
+ candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
358
+ PREFETCH_L1(candidates[n].bucket);
290
359
  }
291
360
 
292
- /* Get the best entry and compute the match lengths */
293
- {
294
- ldmEntry_t* const bucket =
295
- ZSTD_ldm_getBucket(ldmState,
296
- ZSTD_ldm_getSmallHash(rollingHash, hBits),
297
- *params);
298
- ldmEntry_t* cur;
299
- size_t bestMatchLength = 0;
300
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
301
-
302
- for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
361
+ for (n = 0; n < numSplits; n++) {
362
+ size_t forwardMatchLength = 0, backwardMatchLength = 0,
363
+ bestMatchLength = 0, mLength;
364
+ BYTE const* const split = candidates[n].split;
365
+ U32 const checksum = candidates[n].checksum;
366
+ U32 const hash = candidates[n].hash;
367
+ ldmEntry_t* const bucket = candidates[n].bucket;
368
+ ldmEntry_t const* cur;
369
+ ldmEntry_t const* bestEntry = NULL;
370
+ ldmEntry_t newEntry;
371
+
372
+ newEntry.offset = (U32)(split - base);
373
+ newEntry.checksum = checksum;
374
+
375
+ /* If a split point would generate a sequence overlapping with
376
+ * the previous one, we merely register it in the hash table and
377
+ * move on */
378
+ if (split < anchor) {
379
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
380
+ continue;
381
+ }
382
+
383
+ for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
303
384
  size_t curForwardMatchLength, curBackwardMatchLength,
304
385
  curTotalMatchLength;
305
386
  if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@@ -313,30 +394,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
313
394
  cur->offset < dictLimit ? dictEnd : iend;
314
395
  BYTE const* const lowMatchPtr =
315
396
  cur->offset < dictLimit ? dictStart : lowPrefixPtr;
316
-
317
- curForwardMatchLength = ZSTD_count_2segments(
318
- ip, pMatch, iend,
319
- matchEnd, lowPrefixPtr);
397
+ curForwardMatchLength =
398
+ ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
320
399
  if (curForwardMatchLength < minMatchLength) {
321
400
  continue;
322
401
  }
323
- curBackwardMatchLength =
324
- ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
325
- lowMatchPtr);
326
- curTotalMatchLength = curForwardMatchLength +
327
- curBackwardMatchLength;
402
+ curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
403
+ split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
328
404
  } else { /* !extDict */
329
405
  BYTE const* const pMatch = base + cur->offset;
330
- curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
406
+ curForwardMatchLength = ZSTD_count(split, pMatch, iend);
331
407
  if (curForwardMatchLength < minMatchLength) {
332
408
  continue;
333
409
  }
334
410
  curBackwardMatchLength =
335
- ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
336
- lowPrefixPtr);
337
- curTotalMatchLength = curForwardMatchLength +
338
- curBackwardMatchLength;
411
+ ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
339
412
  }
413
+ curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
340
414
 
341
415
  if (curTotalMatchLength > bestMatchLength) {
342
416
  bestMatchLength = curTotalMatchLength;
@@ -345,57 +419,39 @@ static size_t ZSTD_ldm_generateSequences_internal(
345
419
  bestEntry = cur;
346
420
  }
347
421
  }
348
- }
349
-
350
- /* No match found -- continue searching */
351
- if (bestEntry == NULL) {
352
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
353
- hBits, current,
354
- *params);
355
- ip++;
356
- continue;
357
- }
358
422
 
359
- /* Match found */
360
- mLength = forwardMatchLength + backwardMatchLength;
361
- ip -= backwardMatchLength;
362
-
363
- {
364
- /* Store the sequence:
365
- * ip = current - backwardMatchLength
366
- * The match is at (bestEntry->offset - backwardMatchLength)
367
- */
368
- U32 const matchIndex = bestEntry->offset;
369
- U32 const offset = current - matchIndex;
370
- rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
371
-
372
- /* Out of sequence storage */
373
- if (rawSeqStore->size == rawSeqStore->capacity)
374
- return ERROR(dstSize_tooSmall);
375
- seq->litLength = (U32)(ip - anchor);
376
- seq->matchLength = (U32)mLength;
377
- seq->offset = offset;
378
- rawSeqStore->size++;
379
- }
423
+ /* No match found -- insert an entry into the hash table
424
+ * and process the next candidate match */
425
+ if (bestEntry == NULL) {
426
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
427
+ continue;
428
+ }
380
429
 
381
- /* Insert the current entry into the hash table */
382
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
383
- (U32)(lastHashed - base),
384
- *params);
430
+ /* Match found */
431
+ mLength = forwardMatchLength + backwardMatchLength;
432
+ {
433
+ U32 const offset = (U32)(split - base) - bestEntry->offset;
434
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
435
+
436
+ /* Out of sequence storage */
437
+ if (rawSeqStore->size == rawSeqStore->capacity)
438
+ return ERROR(dstSize_tooSmall);
439
+ seq->litLength = (U32)(split - backwardMatchLength - anchor);
440
+ seq->matchLength = (U32)mLength;
441
+ seq->offset = offset;
442
+ rawSeqStore->size++;
443
+ }
385
444
 
386
- assert(ip + backwardMatchLength == lastHashed);
445
+ /* Insert the current entry into the hash table --- it must be
446
+ * done after the previous block to avoid clobbering bestEntry */
447
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
387
448
 
388
- /* Fill the hash table from lastHashed+1 to ip+mLength*/
389
- /* Heuristic: don't need to fill the entire table at end of block */
390
- if (ip + mLength <= ilimit) {
391
- rollingHash = ZSTD_ldm_fillLdmHashTable(
392
- ldmState, rollingHash, lastHashed,
393
- ip + mLength, base, hBits, *params);
394
- lastHashed = ip + mLength - 1;
449
+ anchor = split + forwardMatchLength;
395
450
  }
396
- ip += mLength;
397
- anchor = ip;
451
+
452
+ ip += hashed;
398
453
  }
454
+
399
455
  return iend - anchor;
400
456
  }
401
457
 
@@ -447,8 +503,10 @@ size_t ZSTD_ldm_generateSequences(
447
503
  if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
448
504
  U32 const ldmHSize = 1U << params->hashLog;
449
505
  U32 const correction = ZSTD_window_correctOverflow(
450
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
506
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
507
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
508
+ /* invalidate dictionaries on overflow correction */
509
+ ldmState->loadedDictEnd = 0;
452
510
  }
453
511
  /* 2. We enforce the maximum offset allowed.
454
512
  *
@@ -457,8 +515,14 @@ size_t ZSTD_ldm_generateSequences(
457
515
  * TODO: * Test the chunk size.
458
516
  * * Try invalidation after the sequence generation and test the
459
517
  * offset against maxDist directly.
518
+ *
519
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
520
+ * that any offset used is valid at the END of the sequence, since it may
521
+ * be split into two sequences. This condition holds when using
522
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
523
+ * against maxDist directly, we'll have to carefully handle that case.
460
524
  */
461
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
525
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
462
526
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
527
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
528
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -539,6 +603,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
539
603
  return sequence;
540
604
  }
541
605
 
606
+ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
607
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
608
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
609
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
610
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
611
+ currPos -= currSeq.litLength + currSeq.matchLength;
612
+ rawSeqStore->pos++;
613
+ } else {
614
+ rawSeqStore->posInSequence = currPos;
615
+ break;
616
+ }
617
+ }
618
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
619
+ rawSeqStore->posInSequence = 0;
620
+ }
621
+ }
622
+
542
623
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
543
624
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
544
625
  void const* src, size_t srcSize)
@@ -554,9 +635,18 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
554
635
  BYTE const* ip = istart;
555
636
 
556
637
  DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
638
+ /* If using opt parser, use LDMs only as candidates rather than always accepting them */
639
+ if (cParams->strategy >= ZSTD_btopt) {
640
+ size_t lastLLSize;
641
+ ms->ldmSeqStore = rawSeqStore;
642
+ lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
643
+ ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
644
+ return lastLLSize;
645
+ }
646
+
557
647
  assert(rawSeqStore->pos <= rawSeqStore->size);
558
648
  assert(rawSeqStore->size <= rawSeqStore->capacity);
559
- /* Loop through each sequence and apply the block compressor to the lits */
649
+ /* Loop through each sequence and apply the block compressor to the literals */
560
650
  while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
561
651
  /* maybeSplitSequence updates rawSeqStore->pos */
562
652
  rawSeq const sequence = maybeSplitSequence(rawSeqStore,
@@ -566,14 +656,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
566
656
  if (sequence.offset == 0)
567
657
  break;
568
658
 
569
- assert(sequence.offset <= (1U << cParams->windowLog));
570
659
  assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
660
 
572
661
  /* Fill tables for block compressor */
573
662
  ZSTD_ldm_limitTableUpdate(ms, ip);
574
663
  ZSTD_ldm_fillFastTables(ms, ip);
575
664
  /* Run the block compressor */
576
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
665
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
577
666
  {
578
667
  size_t const newLitLength =
579
668
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +672,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
583
672
  rep[i] = rep[i-1];
584
673
  rep[0] = sequence.offset;
585
674
  /* Store the sequence */
586
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
675
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
587
676
  sequence.offset + ZSTD_REP_MOVE,
588
677
  sequence.matchLength - MINMATCH);
589
678
  ip += sequence.matchLength;