extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,13 +11,126 @@
11
11
  #include "zstd_ldm.h"
12
12
 
13
13
  #include "../common/debug.h"
14
+ #include "../common/xxhash.h"
14
15
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
15
16
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
17
+ #include "zstd_ldm_geartab.h"
16
18
 
17
19
  #define LDM_BUCKET_SIZE_LOG 3
18
20
  #define LDM_MIN_MATCH_LENGTH 64
19
21
  #define LDM_HASH_RLOG 7
20
- #define LDM_HASH_CHAR_OFFSET 10
22
+
23
+ typedef struct {
24
+ U64 rolling;
25
+ U64 stopMask;
26
+ } ldmRollingHashState_t;
27
+
28
+ /** ZSTD_ldm_gear_init():
29
+ *
30
+ * Initializes the rolling hash state such that it will honor the
31
+ * settings in params. */
32
+ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
33
+ {
34
+ unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
35
+ unsigned hashRateLog = params->hashRateLog;
36
+
37
+ state->rolling = ~(U32)0;
38
+
39
+ /* The choice of the splitting criterion is subject to two conditions:
40
+ * 1. it has to trigger on average every 2^(hashRateLog) bytes;
41
+ * 2. ideally, it has to depend on a window of minMatchLength bytes.
42
+ *
43
+ * In the gear hash algorithm, bit n depends on the last n bytes;
44
+ * so in order to obtain a good quality splitting criterion it is
45
+ * preferable to use bits with high weight.
46
+ *
47
+ * To match condition 1 we use a mask with hashRateLog bits set
48
+ * and, because of the previous remark, we make sure these bits
49
+ * have the highest possible weight while still respecting
50
+ * condition 2.
51
+ */
52
+ if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
53
+ state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
54
+ } else {
55
+ /* In this degenerate case we simply honor the hash rate. */
56
+ state->stopMask = ((U64)1 << hashRateLog) - 1;
57
+ }
58
+ }
59
+
60
+ /** ZSTD_ldm_gear_reset()
61
+ * Feeds [data, data + minMatchLength) into the hash without registering any
62
+ * splits. This effectively resets the hash state. This is used when skipping
63
+ * over data, either at the beginning of a block, or skipping sections.
64
+ */
65
+ static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
66
+ BYTE const* data, size_t minMatchLength)
67
+ {
68
+ U64 hash = state->rolling;
69
+ size_t n = 0;
70
+
71
+ #define GEAR_ITER_ONCE() do { \
72
+ hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
73
+ n += 1; \
74
+ } while (0)
75
+ while (n + 3 < minMatchLength) {
76
+ GEAR_ITER_ONCE();
77
+ GEAR_ITER_ONCE();
78
+ GEAR_ITER_ONCE();
79
+ GEAR_ITER_ONCE();
80
+ }
81
+ while (n < minMatchLength) {
82
+ GEAR_ITER_ONCE();
83
+ }
84
+ #undef GEAR_ITER_ONCE
85
+ }
86
+
87
+ /** ZSTD_ldm_gear_feed():
88
+ *
89
+ * Registers in the splits array all the split points found in the first
90
+ * size bytes following the data pointer. This function terminates when
91
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
92
+ * present in the splits array.
93
+ *
94
+ * Precondition: The splits array must not be full.
95
+ * Returns: The number of bytes processed. */
96
+ static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
97
+ BYTE const* data, size_t size,
98
+ size_t* splits, unsigned* numSplits)
99
+ {
100
+ size_t n;
101
+ U64 hash, mask;
102
+
103
+ hash = state->rolling;
104
+ mask = state->stopMask;
105
+ n = 0;
106
+
107
+ #define GEAR_ITER_ONCE() do { \
108
+ hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
109
+ n += 1; \
110
+ if (UNLIKELY((hash & mask) == 0)) { \
111
+ splits[*numSplits] = n; \
112
+ *numSplits += 1; \
113
+ if (*numSplits == LDM_BATCH_SIZE) \
114
+ goto done; \
115
+ } \
116
+ } while (0)
117
+
118
+ while (n + 3 < size) {
119
+ GEAR_ITER_ONCE();
120
+ GEAR_ITER_ONCE();
121
+ GEAR_ITER_ONCE();
122
+ GEAR_ITER_ONCE();
123
+ }
124
+ while (n < size) {
125
+ GEAR_ITER_ONCE();
126
+ }
127
+
128
+ #undef GEAR_ITER_ONCE
129
+
130
+ done:
131
+ state->rolling = hash;
132
+ return n;
133
+ }
21
134
 
22
135
  void ZSTD_ldm_adjustParameters(ldmParams_t* params,
23
136
  ZSTD_compressionParameters const* cParams)
@@ -46,47 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
46
159
  size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
47
160
  size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
48
161
  + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
49
- return params.enableLdm ? totalSize : 0;
162
+ return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
50
163
  }
51
164
 
52
165
  size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
53
166
  {
54
- return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
55
- }
56
-
57
- /** ZSTD_ldm_getSmallHash() :
58
- * numBits should be <= 32
59
- * If numBits==0, returns 0.
60
- * @return : the most significant numBits of value. */
61
- static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
62
- {
63
- assert(numBits <= 32);
64
- return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
65
- }
66
-
67
- /** ZSTD_ldm_getChecksum() :
68
- * numBitsToDiscard should be <= 32
69
- * @return : the next most significant 32 bits after numBitsToDiscard */
70
- static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
71
- {
72
- assert(numBitsToDiscard <= 32);
73
- return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
74
- }
75
-
76
- /** ZSTD_ldm_getTag() ;
77
- * Given the hash, returns the most significant numTagBits bits
78
- * after (32 + hbits) bits.
79
- *
80
- * If there are not enough bits remaining, return the last
81
- * numTagBits bits. */
82
- static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
83
- {
84
- assert(numTagBits < 32 && hbits <= 32);
85
- if (32 - hbits < numTagBits) {
86
- return hash & (((U32)1 << numTagBits) - 1);
87
- } else {
88
- return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
89
- }
167
+ return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
90
168
  }
91
169
 
92
170
  /** ZSTD_ldm_getBucket() :
@@ -103,38 +181,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
103
181
  size_t const hash, const ldmEntry_t entry,
104
182
  ldmParams_t const ldmParams)
105
183
  {
106
- BYTE* const bucketOffsets = ldmState->bucketOffsets;
107
- *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
108
- bucketOffsets[hash]++;
109
- bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
110
- }
184
+ BYTE* const pOffset = ldmState->bucketOffsets + hash;
185
+ unsigned const offset = *pOffset;
186
+
187
+ *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
188
+ *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
111
189
 
112
- /** ZSTD_ldm_makeEntryAndInsertByTag() :
113
- *
114
- * Gets the small hash, checksum, and tag from the rollingHash.
115
- *
116
- * If the tag matches (1 << ldmParams.hashRateLog)-1, then
117
- * creates an ldmEntry from the offset, and inserts it into the hash table.
118
- *
119
- * hBits is the length of the small hash, which is the most significant hBits
120
- * of rollingHash. The checksum is the next 32 most significant bits, followed
121
- * by ldmParams.hashRateLog bits that make up the tag. */
122
- static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
123
- U64 const rollingHash,
124
- U32 const hBits,
125
- U32 const offset,
126
- ldmParams_t const ldmParams)
127
- {
128
- U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
129
- U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
130
- if (tag == tagMask) {
131
- U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
132
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
133
- ldmEntry_t entry;
134
- entry.offset = offset;
135
- entry.checksum = checksum;
136
- ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
137
- }
138
190
  }
139
191
 
140
192
  /** ZSTD_ldm_countBackwardsMatch() :
@@ -190,11 +242,15 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
190
242
  switch(ms->cParams.strategy)
191
243
  {
192
244
  case ZSTD_fast:
193
- ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
245
+ ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
194
246
  break;
195
247
 
196
248
  case ZSTD_dfast:
197
- ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
249
+ #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
250
+ ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
251
+ #else
252
+ assert(0); /* shouldn't be called: cparams should've been adjusted. */
253
+ #endif
198
254
  break;
199
255
 
200
256
  case ZSTD_greedy:
@@ -212,43 +268,42 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
212
268
  return 0;
213
269
  }
214
270
 
215
- /** ZSTD_ldm_fillLdmHashTable() :
216
- *
217
- * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
218
- * lastHash is the rolling hash that corresponds to lastHashed.
219
- *
220
- * Returns the rolling hash corresponding to position iend-1. */
221
- static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
222
- U64 lastHash, const BYTE* lastHashed,
223
- const BYTE* iend, const BYTE* base,
224
- U32 hBits, ldmParams_t const ldmParams)
225
- {
226
- U64 rollingHash = lastHash;
227
- const BYTE* cur = lastHashed + 1;
228
-
229
- while (cur < iend) {
230
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
231
- cur[ldmParams.minMatchLength-1],
232
- state->hashPower);
233
- ZSTD_ldm_makeEntryAndInsertByTag(state,
234
- rollingHash, hBits,
235
- (U32)(cur - base), ldmParams);
236
- ++cur;
237
- }
238
- return rollingHash;
239
- }
240
-
241
271
  void ZSTD_ldm_fillHashTable(
242
- ldmState_t* state, const BYTE* ip,
272
+ ldmState_t* ldmState, const BYTE* ip,
243
273
  const BYTE* iend, ldmParams_t const* params)
244
274
  {
275
+ U32 const minMatchLength = params->minMatchLength;
276
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
277
+ BYTE const* const base = ldmState->window.base;
278
+ BYTE const* const istart = ip;
279
+ ldmRollingHashState_t hashState;
280
+ size_t* const splits = ldmState->splitIndices;
281
+ unsigned numSplits;
282
+
245
283
  DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
246
- if ((size_t)(iend - ip) >= params->minMatchLength) {
247
- U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
248
- ZSTD_ldm_fillLdmHashTable(
249
- state, startingHash, ip, iend - params->minMatchLength, state->window.base,
250
- params->hashLog - params->bucketSizeLog,
251
- *params);
284
+
285
+ ZSTD_ldm_gear_init(&hashState, params);
286
+ while (ip < iend) {
287
+ size_t hashed;
288
+ unsigned n;
289
+
290
+ numSplits = 0;
291
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
292
+
293
+ for (n = 0; n < numSplits; n++) {
294
+ if (ip + splits[n] >= istart + minMatchLength) {
295
+ BYTE const* const split = ip + splits[n] - minMatchLength;
296
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
297
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
298
+ ldmEntry_t entry;
299
+
300
+ entry.offset = (U32)(split - base);
301
+ entry.checksum = (U32)(xxhash >> 32);
302
+ ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
303
+ }
304
+ }
305
+
306
+ ip += hashed;
252
307
  }
253
308
  }
254
309
 
@@ -267,18 +322,17 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
267
322
  }
268
323
  }
269
324
 
270
- static size_t ZSTD_ldm_generateSequences_internal(
325
+ static
326
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
327
+ size_t ZSTD_ldm_generateSequences_internal(
271
328
  ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
272
329
  ldmParams_t const* params, void const* src, size_t srcSize)
273
330
  {
274
331
  /* LDM parameters */
275
332
  int const extDict = ZSTD_window_hasExtDict(ldmState->window);
276
333
  U32 const minMatchLength = params->minMatchLength;
277
- U64 const hashPower = ldmState->hashPower;
334
+ U32 const entsPerBucket = 1U << params->bucketSizeLog;
278
335
  U32 const hBits = params->hashLog - params->bucketSizeLog;
279
- U32 const ldmBucketSize = 1U << params->bucketSizeLog;
280
- U32 const hashRateLog = params->hashRateLog;
281
- U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
282
336
  /* Prefix and extDict parameters */
283
337
  U32 const dictLimit = ldmState->window.dictLimit;
284
338
  U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -290,45 +344,69 @@ static size_t ZSTD_ldm_generateSequences_internal(
290
344
  /* Input bounds */
291
345
  BYTE const* const istart = (BYTE const*)src;
292
346
  BYTE const* const iend = istart + srcSize;
293
- BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
347
+ BYTE const* const ilimit = iend - HASH_READ_SIZE;
294
348
  /* Input positions */
295
349
  BYTE const* anchor = istart;
296
350
  BYTE const* ip = istart;
297
- /* Rolling hash */
298
- BYTE const* lastHashed = NULL;
299
- U64 rollingHash = 0;
300
-
301
- while (ip <= ilimit) {
302
- size_t mLength;
303
- U32 const curr = (U32)(ip - base);
304
- size_t forwardMatchLength = 0, backwardMatchLength = 0;
305
- ldmEntry_t* bestEntry = NULL;
306
- if (ip != istart) {
307
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
308
- lastHashed[minMatchLength],
309
- hashPower);
310
- } else {
311
- rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
351
+ /* Rolling hash state */
352
+ ldmRollingHashState_t hashState;
353
+ /* Arrays for staged-processing */
354
+ size_t* const splits = ldmState->splitIndices;
355
+ ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
356
+ unsigned numSplits;
357
+
358
+ if (srcSize < minMatchLength)
359
+ return iend - anchor;
360
+
361
+ /* Initialize the rolling hash state with the first minMatchLength bytes */
362
+ ZSTD_ldm_gear_init(&hashState, params);
363
+ ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
364
+ ip += minMatchLength;
365
+
366
+ while (ip < ilimit) {
367
+ size_t hashed;
368
+ unsigned n;
369
+
370
+ numSplits = 0;
371
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
372
+ splits, &numSplits);
373
+
374
+ for (n = 0; n < numSplits; n++) {
375
+ BYTE const* const split = ip + splits[n] - minMatchLength;
376
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
377
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
378
+
379
+ candidates[n].split = split;
380
+ candidates[n].hash = hash;
381
+ candidates[n].checksum = (U32)(xxhash >> 32);
382
+ candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
383
+ PREFETCH_L1(candidates[n].bucket);
312
384
  }
313
- lastHashed = ip;
314
385
 
315
- /* Do not insert and do not look for a match */
316
- if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
317
- ip++;
318
- continue;
319
- }
386
+ for (n = 0; n < numSplits; n++) {
387
+ size_t forwardMatchLength = 0, backwardMatchLength = 0,
388
+ bestMatchLength = 0, mLength;
389
+ U32 offset;
390
+ BYTE const* const split = candidates[n].split;
391
+ U32 const checksum = candidates[n].checksum;
392
+ U32 const hash = candidates[n].hash;
393
+ ldmEntry_t* const bucket = candidates[n].bucket;
394
+ ldmEntry_t const* cur;
395
+ ldmEntry_t const* bestEntry = NULL;
396
+ ldmEntry_t newEntry;
397
+
398
+ newEntry.offset = (U32)(split - base);
399
+ newEntry.checksum = checksum;
400
+
401
+ /* If a split point would generate a sequence overlapping with
402
+ * the previous one, we merely register it in the hash table and
403
+ * move on */
404
+ if (split < anchor) {
405
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
406
+ continue;
407
+ }
320
408
 
321
- /* Get the best entry and compute the match lengths */
322
- {
323
- ldmEntry_t* const bucket =
324
- ZSTD_ldm_getBucket(ldmState,
325
- ZSTD_ldm_getSmallHash(rollingHash, hBits),
326
- *params);
327
- ldmEntry_t* cur;
328
- size_t bestMatchLength = 0;
329
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
330
-
331
- for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
409
+ for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
332
410
  size_t curForwardMatchLength, curBackwardMatchLength,
333
411
  curTotalMatchLength;
334
412
  if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@@ -342,31 +420,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
342
420
  cur->offset < dictLimit ? dictEnd : iend;
343
421
  BYTE const* const lowMatchPtr =
344
422
  cur->offset < dictLimit ? dictStart : lowPrefixPtr;
345
-
346
- curForwardMatchLength = ZSTD_count_2segments(
347
- ip, pMatch, iend,
348
- matchEnd, lowPrefixPtr);
423
+ curForwardMatchLength =
424
+ ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
349
425
  if (curForwardMatchLength < minMatchLength) {
350
426
  continue;
351
427
  }
352
- curBackwardMatchLength =
353
- ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor,
354
- pMatch, lowMatchPtr,
355
- dictStart, dictEnd);
356
- curTotalMatchLength = curForwardMatchLength +
357
- curBackwardMatchLength;
428
+ curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
429
+ split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
358
430
  } else { /* !extDict */
359
431
  BYTE const* const pMatch = base + cur->offset;
360
- curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
432
+ curForwardMatchLength = ZSTD_count(split, pMatch, iend);
361
433
  if (curForwardMatchLength < minMatchLength) {
362
434
  continue;
363
435
  }
364
436
  curBackwardMatchLength =
365
- ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
366
- lowPrefixPtr);
367
- curTotalMatchLength = curForwardMatchLength +
368
- curBackwardMatchLength;
437
+ ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
369
438
  }
439
+ curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
370
440
 
371
441
  if (curTotalMatchLength > bestMatchLength) {
372
442
  bestMatchLength = curTotalMatchLength;
@@ -375,57 +445,54 @@ static size_t ZSTD_ldm_generateSequences_internal(
375
445
  bestEntry = cur;
376
446
  }
377
447
  }
378
- }
379
448
 
380
- /* No match found -- continue searching */
381
- if (bestEntry == NULL) {
382
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
383
- hBits, curr,
384
- *params);
385
- ip++;
386
- continue;
387
- }
388
-
389
- /* Match found */
390
- mLength = forwardMatchLength + backwardMatchLength;
391
- ip -= backwardMatchLength;
449
+ /* No match found -- insert an entry into the hash table
450
+ * and process the next candidate match */
451
+ if (bestEntry == NULL) {
452
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
453
+ continue;
454
+ }
392
455
 
393
- {
394
- /* Store the sequence:
395
- * ip = curr - backwardMatchLength
396
- * The match is at (bestEntry->offset - backwardMatchLength)
397
- */
398
- U32 const matchIndex = bestEntry->offset;
399
- U32 const offset = curr - matchIndex;
400
- rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
401
-
402
- /* Out of sequence storage */
403
- if (rawSeqStore->size == rawSeqStore->capacity)
404
- return ERROR(dstSize_tooSmall);
405
- seq->litLength = (U32)(ip - anchor);
406
- seq->matchLength = (U32)mLength;
407
- seq->offset = offset;
408
- rawSeqStore->size++;
409
- }
456
+ /* Match found */
457
+ offset = (U32)(split - base) - bestEntry->offset;
458
+ mLength = forwardMatchLength + backwardMatchLength;
459
+ {
460
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
461
+
462
+ /* Out of sequence storage */
463
+ if (rawSeqStore->size == rawSeqStore->capacity)
464
+ return ERROR(dstSize_tooSmall);
465
+ seq->litLength = (U32)(split - backwardMatchLength - anchor);
466
+ seq->matchLength = (U32)mLength;
467
+ seq->offset = offset;
468
+ rawSeqStore->size++;
469
+ }
410
470
 
411
- /* Insert the current entry into the hash table */
412
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
413
- (U32)(lastHashed - base),
414
- *params);
471
+ /* Insert the current entry into the hash table --- it must be
472
+ * done after the previous block to avoid clobbering bestEntry */
473
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
415
474
 
416
- assert(ip + backwardMatchLength == lastHashed);
475
+ anchor = split + forwardMatchLength;
417
476
 
418
- /* Fill the hash table from lastHashed+1 to ip+mLength*/
419
- /* Heuristic: don't need to fill the entire table at end of block */
420
- if (ip + mLength <= ilimit) {
421
- rollingHash = ZSTD_ldm_fillLdmHashTable(
422
- ldmState, rollingHash, lastHashed,
423
- ip + mLength, base, hBits, *params);
424
- lastHashed = ip + mLength - 1;
477
+ /* If we find a match that ends after the data that we've hashed
478
+ * then we have a repeating, overlapping, pattern. E.g. all zeros.
479
+ * If one repetition of the pattern matches our `stopMask` then all
480
+ * repetitions will. We don't need to insert them all into our table,
481
+ * only the first one. So skip over overlapping matches.
482
+ * This is a major speed boost (20x) for compressing a single byte
483
+ * repeated, when that byte ends up in the table.
484
+ */
485
+ if (anchor > ip + hashed) {
486
+ ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
487
+ /* Continue the outer loop at anchor (ip + hashed == anchor). */
488
+ ip = anchor - hashed;
489
+ break;
490
+ }
425
491
  }
426
- ip += mLength;
427
- anchor = ip;
492
+
493
+ ip += hashed;
428
494
  }
495
+
429
496
  return iend - anchor;
430
497
  }
431
498
 
@@ -474,7 +541,7 @@ size_t ZSTD_ldm_generateSequences(
474
541
 
475
542
  assert(chunkStart < iend);
476
543
  /* 1. Perform overflow correction if necessary. */
477
- if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
544
+ if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
478
545
  U32 const ldmHSize = 1U << params->hashLog;
479
546
  U32 const correction = ZSTD_window_correctOverflow(
480
547
  &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -488,7 +555,7 @@ size_t ZSTD_ldm_generateSequences(
488
555
  * the window through early invalidation.
489
556
  * TODO: * Test the chunk size.
490
557
  * * Try invalidation after the sequence generation and test the
491
- * the offset against maxDist directly.
558
+ * offset against maxDist directly.
492
559
  *
493
560
  * NOTE: Because of dictionaries + sequence splitting we MUST make sure
494
561
  * that any offset used is valid at the END of the sequence, since it may
@@ -518,7 +585,9 @@ size_t ZSTD_ldm_generateSequences(
518
585
  return 0;
519
586
  }
520
587
 
521
- void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
588
+ void
589
+ ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
590
+ {
522
591
  while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
523
592
  rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
524
593
  if (srcSize <= seq->litLength) {
@@ -596,12 +665,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
596
665
 
597
666
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
598
667
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
668
+ ZSTD_paramSwitch_e useRowMatchFinder,
599
669
  void const* src, size_t srcSize)
600
670
  {
601
671
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
602
672
  unsigned const minMatch = cParams->minMatch;
603
673
  ZSTD_blockCompressor const blockCompressor =
604
- ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
674
+ ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
605
675
  /* Input bounds */
606
676
  BYTE const* const istart = (BYTE const*)src;
607
677
  BYTE const* const iend = istart + srcSize;
@@ -620,12 +690,11 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
620
690
 
621
691
  assert(rawSeqStore->pos <= rawSeqStore->size);
622
692
  assert(rawSeqStore->size <= rawSeqStore->capacity);
623
- /* Loop through each sequence and apply the block compressor to the lits */
693
+ /* Loop through each sequence and apply the block compressor to the literals */
624
694
  while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
625
695
  /* maybeSplitSequence updates rawSeqStore->pos */
626
696
  rawSeq const sequence = maybeSplitSequence(rawSeqStore,
627
697
  (U32)(iend - ip), minMatch);
628
- int i;
629
698
  /* End signal */
630
699
  if (sequence.offset == 0)
631
700
  break;
@@ -638,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
638
707
  /* Run the block compressor */
639
708
  DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
640
709
  {
710
+ int i;
641
711
  size_t const newLitLength =
642
712
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
643
713
  ip += sequence.litLength;
@@ -647,8 +717,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
647
717
  rep[0] = sequence.offset;
648
718
  /* Store the sequence */
649
719
  ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
650
- sequence.offset + ZSTD_REP_MOVE,
651
- sequence.matchLength - MINMATCH);
720
+ OFFSET_TO_OFFBASE(sequence.offset),
721
+ sequence.matchLength);
652
722
  ip += sequence.matchLength;
653
723
  }
654
724
  }