zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,8 +17,18 @@ extern "C" {
17
17
 
18
18
  #include "zstd_compress_internal.h"
19
19
 
20
+ /**
21
+ * Dedicated Dictionary Search Structure bucket log. In the
22
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
23
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
24
+ * one.
25
+ */
26
+ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
27
+
20
28
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
21
29
 
30
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
31
+
22
32
  void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
23
33
 
24
34
  size_t ZSTD_compressBlock_btlazy2(
@@ -47,6 +57,16 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
47
57
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
48
58
  void const* src, size_t srcSize);
49
59
 
60
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
61
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
62
+ void const* src, size_t srcSize);
63
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
64
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
65
+ void const* src, size_t srcSize);
66
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
67
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
68
+ void const* src, size_t srcSize);
69
+
50
70
  size_t ZSTD_compressBlock_greedy_extDict(
51
71
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
52
72
  void const* src, size_t srcSize);
@@ -1,22 +1,109 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #include "zstd_ldm.h"
11
12
 
12
- #include "debug.h"
13
+ #include "../common/debug.h"
14
+ #include "../common/xxhash.h"
13
15
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
16
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
17
+ #include "zstd_ldm_geartab.h"
15
18
 
16
19
  #define LDM_BUCKET_SIZE_LOG 3
17
20
  #define LDM_MIN_MATCH_LENGTH 64
18
21
  #define LDM_HASH_RLOG 7
19
- #define LDM_HASH_CHAR_OFFSET 10
22
+
23
+ typedef struct {
24
+ U64 rolling;
25
+ U64 stopMask;
26
+ } ldmRollingHashState_t;
27
+
28
+ /** ZSTD_ldm_gear_init():
29
+ *
30
+ * Initializes the rolling hash state such that it will honor the
31
+ * settings in params. */
32
+ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
33
+ {
34
+ unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
35
+ unsigned hashRateLog = params->hashRateLog;
36
+
37
+ state->rolling = ~(U32)0;
38
+
39
+ /* The choice of the splitting criterion is subject to two conditions:
40
+ * 1. it has to trigger on average every 2^(hashRateLog) bytes;
41
+ * 2. ideally, it has to depend on a window of minMatchLength bytes.
42
+ *
43
+ * In the gear hash algorithm, bit n depends on the last n bytes;
44
+ * so in order to obtain a good quality splitting criterion it is
45
+ * preferable to use bits with high weight.
46
+ *
47
+ * To match condition 1 we use a mask with hashRateLog bits set
48
+ * and, because of the previous remark, we make sure these bits
49
+ * have the highest possible weight while still respecting
50
+ * condition 2.
51
+ */
52
+ if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
53
+ state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
54
+ } else {
55
+ /* In this degenerate case we simply honor the hash rate. */
56
+ state->stopMask = ((U64)1 << hashRateLog) - 1;
57
+ }
58
+ }
59
+
60
+ /** ZSTD_ldm_gear_feed():
61
+ *
62
+ * Registers in the splits array all the split points found in the first
63
+ * size bytes following the data pointer. This function terminates when
64
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
65
+ * present in the splits array.
66
+ *
67
+ * Precondition: The splits array must not be full.
68
+ * Returns: The number of bytes processed. */
69
+ static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
70
+ BYTE const* data, size_t size,
71
+ size_t* splits, unsigned* numSplits)
72
+ {
73
+ size_t n;
74
+ U64 hash, mask;
75
+
76
+ hash = state->rolling;
77
+ mask = state->stopMask;
78
+ n = 0;
79
+
80
+ #define GEAR_ITER_ONCE() do { \
81
+ hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
82
+ n += 1; \
83
+ if (UNLIKELY((hash & mask) == 0)) { \
84
+ splits[*numSplits] = n; \
85
+ *numSplits += 1; \
86
+ if (*numSplits == LDM_BATCH_SIZE) \
87
+ goto done; \
88
+ } \
89
+ } while (0)
90
+
91
+ while (n + 3 < size) {
92
+ GEAR_ITER_ONCE();
93
+ GEAR_ITER_ONCE();
94
+ GEAR_ITER_ONCE();
95
+ GEAR_ITER_ONCE();
96
+ }
97
+ while (n < size) {
98
+ GEAR_ITER_ONCE();
99
+ }
100
+
101
+ #undef GEAR_ITER_ONCE
102
+
103
+ done:
104
+ state->rolling = hash;
105
+ return n;
106
+ }
20
107
 
21
108
  void ZSTD_ldm_adjustParameters(ldmParams_t* params,
22
109
  ZSTD_compressionParameters const* cParams)
@@ -26,13 +113,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
26
113
  DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
27
114
  if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
28
115
  if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
29
- if (cParams->strategy >= ZSTD_btopt) {
30
- /* Get out of the way of the optimal parser */
31
- U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
32
- assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
33
- assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
34
- params->minMatchLength = minMatch;
35
- }
36
116
  if (params->hashLog == 0) {
37
117
  params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
38
118
  assert(params->hashLog <= ZSTD_HASHLOG_MAX);
@@ -49,9 +129,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
49
129
  {
50
130
  size_t const ldmHSize = ((size_t)1) << params.hashLog;
51
131
  size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
52
- size_t const ldmBucketSize =
53
- ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54
- size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
132
+ size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
133
+ size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
134
+ + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
55
135
  return params.enableLdm ? totalSize : 0;
56
136
  }
57
137
 
@@ -60,41 +140,6 @@ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
60
140
  return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
61
141
  }
62
142
 
63
- /** ZSTD_ldm_getSmallHash() :
64
- * numBits should be <= 32
65
- * If numBits==0, returns 0.
66
- * @return : the most significant numBits of value. */
67
- static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
68
- {
69
- assert(numBits <= 32);
70
- return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
71
- }
72
-
73
- /** ZSTD_ldm_getChecksum() :
74
- * numBitsToDiscard should be <= 32
75
- * @return : the next most significant 32 bits after numBitsToDiscard */
76
- static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
77
- {
78
- assert(numBitsToDiscard <= 32);
79
- return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
80
- }
81
-
82
- /** ZSTD_ldm_getTag() ;
83
- * Given the hash, returns the most significant numTagBits bits
84
- * after (32 + hbits) bits.
85
- *
86
- * If there are not enough bits remaining, return the last
87
- * numTagBits bits. */
88
- static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
89
- {
90
- assert(numTagBits < 32 && hbits <= 32);
91
- if (32 - hbits < numTagBits) {
92
- return hash & (((U32)1 << numTagBits) - 1);
93
- } else {
94
- return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
95
- }
96
- }
97
-
98
143
  /** ZSTD_ldm_getBucket() :
99
144
  * Returns a pointer to the start of the bucket associated with hash. */
100
145
  static ldmEntry_t* ZSTD_ldm_getBucket(
@@ -109,38 +154,12 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
109
154
  size_t const hash, const ldmEntry_t entry,
110
155
  ldmParams_t const ldmParams)
111
156
  {
112
- BYTE* const bucketOffsets = ldmState->bucketOffsets;
113
- *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
114
- bucketOffsets[hash]++;
115
- bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
116
- }
157
+ BYTE* const pOffset = ldmState->bucketOffsets + hash;
158
+ unsigned const offset = *pOffset;
159
+
160
+ *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
161
+ *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
117
162
 
118
- /** ZSTD_ldm_makeEntryAndInsertByTag() :
119
- *
120
- * Gets the small hash, checksum, and tag from the rollingHash.
121
- *
122
- * If the tag matches (1 << ldmParams.hashRateLog)-1, then
123
- * creates an ldmEntry from the offset, and inserts it into the hash table.
124
- *
125
- * hBits is the length of the small hash, which is the most significant hBits
126
- * of rollingHash. The checksum is the next 32 most significant bits, followed
127
- * by ldmParams.hashRateLog bits that make up the tag. */
128
- static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
129
- U64 const rollingHash,
130
- U32 const hBits,
131
- U32 const offset,
132
- ldmParams_t const ldmParams)
133
- {
134
- U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
135
- U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
136
- if (tag == tagMask) {
137
- U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
138
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
139
- ldmEntry_t entry;
140
- entry.offset = offset;
141
- entry.checksum = checksum;
142
- ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
143
- }
144
163
  }
145
164
 
146
165
  /** ZSTD_ldm_countBackwardsMatch() :
@@ -149,10 +168,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
149
168
  * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
150
169
  static size_t ZSTD_ldm_countBackwardsMatch(
151
170
  const BYTE* pIn, const BYTE* pAnchor,
152
- const BYTE* pMatch, const BYTE* pBase)
171
+ const BYTE* pMatch, const BYTE* pMatchBase)
153
172
  {
154
173
  size_t matchLength = 0;
155
- while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
174
+ while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
156
175
  pIn--;
157
176
  pMatch--;
158
177
  matchLength++;
@@ -160,6 +179,27 @@ static size_t ZSTD_ldm_countBackwardsMatch(
160
179
  return matchLength;
161
180
  }
162
181
 
182
+ /** ZSTD_ldm_countBackwardsMatch_2segments() :
183
+ * Returns the number of bytes that match backwards from pMatch,
184
+ * even with the backwards match spanning 2 different segments.
185
+ *
186
+ * On reaching `pMatchBase`, start counting from mEnd */
187
+ static size_t ZSTD_ldm_countBackwardsMatch_2segments(
188
+ const BYTE* pIn, const BYTE* pAnchor,
189
+ const BYTE* pMatch, const BYTE* pMatchBase,
190
+ const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
191
+ {
192
+ size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
193
+ if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
194
+ /* If backwards match is entirely in the extDict or prefix, immediately return */
195
+ return matchLength;
196
+ }
197
+ DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
198
+ matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
199
+ DEBUGLOG(7, "final backwards match length = %zu", matchLength);
200
+ return matchLength;
201
+ }
202
+
163
203
  /** ZSTD_ldm_fillFastTables() :
164
204
  *
165
205
  * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
@@ -197,30 +237,43 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
197
237
  return 0;
198
238
  }
199
239
 
200
- /** ZSTD_ldm_fillLdmHashTable() :
201
- *
202
- * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
203
- * lastHash is the rolling hash that corresponds to lastHashed.
204
- *
205
- * Returns the rolling hash corresponding to position iend-1. */
206
- static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
207
- U64 lastHash, const BYTE* lastHashed,
208
- const BYTE* iend, const BYTE* base,
209
- U32 hBits, ldmParams_t const ldmParams)
240
+ void ZSTD_ldm_fillHashTable(
241
+ ldmState_t* ldmState, const BYTE* ip,
242
+ const BYTE* iend, ldmParams_t const* params)
210
243
  {
211
- U64 rollingHash = lastHash;
212
- const BYTE* cur = lastHashed + 1;
213
-
214
- while (cur < iend) {
215
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
216
- cur[ldmParams.minMatchLength-1],
217
- state->hashPower);
218
- ZSTD_ldm_makeEntryAndInsertByTag(state,
219
- rollingHash, hBits,
220
- (U32)(cur - base), ldmParams);
221
- ++cur;
244
+ U32 const minMatchLength = params->minMatchLength;
245
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
246
+ BYTE const* const base = ldmState->window.base;
247
+ BYTE const* const istart = ip;
248
+ ldmRollingHashState_t hashState;
249
+ size_t* const splits = ldmState->splitIndices;
250
+ unsigned numSplits;
251
+
252
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
253
+
254
+ ZSTD_ldm_gear_init(&hashState, params);
255
+ while (ip < iend) {
256
+ size_t hashed;
257
+ unsigned n;
258
+
259
+ numSplits = 0;
260
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
261
+
262
+ for (n = 0; n < numSplits; n++) {
263
+ if (ip + splits[n] >= istart + minMatchLength) {
264
+ BYTE const* const split = ip + splits[n] - minMatchLength;
265
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
266
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
267
+ ldmEntry_t entry;
268
+
269
+ entry.offset = (U32)(split - base);
270
+ entry.checksum = (U32)(xxhash >> 32);
271
+ ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
272
+ }
273
+ }
274
+
275
+ ip += hashed;
222
276
  }
223
- return rollingHash;
224
277
  }
225
278
 
226
279
 
@@ -231,10 +284,10 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
231
284
  * (after a long match, only update tables a limited amount). */
232
285
  static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
233
286
  {
234
- U32 const current = (U32)(anchor - ms->window.base);
235
- if (current > ms->nextToUpdate + 1024) {
287
+ U32 const curr = (U32)(anchor - ms->window.base);
288
+ if (curr > ms->nextToUpdate + 1024) {
236
289
  ms->nextToUpdate =
237
- current - MIN(512, current - ms->nextToUpdate - 1024);
290
+ curr - MIN(512, curr - ms->nextToUpdate - 1024);
238
291
  }
239
292
  }
240
293
 
@@ -245,11 +298,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
245
298
  /* LDM parameters */
246
299
  int const extDict = ZSTD_window_hasExtDict(ldmState->window);
247
300
  U32 const minMatchLength = params->minMatchLength;
248
- U64 const hashPower = ldmState->hashPower;
301
+ U32 const entsPerBucket = 1U << params->bucketSizeLog;
249
302
  U32 const hBits = params->hashLog - params->bucketSizeLog;
250
- U32 const ldmBucketSize = 1U << params->bucketSizeLog;
251
- U32 const hashRateLog = params->hashRateLog;
252
- U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
253
303
  /* Prefix and extDict parameters */
254
304
  U32 const dictLimit = ldmState->window.dictLimit;
255
305
  U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
@@ -261,45 +311,76 @@ static size_t ZSTD_ldm_generateSequences_internal(
261
311
  /* Input bounds */
262
312
  BYTE const* const istart = (BYTE const*)src;
263
313
  BYTE const* const iend = istart + srcSize;
264
- BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
314
+ BYTE const* const ilimit = iend - HASH_READ_SIZE;
265
315
  /* Input positions */
266
316
  BYTE const* anchor = istart;
267
317
  BYTE const* ip = istart;
268
- /* Rolling hash */
269
- BYTE const* lastHashed = NULL;
270
- U64 rollingHash = 0;
271
-
272
- while (ip <= ilimit) {
273
- size_t mLength;
274
- U32 const current = (U32)(ip - base);
275
- size_t forwardMatchLength = 0, backwardMatchLength = 0;
276
- ldmEntry_t* bestEntry = NULL;
277
- if (ip != istart) {
278
- rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
279
- lastHashed[minMatchLength],
280
- hashPower);
281
- } else {
282
- rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
318
+ /* Rolling hash state */
319
+ ldmRollingHashState_t hashState;
320
+ /* Arrays for staged-processing */
321
+ size_t* const splits = ldmState->splitIndices;
322
+ ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
323
+ unsigned numSplits;
324
+
325
+ if (srcSize < minMatchLength)
326
+ return iend - anchor;
327
+
328
+ /* Initialize the rolling hash state with the first minMatchLength bytes */
329
+ ZSTD_ldm_gear_init(&hashState, params);
330
+ {
331
+ size_t n = 0;
332
+
333
+ while (n < minMatchLength) {
334
+ numSplits = 0;
335
+ n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
336
+ splits, &numSplits);
283
337
  }
284
- lastHashed = ip;
338
+ ip += minMatchLength;
339
+ }
340
+
341
+ while (ip < ilimit) {
342
+ size_t hashed;
343
+ unsigned n;
344
+
345
+ numSplits = 0;
346
+ hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
347
+ splits, &numSplits);
348
+
349
+ for (n = 0; n < numSplits; n++) {
350
+ BYTE const* const split = ip + splits[n] - minMatchLength;
351
+ U64 const xxhash = XXH64(split, minMatchLength, 0);
352
+ U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
285
353
 
286
- /* Do not insert and do not look for a match */
287
- if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
288
- ip++;
289
- continue;
354
+ candidates[n].split = split;
355
+ candidates[n].hash = hash;
356
+ candidates[n].checksum = (U32)(xxhash >> 32);
357
+ candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
358
+ PREFETCH_L1(candidates[n].bucket);
290
359
  }
291
360
 
292
- /* Get the best entry and compute the match lengths */
293
- {
294
- ldmEntry_t* const bucket =
295
- ZSTD_ldm_getBucket(ldmState,
296
- ZSTD_ldm_getSmallHash(rollingHash, hBits),
297
- *params);
298
- ldmEntry_t* cur;
299
- size_t bestMatchLength = 0;
300
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
301
-
302
- for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
361
+ for (n = 0; n < numSplits; n++) {
362
+ size_t forwardMatchLength = 0, backwardMatchLength = 0,
363
+ bestMatchLength = 0, mLength;
364
+ BYTE const* const split = candidates[n].split;
365
+ U32 const checksum = candidates[n].checksum;
366
+ U32 const hash = candidates[n].hash;
367
+ ldmEntry_t* const bucket = candidates[n].bucket;
368
+ ldmEntry_t const* cur;
369
+ ldmEntry_t const* bestEntry = NULL;
370
+ ldmEntry_t newEntry;
371
+
372
+ newEntry.offset = (U32)(split - base);
373
+ newEntry.checksum = checksum;
374
+
375
+ /* If a split point would generate a sequence overlapping with
376
+ * the previous one, we merely register it in the hash table and
377
+ * move on */
378
+ if (split < anchor) {
379
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
380
+ continue;
381
+ }
382
+
383
+ for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
303
384
  size_t curForwardMatchLength, curBackwardMatchLength,
304
385
  curTotalMatchLength;
305
386
  if (cur->checksum != checksum || cur->offset <= lowestIndex) {
@@ -313,30 +394,23 @@ static size_t ZSTD_ldm_generateSequences_internal(
313
394
  cur->offset < dictLimit ? dictEnd : iend;
314
395
  BYTE const* const lowMatchPtr =
315
396
  cur->offset < dictLimit ? dictStart : lowPrefixPtr;
316
-
317
- curForwardMatchLength = ZSTD_count_2segments(
318
- ip, pMatch, iend,
319
- matchEnd, lowPrefixPtr);
397
+ curForwardMatchLength =
398
+ ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
320
399
  if (curForwardMatchLength < minMatchLength) {
321
400
  continue;
322
401
  }
323
- curBackwardMatchLength =
324
- ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
325
- lowMatchPtr);
326
- curTotalMatchLength = curForwardMatchLength +
327
- curBackwardMatchLength;
402
+ curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
403
+ split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
328
404
  } else { /* !extDict */
329
405
  BYTE const* const pMatch = base + cur->offset;
330
- curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
406
+ curForwardMatchLength = ZSTD_count(split, pMatch, iend);
331
407
  if (curForwardMatchLength < minMatchLength) {
332
408
  continue;
333
409
  }
334
410
  curBackwardMatchLength =
335
- ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
336
- lowPrefixPtr);
337
- curTotalMatchLength = curForwardMatchLength +
338
- curBackwardMatchLength;
411
+ ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
339
412
  }
413
+ curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
340
414
 
341
415
  if (curTotalMatchLength > bestMatchLength) {
342
416
  bestMatchLength = curTotalMatchLength;
@@ -345,57 +419,39 @@ static size_t ZSTD_ldm_generateSequences_internal(
345
419
  bestEntry = cur;
346
420
  }
347
421
  }
348
- }
349
-
350
- /* No match found -- continue searching */
351
- if (bestEntry == NULL) {
352
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
353
- hBits, current,
354
- *params);
355
- ip++;
356
- continue;
357
- }
358
422
 
359
- /* Match found */
360
- mLength = forwardMatchLength + backwardMatchLength;
361
- ip -= backwardMatchLength;
362
-
363
- {
364
- /* Store the sequence:
365
- * ip = current - backwardMatchLength
366
- * The match is at (bestEntry->offset - backwardMatchLength)
367
- */
368
- U32 const matchIndex = bestEntry->offset;
369
- U32 const offset = current - matchIndex;
370
- rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
371
-
372
- /* Out of sequence storage */
373
- if (rawSeqStore->size == rawSeqStore->capacity)
374
- return ERROR(dstSize_tooSmall);
375
- seq->litLength = (U32)(ip - anchor);
376
- seq->matchLength = (U32)mLength;
377
- seq->offset = offset;
378
- rawSeqStore->size++;
379
- }
423
+ /* No match found -- insert an entry into the hash table
424
+ * and process the next candidate match */
425
+ if (bestEntry == NULL) {
426
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
427
+ continue;
428
+ }
380
429
 
381
- /* Insert the current entry into the hash table */
382
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
383
- (U32)(lastHashed - base),
384
- *params);
430
+ /* Match found */
431
+ mLength = forwardMatchLength + backwardMatchLength;
432
+ {
433
+ U32 const offset = (U32)(split - base) - bestEntry->offset;
434
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
435
+
436
+ /* Out of sequence storage */
437
+ if (rawSeqStore->size == rawSeqStore->capacity)
438
+ return ERROR(dstSize_tooSmall);
439
+ seq->litLength = (U32)(split - backwardMatchLength - anchor);
440
+ seq->matchLength = (U32)mLength;
441
+ seq->offset = offset;
442
+ rawSeqStore->size++;
443
+ }
385
444
 
386
- assert(ip + backwardMatchLength == lastHashed);
445
+ /* Insert the current entry into the hash table --- it must be
446
+ * done after the previous block to avoid clobbering bestEntry */
447
+ ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
387
448
 
388
- /* Fill the hash table from lastHashed+1 to ip+mLength*/
389
- /* Heuristic: don't need to fill the entire table at end of block */
390
- if (ip + mLength <= ilimit) {
391
- rollingHash = ZSTD_ldm_fillLdmHashTable(
392
- ldmState, rollingHash, lastHashed,
393
- ip + mLength, base, hBits, *params);
394
- lastHashed = ip + mLength - 1;
449
+ anchor = split + forwardMatchLength;
395
450
  }
396
- ip += mLength;
397
- anchor = ip;
451
+
452
+ ip += hashed;
398
453
  }
454
+
399
455
  return iend - anchor;
400
456
  }
401
457
 
@@ -447,8 +503,10 @@ size_t ZSTD_ldm_generateSequences(
447
503
  if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
448
504
  U32 const ldmHSize = 1U << params->hashLog;
449
505
  U32 const correction = ZSTD_window_correctOverflow(
450
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
506
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
507
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
508
+ /* invalidate dictionaries on overflow correction */
509
+ ldmState->loadedDictEnd = 0;
452
510
  }
453
511
  /* 2. We enforce the maximum offset allowed.
454
512
  *
@@ -457,8 +515,14 @@ size_t ZSTD_ldm_generateSequences(
457
515
  * TODO: * Test the chunk size.
458
516
  * * Try invalidation after the sequence generation and test the
459
517
  * the offset against maxDist directly.
518
+ *
519
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
520
+ * that any offset used is valid at the END of the sequence, since it may
521
+ * be split into two sequences. This condition holds when using
522
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
523
+ * against maxDist directly, we'll have to carefully handle that case.
460
524
  */
461
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
525
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
462
526
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
527
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
528
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -539,6 +603,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
539
603
  return sequence;
540
604
  }
541
605
 
606
+ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
607
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
608
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
609
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
610
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
611
+ currPos -= currSeq.litLength + currSeq.matchLength;
612
+ rawSeqStore->pos++;
613
+ } else {
614
+ rawSeqStore->posInSequence = currPos;
615
+ break;
616
+ }
617
+ }
618
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
619
+ rawSeqStore->posInSequence = 0;
620
+ }
621
+ }
622
+
542
623
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
543
624
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
544
625
  void const* src, size_t srcSize)
@@ -554,9 +635,18 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
554
635
  BYTE const* ip = istart;
555
636
 
556
637
  DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
638
+ /* If using opt parser, use LDMs only as candidates rather than always accepting them */
639
+ if (cParams->strategy >= ZSTD_btopt) {
640
+ size_t lastLLSize;
641
+ ms->ldmSeqStore = rawSeqStore;
642
+ lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
643
+ ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
644
+ return lastLLSize;
645
+ }
646
+
557
647
  assert(rawSeqStore->pos <= rawSeqStore->size);
558
648
  assert(rawSeqStore->size <= rawSeqStore->capacity);
559
- /* Loop through each sequence and apply the block compressor to the lits */
649
+ /* Loop through each sequence and apply the block compressor to the literals */
560
650
  while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
561
651
  /* maybeSplitSequence updates rawSeqStore->pos */
562
652
  rawSeq const sequence = maybeSplitSequence(rawSeqStore,
@@ -566,14 +656,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
566
656
  if (sequence.offset == 0)
567
657
  break;
568
658
 
569
- assert(sequence.offset <= (1U << cParams->windowLog));
570
659
  assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
660
 
572
661
  /* Fill tables for block compressor */
573
662
  ZSTD_ldm_limitTableUpdate(ms, ip);
574
663
  ZSTD_ldm_fillFastTables(ms, ip);
575
664
  /* Run the block compressor */
576
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
665
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
577
666
  {
578
667
  size_t const newLitLength =
579
668
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
@@ -583,7 +672,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
583
672
  rep[i] = rep[i-1];
584
673
  rep[0] = sequence.offset;
585
674
  /* Store the sequence */
586
- ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
675
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
587
676
  sequence.offset + ZSTD_REP_MOVE,
588
677
  sequence.matchLength - MINMATCH);
589
678
  ip += sequence.matchLength;