extzstd 0.2 → 0.3

Files changed (88)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
--- a/data/contrib/zstd/lib/compress/zstd_lazy.h
+++ b/data/contrib/zstd/lib/compress/zstd_lazy.h
@@ -15,22 +15,50 @@
 extern "C" {
 #endif
 
-#include "mem.h"   /* U32 */
-#include "zstd.h"  /* ZSTD_CCtx, size_t */
-
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
-void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+#include "zstd_compress_internal.h"
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 #if defined (__cplusplus)
 }
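All of the declarations above now share one calling convention: each block compressor receives the match state, the output seqStore, and the repcode history directly, instead of a whole ZSTD_CCtx. For reference, a minimal sketch of the matching function-pointer type (the real typedef lives in zstd_compress_internal.h, which this header now includes):

    /* Shared block-compressor shape assumed by the declarations above.
     * A compressor consumes srcSize bytes at src, emits sequences into
     * seqStore, updates the repcode history in rep, and returns the
     * number of trailing bytes it left as literals. */
    typedef size_t (*ZSTD_blockCompressor)(
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
            void const* src, size_t srcSize);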
--- a/data/contrib/zstd/lib/compress/zstd_ldm.c
+++ b/data/contrib/zstd/lib/compress/zstd_ldm.c
@@ -9,6 +9,7 @@
 
 #include "zstd_ldm.h"
 
+#include "debug.h"
 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
 
@@ -17,36 +18,46 @@
 #define LDM_HASH_RLOG 7
 #define LDM_HASH_CHAR_OFFSET 10
 
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
 {
+    params->windowLog = cParams->windowLog;
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
-    params->enableLdm = enableLdm>0;
-    params->hashLog = 0;
-    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
-    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
-    return 0;
-}
-
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
-{
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (cParams->strategy >= ZSTD_btopt) {
+      /* Get out of the way of the optimal parser */
+      U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+      assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+      assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+      params->minMatchLength = minMatch;
+    }
     if (params->hashLog == 0) {
-        params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
-    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
-        params->hashEveryLog =
-            windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
     }
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }
 
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
-    size_t const ldmHSize = ((size_t)1) << hashLog;
-    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
     size_t const ldmBucketSize =
-        ((size_t)1) << (hashLog - ldmBucketSizeLog);
-    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /** ZSTD_ldm_getSmallHash() :
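The new ZSTD_ldm_getTableSize() above returns the bucket array plus the entry array in a single figure. A worked example with illustrative parameters (hashLog = 20, bucketSizeLog = 3) and an assumed 8-byte ldmEntry_t (a U32 offset plus a U32 checksum):

    #include <stdio.h>

    /* Sketch of the ZSTD_ldm_getTableSize() arithmetic, illustrative values only. */
    int main(void)
    {
        unsigned const hashLog = 20, bucketSizeLog = 3;
        size_t const entrySize = 8;                 /* assumed sizeof(ldmEntry_t) */
        size_t const hSize = (size_t)1 << hashLog;  /* 1,048,576 hash entries */
        size_t const bucketSize = (size_t)1 << (hashLog - bucketSizeLog);
        size_t const total = bucketSize + hSize * entrySize;
        printf("%zu bytes\n", total);   /* 131072 + 8388608 = 8519680, about 8.1 MiB */
        return 0;
    }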
@@ -108,20 +119,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
  *
  * Gets the small hash, checksum, and tag from the rollingHash.
  *
- * If the tag matches (1 << ldmParams.hashEveryLog)-1, then
+ * If the tag matches (1 << ldmParams.hashRateLog)-1, then
  * creates an ldmEntry from the offset, and inserts it into the hash table.
  *
  * hBits is the length of the small hash, which is the most significant hBits
  * of rollingHash. The checksum is the next 32 most significant bits, followed
- * by ldmParams.hashEveryLog bits that make up the tag. */
+ * by ldmParams.hashRateLog bits that make up the tag. */
 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                              U64 const rollingHash,
                                              U32 const hBits,
                                              U32 const offset,
                                              ldmParams_t const ldmParams)
 {
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
-    U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
+    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
     if (tag == tagMask) {
         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
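ZSTD_ldm_makeEntryAndInsertByTag() inserts only when the tag bits are all ones, so on average one position in 2^hashRateLog enters the table (1 in 128 when hashRateLog is 7, its usual value given LDM_HASH_RLOG). A sketch of that gate, with ldm_shouldInsert() as a hypothetical stand-in for the ZSTD_ldm_getTag() comparison, assuming the bit layout described in the comment above (small hash, then a 32-bit checksum, then the tag):

    #include <stdint.h>

    /* Hypothetical helper mirroring the tag check above; layout assumed
     * from the comment: top hBits = bucket hash, next 32 bits = checksum,
     * following hashRateLog bits = tag. */
    static int ldm_shouldInsert(uint64_t rollingHash,
                                uint32_t hBits, uint32_t hashRateLog)
    {
        uint32_t const shift = 64 - hBits - 32 - hashRateLog;
        uint32_t const tag = (uint32_t)(rollingHash >> shift)
                             & ((1u << hashRateLog) - 1);
        return tag == (1u << hashRateLog) - 1;   /* all-ones tag => insert */
    }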
@@ -132,55 +143,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
     }
 }
 
-/** ZSTD_ldm_getRollingHash() :
- *  Get a 64-bit hash using the first len bytes from buf.
- *
- *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
- *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
- *
- *  where the constant a is defined to be prime8bytes.
- *
- *  The implementation adds an offset to each byte, so
- *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
-static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
-{
-    U64 ret = 0;
-    U32 i;
-    for (i = 0; i < len; i++) {
-        ret *= prime8bytes;
-        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
-    }
-    return ret;
-}
-
-/** ZSTD_ldm_ipow() :
- *  Return base^exp. */
-static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
-{
-    U64 ret = 1;
-    while (exp) {
-        if (exp & 1) { ret *= base; }
-        exp >>= 1;
-        base *= base;
-    }
-    return ret;
-}
-
-U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
-    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
-    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
-}
-
-/** ZSTD_ldm_updateHash() :
- *  Updates hash by removing toRemove and adding toAdd. */
-static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
-{
-    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
-    hash *= prime8bytes;
-    hash += toAdd + LDM_HASH_CHAR_OFFSET;
-    return hash;
-}
-
 /** ZSTD_ldm_countBackwardsMatch() :
  *  Returns the number of bytes that match backwards before pIn and pMatch.
  *
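The helpers removed above are not gone from the library; the rest of this diff replaces them with the shared ZSTD_rollingHash_compute()/ZSTD_rollingHash_rotate() helpers. For reference, a self-contained sketch of the same Rabin-Karp recurrence their comments document, with an illustrative multiplier standing in for prime8bytes:

    #include <stdint.h>

    #define PRIME8      0x9E3779B97F4A7C15ULL  /* illustrative odd multiplier */
    #define CHAR_OFFSET 10                     /* mirrors LDM_HASH_CHAR_OFFSET */

    /* a^(k-1) by square-and-multiply, as ZSTD_ldm_ipow()/getHashPower() did. */
    static uint64_t rh_power(uint32_t k)
    {
        uint64_t base = PRIME8, ret = 1, exp = k - 1;
        while (exp) {
            if (exp & 1) ret *= base;
            exp >>= 1;
            base *= base;
        }
        return ret;
    }

    /* H(s) = (s_1+off)*a^(k-1) + ... + (s_k+off), the documented hash. */
    static uint64_t rh_compute(const uint8_t* buf, uint32_t len)
    {
        uint64_t h = 0;
        uint32_t i;
        for (i = 0; i < len; i++)
            h = h * PRIME8 + buf[i] + CHAR_OFFSET;
        return h;
    }

    /* Slide one byte: H' = (H - (out+off)*a^(k-1)) * a + (in+off),
     * where power = rh_power(k) for a k-byte window. */
    static uint64_t rh_rotate(uint64_t h, uint8_t out, uint8_t in, uint64_t power)
    {
        h -= (out + CHAR_OFFSET) * power;
        return h * PRIME8 + in + CHAR_OFFSET;
    }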
@@ -205,21 +167,19 @@ static size_t ZSTD_ldm_countBackwardsMatch(
  *
  *  The tables for the other strategies are filled within their
  *  block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
 {
     const BYTE* const iend = (const BYTE*)end;
-    const U32 mls = zc->appliedParams.cParams.searchLength;
 
-    switch(zc->appliedParams.cParams.strategy)
+    switch(ms->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_greedy:
@@ -228,6 +188,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
     case ZSTD_btlazy2:
     case ZSTD_btopt:
     case ZSTD_btultra:
+    case ZSTD_btultra2:
         break;
     default:
         assert(0);  /* not possible : not a valid strategy id */
@@ -251,9 +212,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
     const BYTE* cur = lastHashed + 1;
 
     while (cur < iend) {
-        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
-                                          cur[ldmParams.minMatchLength-1],
-                                          state->hashPower);
+        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
+                                              cur[ldmParams.minMatchLength-1],
+                                              state->hashPower);
         ZSTD_ldm_makeEntryAndInsertByTag(state,
                                          rollingHash, hBits,
                                          (U32)(cur - base), ldmParams);
@@ -268,69 +229,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  * Sets cctx->nextToUpdate to a position corresponding closer to anchor
  * if it is far way
  * (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
 {
-    U32 const current = (U32)(anchor - cctx->base);
-    if (current > cctx->nextToUpdate + 1024) {
-        cctx->nextToUpdate =
-            current - MIN(512, current - cctx->nextToUpdate - 1024);
+    U32 const current = (U32)(anchor - ms->window.base);
+    if (current > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            current - MIN(512, current - ms->nextToUpdate - 1024);
     }
 }
 
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-/* defined in zstd_compress.c */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
-                                      const void* src, size_t srcSize)
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-    ldmState_t* const ldmState = &(cctx->ldmState);
-    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(cctx->seqStore);
-    const BYTE* const base = cctx->base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = cctx->dictLimit;
-    const BYTE* const lowest = base + lowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U64 const hashPower = ldmState->hashPower;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+    U32 const hashRateLog = params->hashRateLog;
+    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash */
+    BYTE const* lastHashed = NULL;
     U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
 
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+    while (ip <= ilimit) {
         size_t mLength;
         U32 const current = (U32)(ip - base);
         size_t forwardMatchLength = 0, backwardMatchLength = 0;
         ldmEntry_t* bestEntry = NULL;
         if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
+            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
+                                                  lastHashed[minMatchLength],
+                                                  hashPower);
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
         }
         lastHashed = ip;
 
         /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
+        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
             ip++;
             continue;
         }
@@ -340,27 +294,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
             ldmEntry_t* const bucket =
                 ZSTD_ldm_getBucket(ldmState,
                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
+                                   *params);
             ldmEntry_t* cur;
             size_t bestMatchLength = 0;
             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
 
             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const pMatch = cur->offset + base;
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                     continue;
                 }
-
-                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+                    curForwardMatchLength = ZSTD_count_2segments(
+                                                ip, pMatch, iend,
+                                                matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowMatchPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowPrefixPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
                 }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowest);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
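The extDict branch above maps a candidate's table index into one of two segments, depending on which side of dictLimit it falls. A minimal sketch of that mapping, reusing the zstd BYTE/U32 typedefs and the ZSTD_window_t field names from the diff:

    /* Indexes below dictLimit address the old (extDict) segment via dictBase;
     * indexes at or above it address the current prefix via base. */
    static const BYTE* ldm_indexToPtr(U32 index, U32 dictLimit,
                                      const BYTE* base, const BYTE* dictBase)
    {
        return (index < dictLimit) ? dictBase + index : base + index;
    }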
@@ -375,7 +351,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         if (bestEntry == NULL) {
             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                              hBits, current,
-                                             ldmParams);
+                                             *params);
             ip++;
             continue;
         }
@@ -384,324 +360,238 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         mLength = forwardMatchLength + backwardMatchLength;
         ip -= backwardMatchLength;
 
-        /* Call the block compressor on the remaining literals */
         {
+            /* Store the sequence:
+             * ip = current - backwardMatchLength
+             * The match is at (bestEntry->offset - backwardMatchLength)
+             */
             U32 const matchIndex = bestEntry->offset;
-            const BYTE* const match = base + matchIndex - backwardMatchLength;
-            U32 const offset = (U32)(ip - match);
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill tables for block compressor */
-            ZSTD_ldm_limitTableUpdate(cctx, anchor);
-            ZSTD_ldm_fillFastTables(cctx, anchor);
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
-            cctx->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+            U32 const offset = current - matchIndex;
+            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+            /* Out of sequence storage */
+            if (rawSeqStore->size == rawSeqStore->capacity)
+                return ERROR(dstSize_tooSmall);
+            seq->litLength = (U32)(ip - anchor);
+            seq->matchLength = (U32)mLength;
+            seq->offset = offset;
+            rawSeqStore->size++;
         }
 
         /* Insert the current entry into the hash table */
         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                          (U32)(lastHashed - base),
-                                         ldmParams);
+                                         *params);
 
         assert(ip + backwardMatchLength == lastHashed);
 
         /* Fill the hash table from lastHashed+1 to ip+mLength*/
         /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength < ilimit) {
+        if (ip + mLength <= ilimit) {
             rollingHash = ZSTD_ldm_fillLdmHashTable(
                               ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits, ldmParams);
+                              ip + mLength, base, hBits, *params);
             lastHashed = ip + mLength - 1;
         }
         ip += mLength;
         anchor = ip;
-        /* Check immediate repcode */
-        while ( (ip < ilimit)
-             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
-             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
-            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
-                                              iend) + 4;
-            /* Swap repToConfirm[1] <=> repToConfirm[0] */
-            {
-                U32 const tmpOff = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOff;
-            }
-
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
-            /* Fill the hash table from lastHashed+1 to ip+rLength*/
-            if (ip + rLength < ilimit) {
-                rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                  ldmState, rollingHash, lastHashed,
-                                  ip + rLength, base, hBits, ldmParams);
-                lastHashed = ip + rLength - 1;
-            }
-            ip += rLength;
-            anchor = ip;
-        }
     }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(cctx, anchor);
-    ZSTD_ldm_fillFastTables(cctx, anchor);
-
-    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
-    cctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
+    return iend - anchor;
 }
 
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
-                              const void* src, size_t srcSize)
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
 {
-    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
 }
 
-static size_t ZSTD_compressBlock_ldm_extDict_generic(
-                ZSTD_CCtx* ctx,
-                const void* src, size_t srcSize)
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-    ldmState_t* const ldmState = &(ctx->ldmState);
-    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(ctx->seqStore);
-    const BYTE* const base = ctx->base;
-    const BYTE* const dictBase = ctx->dictBase;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = ctx->lowLimit;
-    const BYTE* const dictStart = dictBase + lowestIndex;
-    const U32 dictLimit = ctx->dictLimit;
-    const BYTE* const lowPrefixPtr = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
-    U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++) {
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-    }
-
-    /* Search Loop */
-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        size_t mLength;
-        const U32 current = (U32)(ip-base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, src);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test the
+         *         the offset against maxDist directly.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
         }
-        lastHashed = ip;
+    }
+    return 0;
+}
 
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
-            /* Don't insert and don't look for a match */
-            ip++;
-            continue;
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
         }
-
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const curMatchBase =
-                    cur->offset < dictLimit ? dictBase : base;
-                const BYTE* const pMatch = curMatchBase + cur->offset;
-                const BYTE* const matchEnd =
-                    cur->offset < dictLimit ? dictEnd : iend;
-                const BYTE* const lowMatchPtr =
-                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count_2segments(
-                                            ip, pMatch, iend,
-                                            matchEnd, lowPrefixPtr);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowMatchPtr);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
                 }
+                rawSeqStore->pos++;
             }
+            return;
         }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
 
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                             (U32)(lastHashed - base),
-                                             ldmParams);
-            ip++;
-            continue;
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
         }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
 
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
-
-        /* Call the block compressor on the remaining literals */
-        {
-            /* ip = current - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength) */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = current - matchIndex;
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill the hash table for the block compressor */
-            ZSTD_ldm_limitTableUpdate(ctx, anchor);
-            ZSTD_ldm_fillFastTables(ctx, anchor);
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the lits */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
 
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
-            ctx->nextToUpdate = (U32)(ip - base);
+        assert(sequence.offset <= (1U << cParams->windowLog));
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
 
-            /* Update repToConfirm with the new offset */
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-        }
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength */
-        assert(ip + backwardMatchLength == lastHashed);
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits,
-                              ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-
-        /* check immediate repcode */
-        while (ip < ilimit) {
-            U32 const current2 = (U32)(ip-base);
-            U32 const repIndex2 = current2 - repToConfirm[1];
-            const BYTE* repMatch2 = repIndex2 < dictLimit ?
-                dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
-                        (repIndex2 > lowestIndex))  /* intentional overflow */
-               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
-                    dictEnd : iend;
-                size_t const repLength2 =
-                    ZSTD_count_2segments(ip+4, repMatch2+4, iend,
-                                         repEnd2, lowPrefixPtr) + 4;
-
-                U32 tmpOffset = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOffset;
-
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
-                /* Fill the hash table from lastHashed+1 to ip+repLength2*/
-                if (ip + repLength2 < ilimit) {
-                    rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                      ldmState, rollingHash, lastHashed,
-                                      ip + repLength2, base, hBits,
-                                      ldmParams);
-                    lastHashed = ip + repLength2 - 1;
-                }
-                ip += repLength2;
-                anchor = ip;
-                continue;
-            }
-            break;
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
         }
     }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(ctx, anchor);
-    ZSTD_ldm_fillFastTables(ctx, anchor);
-
-    /* Call the block compressor one last time on the last literals */
-    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
-    ctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
-}
-
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
-                                      const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
 }
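To see how maybeSplitSequence() and ZSTD_ldm_skipSequences() cooperate at a block boundary, here is a worked example with made-up numbers: a stored sequence of 100 literals plus a 200-byte match, 150 bytes left in the block, minMatch = 4.

    #include <assert.h>
    #include <stdint.h>

    typedef struct { uint32_t litLength, matchLength, offset; } seq_ex;

    int main(void)
    {
        seq_ex stored = { 100, 200, 42 };   /* what the rawSeqStore holds */
        seq_ex ret = stored;                /* what maybeSplitSequence returns */
        uint32_t const remaining = 150, minMatch = 4;

        /* maybeSplitSequence: 150 < 100+200, and 150 > 100 literals, so the
         * returned copy keeps its offset but is cut to a 50-byte match. */
        ret.matchLength = remaining - ret.litLength;        /* 150 - 100 = 50 */
        assert(ret.matchLength >= minMatch);                /* offset stays nonzero */

        /* ZSTD_ldm_skipSequences(remaining): the stored sequence loses its
         * 100 literals and the first 50 bytes of its match. */
        stored.matchLength -= remaining - stored.litLength; /* 200 - 50 = 150 */
        stored.litLength = 0;

        assert(ret.litLength + ret.matchLength == remaining);
        assert(stored.matchLength == 150);  /* handled by the next block */
        return 0;
    }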