extzstd 0.2 → 0.3

Files changed (88)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
data/contrib/zstd/lib/compress/zstd_lazy.h
@@ -15,22 +15,50 @@
 extern "C" {
 #endif
 
-#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
-
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
-void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+#include "zstd_compress_internal.h"
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 #if defined (__cplusplus)
 }
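
This signature change is the central refactor of the bundled zstd update: the lazy block compressors now take a ZSTD_matchState_t plus an explicit seqStore and repcode array instead of a whole ZSTD_CCtx. A minimal sketch of the shared shape (assumption: it mirrors the ZSTD_blockCompressor typedef declared in zstd_compress_internal.h in this version; compress_one_block is a hypothetical caller, not part of zstd):

#include "zstd_compress_internal.h"   /* ZSTD_matchState_t, seqStore_t, ZSTD_REP_NUM */

/* Every declaration above fits this one function-pointer shape. A compressor
 * parses src, appends sequences to seqStore, updates the repcode history
 * rep[], and returns the number of trailing input bytes it could not match,
 * which the caller must emit as raw literals. */
typedef size_t (*blockCompressor_fn)(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);

static size_t compress_one_block(ZSTD_matchState_t* ms, seqStore_t* seqStore,
                                 U32 rep[ZSTD_REP_NUM],
                                 const void* src, size_t srcSize)
{
    blockCompressor_fn const fn = ZSTD_compressBlock_lazy;   /* any of the above */
    size_t const lastLiterals = fn(ms, seqStore, rep, src, srcSize);
    return lastLiterals;   /* bytes the caller stores as literals */
}
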
data/contrib/zstd/lib/compress/zstd_ldm.c
@@ -9,6 +9,7 @@
 
 #include "zstd_ldm.h"
 
+#include "debug.h"
 #include "zstd_fast.h" /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
 
@@ -17,36 +18,46 @@
 #define LDM_HASH_RLOG 7
 #define LDM_HASH_CHAR_OFFSET 10
 
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
 {
+    params->windowLog = cParams->windowLog;
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
-    params->enableLdm = enableLdm>0;
-    params->hashLog = 0;
-    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
-    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
-    return 0;
-}
-
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
-{
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (cParams->strategy >= ZSTD_btopt) {
+      /* Get out of the way of the optimal parser */
+      U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+      assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+      assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+      params->minMatchLength = minMatch;
+    }
     if (params->hashLog == 0) {
-        params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
-    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
-        params->hashEveryLog =
-            windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
     }
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }
 
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
-    size_t const ldmHSize = ((size_t)1) << hashLog;
-    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
     size_t const ldmBucketSize =
-        ((size_t)1) << (hashLog - ldmBucketSizeLog);
-    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /** ZSTD_ldm_getSmallHash() :
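
For a sense of scale, the new table-size formula works out as follows in a standalone program (assumptions: an ldmEntry_t of 8 bytes, one U32 offset plus one U32 checksum, inferred from the fields used later in this file; hashLog=20, i.e. windowLog 27 minus LDM_HASH_RLOG 7, and bucketSizeLog=3 are merely example inputs):

#include <stdio.h>
#include <stddef.h>

/* Same arithmetic as ZSTD_ldm_getTableSize above, with the parameter
 * struct flattened to plain integers for illustration. */
static size_t ldm_table_size(unsigned hashLog, unsigned bucketSizeLog)
{
    size_t const ldmHSize = (size_t)1 << hashLog;
    size_t const bsLog = bucketSizeLog < hashLog ? bucketSizeLog : hashLog;
    size_t const ldmBucketSize = (size_t)1 << (hashLog - bsLog);
    size_t const entrySize = 8;  /* assumed sizeof(ldmEntry_t) */
    return ldmBucketSize + ldmHSize * entrySize;
}

int main(void)
{
    /* hashLog=20, bucketSizeLog=3:
     * 2^17 bucket bytes + 2^20 * 8 entry bytes = 128 KiB + 8 MiB */
    printf("%zu bytes\n", ldm_table_size(20, 3));
    return 0;
}
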
@@ -108,20 +119,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
  *
  * Gets the small hash, checksum, and tag from the rollingHash.
  *
- * If the tag matches (1 << ldmParams.hashEveryLog)-1, then
+ * If the tag matches (1 << ldmParams.hashRateLog)-1, then
  * creates an ldmEntry from the offset, and inserts it into the hash table.
  *
  * hBits is the length of the small hash, which is the most significant hBits
  * of rollingHash. The checksum is the next 32 most significant bits, followed
- * by ldmParams.hashEveryLog bits that make up the tag. */
+ * by ldmParams.hashRateLog bits that make up the tag. */
 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                              U64 const rollingHash,
                                              U32 const hBits,
                                              U32 const offset,
                                              ldmParams_t const ldmParams)
 {
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
-    U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
+    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
     if (tag == tagMask) {
         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
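
The hashRateLog gate described in this comment (renamed from hashEveryLog) is what keeps the LDM table sparse: only positions whose tag bits are all ones get inserted, roughly one position in 2^hashRateLog. A standalone illustration of that rate (should_insert is a hypothetical helper; the real code extracts the tag from a specific field of the rolling hash via ZSTD_ldm_getTag rather than its lowest bits):

#include <stdio.h>
#include <stdint.h>

/* With a well-mixed 64-bit hash, testing `rateLog` bits against an
 * all-ones mask passes about 1 time in 2^rateLog. */
static int should_insert(uint64_t rollingHash, unsigned rateLog)
{
    uint64_t const tagMask = ((uint64_t)1 << rateLog) - 1;
    return (rollingHash & tagMask) == tagMask;
}

int main(void)
{
    unsigned const rateLog = 7;
    uint64_t h = 0x9E3779B97F4A7C15ULL;   /* arbitrary hash-like seed */
    unsigned long inserted = 0, total = 1000000;
    for (unsigned long i = 0; i < total; i++) {
        h ^= h >> 33; h *= 0xFF51AFD7ED558CCDULL; h ^= h >> 33;  /* mix */
        inserted += should_insert(h, rateLog);
    }
    /* expect roughly total / 2^7, about 7812 insertions */
    printf("%lu of %lu positions inserted\n", inserted, total);
    return 0;
}
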
@@ -132,55 +143,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
     }
 }
 
-/** ZSTD_ldm_getRollingHash() :
- *  Get a 64-bit hash using the first len bytes from buf.
- *
- *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
- *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
- *
- *  where the constant a is defined to be prime8bytes.
- *
- *  The implementation adds an offset to each byte, so
- *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
-static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
-{
-    U64 ret = 0;
-    U32 i;
-    for (i = 0; i < len; i++) {
-        ret *= prime8bytes;
-        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
-    }
-    return ret;
-}
-
-/** ZSTD_ldm_ipow() :
- *  Return base^exp. */
-static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
-{
-    U64 ret = 1;
-    while (exp) {
-        if (exp & 1) { ret *= base; }
-        exp >>= 1;
-        base *= base;
-    }
-    return ret;
-}
-
-U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
-    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
-    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
-}
-
-/** ZSTD_ldm_updateHash() :
- *  Updates hash by removing toRemove and adding toAdd. */
-static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
-{
-    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
-    hash *= prime8bytes;
-    hash += toAdd + LDM_HASH_CHAR_OFFSET;
-    return hash;
-}
-
 /** ZSTD_ldm_countBackwardsMatch() :
  * Returns the number of bytes that match backwards before pIn and pMatch.
  *
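
The helpers deleted here computed a classic polynomial rolling hash, H(s) = (s_1 + OFFSET)*a^(k-1) + ... + (s_k + OFFSET) with a = prime8bytes, and the callers below switch to the shared ZSTD_rollingHash_compute() / ZSTD_rollingHash_rotate() functions. The key invariant, that rotating out the oldest byte and rotating in a new one reproduces the hash computed from scratch, can be checked in isolation (standalone sketch; the prime8bytes constant and the offset of 10 match LDM_HASH_CHAR_OFFSET in this file):

#include <assert.h>
#include <stdint.h>

#define PRIME8 0xCF1BBCDCB7A56463ULL  /* prime8bytes, as in zstd */
#define CHAR_OFFSET 10                /* LDM_HASH_CHAR_OFFSET */

/* H(s) = (s_1+OFF)*a^(k-1) + ... + (s_k+OFF), a = PRIME8, mod 2^64 */
static uint64_t hash_compute(const uint8_t* buf, uint32_t len)
{
    uint64_t h = 0;
    for (uint32_t i = 0; i < len; i++) h = h * PRIME8 + buf[i] + CHAR_OFFSET;
    return h;
}

static uint64_t ipow(uint64_t base, uint64_t exp)
{
    uint64_t r = 1;
    for (; exp; exp >>= 1, base *= base) if (exp & 1) r *= base;
    return r;
}

/* Slide the window one byte: drop `out`, append `in`.
 * power must be PRIME8^(len-1). */
static uint64_t hash_rotate(uint64_t h, uint8_t out, uint8_t in, uint64_t power)
{
    h -= (out + CHAR_OFFSET) * power;
    return h * PRIME8 + in + CHAR_OFFSET;
}

int main(void)
{
    const uint8_t data[] = "rolling hashes slide one byte at a time";
    uint32_t const len = 8;
    uint64_t const power = ipow(PRIME8, len - 1);
    uint64_t h = hash_compute(data, len);
    for (uint32_t i = 1; i + len <= sizeof(data) - 1; i++) {
        h = hash_rotate(h, data[i-1], data[i+len-1], power);
        assert(h == hash_compute(data + i, len));   /* rotate == recompute */
    }
    return 0;
}
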
@@ -205,21 +167,19 @@ static size_t ZSTD_ldm_countBackwardsMatch(
  *
  * The tables for the other strategies are filled within their
  * block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
 {
     const BYTE* const iend = (const BYTE*)end;
-    const U32 mls = zc->appliedParams.cParams.searchLength;
 
-    switch(zc->appliedParams.cParams.strategy)
+    switch(ms->cParams.strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_greedy:
@@ -228,6 +188,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
     case ZSTD_btlazy2:
     case ZSTD_btopt:
     case ZSTD_btultra:
+    case ZSTD_btultra2:
         break;
     default:
         assert(0); /* not possible : not a valid strategy id */
@@ -251,9 +212,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
     const BYTE* cur = lastHashed + 1;
 
     while (cur < iend) {
-        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
-                                          cur[ldmParams.minMatchLength-1],
-                                          state->hashPower);
+        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
+                                              cur[ldmParams.minMatchLength-1],
+                                              state->hashPower);
         ZSTD_ldm_makeEntryAndInsertByTag(state,
                                          rollingHash, hBits,
                                          (U32)(cur - base), ldmParams);
@@ -268,69 +229,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
  * Sets cctx->nextToUpdate to a position corresponding closer to anchor
  * if it is far way
  * (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
 {
-    U32 const current = (U32)(anchor - cctx->base);
-    if (current > cctx->nextToUpdate + 1024) {
-        cctx->nextToUpdate =
-            current - MIN(512, current - cctx->nextToUpdate - 1024);
+    U32 const current = (U32)(anchor - ms->window.base);
+    if (current > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            current - MIN(512, current - ms->nextToUpdate - 1024);
     }
 }
 
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-/* defined in zstd_compress.c */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
-                                      const void* src, size_t srcSize)
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-    ldmState_t* const ldmState = &(cctx->ldmState);
-    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(cctx->seqStore);
-    const BYTE* const base = cctx->base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = cctx->dictLimit;
-    const BYTE* const lowest = base + lowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U64 const hashPower = ldmState->hashPower;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+    U32 const hashRateLog = params->hashRateLog;
+    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash */
+    BYTE const* lastHashed = NULL;
     U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
 
-    /* Main Search Loop */
-    while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+    while (ip <= ilimit) {
         size_t mLength;
         U32 const current = (U32)(ip - base);
         size_t forwardMatchLength = 0, backwardMatchLength = 0;
         ldmEntry_t* bestEntry = NULL;
         if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
+            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
+                                                  lastHashed[minMatchLength],
+                                                  hashPower);
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
         }
         lastHashed = ip;
 
         /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
+        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
             ip++;
             continue;
         }
@@ -340,27 +294,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
             ldmEntry_t* const bucket =
                 ZSTD_ldm_getBucket(ldmState,
                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
+                                   *params);
             ldmEntry_t* cur;
             size_t bestMatchLength = 0;
             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
 
             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const pMatch = cur->offset + base;
                 size_t curForwardMatchLength, curBackwardMatchLength,
                        curTotalMatchLength;
                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                     continue;
                 }
-
-                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+                    curForwardMatchLength = ZSTD_count_2segments(
+                                                ip, pMatch, iend,
+                                                matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowMatchPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+                                                     lowPrefixPtr);
+                    curTotalMatchLength = curForwardMatchLength +
+                                          curBackwardMatchLength;
                 }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowest);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
 
                 if (curTotalMatchLength > bestMatchLength) {
                     bestMatchLength = curTotalMatchLength;
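
In extDict mode a candidate match may start in the dictionary segment and run into the prefix, which is why the new code switches from ZSTD_count() to ZSTD_count_2segments() above. A toy model of that two-segment count (a simplified re-implementation for illustration, not zstd's actual code):

#include <stddef.h>

static size_t count_same(const unsigned char* a, const unsigned char* b,
                         const unsigned char* aEnd)
{
    size_t n = 0;
    while (a + n < aEnd && a[n] == b[n]) n++;
    return n;
}

/* Count how far ip matches `match`, where `match` lives in a segment that
 * ends at mEnd; if the match runs all the way to mEnd, counting continues
 * at prefixStart, the byte that logically follows mEnd in the window. */
static size_t count_2segments_toy(const unsigned char* ip,
                                  const unsigned char* match,
                                  const unsigned char* iEnd,
                                  const unsigned char* mEnd,
                                  const unsigned char* prefixStart)
{
    const unsigned char* const vEnd =
        (ip + (mEnd - match) < iEnd) ? ip + (mEnd - match) : iEnd;
    size_t const n = count_same(ip, match, vEnd);
    if (match + n != mEnd) return n;          /* stopped inside the segment */
    return n + count_same(ip + n, prefixStart, iEnd);
}
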
@@ -375,7 +351,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         if (bestEntry == NULL) {
             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                              hBits, current,
-                                             ldmParams);
+                                             *params);
             ip++;
             continue;
         }
@@ -384,324 +360,238 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
         mLength = forwardMatchLength + backwardMatchLength;
         ip -= backwardMatchLength;
 
-        /* Call the block compressor on the remaining literals */
         {
+            /* Store the sequence:
+             * ip = current - backwardMatchLength
+             * The match is at (bestEntry->offset - backwardMatchLength)
+             */
             U32 const matchIndex = bestEntry->offset;
-            const BYTE* const match = base + matchIndex - backwardMatchLength;
-            U32 const offset = (U32)(ip - match);
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill tables for block compressor */
-            ZSTD_ldm_limitTableUpdate(cctx, anchor);
-            ZSTD_ldm_fillFastTables(cctx, anchor);
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
-            cctx->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+            U32 const offset = current - matchIndex;
+            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+            /* Out of sequence storage */
+            if (rawSeqStore->size == rawSeqStore->capacity)
+                return ERROR(dstSize_tooSmall);
+            seq->litLength = (U32)(ip - anchor);
+            seq->matchLength = (U32)mLength;
+            seq->offset = offset;
+            rawSeqStore->size++;
         }
 
         /* Insert the current entry into the hash table */
         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                          (U32)(lastHashed - base),
-                                         ldmParams);
+                                         *params);
 
         assert(ip + backwardMatchLength == lastHashed);
 
         /* Fill the hash table from lastHashed+1 to ip+mLength*/
         /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength < ilimit) {
+        if (ip + mLength <= ilimit) {
             rollingHash = ZSTD_ldm_fillLdmHashTable(
                               ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits, ldmParams);
+                              ip + mLength, base, hBits, *params);
             lastHashed = ip + mLength - 1;
         }
         ip += mLength;
         anchor = ip;
-        /* Check immediate repcode */
-        while ( (ip < ilimit)
-             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
-               && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
-            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
-                                              iend) + 4;
-            /* Swap repToConfirm[1] <=> repToConfirm[0] */
-            {
-                U32 const tmpOff = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOff;
-            }
-
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
-            /* Fill the hash table from lastHashed+1 to ip+rLength*/
-            if (ip + rLength < ilimit) {
-                rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                  ldmState, rollingHash, lastHashed,
-                                  ip + rLength, base, hBits, ldmParams);
-                lastHashed = ip + rLength - 1;
-            }
-            ip += rLength;
-            anchor = ip;
-        }
     }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(cctx, anchor);
-    ZSTD_ldm_fillFastTables(cctx, anchor);
-
-    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
-    cctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
+    return iend - anchor;
 }
 
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
-                              const void* src, size_t srcSize)
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
 {
-    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
 }
 
-static size_t ZSTD_compressBlock_ldm_extDict_generic(
-                                 ZSTD_CCtx* ctx,
-                                 const void* src, size_t srcSize)
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
 {
-    ldmState_t* const ldmState = &(ctx->ldmState);
-    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(ctx->seqStore);
-    const BYTE* const base = ctx->base;
-    const BYTE* const dictBase = ctx->dictBase;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32 lowestIndex = ctx->lowLimit;
-    const BYTE* const dictStart = dictBase + lowestIndex;
-    const U32 dictLimit = ctx->dictLimit;
-    const BYTE* const lowPrefixPtr = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
-    U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++) {
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-    }
-
-    /* Search Loop */
-    while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-        size_t mLength;
-        const U32 current = (U32)(ip-base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, src);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test the
+         *         the offset against maxDist directly.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
         } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
         }
-        lastHashed = ip;
+    }
+    return 0;
+}
 
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
-            /* Don't insert and don't look for a match */
-            ip++;
-            continue;
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
         }
-
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const curMatchBase =
-                    cur->offset < dictLimit ? dictBase : base;
-                const BYTE* const pMatch = curMatchBase + cur->offset;
-                const BYTE* const matchEnd =
-                    cur->offset < dictLimit ? dictEnd : iend;
-                const BYTE* const lowMatchPtr =
-                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count_2segments(
-                    ip, pMatch, iend,
-                    matchEnd, lowPrefixPtr);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                    ip, anchor, pMatch, lowMatchPtr);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
                 }
+                rawSeqStore->pos++;
             }
+            return;
         }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
 
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                             (U32)(lastHashed - base),
-                                             ldmParams);
-            ip++;
-            continue;
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
         }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
 
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
-
-        /* Call the block compressor on the remaining literals */
-        {
-            /* ip = current - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength) */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = current - matchIndex;
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill the hash table for the block compressor */
-            ZSTD_ldm_limitTableUpdate(ctx, anchor);
-            ZSTD_ldm_fillFastTables(ctx, anchor);
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the lits */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
 
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
-            ctx->nextToUpdate = (U32)(ip - base);
+        assert(sequence.offset <= (1U << cParams->windowLog));
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
 
-            /* Update repToConfirm with the new offset */
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-        }
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength */
-        assert(ip + backwardMatchLength == lastHashed);
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits,
-                              ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-
-        /* check immediate repcode */
-        while (ip < ilimit) {
-            U32 const current2 = (U32)(ip-base);
-            U32 const repIndex2 = current2 - repToConfirm[1];
-            const BYTE* repMatch2 = repIndex2 < dictLimit ?
-                                    dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
-                        (repIndex2 > lowestIndex)) /* intentional overflow */
-               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
-                                            dictEnd : iend;
-                size_t const repLength2 =
-                    ZSTD_count_2segments(ip+4, repMatch2+4, iend,
-                                         repEnd2, lowPrefixPtr) + 4;
-
-                U32 tmpOffset = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOffset;
-
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
-                /* Fill the hash table from lastHashed+1 to ip+repLength2*/
-                if (ip + repLength2 < ilimit) {
-                    rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                      ldmState, rollingHash, lastHashed,
-                                      ip + repLength2, base, hBits,
-                                      ldmParams);
-                    lastHashed = ip + repLength2 - 1;
-                }
-                ip += repLength2;
-                anchor = ip;
-                continue;
-            }
-            break;
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
         }
     }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(ctx, anchor);
-    ZSTD_ldm_fillFastTables(ctx, anchor);
-
-    /* Call the block compressor one last time on the last literals */
-    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
-    ctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
-}
-
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
-                                      const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
 }
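
The splitting rule in the new maybeSplitSequence() is easiest to see with numbers: a stored sequence of litLength=10, matchLength=100 meeting a block boundary with remaining=50 is emitted as litLength=10, matchLength=40 (40 is still >= minMatch), and ZSTD_ldm_skipSequences() trims the stored copy so the next block starts with the remaining 60 match bytes. A standalone check of that arithmetic (toy_seq and split_at_boundary are hypothetical mirrors of rawSeq and the boundary branch, for illustration only):

#include <assert.h>
#include <stdint.h>

typedef struct { uint32_t litLength, matchLength, offset; } toy_seq;

/* Mirrors the boundary case of maybeSplitSequence: cut the match at the
 * block boundary, or demote the whole tail to literals (offset == 0) if
 * the cut piece would be shorter than minMatch. */
static toy_seq split_at_boundary(toy_seq s, uint32_t remaining, uint32_t minMatch)
{
    if (remaining >= s.litLength + s.matchLength) return s;  /* no split */
    if (remaining <= s.litLength) { s.offset = 0; return s; }
    s.matchLength = remaining - s.litLength;
    if (s.matchLength < minMatch) s.offset = 0;
    return s;
}

int main(void)
{
    toy_seq const in = { 10, 100, 777 };
    toy_seq const out = split_at_boundary(in, 50, 4);
    assert(out.litLength == 10 && out.matchLength == 40 && out.offset == 777);
    /* boundary inside the literals: the rest of the block is all literals */
    assert(split_at_boundary(in, 5, 4).offset == 0);
    return 0;
}
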