zstd-ruby 1.3.1.1 → 1.3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/.gitignore +1 -0
  4. data/ext/zstdruby/libzstd/Makefile +40 -26
  5. data/ext/zstdruby/libzstd/README.md +68 -45
  6. data/ext/zstdruby/libzstd/common/bitstream.h +35 -23
  7. data/ext/zstdruby/libzstd/common/compiler.h +1 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +4 -2
  9. data/ext/zstdruby/libzstd/common/error_private.h +4 -4
  10. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  11. data/ext/zstdruby/libzstd/common/huf.h +1 -1
  12. data/ext/zstdruby/libzstd/common/mem.h +1 -0
  13. data/ext/zstdruby/libzstd/common/pool.c +61 -46
  14. data/ext/zstdruby/libzstd/common/pool.h +4 -0
  15. data/ext/zstdruby/libzstd/common/threading.c +11 -15
  16. data/ext/zstdruby/libzstd/common/threading.h +52 -32
  17. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -2
  18. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  19. data/ext/zstdruby/libzstd/common/zstd_internal.h +95 -21
  20. data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -1
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +4 -3
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +922 -2102
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.h +307 -0
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +308 -0
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +28 -0
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +242 -0
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +30 -0
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +749 -0
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +38 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +707 -0
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +67 -0
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +957 -0
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +14 -922
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +210 -133
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +20 -3
  36. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +373 -196
  37. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -0
  38. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -0
  39. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -0
  40. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -0
  41. data/ext/zstdruby/libzstd/dictBuilder/cover.c +33 -22
  42. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +8 -5
  43. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +1 -0
  44. data/ext/zstdruby/libzstd/dll/example/Makefile +5 -5
  45. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -0
  46. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -0
  47. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -0
  48. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +1 -0
  49. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -0
  50. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +1 -0
  51. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -0
  52. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -0
  53. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -0
  54. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -0
  55. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -0
  56. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -0
  57. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -0
  58. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -0
  59. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -0
  60. data/ext/zstdruby/libzstd/zstd.h +366 -118
  61. data/lib/zstd-ruby/version.rb +1 -1
  62. metadata +11 -1
data/ext/zstdruby/libzstd/compress/zstd_lazy.h
@@ -0,0 +1,38 @@
+ /*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTD_LAZY_H
+ #define ZSTD_LAZY_H
+
+ #include "zstd_compress.h"
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
+ void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
+ void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
+
+ size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+
+ size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+
+ #if defined (__cplusplus)
+ }
+ #endif
+
+ #endif /* ZSTD_LAZY_H */
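The header above declares one block compressor per search strategy, all sharing a single signature (size_t fn(ZSTD_CCtx*, const void* src, size_t srcSize)). The long-distance-matching code in the next file picks one of these at run time through ZSTD_selectBlockCompressor(). Below is a minimal, self-contained sketch of that function-pointer dispatch pattern; the types, stub compressors, and table are simplified stand-ins, not zstd's actual definitions:

#include <stddef.h>
#include <stdio.h>

typedef struct cctx_s cctx_t;  /* opaque stand-in for ZSTD_CCtx */
typedef size_t (*block_compressor)(cctx_t* ctx, const void* src, size_t srcSize);

/* Stub "compressors": real ones would emit sequences and return the
 * number of trailing literal bytes they could not compress. */
static size_t compress_greedy(cctx_t* ctx, const void* src, size_t srcSize)
{ (void)ctx; (void)src; return srcSize; }
static size_t compress_lazy(cctx_t* ctx, const void* src, size_t srcSize)
{ (void)ctx; (void)src; return srcSize; }
static size_t compress_lazy2(cctx_t* ctx, const void* src, size_t srcSize)
{ (void)ctx; (void)src; return srcSize; }

typedef enum { STRAT_GREEDY = 0, STRAT_LAZY, STRAT_LAZY2 } strategy_e;

/* Shape of ZSTD_selectBlockCompressor(strat, extDict): a lookup table
 * indexed by strategy (zstd's real table is also indexed by extDict). */
static block_compressor select_block_compressor(strategy_e strat)
{
    static const block_compressor table[] = {
        compress_greedy, compress_lazy, compress_lazy2,
    };
    return table[strat];
}

int main(void)
{
    const char block[] = "some input block";
    block_compressor const bc = select_block_compressor(STRAT_LAZY);
    printf("leftover literals: %zu\n", bc(NULL, block, sizeof block - 1));
    return 0;
}

Keeping every strategy behind one typedef is what lets the LDM pass in the next file stay strategy-agnostic: it compresses the literals between long matches with whatever compressor the table returns.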
data/ext/zstdruby/libzstd/compress/zstd_ldm.c
@@ -0,0 +1,707 @@
+ /*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+ #include "zstd_ldm.h"
+
+ #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+ #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+
+ #define LDM_BUCKET_SIZE_LOG 3
+ #define LDM_MIN_MATCH_LENGTH 64
+ #define LDM_HASH_RLOG 7
+ #define LDM_HASH_CHAR_OFFSET 10
+
+ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+ {
+     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+     params->enableLdm = enableLdm>0;
+     params->hashLog = 0;
+     params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+     params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+     params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
+     return 0;
+ }
+
+ void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
+ {
+     if (params->hashLog == 0) {
+         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+     }
+     if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+         params->hashEveryLog =
+             windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
+     }
+     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+ }
+
+ size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
+     size_t const ldmHSize = ((size_t)1) << hashLog;
+     size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+     size_t const ldmBucketSize =
+         ((size_t)1) << (hashLog - ldmBucketSizeLog);
+     return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+ }
+
+ /** ZSTD_ldm_getSmallHash() :
+ * numBits should be <= 32
+ * If numBits==0, returns 0.
+ * @return : the most significant numBits of value. */
+ static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
+ {
+     assert(numBits <= 32);
+     return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
+ }
+
+ /** ZSTD_ldm_getChecksum() :
+ * numBitsToDiscard should be <= 32
+ * @return : the next most significant 32 bits after numBitsToDiscard */
+ static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
+ {
+     assert(numBitsToDiscard <= 32);
+     return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
+ }
+
+ /** ZSTD_ldm_getTag() :
+ * Given the hash, returns the most significant numTagBits bits
+ * after (32 + hbits) bits.
+ *
+ * If there are not enough bits remaining, return the last
+ * numTagBits bits. */
+ static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
+ {
+     assert(numTagBits < 32 && hbits <= 32);
+     if (32 - hbits < numTagBits) {
+         return hash & (((U32)1 << numTagBits) - 1);
+     } else {
+         return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
+     }
+ }
+
+ /** ZSTD_ldm_getBucket() :
+ * Returns a pointer to the start of the bucket associated with hash. */
+ static ldmEntry_t* ZSTD_ldm_getBucket(
+         ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+ {
+     return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+ }
+
+ /** ZSTD_ldm_insertEntry() :
+ * Insert the entry with corresponding hash into the hash table */
+ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                  size_t const hash, const ldmEntry_t entry,
+                                  ldmParams_t const ldmParams)
+ {
+     BYTE* const bucketOffsets = ldmState->bucketOffsets;
+     *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
+     bucketOffsets[hash]++;
+     bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
+ }
+
+ /** ZSTD_ldm_makeEntryAndInsertByTag() :
+ *
+ * Gets the small hash, checksum, and tag from the rollingHash.
+ *
+ * If the tag matches (1 << ldmParams.hashEveryLog)-1, then
+ * creates an ldmEntry from the offset, and inserts it into the hash table.
+ *
+ * hBits is the length of the small hash, which is the most significant hBits
+ * of rollingHash. The checksum is the next 32 most significant bits, followed
+ * by ldmParams.hashEveryLog bits that make up the tag. */
+ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
+                                              U64 const rollingHash,
+                                              U32 const hBits,
+                                              U32 const offset,
+                                              ldmParams_t const ldmParams)
+ {
+     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
+     U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     if (tag == tagMask) {
+         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
+         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+         ldmEntry_t entry;
+         entry.offset = offset;
+         entry.checksum = checksum;
+         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
+     }
+ }
+
+ /** ZSTD_ldm_getRollingHash() :
+ * Get a 64-bit hash using the first len bytes from buf.
+ *
+ * Given bytes s = s_1, s_2, ... s_k, the hash is defined to be
+ * H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
+ *
+ * where the constant a is defined to be prime8bytes.
+ *
+ * The implementation adds an offset to each byte, so
+ * H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
+ static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
+ {
+     U64 ret = 0;
+     U32 i;
+     for (i = 0; i < len; i++) {
+         ret *= prime8bytes;
+         ret += buf[i] + LDM_HASH_CHAR_OFFSET;
+     }
+     return ret;
+ }
+
+ /** ZSTD_ldm_ipow() :
+ * Return base^exp. */
+ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
+ {
+     U64 ret = 1;
+     while (exp) {
+         if (exp & 1) { ret *= base; }
+         exp >>= 1;
+         base *= base;
+     }
+     return ret;
+ }
+
+ U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
+     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
+ }
+
+ /** ZSTD_ldm_updateHash() :
+ * Updates hash by removing toRemove and adding toAdd. */
+ static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
+ {
+     hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
+     hash *= prime8bytes;
+     hash += toAdd + LDM_HASH_CHAR_OFFSET;
+     return hash;
+ }
+
+ /** ZSTD_ldm_countBackwardsMatch() :
+ * Returns the number of bytes that match backwards before pIn and pMatch.
+ *
+ * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+ static size_t ZSTD_ldm_countBackwardsMatch(
+         const BYTE* pIn, const BYTE* pAnchor,
+         const BYTE* pMatch, const BYTE* pBase)
+ {
+     size_t matchLength = 0;
+     while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
+         pIn--;
+         pMatch--;
+         matchLength++;
+     }
+     return matchLength;
+ }
+
+ /** ZSTD_ldm_fillFastTables() :
+ *
+ * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+ * This is similar to ZSTD_loadDictionaryContent.
+ *
+ * The tables for the other strategies are filled within their
+ * block compressors. */
+ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+ {
+     const BYTE* const iend = (const BYTE*)end;
+     const U32 mls = zc->appliedParams.cParams.searchLength;
+
+     switch(zc->appliedParams.cParams.strategy)
+     {
+     case ZSTD_fast:
+         ZSTD_fillHashTable(zc, iend, mls);
+         zc->nextToUpdate = (U32)(iend - zc->base);
+         break;
+
+     case ZSTD_dfast:
+         ZSTD_fillDoubleHashTable(zc, iend, mls);
+         zc->nextToUpdate = (U32)(iend - zc->base);
+         break;
+
+     case ZSTD_greedy:
+     case ZSTD_lazy:
+     case ZSTD_lazy2:
+     case ZSTD_btlazy2:
+     case ZSTD_btopt:
+     case ZSTD_btultra:
+         break;
+     default:
+         assert(0);  /* not possible : not a valid strategy id */
+     }
+
+     return 0;
+ }
+
+ /** ZSTD_ldm_fillLdmHashTable() :
+ *
+ * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
+ * lastHash is the rolling hash that corresponds to lastHashed.
+ *
+ * Returns the rolling hash corresponding to position iend-1. */
+ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
+                                      U64 lastHash, const BYTE* lastHashed,
+                                      const BYTE* iend, const BYTE* base,
+                                      U32 hBits, ldmParams_t const ldmParams)
+ {
+     U64 rollingHash = lastHash;
+     const BYTE* cur = lastHashed + 1;
+
+     while (cur < iend) {
+         rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
+                                           cur[ldmParams.minMatchLength-1],
+                                           state->hashPower);
+         ZSTD_ldm_makeEntryAndInsertByTag(state,
+                                          rollingHash, hBits,
+                                          (U32)(cur - base), ldmParams);
+         ++cur;
+     }
+     return rollingHash;
+ }
+
+
+ /** ZSTD_ldm_limitTableUpdate() :
+ *
+ * Sets cctx->nextToUpdate to a position corresponding closer to anchor
+ * if it is far away
+ * (after a long match, only update tables a limited amount). */
+ static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+ {
+     U32 const current = (U32)(anchor - cctx->base);
+     if (current > cctx->nextToUpdate + 1024) {
+         cctx->nextToUpdate =
+             current - MIN(512, current - cctx->nextToUpdate - 1024);
+     }
+ }
+
+ typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ /* defined in zstd_compress.c */
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+
+ FORCE_INLINE_TEMPLATE
+ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
+                                       const void* src, size_t srcSize)
+ {
+     ldmState_t* const ldmState = &(cctx->ldmState);
+     const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
+     const U64 hashPower = ldmState->hashPower;
+     const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+     const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
+     const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     seqStore_t* const seqStorePtr = &(cctx->seqStore);
+     const BYTE* const base = cctx->base;
+     const BYTE* const istart = (const BYTE*)src;
+     const BYTE* ip = istart;
+     const BYTE* anchor = istart;
+     const U32 lowestIndex = cctx->dictLimit;
+     const BYTE* const lowest = base + lowestIndex;
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
+
+     const ZSTD_blockCompressor blockCompressor =
+         ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
+     U32* const repToConfirm = seqStorePtr->repToConfirm;
+     U32 savedRep[ZSTD_REP_NUM];
+     U64 rollingHash = 0;
+     const BYTE* lastHashed = NULL;
+     size_t i, lastLiterals;
+
+     /* Save seqStorePtr->rep and copy repToConfirm */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+
+     /* Main Search Loop */
+     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+         size_t mLength;
+         U32 const current = (U32)(ip - base);
+         size_t forwardMatchLength = 0, backwardMatchLength = 0;
+         ldmEntry_t* bestEntry = NULL;
+         if (ip != istart) {
+             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                               lastHashed[ldmParams.minMatchLength],
+                                               hashPower);
+         } else {
+             rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+         }
+         lastHashed = ip;
+
+         /* Do not insert and do not look for a match */
+         if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                 ldmTagMask) {
+             ip++;
+             continue;
+         }
+
+         /* Get the best entry and compute the match lengths */
+         {
+             ldmEntry_t* const bucket =
+                 ZSTD_ldm_getBucket(ldmState,
+                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                    ldmParams);
+             ldmEntry_t* cur;
+             size_t bestMatchLength = 0;
+             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                 const BYTE* const pMatch = cur->offset + base;
+                 size_t curForwardMatchLength, curBackwardMatchLength,
+                        curTotalMatchLength;
+                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                     continue;
+                 }
+
+                 curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                 if (curForwardMatchLength < ldmParams.minMatchLength) {
+                     continue;
+                 }
+                 curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                              ip, anchor, pMatch, lowest);
+                 curTotalMatchLength = curForwardMatchLength +
+                                       curBackwardMatchLength;
+
+                 if (curTotalMatchLength > bestMatchLength) {
+                     bestMatchLength = curTotalMatchLength;
+                     forwardMatchLength = curForwardMatchLength;
+                     backwardMatchLength = curBackwardMatchLength;
+                     bestEntry = cur;
+                 }
+             }
+         }
+
+         /* No match found -- continue searching */
+         if (bestEntry == NULL) {
+             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
+                                              hBits, current,
+                                              ldmParams);
+             ip++;
+             continue;
+         }
+
+         /* Match found */
+         mLength = forwardMatchLength + backwardMatchLength;
+         ip -= backwardMatchLength;
+
+         /* Call the block compressor on the remaining literals */
+         {
+             U32 const matchIndex = bestEntry->offset;
+             const BYTE* const match = base + matchIndex - backwardMatchLength;
+             U32 const offset = (U32)(ip - match);
+
+             /* Overwrite rep codes */
+             for (i = 0; i < ZSTD_REP_NUM; i++)
+                 seqStorePtr->rep[i] = repToConfirm[i];
+
+             /* Fill tables for block compressor */
+             ZSTD_ldm_limitTableUpdate(cctx, anchor);
+             ZSTD_ldm_fillFastTables(cctx, anchor);
+
+             /* Call block compressor and get remaining literals */
+             lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
+             cctx->nextToUpdate = (U32)(ip - base);
+
+             /* Update repToConfirm with the new offset */
+             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                 repToConfirm[i] = repToConfirm[i-1];
+             repToConfirm[0] = offset;
+
+             /* Store the sequence with the leftover literals */
+             ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                           offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+         }
+
+         /* Insert the current entry into the hash table */
+         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                          (U32)(lastHashed - base),
+                                          ldmParams);
+
+         assert(ip + backwardMatchLength == lastHashed);
+
+         /* Fill the hash table from lastHashed+1 to ip+mLength */
+         /* Heuristic: don't need to fill the entire table at end of block */
+         if (ip + mLength < ilimit) {
+             rollingHash = ZSTD_ldm_fillLdmHashTable(
+                               ldmState, rollingHash, lastHashed,
+                               ip + mLength, base, hBits, ldmParams);
+             lastHashed = ip + mLength - 1;
+         }
+         ip += mLength;
+         anchor = ip;
+         /* Check immediate repcode */
+         while ( (ip < ilimit)
+              && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
+              && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
+
+             size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
+                                               iend) + 4;
+             /* Swap repToConfirm[1] <=> repToConfirm[0] */
+             {
+                 U32 const tmpOff = repToConfirm[1];
+                 repToConfirm[1] = repToConfirm[0];
+                 repToConfirm[0] = tmpOff;
+             }
+
+             ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+
+             /* Fill the hash table from lastHashed+1 to ip+rLength */
+             if (ip + rLength < ilimit) {
+                 rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                   ldmState, rollingHash, lastHashed,
+                                   ip + rLength, base, hBits, ldmParams);
+                 lastHashed = ip + rLength - 1;
+             }
+             ip += rLength;
+             anchor = ip;
+         }
+     }
+
+     /* Overwrite rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = repToConfirm[i];
+
+     ZSTD_ldm_limitTableUpdate(cctx, anchor);
+     ZSTD_ldm_fillFastTables(cctx, anchor);
+
+     lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
+     cctx->nextToUpdate = (U32)(iend - base);
+
+     /* Restore seqStorePtr->rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = savedRep[i];
+
+     /* Return the last literals size */
+     return lastLiterals;
+ }
+
+ size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
+                               const void* src, size_t srcSize)
+ {
+     return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+ }
+
+ static size_t ZSTD_compressBlock_ldm_extDict_generic(
+         ZSTD_CCtx* ctx,
+         const void* src, size_t srcSize)
+ {
+     ldmState_t* const ldmState = &(ctx->ldmState);
+     const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
+     const U64 hashPower = ldmState->hashPower;
+     const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+     const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
+     const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     seqStore_t* const seqStorePtr = &(ctx->seqStore);
+     const BYTE* const base = ctx->base;
+     const BYTE* const dictBase = ctx->dictBase;
+     const BYTE* const istart = (const BYTE*)src;
+     const BYTE* ip = istart;
+     const BYTE* anchor = istart;
+     const U32 lowestIndex = ctx->lowLimit;
+     const BYTE* const dictStart = dictBase + lowestIndex;
+     const U32 dictLimit = ctx->dictLimit;
+     const BYTE* const lowPrefixPtr = base + dictLimit;
+     const BYTE* const dictEnd = dictBase + dictLimit;
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
+
+     const ZSTD_blockCompressor blockCompressor =
+         ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
+     U32* const repToConfirm = seqStorePtr->repToConfirm;
+     U32 savedRep[ZSTD_REP_NUM];
+     U64 rollingHash = 0;
+     const BYTE* lastHashed = NULL;
+     size_t i, lastLiterals;
+
+     /* Save seqStorePtr->rep and copy repToConfirm */
+     for (i = 0; i < ZSTD_REP_NUM; i++) {
+         savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+     }
+
+     /* Search Loop */
+     while (ip < ilimit) {   /* < instead of <=, because (ip+1) */
+         size_t mLength;
+         const U32 current = (U32)(ip-base);
+         size_t forwardMatchLength = 0, backwardMatchLength = 0;
+         ldmEntry_t* bestEntry = NULL;
+         if (ip != istart) {
+             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                               lastHashed[ldmParams.minMatchLength],
+                                               hashPower);
+         } else {
+             rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+         }
+         lastHashed = ip;
+
+         if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                 ldmTagMask) {
+             /* Don't insert and don't look for a match */
+             ip++;
+             continue;
+         }
+
+         /* Get the best entry and compute the match lengths */
+         {
+             ldmEntry_t* const bucket =
+                 ZSTD_ldm_getBucket(ldmState,
+                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                    ldmParams);
+             ldmEntry_t* cur;
+             size_t bestMatchLength = 0;
+             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                 const BYTE* const curMatchBase =
+                     cur->offset < dictLimit ? dictBase : base;
+                 const BYTE* const pMatch = curMatchBase + cur->offset;
+                 const BYTE* const matchEnd =
+                     cur->offset < dictLimit ? dictEnd : iend;
+                 const BYTE* const lowMatchPtr =
+                     cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                 size_t curForwardMatchLength, curBackwardMatchLength,
+                        curTotalMatchLength;
+
+                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                     continue;
+                 }
+
+                 curForwardMatchLength = ZSTD_count_2segments(
+                                             ip, pMatch, iend,
+                                             matchEnd, lowPrefixPtr);
+                 if (curForwardMatchLength < ldmParams.minMatchLength) {
+                     continue;
+                 }
+                 curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                              ip, anchor, pMatch, lowMatchPtr);
+                 curTotalMatchLength = curForwardMatchLength +
+                                       curBackwardMatchLength;
+
+                 if (curTotalMatchLength > bestMatchLength) {
+                     bestMatchLength = curTotalMatchLength;
+                     forwardMatchLength = curForwardMatchLength;
+                     backwardMatchLength = curBackwardMatchLength;
+                     bestEntry = cur;
+                 }
+             }
+         }
+
+         /* No match found -- continue searching */
+         if (bestEntry == NULL) {
+             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                              (U32)(lastHashed - base),
+                                              ldmParams);
+             ip++;
+             continue;
+         }
+
+         /* Match found */
+         mLength = forwardMatchLength + backwardMatchLength;
+         ip -= backwardMatchLength;
+
+         /* Call the block compressor on the remaining literals */
+         {
+             /* ip = current - backwardMatchLength
+              * The match is at (bestEntry->offset - backwardMatchLength) */
+             U32 const matchIndex = bestEntry->offset;
+             U32 const offset = current - matchIndex;
+
+             /* Overwrite rep codes */
+             for (i = 0; i < ZSTD_REP_NUM; i++)
+                 seqStorePtr->rep[i] = repToConfirm[i];
+
+             /* Fill the hash table for the block compressor */
+             ZSTD_ldm_limitTableUpdate(ctx, anchor);
+             ZSTD_ldm_fillFastTables(ctx, anchor);
+
+             /* Call block compressor and get remaining literals */
+             lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
+             ctx->nextToUpdate = (U32)(ip - base);
+
+             /* Update repToConfirm with the new offset */
+             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                 repToConfirm[i] = repToConfirm[i-1];
+             repToConfirm[0] = offset;
+
+             /* Store the sequence with the leftover literals */
+             ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                           offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+         }
+
+         /* Insert the current entry into the hash table */
+         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                          (U32)(lastHashed - base),
+                                          ldmParams);
+
+         /* Fill the hash table from lastHashed+1 to ip+mLength */
+         assert(ip + backwardMatchLength == lastHashed);
+         if (ip + mLength < ilimit) {
+             rollingHash = ZSTD_ldm_fillLdmHashTable(
+                               ldmState, rollingHash, lastHashed,
+                               ip + mLength, base, hBits,
+                               ldmParams);
+             lastHashed = ip + mLength - 1;
+         }
+         ip += mLength;
+         anchor = ip;
+
+         /* check immediate repcode */
+         while (ip < ilimit) {
+             U32 const current2 = (U32)(ip-base);
+             U32 const repIndex2 = current2 - repToConfirm[1];
+             const BYTE* repMatch2 = repIndex2 < dictLimit ?
+                 dictBase + repIndex2 : base + repIndex2;
+             if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
+                     (repIndex2 > lowestIndex))  /* intentional overflow */
+                && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                 const BYTE* const repEnd2 = repIndex2 < dictLimit ?
+                     dictEnd : iend;
+                 size_t const repLength2 =
+                     ZSTD_count_2segments(ip+4, repMatch2+4, iend,
+                                          repEnd2, lowPrefixPtr) + 4;
+
+                 U32 tmpOffset = repToConfirm[1];
+                 repToConfirm[1] = repToConfirm[0];
+                 repToConfirm[0] = tmpOffset;
+
+                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+
+                 /* Fill the hash table from lastHashed+1 to ip+repLength2 */
+                 if (ip + repLength2 < ilimit) {
+                     rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                       ldmState, rollingHash, lastHashed,
+                                       ip + repLength2, base, hBits,
+                                       ldmParams);
+                     lastHashed = ip + repLength2 - 1;
+                 }
+                 ip += repLength2;
+                 anchor = ip;
+                 continue;
+             }
+             break;
+         }
+     }
+
+     /* Overwrite rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = repToConfirm[i];
+
+     ZSTD_ldm_limitTableUpdate(ctx, anchor);
+     ZSTD_ldm_fillFastTables(ctx, anchor);
+
+     /* Call the block compressor one last time on the last literals */
+     lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
+     ctx->nextToUpdate = (U32)(iend - base);
+
+     /* Restore seqStorePtr->rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = savedRep[i];
+
+     /* Return the last literals size */
+     return lastLiterals;
+ }
+
+ size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
+                                       const void* src, size_t srcSize)
+ {
+     return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+ }
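ZSTD_ldm_getRollingHash() and ZSTD_ldm_updateHash() above implement a Rabin-Karp style polynomial rolling hash, H(s) = (s_1 + OFFSET)*a^(k-1) + ... + (s_k + OFFSET)*a^0 (mod 2^64), so sliding the window by one byte costs one subtract, one multiply, and one add instead of rehashing all k bytes. The following self-contained sketch checks the sliding update against hashing from scratch; the multiplier A is an arbitrary stand-in for zstd's prime8bytes constant, and OFFSET mirrors LDM_HASH_CHAR_OFFSET:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OFFSET 10u  /* mirrors LDM_HASH_CHAR_OFFSET */
static const uint64_t A = 0x9E3779B97F4A7C15ULL;  /* stand-in for prime8bytes */

/* Hash of the first len bytes, as in ZSTD_ldm_getRollingHash(). */
static uint64_t hash_direct(const uint8_t* buf, uint32_t len)
{
    uint64_t h = 0;
    for (uint32_t i = 0; i < len; i++) h = h * A + buf[i] + OFFSET;
    return h;
}

/* base^exp by square-and-multiply, as in ZSTD_ldm_ipow(). */
static uint64_t ipow(uint64_t base, uint64_t exp)
{
    uint64_t r = 1;
    while (exp) { if (exp & 1) r *= base; exp >>= 1; base *= base; }
    return r;
}

/* Slide the window by one byte, as in ZSTD_ldm_updateHash(). */
static uint64_t update(uint64_t h, uint8_t drop, uint8_t add, uint64_t power)
{
    h -= (drop + OFFSET) * power;  /* remove the oldest byte's term */
    return h * A + add + OFFSET;   /* shift window, append newest byte */
}

int main(void)
{
    const uint8_t* s = (const uint8_t*)"long distance matching example";
    uint32_t const n = (uint32_t)strlen((const char*)s);
    uint32_t const k = 8;                    /* window length */
    uint64_t const power = ipow(A, k - 1);   /* cf. ZSTD_ldm_getHashPower() */
    uint64_t h = hash_direct(s, k);
    for (uint32_t i = 1; i + k <= n; i++) {  /* slide across the buffer */
        h = update(h, s[i-1], s[i+k-1], power);
        assert(h == hash_direct(s + i, k));  /* must match a fresh hash */
    }
    printf("rolling update matches direct hash at every position\n");
    return 0;
}

This identity is what lets ZSTD_ldm_fillLdmHashTable() advance one position at a time while touching only two bytes per step.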