extzstd 0.1.1 → 0.2

Files changed (85)
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +18 -0
  3. data/README.md +15 -50
  4. data/contrib/zstd/CONTRIBUTING.md +1 -1
  5. data/contrib/zstd/COPYING +339 -0
  6. data/contrib/zstd/Makefile +82 -51
  7. data/contrib/zstd/NEWS +92 -5
  8. data/contrib/zstd/README.md +50 -41
  9. data/contrib/zstd/appveyor.yml +164 -102
  10. data/contrib/zstd/circle.yml +10 -22
  11. data/contrib/zstd/lib/BUCK +31 -10
  12. data/contrib/zstd/lib/Makefile +57 -31
  13. data/contrib/zstd/lib/README.md +68 -37
  14. data/contrib/zstd/lib/common/bitstream.h +130 -76
  15. data/contrib/zstd/lib/common/compiler.h +86 -0
  16. data/contrib/zstd/lib/common/error_private.c +15 -11
  17. data/contrib/zstd/lib/common/error_private.h +8 -8
  18. data/contrib/zstd/lib/common/fse.h +19 -9
  19. data/contrib/zstd/lib/common/fse_decompress.c +3 -22
  20. data/contrib/zstd/lib/common/huf.h +68 -26
  21. data/contrib/zstd/lib/common/mem.h +23 -35
  22. data/contrib/zstd/lib/common/pool.c +123 -63
  23. data/contrib/zstd/lib/common/pool.h +19 -10
  24. data/contrib/zstd/lib/common/threading.c +11 -16
  25. data/contrib/zstd/lib/common/threading.h +52 -33
  26. data/contrib/zstd/lib/common/xxhash.c +28 -22
  27. data/contrib/zstd/lib/common/zstd_common.c +40 -27
  28. data/contrib/zstd/lib/common/zstd_errors.h +43 -34
  29. data/contrib/zstd/lib/common/zstd_internal.h +131 -123
  30. data/contrib/zstd/lib/compress/fse_compress.c +17 -33
  31. data/contrib/zstd/lib/compress/huf_compress.c +15 -9
  32. data/contrib/zstd/lib/compress/zstd_compress.c +2096 -2363
  33. data/contrib/zstd/lib/compress/zstd_compress_internal.h +462 -0
  34. data/contrib/zstd/lib/compress/zstd_double_fast.c +309 -0
  35. data/contrib/zstd/lib/compress/zstd_double_fast.h +29 -0
  36. data/contrib/zstd/lib/compress/zstd_fast.c +243 -0
  37. data/contrib/zstd/lib/compress/zstd_fast.h +31 -0
  38. data/contrib/zstd/lib/compress/zstd_lazy.c +765 -0
  39. data/contrib/zstd/lib/compress/zstd_lazy.h +39 -0
  40. data/contrib/zstd/lib/compress/zstd_ldm.c +707 -0
  41. data/contrib/zstd/lib/compress/zstd_ldm.h +68 -0
  42. data/contrib/zstd/lib/compress/zstd_opt.c +785 -0
  43. data/contrib/zstd/lib/compress/zstd_opt.h +19 -908
  44. data/contrib/zstd/lib/compress/zstdmt_compress.c +737 -327
  45. data/contrib/zstd/lib/compress/zstdmt_compress.h +88 -26
  46. data/contrib/zstd/lib/decompress/huf_decompress.c +158 -50
  47. data/contrib/zstd/lib/decompress/zstd_decompress.c +884 -699
  48. data/contrib/zstd/lib/deprecated/zbuff.h +5 -4
  49. data/contrib/zstd/lib/deprecated/zbuff_common.c +5 -5
  50. data/contrib/zstd/lib/deprecated/zbuff_compress.c +6 -4
  51. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +5 -4
  52. data/contrib/zstd/lib/dictBuilder/cover.c +93 -77
  53. data/contrib/zstd/lib/dictBuilder/zdict.c +107 -92
  54. data/contrib/zstd/lib/dictBuilder/zdict.h +112 -102
  55. data/contrib/zstd/lib/legacy/zstd_legacy.h +9 -4
  56. data/contrib/zstd/lib/legacy/zstd_v01.c +7 -6
  57. data/contrib/zstd/lib/legacy/zstd_v01.h +5 -4
  58. data/contrib/zstd/lib/legacy/zstd_v02.c +27 -99
  59. data/contrib/zstd/lib/legacy/zstd_v02.h +5 -4
  60. data/contrib/zstd/lib/legacy/zstd_v03.c +26 -98
  61. data/contrib/zstd/lib/legacy/zstd_v03.h +5 -4
  62. data/contrib/zstd/lib/legacy/zstd_v04.c +22 -91
  63. data/contrib/zstd/lib/legacy/zstd_v04.h +5 -4
  64. data/contrib/zstd/lib/legacy/zstd_v05.c +23 -99
  65. data/contrib/zstd/lib/legacy/zstd_v05.h +5 -4
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +22 -96
  67. data/contrib/zstd/lib/legacy/zstd_v06.h +5 -4
  68. data/contrib/zstd/lib/legacy/zstd_v07.c +19 -95
  69. data/contrib/zstd/lib/legacy/zstd_v07.h +5 -4
  70. data/contrib/zstd/lib/zstd.h +895 -271
  71. data/ext/extconf.rb +11 -2
  72. data/ext/extzstd.c +45 -128
  73. data/ext/extzstd.h +74 -31
  74. data/ext/extzstd_stream.c +401 -142
  75. data/ext/zstd_common.c +5 -0
  76. data/ext/zstd_compress.c +8 -0
  77. data/ext/zstd_decompress.c +1 -0
  78. data/ext/zstd_dictbuilder.c +2 -0
  79. data/lib/extzstd/version.rb +1 -1
  80. data/lib/extzstd.rb +48 -1
  81. data/test/test_basic.rb +9 -1
  82. metadata +17 -7
  83. data/HISTORY.ja +0 -10
  84. data/contrib/zstd/LICENSE-examples +0 -11
  85. data/contrib/zstd/PATENTS +0 -33
data/contrib/zstd/lib/compress/zstd_ldm.c
@@ -0,0 +1,707 @@
+ /*
+  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+  * in the COPYING file in the root directory of this source tree).
+  */
+
+ #include "zstd_ldm.h"
+
+ #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+ #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+
+ #define LDM_BUCKET_SIZE_LOG 3
+ #define LDM_MIN_MATCH_LENGTH 64
+ #define LDM_HASH_RLOG 7
+ #define LDM_HASH_CHAR_OFFSET 10
+
+ size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+ {
+     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+     params->enableLdm = enableLdm > 0;
+     params->hashLog = 0;
+     params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+     params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+     params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
+     return 0;
+ }
+
+ void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
+ {
+     if (params->hashLog == 0) {
+         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+     }
+     if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+         params->hashEveryLog =
+             windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
+     }
+     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+ }
+
+ size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
+     size_t const ldmHSize = ((size_t)1) << hashLog;
+     size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+     size_t const ldmBucketSize =
+         ((size_t)1) << (hashLog - ldmBucketSizeLog);
+     return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+ }
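
For a sense of scale, a minimal sketch of the same arithmetic, assuming sizeof(ldmEntry_t) == 8 (a U32 offset plus a U32 checksum, the two fields ZSTD_ldm_makeEntryAndInsertByTag fills in below). With hashLog = 20 (what ZSTD_ldm_adjustParameters derives for windowLog 27) and the default bucketSizeLog = 3, the table costs one insertion-offset byte per bucket plus 2^20 eight-byte entries, roughly 8 MiB:

    #include <stdio.h>

    /* Hypothetical stand-in for ldmEntry_t: a U32 offset + U32 checksum. */
    typedef struct { unsigned offset; unsigned checksum; } demoEntry_t;

    /* Same formula as ZSTD_ldm_getTableSize(), inlined for illustration. */
    static size_t demo_tableSize(unsigned hashLog, unsigned bucketSizeLog)
    {
        size_t const hSize = (size_t)1 << hashLog;          /* total entry count */
        size_t const bucketSize =
            (size_t)1 << (hashLog - bucketSizeLog);         /* one offset byte per bucket */
        return bucketSize + hSize * sizeof(demoEntry_t);
    }

    int main(void)
    {
        /* hashLog 20, bucketSizeLog 3: 2^17 bucket bytes + 2^20 * 8 entry bytes */
        printf("%zu bytes\n", demo_tableSize(20, 3));       /* 8519680, ~8.1 MiB */
        return 0;
    }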
+
+ /** ZSTD_ldm_getSmallHash() :
+  *  numBits should be <= 32
+  *  If numBits==0, returns 0.
+  *  @return : the most significant numBits of value. */
+ static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
+ {
+     assert(numBits <= 32);
+     return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
+ }
+
+ /** ZSTD_ldm_getChecksum() :
+  *  numBitsToDiscard should be <= 32
+  *  @return : the next most significant 32 bits after numBitsToDiscard */
+ static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
+ {
+     assert(numBitsToDiscard <= 32);
+     return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
+ }
+
+ /** ZSTD_ldm_getTag() :
+  *  Given the hash, returns the most significant numTagBits bits
+  *  after (32 + hbits) bits.
+  *
+  *  If there are not enough bits remaining, return the last
+  *  numTagBits bits. */
+ static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
+ {
+     assert(numTagBits < 32 && hbits <= 32);
+     if (32 - hbits < numTagBits) {
+         return hash & (((U32)1 << numTagBits) - 1);
+     } else {
+         return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
+     }
+ }
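
Taken together, these three accessors carve one 64-bit rolling hash into consecutive fields read from the most significant end: [hBits small hash][32-bit checksum][hashEveryLog-bit tag]. A minimal sketch of the common case where hBits + 32 + tagBits <= 64, with hBits = 20 and tagBits = 7 chosen purely for illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t const hash  = 0xABCDE0123456789FULL; /* arbitrary sample hash */
        unsigned const hBits = 20, tagBits = 7;       /* hBits + 32 + tagBits <= 64 */

        uint32_t const smallHash = (uint32_t)(hash >> (64 - hBits));       /* top 20 bits */
        uint32_t const checksum  = (uint32_t)(hash >> (64 - 32 - hBits));  /* next 32 bits */
        uint32_t const tag       = (uint32_t)(hash >> (32 - hBits - tagBits))
                                   & ((1u << tagBits) - 1);                /* next 7 bits */

        /* prints: small=abcde checksum=01234567 tag=44 */
        printf("small=%05x checksum=%08x tag=%02x\n", smallHash, checksum, tag);
        return 0;
    }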
+
+ /** ZSTD_ldm_getBucket() :
+  *  Returns a pointer to the start of the bucket associated with hash. */
+ static ldmEntry_t* ZSTD_ldm_getBucket(
+         ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+ {
+     return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+ }
+
+ /** ZSTD_ldm_insertEntry() :
+  *  Insert the entry with corresponding hash into the hash table */
+ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                  size_t const hash, const ldmEntry_t entry,
+                                  ldmParams_t const ldmParams)
+ {
+     BYTE* const bucketOffsets = ldmState->bucketOffsets;
+     *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
+     bucketOffsets[hash]++;
+     bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
+ }
+
+ /** ZSTD_ldm_makeEntryAndInsertByTag() :
+  *
+  *  Gets the small hash, checksum, and tag from the rollingHash.
+  *
+  *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
+  *  creates an ldmEntry from the offset, and inserts it into the hash table.
+  *
+  *  hBits is the length of the small hash, which is the most significant hBits
+  *  of rollingHash. The checksum is the next 32 most significant bits, followed
+  *  by ldmParams.hashEveryLog bits that make up the tag. */
+ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
+                                              U64 const rollingHash,
+                                              U32 const hBits,
+                                              U32 const offset,
+                                              ldmParams_t const ldmParams)
+ {
+     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
+     U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     if (tag == tagMask) {
+         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
+         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+         ldmEntry_t entry;
+         entry.offset = offset;
+         entry.checksum = checksum;
+         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
+     }
+ }
+
+ /** ZSTD_ldm_getRollingHash() :
+  *  Get a 64-bit hash using the first len bytes from buf.
+  *
+  *  Given bytes s = s_1, s_2, ... s_k, the hash is defined to be
+  *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
+  *
+  *  where the constant a is defined to be prime8bytes.
+  *
+  *  The implementation adds an offset to each byte, so
+  *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
+ static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
+ {
+     U64 ret = 0;
+     U32 i;
+     for (i = 0; i < len; i++) {
+         ret *= prime8bytes;
+         ret += buf[i] + LDM_HASH_CHAR_OFFSET;
+     }
+     return ret;
+ }
+
+ /** ZSTD_ldm_ipow() :
+  *  Return base^exp. */
+ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
+ {
+     U64 ret = 1;
+     while (exp) {
+         if (exp & 1) { ret *= base; }
+         exp >>= 1;
+         base *= base;
+     }
+     return ret;
+ }
+
+ U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
+     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
+ }
+
+ /** ZSTD_ldm_updateHash() :
+  *  Updates hash by removing toRemove and adding toAdd. */
+ static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
+ {
+     hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
+     hash *= prime8bytes;
+     hash += toAdd + LDM_HASH_CHAR_OFFSET;
+     return hash;
+ }
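
The point of ZSTD_ldm_updateHash() is that the window hash slides one byte in O(1): subtract the outgoing byte's contribution (toRemove + offset) * a^(k-1), rescale by a, add the incoming byte. Since all arithmetic is mod 2^64, the identity holds for any odd multiplier a, which is why hashPower is precomputed once as a^(minMatchLength-1) by ZSTD_ldm_getHashPower(). A minimal self-contained sketch that checks the rolling update against a full recompute, with a stand-in multiplier because prime8bytes lives in zstd's internal headers:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CHAR_OFFSET 10                            /* mirrors LDM_HASH_CHAR_OFFSET */
    static const uint64_t A = 0x9E3779B185EBCA87ULL;  /* stand-in odd multiplier */

    static uint64_t fullHash(const uint8_t* buf, unsigned len)  /* Horner form of H(s) */
    {
        uint64_t h = 0;
        for (unsigned i = 0; i < len; i++) h = h * A + buf[i] + CHAR_OFFSET;
        return h;
    }

    static uint64_t ipow(uint64_t base, uint64_t exp)  /* square-and-multiply */
    {
        uint64_t r = 1;
        for (; exp; exp >>= 1, base *= base) if (exp & 1) r *= base;
        return r;
    }

    int main(void)
    {
        uint8_t data[] = "rolling hash demo over a short byte string";
        unsigned const k = 16;                      /* window length */
        uint64_t const hashPower = ipow(A, k - 1);  /* a^(k-1), as ZSTD_ldm_getHashPower */

        uint64_t h = fullHash(data, k);
        for (unsigned i = 1; i + k <= sizeof data - 1; i++) {
            /* slide: remove data[i-1], append data[i+k-1] -- as ZSTD_ldm_updateHash */
            h = (h - (data[i-1] + CHAR_OFFSET) * hashPower) * A + data[i+k-1] + CHAR_OFFSET;
            assert(h == fullHash(data + i, k));     /* matches full recompute */
        }
        puts("rolling update matches full recompute");
        return 0;
    }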
+
+ /** ZSTD_ldm_countBackwardsMatch() :
+  *  Returns the number of bytes that match backwards before pIn and pMatch.
+  *
+  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+ static size_t ZSTD_ldm_countBackwardsMatch(
+             const BYTE* pIn, const BYTE* pAnchor,
+             const BYTE* pMatch, const BYTE* pBase)
+ {
+     size_t matchLength = 0;
+     while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
+         pIn--;
+         pMatch--;
+         matchLength++;
+     }
+     return matchLength;
+ }
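
A match found by the forward probe can often be extended to the left of ip, as long as the extension stays after the anchor (literals already consumed) and after the window base. A tiny illustration over a hypothetical buffer:

    #include <stdio.h>

    /* Same loop as ZSTD_ldm_countBackwardsMatch, over plain char buffers. */
    static size_t countBackwards(const char* pIn, const char* pAnchor,
                                 const char* pMatch, const char* pBase)
    {
        size_t n = 0;
        while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
            pIn--; pMatch--; n++;
        }
        return n;
    }

    int main(void)
    {
        const char* buf = "abcXYZ....abcXYZ";   /* hypothetical window */
        /* Forward probe matched "XYZ" at offsets 13 and 3; the shared
         * "abc" just before it extends the match 3 bytes backwards. */
        printf("%zu\n", countBackwards(buf + 13, buf, buf + 3, buf)); /* 3 */
        return 0;
    }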
+
+ /** ZSTD_ldm_fillFastTables() :
+  *
+  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+  *  This is similar to ZSTD_loadDictionaryContent.
+  *
+  *  The tables for the other strategies are filled within their
+  *  block compressors. */
+ static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+ {
+     const BYTE* const iend = (const BYTE*)end;
+     const U32 mls = zc->appliedParams.cParams.searchLength;
+
+     switch(zc->appliedParams.cParams.strategy)
+     {
+     case ZSTD_fast:
+         ZSTD_fillHashTable(zc, iend, mls);
+         zc->nextToUpdate = (U32)(iend - zc->base);
+         break;
+
+     case ZSTD_dfast:
+         ZSTD_fillDoubleHashTable(zc, iend, mls);
+         zc->nextToUpdate = (U32)(iend - zc->base);
+         break;
+
+     case ZSTD_greedy:
+     case ZSTD_lazy:
+     case ZSTD_lazy2:
+     case ZSTD_btlazy2:
+     case ZSTD_btopt:
+     case ZSTD_btultra:
+         break;
+     default:
+         assert(0);  /* not possible : not a valid strategy id */
+     }
+
+     return 0;
+ }
+
+ /** ZSTD_ldm_fillLdmHashTable() :
+  *
+  *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
+  *  lastHash is the rolling hash that corresponds to lastHashed.
+  *
+  *  Returns the rolling hash corresponding to position iend-1. */
+ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
+                                      U64 lastHash, const BYTE* lastHashed,
+                                      const BYTE* iend, const BYTE* base,
+                                      U32 hBits, ldmParams_t const ldmParams)
+ {
+     U64 rollingHash = lastHash;
+     const BYTE* cur = lastHashed + 1;
+
+     while (cur < iend) {
+         rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
+                                           cur[ldmParams.minMatchLength-1],
+                                           state->hashPower);
+         ZSTD_ldm_makeEntryAndInsertByTag(state,
+                                          rollingHash, hBits,
+                                          (U32)(cur - base), ldmParams);
+         ++cur;
+     }
+     return rollingHash;
+ }
+
+ /** ZSTD_ldm_limitTableUpdate() :
+  *
+  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
+  *  if it is far away
+  *  (after a long match, only update tables a limited amount). */
+ static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+ {
+     U32 const current = (U32)(anchor - cctx->base);
+     if (current > cctx->nextToUpdate + 1024) {
+         cctx->nextToUpdate =
+             current - MIN(512, current - cctx->nextToUpdate - 1024);
+     }
+ }
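
In effect, after a long match the fast tables are never asked to catch up on more than 1024 + 512 positions: when the gap from nextToUpdate to the anchor exceeds 1024, nextToUpdate jumps forward to at most 512 positions before the anchor. A minimal sketch of the same clamp with hypothetical positions:

    #include <stdio.h>

    #define MIN(a,b) ((a) < (b) ? (a) : (b))

    /* Same arithmetic as ZSTD_ldm_limitTableUpdate, on plain indices. */
    static unsigned limitUpdate(unsigned nextToUpdate, unsigned current)
    {
        if (current > nextToUpdate + 1024)
            nextToUpdate = current - MIN(512, current - nextToUpdate - 1024);
        return nextToUpdate;
    }

    int main(void)
    {
        /* Small gap (<= 1024): left untouched. */
        printf("%u\n", limitUpdate(1000, 1800));    /* 1000 */
        /* Gap just over the threshold: advance by the overshoot only. */
        printf("%u\n", limitUpdate(1000, 2100));    /* 2024 (overshoot is 76) */
        /* Huge gap after a long match: clamp to 512 before current. */
        printf("%u\n", limitUpdate(1000, 50000));   /* 49488 */
        return 0;
    }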
+
+ typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+ /* defined in zstd_compress.c */
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+
+ FORCE_INLINE_TEMPLATE
+ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
+                                       const void* src, size_t srcSize)
+ {
+     ldmState_t* const ldmState = &(cctx->ldmState);
+     const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
+     const U64 hashPower = ldmState->hashPower;
+     const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+     const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
+     const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     seqStore_t* const seqStorePtr = &(cctx->seqStore);
+     const BYTE* const base = cctx->base;
+     const BYTE* const istart = (const BYTE*)src;
+     const BYTE* ip = istart;
+     const BYTE* anchor = istart;
+     const U32 lowestIndex = cctx->dictLimit;
+     const BYTE* const lowest = base + lowestIndex;
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
+
+     const ZSTD_blockCompressor blockCompressor =
+         ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
+     U32* const repToConfirm = seqStorePtr->repToConfirm;
+     U32 savedRep[ZSTD_REP_NUM];
+     U64 rollingHash = 0;
+     const BYTE* lastHashed = NULL;
+     size_t i, lastLiterals;
+
+     /* Save seqStorePtr->rep and copy repToConfirm */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+
+     /* Main Search Loop */
+     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+         size_t mLength;
+         U32 const current = (U32)(ip - base);
+         size_t forwardMatchLength = 0, backwardMatchLength = 0;
+         ldmEntry_t* bestEntry = NULL;
+         if (ip != istart) {
+             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                               lastHashed[ldmParams.minMatchLength],
+                                               hashPower);
+         } else {
+             rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+         }
+         lastHashed = ip;
+
+         /* Do not insert and do not look for a match */
+         if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                 ldmTagMask) {
+             ip++;
+             continue;
+         }
+
+         /* Get the best entry and compute the match lengths */
+         {
+             ldmEntry_t* const bucket =
+                 ZSTD_ldm_getBucket(ldmState,
+                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                    ldmParams);
+             ldmEntry_t* cur;
+             size_t bestMatchLength = 0;
+             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                 const BYTE* const pMatch = cur->offset + base;
+                 size_t curForwardMatchLength, curBackwardMatchLength,
+                        curTotalMatchLength;
+                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                     continue;
+                 }
+
+                 curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                 if (curForwardMatchLength < ldmParams.minMatchLength) {
+                     continue;
+                 }
+                 curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                              ip, anchor, pMatch, lowest);
+                 curTotalMatchLength = curForwardMatchLength +
+                                       curBackwardMatchLength;
+
+                 if (curTotalMatchLength > bestMatchLength) {
+                     bestMatchLength = curTotalMatchLength;
+                     forwardMatchLength = curForwardMatchLength;
+                     backwardMatchLength = curBackwardMatchLength;
+                     bestEntry = cur;
+                 }
+             }
+         }
+
+         /* No match found -- continue searching */
+         if (bestEntry == NULL) {
+             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
+                                              hBits, current,
+                                              ldmParams);
+             ip++;
+             continue;
+         }
+
+         /* Match found */
+         mLength = forwardMatchLength + backwardMatchLength;
+         ip -= backwardMatchLength;
+
+         /* Call the block compressor on the remaining literals */
+         {
+             U32 const matchIndex = bestEntry->offset;
+             const BYTE* const match = base + matchIndex - backwardMatchLength;
+             U32 const offset = (U32)(ip - match);
+
+             /* Overwrite rep codes */
+             for (i = 0; i < ZSTD_REP_NUM; i++)
+                 seqStorePtr->rep[i] = repToConfirm[i];
+
+             /* Fill tables for block compressor */
+             ZSTD_ldm_limitTableUpdate(cctx, anchor);
+             ZSTD_ldm_fillFastTables(cctx, anchor);
+
+             /* Call block compressor and get remaining literals */
+             lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
+             cctx->nextToUpdate = (U32)(ip - base);
+
+             /* Update repToConfirm with the new offset */
+             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                 repToConfirm[i] = repToConfirm[i-1];
+             repToConfirm[0] = offset;
+
+             /* Store the sequence with the leftover literals */
+             ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                           offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+         }
+
+         /* Insert the current entry into the hash table */
+         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                          (U32)(lastHashed - base),
+                                          ldmParams);
+
+         assert(ip + backwardMatchLength == lastHashed);
+
+         /* Fill the hash table from lastHashed+1 to ip+mLength */
+         /* Heuristic: don't need to fill the entire table at end of block */
+         if (ip + mLength < ilimit) {
+             rollingHash = ZSTD_ldm_fillLdmHashTable(
+                               ldmState, rollingHash, lastHashed,
+                               ip + mLength, base, hBits, ldmParams);
+             lastHashed = ip + mLength - 1;
+         }
+         ip += mLength;
+         anchor = ip;
+         /* Check immediate repcode */
+         while ( (ip < ilimit)
+              && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
+              && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
+
+             size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
+                                               iend) + 4;
+             /* Swap repToConfirm[1] <=> repToConfirm[0] */
+             {
+                 U32 const tmpOff = repToConfirm[1];
+                 repToConfirm[1] = repToConfirm[0];
+                 repToConfirm[0] = tmpOff;
+             }
+
+             ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+
+             /* Fill the hash table from lastHashed+1 to ip+rLength */
+             if (ip + rLength < ilimit) {
+                 rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                   ldmState, rollingHash, lastHashed,
+                                   ip + rLength, base, hBits, ldmParams);
+                 lastHashed = ip + rLength - 1;
+             }
+             ip += rLength;
+             anchor = ip;
+         }
+     }
+
+     /* Overwrite rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = repToConfirm[i];
+
+     ZSTD_ldm_limitTableUpdate(cctx, anchor);
+     ZSTD_ldm_fillFastTables(cctx, anchor);
+
+     lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
+     cctx->nextToUpdate = (U32)(iend - base);
+
+     /* Restore seqStorePtr->rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = savedRep[i];
+
+     /* Return the last literals size */
+     return lastLiterals;
+ }
+
+ size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
+                               const void* src, size_t srcSize)
+ {
+     return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+ }
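
Note how the main loop hashes every position but only indexes and searches positions whose tag equals the all-ones tagMask, i.e. roughly one position in 2^hashEveryLog (hashEveryLog defaults to windowLog - hashLog, e.g. 7 for windowLog 27 and hashLog 20). A minimal sketch of that gating rate, using a stand-in mixer in place of the real rolling hash:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in 64-bit mixer (splitmix64 finalizer); the real code gates on
     * tag bits extracted from the rolling hash. */
    static uint64_t mix64(uint64_t x)
    {
        x += 0x9E3779B97F4A7C15ULL;
        x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ULL;
        x = (x ^ (x >> 27)) * 0x94D049BB133111EBULL;
        return x ^ (x >> 31);
    }

    int main(void)
    {
        unsigned const hashEveryLog = 7;
        uint32_t const tagMask = (1u << hashEveryLog) - 1;
        unsigned long inserted = 0, total = 1000000;

        for (unsigned long pos = 0; pos < total; pos++) {
            uint32_t const tag = (uint32_t)mix64(pos) & tagMask;
            if (tag == tagMask) inserted++;     /* same gate as the tag check */
        }
        /* Expect about total / 2^hashEveryLog, i.e. ~7812 insertions. */
        printf("%lu of %lu positions pass the tag gate\n", inserted, total);
        return 0;
    }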
+
+ static size_t ZSTD_compressBlock_ldm_extDict_generic(
+                                  ZSTD_CCtx* ctx,
+                                  const void* src, size_t srcSize)
+ {
+     ldmState_t* const ldmState = &(ctx->ldmState);
+     const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
+     const U64 hashPower = ldmState->hashPower;
+     const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+     const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
+     const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
+     seqStore_t* const seqStorePtr = &(ctx->seqStore);
+     const BYTE* const base = ctx->base;
+     const BYTE* const dictBase = ctx->dictBase;
+     const BYTE* const istart = (const BYTE*)src;
+     const BYTE* ip = istart;
+     const BYTE* anchor = istart;
+     const U32 lowestIndex = ctx->lowLimit;
+     const BYTE* const dictStart = dictBase + lowestIndex;
+     const U32 dictLimit = ctx->dictLimit;
+     const BYTE* const lowPrefixPtr = base + dictLimit;
+     const BYTE* const dictEnd = dictBase + dictLimit;
+     const BYTE* const iend = istart + srcSize;
+     const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
+
+     const ZSTD_blockCompressor blockCompressor =
+         ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
+     U32* const repToConfirm = seqStorePtr->repToConfirm;
+     U32 savedRep[ZSTD_REP_NUM];
+     U64 rollingHash = 0;
+     const BYTE* lastHashed = NULL;
+     size_t i, lastLiterals;
+
+     /* Save seqStorePtr->rep and copy repToConfirm */
+     for (i = 0; i < ZSTD_REP_NUM; i++) {
+         savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+     }
+
+     /* Search Loop */
+     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+         size_t mLength;
+         const U32 current = (U32)(ip-base);
+         size_t forwardMatchLength = 0, backwardMatchLength = 0;
+         ldmEntry_t* bestEntry = NULL;
+         if (ip != istart) {
+             rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                               lastHashed[ldmParams.minMatchLength],
+                                               hashPower);
+         } else {
+             rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+         }
+         lastHashed = ip;
+
+         if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                 ldmTagMask) {
+             /* Don't insert and don't look for a match */
+             ip++;
+             continue;
+         }
+
+         /* Get the best entry and compute the match lengths */
+         {
+             ldmEntry_t* const bucket =
+                 ZSTD_ldm_getBucket(ldmState,
+                                    ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                    ldmParams);
+             ldmEntry_t* cur;
+             size_t bestMatchLength = 0;
+             U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+             for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                 const BYTE* const curMatchBase =
+                     cur->offset < dictLimit ? dictBase : base;
+                 const BYTE* const pMatch = curMatchBase + cur->offset;
+                 const BYTE* const matchEnd =
+                     cur->offset < dictLimit ? dictEnd : iend;
+                 const BYTE* const lowMatchPtr =
+                     cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                 size_t curForwardMatchLength, curBackwardMatchLength,
+                        curTotalMatchLength;
+
+                 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                     continue;
+                 }
+
+                 curForwardMatchLength = ZSTD_count_2segments(
+                                             ip, pMatch, iend,
+                                             matchEnd, lowPrefixPtr);
+                 if (curForwardMatchLength < ldmParams.minMatchLength) {
+                     continue;
+                 }
+                 curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                              ip, anchor, pMatch, lowMatchPtr);
+                 curTotalMatchLength = curForwardMatchLength +
+                                       curBackwardMatchLength;
+
+                 if (curTotalMatchLength > bestMatchLength) {
+                     bestMatchLength = curTotalMatchLength;
+                     forwardMatchLength = curForwardMatchLength;
+                     backwardMatchLength = curBackwardMatchLength;
+                     bestEntry = cur;
+                 }
+             }
+         }
+
+         /* No match found -- continue searching */
+         if (bestEntry == NULL) {
+             ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                              (U32)(lastHashed - base),
+                                              ldmParams);
+             ip++;
+             continue;
+         }
+
+         /* Match found */
+         mLength = forwardMatchLength + backwardMatchLength;
+         ip -= backwardMatchLength;
+
+         /* Call the block compressor on the remaining literals */
+         {
+             /* ip = current - backwardMatchLength
+              * The match is at (bestEntry->offset - backwardMatchLength) */
+             U32 const matchIndex = bestEntry->offset;
+             U32 const offset = current - matchIndex;
+
+             /* Overwrite rep codes */
+             for (i = 0; i < ZSTD_REP_NUM; i++)
+                 seqStorePtr->rep[i] = repToConfirm[i];
+
+             /* Fill the hash table for the block compressor */
+             ZSTD_ldm_limitTableUpdate(ctx, anchor);
+             ZSTD_ldm_fillFastTables(ctx, anchor);
+
+             /* Call block compressor and get remaining literals */
+             lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
+             ctx->nextToUpdate = (U32)(ip - base);
+
+             /* Update repToConfirm with the new offset */
+             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                 repToConfirm[i] = repToConfirm[i-1];
+             repToConfirm[0] = offset;
+
+             /* Store the sequence with the leftover literals */
+             ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                           offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+         }
+
+         /* Insert the current entry into the hash table */
+         ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                          (U32)(lastHashed - base),
+                                          ldmParams);
+
+         /* Fill the hash table from lastHashed+1 to ip+mLength */
+         assert(ip + backwardMatchLength == lastHashed);
+         if (ip + mLength < ilimit) {
+             rollingHash = ZSTD_ldm_fillLdmHashTable(
+                               ldmState, rollingHash, lastHashed,
+                               ip + mLength, base, hBits,
+                               ldmParams);
+             lastHashed = ip + mLength - 1;
+         }
+         ip += mLength;
+         anchor = ip;
+
+         /* check immediate repcode */
+         while (ip < ilimit) {
+             U32 const current2 = (U32)(ip-base);
+             U32 const repIndex2 = current2 - repToConfirm[1];
+             const BYTE* repMatch2 = repIndex2 < dictLimit ?
+                                     dictBase + repIndex2 : base + repIndex2;
+             if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
+                   (repIndex2 > lowestIndex))  /* intentional overflow */
+                && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                 const BYTE* const repEnd2 = repIndex2 < dictLimit ?
+                                             dictEnd : iend;
+                 size_t const repLength2 =
+                     ZSTD_count_2segments(ip+4, repMatch2+4, iend,
+                                          repEnd2, lowPrefixPtr) + 4;
+
+                 U32 tmpOffset = repToConfirm[1];
+                 repToConfirm[1] = repToConfirm[0];
+                 repToConfirm[0] = tmpOffset;
+
+                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+
+                 /* Fill the hash table from lastHashed+1 to ip+repLength2 */
+                 if (ip + repLength2 < ilimit) {
+                     rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                       ldmState, rollingHash, lastHashed,
+                                       ip + repLength2, base, hBits,
+                                       ldmParams);
+                     lastHashed = ip + repLength2 - 1;
+                 }
+                 ip += repLength2;
+                 anchor = ip;
+                 continue;
+             }
+             break;
+         }
+     }
+
+     /* Overwrite rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = repToConfirm[i];
+
+     ZSTD_ldm_limitTableUpdate(ctx, anchor);
+     ZSTD_ldm_fillFastTables(ctx, anchor);
+
+     /* Call the block compressor one last time on the last literals */
+     lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
+     ctx->nextToUpdate = (U32)(iend - base);
+
+     /* Restore seqStorePtr->rep */
+     for (i = 0; i < ZSTD_REP_NUM; i++)
+         seqStorePtr->rep[i] = savedRep[i];
+
+     /* Return the last literals size */
+     return lastLiterals;
+ }
+
+ size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
+                                       const void* src, size_t srcSize)
+ {
+     return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+ }
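
For context, these block compressors sit behind zstd's long-distance-matching switch; callers never invoke them directly. A hedged sketch of enabling LDM through the experimental v1.3.x API bundled in this gem (the parameter names were renamed in later zstd releases, so treat the identifiers as assumptions about this vendored version):

    #define ZSTD_STATIC_LINKING_ONLY   /* experimental API, as vendored here */
    #include <zstd.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        static char src[1 << 20];   /* toy input; LDM pays off on large, redundant data */
        static char dst[1 << 21];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        memset(src, 'z', sizeof src);

        /* Assumed v1.3.x names: ZSTD_CCtx_setParameter with
         * ZSTD_p_enableLongDistanceMatching and ZSTD_p_windowLog. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_enableLongDistanceMatching, 1);
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_windowLog, 27);  /* LDM wants a large window */

        {   ZSTD_inBuffer  in  = { src, sizeof src, 0 };
            ZSTD_outBuffer out = { dst, sizeof dst, 0 };
            size_t const r = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
            if (ZSTD_isError(r)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(r)); return 1; }
            printf("compressed %zu -> %zu bytes\n", in.pos, out.pos);
        }
        ZSTD_freeCCtx(cctx);
        return 0;
    }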