zstdlib 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/bitstream.h +0 -0
  5. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/compiler.h +7 -0
  6. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/cpu.h +0 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/debug.c +0 -0
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/debug.h +0 -0
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/entropy_common.c +0 -0
  10. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/error_private.c +0 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/error_private.h +0 -0
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/fse.h +0 -0
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/fse_decompress.c +0 -0
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/huf.h +0 -0
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/mem.h +0 -0
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/pool.c +0 -0
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/pool.h +0 -0
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/threading.c +0 -0
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/threading.h +0 -0
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/xxhash.c +0 -0
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/xxhash.h +0 -0
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_common.c +0 -0
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_errors.h +0 -0
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_internal.h +58 -6
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/fse_compress.c +0 -0
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/hist.c +0 -0
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/hist.h +0 -0
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/huf_compress.c +0 -0
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_compress.c +178 -691
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_compress_internal.h +98 -30
  31. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_literals.c +149 -0
  32. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_literals.h +29 -0
  33. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_sequences.c +415 -0
  34. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_sequences.h +47 -0
  35. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_double_fast.c +56 -36
  36. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_double_fast.h +0 -0
  37. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_fast.c +35 -14
  38. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_fast.h +0 -0
  39. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_lazy.c +10 -5
  40. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_lazy.h +0 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_ldm.c +1 -1
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_ldm.h +0 -0
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_opt.c +45 -32
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_opt.h +0 -0
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstdmt_compress.c +18 -7
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstdmt_compress.h +1 -0
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/huf_decompress.c +0 -0
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_ddict.c +0 -0
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_ddict.h +0 -0
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress.c +14 -9
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_block.c +20 -9
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_block.h +0 -0
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_internal.h +0 -0
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/zstd.h +53 -21
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzclose.c +0 -0
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzcompatibility.h +0 -0
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzguts.h +0 -0
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzlib.c +0 -0
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzread.c +0 -0
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzwrite.c +0 -0
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/zstd_zlibwrapper.c +0 -0
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/zstd_zlibwrapper.h +0 -0
  63. metadata +62 -59
@@ -0,0 +1,47 @@
1
+ /*
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_COMPRESS_SEQUENCES_H
12
+ #define ZSTD_COMPRESS_SEQUENCES_H
13
+
14
+ #include "fse.h" /* FSE_repeat, FSE_CTable */
15
+ #include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
16
+
17
+ typedef enum {
18
+ ZSTD_defaultDisallowed = 0,
19
+ ZSTD_defaultAllowed = 1
20
+ } ZSTD_defaultPolicy_e;
21
+
22
+ symbolEncodingType_e
23
+ ZSTD_selectEncodingType(
24
+ FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
25
+ size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
26
+ FSE_CTable const* prevCTable,
27
+ short const* defaultNorm, U32 defaultNormLog,
28
+ ZSTD_defaultPolicy_e const isDefaultAllowed,
29
+ ZSTD_strategy const strategy);
30
+
31
+ size_t
32
+ ZSTD_buildCTable(void* dst, size_t dstCapacity,
33
+ FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
34
+ unsigned* count, U32 max,
35
+ const BYTE* codeTable, size_t nbSeq,
36
+ const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
37
+ const FSE_CTable* prevCTable, size_t prevCTableSize,
38
+ void* workspace, size_t workspaceSize);
39
+
40
+ size_t ZSTD_encodeSequences(
41
+ void* dst, size_t dstCapacity,
42
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
43
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
44
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
45
+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
46
+
47
+ #endif /* ZSTD_COMPRESS_SEQUENCES_H */
@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
46
- }
47
- }
46
+ } }
48
47
  }
49
48
 
50
49
 
@@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
62
  const BYTE* const istart = (const BYTE*)src;
64
63
  const BYTE* ip = istart;
65
64
  const BYTE* anchor = istart;
66
- const U32 prefixLowestIndex = ms->window.dictLimit;
65
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
+ const U32 lowestValid = ms->window.dictLimit;
67
+ const U32 maxDistance = 1U << cParams->windowLog;
68
+ const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
67
69
  const BYTE* const prefixLowest = base + prefixLowestIndex;
68
70
  const BYTE* const iend = istart + srcSize;
69
71
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
95
97
  dictCParams->chainLog : hBitsS;
96
98
  const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
99
 
100
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
101
+
98
102
  assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
99
103
 
104
+ /* if a dictionary is attached, it must be within window range */
105
+ if (dictMode == ZSTD_dictMatchState) {
106
+ assert(lowestValid + maxDistance >= endIndex);
107
+ }
108
+
100
109
  /* init */
101
110
  ip += (dictAndPrefixLength == 0);
102
111
  if (dictMode == ZSTD_noDict) {
@@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
138
147
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
139
148
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
140
149
  ip++;
141
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
150
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
142
151
  goto _match_stored;
143
152
  }
144
153
 
@@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
147
156
  && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
148
157
  mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
149
158
  ip++;
150
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
159
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
151
160
  goto _match_stored;
152
161
  }
153
162
 
@@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
170
179
  offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
171
180
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
172
181
  goto _match_found;
173
- }
174
- }
182
+ } }
175
183
 
176
184
  if (matchIndexS > prefixLowestIndex) {
177
185
  /* check prefix short match */
@@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
186
194
 
187
195
  if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
188
196
  goto _search_next_long;
189
- }
190
- }
197
+ } }
191
198
 
192
199
  ip += ((ip-anchor) >> kSearchStrength) + 1;
193
200
  continue;
194
201
 
195
202
  _search_next_long:
196
203
 
197
- {
198
- size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
204
+ { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
199
205
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
200
206
  U32 const matchIndexL3 = hashLong[hl3];
201
207
  const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +227,7 @@ _search_next_long:
221
227
  offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
222
228
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
223
229
  goto _match_found;
224
- }
225
- }
226
- }
230
+ } } }
227
231
 
228
232
  /* if no long +1 match, explore the short match we found */
229
233
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +246,7 @@ _match_found:
242
246
  offset_2 = offset_1;
243
247
  offset_1 = offset;
244
248
 
245
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
249
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
246
250
 
247
251
  _match_stored:
248
252
  /* match found */
@@ -250,11 +254,14 @@ _match_stored:
250
254
  anchor = ip;
251
255
 
252
256
  if (ip <= ilimit) {
253
- /* Fill Table */
254
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
255
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
256
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
257
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
257
+ /* Complementary insertion */
258
+ /* done after the ilimit test, as candidates could be > iend-8 */
259
+ { U32 const indexToInsert = current+2;
260
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
261
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
262
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
263
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
264
+ }
258
265
 
259
266
  /* check immediate repcode */
260
267
  if (dictMode == ZSTD_dictMatchState) {
@@ -278,8 +285,7 @@ _match_stored:
278
285
  continue;
279
286
  }
280
287
  break;
281
- }
282
- }
288
+ } }
283
289
 
284
290
  if (dictMode == ZSTD_noDict) {
285
291
  while ( (ip <= ilimit)
@@ -294,14 +300,15 @@ _match_stored:
294
300
  ip += rLength;
295
301
  anchor = ip;
296
302
  continue; /* faster when present ... (?) */
297
- } } } }
303
+ } } }
304
+ } /* while (ip < ilimit) */
298
305
 
299
306
  /* save reps for next block */
300
307
  rep[0] = offset_1 ? offset_1 : offsetSaved;
301
308
  rep[1] = offset_2 ? offset_2 : offsetSaved;
302
309
 
303
310
  /* Return the last literals size */
304
- return iend - anchor;
311
+ return (size_t)(iend - anchor);
305
312
  }
306
313
 
307
314
 
@@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
360
367
  const BYTE* anchor = istart;
361
368
  const BYTE* const iend = istart + srcSize;
362
369
  const BYTE* const ilimit = iend - 8;
363
- const U32 prefixStartIndex = ms->window.dictLimit;
364
370
  const BYTE* const base = ms->window.base;
371
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
372
+ const U32 maxDistance = 1U << cParams->windowLog;
373
+ const U32 lowestValid = ms->window.lowLimit;
374
+ const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
375
+ const U32 dictStartIndex = lowLimit;
376
+ const U32 dictLimit = ms->window.dictLimit;
377
+ const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
365
378
  const BYTE* const prefixStart = base + prefixStartIndex;
366
- const U32 dictStartIndex = ms->window.lowLimit;
367
379
  const BYTE* const dictBase = ms->window.dictBase;
368
380
  const BYTE* const dictStart = dictBase + dictStartIndex;
369
381
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
371
383
 
372
384
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
373
385
 
386
+ /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
387
+ if (prefixStartIndex == dictStartIndex)
388
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
389
+
374
390
  /* Search Loop */
375
391
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
376
392
  const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
396
412
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
397
413
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
398
414
  ip++;
399
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
415
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
400
416
  } else {
401
417
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
402
418
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
407
423
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
408
424
  offset_2 = offset_1;
409
425
  offset_1 = offset;
410
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
426
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
411
427
 
412
428
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
413
429
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
432
448
  }
433
449
  offset_2 = offset_1;
434
450
  offset_1 = offset;
435
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
451
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
436
452
 
437
453
  } else {
438
454
  ip += ((ip-anchor) >> kSearchStrength) + 1;
439
455
  continue;
440
456
  } }
441
457
 
442
- /* found a match : store it */
458
+ /* move to next sequence start */
443
459
  ip += mLength;
444
460
  anchor = ip;
445
461
 
446
462
  if (ip <= ilimit) {
447
- /* Fill Table */
448
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
449
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
450
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
451
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
463
+ /* Complementary insertion */
464
+ /* done after the ilimit test, as candidates could be > iend-8 */
465
+ { U32 const indexToInsert = current+2;
466
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
467
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
468
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
469
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
470
+ }
471
+
452
472
  /* check immediate repcode */
453
473
  while (ip <= ilimit) {
454
474
  U32 const current2 = (U32)(ip-base);
@@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
475
495
  rep[1] = offset_2;
476
496
 
477
497
  /* Return the last literals size */
478
- return iend - anchor;
498
+ return (size_t)(iend - anchor);
479
499
  }
480
500
 
481
501
 
@@ -13,7 +13,8 @@
13
13
 
14
14
 
15
15
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16
- void const* end, ZSTD_dictTableLoadMethod_e dtlm)
16
+ const void* const end,
17
+ ZSTD_dictTableLoadMethod_e dtlm)
17
18
  {
18
19
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
19
20
  U32* const hashTable = ms->hashTable;
@@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
41
42
  } } } }
42
43
  }
43
44
 
45
+
44
46
  FORCE_INLINE_TEMPLATE
45
47
  size_t ZSTD_compressBlock_fast_generic(
46
48
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
58
60
  const BYTE* ip0 = istart;
59
61
  const BYTE* ip1;
60
62
  const BYTE* anchor = istart;
61
- const U32 prefixStartIndex = ms->window.dictLimit;
63
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
+ const U32 maxDistance = 1U << cParams->windowLog;
65
+ const U32 validStartIndex = ms->window.dictLimit;
66
+ const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
62
67
  const BYTE* const prefixStart = base + prefixStartIndex;
63
68
  const BYTE* const iend = istart + srcSize;
64
69
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
165
170
  rep[1] = offset_2 ? offset_2 : offsetSaved;
166
171
 
167
172
  /* Return the last literals size */
168
- return iend - anchor;
173
+ return (size_t)(iend - anchor);
169
174
  }
170
175
 
171
176
 
@@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
222
227
  const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
223
228
  const U32 dictHLog = dictCParams->hashLog;
224
229
 
225
- /* otherwise, we would get index underflow when translating a dict index
226
- * into a local index */
230
+ /* if a dictionary is still attached, it necessarily means that
231
+ * it is within window size. So we just check it. */
232
+ const U32 maxDistance = 1U << cParams->windowLog;
233
+ const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
234
+ assert(endIndex - prefixStartIndex <= maxDistance);
235
+ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
236
+
237
+ /* ensure there will be no underflow
238
+ * when translating a dict index into a local index */
227
239
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
228
240
 
229
241
  /* init */
@@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
251
263
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
252
264
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
253
265
  ip++;
254
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
266
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
255
267
  } else if ( (matchIndex <= prefixStartIndex) ) {
256
268
  size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
257
269
  U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
271
283
  } /* catch up */
272
284
  offset_2 = offset_1;
273
285
  offset_1 = offset;
274
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
286
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
275
287
  }
276
288
  } else if (MEM_read32(match) != MEM_read32(ip)) {
277
289
  /* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
286
298
  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
287
299
  offset_2 = offset_1;
288
300
  offset_1 = offset;
289
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
301
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
290
302
  }
291
303
 
292
304
  /* match found */
@@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
327
339
  rep[1] = offset_2 ? offset_2 : offsetSaved;
328
340
 
329
341
  /* Return the last literals size */
330
- return iend - anchor;
342
+ return (size_t)(iend - anchor);
331
343
  }
332
344
 
333
345
  size_t ZSTD_compressBlock_fast_dictMatchState(
@@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
366
378
  const BYTE* const istart = (const BYTE*)src;
367
379
  const BYTE* ip = istart;
368
380
  const BYTE* anchor = istart;
369
- const U32 dictStartIndex = ms->window.lowLimit;
381
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
382
+ const U32 maxDistance = 1U << cParams->windowLog;
383
+ const U32 validLow = ms->window.lowLimit;
384
+ const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
385
+ const U32 dictStartIndex = lowLimit;
370
386
  const BYTE* const dictStart = dictBase + dictStartIndex;
371
- const U32 prefixStartIndex = ms->window.dictLimit;
387
+ const U32 dictLimit = ms->window.dictLimit;
388
+ const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
372
389
  const BYTE* const prefixStart = base + prefixStartIndex;
373
390
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
374
391
  const BYTE* const iend = istart + srcSize;
375
392
  const BYTE* const ilimit = iend - 8;
376
393
  U32 offset_1=rep[0], offset_2=rep[1];
377
394
 
395
+ /* switch to "regular" variant if extDict is invalidated due to maxDistance */
396
+ if (prefixStartIndex == dictStartIndex)
397
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
398
+
378
399
  /* Search Loop */
379
400
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
380
401
  const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
394
415
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
395
416
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
396
417
  ip++;
397
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
418
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
398
419
  } else {
399
420
  if ( (matchIndex < dictStartIndex) ||
400
421
  (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
410
431
  offset = current - matchIndex;
411
432
  offset_2 = offset_1;
412
433
  offset_1 = offset;
413
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
434
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
414
435
  } }
415
436
 
416
437
  /* found a match : store it */
@@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
445
466
  rep[1] = offset_2;
446
467
 
447
468
  /* Return the last literals size */
448
- return iend - anchor;
469
+ return (size_t)(iend - anchor);
449
470
  }
450
471
 
451
472
 
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
- U32 const windowLow = ms->window.lowLimit;
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
89
+
87
90
 
88
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
89
92
  current, dictLimit, windowLow);
@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
239
242
 
240
243
  const BYTE* const base = ms->window.base;
241
244
  U32 const current = (U32)(ip-base);
242
- U32 const windowLow = ms->window.lowLimit;
245
+ U32 const maxDistance = 1U << cParams->windowLog;
246
+ U32 const windowValid = ms->window.lowLimit;
247
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
243
248
 
244
249
  U32* const bt = ms->chainTable;
245
250
  U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
490
495
  const U32 dictLimit = ms->window.dictLimit;
491
496
  const BYTE* const prefixStart = base + dictLimit;
492
497
  const BYTE* const dictEnd = dictBase + dictLimit;
493
- const U32 lowLimit = ms->window.lowLimit;
494
498
  const U32 current = (U32)(ip-base);
499
+ const U32 maxDistance = 1U << cParams->windowLog;
500
+ const U32 lowValid = ms->window.lowLimit;
501
+ const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
495
502
  const U32 minChain = current > chainSize ? current - chainSize : 0;
496
503
  U32 nbAttempts = 1U << cParams->searchLog;
497
504
  size_t ml=4-1;
@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
653
660
 
654
661
  /* init */
655
662
  ip += (dictAndPrefixLength == 0);
656
- ms->nextToUpdate3 = ms->nextToUpdate;
657
663
  if (dictMode == ZSTD_noDict) {
658
664
  U32 const maxRep = (U32)(ip - prefixLowest);
659
665
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
933
939
  U32 offset_1 = rep[0], offset_2 = rep[1];
934
940
 
935
941
  /* init */
936
- ms->nextToUpdate3 = ms->nextToUpdate;
937
942
  ip += (ip == prefixStart);
938
943
 
939
944
  /* Match Loop */