zstdlib 0.7.0-x64-mingw32 → 0.8.0-x64-mingw32

This diff compares the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
  5. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/bitstream.h +25 -16
  6. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/compiler.h +118 -4
  7. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/cpu.h +1 -3
  8. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/debug.c +1 -1
  9. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/debug.h +12 -19
  10. data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
  11. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/error_private.c +2 -1
  12. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/error_private.h +3 -3
  13. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/fse.h +40 -12
  14. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/fse_decompress.c +139 -22
  15. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/huf.h +29 -7
  16. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/mem.h +69 -98
  17. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/pool.c +23 -17
  18. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/pool.h +2 -2
  19. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/threading.c +6 -5
  20. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/threading.h +0 -0
  21. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/xxhash.c +20 -60
  22. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/xxhash.h +2 -2
  23. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
  24. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
  25. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/zstd_internal.h +105 -62
  26. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
  27. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/fse_compress.c +31 -24
  28. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/hist.c +27 -29
  29. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/hist.h +2 -2
  30. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/huf_compress.c +265 -126
  31. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress.c +2843 -728
  32. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +305 -63
  33. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +8 -8
  34. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +1 -1
  35. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +29 -7
  36. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +1 -1
  37. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_superblock.c +22 -295
  38. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_superblock.h +1 -1
  39. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_cwksp.h +204 -67
  40. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +25 -25
  41. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +1 -1
  42. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_fast.c +23 -23
  43. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_fast.h +1 -1
  44. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
  45. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
  46. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_ldm.c +314 -211
  47. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_ldm.h +9 -2
  48. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
  49. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_opt.c +191 -46
  50. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +93 -415
  52. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
  53. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/huf_decompress.c +342 -239
  54. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +9 -9
  55. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +369 -87
  57. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +191 -75
  58. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +6 -3
  59. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +27 -11
  60. data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
  61. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/zstd.h +568 -126
  62. data/ext/zstdlib/{zstd-1.4.5/lib/common → zstd-1.5.0/lib}/zstd_errors.h +2 -1
  63. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzclose.c +0 -0
  64. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
  65. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
  66. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzlib.c +0 -0
  67. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzread.c +0 -0
  68. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzwrite.c +0 -0
  69. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +126 -44
  70. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  71. data/lib/2.2/zstdlib.so +0 -0
  72. data/lib/2.3/zstdlib.so +0 -0
  73. data/lib/2.4/zstdlib.so +0 -0
  74. data/lib/2.5/zstdlib.so +0 -0
  75. data/lib/2.6/zstdlib.so +0 -0
  76. data/lib/2.7/zstdlib.so +0 -0
  77. metadata +69 -64
  78. data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
  79. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
  80. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
  81. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +191 -75
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include <string.h>      /* memcpy, memmove, memset */
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
 #include "../common/compiler.h"    /* prefetch */
 #include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
@@ -44,7 +44,7 @@
 /*_*******************************************************
 *  Memory operations
 **********************************************************/
-static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
 
 
 /*-*************************************************************
@@ -166,7 +166,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             dctx->litSize = litSize;
             dctx->litEntropy = 1;
             if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+            ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
             return litCSize + lhSize;
         }
 
@@ -191,10 +191,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
             if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                 RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
-                memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
-                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return lhSize+litSize;
             }
             /* direct reference into compressed stream */
@@ -223,7 +223,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 break;
             }
             RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+            ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
             return lhSize+1;
@@ -236,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
  * They were generated programmatically with following method :
  * - start from default distributions, present in /lib/common/zstd_internal.h
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -364,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
  * generate FSE decoding table for one symbol (ll, ml or off)
  * cannot fail if input is valid =>
  * all inputs are presumed validated at this stage */
-void
-ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
             const U32* baseValue, const U32* nbAdditionalBits,
-            unsigned tableLog)
+            unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_seqSymbol* const tableDecode = dt+1;
-    U16 symbolNext[MaxSeq+1];
-
     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
-    U32 highThreshold = tableSize-1;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
 
     /* Sanity Checks */
     assert(maxSymbolValue <= MaxSeq);
     assert(tableLog <= MaxFSELog);
-
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
     /* Init, lay down lowprob symbols */
     {   ZSTD_seqSymbol_header DTableH;
         DTableH.tableLog = tableLog;
@@ -396,16 +399,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             assert(normalizedCounter[s]>=0);
             symbolNext[s] = (U16)normalizedCounter[s];
     }   }   }
-    memcpy(dt, &DTableH, sizeof(DTableH));
+    ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
 
     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
             int i;
-            for (i=0; i<normalizedCounter[s]; i++) {
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
                 tableDecode[position].baseValue = s;
                 position = (position + step) & tableMask;
                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
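
The fast path added above is the heart of this hunk: instead of scattering each symbol with a count-dependent inner loop, it first lays all symbols down contiguously (eight bytes per store) and then scatters that run across the table with a fixed stride. A simplified standalone sketch of the same two-stage idea, without the 2x unrolling and with illustrative names — it assumes all counts are non-negative (no low-probability symbols) and that spread has 8 bytes of slack for the 64-bit over-write:

    #include <stdint.h>
    #include <string.h>

    static void spread_two_stage(uint8_t *table, size_t tableSize,
                                 const int16_t *counts, unsigned maxSym1,
                                 uint8_t *spread)   /* tableSize + 8 bytes */
    {
        size_t const mask = tableSize - 1;
        size_t const step = (tableSize >> 1) + (tableSize >> 3) + 3; /* FSE_TABLESTEP */
        uint64_t const add = 0x0101010101010101ULL;
        uint64_t sv = 0;                 /* eight byte-copies of the symbol id */
        size_t pos = 0;
        unsigned s;
        /* Stage 1: write each symbol counts[s] times, 8 bytes per store. */
        for (s = 0; s < maxSym1; ++s, sv += add) {
            int const n = counts[s];
            int i;
            memcpy(spread + pos, &sv, 8);            /* like MEM_write64 */
            for (i = 8; i < n; i += 8)
                memcpy(spread + pos + i, &sv, 8);
            pos += (size_t)n;                        /* over-write is harmless */
        }
        /* Stage 2: scatter the contiguous run with a fixed stride; no
         * data-dependent branches remain in this loop. */
        {   size_t position = 0;
            size_t i;
            for (i = 0; i < tableSize; ++i) {
                table[position] = spread[i];
                position = (position + step) & mask;
            }
        }
    }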
@@ -414,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
     }
 
     /* Build Decoding table */
-    {   U32 u;
+    {
+        U32 u;
         for (u=0; u<tableSize; u++) {
             U32 const symbol = tableDecode[u].baseValue;
             U32 const nextState = symbolNext[symbol]++;
@@ -423,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             assert(nbAdditionalBits[symbol] < 255);
             tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
             tableDecode[u].baseValue = baseValue[symbol];
-    }   }
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
 }
 
@@ -435,7 +531,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
                            const void* src, size_t srcSize,
                            const U32* baseValue, const U32* nbAdditionalBits,
                            const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
-                           int ddictIsCold, int nbSeq)
+                           int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                           int bmi2)
 {
     switch(type)
     {
@@ -467,7 +564,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
             RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
             RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
-            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
             *DTablePtr = DTableSpace;
             return headerSize;
         }
@@ -480,7 +577,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                              const void* src, size_t srcSize)
 {
-    const BYTE* const istart = (const BYTE* const)src;
+    const BYTE* const istart = (const BYTE*)src;
     const BYTE* const iend = istart + srcSize;
     const BYTE* ip = istart;
     int nbSeq;
@@ -499,7 +596,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
-            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
         } else {
             RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
@@ -520,7 +618,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += llhSize;
         }
@@ -530,7 +630,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += ofhSize;
         }
@@ -540,7 +642,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ip, iend-ip,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
-                                                      dctx->ddictIsCold, nbSeq);
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
             RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += mlhSize;
         }
@@ -554,7 +658,6 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
    size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -568,9 +671,6 @@ typedef struct {
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -686,12 +786,12 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
         match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
@@ -752,12 +852,12 @@ size_t ZSTD_execSequence(BYTE* op,
         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
+            ZSTD_memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
@@ -832,10 +932,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
                 : 0)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
 
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
     ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
@@ -910,14 +1009,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-    if (prefetch == ZSTD_p_prefetch) {
-        size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
-        seqState->pos = pos + seq.matchLength;
-    }
-
     /* ANS state update
      * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
      * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
@@ -948,7 +1039,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c
 }
 
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
 {
     size_t const windowSize = dctx->fParams.windowSize;
     /* No dictionary used. */
@@ -969,6 +1060,7 @@ MEM_STATIC void ZSTD_assertValidSequence(
                      seq_t const seq,
                      BYTE const* prefixStart, BYTE const* virtualStart)
 {
+#if DEBUGLEVEL >= 1
     size_t const windowSize = dctx->fParams.windowSize;
     size_t const sequenceSize = seq.litLength + seq.matchLength;
     BYTE const* const oLitEnd = op + seq.litLength;
@@ -986,6 +1078,9 @@ MEM_STATIC void ZSTD_assertValidSequence(
         /* Offset must be within our window. */
         assert(seq.offset <= windowSize);
     }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
 }
 #endif
 
@@ -1000,7 +1095,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1014,7 +1109,6 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1048,13 +1142,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
          * If you see most cycles served out of the DSB you've hit the good case.
          * If it is pretty even then you may be in an okay case.
          *
-         * I've been able to reproduce this issue on the following CPUs:
+         * This issue has been reproduced on the following CPUs:
          * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
          *             Use Instruments->Counters to get DSB/MITE cycles.
          *             I never got performance swings, but I was able to
          *             go from the good case of mostly DSB to half of the
          *             cycles served from MITE.
          * - Coffeelake: Intel i9-9900k
+         * - Coffeelake: Intel i7-9700k
          *
          * I haven't been able to reproduce the instability or DSB misses on any
         * of the following CPUS:
@@ -1067,33 +1162,35 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
          *
          * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
          */
+        __asm__(".p2align 6");
+        __asm__("nop");
         __asm__(".p2align 5");
         __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
         __asm__(".p2align 4");
+#  endif
 #endif
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
             BIT_reloadDStream(&(seqState.DStream));
-            /* gcc and clang both don't like early returns in this loop.
-             * gcc doesn't like early breaks either.
-             * Instead save an error and report it at the end.
-             * When there is an error, don't increment op, so we don't
-             * overwrite.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
-            else op += oneSeqSize;
-            if (UNLIKELY(!--nbSeq)) break;
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1104,7 +1201,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     {   size_t const lastLLSize = litEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-            memcpy(op, litPtr, lastLLSize);
+            ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1124,6 +1221,24 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                               * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                ZSTD_DCtx* dctx,
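
The new ZSTD_prefetchMatch() makes the prefetch distance explicit: the long-offset decoder keeps a ring of STORED_SEQS decoded sequences and issues the prefetch at decode time, STORED_SEQS iterations before ZSTD_execSequence() copies the match, so the bytes are in L1 by the time they are needed. A self-contained sketch of that pipeline shape — the decode/prefetch/execute stubs below stand in for the real functions, and every name is illustrative:

    #include <stddef.h>

    typedef struct { size_t litLength, matchLength, offset; } seq_t;

    static seq_t decode_one(void)        { seq_t s = {1, 4, 1}; return s; }
    static void  prefetch_match(seq_t s) { (void)s; }
    static void  execute_one(seq_t s)    { (void)s; }

    #define STORED_SEQS      8                /* power of two, as in the diff */
    #define STORED_SEQS_MASK (STORED_SEQS - 1)

    /* Decode runs STORED_SEQS sequences ahead of execution; the match
     * bytes prefetched at decode time are hot when finally copied. */
    static void pipeline_sketch(int nbSeq)
    {
        seq_t ring[STORED_SEQS];
        int seqNb;
        int const tail = nbSeq > STORED_SEQS ? nbSeq - STORED_SEQS : 0;
        for (seqNb = 0; seqNb < nbSeq; ++seqNb) {
            seq_t const s = decode_one();
            prefetch_match(s);                               /* issued early */
            if (seqNb >= STORED_SEQS)
                execute_one(ring[seqNb & STORED_SEQS_MASK]); /* oldest entry */
            ring[seqNb & STORED_SEQS_MASK] = s;
        }
        for (seqNb = tail; seqNb < nbSeq; ++seqNb)           /* drain */
            execute_one(ring[seqNb & STORED_SEQS_MASK]);
    }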
@@ -1134,7 +1249,7 @@ ZSTD_decompressSequencesLong_body(
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
-    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
@@ -1146,18 +1261,17 @@ ZSTD_decompressSequencesLong_body(
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1169,21 +1283,23 @@ ZSTD_decompressSequencesLong_body(
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
@@ -1209,7 +1325,7 @@ ZSTD_decompressSequencesLong_body(
     {   size_t const lastLLSize = litEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-            memcpy(op, litPtr, lastLLSize);
+            ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1409,9 +1525,9 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 }
 
 
-void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
-    if (dst != dctx->previousDstEnd) {   /* not contiguous */
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
         dctx->dictEnd = dctx->previousDstEnd;
         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
         dctx->prefixStart = dst;
@@ -1425,7 +1541,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                             const void* src, size_t srcSize)
 {
     size_t dSize;
-    ZSTD_checkContinuity(dctx, dst);
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
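
The last two hunks thread the destination size through ZSTD_checkContinuity(), so a zero-sized destination no longer disturbs the prefix/dictEnd window tracking between blocks. For context, a hedged sketch of how this block-level entry point is typically driven — ZSTD_decompressBlock() lives in zstd's experimental, static-linking-only API, and the helper below with its minimal error handling is illustrative:

    #define ZSTD_STATIC_LINKING_ONLY   /* block-level API is experimental */
    #include <zstd.h>

    /* Decompress a series of raw zstd blocks into one contiguous buffer.
     * ZSTD_decompressBlock() calls ZSTD_checkContinuity() internally, so
     * later blocks may reference earlier output as their match window. */
    static size_t decode_blocks(ZSTD_DCtx* dctx,
                                void* dst, size_t dstCapacity,
                                const void** blocks, const size_t* blockSizes,
                                int nbBlocks)
    {
        char* op = (char*)dst;
        int i;
        {   size_t const r = ZSTD_decompressBegin(dctx);
            if (ZSTD_isError(r)) return r;
        }
        for (i = 0; i < nbBlocks; ++i) {
            size_t const dSize = ZSTD_decompressBlock(dctx, op,
                                     dstCapacity - (size_t)(op - (char*)dst),
                                     blocks[i], blockSizes[i]);
            if (ZSTD_isError(dSize)) return dSize;
            op += dSize;   /* keep output contiguous across blocks */
        }
        return (size_t)(op - (char*)dst);
    }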