extzstd 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/contrib/zstd/CHANGELOG +188 -1
  4. data/contrib/zstd/CONTRIBUTING.md +157 -74
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +81 -58
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +59 -35
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +49 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +87 -181
  13. data/contrib/zstd/lib/README.md +23 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +33 -59
  17. data/contrib/zstd/lib/common/compiler.h +115 -45
  18. data/contrib/zstd/lib/common/cpu.h +1 -1
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +1 -1
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +9 -85
  25. data/contrib/zstd/lib/common/fse_decompress.c +29 -111
  26. data/contrib/zstd/lib/common/huf.h +84 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -49
  28. data/contrib/zstd/lib/common/pool.c +37 -16
  29. data/contrib/zstd/lib/common/pool.h +9 -3
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +68 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -809
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +64 -150
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +69 -150
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +773 -251
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +922 -293
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +13 -10
  106. data/ext/libzstd_conf.h +0 -1
  107. data/ext/zstd_decompress_asm.S +1 -0
  108. metadata +16 -5
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -20,6 +20,7 @@
20
20
 
21
21
 
22
22
  /* ====== Dependencies ====== */
23
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
23
24
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
24
25
  #include "../common/mem.h" /* MEM_STATIC */
25
26
  #include "../common/pool.h" /* threadpool */
@@ -102,9 +103,8 @@ typedef struct ZSTDMT_bufferPool_s {
102
103
  buffer_t bTable[1]; /* variable size */
103
104
  } ZSTDMT_bufferPool;
104
105
 
105
- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
106
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
106
107
  {
107
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
108
108
  ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
109
109
  sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
110
110
  if (bufPool==NULL) return NULL;
@@ -160,9 +160,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
160
160
  }
161
161
 
162
162
 
163
- static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
163
+ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
164
164
  {
165
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
166
165
  if (srcBufPool==NULL) return NULL;
167
166
  if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
168
167
  return srcBufPool;
@@ -171,7 +170,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
171
170
  size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
172
171
  ZSTDMT_bufferPool* newBufPool;
173
172
  ZSTDMT_freeBufferPool(srcBufPool);
174
- newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
173
+ newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
175
174
  if (newBufPool==NULL) return newBufPool;
176
175
  ZSTDMT_setBufferSize(newBufPool, bSize);
177
176
  return newBufPool;
@@ -263,6 +262,16 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
263
262
  ZSTD_customFree(buf.start, bufPool->cMem);
264
263
  }
265
264
 
265
+ /* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
266
+ * The 3 additional buffers are as follows:
267
+ * 1 buffer for input loading
268
+ * 1 buffer for "next input" when submitting current one
269
+ * 1 buffer stuck in queue */
270
+ #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
271
+
272
+ /* After a worker releases its rawSeqStore, it is immediately ready for reuse.
273
+ * So we only need one seq buffer per worker. */
274
+ #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
266
275
 
267
276
  /* ===== Seq Pool Wrapper ====== */
268
277
 
@@ -316,7 +325,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
316
325
 
317
326
  static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
318
327
  {
319
- ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
328
+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
320
329
  if (seqPool == NULL) return NULL;
321
330
  ZSTDMT_setNbSeq(seqPool, 0);
322
331
  return seqPool;
@@ -329,7 +338,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
329
338
 
330
339
  static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
331
340
  {
332
- return ZSTDMT_expandBufferPool(pool, nbWorkers);
341
+ return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
333
342
  }
334
343
 
335
344
 
@@ -467,29 +476,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
467
476
  ZSTD_dictContentType_e dictContentType)
468
477
  {
469
478
  /* Adjust parameters */
470
- if (params.ldmParams.enableLdm) {
479
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
471
480
  DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
472
481
  ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
473
482
  assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
474
483
  assert(params.ldmParams.hashRateLog < 32);
475
- serialState->ldmState.hashPower =
476
- ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
477
484
  } else {
478
485
  ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
479
486
  }
480
487
  serialState->nextJobID = 0;
481
488
  if (params.fParams.checksumFlag)
482
489
  XXH64_reset(&serialState->xxhState, 0);
483
- if (params.ldmParams.enableLdm) {
490
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
484
491
  ZSTD_customMem cMem = params.customMem;
485
492
  unsigned const hashLog = params.ldmParams.hashLog;
486
493
  size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
487
494
  unsigned const bucketLog =
488
495
  params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
489
- size_t const bucketSize = (size_t)1 << bucketLog;
490
496
  unsigned const prevBucketLog =
491
497
  serialState->params.ldmParams.hashLog -
492
498
  serialState->params.ldmParams.bucketSizeLog;
499
+ size_t const numBuckets = (size_t)1 << bucketLog;
493
500
  /* Size the seq pool tables */
494
501
  ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
495
502
  /* Reset the window */
@@ -501,20 +508,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
501
508
  }
502
509
  if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
503
510
  ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
504
- serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
511
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
505
512
  }
506
513
  if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
507
514
  return 1;
508
515
  /* Zero the tables */
509
516
  ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
510
- ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
517
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
511
518
 
512
519
  /* Update window state and fill hash table with dict */
513
520
  serialState->ldmState.loadedDictEnd = 0;
514
521
  if (dictSize > 0) {
515
522
  if (dictContentType == ZSTD_dct_rawContent) {
516
523
  BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
517
- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
524
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
518
525
  ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
519
526
  serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
520
527
  } else {
@@ -566,12 +573,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
566
573
  /* A future job may error and skip our job */
567
574
  if (serialState->nextJobID == jobID) {
568
575
  /* It is now our turn, do any processing necessary */
569
- if (serialState->params.ldmParams.enableLdm) {
576
+ if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
570
577
  size_t error;
571
578
  assert(seqStore.seq != NULL && seqStore.pos == 0 &&
572
579
  seqStore.size == 0 && seqStore.capacity > 0);
573
580
  assert(src.size <= serialState->params.jobSize);
574
- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
581
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
575
582
  error = ZSTD_ldm_generateSequences(
576
583
  &serialState->ldmState, &seqStore,
577
584
  &serialState->params.ldmParams, src.start, src.size);
@@ -596,7 +603,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
596
603
  if (seqStore.size > 0) {
597
604
  size_t const err = ZSTD_referenceExternalSequences(
598
605
  jobCCtx, seqStore.seq, seqStore.size);
599
- assert(serialState->params.ldmParams.enableLdm);
606
+ assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
600
607
  assert(!ZSTD_isError(err));
601
608
  (void)err;
602
609
  }
@@ -674,7 +681,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
674
681
  if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
675
682
  job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
676
683
  }
677
- if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
684
+ if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
678
685
  JOB_ERROR(ERROR(memory_allocation));
679
686
 
680
687
  /* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +689,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
682
689
  */
683
690
  if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
684
691
  /* Don't run LDM for the chunks, since we handle it externally */
685
- jobParams.ldmParams.enableLdm = 0;
692
+ jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
693
+ /* Correct nbWorkers to 0. */
694
+ jobParams.nbWorkers = 0;
686
695
 
687
696
 
688
697
  /* init */
@@ -695,6 +704,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
695
704
  { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
696
705
  if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
697
706
  }
707
+ if (!job->firstJob) {
708
+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
709
+ if (ZSTD_isError(err)) JOB_ERROR(err);
710
+ }
698
711
  { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
699
712
  job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
700
713
  ZSTD_dtlm_fast,
@@ -707,7 +720,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
707
720
  ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
708
721
 
709
722
  if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
710
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
723
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
711
724
  if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
712
725
  DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
713
726
  ZSTD_invalidateRepCodes(cctx);
@@ -725,7 +738,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
725
738
  DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
726
739
  assert(job->cSize == 0);
727
740
  for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
728
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
741
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
729
742
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
730
743
  ip += chunkSize;
731
744
  op += cSize; assert(op < oend);
@@ -745,11 +758,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
745
758
  size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
746
759
  size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
747
760
  size_t const cSize = (job->lastJob) ?
748
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
749
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
761
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
762
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
750
763
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
751
764
  lastCBlockSize = cSize;
752
765
  } }
766
+ if (!job->firstJob) {
767
+ /* Double check that we don't have an ext-dict, because then our
768
+ * repcode invalidation doesn't work.
769
+ */
770
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
771
+ }
772
+ ZSTD_CCtx_trace(cctx, 0);
753
773
 
754
774
  _endJob:
755
775
  ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +816,15 @@ typedef struct {
796
816
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
797
817
 
798
818
  #define RSYNC_LENGTH 32
819
+ /* Don't create chunks smaller than the zstd block size.
820
+ * This stops us from regressing compression ratio too much,
821
+ * and ensures our output fits in ZSTD_compressBound().
822
+ *
823
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
824
+ * ZSTD_COMPRESSBOUND() will need to be updated.
825
+ */
826
+ #define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
827
+ #define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
799
828
 
800
829
  typedef struct {
801
830
  U64 hash;
@@ -916,7 +945,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
916
945
  mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
917
946
  assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
918
947
  mtctx->jobIDMask = nbJobs - 1;
919
- mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
948
+ mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
920
949
  mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
921
950
  mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
922
951
  initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1019,7 +1048,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1019
1048
  {
1020
1049
  if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1021
1050
  FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
1022
- mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1051
+ mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
1023
1052
  if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1024
1053
  mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
1025
1054
  if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1124,7 +1153,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1124
1153
  static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
1125
1154
  {
1126
1155
  unsigned jobLog;
1127
- if (params->ldmParams.enableLdm) {
1156
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1128
1157
  /* In Long Range Mode, the windowLog is typically oversized.
1129
1158
  * In which case, it's preferable to determine the jobSize
1130
1159
  * based on cycleLog instead. */
@@ -1168,7 +1197,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
1168
1197
  int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
1169
1198
  int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
1170
1199
  assert(0 <= overlapRLog && overlapRLog <= 8);
1171
- if (params->ldmParams.enableLdm) {
1200
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
1172
1201
  /* In Long Range Mode, the windowLog is typically oversized.
1173
1202
  * In which case, it's preferable to determine the jobSize
1174
1203
  * based on chainLog instead.
@@ -1239,9 +1268,11 @@ size_t ZSTDMT_initCStream_internal(
1239
1268
 
1240
1269
  if (params.rsyncable) {
1241
1270
  /* Aim for the targetsectionSize as the average job size. */
1242
- U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
1243
- U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
1244
- assert(jobSizeMB >= 1);
1271
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
1272
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
1273
+ /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
1274
+ * expected job size is at least 4x larger. */
1275
+ assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
1245
1276
  DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
1246
1277
  mtctx->rsync.hash = 0;
1247
1278
  mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1253,7 +1284,7 @@ size_t ZSTDMT_initCStream_internal(
1253
1284
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
1254
1285
  {
1255
1286
  /* If ldm is enabled we need windowSize space. */
1256
- size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
1287
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
1257
1288
  /* Two buffers of slack, plus extra space for the overlap
1258
1289
  * This is the minimum slack that LDM works with. One extra because
1259
1290
  * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1559,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
1528
1559
  static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1529
1560
  {
1530
1561
  BYTE const* const bufferStart = (BYTE const*)buffer.start;
1531
- BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1532
1562
  BYTE const* const rangeStart = (BYTE const*)range.start;
1533
- BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
1534
1563
 
1535
1564
  if (rangeStart == NULL || bufferStart == NULL)
1536
1565
  return 0;
1537
- /* Empty ranges cannot overlap */
1538
- if (bufferStart == bufferEnd || rangeStart == rangeEnd)
1539
- return 0;
1540
1566
 
1541
- return bufferStart < rangeEnd && rangeStart < bufferEnd;
1567
+ {
1568
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1569
+ BYTE const* const rangeEnd = rangeStart + range.size;
1570
+
1571
+ /* Empty ranges cannot overlap */
1572
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
1573
+ return 0;
1574
+
1575
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
1576
+ }
1542
1577
  }
1543
1578
 
1544
1579
  static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
@@ -1565,7 +1600,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
1565
1600
 
1566
1601
  static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
1567
1602
  {
1568
- if (mtctx->params.ldmParams.enableLdm) {
1603
+ if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
1569
1604
  ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1570
1605
  DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
1571
1606
  DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1668,6 +1703,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1668
1703
  if (!mtctx->params.rsyncable)
1669
1704
  /* Rsync is disabled. */
1670
1705
  return syncPoint;
1706
+ if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
1707
+ /* We don't emit synchronization points if it would produce too small blocks.
1708
+ * We don't have enough input to find a synchronization point, so don't look.
1709
+ */
1710
+ return syncPoint;
1671
1711
  if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
1672
1712
  /* Not enough to compute the hash.
1673
1713
  * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1718,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1678
1718
  */
1679
1719
  return syncPoint;
1680
1720
  /* Initialize the loop variables. */
1681
- if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
1682
- /* We have enough bytes buffered to initialize the hash.
1721
+ if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
1722
+ /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
1723
+ * because they can't possibly be a sync point. So we can start
1724
+ * part way through the input buffer.
1725
+ */
1726
+ pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
1727
+ if (pos >= RSYNC_LENGTH) {
1728
+ prev = istart + pos - RSYNC_LENGTH;
1729
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
1730
+ } else {
1731
+ assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
1732
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1733
+ hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
1734
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
1735
+ }
1736
+ } else {
1737
+ /* We have enough bytes buffered to initialize the hash,
1738
+ * and have processed enough bytes to find a sync point.
1683
1739
  * Start scanning at the beginning of the input.
1684
1740
  */
1741
+ assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
1742
+ assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
1685
1743
  pos = 0;
1686
1744
  prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
1687
1745
  hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1753,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1695
1753
  syncPoint.flush = 1;
1696
1754
  return syncPoint;
1697
1755
  }
1698
- } else {
1699
- /* We don't have enough bytes buffered to initialize the hash, but
1700
- * we know we have at least RSYNC_LENGTH bytes total.
1701
- * Start scanning after the first RSYNC_LENGTH bytes less the bytes
1702
- * already buffered.
1703
- */
1704
- pos = RSYNC_LENGTH - mtctx->inBuff.filled;
1705
- prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
1706
- hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
1707
- hash = ZSTD_rollingHash_append(hash, istart, pos);
1708
1756
  }
1709
1757
  /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
1710
1758
  * through the input. If we hit a synchronization point, then cut the
@@ -1714,16 +1762,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1714
1762
  * then a block will be emitted anyways, but this is okay, since if we
1715
1763
  * are already synchronized we will remain synchronized.
1716
1764
  */
1765
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1717
1766
  for (; pos < syncPoint.toLoad; ++pos) {
1718
1767
  BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1719
- /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
1768
+ /* This assert is very expensive, and Debian compiles with asserts enabled.
1769
+ * So disable it for now. We can get similar coverage by checking it at the
1770
+ * beginning & end of the loop.
1771
+ * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1772
+ */
1720
1773
  hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1774
+ assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
1721
1775
  if ((hash & hitMask) == hitMask) {
1722
1776
  syncPoint.toLoad = pos + 1;
1723
1777
  syncPoint.flush = 1;
1778
+ ++pos; /* for assert */
1724
1779
  break;
1725
1780
  }
1726
1781
  }
1782
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1727
1783
  return syncPoint;
1728
1784
  }
1729
1785
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -32,11 +32,11 @@
32
32
 
33
33
 
34
34
  /* === Constants === */
35
- #ifndef ZSTDMT_NBWORKERS_MAX
36
- # define ZSTDMT_NBWORKERS_MAX 200
35
+ #ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */
36
+ # define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256)
37
37
  #endif
38
- #ifndef ZSTDMT_JOBSIZE_MIN
39
- # define ZSTDMT_JOBSIZE_MIN (1 MB)
38
+ #ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */
39
+ # define ZSTDMT_JOBSIZE_MIN (512 KB)
40
40
  #endif
41
41
  #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
42
42
  #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
@@ -65,8 +65,11 @@ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
65
65
  * Private use only. Init streaming operation.
66
66
  * expects params to be valid.
67
67
  * must receive dict, or cdict, or none, but not both.
68
+ * mtctx can be freshly constructed or reused from a prior compression.
69
+ * If mtctx is reused, memory allocations from the prior compression may not be freed,
70
+ * even if they are not needed for the current compression.
68
71
  * @return : 0, or an error code */
69
- size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
72
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
70
73
  const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
71
74
  const ZSTD_CDict* cdict,
72
75
  ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);