zstd-ruby 1.5.5.1 → 1.5.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/common/allocations.h +1 -1
  4. data/ext/zstdruby/libzstd/common/bitstream.h +49 -29
  5. data/ext/zstdruby/libzstd/common/compiler.h +114 -22
  6. data/ext/zstdruby/libzstd/common/cpu.h +36 -0
  7. data/ext/zstdruby/libzstd/common/debug.c +6 -0
  8. data/ext/zstdruby/libzstd/common/debug.h +20 -11
  9. data/ext/zstdruby/libzstd/common/error_private.h +45 -36
  10. data/ext/zstdruby/libzstd/common/fse.h +3 -2
  11. data/ext/zstdruby/libzstd/common/fse_decompress.c +19 -17
  12. data/ext/zstdruby/libzstd/common/huf.h +14 -1
  13. data/ext/zstdruby/libzstd/common/mem.h +0 -9
  14. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  15. data/ext/zstdruby/libzstd/common/pool.h +1 -1
  16. data/ext/zstdruby/libzstd/common/portability_macros.h +2 -0
  17. data/ext/zstdruby/libzstd/common/threading.c +8 -2
  18. data/ext/zstdruby/libzstd/common/xxhash.c +5 -11
  19. data/ext/zstdruby/libzstd/common/xxhash.h +2341 -1007
  20. data/ext/zstdruby/libzstd/common/zstd_internal.h +5 -5
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +8 -7
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +54 -25
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +282 -161
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +29 -27
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +224 -113
  26. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +19 -13
  27. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +17 -5
  28. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -0
  29. data/ext/zstdruby/libzstd/compress/zstd_fast.c +14 -6
  30. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +129 -87
  31. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +103 -28
  32. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +8 -2
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.c +216 -112
  34. data/ext/zstdruby/libzstd/compress/zstd_opt.h +31 -7
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +94 -79
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +188 -126
  37. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +38 -19
  38. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +84 -32
  39. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +231 -208
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +2 -0
  42. data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -12
  43. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -8
  44. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
  45. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +12 -6
  46. data/ext/zstdruby/libzstd/zstd.h +129 -60
  47. data/ext/zstdruby/streaming_compress.c +1 -1
  48. data/ext/zstdruby/streaming_decompress.c +1 -1
  49. data/lib/zstd-ruby/version.rb +1 -1
  50. data/renovate.json +6 -0
  51. metadata +4 -3
@@ -39,7 +39,7 @@ extern "C" {
39
39
  It's not a big deal though : candidate will just be sorted again.
40
40
  Additionally, candidate position 1 will be lost.
41
41
  But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
42
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
42
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
43
43
  This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
44
44
 
45
45
 
@@ -159,23 +159,24 @@ typedef struct {
159
159
  UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
160
160
 
161
161
  typedef struct {
162
- int price;
163
- U32 off;
164
- U32 mlen;
165
- U32 litlen;
166
- U32 rep[ZSTD_REP_NUM];
162
+ int price; /* price from beginning of segment to this position */
163
+ U32 off; /* offset of previous match */
164
+ U32 mlen; /* length of previous match */
165
+ U32 litlen; /* nb of literals since previous match */
166
+ U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */
167
167
  } ZSTD_optimal_t;
168
168
 
169
169
  typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
170
170
 
171
+ #define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
171
172
  typedef struct {
172
173
  /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
173
174
  unsigned* litFreq; /* table of literals statistics, of size 256 */
174
175
  unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
175
176
  unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
176
177
  unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
177
- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
178
- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
178
+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */
179
+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
179
180
 
180
181
  U32 litSum; /* nb of literals */
181
182
  U32 litLengthSum; /* nb of litLength codes */
@@ -228,7 +229,7 @@ struct ZSTD_matchState_t {
228
229
  U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
229
230
  BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
230
231
  U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
231
- U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
232
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */
232
233
  U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
233
234
 
234
235
  U32* hashTable;
@@ -360,10 +361,11 @@ struct ZSTD_CCtx_params_s {
360
361
  * if the external matchfinder returns an error code. */
361
362
  int enableMatchFinderFallback;
362
363
 
363
- /* Indicates whether an external matchfinder has been referenced.
364
- * Users can't set this externally.
365
- * It is set internally in ZSTD_registerSequenceProducer(). */
366
- int useSequenceProducer;
364
+ /* Parameters for the external sequence producer API.
365
+ * Users set these parameters through ZSTD_registerSequenceProducer().
366
+ * It is not possible to set these parameters individually through the public API. */
367
+ void* extSeqProdState;
368
+ ZSTD_sequenceProducer_F extSeqProdFunc;
367
369
 
368
370
  /* Adjust the max block size*/
369
371
  size_t maxBlockSize;
@@ -401,14 +403,6 @@ typedef struct {
401
403
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
402
404
  } ZSTD_blockSplitCtx;
403
405
 
404
- /* Context for block-level external matchfinder API */
405
- typedef struct {
406
- void* mState;
407
- ZSTD_sequenceProducer_F* mFinder;
408
- ZSTD_Sequence* seqBuffer;
409
- size_t seqBufferCapacity;
410
- } ZSTD_externalMatchCtx;
411
-
412
406
  struct ZSTD_CCtx_s {
413
407
  ZSTD_compressionStage_e stage;
414
408
  int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -479,8 +473,9 @@ struct ZSTD_CCtx_s {
479
473
  /* Workspace for block splitter */
480
474
  ZSTD_blockSplitCtx blockSplitCtx;
481
475
 
482
- /* Workspace for external matchfinder */
483
- ZSTD_externalMatchCtx externalMatchCtx;
476
+ /* Buffer for output from external sequence producer */
477
+ ZSTD_Sequence* extSeqBuf;
478
+ size_t extSeqBufCapacity;
484
479
  };
485
480
 
486
481
  typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -1053,7 +1048,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1053
1048
  * The least significant cycleLog bits of the indices must remain the same,
1054
1049
  * which may be 0. Every index up to maxDist in the past must be valid.
1055
1050
  */
1056
- MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1051
+ MEM_STATIC
1052
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1053
+ U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1057
1054
  U32 maxDist, void const* src)
1058
1055
  {
1059
1056
  /* preemptive overflow correction:
@@ -1246,7 +1243,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1246
1243
  * forget about the extDict. Handles overlap of the prefix and extDict.
1247
1244
  * Returns non-zero if the segment is contiguous.
1248
1245
  */
1249
- MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
1246
+ MEM_STATIC
1247
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1248
+ U32 ZSTD_window_update(ZSTD_window_t* window,
1250
1249
  void const* src, size_t srcSize,
1251
1250
  int forceNonContiguous)
1252
1251
  {
@@ -1467,11 +1466,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1467
1466
  * This cannot be used when long range matching is enabled.
1468
1467
  * Zstd will use these sequences, and pass the literals to a secondary block
1469
1468
  * compressor.
1470
- * @return : An error code on failure.
1471
1469
  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1472
1470
  * access and data corruption.
1473
1471
  */
1474
- size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1472
+ void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1475
1473
 
1476
1474
  /** ZSTD_cycleLog() :
1477
1475
  * condition for correct operation : hashLog > 1 */
@@ -1509,6 +1507,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
1509
1507
  const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
1510
1508
  const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
1511
1509
 
1510
+ /* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
1511
+ MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
1512
+ return params->extSeqProdFunc != NULL;
1513
+ }
1512
1514
 
1513
1515
  /* ===============================================================
1514
1516
  * Deprecated definitions that are still used internally to avoid
@@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
76
76
  }
77
77
 
78
78
  { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
79
- const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
80
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
79
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
80
+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
81
81
  op += cSize;
82
82
  cLitSize += cSize;
83
83
  if (cSize == 0 || ERR_isError(cSize)) {
@@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
102
102
  switch(lhSize)
103
103
  {
104
104
  case 3: /* 2 - 2 - 10 - 10 */
105
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
105
+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
106
106
  MEM_writeLE24(ostart, lhc);
107
107
  break;
108
108
  }
@@ -122,30 +122,30 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
122
122
  }
123
123
  *entropyWritten = 1;
124
124
  DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
125
- return op-ostart;
125
+ return (size_t)(op-ostart);
126
126
  }
127
127
 
128
128
  static size_t
129
129
  ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
130
- const seqDef* sequences, size_t nbSeq,
131
- size_t litSize, int lastSequence)
130
+ const seqDef* sequences, size_t nbSeqs,
131
+ size_t litSize, int lastSubBlock)
132
132
  {
133
- const seqDef* const sstart = sequences;
134
- const seqDef* const send = sequences + nbSeq;
135
- const seqDef* sp = sstart;
136
133
  size_t matchLengthSum = 0;
137
134
  size_t litLengthSum = 0;
138
- (void)(litLengthSum); /* suppress unused variable warning on some environments */
139
- while (send-sp > 0) {
140
- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
135
+ size_t n;
136
+ for (n=0; n<nbSeqs; n++) {
137
+ const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
141
138
  litLengthSum += seqLen.litLength;
142
139
  matchLengthSum += seqLen.matchLength;
143
- sp++;
144
140
  }
145
- assert(litLengthSum <= litSize);
146
- if (!lastSequence) {
141
+ DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
142
+ (unsigned)nbSeqs, (const void*)sequences,
143
+ (unsigned)litLengthSum, (unsigned)matchLengthSum);
144
+ if (!lastSubBlock)
147
145
  assert(litLengthSum == litSize);
148
- }
146
+ else
147
+ assert(litLengthSum <= litSize);
148
+ (void)litLengthSum;
149
149
  return matchLengthSum + litSize;
150
150
  }
151
151
 
@@ -180,14 +180,14 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
180
180
  /* Sequences Header */
181
181
  RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
182
182
  dstSize_tooSmall, "");
183
- if (nbSeq < 0x7F)
183
+ if (nbSeq < 128)
184
184
  *op++ = (BYTE)nbSeq;
185
185
  else if (nbSeq < LONGNBSEQ)
186
186
  op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
187
187
  else
188
188
  op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
189
189
  if (nbSeq==0) {
190
- return op - ostart;
190
+ return (size_t)(op - ostart);
191
191
  }
192
192
 
193
193
  /* seqHead : flags for FSE encoding type */
@@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
209
209
  }
210
210
 
211
211
  { size_t const bitstreamSize = ZSTD_encodeSequences(
212
- op, oend - op,
212
+ op, (size_t)(oend - op),
213
213
  fseTables->matchlengthCTable, mlCode,
214
214
  fseTables->offcodeCTable, ofCode,
215
215
  fseTables->litlengthCTable, llCode,
@@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
253
253
  #endif
254
254
 
255
255
  *entropyWritten = 1;
256
- return op - ostart;
256
+ return (size_t)(op - ostart);
257
257
  }
258
258
 
259
259
  /** ZSTD_compressSubBlock() :
@@ -279,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
279
279
  litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
280
280
  { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
281
281
  &entropyMetadata->hufMetadata, literals, litSize,
282
- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
282
+ op, (size_t)(oend-op),
283
+ bmi2, writeLitEntropy, litEntropyWritten);
283
284
  FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
284
285
  if (cLitSize == 0) return 0;
285
286
  op += cLitSize;
@@ -289,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
289
290
  sequences, nbSeq,
290
291
  llCode, mlCode, ofCode,
291
292
  cctxParams,
292
- op, oend-op,
293
+ op, (size_t)(oend-op),
293
294
  bmi2, writeSeqEntropy, seqEntropyWritten);
294
295
  FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
295
296
  if (cSeqSize == 0) return 0;
296
297
  op += cSeqSize;
297
298
  }
298
299
  /* Write block header */
299
- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
300
+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
300
301
  U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
301
302
  MEM_writeLE24(ostart, cBlockHeader24);
302
303
  }
303
- return op-ostart;
304
+ return (size_t)(op-ostart);
304
305
  }
305
306
 
306
307
  static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -389,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
389
390
  return cSeqSizeEstimate + sequencesSectionHeaderSize;
390
391
  }
391
392
 
392
- static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
393
+ typedef struct {
394
+ size_t estLitSize;
395
+ size_t estBlockSize;
396
+ } EstimatedBlockSize;
397
+ static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
393
398
  const BYTE* ofCodeTable,
394
399
  const BYTE* llCodeTable,
395
400
  const BYTE* mlCodeTable,
@@ -397,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
397
402
  const ZSTD_entropyCTables_t* entropy,
398
403
  const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
399
404
  void* workspace, size_t wkspSize,
400
- int writeLitEntropy, int writeSeqEntropy) {
401
- size_t cSizeEstimate = 0;
402
- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
403
- &entropy->huf, &entropyMetadata->hufMetadata,
404
- workspace, wkspSize, writeLitEntropy);
405
- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
405
+ int writeLitEntropy, int writeSeqEntropy)
406
+ {
407
+ EstimatedBlockSize ebs;
408
+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
409
+ &entropy->huf, &entropyMetadata->hufMetadata,
410
+ workspace, wkspSize, writeLitEntropy);
411
+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
406
412
  nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
407
413
  workspace, wkspSize, writeSeqEntropy);
408
- return cSizeEstimate + ZSTD_blockHeaderSize;
414
+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
415
+ return ebs;
409
416
  }
410
417
 
411
418
  static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -419,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
419
426
  return 0;
420
427
  }
421
428
 
429
+ static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
430
+ {
431
+ size_t n, total = 0;
432
+ assert(sp != NULL);
433
+ for (n=0; n<seqCount; n++) {
434
+ total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
435
+ }
436
+ DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
437
+ return total;
438
+ }
439
+
440
+ #define BYTESCALE 256
441
+
442
+ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
443
+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
444
+ int firstSubBlock)
445
+ {
446
+ size_t n, budget = 0, inSize=0;
447
+ /* entropy headers */
448
+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
449
+ assert(firstSubBlock==0 || firstSubBlock==1);
450
+ budget += headerSize;
451
+
452
+ /* first sequence => at least one sequence*/
453
+ budget += sp[0].litLength * avgLitCost + avgSeqCost;
454
+ if (budget > targetBudget) return 1;
455
+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
456
+
457
+ /* loop over sequences */
458
+ for (n=1; n<nbSeqs; n++) {
459
+ size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
460
+ budget += currentCost;
461
+ inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
462
+ /* stop when sub-block budget is reached */
463
+ if ( (budget > targetBudget)
464
+ /* though continue to expand until the sub-block is deemed compressible */
465
+ && (budget < inSize * BYTESCALE) )
466
+ break;
467
+ }
468
+
469
+ return n;
470
+ }
471
+
422
472
  /** ZSTD_compressSubBlock_multi() :
423
473
  * Breaks super-block into multiple sub-blocks and compresses them.
424
- * Entropy will be written to the first block.
425
- * The following blocks will use repeat mode to compress.
426
- * All sub-blocks are compressed blocks (no raw or rle blocks).
427
- * @return : compressed size of the super block (which is multiple ZSTD blocks)
428
- * Or 0 if it failed to compress. */
474
+ * Entropy will be written into the first block.
475
+ * The following blocks use repeat_mode to compress.
476
+ * Sub-blocks are all compressed, except the last one when beneficial.
477
+ * @return : compressed size of the super block (which features multiple ZSTD blocks)
478
+ * or 0 if it failed to compress. */
429
479
  static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
430
480
  const ZSTD_compressedBlockState_t* prevCBlock,
431
481
  ZSTD_compressedBlockState_t* nextCBlock,
@@ -438,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
438
488
  {
439
489
  const seqDef* const sstart = seqStorePtr->sequencesStart;
440
490
  const seqDef* const send = seqStorePtr->sequences;
441
- const seqDef* sp = sstart;
491
+ const seqDef* sp = sstart; /* tracks progress within seqStorePtr->sequences */
492
+ size_t const nbSeqs = (size_t)(send - sstart);
442
493
  const BYTE* const lstart = seqStorePtr->litStart;
443
494
  const BYTE* const lend = seqStorePtr->lit;
444
495
  const BYTE* lp = lstart;
496
+ size_t const nbLiterals = (size_t)(lend - lstart);
445
497
  BYTE const* ip = (BYTE const*)src;
446
498
  BYTE const* const iend = ip + srcSize;
447
499
  BYTE* const ostart = (BYTE*)dst;
@@ -450,96 +502,152 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
450
502
  const BYTE* llCodePtr = seqStorePtr->llCode;
451
503
  const BYTE* mlCodePtr = seqStorePtr->mlCode;
452
504
  const BYTE* ofCodePtr = seqStorePtr->ofCode;
453
- size_t targetCBlockSize = cctxParams->targetCBlockSize;
454
- size_t litSize, seqCount;
455
- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
505
+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
506
+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
507
+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
456
508
  int writeSeqEntropy = 1;
457
- int lastSequence = 0;
458
-
459
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
460
- (unsigned)(lend-lp), (unsigned)(send-sstart));
461
-
462
- litSize = 0;
463
- seqCount = 0;
464
- do {
465
- size_t cBlockSizeEstimate = 0;
466
- if (sstart == send) {
467
- lastSequence = 1;
468
- } else {
469
- const seqDef* const sequence = sp + seqCount;
470
- lastSequence = sequence == send - 1;
471
- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
472
- seqCount++;
473
- }
474
- if (lastSequence) {
475
- assert(lp <= lend);
476
- assert(litSize <= (size_t)(lend - lp));
477
- litSize = (size_t)(lend - lp);
509
+
510
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
511
+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
512
+
513
+ /* let's start with a general estimate for the full block */
514
+ if (nbSeqs > 0) {
515
+ EstimatedBlockSize const ebs =
516
+ ZSTD_estimateSubBlockSize(lp, nbLiterals,
517
+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
518
+ &nextCBlock->entropy, entropyMetadata,
519
+ workspace, wkspSize,
520
+ writeLitEntropy, writeSeqEntropy);
521
+ /* quick estimation */
522
+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
523
+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
524
+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
525
+ size_t n, avgBlockBudget, blockBudgetSupp=0;
526
+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
527
+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
528
+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
529
+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
530
+ /* simplification: if the estimate states that the full superblock doesn't compress, just bail out immediately
531
+ * this will result in the production of a single uncompressed block covering @srcSize.*/
532
+ if (ebs.estBlockSize > srcSize) return 0;
533
+
534
+ /* compress and write sub-blocks */
535
+ assert(nbSubBlocks>0);
536
+ for (n=0; n < nbSubBlocks-1; n++) {
537
+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
538
+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
539
+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
540
+ /* if reached last sequence : break to last sub-block (simplification) */
541
+ assert(seqCount <= (size_t)(send-sp));
542
+ if (sp + seqCount == send) break;
543
+ assert(seqCount > 0);
544
+ /* compress sub-block */
545
+ { int litEntropyWritten = 0;
546
+ int seqEntropyWritten = 0;
547
+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
548
+ const size_t decompressedSize =
549
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
550
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
551
+ sp, seqCount,
552
+ lp, litSize,
553
+ llCodePtr, mlCodePtr, ofCodePtr,
554
+ cctxParams,
555
+ op, (size_t)(oend-op),
556
+ bmi2, writeLitEntropy, writeSeqEntropy,
557
+ &litEntropyWritten, &seqEntropyWritten,
558
+ 0);
559
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
560
+
561
+ /* check compressibility, update state components */
562
+ if (cSize > 0 && cSize < decompressedSize) {
563
+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
564
+ (unsigned)decompressedSize, (unsigned)cSize);
565
+ assert(ip + decompressedSize <= iend);
566
+ ip += decompressedSize;
567
+ lp += litSize;
568
+ op += cSize;
569
+ llCodePtr += seqCount;
570
+ mlCodePtr += seqCount;
571
+ ofCodePtr += seqCount;
572
+ /* Entropy only needs to be written once */
573
+ if (litEntropyWritten) {
574
+ writeLitEntropy = 0;
575
+ }
576
+ if (seqEntropyWritten) {
577
+ writeSeqEntropy = 0;
578
+ }
579
+ sp += seqCount;
580
+ blockBudgetSupp = 0;
581
+ } }
582
+ /* otherwise : do not compress yet, coalesce current sub-block with following one */
478
583
  }
479
- /* I think there is an optimization opportunity here.
480
- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
481
- * since it recalculates estimate from scratch.
482
- * For example, it would recount literal distribution and symbol codes every time.
483
- */
484
- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
485
- &nextCBlock->entropy, entropyMetadata,
486
- workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
487
- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
488
- int litEntropyWritten = 0;
489
- int seqEntropyWritten = 0;
490
- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
491
- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
492
- sp, seqCount,
493
- lp, litSize,
494
- llCodePtr, mlCodePtr, ofCodePtr,
495
- cctxParams,
496
- op, oend-op,
497
- bmi2, writeLitEntropy, writeSeqEntropy,
498
- &litEntropyWritten, &seqEntropyWritten,
499
- lastBlock && lastSequence);
500
- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
501
- if (cSize > 0 && cSize < decompressedSize) {
502
- DEBUGLOG(5, "Committed the sub-block");
503
- assert(ip + decompressedSize <= iend);
504
- ip += decompressedSize;
505
- sp += seqCount;
506
- lp += litSize;
507
- op += cSize;
508
- llCodePtr += seqCount;
509
- mlCodePtr += seqCount;
510
- ofCodePtr += seqCount;
511
- litSize = 0;
512
- seqCount = 0;
513
- /* Entropy only needs to be written once */
514
- if (litEntropyWritten) {
515
- writeLitEntropy = 0;
516
- }
517
- if (seqEntropyWritten) {
518
- writeSeqEntropy = 0;
519
- }
584
+ } /* if (nbSeqs > 0) */
585
+
586
+ /* write last block */
587
+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
588
+ { int litEntropyWritten = 0;
589
+ int seqEntropyWritten = 0;
590
+ size_t litSize = (size_t)(lend - lp);
591
+ size_t seqCount = (size_t)(send - sp);
592
+ const size_t decompressedSize =
593
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
594
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
595
+ sp, seqCount,
596
+ lp, litSize,
597
+ llCodePtr, mlCodePtr, ofCodePtr,
598
+ cctxParams,
599
+ op, (size_t)(oend-op),
600
+ bmi2, writeLitEntropy, writeSeqEntropy,
601
+ &litEntropyWritten, &seqEntropyWritten,
602
+ lastBlock);
603
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
604
+
605
+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */
606
+ if (cSize > 0 && cSize < decompressedSize) {
607
+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
608
+ (unsigned)decompressedSize, (unsigned)cSize);
609
+ assert(ip + decompressedSize <= iend);
610
+ ip += decompressedSize;
611
+ lp += litSize;
612
+ op += cSize;
613
+ llCodePtr += seqCount;
614
+ mlCodePtr += seqCount;
615
+ ofCodePtr += seqCount;
616
+ /* Entropy only needs to be written once */
617
+ if (litEntropyWritten) {
618
+ writeLitEntropy = 0;
619
+ }
620
+ if (seqEntropyWritten) {
621
+ writeSeqEntropy = 0;
520
622
  }
623
+ sp += seqCount;
521
624
  }
522
- } while (!lastSequence);
625
+ }
626
+
627
+
523
628
  if (writeLitEntropy) {
524
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
629
+ DEBUGLOG(5, "Literal entropy tables were never written");
525
630
  ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
526
631
  }
527
632
  if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
528
633
  /* If we haven't written our entropy tables, then we've violated our contract and
529
634
  * must emit an uncompressed block.
530
635
  */
531
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
636
+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
532
637
  return 0;
533
638
  }
639
+
534
640
  if (ip < iend) {
535
- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
536
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
641
+ /* some data left : last part of the block sent uncompressed */
642
+ size_t const rSize = (size_t)((iend - ip));
643
+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
644
+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
537
645
  FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
538
646
  assert(cSize != 0);
539
647
  op += cSize;
540
648
  /* We have to regenerate the repcodes because we've skipped some sequences */
541
649
  if (sp < send) {
542
- seqDef const* seq;
650
+ const seqDef* seq;
543
651
  repcodes_t rep;
544
652
  ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
545
653
  for (seq = sstart; seq < sp; ++seq) {
@@ -548,14 +656,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
548
656
  ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
549
657
  }
550
658
  }
551
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
552
- return op-ostart;
659
+
660
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
661
+ (unsigned)(op-ostart));
662
+ return (size_t)(op-ostart);
553
663
  }
554
664
 
555
665
  size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
556
666
  void* dst, size_t dstCapacity,
557
- void const* src, size_t srcSize,
558
- unsigned lastBlock) {
667
+ const void* src, size_t srcSize,
668
+ unsigned lastBlock)
669
+ {
559
670
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
560
671
 
561
672
  FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,