zstd-ruby 1.1.3.0 → 1.1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +9 -6
  4. data/ext/zstdruby/libzstd/common/bitstream.h +3 -0
  5. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -19
  6. data/ext/zstdruby/libzstd/common/fse.h +48 -22
  7. data/ext/zstdruby/libzstd/common/fse_decompress.c +0 -1
  8. data/ext/zstdruby/libzstd/common/huf.h +27 -5
  9. data/ext/zstdruby/libzstd/common/mem.h +14 -12
  10. data/ext/zstdruby/libzstd/common/threading.c +5 -4
  11. data/ext/zstdruby/libzstd/common/threading.h +1 -1
  12. data/ext/zstdruby/libzstd/common/xxhash.c +3 -1
  13. data/ext/zstdruby/libzstd/common/xxhash.h +11 -15
  14. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  15. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -0
  16. data/ext/zstdruby/libzstd/compress/fse_compress.c +16 -9
  17. data/ext/zstdruby/libzstd/compress/huf_compress.c +103 -28
  18. data/ext/zstdruby/libzstd/compress/zstd_compress.c +90 -37
  19. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  20. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +7 -8
  21. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +20 -17
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +429 -120
  23. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -1
  24. data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -8
  25. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +1 -1
  26. data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
  27. data/ext/zstdruby/libzstd/dll/libzstd.def +2 -0
  28. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +122 -7
  29. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +31 -0
  30. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +8 -0
  31. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +37 -0
  32. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +8 -0
  33. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +37 -0
  34. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +8 -0
  35. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +33 -0
  36. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +8 -0
  37. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -0
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +7 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +32 -1
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +7 -0
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +44 -6
  42. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +8 -0
  43. data/ext/zstdruby/libzstd/zstd.h +87 -13
  44. data/lib/zstd-ruby/version.rb +1 -1
  45. metadata +2 -2
@@ -127,7 +127,7 @@ struct HUF_CElt_s {
127
127
  }; /* typedef'd to HUF_CElt within "huf.h" */
128
128
 
129
129
  /*! HUF_writeCTable() :
130
- `CTable` : huffman tree to save, using huf representation.
130
+ `CTable` : Huffman tree to save, using huf representation.
131
131
  @return : size of saved CTable */
132
132
  size_t HUF_writeCTable (void* dst, size_t maxDstSize,
133
133
  const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
@@ -409,6 +409,25 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
409
409
  return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
410
410
  }
411
411
 
412
+ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
413
+ {
414
+ size_t nbBits = 0;
415
+ int s;
416
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
417
+ nbBits += CTable[s].nbBits * count[s];
418
+ }
419
+ return nbBits >> 3;
420
+ }
421
+
422
+ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
423
+ int bad = 0;
424
+ int s;
425
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
426
+ bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
427
+ }
428
+ return !bad;
429
+ }
430
+
412
431
  static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
413
432
  {
414
433
  BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@@ -510,25 +529,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
510
529
  }
511
530
 
512
531
 
532
+ static size_t HUF_compressCTable_internal(
533
+ BYTE* const ostart, BYTE* op, BYTE* const oend,
534
+ const void* src, size_t srcSize,
535
+ unsigned singleStream, const HUF_CElt* CTable)
536
+ {
537
+ size_t const cSize = singleStream ?
538
+ HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
539
+ HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
540
+ if (HUF_isError(cSize)) { return cSize; }
541
+ if (cSize==0) { return 0; } /* uncompressible */
542
+ op += cSize;
543
+ /* check compressibility */
544
+ if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
545
+ return op-ostart;
546
+ }
547
+
548
+
513
549
  /* `workSpace` must a table of at least 1024 unsigned */
514
550
  static size_t HUF_compress_internal (
515
551
  void* dst, size_t dstSize,
516
552
  const void* src, size_t srcSize,
517
553
  unsigned maxSymbolValue, unsigned huffLog,
518
554
  unsigned singleStream,
519
- void* workSpace, size_t wkspSize)
555
+ void* workSpace, size_t wkspSize,
556
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
520
557
  {
521
558
  BYTE* const ostart = (BYTE*)dst;
522
559
  BYTE* const oend = ostart + dstSize;
523
560
  BYTE* op = ostart;
524
561
 
525
- union {
526
- U32 count[HUF_SYMBOLVALUE_MAX+1];
527
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
528
- } table; /* `count` can overlap with `CTable`; saves 1 KB */
562
+ U32* count;
563
+ size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
564
+ HUF_CElt* CTable;
565
+ size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
529
566
 
530
567
  /* checks & inits */
531
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
568
+ if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
532
569
  if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
533
570
  if (!dstSize) return 0; /* cannot fit within dst budget */
534
571
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -536,38 +573,58 @@ static size_t HUF_compress_internal (
536
573
  if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
537
574
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
538
575
 
576
+ count = (U32*)workSpace;
577
+ workSpace = (BYTE*)workSpace + countSize;
578
+ wkspSize -= countSize;
579
+ CTable = (HUF_CElt*)workSpace;
580
+ workSpace = (BYTE*)workSpace + CTableSize;
581
+ wkspSize -= CTableSize;
582
+
583
+ /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
584
+ if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
585
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
586
+ }
587
+
539
588
  /* Scan input and build symbol stats */
540
- { CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
589
+ { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
541
590
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
542
591
  if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
543
592
  }
544
593
 
594
+ /* Check validity of previous table */
595
+ if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
596
+ *repeat = HUF_repeat_none;
597
+ }
598
+ /* Heuristic : use existing table for small inputs */
599
+ if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
600
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
601
+ }
602
+
545
603
  /* Build Huffman Tree */
546
604
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
547
- { CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
605
+ { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
548
606
  huffLog = (U32)maxBits;
607
+ /* Zero the unused symbols so we can check it for validity */
608
+ memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
549
609
  }
550
610
 
551
611
  /* Write table description header */
552
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) );
553
- if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
612
+ { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
613
+ /* Check if using the previous table will be beneficial */
614
+ if (repeat && *repeat != HUF_repeat_none) {
615
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
616
+ size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
617
+ if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
618
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
619
+ }
620
+ }
621
+ /* Use the new table */
622
+ if (hSize + 12ul >= srcSize) { return 0; }
554
623
  op += hSize;
624
+ if (repeat) { *repeat = HUF_repeat_none; }
625
+ if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
555
626
  }
556
-
557
- /* Compress */
558
- { size_t const cSize = (singleStream) ?
559
- HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) : /* single segment */
560
- HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
561
- if (HUF_isError(cSize)) return cSize;
562
- if (cSize==0) return 0; /* uncompressible */
563
- op += cSize;
564
- }
565
-
566
- /* check compressibility */
567
- if ((size_t)(op-ostart) >= srcSize-1)
568
- return 0;
569
-
570
- return op-ostart;
627
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
571
628
  }
572
629
 
573
630
 
@@ -576,7 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
576
633
  unsigned maxSymbolValue, unsigned huffLog,
577
634
  void* workSpace, size_t wkspSize)
578
635
  {
579
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
636
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
637
+ }
638
+
639
+ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
640
+ const void* src, size_t srcSize,
641
+ unsigned maxSymbolValue, unsigned huffLog,
642
+ void* workSpace, size_t wkspSize,
643
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
644
+ {
645
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
580
646
  }
581
647
 
582
648
  size_t HUF_compress1X (void* dst, size_t dstSize,
@@ -592,7 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
592
658
  unsigned maxSymbolValue, unsigned huffLog,
593
659
  void* workSpace, size_t wkspSize)
594
660
  {
595
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
661
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
662
+ }
663
+
664
+ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
665
+ const void* src, size_t srcSize,
666
+ unsigned maxSymbolValue, unsigned huffLog,
667
+ void* workSpace, size_t wkspSize,
668
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
669
+ {
670
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
596
671
  }
597
672
 
598
673
  size_t HUF_compress2 (void* dst, size_t dstSize,
@@ -13,8 +13,6 @@
13
13
  ***************************************/
14
14
  #include <string.h> /* memset */
15
15
  #include "mem.h"
16
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17
- #include "xxhash.h" /* XXH_reset, update, digest */
18
16
  #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19
17
  #include "fse.h"
20
18
  #define HUF_STATIC_LINKING_ONLY
@@ -62,6 +60,7 @@ struct ZSTD_CCtx_s {
62
60
  U32 hashLog3; /* dispatch table : larger == faster, more memory */
63
61
  U32 loadedDictEnd; /* index of end of dictionary */
64
62
  U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
63
+ U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
65
64
  ZSTD_compressionStage_e stage;
66
65
  U32 rep[ZSTD_REP_NUM];
67
66
  U32 repToConfirm[ZSTD_REP_NUM];
@@ -80,10 +79,11 @@ struct ZSTD_CCtx_s {
80
79
  U32* chainTable;
81
80
  HUF_CElt* hufTable;
82
81
  U32 flagStaticTables;
82
+ HUF_repeat flagStaticHufTable;
83
83
  FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
84
84
  FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
85
85
  FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
86
- unsigned tmpCounters[1024];
86
+ unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
87
87
  };
88
88
 
89
89
  ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -124,6 +124,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
124
124
  switch(param)
125
125
  {
126
126
  case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127
+ case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
127
128
  default: return ERROR(parameter_unknown);
128
129
  }
129
130
  }
@@ -246,14 +247,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
246
247
  typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
247
248
 
248
249
  /*! ZSTD_resetCCtx_advanced() :
249
- note : 'params' must be validated */
250
+ note : `params` must be validated */
250
251
  static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
251
252
  ZSTD_parameters params, U64 frameContentSize,
252
253
  ZSTD_compResetPolicy_e const crp)
253
254
  {
254
255
  if (crp == ZSTDcrp_continue)
255
- if (ZSTD_equivalentParams(params, zc->params))
256
+ if (ZSTD_equivalentParams(params, zc->params)) {
257
+ zc->flagStaticTables = 0;
258
+ zc->flagStaticHufTable = HUF_repeat_none;
256
259
  return ZSTD_continueCCtx(zc, params, frameContentSize);
260
+ }
257
261
 
258
262
  { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
259
263
  U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
@@ -287,6 +291,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
287
291
  ptr = zc->hashTable3 + h3Size;
288
292
  zc->hufTable = (HUF_CElt*)ptr;
289
293
  zc->flagStaticTables = 0;
294
+ zc->flagStaticHufTable = HUF_repeat_none;
290
295
  ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
291
296
 
292
297
  zc->nextToUpdate = 1;
@@ -344,8 +349,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
344
349
  {
345
350
  if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
346
351
 
352
+
347
353
  memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
348
- ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
354
+ { ZSTD_parameters params = srcCCtx->params;
355
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
356
+ ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
357
+ }
349
358
 
350
359
  /* copy tables */
351
360
  { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
@@ -368,12 +377,15 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
368
377
 
369
378
  /* copy entropy tables */
370
379
  dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
380
+ dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
371
381
  if (srcCCtx->flagStaticTables) {
372
- memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
373
382
  memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
374
383
  memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
375
384
  memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
376
385
  }
386
+ if (srcCCtx->flagStaticHufTable) {
387
+ memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
388
+ }
377
389
 
378
390
  return 0;
379
391
  }
@@ -487,24 +499,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
487
499
 
488
500
  /* small ? don't even attempt compression (speed opt) */
489
501
  # define LITERAL_NOENTROPY 63
490
- { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
502
+ { size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
491
503
  if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
492
504
  }
493
505
 
494
506
  if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
495
- if (zc->flagStaticTables && (lhSize==3)) {
496
- hType = set_repeat;
497
- singleStream = 1;
498
- cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
499
- } else {
500
- cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
501
- : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
507
+ { HUF_repeat repeat = zc->flagStaticHufTable;
508
+ int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
509
+ if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
510
+ cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
511
+ : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
512
+ if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
513
+ else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
502
514
  }
503
515
 
504
- if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
516
+ if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
517
+ zc->flagStaticHufTable = HUF_repeat_none;
505
518
  return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
506
- if (cLitSize==1)
519
+ }
520
+ if (cLitSize==1) {
521
+ zc->flagStaticHufTable = HUF_repeat_none;
507
522
  return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
523
+ }
508
524
 
509
525
  /* Build header */
510
526
  switch(lhSize)
@@ -572,11 +588,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
572
588
  mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
573
589
  }
574
590
 
575
-
576
- size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
591
+ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
577
592
  void* dst, size_t dstCapacity,
578
593
  size_t srcSize)
579
594
  {
595
+ const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
580
596
  const seqStore_t* seqStorePtr = &(zc->seqStore);
581
597
  U32 count[MaxSeq+1];
582
598
  S16 norm[MaxSeq+1];
@@ -710,7 +726,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
710
726
  if (MEM_32bits()) BIT_flushBits(&blockStream);
711
727
  BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
712
728
  if (MEM_32bits()) BIT_flushBits(&blockStream);
713
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
729
+ if (longOffsets) {
730
+ U32 const ofBits = ofCodeTable[nbSeq-1];
731
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
732
+ if (extraBits) {
733
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
734
+ BIT_flushBits(&blockStream);
735
+ }
736
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
737
+ ofBits - extraBits);
738
+ } else {
739
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
740
+ }
714
741
  BIT_flushBits(&blockStream);
715
742
 
716
743
  { size_t n;
@@ -732,7 +759,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
732
759
  if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
733
760
  BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
734
761
  if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
735
- BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
762
+ if (longOffsets) {
763
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
764
+ if (extraBits) {
765
+ BIT_addBits(&blockStream, sequences[n].offset, extraBits);
766
+ BIT_flushBits(&blockStream); /* (7)*/
767
+ }
768
+ BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
769
+ ofBits - extraBits); /* 31 */
770
+ } else {
771
+ BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
772
+ }
736
773
  BIT_flushBits(&blockStream); /* (7)*/
737
774
  } }
738
775
 
@@ -747,9 +784,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
747
784
 
748
785
  /* check compressibility */
749
786
  _check_compressibility:
750
- { size_t const minGain = ZSTD_minGain(srcSize);
751
- size_t const maxCSize = srcSize - minGain;
752
- if ((size_t)(op-ostart) >= maxCSize) return 0; }
787
+ { size_t const minGain = ZSTD_minGain(srcSize);
788
+ size_t const maxCSize = srcSize - minGain;
789
+ if ((size_t)(op-ostart) >= maxCSize) {
790
+ zc->flagStaticHufTable = HUF_repeat_none;
791
+ return 0;
792
+ } }
753
793
 
754
794
  /* confirm repcodes */
755
795
  { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
@@ -757,7 +797,6 @@ _check_compressibility:
757
797
  return op - ostart;
758
798
  }
759
799
 
760
-
761
800
  #if 0 /* for debug */
762
801
  # define STORESEQ_DEBUG
763
802
  #include <stdio.h> /* fprintf */
@@ -1748,7 +1787,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1748
1787
  #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
1749
1788
 
1750
1789
  /* Update chains up to ip (excluded)
1751
- Assumption : always within prefix (ie. not within extDict) */
1790
+ Assumption : always within prefix (i.e. not within extDict) */
1752
1791
  FORCE_INLINE
1753
1792
  U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1754
1793
  {
@@ -2308,7 +2347,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
2308
2347
  if (remaining < blockSize) blockSize = remaining;
2309
2348
 
2310
2349
  /* preemptive overflow correction */
2311
- if (cctx->lowLimit > (2U<<30)) {
2350
+ if (cctx->lowLimit > (3U<<29)) {
2312
2351
  U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
2313
2352
  U32 const current = (U32)(ip - cctx->base);
2314
2353
  U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
@@ -2362,7 +2401,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2362
2401
  U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2363
2402
  U32 const checksumFlag = params.fParams.checksumFlag>0;
2364
2403
  U32 const windowSize = 1U << params.cParams.windowLog;
2365
- U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
2404
+ U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2366
2405
  BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2367
2406
  U32 const fcsCode = params.fParams.contentSizeFlag ?
2368
2407
  (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
@@ -2508,7 +2547,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
2508
2547
  return ERROR(GENERIC); /* strategy doesn't exist; impossible */
2509
2548
  }
2510
2549
 
2511
- zc->nextToUpdate = zc->loadedDictEnd;
2550
+ zc->nextToUpdate = (U32)(iend - zc->base);
2512
2551
  return 0;
2513
2552
  }
2514
2553
 
@@ -2600,6 +2639,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2600
2639
  }
2601
2640
 
2602
2641
  cctx->flagStaticTables = 1;
2642
+ cctx->flagStaticHufTable = HUF_repeat_valid;
2603
2643
  return dictPtr - (const BYTE*)dict;
2604
2644
  }
2605
2645
 
@@ -2609,8 +2649,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
2609
2649
  {
2610
2650
  if ((dict==NULL) || (dictSize<=8)) return 0;
2611
2651
 
2612
- /* default : dict is pure content */
2613
- if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2652
+ /* dict as pure content */
2653
+ if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
2654
+ return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2614
2655
  zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
2615
2656
 
2616
2657
  /* known magic number : dict is parsed for entropy stats and content */
@@ -2782,7 +2823,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
2782
2823
 
2783
2824
  if (!cdict || !cctx) {
2784
2825
  ZSTD_free(cdict, customMem);
2785
- ZSTD_free(cctx, customMem);
2826
+ ZSTD_freeCCtx(cctx);
2786
2827
  return NULL;
2787
2828
  }
2788
2829
 
@@ -2800,8 +2841,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
2800
2841
  { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2801
2842
  if (ZSTD_isError(errorCode)) {
2802
2843
  ZSTD_free(cdict->dictBuffer, customMem);
2803
- ZSTD_free(cctx, customMem);
2804
2844
  ZSTD_free(cdict, customMem);
2845
+ ZSTD_freeCCtx(cctx);
2805
2846
  return NULL;
2806
2847
  } }
2807
2848
 
@@ -2845,7 +2886,11 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2845
2886
  size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2846
2887
  {
2847
2888
  if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2848
- else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2889
+ else {
2890
+ ZSTD_parameters params = cdict->refContext->params;
2891
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
2892
+ CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
2893
+ }
2849
2894
  return 0;
2850
2895
  }
2851
2896
 
@@ -2939,7 +2984,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2939
2984
  size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2940
2985
  size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2941
2986
 
2942
- size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2987
+ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2943
2988
  {
2944
2989
  if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2945
2990
 
@@ -2957,6 +3002,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2957
3002
  return 0; /* ready to go */
2958
3003
  }
2959
3004
 
3005
+ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3006
+ {
3007
+
3008
+ zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
3009
+
3010
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
3011
+ }
3012
+
2960
3013
  size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2961
3014
  const void* dict, size_t dictSize,
2962
3015
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
@@ -2988,7 +3041,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2988
3041
  zcs->checksum = params.fParams.checksumFlag > 0;
2989
3042
  zcs->params = params;
2990
3043
 
2991
- return ZSTD_resetCStream(zcs, pledgedSrcSize);
3044
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
2992
3045
  }
2993
3046
 
2994
3047
  /* note : cdict must outlive compression session */
@@ -3022,7 +3075,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3022
3075
  size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
3023
3076
  {
3024
3077
  if (zcs==NULL) return 0; /* support sizeof on NULL */
3025
- return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3078
+ return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3026
3079
  }
3027
3080
 
3028
3081
  /*====== Compression ======*/
@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
203
203
 
204
204
 
205
205
  /* Update hashTable3 up to ip (excluded)
206
- Assumption : always within prefix (ie. not within extDict) */
206
+ Assumption : always within prefix (i.e. not within extDict) */
207
207
  FORCE_INLINE
208
208
  U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
209
209
  {
@@ -25,8 +25,6 @@
25
25
  #include "threading.h" /* mutex */
26
26
  #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27
27
  #include "zstdmt_compress.h"
28
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29
- #include "xxhash.h"
30
28
 
31
29
 
32
30
  /* ====== Debug ====== */
@@ -231,16 +229,17 @@ void ZSTDMT_compressChunk(void* jobDescription)
231
229
  const void* const src = (const char*)job->srcStart + job->dictSize;
232
230
  buffer_t const dstBuff = job->dstBuff;
233
231
  DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
234
- if (job->cdict) {
232
+ if (job->cdict) { /* should only happen for first segment */
235
233
  size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
236
234
  if (job->cdict) DEBUGLOG(3, "using CDict ");
237
235
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
238
- } else {
239
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
240
- if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
236
+ } else { /* srcStart points at reloaded section */
237
+ size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
238
+ size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
239
+ if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
241
240
  ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
242
241
  }
243
- if (!job->firstChunk) { /* flush frame header */
242
+ if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
244
243
  size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
245
244
  if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
246
245
  ZSTD_invalidateRepCodes(job->cctx);
@@ -248,7 +247,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
248
247
 
249
248
  DEBUGLOG(4, "Compressing : ");
250
249
  DEBUG_PRINTHEX(4, job->srcStart, 12);
251
- job->cSize = (job->lastChunk) ? /* last chunk signal */
250
+ job->cSize = (job->lastChunk) ?
252
251
  ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
253
252
  ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
254
253
  DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);