zstd-ruby 1.1.3.0 → 1.1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +9 -6
  4. data/ext/zstdruby/libzstd/common/bitstream.h +3 -0
  5. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -19
  6. data/ext/zstdruby/libzstd/common/fse.h +48 -22
  7. data/ext/zstdruby/libzstd/common/fse_decompress.c +0 -1
  8. data/ext/zstdruby/libzstd/common/huf.h +27 -5
  9. data/ext/zstdruby/libzstd/common/mem.h +14 -12
  10. data/ext/zstdruby/libzstd/common/threading.c +5 -4
  11. data/ext/zstdruby/libzstd/common/threading.h +1 -1
  12. data/ext/zstdruby/libzstd/common/xxhash.c +3 -1
  13. data/ext/zstdruby/libzstd/common/xxhash.h +11 -15
  14. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  15. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -0
  16. data/ext/zstdruby/libzstd/compress/fse_compress.c +16 -9
  17. data/ext/zstdruby/libzstd/compress/huf_compress.c +103 -28
  18. data/ext/zstdruby/libzstd/compress/zstd_compress.c +90 -37
  19. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  20. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +7 -8
  21. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +20 -17
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +429 -120
  23. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -1
  24. data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -8
  25. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +1 -1
  26. data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
  27. data/ext/zstdruby/libzstd/dll/libzstd.def +2 -0
  28. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +122 -7
  29. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +31 -0
  30. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +8 -0
  31. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +37 -0
  32. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +8 -0
  33. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +37 -0
  34. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +8 -0
  35. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +33 -0
  36. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +8 -0
  37. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -0
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +7 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +32 -1
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +7 -0
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +44 -6
  42. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +8 -0
  43. data/ext/zstdruby/libzstd/zstd.h +87 -13
  44. data/lib/zstd-ruby/version.rb +1 -1
  45. metadata +2 -2
@@ -127,7 +127,7 @@ struct HUF_CElt_s {
127
127
  }; /* typedef'd to HUF_CElt within "huf.h" */
128
128
 
129
129
  /*! HUF_writeCTable() :
130
- `CTable` : huffman tree to save, using huf representation.
130
+ `CTable` : Huffman tree to save, using huf representation.
131
131
  @return : size of saved CTable */
132
132
  size_t HUF_writeCTable (void* dst, size_t maxDstSize,
133
133
  const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
@@ -409,6 +409,25 @@ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U3
409
409
  return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
410
410
  }
411
411
 
412
+ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
413
+ {
414
+ size_t nbBits = 0;
415
+ int s;
416
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
417
+ nbBits += CTable[s].nbBits * count[s];
418
+ }
419
+ return nbBits >> 3;
420
+ }
421
+
422
+ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
423
+ int bad = 0;
424
+ int s;
425
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
426
+ bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
427
+ }
428
+ return !bad;
429
+ }
430
+
412
431
  static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
413
432
  {
414
433
  BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
@@ -510,25 +529,43 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
510
529
  }
511
530
 
512
531
 
532
+ static size_t HUF_compressCTable_internal(
533
+ BYTE* const ostart, BYTE* op, BYTE* const oend,
534
+ const void* src, size_t srcSize,
535
+ unsigned singleStream, const HUF_CElt* CTable)
536
+ {
537
+ size_t const cSize = singleStream ?
538
+ HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
539
+ HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
540
+ if (HUF_isError(cSize)) { return cSize; }
541
+ if (cSize==0) { return 0; } /* uncompressible */
542
+ op += cSize;
543
+ /* check compressibility */
544
+ if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
545
+ return op-ostart;
546
+ }
547
+
548
+
513
549
  /* `workSpace` must a table of at least 1024 unsigned */
514
550
  static size_t HUF_compress_internal (
515
551
  void* dst, size_t dstSize,
516
552
  const void* src, size_t srcSize,
517
553
  unsigned maxSymbolValue, unsigned huffLog,
518
554
  unsigned singleStream,
519
- void* workSpace, size_t wkspSize)
555
+ void* workSpace, size_t wkspSize,
556
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
520
557
  {
521
558
  BYTE* const ostart = (BYTE*)dst;
522
559
  BYTE* const oend = ostart + dstSize;
523
560
  BYTE* op = ostart;
524
561
 
525
- union {
526
- U32 count[HUF_SYMBOLVALUE_MAX+1];
527
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
528
- } table; /* `count` can overlap with `CTable`; saves 1 KB */
562
+ U32* count;
563
+ size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
564
+ HUF_CElt* CTable;
565
+ size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
529
566
 
530
567
  /* checks & inits */
531
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);
568
+ if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
532
569
  if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
533
570
  if (!dstSize) return 0; /* cannot fit within dst budget */
534
571
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -536,38 +573,58 @@ static size_t HUF_compress_internal (
536
573
  if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
537
574
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
538
575
 
576
+ count = (U32*)workSpace;
577
+ workSpace = (BYTE*)workSpace + countSize;
578
+ wkspSize -= countSize;
579
+ CTable = (HUF_CElt*)workSpace;
580
+ workSpace = (BYTE*)workSpace + CTableSize;
581
+ wkspSize -= CTableSize;
582
+
583
+ /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
584
+ if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
585
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
586
+ }
587
+
539
588
  /* Scan input and build symbol stats */
540
- { CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
589
+ { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
541
590
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
542
591
  if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
543
592
  }
544
593
 
594
+ /* Check validity of previous table */
595
+ if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
596
+ *repeat = HUF_repeat_none;
597
+ }
598
+ /* Heuristic : use existing table for small inputs */
599
+ if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
600
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
601
+ }
602
+
545
603
  /* Build Huffman Tree */
546
604
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
547
- { CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) );
605
+ { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
548
606
  huffLog = (U32)maxBits;
607
+ /* Zero the unused symbols so we can check it for validity */
608
+ memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
549
609
  }
550
610
 
551
611
  /* Write table description header */
552
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) );
553
- if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
612
+ { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
613
+ /* Check if using the previous table will be beneficial */
614
+ if (repeat && *repeat != HUF_repeat_none) {
615
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
616
+ size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
617
+ if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
618
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
619
+ }
620
+ }
621
+ /* Use the new table */
622
+ if (hSize + 12ul >= srcSize) { return 0; }
554
623
  op += hSize;
624
+ if (repeat) { *repeat = HUF_repeat_none; }
625
+ if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
555
626
  }
556
-
557
- /* Compress */
558
- { size_t const cSize = (singleStream) ?
559
- HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) : /* single segment */
560
- HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable);
561
- if (HUF_isError(cSize)) return cSize;
562
- if (cSize==0) return 0; /* uncompressible */
563
- op += cSize;
564
- }
565
-
566
- /* check compressibility */
567
- if ((size_t)(op-ostart) >= srcSize-1)
568
- return 0;
569
-
570
- return op-ostart;
627
+ return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
571
628
  }
572
629
 
573
630
 
@@ -576,7 +633,16 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
576
633
  unsigned maxSymbolValue, unsigned huffLog,
577
634
  void* workSpace, size_t wkspSize)
578
635
  {
579
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize);
636
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
637
+ }
638
+
639
+ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
640
+ const void* src, size_t srcSize,
641
+ unsigned maxSymbolValue, unsigned huffLog,
642
+ void* workSpace, size_t wkspSize,
643
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
644
+ {
645
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
580
646
  }
581
647
 
582
648
  size_t HUF_compress1X (void* dst, size_t dstSize,
@@ -592,7 +658,16 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
592
658
  unsigned maxSymbolValue, unsigned huffLog,
593
659
  void* workSpace, size_t wkspSize)
594
660
  {
595
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize);
661
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
662
+ }
663
+
664
+ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
665
+ const void* src, size_t srcSize,
666
+ unsigned maxSymbolValue, unsigned huffLog,
667
+ void* workSpace, size_t wkspSize,
668
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
669
+ {
670
+ return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
596
671
  }
597
672
 
598
673
  size_t HUF_compress2 (void* dst, size_t dstSize,
@@ -13,8 +13,6 @@
13
13
  ***************************************/
14
14
  #include <string.h> /* memset */
15
15
  #include "mem.h"
16
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17
- #include "xxhash.h" /* XXH_reset, update, digest */
18
16
  #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19
17
  #include "fse.h"
20
18
  #define HUF_STATIC_LINKING_ONLY
@@ -62,6 +60,7 @@ struct ZSTD_CCtx_s {
62
60
  U32 hashLog3; /* dispatch table : larger == faster, more memory */
63
61
  U32 loadedDictEnd; /* index of end of dictionary */
64
62
  U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
63
+ U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
65
64
  ZSTD_compressionStage_e stage;
66
65
  U32 rep[ZSTD_REP_NUM];
67
66
  U32 repToConfirm[ZSTD_REP_NUM];
@@ -80,10 +79,11 @@ struct ZSTD_CCtx_s {
80
79
  U32* chainTable;
81
80
  HUF_CElt* hufTable;
82
81
  U32 flagStaticTables;
82
+ HUF_repeat flagStaticHufTable;
83
83
  FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
84
84
  FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
85
85
  FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
86
- unsigned tmpCounters[1024];
86
+ unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
87
87
  };
88
88
 
89
89
  ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -124,6 +124,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
124
124
  switch(param)
125
125
  {
126
126
  case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127
+ case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
127
128
  default: return ERROR(parameter_unknown);
128
129
  }
129
130
  }
@@ -246,14 +247,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
246
247
  typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
247
248
 
248
249
  /*! ZSTD_resetCCtx_advanced() :
249
- note : 'params' must be validated */
250
+ note : `params` must be validated */
250
251
  static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
251
252
  ZSTD_parameters params, U64 frameContentSize,
252
253
  ZSTD_compResetPolicy_e const crp)
253
254
  {
254
255
  if (crp == ZSTDcrp_continue)
255
- if (ZSTD_equivalentParams(params, zc->params))
256
+ if (ZSTD_equivalentParams(params, zc->params)) {
257
+ zc->flagStaticTables = 0;
258
+ zc->flagStaticHufTable = HUF_repeat_none;
256
259
  return ZSTD_continueCCtx(zc, params, frameContentSize);
260
+ }
257
261
 
258
262
  { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
259
263
  U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
@@ -287,6 +291,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
287
291
  ptr = zc->hashTable3 + h3Size;
288
292
  zc->hufTable = (HUF_CElt*)ptr;
289
293
  zc->flagStaticTables = 0;
294
+ zc->flagStaticHufTable = HUF_repeat_none;
290
295
  ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
291
296
 
292
297
  zc->nextToUpdate = 1;
@@ -344,8 +349,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
344
349
  {
345
350
  if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
346
351
 
352
+
347
353
  memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
348
- ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
354
+ { ZSTD_parameters params = srcCCtx->params;
355
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
356
+ ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
357
+ }
349
358
 
350
359
  /* copy tables */
351
360
  { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
@@ -368,12 +377,15 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
368
377
 
369
378
  /* copy entropy tables */
370
379
  dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
380
+ dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
371
381
  if (srcCCtx->flagStaticTables) {
372
- memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
373
382
  memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
374
383
  memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
375
384
  memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
376
385
  }
386
+ if (srcCCtx->flagStaticHufTable) {
387
+ memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
388
+ }
377
389
 
378
390
  return 0;
379
391
  }
@@ -487,24 +499,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
487
499
 
488
500
  /* small ? don't even attempt compression (speed opt) */
489
501
  # define LITERAL_NOENTROPY 63
490
- { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
502
+ { size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
491
503
  if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
492
504
  }
493
505
 
494
506
  if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
495
- if (zc->flagStaticTables && (lhSize==3)) {
496
- hType = set_repeat;
497
- singleStream = 1;
498
- cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
499
- } else {
500
- cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters))
501
- : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters));
507
+ { HUF_repeat repeat = zc->flagStaticHufTable;
508
+ int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
509
+ if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
510
+ cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
511
+ : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
512
+ if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
513
+ else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
502
514
  }
503
515
 
504
- if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
516
+ if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
517
+ zc->flagStaticHufTable = HUF_repeat_none;
505
518
  return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
506
- if (cLitSize==1)
519
+ }
520
+ if (cLitSize==1) {
521
+ zc->flagStaticHufTable = HUF_repeat_none;
507
522
  return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
523
+ }
508
524
 
509
525
  /* Build header */
510
526
  switch(lhSize)
@@ -572,11 +588,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
572
588
  mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
573
589
  }
574
590
 
575
-
576
- size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
591
+ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
577
592
  void* dst, size_t dstCapacity,
578
593
  size_t srcSize)
579
594
  {
595
+ const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
580
596
  const seqStore_t* seqStorePtr = &(zc->seqStore);
581
597
  U32 count[MaxSeq+1];
582
598
  S16 norm[MaxSeq+1];
@@ -710,7 +726,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
710
726
  if (MEM_32bits()) BIT_flushBits(&blockStream);
711
727
  BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
712
728
  if (MEM_32bits()) BIT_flushBits(&blockStream);
713
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
729
+ if (longOffsets) {
730
+ U32 const ofBits = ofCodeTable[nbSeq-1];
731
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
732
+ if (extraBits) {
733
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
734
+ BIT_flushBits(&blockStream);
735
+ }
736
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
737
+ ofBits - extraBits);
738
+ } else {
739
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
740
+ }
714
741
  BIT_flushBits(&blockStream);
715
742
 
716
743
  { size_t n;
@@ -732,7 +759,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
732
759
  if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
733
760
  BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
734
761
  if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
735
- BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
762
+ if (longOffsets) {
763
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
764
+ if (extraBits) {
765
+ BIT_addBits(&blockStream, sequences[n].offset, extraBits);
766
+ BIT_flushBits(&blockStream); /* (7)*/
767
+ }
768
+ BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
769
+ ofBits - extraBits); /* 31 */
770
+ } else {
771
+ BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
772
+ }
736
773
  BIT_flushBits(&blockStream); /* (7)*/
737
774
  } }
738
775
 
@@ -747,9 +784,12 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
747
784
 
748
785
  /* check compressibility */
749
786
  _check_compressibility:
750
- { size_t const minGain = ZSTD_minGain(srcSize);
751
- size_t const maxCSize = srcSize - minGain;
752
- if ((size_t)(op-ostart) >= maxCSize) return 0; }
787
+ { size_t const minGain = ZSTD_minGain(srcSize);
788
+ size_t const maxCSize = srcSize - minGain;
789
+ if ((size_t)(op-ostart) >= maxCSize) {
790
+ zc->flagStaticHufTable = HUF_repeat_none;
791
+ return 0;
792
+ } }
753
793
 
754
794
  /* confirm repcodes */
755
795
  { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
@@ -757,7 +797,6 @@ _check_compressibility:
757
797
  return op - ostart;
758
798
  }
759
799
 
760
-
761
800
  #if 0 /* for debug */
762
801
  # define STORESEQ_DEBUG
763
802
  #include <stdio.h> /* fprintf */
@@ -1748,7 +1787,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1748
1787
  #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
1749
1788
 
1750
1789
  /* Update chains up to ip (excluded)
1751
- Assumption : always within prefix (ie. not within extDict) */
1790
+ Assumption : always within prefix (i.e. not within extDict) */
1752
1791
  FORCE_INLINE
1753
1792
  U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1754
1793
  {
@@ -2308,7 +2347,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
2308
2347
  if (remaining < blockSize) blockSize = remaining;
2309
2348
 
2310
2349
  /* preemptive overflow correction */
2311
- if (cctx->lowLimit > (2U<<30)) {
2350
+ if (cctx->lowLimit > (3U<<29)) {
2312
2351
  U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
2313
2352
  U32 const current = (U32)(ip - cctx->base);
2314
2353
  U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
@@ -2362,7 +2401,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2362
2401
  U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2363
2402
  U32 const checksumFlag = params.fParams.checksumFlag>0;
2364
2403
  U32 const windowSize = 1U << params.cParams.windowLog;
2365
- U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
2404
+ U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2366
2405
  BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2367
2406
  U32 const fcsCode = params.fParams.contentSizeFlag ?
2368
2407
  (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
@@ -2508,7 +2547,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
2508
2547
  return ERROR(GENERIC); /* strategy doesn't exist; impossible */
2509
2548
  }
2510
2549
 
2511
- zc->nextToUpdate = zc->loadedDictEnd;
2550
+ zc->nextToUpdate = (U32)(iend - zc->base);
2512
2551
  return 0;
2513
2552
  }
2514
2553
 
@@ -2600,6 +2639,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2600
2639
  }
2601
2640
 
2602
2641
  cctx->flagStaticTables = 1;
2642
+ cctx->flagStaticHufTable = HUF_repeat_valid;
2603
2643
  return dictPtr - (const BYTE*)dict;
2604
2644
  }
2605
2645
 
@@ -2609,8 +2649,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
2609
2649
  {
2610
2650
  if ((dict==NULL) || (dictSize<=8)) return 0;
2611
2651
 
2612
- /* default : dict is pure content */
2613
- if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2652
+ /* dict as pure content */
2653
+ if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
2654
+ return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2614
2655
  zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
2615
2656
 
2616
2657
  /* known magic number : dict is parsed for entropy stats and content */
@@ -2782,7 +2823,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
2782
2823
 
2783
2824
  if (!cdict || !cctx) {
2784
2825
  ZSTD_free(cdict, customMem);
2785
- ZSTD_free(cctx, customMem);
2826
+ ZSTD_freeCCtx(cctx);
2786
2827
  return NULL;
2787
2828
  }
2788
2829
 
@@ -2800,8 +2841,8 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, u
2800
2841
  { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2801
2842
  if (ZSTD_isError(errorCode)) {
2802
2843
  ZSTD_free(cdict->dictBuffer, customMem);
2803
- ZSTD_free(cctx, customMem);
2804
2844
  ZSTD_free(cdict, customMem);
2845
+ ZSTD_freeCCtx(cctx);
2805
2846
  return NULL;
2806
2847
  } }
2807
2848
 
@@ -2845,7 +2886,11 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2845
2886
  size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2846
2887
  {
2847
2888
  if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2848
- else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2889
+ else {
2890
+ ZSTD_parameters params = cdict->refContext->params;
2891
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
2892
+ CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
2893
+ }
2849
2894
  return 0;
2850
2895
  }
2851
2896
 
@@ -2939,7 +2984,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2939
2984
  size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2940
2985
  size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2941
2986
 
2942
- size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2987
+ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2943
2988
  {
2944
2989
  if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2945
2990
 
@@ -2957,6 +3002,14 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2957
3002
  return 0; /* ready to go */
2958
3003
  }
2959
3004
 
3005
+ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3006
+ {
3007
+
3008
+ zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
3009
+
3010
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
3011
+ }
3012
+
2960
3013
  size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2961
3014
  const void* dict, size_t dictSize,
2962
3015
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
@@ -2988,7 +3041,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2988
3041
  zcs->checksum = params.fParams.checksumFlag > 0;
2989
3042
  zcs->params = params;
2990
3043
 
2991
- return ZSTD_resetCStream(zcs, pledgedSrcSize);
3044
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
2992
3045
  }
2993
3046
 
2994
3047
  /* note : cdict must outlive compression session */
@@ -3022,7 +3075,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3022
3075
  size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
3023
3076
  {
3024
3077
  if (zcs==NULL) return 0; /* support sizeof on NULL */
3025
- return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3078
+ return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3026
3079
  }
3027
3080
 
3028
3081
  /*====== Compression ======*/
@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
203
203
 
204
204
 
205
205
  /* Update hashTable3 up to ip (excluded)
206
- Assumption : always within prefix (ie. not within extDict) */
206
+ Assumption : always within prefix (i.e. not within extDict) */
207
207
  FORCE_INLINE
208
208
  U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
209
209
  {
@@ -25,8 +25,6 @@
25
25
  #include "threading.h" /* mutex */
26
26
  #include "zstd_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
27
27
  #include "zstdmt_compress.h"
28
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
29
- #include "xxhash.h"
30
28
 
31
29
 
32
30
  /* ====== Debug ====== */
@@ -231,16 +229,17 @@ void ZSTDMT_compressChunk(void* jobDescription)
231
229
  const void* const src = (const char*)job->srcStart + job->dictSize;
232
230
  buffer_t const dstBuff = job->dstBuff;
233
231
  DEBUGLOG(3, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize);
234
- if (job->cdict) {
232
+ if (job->cdict) { /* should only happen for first segment */
235
233
  size_t const initError = ZSTD_compressBegin_usingCDict(job->cctx, job->cdict, job->fullFrameSize);
236
234
  if (job->cdict) DEBUGLOG(3, "using CDict ");
237
235
  if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
238
- } else {
239
- size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize);
240
- if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
236
+ } else { /* srcStart points at reloaded section */
237
+ size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
238
+ size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
239
+ if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
241
240
  ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
242
241
  }
243
- if (!job->firstChunk) { /* flush frame header */
242
+ if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */
244
243
  size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0);
245
244
  if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; }
246
245
  ZSTD_invalidateRepCodes(job->cctx);
@@ -248,7 +247,7 @@ void ZSTDMT_compressChunk(void* jobDescription)
248
247
 
249
248
  DEBUGLOG(4, "Compressing : ");
250
249
  DEBUG_PRINTHEX(4, job->srcStart, 12);
251
- job->cSize = (job->lastChunk) ? /* last chunk signal */
250
+ job->cSize = (job->lastChunk) ?
252
251
  ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
253
252
  ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
254
253
  DEBUGLOG(3, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);