extzstd 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -13,8 +13,6 @@
13
13
  ***************************************/
14
14
  #include <string.h> /* memset */
15
15
  #include "mem.h"
16
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17
- #include "xxhash.h" /* XXH_reset, update, digest */
18
16
  #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19
17
  #include "fse.h"
20
18
  #define HUF_STATIC_LINKING_ONLY
@@ -33,6 +31,7 @@ typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZS
33
31
  /*-*************************************
34
32
  * Helper functions
35
33
  ***************************************/
34
+ #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
36
35
  size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
37
36
 
38
37
 
@@ -50,8 +49,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
50
49
  /*-*************************************
51
50
  * Context memory management
52
51
  ***************************************/
53
- struct ZSTD_CCtx_s
54
- {
52
+ struct ZSTD_CCtx_s {
55
53
  const BYTE* nextSrc; /* next block here to continue on current prefix */
56
54
  const BYTE* base; /* All regular indexes relative to this position */
57
55
  const BYTE* dictBase; /* extDict indexes relative to this position */
@@ -60,10 +58,12 @@ struct ZSTD_CCtx_s
60
58
  U32 nextToUpdate; /* index from which to continue dictionary update */
61
59
  U32 nextToUpdate3; /* index from which to continue dictionary update */
62
60
  U32 hashLog3; /* dispatch table : larger == faster, more memory */
63
- U32 loadedDictEnd;
61
+ U32 loadedDictEnd; /* index of end of dictionary */
62
+ U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
63
+ U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
64
64
  ZSTD_compressionStage_e stage;
65
65
  U32 rep[ZSTD_REP_NUM];
66
- U32 savedRep[ZSTD_REP_NUM];
66
+ U32 repToConfirm[ZSTD_REP_NUM];
67
67
  U32 dictID;
68
68
  ZSTD_parameters params;
69
69
  void* workSpace;
@@ -79,9 +79,11 @@ struct ZSTD_CCtx_s
79
79
  U32* chainTable;
80
80
  HUF_CElt* hufTable;
81
81
  U32 flagStaticTables;
82
+ HUF_repeat flagStaticHufTable;
82
83
  FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
83
84
  FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
84
85
  FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
86
+ unsigned tmpCounters[HUF_WORKSPACE_SIZE_U32];
85
87
  };
86
88
 
87
89
  ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -99,7 +101,7 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
99
101
  cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
100
102
  if (!cctx) return NULL;
101
103
  memset(cctx, 0, sizeof(ZSTD_CCtx));
102
- memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
104
+ cctx->customMem = customMem;
103
105
  return cctx;
104
106
  }
105
107
 
@@ -117,6 +119,16 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
117
119
  return sizeof(*cctx) + cctx->workSpaceSize;
118
120
  }
119
121
 
122
+ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value)
123
+ {
124
+ switch(param)
125
+ {
126
+ case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
127
+ case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
128
+ default: return ERROR(parameter_unknown);
129
+ }
130
+ }
131
+
120
132
  const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
121
133
  {
122
134
  return &(ctx->seqStore);
@@ -147,6 +159,14 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
147
159
  }
148
160
 
149
161
 
162
+ /** ZSTD_cycleLog() :
163
+ * condition for correct operation : hashLog > 1 */
164
+ static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
165
+ {
166
+ U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
167
+ return hashLog - btScale;
168
+ }
169
+
150
170
  /** ZSTD_adjustCParams() :
151
171
  optimize `cPar` for a given input (`srcSize` and `dictSize`).
152
172
  mostly downsizing to reduce memory consumption and initialization.
@@ -165,9 +185,9 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
165
185
  if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
166
186
  } }
167
187
  if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
168
- { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt) | (cPar.strategy == ZSTD_btopt2);
169
- U32 const maxChainLog = cPar.windowLog+btPlus;
170
- if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */
188
+ { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
189
+ if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog);
190
+ }
171
191
 
172
192
  if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
173
193
 
@@ -227,14 +247,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 fra
227
247
  typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
228
248
 
229
249
  /*! ZSTD_resetCCtx_advanced() :
230
- note : 'params' must be validated */
250
+ note : `params` must be validated */
231
251
  static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
232
252
  ZSTD_parameters params, U64 frameContentSize,
233
253
  ZSTD_compResetPolicy_e const crp)
234
254
  {
235
255
  if (crp == ZSTDcrp_continue)
236
- if (ZSTD_equivalentParams(params, zc->params))
256
+ if (ZSTD_equivalentParams(params, zc->params)) {
257
+ zc->flagStaticTables = 0;
258
+ zc->flagStaticHufTable = HUF_repeat_none;
237
259
  return ZSTD_continueCCtx(zc, params, frameContentSize);
260
+ }
238
261
 
239
262
  { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
240
263
  U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
@@ -268,6 +291,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
268
291
  ptr = zc->hashTable3 + h3Size;
269
292
  zc->hufTable = (HUF_CElt*)ptr;
270
293
  zc->flagStaticTables = 0;
294
+ zc->flagStaticHufTable = HUF_repeat_none;
271
295
  ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
272
296
 
273
297
  zc->nextToUpdate = 1;
@@ -308,6 +332,14 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
308
332
  }
309
333
  }
310
334
 
335
+ /* ZSTD_invalidateRepCodes() :
336
+ * ensures next compression will not use repcodes from previous block.
337
+ * Note : only works with regular variant;
338
+ * do not use with extDict variant ! */
339
+ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
340
+ int i;
341
+ for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
342
+ }
311
343
 
312
344
  /*! ZSTD_copyCCtx() :
313
345
  * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -317,8 +349,12 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
317
349
  {
318
350
  if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
319
351
 
352
+
320
353
  memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
321
- ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
354
+ { ZSTD_parameters params = srcCCtx->params;
355
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
356
+ ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
357
+ }
322
358
 
323
359
  /* copy tables */
324
360
  { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
@@ -341,12 +377,15 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
341
377
 
342
378
  /* copy entropy tables */
343
379
  dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
380
+ dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
344
381
  if (srcCCtx->flagStaticTables) {
345
- memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
346
382
  memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
347
383
  memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
348
384
  memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
349
385
  }
386
+ if (srcCCtx->flagStaticHufTable) {
387
+ memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
388
+ }
350
389
 
351
390
  return 0;
352
391
  }
@@ -460,24 +499,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
460
499
 
461
500
  /* small ? don't even attempt compression (speed opt) */
462
501
  # define LITERAL_NOENTROPY 63
463
- { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
502
+ { size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
464
503
  if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
465
504
  }
466
505
 
467
506
  if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
468
- if (zc->flagStaticTables && (lhSize==3)) {
469
- hType = set_repeat;
470
- singleStream = 1;
471
- cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
472
- } else {
473
- cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
474
- : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
507
+ { HUF_repeat repeat = zc->flagStaticHufTable;
508
+ int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
509
+ if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
510
+ cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
511
+ : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
512
+ if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
513
+ else { zc->flagStaticHufTable = HUF_repeat_check; } /* now have a table to reuse */
475
514
  }
476
515
 
477
- if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
516
+ if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
517
+ zc->flagStaticHufTable = HUF_repeat_none;
478
518
  return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
479
- if (cLitSize==1)
519
+ }
520
+ if (cLitSize==1) {
521
+ zc->flagStaticHufTable = HUF_repeat_none;
480
522
  return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
523
+ }
481
524
 
482
525
  /* Build header */
483
526
  switch(lhSize)
@@ -545,11 +588,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
545
588
  mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
546
589
  }
547
590
 
548
-
549
- size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
591
+ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
550
592
  void* dst, size_t dstCapacity,
551
593
  size_t srcSize)
552
594
  {
595
+ const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
553
596
  const seqStore_t* seqStorePtr = &(zc->seqStore);
554
597
  U32 count[MaxSeq+1];
555
598
  S16 norm[MaxSeq+1];
@@ -566,6 +609,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
566
609
  BYTE* op = ostart;
567
610
  size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
568
611
  BYTE* seqHead;
612
+ BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
569
613
 
570
614
  /* Compress literals */
571
615
  { const BYTE* const literals = seqStorePtr->litStart;
@@ -593,7 +637,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
593
637
 
594
638
  /* CTable for Literal Lengths */
595
639
  { U32 max = MaxLL;
596
- size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
640
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters);
597
641
  if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
598
642
  *op++ = llCodeTable[0];
599
643
  FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
@@ -601,7 +645,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
601
645
  } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
602
646
  LLtype = set_repeat;
603
647
  } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
604
- FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
648
+ FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
605
649
  LLtype = set_basic;
606
650
  } else {
607
651
  size_t nbSeq_1 = nbSeq;
@@ -611,13 +655,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
611
655
  { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
612
656
  if (FSE_isError(NCountSize)) return ERROR(GENERIC);
613
657
  op += NCountSize; }
614
- FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
658
+ FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
615
659
  LLtype = set_compressed;
616
660
  } }
617
661
 
618
662
  /* CTable for Offsets */
619
663
  { U32 max = MaxOff;
620
- size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
664
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters);
621
665
  if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
622
666
  *op++ = ofCodeTable[0];
623
667
  FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
@@ -625,7 +669,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
625
669
  } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
626
670
  Offtype = set_repeat;
627
671
  } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
628
- FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
672
+ FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
629
673
  Offtype = set_basic;
630
674
  } else {
631
675
  size_t nbSeq_1 = nbSeq;
@@ -635,13 +679,13 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
635
679
  { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
636
680
  if (FSE_isError(NCountSize)) return ERROR(GENERIC);
637
681
  op += NCountSize; }
638
- FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
682
+ FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
639
683
  Offtype = set_compressed;
640
684
  } }
641
685
 
642
686
  /* CTable for MatchLengths */
643
687
  { U32 max = MaxML;
644
- size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
688
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters);
645
689
  if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
646
690
  *op++ = *mlCodeTable;
647
691
  FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
@@ -649,7 +693,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
649
693
  } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
650
694
  MLtype = set_repeat;
651
695
  } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
652
- FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
696
+ FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
653
697
  MLtype = set_basic;
654
698
  } else {
655
699
  size_t nbSeq_1 = nbSeq;
@@ -659,7 +703,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
659
703
  { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
660
704
  if (FSE_isError(NCountSize)) return ERROR(GENERIC);
661
705
  op += NCountSize; }
662
- FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
706
+ FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
663
707
  MLtype = set_compressed;
664
708
  } }
665
709
 
@@ -682,7 +726,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
682
726
  if (MEM_32bits()) BIT_flushBits(&blockStream);
683
727
  BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
684
728
  if (MEM_32bits()) BIT_flushBits(&blockStream);
685
- BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
729
+ if (longOffsets) {
730
+ U32 const ofBits = ofCodeTable[nbSeq-1];
731
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
732
+ if (extraBits) {
733
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
734
+ BIT_flushBits(&blockStream);
735
+ }
736
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
737
+ ofBits - extraBits);
738
+ } else {
739
+ BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
740
+ }
686
741
  BIT_flushBits(&blockStream);
687
742
 
688
743
  { size_t n;
@@ -704,7 +759,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
704
759
  if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
705
760
  BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
706
761
  if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
707
- BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
762
+ if (longOffsets) {
763
+ int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
764
+ if (extraBits) {
765
+ BIT_addBits(&blockStream, sequences[n].offset, extraBits);
766
+ BIT_flushBits(&blockStream); /* (7)*/
767
+ }
768
+ BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
769
+ ofBits - extraBits); /* 31 */
770
+ } else {
771
+ BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
772
+ }
708
773
  BIT_flushBits(&blockStream); /* (7)*/
709
774
  } }
710
775
 
@@ -719,16 +784,25 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
719
784
 
720
785
  /* check compressibility */
721
786
  _check_compressibility:
722
- { size_t const minGain = ZSTD_minGain(srcSize);
723
- size_t const maxCSize = srcSize - minGain;
724
- if ((size_t)(op-ostart) >= maxCSize) return 0; }
787
+ { size_t const minGain = ZSTD_minGain(srcSize);
788
+ size_t const maxCSize = srcSize - minGain;
789
+ if ((size_t)(op-ostart) >= maxCSize) {
790
+ zc->flagStaticHufTable = HUF_repeat_none;
791
+ return 0;
792
+ } }
725
793
 
726
794
  /* confirm repcodes */
727
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
795
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
728
796
 
729
797
  return op - ostart;
730
798
  }
731
799
 
800
+ #if 0 /* for debug */
801
+ # define STORESEQ_DEBUG
802
+ #include <stdio.h> /* fprintf */
803
+ U32 g_startDebug = 0;
804
+ const BYTE* g_start = NULL;
805
+ #endif
732
806
 
733
807
  /*! ZSTD_storeSeq() :
734
808
  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
@@ -737,13 +811,14 @@ _check_compressibility:
737
811
  */
738
812
  MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
739
813
  {
740
- #if 0 /* for debug */
741
- static const BYTE* g_start = NULL;
742
- const U32 pos = (U32)(literals - g_start);
743
- if (g_start==NULL) g_start = literals;
744
- //if ((pos > 1) && (pos < 50000))
745
- printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
746
- pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
814
+ #ifdef STORESEQ_DEBUG
815
+ if (g_startDebug) {
816
+ const U32 pos = (U32)((const BYTE*)literals - g_start);
817
+ if (g_start==NULL) g_start = (const BYTE*)literals;
818
+ if ((pos > 1895000) && (pos < 1895300))
819
+ fprintf(stderr, "Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
820
+ pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
821
+ }
747
822
  #endif
748
823
  /* copy Literals */
749
824
  ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
@@ -993,8 +1068,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
993
1068
  } } }
994
1069
 
995
1070
  /* save reps for next block */
996
- cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
997
- cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1071
+ cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1072
+ cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
998
1073
 
999
1074
  /* Last Literals */
1000
1075
  { size_t const lastLLSize = iend - anchor;
@@ -1108,7 +1183,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
1108
1183
  } } }
1109
1184
 
1110
1185
  /* save reps for next block */
1111
- ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1186
+ ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1112
1187
 
1113
1188
  /* Last Literals */
1114
1189
  { size_t const lastLLSize = iend - anchor;
@@ -1262,8 +1337,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1262
1337
  } } }
1263
1338
 
1264
1339
  /* save reps for next block */
1265
- cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1266
- cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1340
+ cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
1341
+ cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
1267
1342
 
1268
1343
  /* Last Literals */
1269
1344
  { size_t const lastLLSize = iend - anchor;
@@ -1412,7 +1487,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1412
1487
  } } }
1413
1488
 
1414
1489
  /* save reps for next block */
1415
- ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1490
+ ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
1416
1491
 
1417
1492
  /* Last Literals */
1418
1493
  { size_t const lastLLSize = iend - anchor;
@@ -1482,8 +1557,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
1482
1557
  hashTable[h] = current; /* Update Hash Table */
1483
1558
 
1484
1559
  while (nbCompares-- && (matchIndex > windowLow)) {
1485
- U32* nextPtr = bt + 2*(matchIndex & btMask);
1560
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
1486
1561
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
1562
+
1487
1563
  #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
1488
1564
  const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
1489
1565
  if (matchIndex == predictedSmall) {
@@ -1579,7 +1655,7 @@ static size_t ZSTD_insertBtAndFindBestMatch (
1579
1655
  hashTable[h] = current; /* Update Hash Table */
1580
1656
 
1581
1657
  while (nbCompares-- && (matchIndex > windowLow)) {
1582
- U32* nextPtr = bt + 2*(matchIndex & btMask);
1658
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
1583
1659
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
1584
1660
  const BYTE* match;
1585
1661
 
@@ -1711,7 +1787,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1711
1787
  #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
1712
1788
 
1713
1789
  /* Update chains up to ip (excluded)
1714
- Assumption : always within prefix (ie. not within extDict) */
1790
+ Assumption : always within prefix (i.e. not within extDict) */
1715
1791
  FORCE_INLINE
1716
1792
  U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1717
1793
  {
@@ -1943,8 +2019,8 @@ _storeSequence:
1943
2019
  } }
1944
2020
 
1945
2021
  /* Save reps for next block */
1946
- ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1947
- ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
2022
+ ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
2023
+ ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
1948
2024
 
1949
2025
  /* Last Literals */
1950
2026
  { size_t const lastLLSize = iend - anchor;
@@ -2138,7 +2214,7 @@ _storeSequence:
2138
2214
  } }
2139
2215
 
2140
2216
  /* Save reps for next block */
2141
- ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2217
+ ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
2142
2218
 
2143
2219
  /* Last Literals */
2144
2220
  { size_t const lastLLSize = iend - anchor;
@@ -2271,16 +2347,16 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
2271
2347
  if (remaining < blockSize) blockSize = remaining;
2272
2348
 
2273
2349
  /* preemptive overflow correction */
2274
- if (cctx->lowLimit > (1<<30)) {
2275
- U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt) | (cctx->params.cParams.strategy == ZSTD_btopt2);
2276
- U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
2277
- U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
2278
- U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog); /* preserve position % chainSize, ensure current-repcode doesn't underflow */
2279
- U32 const correction = cctx->lowLimit - newLowLimit;
2350
+ if (cctx->lowLimit > (3U<<29)) {
2351
+ U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
2352
+ U32 const current = (U32)(ip - cctx->base);
2353
+ U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog);
2354
+ U32 const correction = current - newCurrent;
2355
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
2280
2356
  ZSTD_reduceIndex(cctx, correction);
2281
2357
  cctx->base += correction;
2282
2358
  cctx->dictBase += correction;
2283
- cctx->lowLimit = newLowLimit;
2359
+ cctx->lowLimit -= correction;
2284
2360
  cctx->dictLimit -= correction;
2285
2361
  if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
2286
2362
  else cctx->nextToUpdate -= correction;
@@ -2325,7 +2401,7 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2325
2401
  U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2326
2402
  U32 const checksumFlag = params.fParams.checksumFlag>0;
2327
2403
  U32 const windowSize = 1U << params.cParams.windowLog;
2328
- U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
2404
+ U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
2329
2405
  BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2330
2406
  U32 const fcsCode = params.fParams.contentSizeFlag ?
2331
2407
  (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
@@ -2397,12 +2473,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
2397
2473
 
2398
2474
  cctx->nextSrc = ip + srcSize;
2399
2475
 
2400
- { size_t const cSize = frame ?
2476
+ if (srcSize) {
2477
+ size_t const cSize = frame ?
2401
2478
  ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2402
2479
  ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2403
2480
  if (ZSTD_isError(cSize)) return cSize;
2404
2481
  return cSize + fhSize;
2405
- }
2482
+ } else
2483
+ return fhSize;
2406
2484
  }
2407
2485
 
2408
2486
 
@@ -2438,7 +2516,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
2438
2516
  zc->dictBase = zc->base;
2439
2517
  zc->base += ip - zc->nextSrc;
2440
2518
  zc->nextToUpdate = zc->dictLimit;
2441
- zc->loadedDictEnd = (U32)(iend - zc->base);
2519
+ zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
2442
2520
 
2443
2521
  zc->nextSrc = iend;
2444
2522
  if (srcSize <= HASH_READ_SIZE) return 0;
@@ -2469,7 +2547,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
2469
2547
  return ERROR(GENERIC); /* strategy doesn't exist; impossible */
2470
2548
  }
2471
2549
 
2472
- zc->nextToUpdate = zc->loadedDictEnd;
2550
+ zc->nextToUpdate = (U32)(iend - zc->base);
2473
2551
  return 0;
2474
2552
  }
2475
2553
 
@@ -2506,6 +2584,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2506
2584
  const BYTE* const dictEnd = dictPtr + dictSize;
2507
2585
  short offcodeNCount[MaxOff+1];
2508
2586
  unsigned offcodeMaxValue = MaxOff;
2587
+ BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
2509
2588
 
2510
2589
  { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
2511
2590
  if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
@@ -2517,7 +2596,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2517
2596
  if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2518
2597
  if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2519
2598
  /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2520
- CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
2599
+ CHECK_E (FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
2521
2600
  dictPtr += offcodeHeaderSize;
2522
2601
  }
2523
2602
 
@@ -2528,7 +2607,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2528
2607
  if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2529
2608
  /* Every match length code must have non-zero probability */
2530
2609
  CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2531
- CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
2610
+ CHECK_E (FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
2532
2611
  dictPtr += matchlengthHeaderSize;
2533
2612
  }
2534
2613
 
@@ -2539,14 +2618,14 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2539
2618
  if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2540
2619
  /* Every literal length code must have non-zero probability */
2541
2620
  CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2542
- CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
2621
+ CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted);
2543
2622
  dictPtr += litlengthHeaderSize;
2544
2623
  }
2545
2624
 
2546
2625
  if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2547
- cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2548
- cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2549
- cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2626
+ cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2627
+ cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2628
+ cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2550
2629
  dictPtr += 12;
2551
2630
 
2552
2631
  { U32 offcodeMax = MaxOff;
@@ -2560,6 +2639,7 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_
2560
2639
  }
2561
2640
 
2562
2641
  cctx->flagStaticTables = 1;
2642
+ cctx->flagStaticHufTable = HUF_repeat_valid;
2563
2643
  return dictPtr - (const BYTE*)dict;
2564
2644
  }
2565
2645
 
@@ -2569,8 +2649,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
2569
2649
  {
2570
2650
  if ((dict==NULL) || (dictSize<=8)) return 0;
2571
2651
 
2572
- /* default : dict is pure content */
2573
- if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2652
+ /* dict as pure content */
2653
+ if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
2654
+ return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2574
2655
  zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
2575
2656
 
2576
2657
  /* known magic number : dict is parsed for entropy stats and content */
@@ -2581,7 +2662,6 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
2581
2662
  }
2582
2663
  }
2583
2664
 
2584
-
2585
2665
  /*! ZSTD_compressBegin_internal() :
2586
2666
  * @return : 0, or an error code */
2587
2667
  static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
@@ -2613,9 +2693,9 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
2613
2693
  }
2614
2694
 
2615
2695
 
2616
- size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2696
+ size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
2617
2697
  {
2618
- return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2698
+ return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
2619
2699
  }
2620
2700
 
2621
2701
 
@@ -2695,7 +2775,7 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
2695
2775
 
2696
2776
  size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
2697
2777
  {
2698
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
2778
+ ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0);
2699
2779
  params.fParams.contentSizeFlag = 1;
2700
2780
  return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2701
2781
  }
@@ -2720,7 +2800,8 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS
2720
2800
  /* ===== Dictionary API ===== */
2721
2801
 
2722
2802
  struct ZSTD_CDict_s {
2723
- void* dictContent;
2803
+ void* dictBuffer;
2804
+ const void* dictContent;
2724
2805
  size_t dictContentSize;
2725
2806
  ZSTD_CCtx* refContext;
2726
2807
  }; /* typedef'd to ZSTD_CDict within "zstd.h" */
@@ -2728,39 +2809,45 @@ struct ZSTD_CDict_s {
2728
2809
  size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2729
2810
  {
2730
2811
  if (cdict==NULL) return 0; /* support sizeof on NULL */
2731
- return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2812
+ return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
2732
2813
  }
2733
2814
 
2734
- ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2815
+ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, unsigned byReference,
2816
+ ZSTD_parameters params, ZSTD_customMem customMem)
2735
2817
  {
2736
2818
  if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2737
2819
  if (!customMem.customAlloc || !customMem.customFree) return NULL;
2738
2820
 
2739
2821
  { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2740
- void* const dictContent = ZSTD_malloc(dictSize, customMem);
2741
2822
  ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2742
2823
 
2743
- if (!dictContent || !cdict || !cctx) {
2744
- ZSTD_free(dictContent, customMem);
2824
+ if (!cdict || !cctx) {
2745
2825
  ZSTD_free(cdict, customMem);
2746
- ZSTD_free(cctx, customMem);
2826
+ ZSTD_freeCCtx(cctx);
2747
2827
  return NULL;
2748
2828
  }
2749
2829
 
2750
- if (dictSize) {
2751
- memcpy(dictContent, dict, dictSize);
2830
+ if ((byReference) || (!dictBuffer) || (!dictSize)) {
2831
+ cdict->dictBuffer = NULL;
2832
+ cdict->dictContent = dictBuffer;
2833
+ } else {
2834
+ void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
2835
+ if (!internalBuffer) { ZSTD_free(cctx, customMem); ZSTD_free(cdict, customMem); return NULL; }
2836
+ memcpy(internalBuffer, dictBuffer, dictSize);
2837
+ cdict->dictBuffer = internalBuffer;
2838
+ cdict->dictContent = internalBuffer;
2752
2839
  }
2753
- { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2840
+
2841
+ { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
2754
2842
  if (ZSTD_isError(errorCode)) {
2755
- ZSTD_free(dictContent, customMem);
2843
+ ZSTD_free(cdict->dictBuffer, customMem);
2756
2844
  ZSTD_free(cdict, customMem);
2757
- ZSTD_free(cctx, customMem);
2845
+ ZSTD_freeCCtx(cctx);
2758
2846
  return NULL;
2759
2847
  } }
2760
2848
 
2761
- cdict->dictContent = dictContent;
2762
- cdict->dictContentSize = dictSize;
2763
2849
  cdict->refContext = cctx;
2850
+ cdict->dictContentSize = dictSize;
2764
2851
  return cdict;
2765
2852
  }
2766
2853
  }
@@ -2770,7 +2857,15 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL
2770
2857
  ZSTD_customMem const allocator = { NULL, NULL, NULL };
2771
2858
  ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2772
2859
  params.fParams.contentSizeFlag = 1;
2773
- return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2860
+ return ZSTD_createCDict_advanced(dict, dictSize, 0, params, allocator);
2861
+ }
2862
+
2863
+ ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
2864
+ {
2865
+ ZSTD_customMem const allocator = { NULL, NULL, NULL };
2866
+ ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2867
+ params.fParams.contentSizeFlag = 1;
2868
+ return ZSTD_createCDict_advanced(dict, dictSize, 1, params, allocator);
2774
2869
  }
2775
2870
 
2776
2871
  size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
@@ -2778,7 +2873,7 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
2778
2873
  if (cdict==NULL) return 0; /* support free on NULL */
2779
2874
  { ZSTD_customMem const cMem = cdict->refContext->customMem;
2780
2875
  ZSTD_freeCCtx(cdict->refContext);
2781
- ZSTD_free(cdict->dictContent, cMem);
2876
+ ZSTD_free(cdict->dictBuffer, cMem);
2782
2877
  ZSTD_free(cdict, cMem);
2783
2878
  return 0;
2784
2879
  }
@@ -2788,10 +2883,14 @@ static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2788
2883
  return ZSTD_getParamsFromCCtx(cdict->refContext);
2789
2884
  }
2790
2885
 
2791
- size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2886
+ size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize)
2792
2887
  {
2793
2888
  if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2794
- else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2889
+ else {
2890
+ ZSTD_parameters params = cdict->refContext->params;
2891
+ params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
2892
+ CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
2893
+ }
2795
2894
  return 0;
2796
2895
  }
2797
2896
 
@@ -2839,6 +2938,8 @@ struct ZSTD_CStream_s {
2839
2938
  ZSTD_cStreamStage stage;
2840
2939
  U32 checksum;
2841
2940
  U32 frameEnded;
2941
+ U64 pledgedSrcSize;
2942
+ U64 inputProcessed;
2842
2943
  ZSTD_parameters params;
2843
2944
  ZSTD_customMem customMem;
2844
2945
  }; /* typedef'd to ZSTD_CStream within "zstd.h" */
@@ -2883,9 +2984,9 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2883
2984
  size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2884
2985
  size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2885
2986
 
2886
- size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2987
+ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2887
2988
  {
2888
- if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */
2989
+ if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
2889
2990
 
2890
2991
  if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2891
2992
  else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
@@ -2896,9 +2997,19 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2896
2997
  zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
2897
2998
  zcs->stage = zcss_load;
2898
2999
  zcs->frameEnded = 0;
3000
+ zcs->pledgedSrcSize = pledgedSrcSize;
3001
+ zcs->inputProcessed = 0;
2899
3002
  return 0; /* ready to go */
2900
3003
  }
2901
3004
 
3005
+ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
3006
+ {
3007
+
3008
+ zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
3009
+
3010
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
3011
+ }
3012
+
2902
3013
  size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2903
3014
  const void* dict, size_t dictSize,
2904
3015
  ZSTD_parameters params, unsigned long long pledgedSrcSize)
@@ -2920,9 +3031,9 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2920
3031
  if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2921
3032
  }
2922
3033
 
2923
- if (dict) {
3034
+ if (dict && dictSize >= 8) {
2924
3035
  ZSTD_freeCDict(zcs->cdictLocal);
2925
- zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
3036
+ zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
2926
3037
  if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2927
3038
  zcs->cdict = zcs->cdictLocal;
2928
3039
  } else zcs->cdict = NULL;
@@ -2930,7 +3041,7 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2930
3041
  zcs->checksum = params.fParams.checksumFlag > 0;
2931
3042
  zcs->params = params;
2932
3043
 
2933
- return ZSTD_resetCStream(zcs, pledgedSrcSize);
3044
+ return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
2934
3045
  }
2935
3046
 
2936
3047
  /* note : cdict must outlive compression session */
@@ -2939,6 +3050,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
2939
3050
  ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2940
3051
  size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2941
3052
  zcs->cdict = cdict;
3053
+ zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
2942
3054
  return initError;
2943
3055
  }
2944
3056
 
@@ -2948,6 +3060,13 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t di
2948
3060
  return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
2949
3061
  }
2950
3062
 
3063
+ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize)
3064
+ {
3065
+ ZSTD_parameters params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3066
+ if (pledgedSrcSize) params.fParams.contentSizeFlag = 1;
3067
+ return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
3068
+ }
3069
+
2951
3070
  size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
2952
3071
  {
2953
3072
  return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
@@ -2956,7 +3075,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
2956
3075
  size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
2957
3076
  {
2958
3077
  if (zcs==NULL) return 0; /* support sizeof on NULL */
2959
- return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
3078
+ return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
2960
3079
  }
2961
3080
 
2962
3081
  /*====== Compression ======*/
@@ -3044,6 +3163,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
3044
3163
 
3045
3164
  *srcSizePtr = ip - istart;
3046
3165
  *dstCapacityPtr = op - ostart;
3166
+ zcs->inputProcessed += *srcSizePtr;
3047
3167
  if (zcs->frameEnded) return 0;
3048
3168
  { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3049
3169
  if (hintInSize==0) hintInSize = zcs->blockSize;
@@ -3088,6 +3208,9 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3088
3208
  BYTE* const oend = (BYTE*)(output->dst) + output->size;
3089
3209
  BYTE* op = ostart;
3090
3210
 
3211
+ if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
3212
+ return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
3213
+
3091
3214
  if (zcs->stage != zcss_final) {
3092
3215
  /* flush whatever remains */
3093
3216
  size_t srcSize = 0;