zstd-ruby 1.3.3.0 → 1.3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +5 -5
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +13 -0
  4. data/ext/zstdruby/libzstd/README.md +32 -25
  5. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  6. data/ext/zstdruby/libzstd/common/compiler.h +25 -0
  7. data/ext/zstdruby/libzstd/common/cpu.h +216 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +1 -0
  9. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  10. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
  11. data/ext/zstdruby/libzstd/common/huf.h +114 -89
  12. data/ext/zstdruby/libzstd/common/pool.c +46 -17
  13. data/ext/zstdruby/libzstd/common/pool.h +18 -9
  14. data/ext/zstdruby/libzstd/common/threading.h +12 -12
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
  18. data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
  19. data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
  21. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
  26. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
  28. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
  30. data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
  32. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
  33. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
  34. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
  35. data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
  36. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
  37. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
  38. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
  42. data/ext/zstdruby/libzstd/zstd.h +254 -254
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. metadata +4 -3
@@ -14,8 +14,9 @@
14
14
  *****************************************************************/
15
15
  /*!
16
16
  * HEAPMODE :
17
- * Select how default decompression function ZSTD_decompress() will allocate memory,
18
- * in memory stack (0), or in memory heap (1, requires malloc())
17
+ * Select how default decompression function ZSTD_decompress() allocates its context,
18
+ * on stack (0), or into heap (1, default; requires malloc()).
19
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
19
20
  */
20
21
  #ifndef ZSTD_HEAPMODE
21
22
  # define ZSTD_HEAPMODE 1
@@ -23,17 +24,18 @@
23
24
 
24
25
  /*!
25
26
  * LEGACY_SUPPORT :
26
- * if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
27
+ * if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
27
28
  */
28
29
  #ifndef ZSTD_LEGACY_SUPPORT
29
30
  # define ZSTD_LEGACY_SUPPORT 0
30
31
  #endif
31
32
 
32
33
  /*!
33
- * MAXWINDOWSIZE_DEFAULT :
34
- * maximum window size accepted by DStream, by default.
35
- * Frames requiring more memory will be rejected.
36
- */
34
+ * MAXWINDOWSIZE_DEFAULT :
35
+ * maximum window size accepted by DStream __by default__.
36
+ * Frames requiring more memory will be rejected.
37
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
38
+ */
37
39
  #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
38
40
  # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
39
41
  #endif
@@ -43,6 +45,7 @@
43
45
  * Dependencies
44
46
  *********************************************************/
45
47
  #include <string.h> /* memcpy, memmove, memset */
48
+ #include "cpu.h"
46
49
  #include "mem.h" /* low level memory routines */
47
50
  #define FSE_STATIC_LINKING_ONLY
48
51
  #include "fse.h"
@@ -80,10 +83,25 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
80
83
  typedef enum { zdss_init=0, zdss_loadHeader,
81
84
  zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
82
85
 
86
+
87
+ typedef struct {
88
+ U32 fastMode;
89
+ U32 tableLog;
90
+ } ZSTD_seqSymbol_header;
91
+
92
+ typedef struct {
93
+ U16 nextState;
94
+ BYTE nbAdditionalBits;
95
+ BYTE nbBits;
96
+ U32 baseValue;
97
+ } ZSTD_seqSymbol;
98
+
99
+ #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
100
+
83
101
  typedef struct {
84
- FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
85
- FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
86
- FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
102
+ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
103
+ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
104
+ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
87
105
  HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
88
106
  U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
89
107
  U32 rep[ZSTD_REP_NUM];
@@ -91,9 +109,9 @@ typedef struct {
91
109
 
92
110
  struct ZSTD_DCtx_s
93
111
  {
94
- const FSE_DTable* LLTptr;
95
- const FSE_DTable* MLTptr;
96
- const FSE_DTable* OFTptr;
112
+ const ZSTD_seqSymbol* LLTptr;
113
+ const ZSTD_seqSymbol* MLTptr;
114
+ const ZSTD_seqSymbol* OFTptr;
97
115
  const HUF_DTable* HUFptr;
98
116
  ZSTD_entropyDTables_t entropy;
99
117
  const void* previousDstEnd; /* detect continuity */
@@ -116,6 +134,7 @@ struct ZSTD_DCtx_s
116
134
  size_t litSize;
117
135
  size_t rleSize;
118
136
  size_t staticSize;
137
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
119
138
 
120
139
  /* streaming */
121
140
  ZSTD_DDict* ddictLocal;
@@ -173,6 +192,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
173
192
  dctx->inBuffSize = 0;
174
193
  dctx->outBuffSize = 0;
175
194
  dctx->streamStage = zdss_init;
195
+ dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
176
196
  }
177
197
 
178
198
  ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@@ -204,6 +224,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
204
224
 
205
225
  ZSTD_DCtx* ZSTD_createDCtx(void)
206
226
  {
227
+ DEBUGLOG(3, "ZSTD_createDCtx");
207
228
  return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
208
229
  }
209
230
 
@@ -234,8 +255,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
234
255
 
235
256
 
236
257
  /*-*************************************************************
237
- * Decompression section
238
- ***************************************************************/
258
+ * Frame header decoding
259
+ ***************************************************************/
239
260
 
240
261
  /*! ZSTD_isFrame() :
241
262
  * Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -257,7 +278,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
257
278
 
258
279
  /** ZSTD_frameHeaderSize_internal() :
259
280
  * srcSize must be large enough to reach header size fields.
260
- * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
281
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
261
282
  * @return : size of the Frame Header
262
283
  * or an error code, which can be tested with ZSTD_isError() */
263
284
  static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
@@ -480,6 +501,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
480
501
  }
481
502
 
482
503
 
504
+ /*-*************************************************************
505
+ * Block decoding
506
+ ***************************************************************/
507
+
483
508
  /*! ZSTD_getcBlockSize() :
484
509
  * Provides the size of compressed block from block header `src` */
485
510
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -566,13 +591,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
566
591
 
567
592
  if (HUF_isError((litEncType==set_repeat) ?
568
593
  ( singleStream ?
569
- HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
570
- HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
594
+ HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
595
+ HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
571
596
  ( singleStream ?
572
- HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
573
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
574
- HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
575
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
597
+ HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
598
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
599
+ HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
600
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
576
601
  return ERROR(corruption_detected);
577
602
 
578
603
  dctx->litPtr = dctx->litBuffer;
@@ -647,115 +672,268 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
647
672
  }
648
673
  }
649
674
 
650
-
651
- typedef union {
652
- FSE_decode_t realData;
653
- U32 alignedBy4;
654
- } FSE_decode_t4;
675
+ /* Default FSE distribution tables.
676
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
677
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
678
+ * They were generated programmatically with the following method :
679
+ * - start from default distributions, present in /lib/common/zstd_internal.h
680
+ * - generate tables normally, using ZSTD_buildFSETable()
681
+ * - printout the content of tables
682
+ * - prettify output, report below, test with fuzzer to ensure it's correct */
655
683
 
656
684
  /* Default FSE distribution table for Literal Lengths */
657
- static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
658
- { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
659
- /* base, symbol, bits */
660
- { { 0, 0, 4 } }, { { 16, 0, 4 } }, { { 32, 1, 5 } }, { { 0, 3, 5 } },
661
- { { 0, 4, 5 } }, { { 0, 6, 5 } }, { { 0, 7, 5 } }, { { 0, 9, 5 } },
662
- { { 0, 10, 5 } }, { { 0, 12, 5 } }, { { 0, 14, 6 } }, { { 0, 16, 5 } },
663
- { { 0, 18, 5 } }, { { 0, 19, 5 } }, { { 0, 21, 5 } }, { { 0, 22, 5 } },
664
- { { 0, 24, 5 } }, { { 32, 25, 5 } }, { { 0, 26, 5 } }, { { 0, 27, 6 } },
665
- { { 0, 29, 6 } }, { { 0, 31, 6 } }, { { 32, 0, 4 } }, { { 0, 1, 4 } },
666
- { { 0, 2, 5 } }, { { 32, 4, 5 } }, { { 0, 5, 5 } }, { { 32, 7, 5 } },
667
- { { 0, 8, 5 } }, { { 32, 10, 5 } }, { { 0, 11, 5 } }, { { 0, 13, 6 } },
668
- { { 32, 16, 5 } }, { { 0, 17, 5 } }, { { 32, 19, 5 } }, { { 0, 20, 5 } },
669
- { { 32, 22, 5 } }, { { 0, 23, 5 } }, { { 0, 25, 4 } }, { { 16, 25, 4 } },
670
- { { 32, 26, 5 } }, { { 0, 28, 6 } }, { { 0, 30, 6 } }, { { 48, 0, 4 } },
671
- { { 16, 1, 4 } }, { { 32, 2, 5 } }, { { 32, 3, 5 } }, { { 32, 5, 5 } },
672
- { { 32, 6, 5 } }, { { 32, 8, 5 } }, { { 32, 9, 5 } }, { { 32, 11, 5 } },
673
- { { 32, 12, 5 } }, { { 0, 15, 6 } }, { { 32, 17, 5 } }, { { 32, 18, 5 } },
674
- { { 32, 20, 5 } }, { { 32, 21, 5 } }, { { 32, 23, 5 } }, { { 32, 24, 5 } },
675
- { { 0, 35, 6 } }, { { 0, 34, 6 } }, { { 0, 33, 6 } }, { { 0, 32, 6 } },
685
+ static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
686
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
687
+ /* nextState, nbAddBits, nbBits, baseVal */
688
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
689
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
690
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
691
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
692
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
693
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
694
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
695
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
696
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
697
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
698
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
699
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
700
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
701
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
702
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
703
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
704
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
705
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
706
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
707
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
708
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
709
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
710
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
711
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
712
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
713
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
714
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
715
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
716
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
717
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
718
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
719
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
676
720
  }; /* LL_defaultDTable */
677
721
 
722
+ /* Default FSE distribution table for Offset Codes */
723
+ static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
724
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
725
+ /* nextState, nbAddBits, nbBits, baseVal */
726
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
727
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
728
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
729
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
730
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
731
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
732
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
733
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
734
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
735
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
736
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
737
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
738
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
739
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
740
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
741
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
742
+ }; /* OF_defaultDTable */
743
+
744
+
678
745
  /* Default FSE distribution table for Match Lengths */
679
- static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
680
- { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
681
- /* base, symbol, bits */
682
- { { 0, 0, 6 } }, { { 0, 1, 4 } }, { { 32, 2, 5 } }, { { 0, 3, 5 } },
683
- { { 0, 5, 5 } }, { { 0, 6, 5 } }, { { 0, 8, 5 } }, { { 0, 10, 6 } },
684
- { { 0, 13, 6 } }, { { 0, 16, 6 } }, { { 0, 19, 6 } }, { { 0, 22, 6 } },
685
- { { 0, 25, 6 } }, { { 0, 28, 6 } }, { { 0, 31, 6 } }, { { 0, 33, 6 } },
686
- { { 0, 35, 6 } }, { { 0, 37, 6 } }, { { 0, 39, 6 } }, { { 0, 41, 6 } },
687
- { { 0, 43, 6 } }, { { 0, 45, 6 } }, { { 16, 1, 4 } }, { { 0, 2, 4 } },
688
- { { 32, 3, 5 } }, { { 0, 4, 5 } }, { { 32, 6, 5 } }, { { 0, 7, 5 } },
689
- { { 0, 9, 6 } }, { { 0, 12, 6 } }, { { 0, 15, 6 } }, { { 0, 18, 6 } },
690
- { { 0, 21, 6 } }, { { 0, 24, 6 } }, { { 0, 27, 6 } }, { { 0, 30, 6 } },
691
- { { 0, 32, 6 } }, { { 0, 34, 6 } }, { { 0, 36, 6 } }, { { 0, 38, 6 } },
692
- { { 0, 40, 6 } }, { { 0, 42, 6 } }, { { 0, 44, 6 } }, { { 32, 1, 4 } },
693
- { { 48, 1, 4 } }, { { 16, 2, 4 } }, { { 32, 4, 5 } }, { { 32, 5, 5 } },
694
- { { 32, 7, 5 } }, { { 32, 8, 5 } }, { { 0, 11, 6 } }, { { 0, 14, 6 } },
695
- { { 0, 17, 6 } }, { { 0, 20, 6 } }, { { 0, 23, 6 } }, { { 0, 26, 6 } },
696
- { { 0, 29, 6 } }, { { 0, 52, 6 } }, { { 0, 51, 6 } }, { { 0, 50, 6 } },
697
- { { 0, 49, 6 } }, { { 0, 48, 6 } }, { { 0, 47, 6 } }, { { 0, 46, 6 } },
746
+ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
747
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
748
+ /* nextState, nbAddBits, nbBits, baseVal */
749
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
750
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
751
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
752
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
753
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
754
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
755
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
756
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
757
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
758
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
759
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
760
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
761
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
762
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
763
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
764
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
765
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
766
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
767
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
768
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
769
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
770
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
771
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
772
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
773
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
774
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
775
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
776
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
777
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
778
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
779
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
780
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
698
781
  }; /* ML_defaultDTable */
699
782
 
700
- /* Default FSE distribution table for Offset Codes */
701
- static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
702
- { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
703
- /* base, symbol, bits */
704
- { { 0, 0, 5 } }, { { 0, 6, 4 } },
705
- { { 0, 9, 5 } }, { { 0, 15, 5 } },
706
- { { 0, 21, 5 } }, { { 0, 3, 5 } },
707
- { { 0, 7, 4 } }, { { 0, 12, 5 } },
708
- { { 0, 18, 5 } }, { { 0, 23, 5 } },
709
- { { 0, 5, 5 } }, { { 0, 8, 4 } },
710
- { { 0, 14, 5 } }, { { 0, 20, 5 } },
711
- { { 0, 2, 5 } }, { { 16, 7, 4 } },
712
- { { 0, 11, 5 } }, { { 0, 17, 5 } },
713
- { { 0, 22, 5 } }, { { 0, 4, 5 } },
714
- { { 16, 8, 4 } }, { { 0, 13, 5 } },
715
- { { 0, 19, 5 } }, { { 0, 1, 5 } },
716
- { { 16, 6, 4 } }, { { 0, 10, 5 } },
717
- { { 0, 16, 5 } }, { { 0, 28, 5 } },
718
- { { 0, 27, 5 } }, { { 0, 26, 5 } },
719
- { { 0, 25, 5 } }, { { 0, 24, 5 } },
720
- }; /* OF_defaultDTable */
783
+
784
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
785
+ {
786
+ void* ptr = dt;
787
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
788
+ ZSTD_seqSymbol* const cell = dt + 1;
789
+
790
+ DTableH->tableLog = 0;
791
+ DTableH->fastMode = 0;
792
+
793
+ cell->nbBits = 0;
794
+ cell->nextState = 0;
795
+ assert(nbAddBits < 255);
796
+ cell->nbAdditionalBits = (BYTE)nbAddBits;
797
+ cell->baseValue = baseValue;
798
+ }
799
+
800
+
801
+ /* ZSTD_buildFSETable() :
802
+ * generate FSE decoding table for one symbol (ll, ml or off) */
803
+ static void
804
+ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
805
+ const short* normalizedCounter, unsigned maxSymbolValue,
806
+ const U32* baseValue, const U32* nbAdditionalBits,
807
+ unsigned tableLog)
808
+ {
809
+ ZSTD_seqSymbol* const tableDecode = dt+1;
810
+ U16 symbolNext[MaxSeq+1];
811
+
812
+ U32 const maxSV1 = maxSymbolValue + 1;
813
+ U32 const tableSize = 1 << tableLog;
814
+ U32 highThreshold = tableSize-1;
815
+
816
+ /* Sanity Checks */
817
+ assert(maxSymbolValue <= MaxSeq);
818
+ assert(tableLog <= MaxFSELog);
819
+
820
+ /* Init, lay down lowprob symbols */
821
+ { ZSTD_seqSymbol_header DTableH;
822
+ DTableH.tableLog = tableLog;
823
+ DTableH.fastMode = 1;
824
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
825
+ U32 s;
826
+ for (s=0; s<maxSV1; s++) {
827
+ if (normalizedCounter[s]==-1) {
828
+ tableDecode[highThreshold--].baseValue = s;
829
+ symbolNext[s] = 1;
830
+ } else {
831
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
832
+ symbolNext[s] = normalizedCounter[s];
833
+ } } }
834
+ memcpy(dt, &DTableH, sizeof(DTableH));
835
+ }
836
+
837
+ /* Spread symbols */
838
+ { U32 const tableMask = tableSize-1;
839
+ U32 const step = FSE_TABLESTEP(tableSize);
840
+ U32 s, position = 0;
841
+ for (s=0; s<maxSV1; s++) {
842
+ int i;
843
+ for (i=0; i<normalizedCounter[s]; i++) {
844
+ tableDecode[position].baseValue = s;
845
+ position = (position + step) & tableMask;
846
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
847
+ } }
848
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
849
+ }
850
+
851
+ /* Build Decoding table */
852
+ { U32 u;
853
+ for (u=0; u<tableSize; u++) {
854
+ U32 const symbol = tableDecode[u].baseValue;
855
+ U32 const nextState = symbolNext[symbol]++;
856
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
857
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
858
+ assert(nbAdditionalBits[symbol] < 255);
859
+ tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
860
+ tableDecode[u].baseValue = baseValue[symbol];
861
+ } }
862
+ }
863
+
721
864
 
722
865
  /*! ZSTD_buildSeqTable() :
723
866
  * @return : nb bytes read from src,
724
- * or an error code if it fails, testable with ZSTD_isError()
725
- */
726
- static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTablePtr,
867
+ * or an error code if it fails */
868
+ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
727
869
  symbolEncodingType_e type, U32 max, U32 maxLog,
728
870
  const void* src, size_t srcSize,
729
- const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
871
+ const U32* baseValue, const U32* nbAdditionalBits,
872
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
730
873
  {
731
- const void* const tmpPtr = defaultTable; /* bypass strict aliasing */
732
874
  switch(type)
733
875
  {
734
876
  case set_rle :
735
877
  if (!srcSize) return ERROR(srcSize_wrong);
736
878
  if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
737
- FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src);
879
+ { U32 const symbol = *(const BYTE*)src;
880
+ U32 const baseline = baseValue[symbol];
881
+ U32 const nbBits = nbAdditionalBits[symbol];
882
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
883
+ }
738
884
  *DTablePtr = DTableSpace;
739
885
  return 1;
740
886
  case set_basic :
741
- *DTablePtr = (const FSE_DTable*)tmpPtr;
887
+ *DTablePtr = defaultTable;
742
888
  return 0;
743
889
  case set_repeat:
744
890
  if (!flagRepeatTable) return ERROR(corruption_detected);
745
891
  return 0;
746
- default : /* impossible */
747
892
  case set_compressed :
748
893
  { U32 tableLog;
749
894
  S16 norm[MaxSeq+1];
750
895
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
751
896
  if (FSE_isError(headerSize)) return ERROR(corruption_detected);
752
897
  if (tableLog > maxLog) return ERROR(corruption_detected);
753
- FSE_buildDTable(DTableSpace, norm, max, tableLog);
898
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
754
899
  *DTablePtr = DTableSpace;
755
900
  return headerSize;
756
- } }
901
+ }
902
+ default : /* impossible */
903
+ assert(0);
904
+ return ERROR(GENERIC);
905
+ }
757
906
  }
758
907
 
908
+ static const U32 LL_base[MaxLL+1] = {
909
+ 0, 1, 2, 3, 4, 5, 6, 7,
910
+ 8, 9, 10, 11, 12, 13, 14, 15,
911
+ 16, 18, 20, 22, 24, 28, 32, 40,
912
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
913
+ 0x2000, 0x4000, 0x8000, 0x10000 };
914
+
915
+ static const U32 OF_base[MaxOff+1] = {
916
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
917
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
918
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
919
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
920
+
921
+ static const U32 OF_bits[MaxOff+1] = {
922
+ 0, 1, 2, 3, 4, 5, 6, 7,
923
+ 8, 9, 10, 11, 12, 13, 14, 15,
924
+ 16, 17, 18, 19, 20, 21, 22, 23,
925
+ 24, 25, 26, 27, 28, 29, 30, 31 };
926
+
927
+ static const U32 ML_base[MaxML+1] = {
928
+ 3, 4, 5, 6, 7, 8, 9, 10,
929
+ 11, 12, 13, 14, 15, 16, 17, 18,
930
+ 19, 20, 21, 22, 23, 24, 25, 26,
931
+ 27, 28, 29, 30, 31, 32, 33, 34,
932
+ 35, 37, 39, 41, 43, 47, 51, 59,
933
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
934
+ 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
935
+
936
+
759
937
  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
760
938
  const void* src, size_t srcSize)
761
939
  {
@@ -792,19 +970,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
792
970
  /* Build DTables */
793
971
  { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
794
972
  LLtype, MaxLL, LLFSELog,
795
- ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
973
+ ip, iend-ip,
974
+ LL_base, LL_bits,
975
+ LL_defaultDTable, dctx->fseEntropy);
796
976
  if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
797
977
  ip += llhSize;
798
978
  }
979
+
799
980
  { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
800
981
  OFtype, MaxOff, OffFSELog,
801
- ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
982
+ ip, iend-ip,
983
+ OF_base, OF_bits,
984
+ OF_defaultDTable, dctx->fseEntropy);
802
985
  if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
803
986
  ip += ofhSize;
804
987
  }
988
+
805
989
  { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
806
990
  MLtype, MaxML, MLFSELog,
807
- ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
991
+ ip, iend-ip,
992
+ ML_base, ML_bits,
993
+ ML_defaultDTable, dctx->fseEntropy);
808
994
  if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
809
995
  ip += mlhSize;
810
996
  }
@@ -821,11 +1007,16 @@ typedef struct {
821
1007
  const BYTE* match;
822
1008
  } seq_t;
823
1009
 
1010
+ typedef struct {
1011
+ size_t state;
1012
+ const ZSTD_seqSymbol* table;
1013
+ } ZSTD_fseState;
1014
+
824
1015
  typedef struct {
825
1016
  BIT_DStream_t DStream;
826
- FSE_DState_t stateLL;
827
- FSE_DState_t stateOffb;
828
- FSE_DState_t stateML;
1017
+ ZSTD_fseState stateLL;
1018
+ ZSTD_fseState stateOffb;
1019
+ ZSTD_fseState stateML;
829
1020
  size_t prevOffset[ZSTD_REP_NUM];
830
1021
  const BYTE* prefixStart;
831
1022
  const BYTE* dictEnd;
@@ -880,118 +1071,6 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
880
1071
  }
881
1072
 
882
1073
 
883
- typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
884
-
885
- /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
886
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
887
- * bits before reloading. This value is the maximum number of bytes we read
888
- * after reloading when we are decoding long offets.
889
- */
890
- #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
891
- (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
892
- ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
893
- : 0)
894
-
895
- static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
896
- {
897
- seq_t seq;
898
-
899
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
900
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
901
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
902
-
903
- U32 const llBits = LL_bits[llCode];
904
- U32 const mlBits = ML_bits[mlCode];
905
- U32 const ofBits = ofCode;
906
- U32 const totalBits = llBits+mlBits+ofBits;
907
-
908
- static const U32 LL_base[MaxLL+1] = {
909
- 0, 1, 2, 3, 4, 5, 6, 7,
910
- 8, 9, 10, 11, 12, 13, 14, 15,
911
- 16, 18, 20, 22, 24, 28, 32, 40,
912
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
913
- 0x2000, 0x4000, 0x8000, 0x10000 };
914
-
915
- static const U32 ML_base[MaxML+1] = {
916
- 3, 4, 5, 6, 7, 8, 9, 10,
917
- 11, 12, 13, 14, 15, 16, 17, 18,
918
- 19, 20, 21, 22, 23, 24, 25, 26,
919
- 27, 28, 29, 30, 31, 32, 33, 34,
920
- 35, 37, 39, 41, 43, 47, 51, 59,
921
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
922
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
923
-
924
- static const U32 OF_base[MaxOff+1] = {
925
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
926
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
927
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
928
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
929
-
930
- /* sequence */
931
- { size_t offset;
932
- if (!ofCode)
933
- offset = 0;
934
- else {
935
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
936
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
937
- assert(ofBits <= MaxOff);
938
- if (MEM_32bits() && longOffsets) {
939
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
940
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
941
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
942
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
943
- } else {
944
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
945
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
946
- }
947
- }
948
-
949
- if (ofCode <= 1) {
950
- offset += (llCode==0);
951
- if (offset) {
952
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
953
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
954
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
955
- seqState->prevOffset[1] = seqState->prevOffset[0];
956
- seqState->prevOffset[0] = offset = temp;
957
- } else {
958
- offset = seqState->prevOffset[0];
959
- }
960
- } else {
961
- seqState->prevOffset[2] = seqState->prevOffset[1];
962
- seqState->prevOffset[1] = seqState->prevOffset[0];
963
- seqState->prevOffset[0] = offset;
964
- }
965
- seq.offset = offset;
966
- }
967
-
968
- seq.matchLength = ML_base[mlCode]
969
- + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
970
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
971
- BIT_reloadDStream(&seqState->DStream);
972
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
973
- BIT_reloadDStream(&seqState->DStream);
974
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
975
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
976
-
977
- seq.litLength = LL_base[llCode]
978
- + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
979
- if (MEM_32bits())
980
- BIT_reloadDStream(&seqState->DStream);
981
-
982
- DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
983
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
984
-
985
- /* ANS state update */
986
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
987
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
988
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
989
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
990
-
991
- return seq;
992
- }
993
-
994
-
995
1074
  HINT_INLINE
996
1075
  size_t ZSTD_execSequence(BYTE* op,
997
1076
  BYTE* const oend, seq_t sequence,
@@ -1073,10 +1152,199 @@ size_t ZSTD_execSequence(BYTE* op,
1073
1152
  }
1074
1153
 
1075
1154
 
1076
- static size_t ZSTD_decompressSequences(
1077
- ZSTD_DCtx* dctx,
1155
+ HINT_INLINE
1156
+ size_t ZSTD_execSequenceLong(BYTE* op,
1157
+ BYTE* const oend, seq_t sequence,
1158
+ const BYTE** litPtr, const BYTE* const litLimit,
1159
+ const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
1160
+ {
1161
+ BYTE* const oLitEnd = op + sequence.litLength;
1162
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1163
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1164
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1165
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1166
+ const BYTE* match = sequence.match;
1167
+
1168
+ /* check */
1169
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1170
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1171
+ if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
1172
+
1173
+ /* copy Literals */
1174
+ ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
1175
+ if (sequence.litLength > 8)
1176
+ ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1177
+ op = oLitEnd;
1178
+ *litPtr = iLitEnd; /* update for next sequence */
1179
+
1180
+ /* copy Match */
1181
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1182
+ /* offset beyond prefix */
1183
+ if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
1184
+ if (match + sequence.matchLength <= dictEnd) {
1185
+ memmove(oLitEnd, match, sequence.matchLength);
1186
+ return sequenceLength;
1187
+ }
1188
+ /* span extDict & currentPrefixSegment */
1189
+ { size_t const length1 = dictEnd - match;
1190
+ memmove(oLitEnd, match, length1);
1191
+ op = oLitEnd + length1;
1192
+ sequence.matchLength -= length1;
1193
+ match = prefixStart;
1194
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
1195
+ U32 i;
1196
+ for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1197
+ return sequenceLength;
1198
+ }
1199
+ } }
1200
+ assert(op <= oend_w);
1201
+ assert(sequence.matchLength >= MINMATCH);
1202
+
1203
+ /* match within prefix */
1204
+ if (sequence.offset < 8) {
1205
+ /* close range match, overlap */
1206
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1207
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1208
+ int const sub2 = dec64table[sequence.offset];
1209
+ op[0] = match[0];
1210
+ op[1] = match[1];
1211
+ op[2] = match[2];
1212
+ op[3] = match[3];
1213
+ match += dec32table[sequence.offset];
1214
+ ZSTD_copy4(op+4, match);
1215
+ match -= sub2;
1216
+ } else {
1217
+ ZSTD_copy8(op, match);
1218
+ }
1219
+ op += 8; match += 8;
1220
+
1221
+ if (oMatchEnd > oend-(16-MINMATCH)) {
1222
+ if (op < oend_w) {
1223
+ ZSTD_wildcopy(op, match, oend_w - op);
1224
+ match += oend_w - op;
1225
+ op = oend_w;
1226
+ }
1227
+ while (op < oMatchEnd) *op++ = *match++;
1228
+ } else {
1229
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1230
+ }
1231
+ return sequenceLength;
1232
+ }
1233
+
1234
+ static void
1235
+ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1236
+ {
1237
+ const void* ptr = dt;
1238
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1239
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1240
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1241
+ (U32)DStatePtr->state, DTableH->tableLog);
1242
+ BIT_reloadDStream(bitD);
1243
+ DStatePtr->table = dt + 1;
1244
+ }
1245
+
1246
+ FORCE_INLINE_TEMPLATE void
1247
+ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
1248
+ {
1249
+ ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
1250
+ U32 const nbBits = DInfo.nbBits;
1251
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
1252
+ DStatePtr->state = DInfo.nextState + lowBits;
1253
+ }
1254
+
1255
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1256
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1257
+ * bits before reloading. This value is the maximum number of bits we read
1258
+ * after reloading when we are decoding long offsets.
1259
+ */
1260
+ #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1261
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1262
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1263
+ : 0)
1264
+
1265
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1266
+
1267
+ FORCE_INLINE_TEMPLATE seq_t
1268
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1269
+ {
1270
+ seq_t seq;
1271
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1272
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1273
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1274
+ U32 const totalBits = llBits+mlBits+ofBits;
1275
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1276
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1277
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1278
+
1279
+ /* sequence */
1280
+ { size_t offset;
1281
+ if (!ofBits)
1282
+ offset = 0;
1283
+ else {
1284
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1285
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1286
+ assert(ofBits <= MaxOff);
1287
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1288
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1289
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1290
+ BIT_reloadDStream(&seqState->DStream);
1291
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1292
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1293
+ } else {
1294
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1295
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1296
+ }
1297
+ }
1298
+
1299
+ if (ofBits <= 1) {
1300
+ offset += (llBase==0);
1301
+ if (offset) {
1302
+ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1303
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1304
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1305
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1306
+ seqState->prevOffset[0] = offset = temp;
1307
+ } else { /* offset == 0 */
1308
+ offset = seqState->prevOffset[0];
1309
+ }
1310
+ } else {
1311
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1312
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1313
+ seqState->prevOffset[0] = offset;
1314
+ }
1315
+ seq.offset = offset;
1316
+ }
1317
+
1318
+ seq.matchLength = mlBase
1319
+ + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
1320
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321
+ BIT_reloadDStream(&seqState->DStream);
1322
+ if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323
+ BIT_reloadDStream(&seqState->DStream);
1324
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1325
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1326
+
1327
+ seq.litLength = llBase
1328
+ + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
1329
+ if (MEM_32bits())
1330
+ BIT_reloadDStream(&seqState->DStream);
1331
+
1332
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1333
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1334
+
1335
+ /* ANS state update */
1336
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1337
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1338
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1339
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1340
+
1341
+ return seq;
1342
+ }
1343
+
1344
+ FORCE_INLINE_TEMPLATE size_t
1345
+ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
1078
1346
  void* dst, size_t maxDstSize,
1079
- const void* seqStart, size_t seqSize,
1347
+ const void* seqStart, size_t seqSize, int nbSeq,
1080
1348
  const ZSTD_longOffset_e isLongOffset)
1081
1349
  {
1082
1350
  const BYTE* ip = (const BYTE*)seqStart;
@@ -1089,26 +1357,17 @@ static size_t ZSTD_decompressSequences(
1089
1357
  const BYTE* const base = (const BYTE*) (dctx->base);
1090
1358
  const BYTE* const vBase = (const BYTE*) (dctx->vBase);
1091
1359
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1092
- int nbSeq;
1093
1360
  DEBUGLOG(5, "ZSTD_decompressSequences");
1094
1361
 
1095
- /* Build Decoding Tables */
1096
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
1097
- DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
1098
- (U32)seqHSize, nbSeq);
1099
- if (ZSTD_isError(seqHSize)) return seqHSize;
1100
- ip += seqHSize;
1101
- }
1102
-
1103
1362
  /* Regen sequences */
1104
1363
  if (nbSeq) {
1105
1364
  seqState_t seqState;
1106
1365
  dctx->fseEntropy = 1;
1107
1366
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1108
1367
  CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1109
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1110
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1111
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1368
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1369
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1370
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1112
1371
 
1113
1372
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1114
1373
  nbSeq--;
@@ -1120,7 +1379,7 @@ static size_t ZSTD_decompressSequences(
1120
1379
  } }
1121
1380
 
1122
1381
  /* check if reached exact end */
1123
- DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
1382
+ DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
1124
1383
  if (nbSeq) return ERROR(corruption_detected);
1125
1384
  /* save reps for next block */
1126
1385
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1136,46 +1395,32 @@ static size_t ZSTD_decompressSequences(
1136
1395
  return op-ostart;
1137
1396
  }
1138
1397
 
1139
-
1140
- HINT_INLINE
1141
- seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
1398
+ static size_t
1399
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1400
+ void* dst, size_t maxDstSize,
1401
+ const void* seqStart, size_t seqSize, int nbSeq,
1402
+ const ZSTD_longOffset_e isLongOffset)
1142
1403
  {
1143
- seq_t seq;
1404
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1405
+ }
1144
1406
 
1145
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
1146
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
1147
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
1148
1407
 
1149
- U32 const llBits = LL_bits[llCode];
1150
- U32 const mlBits = ML_bits[mlCode];
1151
- U32 const ofBits = ofCode;
1152
- U32 const totalBits = llBits+mlBits+ofBits;
1153
1408
 
1154
- static const U32 LL_base[MaxLL+1] = {
1155
- 0, 1, 2, 3, 4, 5, 6, 7,
1156
- 8, 9, 10, 11, 12, 13, 14, 15,
1157
- 16, 18, 20, 22, 24, 28, 32, 40,
1158
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
1159
- 0x2000, 0x4000, 0x8000, 0x10000 };
1160
-
1161
- static const U32 ML_base[MaxML+1] = {
1162
- 3, 4, 5, 6, 7, 8, 9, 10,
1163
- 11, 12, 13, 14, 15, 16, 17, 18,
1164
- 19, 20, 21, 22, 23, 24, 25, 26,
1165
- 27, 28, 29, 30, 31, 32, 33, 34,
1166
- 35, 37, 39, 41, 43, 47, 51, 59,
1167
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
1168
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
1169
-
1170
- static const U32 OF_base[MaxOff+1] = {
1171
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
1172
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
1173
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
1174
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
1409
+ FORCE_INLINE_TEMPLATE seq_t
1410
+ ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
1411
+ {
1412
+ seq_t seq;
1413
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1414
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1415
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1416
+ U32 const totalBits = llBits+mlBits+ofBits;
1417
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1418
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1419
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1175
1420
 
1176
1421
  /* sequence */
1177
1422
  { size_t offset;
1178
- if (!ofCode)
1423
+ if (!ofBits)
1179
1424
  offset = 0;
1180
1425
  else {
1181
1426
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
@@ -1183,17 +1428,17 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1183
1428
  assert(ofBits <= MaxOff);
1184
1429
  if (MEM_32bits() && longOffsets) {
1185
1430
  U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
1186
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1431
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1187
1432
  if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
1188
1433
  if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1189
1434
  } else {
1190
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1435
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1191
1436
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1192
1437
  }
1193
1438
  }
1194
1439
 
1195
- if (ofCode <= 1) {
1196
- offset += (llCode==0);
1440
+ if (ofBits <= 1) {
1441
+ offset += (llBase==0);
1197
1442
  if (offset) {
1198
1443
  size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1199
1444
  temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
@@ -1211,7 +1456,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1211
1456
  seq.offset = offset;
1212
1457
  }
1213
1458
 
1214
- seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1459
+ seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1215
1460
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1216
1461
  BIT_reloadDStream(&seqState->DStream);
1217
1462
  if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
@@ -1219,7 +1464,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1219
1464
  /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
1220
1465
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1221
1466
 
1222
- seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1467
+ seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1223
1468
  if (MEM_32bits())
1224
1469
  BIT_reloadDStream(&seqState->DStream);
1225
1470
 
@@ -1231,98 +1476,19 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1231
1476
  }
1232
1477
 
1233
1478
  /* ANS state update */
1234
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1235
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1479
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1480
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1236
1481
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1237
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1482
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1238
1483
 
1239
1484
  return seq;
1240
1485
  }
1241
1486
 
1242
-
1243
- HINT_INLINE
1244
- size_t ZSTD_execSequenceLong(BYTE* op,
1245
- BYTE* const oend, seq_t sequence,
1246
- const BYTE** litPtr, const BYTE* const litLimit,
1247
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
1248
- {
1249
- BYTE* const oLitEnd = op + sequence.litLength;
1250
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1251
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1252
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1253
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1254
- const BYTE* match = sequence.match;
1255
-
1256
- /* check */
1257
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1258
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1259
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
1260
-
1261
- /* copy Literals */
1262
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
1263
- if (sequence.litLength > 8)
1264
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1265
- op = oLitEnd;
1266
- *litPtr = iLitEnd; /* update for next sequence */
1267
-
1268
- /* copy Match */
1269
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1270
- /* offset beyond prefix */
1271
- if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
1272
- if (match + sequence.matchLength <= dictEnd) {
1273
- memmove(oLitEnd, match, sequence.matchLength);
1274
- return sequenceLength;
1275
- }
1276
- /* span extDict & currentPrefixSegment */
1277
- { size_t const length1 = dictEnd - match;
1278
- memmove(oLitEnd, match, length1);
1279
- op = oLitEnd + length1;
1280
- sequence.matchLength -= length1;
1281
- match = prefixStart;
1282
- if (op > oend_w || sequence.matchLength < MINMATCH) {
1283
- U32 i;
1284
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1285
- return sequenceLength;
1286
- }
1287
- } }
1288
- assert(op <= oend_w);
1289
- assert(sequence.matchLength >= MINMATCH);
1290
-
1291
- /* match within prefix */
1292
- if (sequence.offset < 8) {
1293
- /* close range match, overlap */
1294
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1295
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1296
- int const sub2 = dec64table[sequence.offset];
1297
- op[0] = match[0];
1298
- op[1] = match[1];
1299
- op[2] = match[2];
1300
- op[3] = match[3];
1301
- match += dec32table[sequence.offset];
1302
- ZSTD_copy4(op+4, match);
1303
- match -= sub2;
1304
- } else {
1305
- ZSTD_copy8(op, match);
1306
- }
1307
- op += 8; match += 8;
1308
-
1309
- if (oMatchEnd > oend-(16-MINMATCH)) {
1310
- if (op < oend_w) {
1311
- ZSTD_wildcopy(op, match, oend_w - op);
1312
- match += oend_w - op;
1313
- op = oend_w;
1314
- }
1315
- while (op < oMatchEnd) *op++ = *match++;
1316
- } else {
1317
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1318
- }
1319
- return sequenceLength;
1320
- }
1321
-
1322
- static size_t ZSTD_decompressSequencesLong(
1487
+ FORCE_INLINE_TEMPLATE size_t
1488
+ ZSTD_decompressSequencesLong_body(
1323
1489
  ZSTD_DCtx* dctx,
1324
1490
  void* dst, size_t maxDstSize,
1325
- const void* seqStart, size_t seqSize,
1491
+ const void* seqStart, size_t seqSize, int nbSeq,
1326
1492
  const ZSTD_longOffset_e isLongOffset)
1327
1493
  {
1328
1494
  const BYTE* ip = (const BYTE*)seqStart;
@@ -1335,13 +1501,6 @@ static size_t ZSTD_decompressSequencesLong(
1335
1501
  const BYTE* const prefixStart = (const BYTE*) (dctx->base);
1336
1502
  const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
1337
1503
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338
- int nbSeq;
1339
-
1340
- /* Build Decoding Tables */
1341
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
1342
- if (ZSTD_isError(seqHSize)) return seqHSize;
1343
- ip += seqHSize;
1344
- }
1345
1504
 
1346
1505
  /* Regen sequences */
1347
1506
  if (nbSeq) {
@@ -1358,18 +1517,18 @@ static size_t ZSTD_decompressSequencesLong(
1358
1517
  seqState.pos = (size_t)(op-prefixStart);
1359
1518
  seqState.dictEnd = dictEnd;
1360
1519
  CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1361
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1362
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1363
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1520
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1521
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1522
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1364
1523
 
1365
1524
  /* prepare in advance */
1366
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
1525
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1367
1526
  sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1368
1527
  }
1369
1528
  if (seqNb<seqAdvance) return ERROR(corruption_detected);
1370
1529
 
1371
1530
  /* decode and decompress */
1372
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
1531
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1373
1532
  seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1374
1533
  size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1375
1534
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@@ -1389,6 +1548,9 @@ static size_t ZSTD_decompressSequencesLong(
1389
1548
 
1390
1549
  /* save reps for next block */
1391
1550
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1551
+ #undef STORED_SEQS
1552
+ #undef STOSEQ_MASK
1553
+ #undef ADVANCED_SEQS
1392
1554
  }
1393
1555
 
1394
1556
  /* last literal segment */
@@ -1401,6 +1563,96 @@ static size_t ZSTD_decompressSequencesLong(
1401
1563
  return op-ostart;
1402
1564
  }
1403
1565
 
1566
+ static size_t
1567
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1568
+ void* dst, size_t maxDstSize,
1569
+ const void* seqStart, size_t seqSize, int nbSeq,
1570
+ const ZSTD_longOffset_e isLongOffset)
1571
+ {
1572
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1573
+ }
1574
+
1575
+
1576
+
1577
+ #if DYNAMIC_BMI2
1578
+
1579
+ static TARGET_ATTRIBUTE("bmi2") size_t
1580
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1581
+ void* dst, size_t maxDstSize,
1582
+ const void* seqStart, size_t seqSize, int nbSeq,
1583
+ const ZSTD_longOffset_e isLongOffset)
1584
+ {
1585
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1586
+ }
1587
+
1588
+ static TARGET_ATTRIBUTE("bmi2") size_t
1589
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1590
+ void* dst, size_t maxDstSize,
1591
+ const void* seqStart, size_t seqSize, int nbSeq,
1592
+ const ZSTD_longOffset_e isLongOffset)
1593
+ {
1594
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1595
+ }
1596
+
1597
+ #endif
1598
+
1599
+ typedef size_t (*ZSTD_decompressSequences_t)(
1600
+ ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
1601
+ const void *seqStart, size_t seqSize, int nbSeq,
1602
+ const ZSTD_longOffset_e isLongOffset);
1603
+
1604
+ static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1605
+ const void* seqStart, size_t seqSize, int nbSeq,
1606
+ const ZSTD_longOffset_e isLongOffset)
1607
+ {
1608
+ DEBUGLOG(5, "ZSTD_decompressSequences");
1609
+ #if DYNAMIC_BMI2
1610
+ if (dctx->bmi2) {
1611
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1612
+ }
1613
+ #endif
1614
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1615
+ }
1616
+
1617
+ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1618
+ void* dst, size_t maxDstSize,
1619
+ const void* seqStart, size_t seqSize, int nbSeq,
1620
+ const ZSTD_longOffset_e isLongOffset)
1621
+ {
1622
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1623
+ #if DYNAMIC_BMI2
1624
+ if (dctx->bmi2) {
1625
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1626
+ }
1627
+ #endif
1628
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1629
+ }
1630
+
1631
+ /* ZSTD_getLongOffsetsShare() :
1632
+ * condition : offTable must be valid
1633
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1634
+ * compared to maximum possible of (1<<OffFSELog) */
1635
+ static unsigned
1636
+ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1637
+ {
1638
+ const void* ptr = offTable;
1639
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1640
+ const ZSTD_seqSymbol* table = offTable + 1;
1641
+ U32 const max = 1 << tableLog;
1642
+ U32 u, total = 0;
1643
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1644
+
1645
+ assert(max <= (1 << OffFSELog)); /* max not too large */
1646
+ for (u=0; u<max; u++) {
1647
+ if (table[u].nbAdditionalBits > 22) total += 1;
1648
+ }
1649
+
1650
+ assert(tableLog <= OffFSELog);
1651
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1652
+
1653
+ return total;
1654
+ }
1655
+
1404
1656
 
1405
1657
  static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1406
1658
  void* dst, size_t dstCapacity,
@@ -1410,13 +1662,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1410
1662
  /* isLongOffset must be true if there are long offsets.
1411
1663
  * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1412
1664
  * We don't expect that to be the case in 64-bit mode.
1413
- * If we are in block mode we don't know the window size, so we have to be
1414
- * conservative.
1665
+ * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
1415
1666
  */
1416
1667
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1417
- /* windowSize could be any value at this point, since it is only validated
1418
- * in the streaming API.
1419
- */
1420
1668
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1421
1669
 
1422
1670
  if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1428,9 +1676,24 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1428
1676
  ip += litCSize;
1429
1677
  srcSize -= litCSize;
1430
1678
  }
1431
- if (frame && dctx->fParams.windowSize > (1<<23))
1432
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
1433
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
1679
+
1680
+ /* Build Decoding Tables */
1681
+ { int nbSeq;
1682
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1683
+ if (ZSTD_isError(seqHSize)) return seqHSize;
1684
+ ip += seqHSize;
1685
+ srcSize -= seqHSize;
1686
+
1687
+ if ( (!frame || dctx->fParams.windowSize > (1<<24))
1688
+ && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
1689
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1690
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1691
+ if (shareLongOffsets >= minShare)
1692
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1693
+ }
1694
+
1695
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1696
+ }
1434
1697
  }
1435
1698
 
1436
1699
 
@@ -1758,7 +2021,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
1758
2021
  * or an error code, which can be tested using ZSTD_isError() */
1759
2022
  size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1760
2023
  {
1761
- DEBUGLOG(5, "ZSTD_decompressContinue");
2024
+ DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
1762
2025
  /* Sanity check */
1763
2026
  if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
1764
2027
  if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@@ -1819,12 +2082,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1819
2082
 
1820
2083
  case ZSTDds_decompressLastBlock:
1821
2084
  case ZSTDds_decompressBlock:
1822
- DEBUGLOG(5, "case ZSTDds_decompressBlock");
2085
+ DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
1823
2086
  { size_t rSize;
1824
2087
  switch(dctx->bType)
1825
2088
  {
1826
2089
  case bt_compressed:
1827
- DEBUGLOG(5, "case bt_compressed");
2090
+ DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
1828
2091
  rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
1829
2092
  break;
1830
2093
  case bt_raw :
@@ -1838,12 +2101,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1838
2101
  return ERROR(corruption_detected);
1839
2102
  }
1840
2103
  if (ZSTD_isError(rSize)) return rSize;
1841
- DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
2104
+ DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
1842
2105
  dctx->decodedSize += rSize;
1843
2106
  if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
1844
2107
 
1845
2108
  if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
1846
- DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
2109
+ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
1847
2110
  if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
1848
2111
  if (dctx->decodedSize != dctx->fParams.frameContentSize) {
1849
2112
  return ERROR(corruption_detected);
@@ -1867,7 +2130,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1867
2130
  assert(srcSize == 4); /* guaranteed by dctx->expected */
1868
2131
  { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
1869
2132
  U32 const check32 = MEM_readLE32(src);
1870
- DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
2133
+ DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
1871
2134
  if (check32 != h32) return ERROR(checksum_wrong);
1872
2135
  dctx->expected = 0;
1873
2136
  dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -1925,8 +2188,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1925
2188
  U32 offcodeMaxValue = MaxOff, offcodeLog;
1926
2189
  size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
1927
2190
  if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2191
+ if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
1928
2192
  if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
1929
- CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
2193
+ ZSTD_buildFSETable(entropy->OFTable,
2194
+ offcodeNCount, offcodeMaxValue,
2195
+ OF_base, OF_bits,
2196
+ offcodeLog);
1930
2197
  dictPtr += offcodeHeaderSize;
1931
2198
  }
1932
2199
 
@@ -1934,8 +2201,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1934
2201
  unsigned matchlengthMaxValue = MaxML, matchlengthLog;
1935
2202
  size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
1936
2203
  if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2204
+ if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
1937
2205
  if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
1938
- CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
2206
+ ZSTD_buildFSETable(entropy->MLTable,
2207
+ matchlengthNCount, matchlengthMaxValue,
2208
+ ML_base, ML_bits,
2209
+ matchlengthLog);
1939
2210
  dictPtr += matchlengthHeaderSize;
1940
2211
  }
1941
2212
 
@@ -1943,8 +2214,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1943
2214
  unsigned litlengthMaxValue = MaxLL, litlengthLog;
1944
2215
  size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
1945
2216
  if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2217
+ if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
1946
2218
  if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
1947
- CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
2219
+ ZSTD_buildFSETable(entropy->LLTable,
2220
+ litlengthNCount, litlengthMaxValue,
2221
+ LL_base, LL_bits,
2222
+ litlengthLog);
1948
2223
  dictPtr += litlengthHeaderSize;
1949
2224
  }
1950
2225
 
@@ -2062,13 +2337,23 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddi
2062
2337
  return 0;
2063
2338
  }
2064
2339
 
2065
- static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
2340
+ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
2066
2341
  {
2067
2342
  ddict->dictID = 0;
2068
2343
  ddict->entropyPresent = 0;
2069
- if (ddict->dictSize < 8) return 0;
2344
+ if (dictContentType == ZSTD_dct_rawContent) return 0;
2345
+
2346
+ if (ddict->dictSize < 8) {
2347
+ if (dictContentType == ZSTD_dct_fullDict)
2348
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2349
+ return 0; /* pure content mode */
2350
+ }
2070
2351
  { U32 const magic = MEM_readLE32(ddict->dictContent);
2071
- if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */
2352
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
2353
+ if (dictContentType == ZSTD_dct_fullDict)
2354
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2355
+ return 0; /* pure content mode */
2356
+ }
2072
2357
  }
2073
2358
  ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
2074
2359
 
@@ -2079,7 +2364,10 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
2079
2364
  }
2080
2365
 
2081
2366
 
2082
- static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
2367
+ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
2368
+ const void* dict, size_t dictSize,
2369
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2370
+ ZSTD_dictContentType_e dictContentType)
2083
2371
  {
2084
2372
  if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2085
2373
  ddict->dictBuffer = NULL;
@@ -2095,12 +2383,15 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_
2095
2383
  ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2096
2384
 
2097
2385
  /* parse dictionary content */
2098
- CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
2386
+ CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
2099
2387
 
2100
2388
  return 0;
2101
2389
  }
2102
2390
 
2103
- ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_customMem customMem)
2391
+ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
2392
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2393
+ ZSTD_dictContentType_e dictContentType,
2394
+ ZSTD_customMem customMem)
2104
2395
  {
2105
2396
  if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2106
2397
 
@@ -2108,7 +2399,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
2108
2399
  if (!ddict) return NULL;
2109
2400
  ddict->cMem = customMem;
2110
2401
 
2111
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod) )) {
2402
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
2112
2403
  ZSTD_freeDDict(ddict);
2113
2404
  return NULL;
2114
2405
  }
@@ -2124,7 +2415,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
2124
2415
  ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
2125
2416
  {
2126
2417
  ZSTD_customMem const allocator = { NULL, NULL, NULL };
2127
- return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, allocator);
2418
+ return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
2128
2419
  }
2129
2420
 
2130
2421
  /*! ZSTD_createDDict_byReference() :
@@ -2134,13 +2425,15 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
2134
2425
  ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
2135
2426
  {
2136
2427
  ZSTD_customMem const allocator = { NULL, NULL, NULL };
2137
- return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, allocator);
2428
+ return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
2138
2429
  }
2139
2430
 
2140
2431
 
2141
- ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
2142
- const void* dict, size_t dictSize,
2143
- ZSTD_dictLoadMethod_e dictLoadMethod)
2432
+ const ZSTD_DDict* ZSTD_initStaticDDict(
2433
+ void* workspace, size_t workspaceSize,
2434
+ const void* dict, size_t dictSize,
2435
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2436
+ ZSTD_dictContentType_e dictContentType)
2144
2437
  {
2145
2438
  size_t const neededSpace =
2146
2439
  sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
@@ -2153,7 +2446,7 @@ ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
2153
2446
  memcpy(ddict+1, dict, dictSize); /* local copy */
2154
2447
  dict = ddict+1;
2155
2448
  }
2156
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef) ))
2449
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
2157
2450
  return NULL;
2158
2451
  return ddict;
2159
2452
  }
@@ -2247,6 +2540,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
2247
2540
 
2248
2541
  ZSTD_DStream* ZSTD_createDStream(void)
2249
2542
  {
2543
+ DEBUGLOG(3, "ZSTD_createDStream");
2250
2544
  return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
2251
2545
  }
2252
2546
 
@@ -2271,58 +2565,99 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
2271
2565
  size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
2272
2566
  size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2273
2567
 
2274
- size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
2568
+ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
2275
2569
  {
2276
- zds->streamStage = zdss_loadHeader;
2277
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
2278
- ZSTD_freeDDict(zds->ddictLocal);
2570
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2571
+ ZSTD_freeDDict(dctx->ddictLocal);
2279
2572
  if (dict && dictSize >= 8) {
2280
- zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
2281
- if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
2282
- } else zds->ddictLocal = NULL;
2283
- zds->ddict = zds->ddictLocal;
2284
- zds->legacyVersion = 0;
2285
- zds->hostageByte = 0;
2573
+ dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
2574
+ if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
2575
+ } else {
2576
+ dctx->ddictLocal = NULL;
2577
+ }
2578
+ dctx->ddict = dctx->ddictLocal;
2579
+ return 0;
2580
+ }
2581
+
2582
+ size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2583
+ {
2584
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
2585
+ }
2586
+
2587
+ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2588
+ {
2589
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
2590
+ }
2591
+
2592
+ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
2593
+ {
2594
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
2595
+ }
2596
+
2597
+ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
2598
+ {
2599
+ return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
2600
+ }
2601
+
2602
+
2603
+ /* ZSTD_initDStream_usingDict() :
2604
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
2605
+ * this function cannot fail */
2606
+ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
2607
+ {
2608
+ DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2609
+ zds->streamStage = zdss_init;
2610
+ CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2286
2611
  return ZSTD_frameHeaderSize_prefix;
2287
2612
  }
2288
2613
 
2289
2614
  /* note : this variant can't fail */
2290
2615
  size_t ZSTD_initDStream(ZSTD_DStream* zds)
2291
2616
  {
2617
+ DEBUGLOG(4, "ZSTD_initDStream");
2292
2618
  return ZSTD_initDStream_usingDict(zds, NULL, 0);
2293
2619
  }
2294
2620
 
2621
+ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2622
+ {
2623
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2624
+ dctx->ddict = ddict;
2625
+ return 0;
2626
+ }
2627
+
2295
2628
  /* ZSTD_initDStream_usingDDict() :
2296
2629
  * ddict will just be referenced, and must outlive decompression session
2297
2630
  * this function cannot fail */
2298
- size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
2631
+ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
2299
2632
  {
2300
- size_t const initResult = ZSTD_initDStream(zds);
2301
- zds->ddict = ddict;
2633
+ size_t const initResult = ZSTD_initDStream(dctx);
2634
+ dctx->ddict = ddict;
2302
2635
  return initResult;
2303
2636
  }
2304
2637
 
2305
- size_t ZSTD_resetDStream(ZSTD_DStream* zds)
2638
+ /* ZSTD_resetDStream() :
2639
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
2640
+ * this function cannot fail */
2641
+ size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
2306
2642
  {
2307
- zds->streamStage = zdss_loadHeader;
2308
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
2309
- zds->legacyVersion = 0;
2310
- zds->hostageByte = 0;
2643
+ DEBUGLOG(4, "ZSTD_resetDStream");
2644
+ dctx->streamStage = zdss_loadHeader;
2645
+ dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
2646
+ dctx->legacyVersion = 0;
2647
+ dctx->hostageByte = 0;
2311
2648
  return ZSTD_frameHeaderSize_prefix;
2312
2649
  }
2313
2650
 
2314
- size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
2651
+ size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
2315
2652
  ZSTD_DStreamParameter_e paramType, unsigned paramValue)
2316
2653
  {
2317
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2318
- if ((unsigned)zds->streamStage > (unsigned)zdss_loadHeader)
2319
- return ERROR(stage_wrong);
2654
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2320
2655
  switch(paramType)
2321
2656
  {
2322
2657
  default : return ERROR(parameter_unsupported);
2323
2658
  case DStream_p_maxWindowSize :
2324
2659
  DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
2325
- zds->maxWindowSize = paramValue ? paramValue : (U32)(-1);
2660
+ dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
2326
2661
  break;
2327
2662
  }
2328
2663
  return 0;
@@ -2330,9 +2665,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
2330
2665
 
2331
2666
  size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2332
2667
  {
2333
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2334
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
2335
- return ERROR(stage_wrong);
2668
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2336
2669
  dctx->maxWindowSize = maxWindowSize;
2337
2670
  return 0;
2338
2671
  }
@@ -2340,17 +2673,15 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2340
2673
  size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
2341
2674
  {
2342
2675
  DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
2343
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2344
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
2345
- return ERROR(stage_wrong);
2676
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2346
2677
  dctx->format = format;
2347
2678
  return 0;
2348
2679
  }
2349
2680
 
2350
2681
 
2351
- size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
2682
+ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
2352
2683
  {
2353
- return ZSTD_sizeof_DCtx(zds);
2684
+ return ZSTD_sizeof_DCtx(dctx);
2354
2685
  }
2355
2686
 
2356
2687
  size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
@@ -2417,23 +2748,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2417
2748
  }
2418
2749
  DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
2419
2750
 
2420
- #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2421
- if (zds->legacyVersion) {
2422
- /* legacy support is incompatible with static dctx */
2423
- if (zds->staticSize) return ERROR(memory_allocation);
2424
- return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
2425
- }
2426
- #endif
2427
-
2428
2751
  while (someMoreWork) {
2429
2752
  switch(zds->streamStage)
2430
2753
  {
2431
2754
  case zdss_init :
2755
+ DEBUGLOG(5, "stage zdss_init => transparent reset ");
2432
2756
  ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
2433
2757
  /* fall-through */
2434
2758
 
2435
2759
  case zdss_loadHeader :
2436
2760
  DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
2761
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2762
+ if (zds->legacyVersion) {
2763
+ /* legacy support is incompatible with static dctx */
2764
+ if (zds->staticSize) return ERROR(memory_allocation);
2765
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
2766
+ if (hint==0) zds->streamStage = zdss_init;
2767
+ return hint;
2768
+ } }
2769
+ #endif
2437
2770
  { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2438
2771
  DEBUGLOG(5, "header size : %u", (U32)hSize);
2439
2772
  if (ZSTD_isError(hSize)) {
@@ -2442,14 +2775,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2442
2775
  if (legacyVersion) {
2443
2776
  const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
2444
2777
  size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2778
+ DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
2445
2779
  /* legacy support is incompatible with static dctx */
2446
2780
  if (zds->staticSize) return ERROR(memory_allocation);
2447
2781
  CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
2448
2782
  zds->previousLegacyVersion, legacyVersion,
2449
2783
  dict, dictSize));
2450
2784
  zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
2451
- return ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
2452
- }
2785
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
2786
+ if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
2787
+ return hint;
2788
+ } }
2453
2789
  #endif
2454
2790
  return hSize; /* error */
2455
2791
  }
@@ -2559,6 +2895,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2559
2895
  if (ip==iend) { someMoreWork = 0; break; } /* no more input */
2560
2896
  zds->streamStage = zdss_load;
2561
2897
  /* fall-through */
2898
+
2562
2899
  case zdss_load:
2563
2900
  { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
2564
2901
  size_t const toLoad = neededInSize - zds->inPos;
@@ -2585,6 +2922,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2585
2922
  } }
2586
2923
  zds->streamStage = zdss_flush;
2587
2924
  /* fall-through */
2925
+
2588
2926
  case zdss_flush:
2589
2927
  { size_t const toFlushSize = zds->outEnd - zds->outStart;
2590
2928
  size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
@@ -2631,8 +2969,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2631
2969
  return 1;
2632
2970
  } /* nextSrcSizeHint==0 */
2633
2971
  nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
2634
- if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */
2635
- nextSrcSizeHint -= zds->inPos; /* already loaded*/
2972
+ assert(zds->inPos <= nextSrcSizeHint);
2973
+ nextSrcSizeHint -= zds->inPos; /* part already loaded*/
2636
2974
  return nextSrcSizeHint;
2637
2975
  }
2638
2976
  }