zstd-ruby 1.3.3.0 → 1.3.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +13 -0
  4. data/ext/zstdruby/libzstd/README.md +32 -25
  5. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  6. data/ext/zstdruby/libzstd/common/compiler.h +25 -0
  7. data/ext/zstdruby/libzstd/common/cpu.h +216 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +1 -0
  9. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  10. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
  11. data/ext/zstdruby/libzstd/common/huf.h +114 -89
  12. data/ext/zstdruby/libzstd/common/pool.c +46 -17
  13. data/ext/zstdruby/libzstd/common/pool.h +18 -9
  14. data/ext/zstdruby/libzstd/common/threading.h +12 -12
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
  18. data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
  19. data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
  21. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
  26. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
  28. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
  30. data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
  32. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
  33. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
  34. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
  35. data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
  36. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
  37. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
  38. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
  42. data/ext/zstdruby/libzstd/zstd.h +254 -254
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. metadata +4 -3
@@ -14,8 +14,9 @@
14
14
  *****************************************************************/
15
15
  /*!
16
16
  * HEAPMODE :
17
- * Select how default decompression function ZSTD_decompress() will allocate memory,
18
- * in memory stack (0), or in memory heap (1, requires malloc())
17
+ * Select how default decompression function ZSTD_decompress() allocates its context,
18
+ * on stack (0), or into heap (1, default; requires malloc()).
19
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
19
20
  */
20
21
  #ifndef ZSTD_HEAPMODE
21
22
  # define ZSTD_HEAPMODE 1
@@ -23,17 +24,18 @@
23
24
 
24
25
  /*!
25
26
  * LEGACY_SUPPORT :
26
- * if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
27
+ * if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
27
28
  */
28
29
  #ifndef ZSTD_LEGACY_SUPPORT
29
30
  # define ZSTD_LEGACY_SUPPORT 0
30
31
  #endif
31
32
 
32
33
  /*!
33
- * MAXWINDOWSIZE_DEFAULT :
34
- * maximum window size accepted by DStream, by default.
35
- * Frames requiring more memory will be rejected.
36
- */
34
+ * MAXWINDOWSIZE_DEFAULT :
35
+ * maximum window size accepted by DStream __by default__.
36
+ * Frames requiring more memory will be rejected.
37
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
38
+ */
37
39
  #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
38
40
  # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
39
41
  #endif
@@ -43,6 +45,7 @@
43
45
  * Dependencies
44
46
  *********************************************************/
45
47
  #include <string.h> /* memcpy, memmove, memset */
48
+ #include "cpu.h"
46
49
  #include "mem.h" /* low level memory routines */
47
50
  #define FSE_STATIC_LINKING_ONLY
48
51
  #include "fse.h"
@@ -80,10 +83,25 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
80
83
  typedef enum { zdss_init=0, zdss_loadHeader,
81
84
  zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
82
85
 
86
+
87
+ typedef struct {
88
+ U32 fastMode;
89
+ U32 tableLog;
90
+ } ZSTD_seqSymbol_header;
91
+
92
+ typedef struct {
93
+ U16 nextState;
94
+ BYTE nbAdditionalBits;
95
+ BYTE nbBits;
96
+ U32 baseValue;
97
+ } ZSTD_seqSymbol;
98
+
99
+ #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
100
+
83
101
  typedef struct {
84
- FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
85
- FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
86
- FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
102
+ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
103
+ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
104
+ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
87
105
  HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
88
106
  U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
89
107
  U32 rep[ZSTD_REP_NUM];
@@ -91,9 +109,9 @@ typedef struct {
91
109
 
92
110
  struct ZSTD_DCtx_s
93
111
  {
94
- const FSE_DTable* LLTptr;
95
- const FSE_DTable* MLTptr;
96
- const FSE_DTable* OFTptr;
112
+ const ZSTD_seqSymbol* LLTptr;
113
+ const ZSTD_seqSymbol* MLTptr;
114
+ const ZSTD_seqSymbol* OFTptr;
97
115
  const HUF_DTable* HUFptr;
98
116
  ZSTD_entropyDTables_t entropy;
99
117
  const void* previousDstEnd; /* detect continuity */
@@ -116,6 +134,7 @@ struct ZSTD_DCtx_s
116
134
  size_t litSize;
117
135
  size_t rleSize;
118
136
  size_t staticSize;
137
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
119
138
 
120
139
  /* streaming */
121
140
  ZSTD_DDict* ddictLocal;
@@ -173,6 +192,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
173
192
  dctx->inBuffSize = 0;
174
193
  dctx->outBuffSize = 0;
175
194
  dctx->streamStage = zdss_init;
195
+ dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
176
196
  }
177
197
 
178
198
  ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@@ -204,6 +224,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
204
224
 
205
225
  ZSTD_DCtx* ZSTD_createDCtx(void)
206
226
  {
227
+ DEBUGLOG(3, "ZSTD_createDCtx");
207
228
  return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
208
229
  }
209
230
 
@@ -234,8 +255,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
234
255
 
235
256
 
236
257
  /*-*************************************************************
237
- * Decompression section
238
- ***************************************************************/
258
+ * Frame header decoding
259
+ ***************************************************************/
239
260
 
240
261
  /*! ZSTD_isFrame() :
241
262
  * Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -257,7 +278,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
257
278
 
258
279
  /** ZSTD_frameHeaderSize_internal() :
259
280
  * srcSize must be large enough to reach header size fields.
260
- * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
281
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
261
282
  * @return : size of the Frame Header
262
283
  * or an error code, which can be tested with ZSTD_isError() */
263
284
  static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
@@ -480,6 +501,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
480
501
  }
481
502
 
482
503
 
504
+ /*-*************************************************************
505
+ * Block decoding
506
+ ***************************************************************/
507
+
483
508
  /*! ZSTD_getcBlockSize() :
484
509
  * Provides the size of compressed block from block header `src` */
485
510
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -566,13 +591,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
566
591
 
567
592
  if (HUF_isError((litEncType==set_repeat) ?
568
593
  ( singleStream ?
569
- HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
570
- HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
594
+ HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
595
+ HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
571
596
  ( singleStream ?
572
- HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
573
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
574
- HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
575
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
597
+ HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
598
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
599
+ HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
600
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
576
601
  return ERROR(corruption_detected);
577
602
 
578
603
  dctx->litPtr = dctx->litBuffer;
@@ -647,115 +672,268 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
647
672
  }
648
673
  }
649
674
 
650
-
651
- typedef union {
652
- FSE_decode_t realData;
653
- U32 alignedBy4;
654
- } FSE_decode_t4;
675
+ /* Default FSE distribution tables.
676
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
677
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
678
+ * They were generated programmatically with the following method :
679
+ * - start from default distributions, present in /lib/common/zstd_internal.h
680
+ * - generate tables normally, using ZSTD_buildFSETable()
681
+ * - printout the content of tables
682
+ * - prettify output, report below, test with fuzzer to ensure it's correct */
655
683
 
656
684
  /* Default FSE distribution table for Literal Lengths */
657
- static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
658
- { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
659
- /* base, symbol, bits */
660
- { { 0, 0, 4 } }, { { 16, 0, 4 } }, { { 32, 1, 5 } }, { { 0, 3, 5 } },
661
- { { 0, 4, 5 } }, { { 0, 6, 5 } }, { { 0, 7, 5 } }, { { 0, 9, 5 } },
662
- { { 0, 10, 5 } }, { { 0, 12, 5 } }, { { 0, 14, 6 } }, { { 0, 16, 5 } },
663
- { { 0, 18, 5 } }, { { 0, 19, 5 } }, { { 0, 21, 5 } }, { { 0, 22, 5 } },
664
- { { 0, 24, 5 } }, { { 32, 25, 5 } }, { { 0, 26, 5 } }, { { 0, 27, 6 } },
665
- { { 0, 29, 6 } }, { { 0, 31, 6 } }, { { 32, 0, 4 } }, { { 0, 1, 4 } },
666
- { { 0, 2, 5 } }, { { 32, 4, 5 } }, { { 0, 5, 5 } }, { { 32, 7, 5 } },
667
- { { 0, 8, 5 } }, { { 32, 10, 5 } }, { { 0, 11, 5 } }, { { 0, 13, 6 } },
668
- { { 32, 16, 5 } }, { { 0, 17, 5 } }, { { 32, 19, 5 } }, { { 0, 20, 5 } },
669
- { { 32, 22, 5 } }, { { 0, 23, 5 } }, { { 0, 25, 4 } }, { { 16, 25, 4 } },
670
- { { 32, 26, 5 } }, { { 0, 28, 6 } }, { { 0, 30, 6 } }, { { 48, 0, 4 } },
671
- { { 16, 1, 4 } }, { { 32, 2, 5 } }, { { 32, 3, 5 } }, { { 32, 5, 5 } },
672
- { { 32, 6, 5 } }, { { 32, 8, 5 } }, { { 32, 9, 5 } }, { { 32, 11, 5 } },
673
- { { 32, 12, 5 } }, { { 0, 15, 6 } }, { { 32, 17, 5 } }, { { 32, 18, 5 } },
674
- { { 32, 20, 5 } }, { { 32, 21, 5 } }, { { 32, 23, 5 } }, { { 32, 24, 5 } },
675
- { { 0, 35, 6 } }, { { 0, 34, 6 } }, { { 0, 33, 6 } }, { { 0, 32, 6 } },
685
+ static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
686
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
687
+ /* nextState, nbAddBits, nbBits, baseVal */
688
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
689
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
690
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
691
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
692
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
693
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
694
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
695
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
696
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
697
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
698
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
699
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
700
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
701
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
702
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
703
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
704
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
705
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
706
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
707
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
708
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
709
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
710
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
711
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
712
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
713
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
714
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
715
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
716
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
717
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
718
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
719
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
676
720
  }; /* LL_defaultDTable */
677
721
 
722
+ /* Default FSE distribution table for Offset Codes */
723
+ static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
724
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
725
+ /* nextState, nbAddBits, nbBits, baseVal */
726
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
727
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
728
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
729
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
730
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
731
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
732
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
733
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
734
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
735
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
736
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
737
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
738
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
739
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
740
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
741
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
742
+ }; /* OF_defaultDTable */
743
+
744
+
678
745
  /* Default FSE distribution table for Match Lengths */
679
- static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
680
- { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
681
- /* base, symbol, bits */
682
- { { 0, 0, 6 } }, { { 0, 1, 4 } }, { { 32, 2, 5 } }, { { 0, 3, 5 } },
683
- { { 0, 5, 5 } }, { { 0, 6, 5 } }, { { 0, 8, 5 } }, { { 0, 10, 6 } },
684
- { { 0, 13, 6 } }, { { 0, 16, 6 } }, { { 0, 19, 6 } }, { { 0, 22, 6 } },
685
- { { 0, 25, 6 } }, { { 0, 28, 6 } }, { { 0, 31, 6 } }, { { 0, 33, 6 } },
686
- { { 0, 35, 6 } }, { { 0, 37, 6 } }, { { 0, 39, 6 } }, { { 0, 41, 6 } },
687
- { { 0, 43, 6 } }, { { 0, 45, 6 } }, { { 16, 1, 4 } }, { { 0, 2, 4 } },
688
- { { 32, 3, 5 } }, { { 0, 4, 5 } }, { { 32, 6, 5 } }, { { 0, 7, 5 } },
689
- { { 0, 9, 6 } }, { { 0, 12, 6 } }, { { 0, 15, 6 } }, { { 0, 18, 6 } },
690
- { { 0, 21, 6 } }, { { 0, 24, 6 } }, { { 0, 27, 6 } }, { { 0, 30, 6 } },
691
- { { 0, 32, 6 } }, { { 0, 34, 6 } }, { { 0, 36, 6 } }, { { 0, 38, 6 } },
692
- { { 0, 40, 6 } }, { { 0, 42, 6 } }, { { 0, 44, 6 } }, { { 32, 1, 4 } },
693
- { { 48, 1, 4 } }, { { 16, 2, 4 } }, { { 32, 4, 5 } }, { { 32, 5, 5 } },
694
- { { 32, 7, 5 } }, { { 32, 8, 5 } }, { { 0, 11, 6 } }, { { 0, 14, 6 } },
695
- { { 0, 17, 6 } }, { { 0, 20, 6 } }, { { 0, 23, 6 } }, { { 0, 26, 6 } },
696
- { { 0, 29, 6 } }, { { 0, 52, 6 } }, { { 0, 51, 6 } }, { { 0, 50, 6 } },
697
- { { 0, 49, 6 } }, { { 0, 48, 6 } }, { { 0, 47, 6 } }, { { 0, 46, 6 } },
746
+ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
747
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
748
+ /* nextState, nbAddBits, nbBits, baseVal */
749
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
750
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
751
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
752
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
753
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
754
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
755
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
756
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
757
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
758
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
759
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
760
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
761
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
762
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
763
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
764
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
765
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
766
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
767
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
768
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
769
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
770
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
771
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
772
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
773
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
774
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
775
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
776
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
777
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
778
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
779
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
780
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
698
781
  }; /* ML_defaultDTable */
699
782
 
700
- /* Default FSE distribution table for Offset Codes */
701
- static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
702
- { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
703
- /* base, symbol, bits */
704
- { { 0, 0, 5 } }, { { 0, 6, 4 } },
705
- { { 0, 9, 5 } }, { { 0, 15, 5 } },
706
- { { 0, 21, 5 } }, { { 0, 3, 5 } },
707
- { { 0, 7, 4 } }, { { 0, 12, 5 } },
708
- { { 0, 18, 5 } }, { { 0, 23, 5 } },
709
- { { 0, 5, 5 } }, { { 0, 8, 4 } },
710
- { { 0, 14, 5 } }, { { 0, 20, 5 } },
711
- { { 0, 2, 5 } }, { { 16, 7, 4 } },
712
- { { 0, 11, 5 } }, { { 0, 17, 5 } },
713
- { { 0, 22, 5 } }, { { 0, 4, 5 } },
714
- { { 16, 8, 4 } }, { { 0, 13, 5 } },
715
- { { 0, 19, 5 } }, { { 0, 1, 5 } },
716
- { { 16, 6, 4 } }, { { 0, 10, 5 } },
717
- { { 0, 16, 5 } }, { { 0, 28, 5 } },
718
- { { 0, 27, 5 } }, { { 0, 26, 5 } },
719
- { { 0, 25, 5 } }, { { 0, 24, 5 } },
720
- }; /* OF_defaultDTable */
783
+
784
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
785
+ {
786
+ void* ptr = dt;
787
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
788
+ ZSTD_seqSymbol* const cell = dt + 1;
789
+
790
+ DTableH->tableLog = 0;
791
+ DTableH->fastMode = 0;
792
+
793
+ cell->nbBits = 0;
794
+ cell->nextState = 0;
795
+ assert(nbAddBits < 255);
796
+ cell->nbAdditionalBits = (BYTE)nbAddBits;
797
+ cell->baseValue = baseValue;
798
+ }
799
+
800
+
801
+ /* ZSTD_buildFSETable() :
802
+ * generate FSE decoding table for one symbol (ll, ml or off) */
803
+ static void
804
+ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
805
+ const short* normalizedCounter, unsigned maxSymbolValue,
806
+ const U32* baseValue, const U32* nbAdditionalBits,
807
+ unsigned tableLog)
808
+ {
809
+ ZSTD_seqSymbol* const tableDecode = dt+1;
810
+ U16 symbolNext[MaxSeq+1];
811
+
812
+ U32 const maxSV1 = maxSymbolValue + 1;
813
+ U32 const tableSize = 1 << tableLog;
814
+ U32 highThreshold = tableSize-1;
815
+
816
+ /* Sanity Checks */
817
+ assert(maxSymbolValue <= MaxSeq);
818
+ assert(tableLog <= MaxFSELog);
819
+
820
+ /* Init, lay down lowprob symbols */
821
+ { ZSTD_seqSymbol_header DTableH;
822
+ DTableH.tableLog = tableLog;
823
+ DTableH.fastMode = 1;
824
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
825
+ U32 s;
826
+ for (s=0; s<maxSV1; s++) {
827
+ if (normalizedCounter[s]==-1) {
828
+ tableDecode[highThreshold--].baseValue = s;
829
+ symbolNext[s] = 1;
830
+ } else {
831
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
832
+ symbolNext[s] = normalizedCounter[s];
833
+ } } }
834
+ memcpy(dt, &DTableH, sizeof(DTableH));
835
+ }
836
+
837
+ /* Spread symbols */
838
+ { U32 const tableMask = tableSize-1;
839
+ U32 const step = FSE_TABLESTEP(tableSize);
840
+ U32 s, position = 0;
841
+ for (s=0; s<maxSV1; s++) {
842
+ int i;
843
+ for (i=0; i<normalizedCounter[s]; i++) {
844
+ tableDecode[position].baseValue = s;
845
+ position = (position + step) & tableMask;
846
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
847
+ } }
848
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
849
+ }
850
+
851
+ /* Build Decoding table */
852
+ { U32 u;
853
+ for (u=0; u<tableSize; u++) {
854
+ U32 const symbol = tableDecode[u].baseValue;
855
+ U32 const nextState = symbolNext[symbol]++;
856
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
857
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
858
+ assert(nbAdditionalBits[symbol] < 255);
859
+ tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
860
+ tableDecode[u].baseValue = baseValue[symbol];
861
+ } }
862
+ }
863
+
721
864
 
722
865
  /*! ZSTD_buildSeqTable() :
723
866
  * @return : nb bytes read from src,
724
- * or an error code if it fails, testable with ZSTD_isError()
725
- */
726
- static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTablePtr,
867
+ * or an error code if it fails */
868
+ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
727
869
  symbolEncodingType_e type, U32 max, U32 maxLog,
728
870
  const void* src, size_t srcSize,
729
- const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
871
+ const U32* baseValue, const U32* nbAdditionalBits,
872
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
730
873
  {
731
- const void* const tmpPtr = defaultTable; /* bypass strict aliasing */
732
874
  switch(type)
733
875
  {
734
876
  case set_rle :
735
877
  if (!srcSize) return ERROR(srcSize_wrong);
736
878
  if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
737
- FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src);
879
+ { U32 const symbol = *(const BYTE*)src;
880
+ U32 const baseline = baseValue[symbol];
881
+ U32 const nbBits = nbAdditionalBits[symbol];
882
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
883
+ }
738
884
  *DTablePtr = DTableSpace;
739
885
  return 1;
740
886
  case set_basic :
741
- *DTablePtr = (const FSE_DTable*)tmpPtr;
887
+ *DTablePtr = defaultTable;
742
888
  return 0;
743
889
  case set_repeat:
744
890
  if (!flagRepeatTable) return ERROR(corruption_detected);
745
891
  return 0;
746
- default : /* impossible */
747
892
  case set_compressed :
748
893
  { U32 tableLog;
749
894
  S16 norm[MaxSeq+1];
750
895
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
751
896
  if (FSE_isError(headerSize)) return ERROR(corruption_detected);
752
897
  if (tableLog > maxLog) return ERROR(corruption_detected);
753
- FSE_buildDTable(DTableSpace, norm, max, tableLog);
898
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
754
899
  *DTablePtr = DTableSpace;
755
900
  return headerSize;
756
- } }
901
+ }
902
+ default : /* impossible */
903
+ assert(0);
904
+ return ERROR(GENERIC);
905
+ }
757
906
  }
758
907
 
908
+ static const U32 LL_base[MaxLL+1] = {
909
+ 0, 1, 2, 3, 4, 5, 6, 7,
910
+ 8, 9, 10, 11, 12, 13, 14, 15,
911
+ 16, 18, 20, 22, 24, 28, 32, 40,
912
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
913
+ 0x2000, 0x4000, 0x8000, 0x10000 };
914
+
915
+ static const U32 OF_base[MaxOff+1] = {
916
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
917
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
918
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
919
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
920
+
921
+ static const U32 OF_bits[MaxOff+1] = {
922
+ 0, 1, 2, 3, 4, 5, 6, 7,
923
+ 8, 9, 10, 11, 12, 13, 14, 15,
924
+ 16, 17, 18, 19, 20, 21, 22, 23,
925
+ 24, 25, 26, 27, 28, 29, 30, 31 };
926
+
927
+ static const U32 ML_base[MaxML+1] = {
928
+ 3, 4, 5, 6, 7, 8, 9, 10,
929
+ 11, 12, 13, 14, 15, 16, 17, 18,
930
+ 19, 20, 21, 22, 23, 24, 25, 26,
931
+ 27, 28, 29, 30, 31, 32, 33, 34,
932
+ 35, 37, 39, 41, 43, 47, 51, 59,
933
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
934
+ 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
935
+
936
+
759
937
  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
760
938
  const void* src, size_t srcSize)
761
939
  {
@@ -792,19 +970,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
792
970
  /* Build DTables */
793
971
  { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
794
972
  LLtype, MaxLL, LLFSELog,
795
- ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
973
+ ip, iend-ip,
974
+ LL_base, LL_bits,
975
+ LL_defaultDTable, dctx->fseEntropy);
796
976
  if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
797
977
  ip += llhSize;
798
978
  }
979
+
799
980
  { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
800
981
  OFtype, MaxOff, OffFSELog,
801
- ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
982
+ ip, iend-ip,
983
+ OF_base, OF_bits,
984
+ OF_defaultDTable, dctx->fseEntropy);
802
985
  if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
803
986
  ip += ofhSize;
804
987
  }
988
+
805
989
  { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
806
990
  MLtype, MaxML, MLFSELog,
807
- ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
991
+ ip, iend-ip,
992
+ ML_base, ML_bits,
993
+ ML_defaultDTable, dctx->fseEntropy);
808
994
  if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
809
995
  ip += mlhSize;
810
996
  }
@@ -821,11 +1007,16 @@ typedef struct {
821
1007
  const BYTE* match;
822
1008
  } seq_t;
823
1009
 
1010
+ typedef struct {
1011
+ size_t state;
1012
+ const ZSTD_seqSymbol* table;
1013
+ } ZSTD_fseState;
1014
+
824
1015
  typedef struct {
825
1016
  BIT_DStream_t DStream;
826
- FSE_DState_t stateLL;
827
- FSE_DState_t stateOffb;
828
- FSE_DState_t stateML;
1017
+ ZSTD_fseState stateLL;
1018
+ ZSTD_fseState stateOffb;
1019
+ ZSTD_fseState stateML;
829
1020
  size_t prevOffset[ZSTD_REP_NUM];
830
1021
  const BYTE* prefixStart;
831
1022
  const BYTE* dictEnd;
@@ -880,118 +1071,6 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
880
1071
  }
881
1072
 
882
1073
 
883
- typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
884
-
885
- /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
886
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
887
- * bits before reloading. This value is the maximum number of bytes we read
888
- * after reloading when we are decoding long offets.
889
- */
890
- #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
891
- (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
892
- ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
893
- : 0)
894
-
895
- static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
896
- {
897
- seq_t seq;
898
-
899
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
900
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
901
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
902
-
903
- U32 const llBits = LL_bits[llCode];
904
- U32 const mlBits = ML_bits[mlCode];
905
- U32 const ofBits = ofCode;
906
- U32 const totalBits = llBits+mlBits+ofBits;
907
-
908
- static const U32 LL_base[MaxLL+1] = {
909
- 0, 1, 2, 3, 4, 5, 6, 7,
910
- 8, 9, 10, 11, 12, 13, 14, 15,
911
- 16, 18, 20, 22, 24, 28, 32, 40,
912
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
913
- 0x2000, 0x4000, 0x8000, 0x10000 };
914
-
915
- static const U32 ML_base[MaxML+1] = {
916
- 3, 4, 5, 6, 7, 8, 9, 10,
917
- 11, 12, 13, 14, 15, 16, 17, 18,
918
- 19, 20, 21, 22, 23, 24, 25, 26,
919
- 27, 28, 29, 30, 31, 32, 33, 34,
920
- 35, 37, 39, 41, 43, 47, 51, 59,
921
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
922
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
923
-
924
- static const U32 OF_base[MaxOff+1] = {
925
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
926
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
927
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
928
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
929
-
930
- /* sequence */
931
- { size_t offset;
932
- if (!ofCode)
933
- offset = 0;
934
- else {
935
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
936
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
937
- assert(ofBits <= MaxOff);
938
- if (MEM_32bits() && longOffsets) {
939
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
940
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
941
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
942
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
943
- } else {
944
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
945
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
946
- }
947
- }
948
-
949
- if (ofCode <= 1) {
950
- offset += (llCode==0);
951
- if (offset) {
952
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
953
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
954
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
955
- seqState->prevOffset[1] = seqState->prevOffset[0];
956
- seqState->prevOffset[0] = offset = temp;
957
- } else {
958
- offset = seqState->prevOffset[0];
959
- }
960
- } else {
961
- seqState->prevOffset[2] = seqState->prevOffset[1];
962
- seqState->prevOffset[1] = seqState->prevOffset[0];
963
- seqState->prevOffset[0] = offset;
964
- }
965
- seq.offset = offset;
966
- }
967
-
968
- seq.matchLength = ML_base[mlCode]
969
- + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
970
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
971
- BIT_reloadDStream(&seqState->DStream);
972
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
973
- BIT_reloadDStream(&seqState->DStream);
974
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
975
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
976
-
977
- seq.litLength = LL_base[llCode]
978
- + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
979
- if (MEM_32bits())
980
- BIT_reloadDStream(&seqState->DStream);
981
-
982
- DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
983
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
984
-
985
- /* ANS state update */
986
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
987
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
988
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
989
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
990
-
991
- return seq;
992
- }
993
-
994
-
995
1074
  HINT_INLINE
996
1075
  size_t ZSTD_execSequence(BYTE* op,
997
1076
  BYTE* const oend, seq_t sequence,
@@ -1073,10 +1152,199 @@ size_t ZSTD_execSequence(BYTE* op,
1073
1152
  }
1074
1153
 
1075
1154
 
1076
- static size_t ZSTD_decompressSequences(
1077
- ZSTD_DCtx* dctx,
1155
+ HINT_INLINE
1156
+ size_t ZSTD_execSequenceLong(BYTE* op,
1157
+ BYTE* const oend, seq_t sequence,
1158
+ const BYTE** litPtr, const BYTE* const litLimit,
1159
+ const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
1160
+ {
1161
+ BYTE* const oLitEnd = op + sequence.litLength;
1162
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1163
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1164
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1165
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1166
+ const BYTE* match = sequence.match;
1167
+
1168
+ /* check */
1169
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1170
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1171
+ if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
1172
+
1173
+ /* copy Literals */
1174
+ ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
1175
+ if (sequence.litLength > 8)
1176
+ ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1177
+ op = oLitEnd;
1178
+ *litPtr = iLitEnd; /* update for next sequence */
1179
+
1180
+ /* copy Match */
1181
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1182
+ /* offset beyond prefix */
1183
+ if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
1184
+ if (match + sequence.matchLength <= dictEnd) {
1185
+ memmove(oLitEnd, match, sequence.matchLength);
1186
+ return sequenceLength;
1187
+ }
1188
+ /* span extDict & currentPrefixSegment */
1189
+ { size_t const length1 = dictEnd - match;
1190
+ memmove(oLitEnd, match, length1);
1191
+ op = oLitEnd + length1;
1192
+ sequence.matchLength -= length1;
1193
+ match = prefixStart;
1194
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
1195
+ U32 i;
1196
+ for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1197
+ return sequenceLength;
1198
+ }
1199
+ } }
1200
+ assert(op <= oend_w);
1201
+ assert(sequence.matchLength >= MINMATCH);
1202
+
1203
+ /* match within prefix */
1204
+ if (sequence.offset < 8) {
1205
+ /* close range match, overlap */
1206
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1207
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1208
+ int const sub2 = dec64table[sequence.offset];
1209
+ op[0] = match[0];
1210
+ op[1] = match[1];
1211
+ op[2] = match[2];
1212
+ op[3] = match[3];
1213
+ match += dec32table[sequence.offset];
1214
+ ZSTD_copy4(op+4, match);
1215
+ match -= sub2;
1216
+ } else {
1217
+ ZSTD_copy8(op, match);
1218
+ }
1219
+ op += 8; match += 8;
1220
+
1221
+ if (oMatchEnd > oend-(16-MINMATCH)) {
1222
+ if (op < oend_w) {
1223
+ ZSTD_wildcopy(op, match, oend_w - op);
1224
+ match += oend_w - op;
1225
+ op = oend_w;
1226
+ }
1227
+ while (op < oMatchEnd) *op++ = *match++;
1228
+ } else {
1229
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1230
+ }
1231
+ return sequenceLength;
1232
+ }
1233
+
1234
+ static void
1235
+ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1236
+ {
1237
+ const void* ptr = dt;
1238
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1239
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1240
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1241
+ (U32)DStatePtr->state, DTableH->tableLog);
1242
+ BIT_reloadDStream(bitD);
1243
+ DStatePtr->table = dt + 1;
1244
+ }
1245
+
1246
+ FORCE_INLINE_TEMPLATE void
1247
+ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
1248
+ {
1249
+ ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
1250
+ U32 const nbBits = DInfo.nbBits;
1251
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
1252
+ DStatePtr->state = DInfo.nextState + lowBits;
1253
+ }
1254
+
1255
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1256
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1257
+ * bits before reloading. This value is the maximum number of bits we read
1258
+ * after reloading when we are decoding long offsets.
1259
+ */
1260
+ #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1261
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1262
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1263
+ : 0)
1264
+
1265
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1266
+
1267
+ FORCE_INLINE_TEMPLATE seq_t
1268
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1269
+ {
1270
+ seq_t seq;
1271
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1272
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1273
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1274
+ U32 const totalBits = llBits+mlBits+ofBits;
1275
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1276
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1277
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1278
+
1279
+ /* sequence */
1280
+ { size_t offset;
1281
+ if (!ofBits)
1282
+ offset = 0;
1283
+ else {
1284
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1285
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1286
+ assert(ofBits <= MaxOff);
1287
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1288
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1289
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1290
+ BIT_reloadDStream(&seqState->DStream);
1291
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1292
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
1293
+ } else {
1294
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1295
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1296
+ }
1297
+ }
1298
+
1299
+ if (ofBits <= 1) {
1300
+ offset += (llBase==0);
1301
+ if (offset) {
1302
+ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1303
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1304
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1305
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1306
+ seqState->prevOffset[0] = offset = temp;
1307
+ } else { /* offset == 0 */
1308
+ offset = seqState->prevOffset[0];
1309
+ }
1310
+ } else {
1311
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1312
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1313
+ seqState->prevOffset[0] = offset;
1314
+ }
1315
+ seq.offset = offset;
1316
+ }
1317
+
1318
+ seq.matchLength = mlBase
1319
+ + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
1320
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321
+ BIT_reloadDStream(&seqState->DStream);
1322
+ if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323
+ BIT_reloadDStream(&seqState->DStream);
1324
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1325
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1326
+
1327
+ seq.litLength = llBase
1328
+ + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
1329
+ if (MEM_32bits())
1330
+ BIT_reloadDStream(&seqState->DStream);
1331
+
1332
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1333
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1334
+
1335
+ /* ANS state update */
1336
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1337
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1338
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1339
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1340
+
1341
+ return seq;
1342
+ }
1343
+
1344
+ FORCE_INLINE_TEMPLATE size_t
1345
+ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
1078
1346
  void* dst, size_t maxDstSize,
1079
- const void* seqStart, size_t seqSize,
1347
+ const void* seqStart, size_t seqSize, int nbSeq,
1080
1348
  const ZSTD_longOffset_e isLongOffset)
1081
1349
  {
1082
1350
  const BYTE* ip = (const BYTE*)seqStart;
@@ -1089,26 +1357,17 @@ static size_t ZSTD_decompressSequences(
1089
1357
  const BYTE* const base = (const BYTE*) (dctx->base);
1090
1358
  const BYTE* const vBase = (const BYTE*) (dctx->vBase);
1091
1359
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1092
- int nbSeq;
1093
1360
  DEBUGLOG(5, "ZSTD_decompressSequences");
1094
1361
 
1095
- /* Build Decoding Tables */
1096
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
1097
- DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
1098
- (U32)seqHSize, nbSeq);
1099
- if (ZSTD_isError(seqHSize)) return seqHSize;
1100
- ip += seqHSize;
1101
- }
1102
-
1103
1362
  /* Regen sequences */
1104
1363
  if (nbSeq) {
1105
1364
  seqState_t seqState;
1106
1365
  dctx->fseEntropy = 1;
1107
1366
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1108
1367
  CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1109
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1110
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1111
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1368
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1369
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1370
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1112
1371
 
1113
1372
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1114
1373
  nbSeq--;
@@ -1120,7 +1379,7 @@ static size_t ZSTD_decompressSequences(
1120
1379
  } }
1121
1380
 
1122
1381
  /* check if reached exact end */
1123
- DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
1382
+ DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
1124
1383
  if (nbSeq) return ERROR(corruption_detected);
1125
1384
  /* save reps for next block */
1126
1385
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1136,46 +1395,32 @@ static size_t ZSTD_decompressSequences(
1136
1395
  return op-ostart;
1137
1396
  }
1138
1397
 
1139
-
1140
- HINT_INLINE
1141
- seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
1398
+ static size_t
1399
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1400
+ void* dst, size_t maxDstSize,
1401
+ const void* seqStart, size_t seqSize, int nbSeq,
1402
+ const ZSTD_longOffset_e isLongOffset)
1142
1403
  {
1143
- seq_t seq;
1404
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1405
+ }
1144
1406
 
1145
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
1146
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
1147
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
1148
1407
 
1149
- U32 const llBits = LL_bits[llCode];
1150
- U32 const mlBits = ML_bits[mlCode];
1151
- U32 const ofBits = ofCode;
1152
- U32 const totalBits = llBits+mlBits+ofBits;
1153
1408
 
1154
- static const U32 LL_base[MaxLL+1] = {
1155
- 0, 1, 2, 3, 4, 5, 6, 7,
1156
- 8, 9, 10, 11, 12, 13, 14, 15,
1157
- 16, 18, 20, 22, 24, 28, 32, 40,
1158
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
1159
- 0x2000, 0x4000, 0x8000, 0x10000 };
1160
-
1161
- static const U32 ML_base[MaxML+1] = {
1162
- 3, 4, 5, 6, 7, 8, 9, 10,
1163
- 11, 12, 13, 14, 15, 16, 17, 18,
1164
- 19, 20, 21, 22, 23, 24, 25, 26,
1165
- 27, 28, 29, 30, 31, 32, 33, 34,
1166
- 35, 37, 39, 41, 43, 47, 51, 59,
1167
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
1168
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
1169
-
1170
- static const U32 OF_base[MaxOff+1] = {
1171
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
1172
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
1173
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
1174
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
1409
+ FORCE_INLINE_TEMPLATE seq_t
1410
+ ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
1411
+ {
1412
+ seq_t seq;
1413
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
1414
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
1415
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
1416
+ U32 const totalBits = llBits+mlBits+ofBits;
1417
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
1418
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
1419
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
1175
1420
 
1176
1421
  /* sequence */
1177
1422
  { size_t offset;
1178
- if (!ofCode)
1423
+ if (!ofBits)
1179
1424
  offset = 0;
1180
1425
  else {
1181
1426
  ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
@@ -1183,17 +1428,17 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1183
1428
  assert(ofBits <= MaxOff);
1184
1429
  if (MEM_32bits() && longOffsets) {
1185
1430
  U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
1186
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1431
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1187
1432
  if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
1188
1433
  if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1189
1434
  } else {
1190
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1435
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1191
1436
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1192
1437
  }
1193
1438
  }
1194
1439
 
1195
- if (ofCode <= 1) {
1196
- offset += (llCode==0);
1440
+ if (ofBits <= 1) {
1441
+ offset += (llBase==0);
1197
1442
  if (offset) {
1198
1443
  size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1199
1444
  temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
@@ -1211,7 +1456,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1211
1456
  seq.offset = offset;
1212
1457
  }
1213
1458
 
1214
- seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1459
+ seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
1215
1460
  if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1216
1461
  BIT_reloadDStream(&seqState->DStream);
1217
1462
  if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
@@ -1219,7 +1464,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1219
1464
  /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
1220
1465
  ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1221
1466
 
1222
- seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1467
+ seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
1223
1468
  if (MEM_32bits())
1224
1469
  BIT_reloadDStream(&seqState->DStream);
1225
1470
 
@@ -1231,98 +1476,19 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
1231
1476
  }
1232
1477
 
1233
1478
  /* ANS state update */
1234
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1235
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1479
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
1480
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1236
1481
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1237
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1482
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
1238
1483
 
1239
1484
  return seq;
1240
1485
  }
1241
1486
 
1242
-
1243
- HINT_INLINE
1244
- size_t ZSTD_execSequenceLong(BYTE* op,
1245
- BYTE* const oend, seq_t sequence,
1246
- const BYTE** litPtr, const BYTE* const litLimit,
1247
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
1248
- {
1249
- BYTE* const oLitEnd = op + sequence.litLength;
1250
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1251
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1252
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
1253
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1254
- const BYTE* match = sequence.match;
1255
-
1256
- /* check */
1257
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1258
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1259
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
1260
-
1261
- /* copy Literals */
1262
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
1263
- if (sequence.litLength > 8)
1264
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
1265
- op = oLitEnd;
1266
- *litPtr = iLitEnd; /* update for next sequence */
1267
-
1268
- /* copy Match */
1269
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1270
- /* offset beyond prefix */
1271
- if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
1272
- if (match + sequence.matchLength <= dictEnd) {
1273
- memmove(oLitEnd, match, sequence.matchLength);
1274
- return sequenceLength;
1275
- }
1276
- /* span extDict & currentPrefixSegment */
1277
- { size_t const length1 = dictEnd - match;
1278
- memmove(oLitEnd, match, length1);
1279
- op = oLitEnd + length1;
1280
- sequence.matchLength -= length1;
1281
- match = prefixStart;
1282
- if (op > oend_w || sequence.matchLength < MINMATCH) {
1283
- U32 i;
1284
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1285
- return sequenceLength;
1286
- }
1287
- } }
1288
- assert(op <= oend_w);
1289
- assert(sequence.matchLength >= MINMATCH);
1290
-
1291
- /* match within prefix */
1292
- if (sequence.offset < 8) {
1293
- /* close range match, overlap */
1294
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
1295
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
1296
- int const sub2 = dec64table[sequence.offset];
1297
- op[0] = match[0];
1298
- op[1] = match[1];
1299
- op[2] = match[2];
1300
- op[3] = match[3];
1301
- match += dec32table[sequence.offset];
1302
- ZSTD_copy4(op+4, match);
1303
- match -= sub2;
1304
- } else {
1305
- ZSTD_copy8(op, match);
1306
- }
1307
- op += 8; match += 8;
1308
-
1309
- if (oMatchEnd > oend-(16-MINMATCH)) {
1310
- if (op < oend_w) {
1311
- ZSTD_wildcopy(op, match, oend_w - op);
1312
- match += oend_w - op;
1313
- op = oend_w;
1314
- }
1315
- while (op < oMatchEnd) *op++ = *match++;
1316
- } else {
1317
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1318
- }
1319
- return sequenceLength;
1320
- }
1321
-
1322
- static size_t ZSTD_decompressSequencesLong(
1487
+ FORCE_INLINE_TEMPLATE size_t
1488
+ ZSTD_decompressSequencesLong_body(
1323
1489
  ZSTD_DCtx* dctx,
1324
1490
  void* dst, size_t maxDstSize,
1325
- const void* seqStart, size_t seqSize,
1491
+ const void* seqStart, size_t seqSize, int nbSeq,
1326
1492
  const ZSTD_longOffset_e isLongOffset)
1327
1493
  {
1328
1494
  const BYTE* ip = (const BYTE*)seqStart;
@@ -1335,13 +1501,6 @@ static size_t ZSTD_decompressSequencesLong(
1335
1501
  const BYTE* const prefixStart = (const BYTE*) (dctx->base);
1336
1502
  const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
1337
1503
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338
- int nbSeq;
1339
-
1340
- /* Build Decoding Tables */
1341
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
1342
- if (ZSTD_isError(seqHSize)) return seqHSize;
1343
- ip += seqHSize;
1344
- }
1345
1504
 
1346
1505
  /* Regen sequences */
1347
1506
  if (nbSeq) {
@@ -1358,18 +1517,18 @@ static size_t ZSTD_decompressSequencesLong(
1358
1517
  seqState.pos = (size_t)(op-prefixStart);
1359
1518
  seqState.dictEnd = dictEnd;
1360
1519
  CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
1361
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1362
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1363
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1520
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1521
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1522
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1364
1523
 
1365
1524
  /* prepare in advance */
1366
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
1525
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1367
1526
  sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1368
1527
  }
1369
1528
  if (seqNb<seqAdvance) return ERROR(corruption_detected);
1370
1529
 
1371
1530
  /* decode and decompress */
1372
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
1531
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1373
1532
  seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
1374
1533
  size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1375
1534
  if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@@ -1389,6 +1548,9 @@ static size_t ZSTD_decompressSequencesLong(
1389
1548
 
1390
1549
  /* save reps for next block */
1391
1550
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1551
+ #undef STORED_SEQS
1552
+ #undef STOSEQ_MASK
1553
+ #undef ADVANCED_SEQS
1392
1554
  }
1393
1555
 
1394
1556
  /* last literal segment */
@@ -1401,6 +1563,96 @@ static size_t ZSTD_decompressSequencesLong(
1401
1563
  return op-ostart;
1402
1564
  }
1403
1565
 
1566
+ static size_t
1567
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1568
+ void* dst, size_t maxDstSize,
1569
+ const void* seqStart, size_t seqSize, int nbSeq,
1570
+ const ZSTD_longOffset_e isLongOffset)
1571
+ {
1572
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1573
+ }
1574
+
1575
+
1576
+
1577
+ #if DYNAMIC_BMI2
1578
+
1579
+ static TARGET_ATTRIBUTE("bmi2") size_t
1580
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1581
+ void* dst, size_t maxDstSize,
1582
+ const void* seqStart, size_t seqSize, int nbSeq,
1583
+ const ZSTD_longOffset_e isLongOffset)
1584
+ {
1585
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1586
+ }
1587
+
1588
+ static TARGET_ATTRIBUTE("bmi2") size_t
1589
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1590
+ void* dst, size_t maxDstSize,
1591
+ const void* seqStart, size_t seqSize, int nbSeq,
1592
+ const ZSTD_longOffset_e isLongOffset)
1593
+ {
1594
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1595
+ }
1596
+
1597
+ #endif
1598
+
1599
+ typedef size_t (*ZSTD_decompressSequences_t)(
1600
+ ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
1601
+ const void *seqStart, size_t seqSize, int nbSeq,
1602
+ const ZSTD_longOffset_e isLongOffset);
1603
+
1604
+ static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1605
+ const void* seqStart, size_t seqSize, int nbSeq,
1606
+ const ZSTD_longOffset_e isLongOffset)
1607
+ {
1608
+ DEBUGLOG(5, "ZSTD_decompressSequences");
1609
+ #if DYNAMIC_BMI2
1610
+ if (dctx->bmi2) {
1611
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1612
+ }
1613
+ #endif
1614
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1615
+ }
1616
+
1617
+ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1618
+ void* dst, size_t maxDstSize,
1619
+ const void* seqStart, size_t seqSize, int nbSeq,
1620
+ const ZSTD_longOffset_e isLongOffset)
1621
+ {
1622
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1623
+ #if DYNAMIC_BMI2
1624
+ if (dctx->bmi2) {
1625
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1626
+ }
1627
+ #endif
1628
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1629
+ }
1630
+
1631
+ /* ZSTD_getLongOffsetsShare() :
1632
+ * condition : offTable must be valid
1633
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1634
+ * compared to maximum possible of (1<<OffFSELog) */
1635
+ static unsigned
1636
+ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1637
+ {
1638
+ const void* ptr = offTable;
1639
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1640
+ const ZSTD_seqSymbol* table = offTable + 1;
1641
+ U32 const max = 1 << tableLog;
1642
+ U32 u, total = 0;
1643
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1644
+
1645
+ assert(max <= (1 << OffFSELog)); /* max not too large */
1646
+ for (u=0; u<max; u++) {
1647
+ if (table[u].nbAdditionalBits > 22) total += 1;
1648
+ }
1649
+
1650
+ assert(tableLog <= OffFSELog);
1651
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1652
+
1653
+ return total;
1654
+ }
1655
+
1404
1656
 
1405
1657
  static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1406
1658
  void* dst, size_t dstCapacity,
@@ -1410,13 +1662,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1410
1662
  /* isLongOffset must be true if there are long offsets.
1411
1663
  * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1412
1664
  * We don't expect that to be the case in 64-bit mode.
1413
- * If we are in block mode we don't know the window size, so we have to be
1414
- * conservative.
1665
+ * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
1415
1666
  */
1416
1667
  ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1417
- /* windowSize could be any value at this point, since it is only validated
1418
- * in the streaming API.
1419
- */
1420
1668
  DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1421
1669
 
1422
1670
  if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1428,9 +1676,24 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1428
1676
  ip += litCSize;
1429
1677
  srcSize -= litCSize;
1430
1678
  }
1431
- if (frame && dctx->fParams.windowSize > (1<<23))
1432
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
1433
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
1679
+
1680
+ /* Build Decoding Tables */
1681
+ { int nbSeq;
1682
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1683
+ if (ZSTD_isError(seqHSize)) return seqHSize;
1684
+ ip += seqHSize;
1685
+ srcSize -= seqHSize;
1686
+
1687
+ if ( (!frame || dctx->fParams.windowSize > (1<<24))
1688
+ && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
1689
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1690
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1691
+ if (shareLongOffsets >= minShare)
1692
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1693
+ }
1694
+
1695
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1696
+ }
1434
1697
  }
1435
1698
 
1436
1699
 
@@ -1758,7 +2021,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
1758
2021
  * or an error code, which can be tested using ZSTD_isError() */
1759
2022
  size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1760
2023
  {
1761
- DEBUGLOG(5, "ZSTD_decompressContinue");
2024
+ DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
1762
2025
  /* Sanity check */
1763
2026
  if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
1764
2027
  if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@@ -1819,12 +2082,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1819
2082
 
1820
2083
  case ZSTDds_decompressLastBlock:
1821
2084
  case ZSTDds_decompressBlock:
1822
- DEBUGLOG(5, "case ZSTDds_decompressBlock");
2085
+ DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
1823
2086
  { size_t rSize;
1824
2087
  switch(dctx->bType)
1825
2088
  {
1826
2089
  case bt_compressed:
1827
- DEBUGLOG(5, "case bt_compressed");
2090
+ DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
1828
2091
  rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
1829
2092
  break;
1830
2093
  case bt_raw :
@@ -1838,12 +2101,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1838
2101
  return ERROR(corruption_detected);
1839
2102
  }
1840
2103
  if (ZSTD_isError(rSize)) return rSize;
1841
- DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
2104
+ DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
1842
2105
  dctx->decodedSize += rSize;
1843
2106
  if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
1844
2107
 
1845
2108
  if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
1846
- DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
2109
+ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
1847
2110
  if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
1848
2111
  if (dctx->decodedSize != dctx->fParams.frameContentSize) {
1849
2112
  return ERROR(corruption_detected);
@@ -1867,7 +2130,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
1867
2130
  assert(srcSize == 4); /* guaranteed by dctx->expected */
1868
2131
  { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
1869
2132
  U32 const check32 = MEM_readLE32(src);
1870
- DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
2133
+ DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
1871
2134
  if (check32 != h32) return ERROR(checksum_wrong);
1872
2135
  dctx->expected = 0;
1873
2136
  dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -1925,8 +2188,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1925
2188
  U32 offcodeMaxValue = MaxOff, offcodeLog;
1926
2189
  size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
1927
2190
  if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2191
+ if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
1928
2192
  if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
1929
- CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
2193
+ ZSTD_buildFSETable(entropy->OFTable,
2194
+ offcodeNCount, offcodeMaxValue,
2195
+ OF_base, OF_bits,
2196
+ offcodeLog);
1930
2197
  dictPtr += offcodeHeaderSize;
1931
2198
  }
1932
2199
 
@@ -1934,8 +2201,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1934
2201
  unsigned matchlengthMaxValue = MaxML, matchlengthLog;
1935
2202
  size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
1936
2203
  if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2204
+ if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
1937
2205
  if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
1938
- CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
2206
+ ZSTD_buildFSETable(entropy->MLTable,
2207
+ matchlengthNCount, matchlengthMaxValue,
2208
+ ML_base, ML_bits,
2209
+ matchlengthLog);
1939
2210
  dictPtr += matchlengthHeaderSize;
1940
2211
  }
1941
2212
 
@@ -1943,8 +2214,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
1943
2214
  unsigned litlengthMaxValue = MaxLL, litlengthLog;
1944
2215
  size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
1945
2216
  if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2217
+ if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
1946
2218
  if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
1947
- CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
2219
+ ZSTD_buildFSETable(entropy->LLTable,
2220
+ litlengthNCount, litlengthMaxValue,
2221
+ LL_base, LL_bits,
2222
+ litlengthLog);
1948
2223
  dictPtr += litlengthHeaderSize;
1949
2224
  }
1950
2225
 
@@ -2062,13 +2337,23 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddi
2062
2337
  return 0;
2063
2338
  }
2064
2339
 
2065
- static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
2340
+ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
2066
2341
  {
2067
2342
  ddict->dictID = 0;
2068
2343
  ddict->entropyPresent = 0;
2069
- if (ddict->dictSize < 8) return 0;
2344
+ if (dictContentType == ZSTD_dct_rawContent) return 0;
2345
+
2346
+ if (ddict->dictSize < 8) {
2347
+ if (dictContentType == ZSTD_dct_fullDict)
2348
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2349
+ return 0; /* pure content mode */
2350
+ }
2070
2351
  { U32 const magic = MEM_readLE32(ddict->dictContent);
2071
- if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */
2352
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
2353
+ if (dictContentType == ZSTD_dct_fullDict)
2354
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
2355
+ return 0; /* pure content mode */
2356
+ }
2072
2357
  }
2073
2358
  ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
2074
2359
 
@@ -2079,7 +2364,10 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
2079
2364
  }
2080
2365
 
2081
2366
 
2082
- static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
2367
+ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
2368
+ const void* dict, size_t dictSize,
2369
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2370
+ ZSTD_dictContentType_e dictContentType)
2083
2371
  {
2084
2372
  if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2085
2373
  ddict->dictBuffer = NULL;
@@ -2095,12 +2383,15 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_
2095
2383
  ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2096
2384
 
2097
2385
  /* parse dictionary content */
2098
- CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
2386
+ CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
2099
2387
 
2100
2388
  return 0;
2101
2389
  }
2102
2390
 
2103
- ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_customMem customMem)
2391
+ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
2392
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2393
+ ZSTD_dictContentType_e dictContentType,
2394
+ ZSTD_customMem customMem)
2104
2395
  {
2105
2396
  if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2106
2397
 
@@ -2108,7 +2399,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
2108
2399
  if (!ddict) return NULL;
2109
2400
  ddict->cMem = customMem;
2110
2401
 
2111
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod) )) {
2402
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
2112
2403
  ZSTD_freeDDict(ddict);
2113
2404
  return NULL;
2114
2405
  }
@@ -2124,7 +2415,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
2124
2415
  ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
2125
2416
  {
2126
2417
  ZSTD_customMem const allocator = { NULL, NULL, NULL };
2127
- return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, allocator);
2418
+ return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
2128
2419
  }
2129
2420
 
2130
2421
  /*! ZSTD_createDDict_byReference() :
@@ -2134,13 +2425,15 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
2134
2425
  ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
2135
2426
  {
2136
2427
  ZSTD_customMem const allocator = { NULL, NULL, NULL };
2137
- return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, allocator);
2428
+ return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
2138
2429
  }
2139
2430
 
2140
2431
 
2141
- ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
2142
- const void* dict, size_t dictSize,
2143
- ZSTD_dictLoadMethod_e dictLoadMethod)
2432
+ const ZSTD_DDict* ZSTD_initStaticDDict(
2433
+ void* workspace, size_t workspaceSize,
2434
+ const void* dict, size_t dictSize,
2435
+ ZSTD_dictLoadMethod_e dictLoadMethod,
2436
+ ZSTD_dictContentType_e dictContentType)
2144
2437
  {
2145
2438
  size_t const neededSpace =
2146
2439
  sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
@@ -2153,7 +2446,7 @@ ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
2153
2446
  memcpy(ddict+1, dict, dictSize); /* local copy */
2154
2447
  dict = ddict+1;
2155
2448
  }
2156
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef) ))
2449
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
2157
2450
  return NULL;
2158
2451
  return ddict;
2159
2452
  }
@@ -2247,6 +2540,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
2247
2540
 
2248
2541
  ZSTD_DStream* ZSTD_createDStream(void)
2249
2542
  {
2543
+ DEBUGLOG(3, "ZSTD_createDStream");
2250
2544
  return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
2251
2545
  }
2252
2546
 
@@ -2271,58 +2565,99 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
2271
2565
  size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
2272
2566
  size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2273
2567
 
2274
- size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
2568
+ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
2275
2569
  {
2276
- zds->streamStage = zdss_loadHeader;
2277
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
2278
- ZSTD_freeDDict(zds->ddictLocal);
2570
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2571
+ ZSTD_freeDDict(dctx->ddictLocal);
2279
2572
  if (dict && dictSize >= 8) {
2280
- zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
2281
- if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
2282
- } else zds->ddictLocal = NULL;
2283
- zds->ddict = zds->ddictLocal;
2284
- zds->legacyVersion = 0;
2285
- zds->hostageByte = 0;
2573
+ dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
2574
+ if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
2575
+ } else {
2576
+ dctx->ddictLocal = NULL;
2577
+ }
2578
+ dctx->ddict = dctx->ddictLocal;
2579
+ return 0;
2580
+ }
2581
+
2582
+ size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2583
+ {
2584
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
2585
+ }
2586
+
2587
+ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2588
+ {
2589
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
2590
+ }
2591
+
2592
+ size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
2593
+ {
2594
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
2595
+ }
2596
+
2597
+ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
2598
+ {
2599
+ return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
2600
+ }
2601
+
2602
+
2603
+ /* ZSTD_initDStream_usingDict() :
2604
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix,
2605
+ * or an error code if loading the dictionary fails (propagated by CHECK_F) */
2606
+ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
2607
+ {
2608
+ DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2609
+ zds->streamStage = zdss_init;
2610
+ CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2286
2611
  return ZSTD_frameHeaderSize_prefix;
2287
2612
  }
2288
2613
 
2289
2614
  /* note : this variant can't fail */
2290
2615
  size_t ZSTD_initDStream(ZSTD_DStream* zds)
2291
2616
  {
2617
+ DEBUGLOG(4, "ZSTD_initDStream");
2292
2618
  return ZSTD_initDStream_usingDict(zds, NULL, 0);
2293
2619
  }
2294
2620
 
2621
+ size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2622
+ {
2623
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2624
+ dctx->ddict = ddict;
2625
+ return 0;
2626
+ }
2627
+
2295
2628
  /* ZSTD_initDStream_usingDDict() :
2296
2629
  * ddict will just be referenced, and must outlive decompression session
2297
2630
  * this function cannot fail */
2298
- size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
2631
+ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
2299
2632
  {
2300
- size_t const initResult = ZSTD_initDStream(zds);
2301
- zds->ddict = ddict;
2633
+ size_t const initResult = ZSTD_initDStream(dctx);
2634
+ dctx->ddict = ddict;
2302
2635
  return initResult;
2303
2636
  }
2304
2637
 
2305
- size_t ZSTD_resetDStream(ZSTD_DStream* zds)
2638
+ /* ZSTD_resetDStream() :
2639
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
2640
+ * this function cannot fail */
2641
+ size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
2306
2642
  {
2307
- zds->streamStage = zdss_loadHeader;
2308
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
2309
- zds->legacyVersion = 0;
2310
- zds->hostageByte = 0;
2643
+ DEBUGLOG(4, "ZSTD_resetDStream");
2644
+ dctx->streamStage = zdss_loadHeader;
2645
+ dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
2646
+ dctx->legacyVersion = 0;
2647
+ dctx->hostageByte = 0;
2311
2648
  return ZSTD_frameHeaderSize_prefix;
2312
2649
  }
2313
2650
 
2314
- size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
2651
+ size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
2315
2652
  ZSTD_DStreamParameter_e paramType, unsigned paramValue)
2316
2653
  {
2317
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2318
- if ((unsigned)zds->streamStage > (unsigned)zdss_loadHeader)
2319
- return ERROR(stage_wrong);
2654
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2320
2655
  switch(paramType)
2321
2656
  {
2322
2657
  default : return ERROR(parameter_unsupported);
2323
2658
  case DStream_p_maxWindowSize :
2324
2659
  DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
2325
- zds->maxWindowSize = paramValue ? paramValue : (U32)(-1);
2660
+ dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
2326
2661
  break;
2327
2662
  }
2328
2663
  return 0;
@@ -2330,9 +2665,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
2330
2665
 
2331
2666
  size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2332
2667
  {
2333
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2334
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
2335
- return ERROR(stage_wrong);
2668
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2336
2669
  dctx->maxWindowSize = maxWindowSize;
2337
2670
  return 0;
2338
2671
  }
@@ -2340,17 +2673,15 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2340
2673
  size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
2341
2674
  {
2342
2675
  DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
2343
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
2344
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
2345
- return ERROR(stage_wrong);
2676
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2346
2677
  dctx->format = format;
2347
2678
  return 0;
2348
2679
  }
2349
2680
 
2350
2681
 
2351
- size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
2682
+ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
2352
2683
  {
2353
- return ZSTD_sizeof_DCtx(zds);
2684
+ return ZSTD_sizeof_DCtx(dctx);
2354
2685
  }
2355
2686
 
2356
2687
  size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
@@ -2417,23 +2748,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2417
2748
  }
2418
2749
  DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
2419
2750
 
2420
- #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2421
- if (zds->legacyVersion) {
2422
- /* legacy support is incompatible with static dctx */
2423
- if (zds->staticSize) return ERROR(memory_allocation);
2424
- return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
2425
- }
2426
- #endif
2427
-
2428
2751
  while (someMoreWork) {
2429
2752
  switch(zds->streamStage)
2430
2753
  {
2431
2754
  case zdss_init :
2755
+ DEBUGLOG(5, "stage zdss_init => transparent reset ");
2432
2756
  ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
2433
2757
  /* fall-through */
2434
2758
 
2435
2759
  case zdss_loadHeader :
2436
2760
  DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
2761
+ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2762
+ if (zds->legacyVersion) {
2763
+ /* legacy support is incompatible with static dctx */
2764
+ if (zds->staticSize) return ERROR(memory_allocation);
2765
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
2766
+ if (hint==0) zds->streamStage = zdss_init;
2767
+ return hint;
2768
+ } }
2769
+ #endif
2437
2770
  { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2438
2771
  DEBUGLOG(5, "header size : %u", (U32)hSize);
2439
2772
  if (ZSTD_isError(hSize)) {
@@ -2442,14 +2775,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2442
2775
  if (legacyVersion) {
2443
2776
  const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
2444
2777
  size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
2778
+ DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
2445
2779
  /* legacy support is incompatible with static dctx */
2446
2780
  if (zds->staticSize) return ERROR(memory_allocation);
2447
2781
  CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
2448
2782
  zds->previousLegacyVersion, legacyVersion,
2449
2783
  dict, dictSize));
2450
2784
  zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
2451
- return ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
2452
- }
2785
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
2786
+ if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
2787
+ return hint;
2788
+ } }
2453
2789
  #endif
2454
2790
  return hSize; /* error */
2455
2791
  }
@@ -2559,6 +2895,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2559
2895
  if (ip==iend) { someMoreWork = 0; break; } /* no more input */
2560
2896
  zds->streamStage = zdss_load;
2561
2897
  /* fall-through */
2898
+
2562
2899
  case zdss_load:
2563
2900
  { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
2564
2901
  size_t const toLoad = neededInSize - zds->inPos;
@@ -2585,6 +2922,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2585
2922
  } }
2586
2923
  zds->streamStage = zdss_flush;
2587
2924
  /* fall-through */
2925
+
2588
2926
  case zdss_flush:
2589
2927
  { size_t const toFlushSize = zds->outEnd - zds->outStart;
2590
2928
  size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
@@ -2631,8 +2969,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
2631
2969
  return 1;
2632
2970
  } /* nextSrcSizeHint==0 */
2633
2971
  nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
2634
- if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */
2635
- nextSrcSizeHint -= zds->inPos; /* already loaded*/
2972
+ assert(zds->inPos <= nextSrcSizeHint);
2973
+ nextSrcSizeHint -= zds->inPos; /* part already loaded*/
2636
2974
  return nextSrcSizeHint;
2637
2975
  }
2638
2976
  }