extzstd 0.1 → 0.3.2

Files changed (134)
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/README.md +38 -56
  4. data/contrib/zstd/CHANGELOG +613 -0
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/CONTRIBUTING.md +406 -0
  7. data/contrib/zstd/COPYING +339 -0
  8. data/contrib/zstd/Makefile +420 -0
  9. data/contrib/zstd/README.md +179 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +292 -0
  12. data/contrib/zstd/lib/BUCK +234 -0
  13. data/contrib/zstd/lib/Makefile +451 -0
  14. data/contrib/zstd/lib/README.md +207 -0
  15. data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
  16. data/contrib/zstd/lib/common/compiler.h +288 -0
  17. data/contrib/zstd/lib/common/cpu.h +213 -0
  18. data/contrib/zstd/lib/common/debug.c +24 -0
  19. data/contrib/zstd/lib/common/debug.h +107 -0
  20. data/contrib/zstd/lib/common/entropy_common.c +362 -0
  21. data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
  22. data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
  23. data/contrib/zstd/{common → lib/common}/fse.h +173 -92
  24. data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
  25. data/contrib/zstd/lib/common/huf.h +361 -0
  26. data/contrib/zstd/{common → lib/common}/mem.h +115 -59
  27. data/contrib/zstd/lib/common/pool.c +350 -0
  28. data/contrib/zstd/lib/common/pool.h +84 -0
  29. data/contrib/zstd/lib/common/threading.c +122 -0
  30. data/contrib/zstd/lib/common/threading.h +155 -0
  31. data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
  32. data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
  33. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  34. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  35. data/contrib/zstd/lib/common/zstd_errors.h +95 -0
  36. data/contrib/zstd/lib/common/zstd_internal.h +478 -0
  37. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
  38. data/contrib/zstd/lib/compress/hist.c +181 -0
  39. data/contrib/zstd/lib/compress/hist.h +75 -0
  40. data/contrib/zstd/lib/compress/huf_compress.c +913 -0
  41. data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  49. data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
  50. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  52. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  54. data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
  56. data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
  58. data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  60. data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
  62. data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
  63. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  65. data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
  69. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
  70. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  71. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  73. data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  75. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  77. data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
  78. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
  79. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  80. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
  81. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
  94. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
  95. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  96. data/contrib/zstd/lib/zstd.h +2391 -0
  97. data/ext/depend +2 -0
  98. data/ext/extconf.rb +15 -6
  99. data/ext/extzstd.c +76 -145
  100. data/ext/extzstd.h +80 -31
  101. data/ext/extzstd_stream.c +417 -142
  102. data/ext/libzstd_conf.h +8 -0
  103. data/ext/zstd_common.c +10 -7
  104. data/ext/zstd_compress.c +14 -5
  105. data/ext/zstd_decompress.c +5 -4
  106. data/ext/zstd_dictbuilder.c +9 -4
  107. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  108. data/ext/zstd_legacy_v01.c +3 -1
  109. data/ext/zstd_legacy_v02.c +3 -1
  110. data/ext/zstd_legacy_v03.c +3 -1
  111. data/ext/zstd_legacy_v04.c +3 -1
  112. data/ext/zstd_legacy_v05.c +3 -1
  113. data/ext/zstd_legacy_v06.c +3 -1
  114. data/ext/zstd_legacy_v07.c +3 -1
  115. data/gemstub.rb +10 -24
  116. data/lib/extzstd.rb +64 -179
  117. data/lib/extzstd/version.rb +6 -1
  118. data/test/test_basic.rb +9 -6
  119. metadata +113 -57
  120. data/HISTORY.ja +0 -5
  121. data/contrib/zstd/common/entropy_common.c +0 -225
  122. data/contrib/zstd/common/huf.h +0 -228
  123. data/contrib/zstd/common/zstd_common.c +0 -83
  124. data/contrib/zstd/common/zstd_errors.h +0 -60
  125. data/contrib/zstd/common/zstd_internal.h +0 -267
  126. data/contrib/zstd/compress/huf_compress.c +0 -533
  127. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  128. data/contrib/zstd/compress/zstd_compress.c +0 -3264
  129. data/contrib/zstd/compress/zstd_opt.h +0 -900
  130. data/contrib/zstd/decompress/huf_decompress.c +0 -883
  131. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  132. data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
  133. data/contrib/zstd/dictBuilder/zdict.h +0 -111
  134. data/contrib/zstd/zstd.h +0 -640
data/contrib/zstd/lib/compress/zstdmt_compress.h
@@ -0,0 +1,110 @@
+ /*
+  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+  * All rights reserved.
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+  * in the COPYING file in the root directory of this source tree).
+  * You may select, at your option, one of the above-listed licenses.
+  */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+ /* Note : This is an internal API.
+  *        These APIs used to be exposed with ZSTDLIB_API,
+  *        because they used to be the only way to invoke MT compression.
+  *        Now, you must use ZSTD_compress2() and ZSTD_compressStream2() instead.
+  *
+  *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
+  *        otherwise ZSTDMT_createCCtx*() will fail.
+  */
+
+ /* ===   Dependencies   === */
+ #include "../common/zstd_deps.h"   /* size_t */
+ #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+ #include "../zstd.h"               /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+ /* ===   Constants   === */
+ #ifndef ZSTDMT_NBWORKERS_MAX
+ #  define ZSTDMT_NBWORKERS_MAX 200
+ #endif
+ #ifndef ZSTDMT_JOBSIZE_MIN
+ #  define ZSTDMT_JOBSIZE_MIN (1 MB)
+ #endif
+ #define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
+ #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
+
+
+ /* ========================================================
+  * ===  Private interface, for use by ZSTD_compress.c   ===
+  * ===  Not exposed in libzstd. Never invoke directly   ===
+  * ======================================================== */
+
+ /* ===   Memory management   === */
+ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+                                         ZSTD_customMem cMem,
+                                         ZSTD_threadPool* pool);
+ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+
+ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
+
+ /* ===   Streaming functions   === */
+
+ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
+
+ /*! ZSTDMT_initCStream_internal() :
+  *  Private use only. Init streaming operation.
+  *  Expects params to be valid.
+  *  Must receive dict, or cdict, or none, but not both.
+  * @return : 0, or an error code */
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+                     const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+                     const ZSTD_CDict* cdict,
+                     ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+
+ /*! ZSTDMT_compressStream_generic() :
+  *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream(),
+  *  depending on the flush directive.
+  * @return : minimum amount of data still to be flushed,
+  *           0 if fully flushed,
+  *           or an error code.
+  *  note : needs to be initialized using any ZSTD_initCStream*() variant */
+ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+                                      ZSTD_outBuffer* output,
+                                      ZSTD_inBuffer* input,
+                                      ZSTD_EndDirective endOp);
+
+ /*! ZSTDMT_toFlushNow() :
+  *  Tells how many bytes are ready to be flushed immediately.
+  *  Probes the oldest active job (not yet entirely flushed) and checks its output buffer.
+  *  If it returns 0, it means there is no active job, or the oldest job is still active
+  *  but everything it produced has been flushed so far; flushing is therefore limited
+  *  by the speed of the oldest job. */
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
+
+ /*! ZSTDMT_updateCParams_whileCompressing() :
+  *  Updates only a selected set of compression parameters, to remain compatible with the current frame.
+  *  New parameters will be applied to the next compression job. */
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
+
+ /*! ZSTDMT_getFrameProgression() :
+  *  Tells how much data has been consumed (input) and produced (output) for the current frame.
+  *  Able to count progression inside worker threads.
+  */
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
+
+
+ #if defined (__cplusplus)
+ }
+ #endif
+
+ #endif /* ZSTDMT_COMPRESS_H */
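
The note at the top of this header is the key behavioral change in this release: the ZSTDMT_* entry points become library-internal, and multithreading is reached through the regular public API instead. A minimal sketch of that public-API path (not part of this diff; it assumes a libzstd built with ZSTD_MULTITHREAD, and the worker count of 4 is arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>

int main(void)
{
    const char src[] = "example payload; any buffer works here";
    size_t const srcSize = sizeof(src);
    size_t const dstCapacity = ZSTD_compressBound(srcSize);
    void* const dst = malloc(dstCapacity);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    if (dst == NULL || cctx == NULL) { fprintf(stderr, "allocation failed\n"); return 1; }

    /* Request worker threads; on a build without ZSTD_MULTITHREAD this
     * setParameter call reports an error and compression stays single-threaded. */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 5);

    {   size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize))
            fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
        else
            printf("compressed %zu -> %zu bytes\n", srcSize, cSize);
    }
    ZSTD_freeCCtx(cctx);
    free(dst);
    return 0;
}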
data/contrib/zstd/lib/decompress/huf_decompress.c
@@ -0,0 +1,1350 @@
+ /* ******************************************************************
+  * huff0 huffman decoder,
+  * part of Finite State Entropy library
+  * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+  *
+  * You can contact the author at :
+  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+  *
+  * This source code is licensed under both the BSD-style license (found in the
+  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+  * in the COPYING file in the root directory of this source tree).
+  * You may select, at your option, one of the above-listed licenses.
+ ****************************************************************** */
+
+ /* **************************************************************
+ *  Dependencies
+ ****************************************************************/
+ #include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
+ #include "../common/compiler.h"
+ #include "../common/bitstream.h"  /* BIT_* */
+ #include "../common/fse.h"        /* to compress headers */
+ #define HUF_STATIC_LINKING_ONLY
+ #include "../common/huf.h"
+ #include "../common/error_private.h"
+
+ /* **************************************************************
+ *  Macros
+ ****************************************************************/
+
+ /* These two optional macros force the use of one or the other of the two
+  * Huffman decompression implementations. You can't force both directions
+  * at the same time.
+  */
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
+     defined(HUF_FORCE_DECOMPRESS_X2)
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
+ #endif
+
+
+ /* **************************************************************
+ *  Error Management
+ ****************************************************************/
+ #define HUF_isError ERR_isError
+
+
+ /* **************************************************************
+ *  Byte alignment for workSpace management
+ ****************************************************************/
+ #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+ /* **************************************************************
+ *  BMI2 Variant Wrappers
+ ****************************************************************/
+ #if DYNAMIC_BMI2
+
+ #define HUF_DGEN(fn)                                                        \
+                                                                             \
+     static size_t fn##_default(                                             \
+                   void* dst,  size_t dstSize,                               \
+             const void* cSrc, size_t cSrcSize,                              \
+             const HUF_DTable* DTable)                                       \
+     {                                                                       \
+         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+     }                                                                       \
+                                                                             \
+     static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+                   void* dst,  size_t dstSize,                               \
+             const void* cSrc, size_t cSrcSize,                              \
+             const HUF_DTable* DTable)                                       \
+     {                                                                       \
+         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+     }                                                                       \
+                                                                             \
+     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+     {                                                                       \
+         if (bmi2) {                                                         \
+             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+         }                                                                   \
+         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+     }
+
+ #else
+
+ #define HUF_DGEN(fn)                                                        \
+     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+     {                                                                       \
+         (void)bmi2;                                                         \
+         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+     }
+
+ #endif
+
+
+ /*-***************************/
+ /*  generic DTableDesc       */
+ /*-***************************/
+ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+ {
+     DTableDesc dtd;
+     ZSTD_memcpy(&dtd, table, sizeof(dtd));
+     return dtd;
+ }
+
+
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+
+ /*-***************************/
+ /*  single-symbol decoding   */
+ /*-***************************/
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+
+ /**
+  * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+  * a time.
+  */
+ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+     U64 D4;
+     if (MEM_isLittleEndian()) {
+         D4 = symbol + (nbBits << 8);
+     } else {
+         D4 = (symbol << 8) + nbBits;
+     }
+     D4 *= 0x0001000100010001ULL;
+     return D4;
+ }
+
+ typedef struct {
+     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+     U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+     U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+     BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+ } HUF_ReadDTableX1_Workspace;
+
+
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+ {
+     return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+ }
+
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
+ {
+     U32 tableLog = 0;
+     U32 nbSymbols = 0;
+     size_t iSize;
+     void* const dtPtr = DTable + 1;
+     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+     HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
+
+     DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+     if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
+
+     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+     /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzers complain ... */
+
+     iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
+     if (HUF_isError(iSize)) return iSize;
+
+     /* Table header */
+     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+         dtd.tableType = 0;
+         dtd.tableLog = (BYTE)tableLog;
+         ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+     }
+
+     /* Compute symbols and rankStart given rankVal:
+      *
+      * rankVal already contains the number of values of each weight.
+      *
+      * symbols contains the symbols ordered by weight. First are the rankVal[0]
+      * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+      * symbols[0] is filled (but unused) to avoid a branch.
+      *
+      * rankStart contains the offset where each rank belongs in the DTable.
+      * rankStart[0] is not filled because there are no entries in the table for
+      * weight 0.
+      */
+     {
+         int n;
+         int nextRankStart = 0;
+         int const unroll = 4;
+         int const nLimit = (int)nbSymbols - unroll + 1;
+         for (n=0; n<(int)tableLog+1; n++) {
+             U32 const curr = nextRankStart;
+             nextRankStart += wksp->rankVal[n];
+             wksp->rankStart[n] = curr;
+         }
+         for (n=0; n < nLimit; n += unroll) {
+             int u;
+             for (u=0; u < unroll; ++u) {
+                 size_t const w = wksp->huffWeight[n+u];
+                 wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+             }
+         }
+         for (; n < (int)nbSymbols; ++n) {
+             size_t const w = wksp->huffWeight[n];
+             wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+         }
+     }
+
+     /* fill DTable
+      * We fill all entries of each weight in order.
+      * That way length is a constant for each iteration of the outer loop.
+      * We can switch based on the length to a different inner loop which is
+      * optimized for that particular case.
+      */
+     {
+         U32 w;
+         int symbol = wksp->rankVal[0];
+         int rankStart = 0;
+         for (w=1; w<tableLog+1; ++w) {
+             int const symbolCount = wksp->rankVal[w];
+             int const length = (1 << w) >> 1;
+             int uStart = rankStart;
+             BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+             int s;
+             int u;
+             switch (length) {
+             case 1:
+                 for (s=0; s<symbolCount; ++s) {
+                     HUF_DEltX1 D;
+                     D.byte = wksp->symbols[symbol + s];
+                     D.nbBits = nbBits;
+                     dt[uStart] = D;
+                     uStart += 1;
+                 }
+                 break;
+             case 2:
+                 for (s=0; s<symbolCount; ++s) {
+                     HUF_DEltX1 D;
+                     D.byte = wksp->symbols[symbol + s];
+                     D.nbBits = nbBits;
+                     dt[uStart+0] = D;
+                     dt[uStart+1] = D;
+                     uStart += 2;
+                 }
+                 break;
+             case 4:
+                 for (s=0; s<symbolCount; ++s) {
+                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                     MEM_write64(dt + uStart, D4);
+                     uStart += 4;
+                 }
+                 break;
+             case 8:
+                 for (s=0; s<symbolCount; ++s) {
+                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                     MEM_write64(dt + uStart, D4);
+                     MEM_write64(dt + uStart + 4, D4);
+                     uStart += 8;
+                 }
+                 break;
+             default:
+                 for (s=0; s<symbolCount; ++s) {
+                     U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                     for (u=0; u < length; u += 16) {
+                         MEM_write64(dt + uStart + u + 0, D4);
+                         MEM_write64(dt + uStart + u + 4, D4);
+                         MEM_write64(dt + uStart + u + 8, D4);
+                         MEM_write64(dt + uStart + u + 12, D4);
+                     }
+                     assert(u == length);
+                     uStart += length;
+                 }
+                 break;
+             }
+             symbol += symbolCount;
+             rankStart += symbolCount * length;
+         }
+     }
+     return iSize;
+ }
+
281
+ FORCE_INLINE_TEMPLATE BYTE
282
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
283
+ {
284
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
285
+ BYTE const c = dt[val].byte;
286
+ BIT_skipBits(Dstream, dt[val].nbBits);
287
+ return c;
288
+ }
289
+
290
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
291
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
292
+
293
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
294
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
295
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
296
+
297
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
298
+ if (MEM_64bits()) \
299
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
300
+
301
+ HINT_INLINE size_t
302
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
303
+ {
304
+ BYTE* const pStart = p;
305
+
306
+ /* up to 4 symbols at a time */
307
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
308
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
309
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
310
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
311
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
312
+ }
313
+
314
+ /* [0-3] symbols remaining */
315
+ if (MEM_32bits())
316
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
317
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
318
+
319
+ /* no more data to retrieve from bitstream, no need to reload */
320
+ while (p < pEnd)
321
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
322
+
323
+ return pEnd-pStart;
324
+ }
325
+
326
+ FORCE_INLINE_TEMPLATE size_t
327
+ HUF_decompress1X1_usingDTable_internal_body(
328
+ void* dst, size_t dstSize,
329
+ const void* cSrc, size_t cSrcSize,
330
+ const HUF_DTable* DTable)
331
+ {
332
+ BYTE* op = (BYTE*)dst;
333
+ BYTE* const oend = op + dstSize;
334
+ const void* dtPtr = DTable + 1;
335
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
336
+ BIT_DStream_t bitD;
337
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
338
+ U32 const dtLog = dtd.tableLog;
339
+
340
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
341
+
342
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
343
+
344
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
345
+
346
+ return dstSize;
347
+ }
348
+
349
+ FORCE_INLINE_TEMPLATE size_t
350
+ HUF_decompress4X1_usingDTable_internal_body(
351
+ void* dst, size_t dstSize,
352
+ const void* cSrc, size_t cSrcSize,
353
+ const HUF_DTable* DTable)
354
+ {
355
+ /* Check */
356
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
357
+
358
+ { const BYTE* const istart = (const BYTE*) cSrc;
359
+ BYTE* const ostart = (BYTE*) dst;
360
+ BYTE* const oend = ostart + dstSize;
361
+ BYTE* const olimit = oend - 3;
362
+ const void* const dtPtr = DTable + 1;
363
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
364
+
365
+ /* Init */
366
+ BIT_DStream_t bitD1;
367
+ BIT_DStream_t bitD2;
368
+ BIT_DStream_t bitD3;
369
+ BIT_DStream_t bitD4;
370
+ size_t const length1 = MEM_readLE16(istart);
371
+ size_t const length2 = MEM_readLE16(istart+2);
372
+ size_t const length3 = MEM_readLE16(istart+4);
373
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
374
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
375
+ const BYTE* const istart2 = istart1 + length1;
376
+ const BYTE* const istart3 = istart2 + length2;
377
+ const BYTE* const istart4 = istart3 + length3;
378
+ const size_t segmentSize = (dstSize+3) / 4;
379
+ BYTE* const opStart2 = ostart + segmentSize;
380
+ BYTE* const opStart3 = opStart2 + segmentSize;
381
+ BYTE* const opStart4 = opStart3 + segmentSize;
382
+ BYTE* op1 = ostart;
383
+ BYTE* op2 = opStart2;
384
+ BYTE* op3 = opStart3;
385
+ BYTE* op4 = opStart4;
386
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
387
+ U32 const dtLog = dtd.tableLog;
388
+ U32 endSignal = 1;
389
+
390
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
391
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
392
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
393
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
394
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
395
+
396
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
397
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
398
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
399
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
400
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
401
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
402
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
403
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
404
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
405
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
406
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
407
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
408
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
409
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
410
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
411
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
412
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
413
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
414
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
415
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
416
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
417
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
418
+ }
419
+
420
+ /* check corruption */
421
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
422
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
423
+ if (op1 > opStart2) return ERROR(corruption_detected);
424
+ if (op2 > opStart3) return ERROR(corruption_detected);
425
+ if (op3 > opStart4) return ERROR(corruption_detected);
426
+ /* note : op4 supposed already verified within main loop */
427
+
428
+ /* finish bitStreams one by one */
429
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
430
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
431
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
432
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
433
+
434
+ /* check */
435
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
436
+ if (!endCheck) return ERROR(corruption_detected); }
437
+
438
+ /* decoded size */
439
+ return dstSize;
440
+ }
441
+ }
442
+
443
+
444
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
445
+ const void *cSrc,
446
+ size_t cSrcSize,
447
+ const HUF_DTable *DTable);
448
+
449
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
450
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
451
+
452
+
453
+
454
+ size_t HUF_decompress1X1_usingDTable(
455
+ void* dst, size_t dstSize,
456
+ const void* cSrc, size_t cSrcSize,
457
+ const HUF_DTable* DTable)
458
+ {
459
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
460
+ if (dtd.tableType != 0) return ERROR(GENERIC);
461
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
462
+ }
463
+
464
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
465
+ const void* cSrc, size_t cSrcSize,
466
+ void* workSpace, size_t wkspSize)
467
+ {
468
+ const BYTE* ip = (const BYTE*) cSrc;
469
+
470
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
471
+ if (HUF_isError(hSize)) return hSize;
472
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
473
+ ip += hSize; cSrcSize -= hSize;
474
+
475
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
476
+ }
477
+
478
+
479
+ size_t HUF_decompress4X1_usingDTable(
480
+ void* dst, size_t dstSize,
481
+ const void* cSrc, size_t cSrcSize,
482
+ const HUF_DTable* DTable)
483
+ {
484
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
485
+ if (dtd.tableType != 0) return ERROR(GENERIC);
486
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
487
+ }
488
+
489
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
490
+ const void* cSrc, size_t cSrcSize,
491
+ void* workSpace, size_t wkspSize, int bmi2)
492
+ {
493
+ const BYTE* ip = (const BYTE*) cSrc;
494
+
495
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
496
+ if (HUF_isError(hSize)) return hSize;
497
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
498
+ ip += hSize; cSrcSize -= hSize;
499
+
500
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
501
+ }
502
+
503
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
504
+ const void* cSrc, size_t cSrcSize,
505
+ void* workSpace, size_t wkspSize)
506
+ {
507
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
508
+ }
509
+
510
+
511
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
512
+
513
+
514
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+
+ /* *************************/
+ /* double-symbols decoding */
+ /* *************************/
+
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;   /* double-symbols decoding */
+ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+ /* HUF_fillDTableX2Level2() :
+  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                            const U32* rankValOrigin, const int minWeight,
+                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                            U32 nbBitsBaseline, U16 baseSeq)
+ {
+     HUF_DEltX2 DElt;
+     U32 rankVal[HUF_TABLELOG_MAX + 1];
+
+     /* get pre-calculated rankVal */
+     ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+     /* fill skipped values */
+     if (minWeight>1) {
+         U32 i, skipSize = rankVal[minWeight];
+         MEM_writeLE16(&(DElt.sequence), baseSeq);
+         DElt.nbBits = (BYTE)(consumed);
+         DElt.length = 1;
+         for (i = 0; i < skipSize; i++)
+             DTable[i] = DElt;
+     }
+
+     /* fill DTable */
+     {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+             const U32 symbol = sortedSymbols[s].symbol;
+             const U32 weight = sortedSymbols[s].weight;
+             const U32 nbBits = nbBitsBaseline - weight;
+             const U32 length = 1 << (sizeLog-nbBits);
+             const U32 start = rankVal[weight];
+             U32 i = start;
+             const U32 end = start + length;
+
+             MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+             DElt.nbBits = (BYTE)(nbBits + consumed);
+             DElt.length = 2;
+             do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+             rankVal[weight] += length;
+     }   }
+ }
+
+
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                            const U32 nbBitsBaseline)
+ {
+     U32 rankVal[HUF_TABLELOG_MAX + 1];
+     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+     const U32 minBits  = nbBitsBaseline - maxWeight;
+     U32 s;
+
+     ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+     /* fill DTable */
+     for (s=0; s<sortedListSize; s++) {
+         const U16 symbol = sortedList[s].symbol;
+         const U32 weight = sortedList[s].weight;
+         const U32 nbBits = nbBitsBaseline - weight;
+         const U32 start = rankVal[weight];
+         const U32 length = 1 << (targetLog-nbBits);
+
+         if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+             U32 sortedRank;
+             int minWeight = nbBits + scaleLog;
+             if (minWeight < 1) minWeight = 1;
+             sortedRank = rankStart[minWeight];
+             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                            rankValOrigin[nbBits], minWeight,
+                            sortedList+sortedRank, sortedListSize-sortedRank,
+                            nbBitsBaseline, symbol);
+         } else {
+             HUF_DEltX2 DElt;
+             MEM_writeLE16(&(DElt.sequence), symbol);
+             DElt.nbBits = (BYTE)(nbBits);
+             DElt.length = 1;
+             {   U32 const end = start + length;
+                 U32 u;
+                 for (u = start; u < end; u++) DTable[u] = DElt;
+         }   }
+         rankVal[weight] += length;
+     }
+ }
+
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                        const void* src, size_t srcSize,
+                              void* workSpace, size_t wkspSize)
+ {
+     U32 tableLog, maxW, sizeOfSort, nbSymbols;
+     DTableDesc dtd = HUF_getDTableDesc(DTable);
+     U32 const maxTableLog = dtd.maxTableLog;
+     size_t iSize;
+     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+     U32 *rankStart;
+
+     rankValCol_t* rankVal;
+     U32* rankStats;
+     U32* rankStart0;
+     sortedSymbol_t* sortedSymbol;
+     BYTE* weightList;
+     size_t spaceUsed32 = 0;
+
+     rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+     spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+     rankStats = (U32 *)workSpace + spaceUsed32;
+     spaceUsed32 += HUF_TABLELOG_MAX + 1;
+     rankStart0 = (U32 *)workSpace + spaceUsed32;
+     spaceUsed32 += HUF_TABLELOG_MAX + 2;
+     sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+     spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+     weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+     rankStart = rankStart0 + 1;
+     ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */   /* is not necessary, even though some analyzers complain ... */
+
+     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+     if (HUF_isError(iSize)) return iSize;
+
+     /* check result */
+     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+     /* find maxWeight */
+     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}   /* necessarily finds a solution before 0 */
+
+     /* Get start index of each weight */
+     {   U32 w, nextRankStart = 0;
+         for (w=1; w<maxW+1; w++) {
+             U32 curr = nextRankStart;
+             nextRankStart += rankStats[w];
+             rankStart[w] = curr;
+         }
+         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list */
+         sizeOfSort = nextRankStart;
+     }
+
+     /* sort symbols by weight */
+     {   U32 s;
+         for (s=0; s<nbSymbols; s++) {
+             U32 const w = weightList[s];
+             U32 const r = rankStart[w]++;
+             sortedSymbol[r].symbol = (BYTE)s;
+             sortedSymbol[r].weight = (BYTE)w;
+         }
+         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+     }
+
+     /* Build rankVal */
+     {   U32* const rankVal0 = rankVal[0];
+         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+             U32 nextRankVal = 0;
+             U32 w;
+             for (w=1; w<maxW+1; w++) {
+                 U32 curr = nextRankVal;
+                 nextRankVal += rankStats[w] << (w+rescale);
+                 rankVal0[w] = curr;
+         }   }
+         {   U32 const minBits = tableLog+1 - maxW;
+             U32 consumed;
+             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                 U32* const rankValPtr = rankVal[consumed];
+                 U32 w;
+                 for (w = 1; w < maxW+1; w++) {
+                     rankValPtr[w] = rankVal0[w] >> consumed;
+     }   }   }   }
+
+     HUF_fillDTableX2(dt, maxTableLog,
+                    sortedSymbol, sizeOfSort,
+                    rankStart0, rankVal, maxW,
+                    tableLog+1);
+
+     dtd.tableLog = (BYTE)maxTableLog;
+     dtd.tableType = 1;
+     ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+     return iSize;
+ }
+
+
+ FORCE_INLINE_TEMPLATE U32
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+ {
+     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+     ZSTD_memcpy(op, dt+val, 2);
+     BIT_skipBits(DStream, dt[val].nbBits);
+     return dt[val].length;
+ }
+
+ FORCE_INLINE_TEMPLATE U32
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+ {
+     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+     ZSTD_memcpy(op, dt+val, 1);
+     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+     else {
+         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+             BIT_skipBits(DStream, dt[val].nbBits);
+             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+     }   }
+     return 1;
+ }
+
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
+     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+     if (MEM_64bits()) \
+         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ HINT_INLINE size_t
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+                 const HUF_DEltX2* const dt, const U32 dtLog)
+ {
+     BYTE* const pStart = p;
+
+     /* up to 8 symbols at a time */
+     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+     }
+
+     /* closer to end : up to 2 symbols at a time */
+     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+     while (p <= pEnd-2)
+         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+     if (p < pEnd)
+         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+     return p-pStart;
+ }
+
+
775
+ FORCE_INLINE_TEMPLATE size_t
776
+ HUF_decompress1X2_usingDTable_internal_body(
777
+ void* dst, size_t dstSize,
778
+ const void* cSrc, size_t cSrcSize,
779
+ const HUF_DTable* DTable)
780
+ {
781
+ BIT_DStream_t bitD;
782
+
783
+ /* Init */
784
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
785
+
786
+ /* decode */
787
+ { BYTE* const ostart = (BYTE*) dst;
788
+ BYTE* const oend = ostart + dstSize;
789
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
790
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
791
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
792
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
793
+ }
794
+
795
+ /* check */
796
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
797
+
798
+ /* decoded size */
799
+ return dstSize;
800
+ }
801
+
802
+ FORCE_INLINE_TEMPLATE size_t
803
+ HUF_decompress4X2_usingDTable_internal_body(
804
+ void* dst, size_t dstSize,
805
+ const void* cSrc, size_t cSrcSize,
806
+ const HUF_DTable* DTable)
807
+ {
808
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
809
+
810
+ { const BYTE* const istart = (const BYTE*) cSrc;
811
+ BYTE* const ostart = (BYTE*) dst;
812
+ BYTE* const oend = ostart + dstSize;
813
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
814
+ const void* const dtPtr = DTable+1;
815
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
816
+
817
+ /* Init */
818
+ BIT_DStream_t bitD1;
819
+ BIT_DStream_t bitD2;
820
+ BIT_DStream_t bitD3;
821
+ BIT_DStream_t bitD4;
822
+ size_t const length1 = MEM_readLE16(istart);
823
+ size_t const length2 = MEM_readLE16(istart+2);
824
+ size_t const length3 = MEM_readLE16(istart+4);
825
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
826
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
827
+ const BYTE* const istart2 = istart1 + length1;
828
+ const BYTE* const istart3 = istart2 + length2;
829
+ const BYTE* const istart4 = istart3 + length3;
830
+ size_t const segmentSize = (dstSize+3) / 4;
831
+ BYTE* const opStart2 = ostart + segmentSize;
832
+ BYTE* const opStart3 = opStart2 + segmentSize;
833
+ BYTE* const opStart4 = opStart3 + segmentSize;
834
+ BYTE* op1 = ostart;
835
+ BYTE* op2 = opStart2;
836
+ BYTE* op3 = opStart3;
837
+ BYTE* op4 = opStart4;
838
+ U32 endSignal = 1;
839
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
840
+ U32 const dtLog = dtd.tableLog;
841
+
842
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
843
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
844
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
845
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
846
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
847
+
848
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
849
+ for ( ; (endSignal) & (op4 < olimit); ) {
850
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
851
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
852
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
853
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
854
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
855
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
856
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
857
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
858
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
859
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
860
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
861
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
862
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
863
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
864
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
865
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
866
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
867
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
868
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
869
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
870
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
871
+ #else
872
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
873
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
874
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
875
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
876
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
877
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
878
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
879
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
880
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
881
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
882
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
883
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
884
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
885
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
886
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
887
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
888
+ endSignal = (U32)LIKELY(
889
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
890
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
891
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
892
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
893
+ #endif
894
+ }
895
+
896
+ /* check corruption */
897
+ if (op1 > opStart2) return ERROR(corruption_detected);
898
+ if (op2 > opStart3) return ERROR(corruption_detected);
899
+ if (op3 > opStart4) return ERROR(corruption_detected);
900
+ /* note : op4 already verified within main loop */
901
+
902
+ /* finish bitStreams one by one */
903
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
904
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
905
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
906
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
907
+
908
+ /* check */
909
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
910
+ if (!endCheck) return ERROR(corruption_detected); }
911
+
912
+ /* decoded size */
913
+ return dstSize;
914
+ }
915
+ }
916
+
917
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
918
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
919
+
920
+ size_t HUF_decompress1X2_usingDTable(
921
+ void* dst, size_t dstSize,
922
+ const void* cSrc, size_t cSrcSize,
923
+ const HUF_DTable* DTable)
924
+ {
925
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
926
+ if (dtd.tableType != 1) return ERROR(GENERIC);
927
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
928
+ }
929
+
930
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
931
+ const void* cSrc, size_t cSrcSize,
932
+ void* workSpace, size_t wkspSize)
933
+ {
934
+ const BYTE* ip = (const BYTE*) cSrc;
935
+
936
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
937
+ workSpace, wkspSize);
938
+ if (HUF_isError(hSize)) return hSize;
939
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
940
+ ip += hSize; cSrcSize -= hSize;
941
+
942
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
943
+ }
944
+
945
+
946
+ size_t HUF_decompress4X2_usingDTable(
947
+ void* dst, size_t dstSize,
948
+ const void* cSrc, size_t cSrcSize,
949
+ const HUF_DTable* DTable)
950
+ {
951
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
952
+ if (dtd.tableType != 1) return ERROR(GENERIC);
953
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954
+ }
955
+
956
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
957
+ const void* cSrc, size_t cSrcSize,
958
+ void* workSpace, size_t wkspSize, int bmi2)
959
+ {
960
+ const BYTE* ip = (const BYTE*) cSrc;
961
+
962
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
963
+ workSpace, wkspSize);
964
+ if (HUF_isError(hSize)) return hSize;
965
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
966
+ ip += hSize; cSrcSize -= hSize;
967
+
968
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
969
+ }
970
+
971
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
972
+ const void* cSrc, size_t cSrcSize,
973
+ void* workSpace, size_t wkspSize)
974
+ {
975
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
976
+ }
977
+
978
+
979
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
980
+
981
+
982
+ /* ***********************************/
983
+ /* Universal decompression selectors */
984
+ /* ***********************************/
985
+
986
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
987
+ const void* cSrc, size_t cSrcSize,
988
+ const HUF_DTable* DTable)
989
+ {
990
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
991
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
992
+ (void)dtd;
993
+ assert(dtd.tableType == 0);
994
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
995
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
996
+ (void)dtd;
997
+ assert(dtd.tableType == 1);
998
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
999
+ #else
1000
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1001
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1002
+ #endif
1003
+ }
1004
+
1005
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1006
+ const void* cSrc, size_t cSrcSize,
1007
+ const HUF_DTable* DTable)
1008
+ {
1009
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1010
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1011
+ (void)dtd;
1012
+ assert(dtd.tableType == 0);
1013
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1014
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1015
+ (void)dtd;
1016
+ assert(dtd.tableType == 1);
1017
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1018
+ #else
1019
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1020
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1021
+ #endif
1022
+ }
1023
+
1024
+
1025
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1026
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1027
+ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1028
+ {
1029
+ /* single, double, quad */
1030
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1031
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1032
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1033
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1034
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1035
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1036
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1037
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1038
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1039
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1040
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1041
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1042
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1043
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1044
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1045
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1046
+ };
1047
+ #endif
1048
+
1049
+ /** HUF_selectDecoder() :
1050
+ * Tells which decoder is likely to decode faster,
1051
+ * based on a set of pre-computed metrics.
1052
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1053
+ * Assumption : 0 < dstSize <= 128 KB */
1054
+ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1055
+ {
1056
+ assert(dstSize > 0);
1057
+ assert(dstSize <= 128*1024);
1058
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1059
+ (void)dstSize;
1060
+ (void)cSrcSize;
1061
+ return 0;
1062
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1063
+ (void)dstSize;
1064
+ (void)cSrcSize;
1065
+ return 1;
1066
+ #else
1067
+ /* decoder timing evaluation */
1068
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1069
+ U32 const D256 = (U32)(dstSize >> 8);
1070
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1071
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1072
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1073
+ return DTime1 < DTime0;
1074
+ }
1075
+ #endif
1076
+ }
1077
+
1078
+
1079
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1080
+ size_t dstSize, const void* cSrc,
1081
+ size_t cSrcSize, void* workSpace,
1082
+ size_t wkspSize)
1083
+ {
1084
+ /* validation checks */
1085
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1086
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1087
+
1088
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1089
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1090
+ (void)algoNb;
1091
+ assert(algoNb == 0);
1092
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1093
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1094
+ (void)algoNb;
1095
+ assert(algoNb == 1);
1096
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1097
+ #else
1098
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1099
+ cSrcSize, workSpace, wkspSize):
1100
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1101
+ #endif
1102
+ }
1103
+ }
1104
+
1105
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1106
+ const void* cSrc, size_t cSrcSize,
1107
+ void* workSpace, size_t wkspSize)
1108
+ {
1109
+ /* validation checks */
1110
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1111
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1112
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1113
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1114
+
1115
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1116
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1117
+ (void)algoNb;
1118
+ assert(algoNb == 0);
1119
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1120
+ cSrcSize, workSpace, wkspSize);
1121
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1122
+ (void)algoNb;
1123
+ assert(algoNb == 1);
1124
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1125
+ cSrcSize, workSpace, wkspSize);
1126
+ #else
1127
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1128
+ cSrcSize, workSpace, wkspSize):
1129
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1130
+ cSrcSize, workSpace, wkspSize);
1131
+ #endif
1132
+ }
1133
+ }
1134
+
1135
+
1136
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1137
+ {
1138
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1139
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1140
+ (void)dtd;
1141
+ assert(dtd.tableType == 0);
1142
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1143
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1144
+ (void)dtd;
1145
+ assert(dtd.tableType == 1);
1146
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1147
+ #else
1148
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1149
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1150
+ #endif
1151
+ }
1152
+
1153
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ {
+     const BYTE* ip = (const BYTE*) cSrc;
+
+     size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+     if (HUF_isError(hSize)) return hSize;
+     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+     ip += hSize; cSrcSize -= hSize;
+
+     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ }
+ #endif
+
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+ {
+     DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+     (void)dtd;
+     assert(dtd.tableType == 0);
+     return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+     (void)dtd;
+     assert(dtd.tableType == 1);
+     return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #else
+     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #endif
+ }
+
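+ /* Annotation: the 4X entry points decode an input split into four
+  * interleaved bitstreams, which keeps more independent work in flight than
+  * the single-stream 1X variants; the dispatch pattern is otherwise
+  * identical. */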
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ {
+     /* validation checks */
+     if (dstSize == 0) return ERROR(dstSize_tooSmall);
+     if (cSrcSize == 0) return ERROR(corruption_detected);
+
+     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+         (void)algoNb;
+         assert(algoNb == 0);
+         return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+         (void)algoNb;
+         assert(algoNb == 1);
+         return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #else
+         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #endif
+     }
+ }
+
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_readDTableX1_wksp(DTable, src, srcSize,
+                                  workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                               const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                        workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+ #endif
+
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_readDTableX2_wksp(DTable, src, srcSize,
+                                  workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                               const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+                                        workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+ #endif
+
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                        workSpace, sizeof(workSpace));
+ }
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+ #endif
+
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                               const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                        workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+ #endif
+
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+     static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
+ #endif
+
+     /* validation checks */
+     if (dstSize == 0) return ERROR(dstSize_tooSmall);
+     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+     if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+     if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+         (void)algoNb;
+         assert(algoNb == 0);
+         return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+         (void)algoNb;
+         assert(algoNb == 1);
+         return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
+ #else
+         return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+ #endif
+     }
+ }
+
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     /* validation checks */
+     if (dstSize == 0) return ERROR(dstSize_tooSmall);
+     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+     if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+     if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+         (void)algoNb;
+         assert(algoNb == 0);
+         return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+         (void)algoNb;
+         assert(algoNb == 1);
+         return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+ #else
+         return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                         HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ #endif
+     }
+ }
+
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                          workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+                              const void* cSrc, size_t cSrcSize)
+ {
+     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+     return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+                                       workSpace, sizeof(workSpace));
+ }
+ #endif