extzstd 0.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/README.md +38 -56
  4. data/contrib/zstd/CHANGELOG +613 -0
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/CONTRIBUTING.md +406 -0
  7. data/contrib/zstd/COPYING +339 -0
  8. data/contrib/zstd/Makefile +420 -0
  9. data/contrib/zstd/README.md +179 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +292 -0
  12. data/contrib/zstd/lib/BUCK +234 -0
  13. data/contrib/zstd/lib/Makefile +451 -0
  14. data/contrib/zstd/lib/README.md +207 -0
  15. data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
  16. data/contrib/zstd/lib/common/compiler.h +288 -0
  17. data/contrib/zstd/lib/common/cpu.h +213 -0
  18. data/contrib/zstd/lib/common/debug.c +24 -0
  19. data/contrib/zstd/lib/common/debug.h +107 -0
  20. data/contrib/zstd/lib/common/entropy_common.c +362 -0
  21. data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
  22. data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
  23. data/contrib/zstd/{common → lib/common}/fse.h +173 -92
  24. data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
  25. data/contrib/zstd/lib/common/huf.h +361 -0
  26. data/contrib/zstd/{common → lib/common}/mem.h +115 -59
  27. data/contrib/zstd/lib/common/pool.c +350 -0
  28. data/contrib/zstd/lib/common/pool.h +84 -0
  29. data/contrib/zstd/lib/common/threading.c +122 -0
  30. data/contrib/zstd/lib/common/threading.h +155 -0
  31. data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
  32. data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
  33. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  34. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  35. data/contrib/zstd/lib/common/zstd_errors.h +95 -0
  36. data/contrib/zstd/lib/common/zstd_internal.h +478 -0
  37. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
  38. data/contrib/zstd/lib/compress/hist.c +181 -0
  39. data/contrib/zstd/lib/compress/hist.h +75 -0
  40. data/contrib/zstd/lib/compress/huf_compress.c +913 -0
  41. data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  49. data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
  50. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  52. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  54. data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
  56. data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
  58. data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  60. data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
  62. data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
  63. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  65. data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
  69. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
  70. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  71. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  73. data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  75. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  77. data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
  78. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
  79. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  80. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
  81. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
  94. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
  95. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  96. data/contrib/zstd/lib/zstd.h +2391 -0
  97. data/ext/depend +2 -0
  98. data/ext/extconf.rb +15 -6
  99. data/ext/extzstd.c +76 -145
  100. data/ext/extzstd.h +80 -31
  101. data/ext/extzstd_stream.c +417 -142
  102. data/ext/libzstd_conf.h +8 -0
  103. data/ext/zstd_common.c +10 -7
  104. data/ext/zstd_compress.c +14 -5
  105. data/ext/zstd_decompress.c +5 -4
  106. data/ext/zstd_dictbuilder.c +9 -4
  107. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  108. data/ext/zstd_legacy_v01.c +3 -1
  109. data/ext/zstd_legacy_v02.c +3 -1
  110. data/ext/zstd_legacy_v03.c +3 -1
  111. data/ext/zstd_legacy_v04.c +3 -1
  112. data/ext/zstd_legacy_v05.c +3 -1
  113. data/ext/zstd_legacy_v06.c +3 -1
  114. data/ext/zstd_legacy_v07.c +3 -1
  115. data/gemstub.rb +10 -24
  116. data/lib/extzstd.rb +64 -179
  117. data/lib/extzstd/version.rb +6 -1
  118. data/test/test_basic.rb +9 -6
  119. metadata +113 -57
  120. data/HISTORY.ja +0 -5
  121. data/contrib/zstd/common/entropy_common.c +0 -225
  122. data/contrib/zstd/common/huf.h +0 -228
  123. data/contrib/zstd/common/zstd_common.c +0 -83
  124. data/contrib/zstd/common/zstd_errors.h +0 -60
  125. data/contrib/zstd/common/zstd_internal.h +0 -267
  126. data/contrib/zstd/compress/huf_compress.c +0 -533
  127. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  128. data/contrib/zstd/compress/zstd_compress.c +0 -3264
  129. data/contrib/zstd/compress/zstd_opt.h +0 -900
  130. data/contrib/zstd/decompress/huf_decompress.c +0 -883
  131. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  132. data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
  133. data/contrib/zstd/dictBuilder/zdict.h +0 -111
  134. data/contrib/zstd/zstd.h +0 -640
@@ -0,0 +1,110 @@
1
+ /*
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTDMT_COMPRESS_H
12
+ #define ZSTDMT_COMPRESS_H
13
+
14
+ #if defined (__cplusplus)
15
+ extern "C" {
16
+ #endif
17
+
18
+
19
+ /* Note : This is an internal API.
20
+ * These APIs used to be exposed with ZSTDLIB_API,
21
+ * because it used to be the only way to invoke MT compression.
22
+ * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
23
+ *
24
+ * This API requires ZSTD_MULTITHREAD to be defined during compilation,
25
+ * otherwise ZSTDMT_createCCtx*() will fail.
26
+ */
27
+
28
+ /* === Dependencies === */
29
+ #include "../common/zstd_deps.h" /* size_t */
30
+ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
31
+ #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
32
+
33
+
34
+ /* === Constants === */
35
+ #ifndef ZSTDMT_NBWORKERS_MAX
36
+ # define ZSTDMT_NBWORKERS_MAX 200
37
+ #endif
38
+ #ifndef ZSTDMT_JOBSIZE_MIN
39
+ # define ZSTDMT_JOBSIZE_MIN (1 MB)
40
+ #endif
41
+ #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
42
+ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
43
+
44
+
45
+ /* ========================================================
46
+ * === Private interface, for use by ZSTD_compress.c ===
47
+ * === Not exposed in libzstd. Never invoke directly ===
48
+ * ======================================================== */
49
+
50
+ /* === Memory management === */
51
+ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
52
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
53
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
54
+ ZSTD_customMem cMem,
55
+ ZSTD_threadPool *pool);
56
+ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
57
+
58
+ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
59
+
60
+ /* === Streaming functions === */
61
+
62
+ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
63
+
64
+ /*! ZSTDMT_initCStream_internal() :
65
+ * Private use only. Init streaming operation.
66
+ * expects params to be valid.
67
+ * must receive dict, or cdict, or none, but not both.
68
+ * @return : 0, or an error code */
69
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
70
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
71
+ const ZSTD_CDict* cdict,
72
+ ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
73
+
74
+ /*! ZSTDMT_compressStream_generic() :
75
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
76
+ * depending on flush directive.
77
+ * @return : minimum amount of data still to be flushed
78
+ * 0 if fully flushed
79
+ * or an error code
80
+ * note : needs to be init using any ZSTD_initCStream*() variant */
81
+ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
82
+ ZSTD_outBuffer* output,
83
+ ZSTD_inBuffer* input,
84
+ ZSTD_EndDirective endOp);
85
+
86
+ /*! ZSTDMT_toFlushNow()
87
+ * Tell how many bytes are ready to be flushed immediately.
88
+ * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
89
+ * If it returns 0, it means there is no active job,
90
+ * or, it means oldest job is still active, but everything produced has been flushed so far,
91
+ * therefore flushing is limited by speed of oldest job. */
92
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
93
+
94
+ /*! ZSTDMT_updateCParams_whileCompressing() :
95
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
96
+ * New parameters will be applied to next compression job. */
97
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
98
+
99
+ /*! ZSTDMT_getFrameProgression():
100
+ * tells how much data has been consumed (input) and produced (output) for current frame.
101
+ * able to count progression inside worker threads.
102
+ */
103
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
104
+
105
+
106
+ #if defined (__cplusplus)
107
+ }
108
+ #endif
109
+
110
+ #endif /* ZSTDMT_COMPRESS_H */
@@ -0,0 +1,1350 @@
1
+ /* ******************************************************************
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
13
+ ****************************************************************** */
14
+
15
+ /* **************************************************************
16
+ * Dependencies
17
+ ****************************************************************/
18
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
22
+ #define HUF_STATIC_LINKING_ONLY
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
25
+
26
+ /* **************************************************************
27
+ * Macros
28
+ ****************************************************************/
29
+
30
+ /* Each of these two optional macros forces the use of one of the two
31
+ * Huffman decompression implementations. You can't force both
32
+ * at the same time.
33
+ */
34
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
35
+ defined(HUF_FORCE_DECOMPRESS_X2)
36
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
37
+ #endif
38
+
39
+
40
+ /* **************************************************************
41
+ * Error Management
42
+ ****************************************************************/
43
+ #define HUF_isError ERR_isError
44
+
45
+
46
+ /* **************************************************************
47
+ * Byte alignment for workSpace management
48
+ ****************************************************************/
49
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
50
+ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
51
+
52
+
53
+ /* **************************************************************
54
+ * BMI2 Variant Wrappers
55
+ ****************************************************************/
56
+ #if DYNAMIC_BMI2
57
+ /* HUF_DGEN(fn), dynamic-dispatch flavour : emits fn##_default and fn##_bmi2 (both simply forward to fn##_body, the latter compiled with TARGET_ATTRIBUTE("bmi2")), plus a dispatcher fn() that picks one of them from its `bmi2` argument. */
58
+ #define HUF_DGEN(fn) \
59
+ \
60
+ static size_t fn##_default( \
61
+ void* dst, size_t dstSize, \
62
+ const void* cSrc, size_t cSrcSize, \
63
+ const HUF_DTable* DTable) \
64
+ { \
65
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
66
+ } \
67
+ \
68
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
69
+ void* dst, size_t dstSize, \
70
+ const void* cSrc, size_t cSrcSize, \
71
+ const HUF_DTable* DTable) \
72
+ { \
73
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
74
+ } \
75
+ \
76
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
77
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
78
+ { \
79
+ if (bmi2) { \
80
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
81
+ } \
82
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
83
+ }
84
+
85
+ #else
86
+ /* HUF_DGEN(fn), static flavour : emits only fn(), which ignores `bmi2` and forwards to fn##_body. Same signature as the dispatcher above, so callers are identical in both builds. */
87
+ #define HUF_DGEN(fn) \
88
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
89
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
90
+ { \
91
+ (void)bmi2; \
92
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
93
+ }
94
+
95
+ #endif /* DYNAMIC_BMI2 */
96
+
97
+
98
+ /*-***************************/
99
+ /* generic DTableDesc */
100
+ /*-***************************/
101
+ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; /* table header : read from / written to the first cell of a HUF_DTable by the functions below */
102
+ /* HUF_getDTableDesc() : returns a by-value copy of the header stored in the first sizeof(DTableDesc) bytes of `table`. */
103
+ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
104
+ {
105
+ DTableDesc dtd;
106
+ ZSTD_memcpy(&dtd, table, sizeof(dtd)); /* byte copy rather than a pointer cast */
107
+ return dtd;
108
+ }
109
+
110
+
111
+ #ifndef HUF_FORCE_DECOMPRESS_X2
112
+
113
+ /*-***************************/
114
+ /* single-symbol decoding */
115
+ /*-***************************/
116
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding : decoded byte + number of bits to skip */
117
+
118
+ /**
119
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
120
+ * a time.
121
+ * The 16-bit pattern is built in the byte order that matches the in-memory
+ * layout { byte, nbBits } for the host endianness, then replicated 4 times.
+ */
122
+ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
123
+ U64 D4;
124
+ if (MEM_isLittleEndian()) {
125
+ D4 = symbol + (nbBits << 8); /* symbol in the low byte, nbBits in the next one */
126
+ } else {
127
+ D4 = (symbol << 8) + nbBits; /* mirrored order for big-endian hosts */
128
+ }
129
+ D4 *= 0x0001000100010001ULL; /* copy the 16-bit value into each of the four 16-bit lanes */
130
+ return D4;
131
+ }
132
+
133
+ typedef struct { /* scratch area used by HUF_readDTableX1_wksp_bmi2(), overlaid on the caller-provided workSpace */
134
+ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* number of symbols of each weight, filled by HUF_readStats_wksp() */
135
+ U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* running write position, per weight, into symbols[] */
136
+ U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; /* workspace handed over to HUF_readStats_wksp() */
137
+ BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; /* symbol values ordered by weight */
138
+ BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* weight of each symbol, filled by HUF_readStats_wksp() */
139
+ } HUF_ReadDTableX1_Workspace;
140
+
141
+
142
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
143
+ { /* Convenience wrapper : same as HUF_readDTableX1_wksp_bmi2() with the bmi2 flag forced to 0. */
144
+ return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
145
+ }
146
+
147
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
148
+ { /* Builds a single-symbol decoding table : reads the weights from `src` through HUF_readStats_wksp(), updates the header in DTable[0], then fills the HUF_DEltX1 cells that start at DTable+1. @return : the value returned by HUF_readStats_wksp(), or an error code. `bmi2` is only forwarded to HUF_readStats_wksp(). */
149
+ U32 tableLog = 0;
150
+ U32 nbSymbols = 0;
151
+ size_t iSize;
152
+ void* const dtPtr = DTable + 1; /* decoding cells start right after the header cell */
153
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
154
+ HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
155
+
156
+ DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
157
+ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); /* caller's workspace cannot hold the scratch struct */
158
+
159
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
160
+ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
161
+
162
+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
163
+ if (HUF_isError(iSize)) return iSize;
164
+
165
+ /* Table header */
166
+ { DTableDesc dtd = HUF_getDTableDesc(DTable);
167
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
168
+ dtd.tableType = 0; /* 0 is the type value the X1 entry points below accept */
169
+ dtd.tableLog = (BYTE)tableLog;
170
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
171
+ }
172
+
173
+ /* Compute symbols and rankStart given rankVal:
174
+ *
175
+ * rankVal already contains the number of values of each weight.
176
+ *
177
+ * symbols contains the symbols ordered by weight. First are the rankVal[0]
178
+ * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
179
+ * symbols[0] is filled (but unused) to avoid a branch.
180
+ *
181
+ * rankStart contains the offset where each rank belongs in the DTable.
182
+ * rankStart[0] is not filled because there are no entries in the table for
183
+ * weight 0.
184
+ */
185
+ {
186
+ int n;
187
+ int nextRankStart = 0;
188
+ int const unroll = 4;
189
+ int const nLimit = (int)nbSymbols - unroll + 1; /* last n for which a full group of `unroll` symbols remains */
190
+ for (n=0; n<(int)tableLog+1; n++) { /* prefix sum of rankVal[] */
191
+ U32 const curr = nextRankStart;
192
+ nextRankStart += wksp->rankVal[n];
193
+ wksp->rankStart[n] = curr;
194
+ }
195
+ for (n=0; n < nLimit; n += unroll) { /* place symbols by weight, `unroll` at a time */
196
+ int u;
197
+ for (u=0; u < unroll; ++u) {
198
+ size_t const w = wksp->huffWeight[n+u];
199
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
200
+ }
201
+ }
202
+ for (; n < (int)nbSymbols; ++n) { /* leftover symbols, one by one */
203
+ size_t const w = wksp->huffWeight[n];
204
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
205
+ }
206
+ }
207
+
208
+ /* fill DTable
209
+ * We fill all entries of each weight in order.
210
+ * That way length is a constant for each iteration of the outer loop.
211
+ * We can switch based on the length to a different inner loop which is
212
+ * optimized for that particular case.
213
+ */
214
+ {
215
+ U32 w;
216
+ int symbol=wksp->rankVal[0]; /* index into symbols[] : skips the weight-0 symbols */
217
+ int rankStart=0; /* first DTable cell of the current weight */
218
+ for (w=1; w<tableLog+1; ++w) {
219
+ int const symbolCount = wksp->rankVal[w];
220
+ int const length = (1 << w) >> 1; /* cells per symbol of weight w : 2^(w-1) */
221
+ int uStart = rankStart;
222
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
223
+ int s;
224
+ int u;
225
+ switch (length) {
226
+ case 1:
227
+ for (s=0; s<symbolCount; ++s) {
228
+ HUF_DEltX1 D;
229
+ D.byte = wksp->symbols[symbol + s];
230
+ D.nbBits = nbBits;
231
+ dt[uStart] = D;
232
+ uStart += 1;
233
+ }
234
+ break;
235
+ case 2:
236
+ for (s=0; s<symbolCount; ++s) {
237
+ HUF_DEltX1 D;
238
+ D.byte = wksp->symbols[symbol + s];
239
+ D.nbBits = nbBits;
240
+ dt[uStart+0] = D;
241
+ dt[uStart+1] = D;
242
+ uStart += 2;
243
+ }
244
+ break;
245
+ case 4:
246
+ for (s=0; s<symbolCount; ++s) {
247
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
248
+ MEM_write64(dt + uStart, D4); /* one 64-bit store = 4 cells */
249
+ uStart += 4;
250
+ }
251
+ break;
252
+ case 8:
253
+ for (s=0; s<symbolCount; ++s) {
254
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
255
+ MEM_write64(dt + uStart, D4);
256
+ MEM_write64(dt + uStart + 4, D4);
257
+ uStart += 8;
258
+ }
259
+ break;
260
+ default: /* length is a power of two >= 16 here, so it is written 16 cells per iteration */
261
+ for (s=0; s<symbolCount; ++s) {
262
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
263
+ for (u=0; u < length; u += 16) {
264
+ MEM_write64(dt + uStart + u + 0, D4);
265
+ MEM_write64(dt + uStart + u + 4, D4);
266
+ MEM_write64(dt + uStart + u + 8, D4);
267
+ MEM_write64(dt + uStart + u + 12, D4);
268
+ }
269
+ assert(u == length);
270
+ uStart += length;
271
+ }
272
+ break;
273
+ }
274
+ symbol += symbolCount;
275
+ rankStart += symbolCount * length;
276
+ }
277
+ }
278
+ return iSize;
279
+ }
280
+
281
+ FORCE_INLINE_TEMPLATE BYTE
281
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
282
+ { /* Decodes one symbol : looks up the table cell selected by the next dtLog bits, then skips only that cell's nbBits. */
283
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
284
+ BYTE const c = dt[val].byte;
285
+ BIT_skipBits(Dstream, dt[val].nbBits);
286
+ return c;
287
+ }
289
+
290
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
291
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) /* unconditional decode; relies on `dt` and `dtLog` being in scope at the expansion site */
292
+ /* _1 : decodes only when MEM_64bits() or HUF_TABLELOG_MAX<=12, otherwise expands to nothing executed */
293
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
294
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
295
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
296
+ /* _2 : decodes only when MEM_64bits() */
297
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
298
+ if (MEM_64bits()) \
299
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
300
+
301
+ HINT_INLINE size_t
302
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
303
+ { /* Decodes symbols from bitDPtr into [p, pEnd). @return : pEnd - p (as received), regardless of the bitstream state; callers check the stream end themselves. */
304
+ BYTE* const pStart = p;
305
+
306
+ /* up to 4 symbols at a time */
307
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { /* bitwise & : both operands are always evaluated, so the stream is reloaded on every test */
308
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
309
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
310
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
311
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
312
+ }
313
+
314
+ /* [0-3] symbols remaining */
315
+ if (MEM_32bits())
316
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
317
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
318
+
319
+ /* no more data to retrieve from bitstream, no need to reload */
320
+ while (p < pEnd)
321
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
322
+
323
+ return pEnd-pStart;
324
+ }
325
+
326
+ FORCE_INLINE_TEMPLATE size_t
327
+ HUF_decompress1X1_usingDTable_internal_body(
328
+ void* dst, size_t dstSize,
329
+ const void* cSrc, size_t cSrcSize,
330
+ const HUF_DTable* DTable)
331
+ { /* Single-stream decoder : fills dst[0..dstSize) from one bitstream. @return : dstSize, or an error code. */
332
+ BYTE* op = (BYTE*)dst;
333
+ BYTE* const oend = op + dstSize;
334
+ const void* dtPtr = DTable + 1; /* decoding cells follow the header cell */
335
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
336
+ BIT_DStream_t bitD;
337
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
338
+ U32 const dtLog = dtd.tableLog;
339
+
340
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); /* CHECK_F is defined elsewhere; presumably returns early on error - confirm */
341
+
342
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); /* return value unused : it is always oend-op */
343
+
344
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
345
+
346
+ return dstSize;
347
+ }
348
+
349
+ FORCE_INLINE_TEMPLATE size_t
349
+ HUF_decompress4X1_usingDTable_internal_body(
350
+ void* dst, size_t dstSize,
351
+ const void* cSrc, size_t cSrcSize,
352
+ const HUF_DTable* DTable)
353
+ { /* 4-stream decoder : cSrc starts with a 6-byte jump table (3 little-endian 16-bit stream lengths, the 4th is deduced), dst is split into 4 segments of (dstSize+3)/4 bytes, the last one taking what remains. @return : dstSize, or an error code. */
354
+ /* Check */
355
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
356
+
357
+ { const BYTE* const istart = (const BYTE*) cSrc;
358
+ BYTE* const ostart = (BYTE*) dst;
359
+ BYTE* const oend = ostart + dstSize;
360
+ BYTE* const olimit = oend - 3;
361
+ const void* const dtPtr = DTable + 1;
362
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
363
+
364
+ /* Init */
365
+ BIT_DStream_t bitD1;
366
+ BIT_DStream_t bitD2;
367
+ BIT_DStream_t bitD3;
368
+ BIT_DStream_t bitD4;
369
+ size_t const length1 = MEM_readLE16(istart);
370
+ size_t const length2 = MEM_readLE16(istart+2);
371
+ size_t const length3 = MEM_readLE16(istart+4);
372
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); /* wraps around when the declared lengths exceed cSrcSize; caught below */
373
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
374
+ const BYTE* const istart2 = istart1 + length1;
375
+ const BYTE* const istart3 = istart2 + length2;
376
+ const BYTE* const istart4 = istart3 + length3;
377
+ const size_t segmentSize = (dstSize+3) / 4;
378
+ BYTE* const opStart2 = ostart + segmentSize;
379
+ BYTE* const opStart3 = opStart2 + segmentSize;
380
+ BYTE* const opStart4 = opStart3 + segmentSize;
381
+ BYTE* op1 = ostart;
382
+ BYTE* op2 = opStart2;
383
+ BYTE* op3 = opStart3;
384
+ BYTE* op4 = opStart4;
385
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
386
+ U32 const dtLog = dtd.tableLog;
387
+ U32 endSignal = 1;
388
+
389
+ if ((length4 > cSrcSize) || (opStart4 > oend)) return ERROR(corruption_detected); /* overflow : either length4 wrapped around, or dst is too small for the 4-way split (dstSize of 1, 2 or 5), in which case streams 1-3 would be written past oend */
390
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
391
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
392
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
393
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
394
+
395
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
396
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
397
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
398
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
399
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
400
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
401
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
402
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
403
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
404
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
405
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
406
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
407
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
408
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
409
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
410
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
411
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
412
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
413
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; /* leave the fast loop as soon as any stream is no longer "unfinished" */
414
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
415
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
416
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
417
+ }
418
+
419
+ /* check corruption */
420
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
421
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
422
+ if (op1 > opStart2) return ERROR(corruption_detected);
423
+ if (op2 > opStart3) return ERROR(corruption_detected);
424
+ if (op3 > opStart4) return ERROR(corruption_detected);
425
+ /* note : op4 supposed already verified within main loop */
426
+
427
+ /* finish bitStreams one by one */
428
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
429
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
430
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
431
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
432
+
433
+ /* check */
434
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
435
+ if (!endCheck) return ERROR(corruption_detected); }
436
+
437
+ /* decoded size */
438
+ return dstSize;
439
+ }
440
+ }
442
+
443
+
444
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, /* pointer type matching the *_usingDTable() decoders (no bmi2 argument) */
445
+ const void *cSrc,
446
+ size_t cSrcSize,
447
+ const HUF_DTable *DTable);
448
+ /* Instantiate the HUF_DGEN wrappers : each defines <name>(), taking an extra `bmi2` argument and forwarding to <name>_body() defined above. */
449
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
450
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
451
+
452
+
453
+
454
+ size_t HUF_decompress1X1_usingDTable(
455
+ void* dst, size_t dstSize,
456
+ const void* cSrc, size_t cSrcSize,
457
+ const HUF_DTable* DTable)
458
+ { /* Single-stream decoding with an already-built table. Rejects tables whose header type is not 0 (the value written by HUF_readDTableX1_wksp_bmi2()). */
459
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
460
+ if (dtd.tableType != 0) return ERROR(GENERIC);
461
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
462
+ }
463
+
464
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
465
+ const void* cSrc, size_t cSrcSize,
466
+ void* workSpace, size_t wkspSize)
467
+ { /* Reads the table description at the start of cSrc into DCtx, then decodes the remaining bytes as a single stream. */
468
+ const BYTE* ip = (const BYTE*) cSrc;
469
+
470
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
471
+ if (HUF_isError(hSize)) return hSize;
472
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong); /* nothing left after the table description */
473
+ ip += hSize; cSrcSize -= hSize;
474
+
475
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
476
+ }
477
+
478
+
479
+ size_t HUF_decompress4X1_usingDTable(
480
+ void* dst, size_t dstSize,
481
+ const void* cSrc, size_t cSrcSize,
482
+ const HUF_DTable* DTable)
483
+ { /* 4-stream decoding with an already-built table. Rejects tables whose header type is not 0 (the value written by HUF_readDTableX1_wksp_bmi2()). */
484
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
485
+ if (dtd.tableType != 0) return ERROR(GENERIC);
486
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
487
+ }
488
+
489
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
490
+ const void* cSrc, size_t cSrcSize,
491
+ void* workSpace, size_t wkspSize, int bmi2)
492
+ { /* Reads the table description at the start of cSrc into dctx, then decodes the remaining bytes as 4 streams. `bmi2` is forwarded to both steps. */
493
+ const BYTE* ip = (const BYTE*) cSrc;
494
+
495
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
496
+ if (HUF_isError(hSize)) return hSize;
497
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong); /* nothing left after the table description */
498
+ ip += hSize; cSrcSize -= hSize;
499
+
500
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
501
+ }
502
+
503
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
504
+ const void* cSrc, size_t cSrcSize,
505
+ void* workSpace, size_t wkspSize)
506
+ { /* Public wrapper : same as HUF_decompress4X1_DCtx_wksp_bmi2() with the bmi2 flag forced to 0. */
507
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
508
+ }
509
+
510
+
511
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
512
+
513
+
514
+ #ifndef HUF_FORCE_DECOMPRESS_X1
515
+
516
+ /* *************************/
517
+ /* double-symbols decoding */
518
+ /* *************************/
519
+
520
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding : `sequence` holds 1 or 2 symbols (stored little-endian by the fill functions), `length` says how many */
521
+ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; /* one entry of the symbol list consumed by the fill functions */
522
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; /* one U32 per weight */
523
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; /* HUF_TABLELOG_MAX columns of rankValCol_t */
524
+
525
+
526
+ /* HUF_fillDTableX2Level2() :
527
+ * Fills a sub-table of 2-symbol cells : every cell carries `baseSeq` as first symbol, and one of `sortedSymbols` as second symbol. `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
528
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
529
+ const U32* rankValOrigin, const int minWeight,
530
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
531
+ U32 nbBitsBaseline, U16 baseSeq)
532
+ {
533
+ HUF_DEltX2 DElt;
534
+ U32 rankVal[HUF_TABLELOG_MAX + 1]; /* private copy : updated below as cells get assigned */
535
+
536
+ /* get pre-calculated rankVal */
537
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
538
+
539
+ /* fill skipped values */
540
+ if (minWeight>1) {
541
+ U32 i, skipSize = rankVal[minWeight];
542
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
543
+ DElt.nbBits = (BYTE)(consumed);
544
+ DElt.length = 1; /* these cells decode baseSeq alone */
545
+ for (i = 0; i < skipSize; i++)
546
+ DTable[i] = DElt;
547
+ }
548
+
549
+ /* fill DTable */
550
+ { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
551
+ const U32 symbol = sortedSymbols[s].symbol;
552
+ const U32 weight = sortedSymbols[s].weight;
553
+ const U32 nbBits = nbBitsBaseline - weight;
554
+ const U32 length = 1 << (sizeLog-nbBits); /* number of consecutive cells given to this symbol */
555
+ const U32 start = rankVal[weight];
556
+ U32 i = start;
557
+ const U32 end = start + length;
558
+
559
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); /* baseSeq in the low byte, this symbol in the high byte */
560
+ DElt.nbBits = (BYTE)(nbBits + consumed);
561
+ DElt.length = 2;
562
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
563
+
564
+ rankVal[weight] += length; /* next symbol of the same weight starts right after */
565
+ } }
566
+ }
567
+
568
+ /* HUF_fillDTableX2() :
+  * Walks `sortedList` and fills `DTable` for a table of log size `targetLog`.
+  * For each symbol : when (targetLog-nbBits) >= minBits, the symbol's range is delegated to
+  * HUF_fillDTableX2Level2() so that cells can carry a second symbol;
+  * otherwise the range is filled with single-symbol cells (length 1). */
568
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
569
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
570
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
571
+ const U32 nbBitsBaseline)
572
+ {
573
+ U32 rankVal[HUF_TABLELOG_MAX + 1];
574
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
575
+ const U32 minBits = nbBitsBaseline - maxWeight;
576
+ U32 s;
577
+
578
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); /* private copy of the first column (rankValOrigin[0]); advanced below */
579
+
580
+ /* fill DTable */
581
+ for (s=0; s<sortedListSize; s++) {
582
+ const U16 symbol = sortedList[s].symbol;
583
+ const U32 weight = sortedList[s].weight;
584
+ const U32 nbBits = nbBitsBaseline - weight;
585
+ const U32 start = rankVal[weight];
586
+ const U32 length = 1 << (targetLog-nbBits); /* number of cells covered by this symbol */
587
+
588
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
589
+ U32 sortedRank;
590
+ int minWeight = nbBits + scaleLog;
591
+ if (minWeight < 1) minWeight = 1;
592
+ sortedRank = rankStart[minWeight]; /* second-level candidates start at this position of the sorted list */
593
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
594
+ rankValOrigin[nbBits], minWeight,
595
+ sortedList+sortedRank, sortedListSize-sortedRank,
596
+ nbBitsBaseline, symbol);
597
+ } else {
598
+ HUF_DEltX2 DElt;
599
+ MEM_writeLE16(&(DElt.sequence), symbol);
600
+ DElt.nbBits = (BYTE)(nbBits);
601
+ DElt.length = 1;
602
+ { U32 const end = start + length;
603
+ U32 u;
604
+ for (u = start; u < end; u++) DTable[u] = DElt;
605
+ } }
606
+ rankVal[weight] += length; /* next symbol of the same weight starts right after */
607
+ }
608
+ }
610
+ /* HUF_readDTableX2_wksp() :
+  * Reads symbol weights from `src` with HUF_readStats() and builds the double-symbols decoding table
+  * stored right after the header cell of `DTable`.
+  * All scratch arrays (rankVal, rankStats, rankStart0, sortedSymbol, weightList) are carved out of `workSpace`.
+  * @return : the value returned by HUF_readStats() on success,
+  *           or an error code (tableLog_tooLarge when `workSpace` is too small, when the DTable's
+  *           maxTableLog exceeds HUF_TABLELOG_MAX, or when the tableLog read exceeds maxTableLog). */
611
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
612
+ const void* src, size_t srcSize,
613
+ void* workSpace, size_t wkspSize)
614
+ {
615
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
616
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
617
+ U32 const maxTableLog = dtd.maxTableLog;
618
+ size_t iSize;
619
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
620
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
621
+ U32 *rankStart;
622
+
623
+ rankValCol_t* rankVal;
624
+ U32* rankStats;
625
+ U32* rankStart0;
626
+ sortedSymbol_t* sortedSymbol;
627
+ BYTE* weightList;
628
+ size_t spaceUsed32 = 0; /* running offset into workSpace, counted in U32 units */
629
+
630
+ rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
631
+ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
632
+ rankStats = (U32 *)workSpace + spaceUsed32;
633
+ spaceUsed32 += HUF_TABLELOG_MAX + 1;
634
+ rankStart0 = (U32 *)workSpace + spaceUsed32;
635
+ spaceUsed32 += HUF_TABLELOG_MAX + 2;
636
+ sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
637
+ spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
638
+ weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
639
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
640
+
641
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); /* workSpace cannot hold all scratch arrays */
642
+
643
+ rankStart = rankStart0 + 1;
644
+ ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); /* clears rankStats and rankStart0, which are contiguous : (HUF_TABLELOG_MAX+1) + (HUF_TABLELOG_MAX+2) U32 */
645
+
646
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
647
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
648
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
649
+
650
+ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
651
+ if (HUF_isError(iSize)) return iSize;
652
+
653
+ /* check result */
654
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
655
+
656
+ /* find maxWeight */
657
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
658
+
659
+ /* Get start index of each weight */
660
+ { U32 w, nextRankStart = 0;
661
+ for (w=1; w<maxW+1; w++) {
662
+ U32 curr = nextRankStart;
663
+ nextRankStart += rankStats[w];
664
+ rankStart[w] = curr;
665
+ }
666
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list */
667
+ sizeOfSort = nextRankStart; /* number of symbols with weight >= 1 */
668
+ }
669
+
670
+ /* sort symbols by weight */
671
+ { U32 s;
672
+ for (s=0; s<nbSymbols; s++) {
673
+ U32 const w = weightList[s];
674
+ U32 const r = rankStart[w]++; /* next free slot for this weight */
675
+ sortedSymbol[r].symbol = (BYTE)s;
676
+ sortedSymbol[r].weight = (BYTE)w;
677
+ }
678
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
679
+ }
680
+
681
+ /* Build rankVal */
682
+ { U32* const rankVal0 = rankVal[0];
683
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
684
+ U32 nextRankVal = 0;
685
+ U32 w;
686
+ for (w=1; w<maxW+1; w++) {
687
+ U32 curr = nextRankVal;
688
+ nextRankVal += rankStats[w] << (w+rescale);
689
+ rankVal0[w] = curr;
690
+ } }
691
+ { U32 const minBits = tableLog+1 - maxW;
692
+ U32 consumed;
693
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { /* each column is column 0 shifted right by `consumed` */
694
+ U32* const rankValPtr = rankVal[consumed];
695
+ U32 w;
696
+ for (w = 1; w < maxW+1; w++) {
697
+ rankValPtr[w] = rankVal0[w] >> consumed;
698
+ } } } }
699
+
700
+ HUF_fillDTableX2(dt, maxTableLog,
701
+ sortedSymbol, sizeOfSort,
702
+ rankStart0, rankVal, maxW,
703
+ tableLog+1);
704
+
705
+ dtd.tableLog = (BYTE)maxTableLog;
706
+ dtd.tableType = 1; /* the *X2_usingDTable() entry points require tableType == 1 */
707
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); /* write the updated description back into the header cell */
708
+ return iSize;
709
+ }
710
+
711
+ /* HUF_decodeSymbolX2() :
+  * Looks up the next `dtLog` bits in `dt`, always copies 2 bytes of the selected cell to `op`,
+  * skips the cell's nbBits in the bitstream,
+  * and returns the cell's `length` (how many of the copied bytes the caller should keep). */
712
+ FORCE_INLINE_TEMPLATE U32
713
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
714
+ {
715
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
716
+ ZSTD_memcpy(op, dt+val, 2);
717
+ BIT_skipBits(DStream, dt[val].nbBits);
718
+ return dt[val].length;
719
+ }
720
+ /* HUF_decodeLastSymbolX2() :
+  * Variant used when a single output byte remains : copies only 1 byte of the selected cell to `op`
+  * and always returns 1.
+  * When the cell holds 2 symbols, bitsConsumed is clamped to the bit container size after skipping. */
721
+ FORCE_INLINE_TEMPLATE U32
722
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
723
+ {
724
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
725
+ ZSTD_memcpy(op, dt+val, 1);
726
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
727
+ else {
728
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
729
+ BIT_skipBits(DStream, dt[val].nbBits); /* nbBits covers both symbols of the cell, so this may skip too far */
730
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
731
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
732
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
733
+ } }
734
+ return 1;
735
+ }
736
+ /* HUF_DECODE_SYMBOLX2_0 : always decodes one cell and advances `ptr` by the length returned by HUF_decodeSymbolX2(). All three macros expect `dt` and `dtLog` to exist in the calling scope. */
737
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
738
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
739
+ /* HUF_DECODE_SYMBOLX2_1 : same decode, performed only when MEM_64bits() is true or HUF_TABLELOG_MAX <= 12 */
740
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
741
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
742
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
743
+ /* HUF_DECODE_SYMBOLX2_2 : same decode, performed only when MEM_64bits() is true */
744
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
745
+ if (MEM_64bits()) \
746
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
747
+ /* HUF_decodeStreamX2() :
+  * Decodes from `bitDPtr` into [p, pEnd) in three phases : a bulk loop of 4 decode macros per reload,
+  * a loop of single decodes with reload while at least 2 bytes of room remain,
+  * then single decodes without reload; a final lone byte is handled by HUF_decodeLastSymbolX2().
+  * @return : number of bytes written (p - pStart). */
748
+ HINT_INLINE size_t
749
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
750
+ const HUF_DEltX2* const dt, const U32 dtLog)
751
+ {
752
+ BYTE* const pStart = p;
753
+
754
+ /* up to 8 symbols at a time */
755
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
756
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
757
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
758
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
759
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
760
+ }
761
+
762
+ /* closer to end : up to 2 symbols at a time */
763
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
764
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
765
+
766
+ while (p <= pEnd-2)
767
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
768
+
769
+ if (p < pEnd) /* exactly one byte of room left : a 2-byte copy would not fit */
770
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
771
+
772
+ return p-pStart;
773
+ }
774
+ /* HUF_decompress1X2_usingDTable_internal_body() :
+  * Decodes a single bitstream `cSrc` into `dst` using the double-symbols cells stored after the header of `DTable`.
+  * @return : dstSize on success, the BIT_initDStream() error if initialization fails,
+  *           or corruption_detected when the bitstream is not fully consumed after decoding. */
775
+ FORCE_INLINE_TEMPLATE size_t
776
+ HUF_decompress1X2_usingDTable_internal_body(
777
+ void* dst, size_t dstSize,
778
+ const void* cSrc, size_t cSrcSize,
779
+ const HUF_DTable* DTable)
780
+ {
781
+ BIT_DStream_t bitD;
782
+
783
+ /* Init */
784
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
785
+
786
+ /* decode */
787
+ { BYTE* const ostart = (BYTE*) dst;
788
+ BYTE* const oend = ostart + dstSize;
789
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
790
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
791
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
792
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); /* return value unused : success is judged by the end-of-stream check below */
793
+ }
794
+
795
+ /* check */
796
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
797
+
798
+ /* decoded size */
799
+ return dstSize;
800
+ }
801
+ /* HUF_decompress4X2_usingDTable_internal_body() :
+  * Decodes 4 interleaved bitstreams from `cSrc` into `dst` using the double-symbols cells of `DTable`.
+  * `cSrc` starts with a 6-byte jump table (three little-endian 16-bit lengths for streams 1-3);
+  * the length of stream 4 is whatever remains. `dst` is split into 4 segments of (dstSize+3)/4 bytes,
+  * the last one ending at dst+dstSize.
+  * @return : dstSize on success, or an error code (corruption_detected when cSrcSize < 10,
+  *           when the stream lengths exceed cSrcSize, when the 4 segments do not fit into dstSize,
+  *           when a stream overruns its segment in the main loop, or when a stream is not fully consumed). */
802
+ FORCE_INLINE_TEMPLATE size_t
803
+ HUF_decompress4X2_usingDTable_internal_body(
804
+ void* dst, size_t dstSize,
805
+ const void* cSrc, size_t cSrcSize,
806
+ const HUF_DTable* DTable)
807
+ {
808
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
809
+
810
+ { const BYTE* const istart = (const BYTE*) cSrc;
811
+ BYTE* const ostart = (BYTE*) dst;
812
+ BYTE* const oend = ostart + dstSize;
813
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
814
+ const void* const dtPtr = DTable+1;
815
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
816
+
817
+ /* Init */
818
+ BIT_DStream_t bitD1;
819
+ BIT_DStream_t bitD2;
820
+ BIT_DStream_t bitD3;
821
+ BIT_DStream_t bitD4;
822
+ size_t const length1 = MEM_readLE16(istart);
823
+ size_t const length2 = MEM_readLE16(istart+2);
824
+ size_t const length3 = MEM_readLE16(istart+4);
825
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
826
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
827
+ const BYTE* const istart2 = istart1 + length1;
828
+ const BYTE* const istart3 = istart2 + length2;
829
+ const BYTE* const istart4 = istart3 + length3;
830
+ size_t const segmentSize = (dstSize+3) / 4;
831
+ BYTE* const opStart2 = ostart + segmentSize;
832
+ BYTE* const opStart3 = opStart2 + segmentSize;
833
+ BYTE* const opStart4 = opStart3 + segmentSize;
834
+ BYTE* op1 = ostart;
835
+ BYTE* op2 = opStart2;
836
+ BYTE* op3 = opStart3;
837
+ BYTE* op4 = opStart4;
838
+ U32 endSignal = 1;
839
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
840
+ U32 const dtLog = dtd.tableLog;
841
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow : 3 full segments do not fit in dstSize, streams 1-3 would write past oend */
842
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
843
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
844
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
845
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
846
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
847
+
848
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
849
+ for ( ; (endSignal) & (op4 < olimit); ) {
850
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
851
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
852
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
853
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
854
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
855
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
856
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
857
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
858
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
859
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
860
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
861
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
862
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
863
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
864
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
865
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
866
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
867
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
868
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
869
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
870
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
871
+ #else
872
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
873
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
874
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
875
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
876
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
877
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
878
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
879
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
880
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
881
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
882
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
883
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
884
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
885
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
886
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
887
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
888
+ endSignal = (U32)LIKELY(
889
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
890
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
891
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
892
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
893
+ #endif
894
+ }
895
+
896
+ /* check corruption */
897
+ if (op1 > opStart2) return ERROR(corruption_detected);
898
+ if (op2 > opStart3) return ERROR(corruption_detected);
899
+ if (op3 > opStart4) return ERROR(corruption_detected);
900
+ /* note : op4 already verified within main loop */
901
+
902
+ /* finish bitStreams one by one */
903
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
904
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
905
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
906
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
907
+
908
+ /* check : every stream must be exactly consumed */
909
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
910
+ if (!endCheck) return ERROR(corruption_detected); }
911
+
912
+ /* decoded size */
913
+ return dstSize;
914
+ }
915
+ }
916
+ /* NOTE(review): HUF_DGEN is defined outside this chunk; presumably each line below generates the `<name>()` dispatcher (taking an extra `bmi2` argument, as used by the callers further down) around the matching `<name>_body()` - confirm against the macro definition. */
917
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
918
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
919
+ /* HUF_decompress1X2_usingDTable() :
+  * Checks that `DTable` is flagged tableType == 1, then decodes with
+  * HUF_decompress1X2_usingDTable_internal() (bmi2 disabled).
+  * @return : the decoder's result, or ERROR(GENERIC) when the table type does not match. */
920
+ size_t HUF_decompress1X2_usingDTable(
921
+ void* dst, size_t dstSize,
922
+ const void* cSrc, size_t cSrcSize,
923
+ const HUF_DTable* DTable)
924
+ {
925
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
926
+ if (dtd.tableType != 1) return ERROR(GENERIC);
927
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
928
+ }
929
+ /* HUF_decompress1X2_DCtx_wksp() :
+  * Reads a table description from the start of `cSrc` into `DCtx` with HUF_readDTableX2_wksp(),
+  * then decodes the remaining bytes with HUF_decompress1X2_usingDTable_internal() (bmi2 disabled).
+  * @return : the decoder's result, or an error code. */
930
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
931
+ const void* cSrc, size_t cSrcSize,
932
+ void* workSpace, size_t wkspSize)
933
+ {
934
+ const BYTE* ip = (const BYTE*) cSrc;
935
+
936
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
937
+ workSpace, wkspSize);
938
+ if (HUF_isError(hSize)) return hSize;
939
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong); /* nothing would be left after the table description */
940
+ ip += hSize; cSrcSize -= hSize; /* skip the bytes consumed by the table description */
941
+
942
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
943
+ }
944
+
945
+ /* HUF_decompress4X2_usingDTable() :
+  * Checks that `DTable` is flagged tableType == 1, then decodes with
+  * HUF_decompress4X2_usingDTable_internal() (bmi2 disabled).
+  * @return : the decoder's result, or ERROR(GENERIC) when the table type does not match. */
946
+ size_t HUF_decompress4X2_usingDTable(
947
+ void* dst, size_t dstSize,
948
+ const void* cSrc, size_t cSrcSize,
949
+ const HUF_DTable* DTable)
950
+ {
951
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
952
+ if (dtd.tableType != 1) return ERROR(GENERIC);
953
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954
+ }
955
+ /* HUF_decompress4X2_DCtx_wksp_bmi2() :
+  * Reads a table description from the start of `cSrc` into `dctx` with HUF_readDTableX2_wksp(),
+  * then decodes the remaining bytes with HUF_decompress4X2_usingDTable_internal(), forwarding `bmi2`.
+  * Note : `bmi2` is not passed to the table reader. */
956
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
957
+ const void* cSrc, size_t cSrcSize,
958
+ void* workSpace, size_t wkspSize, int bmi2)
959
+ {
960
+ const BYTE* ip = (const BYTE*) cSrc;
961
+
962
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
963
+ workSpace, wkspSize);
964
+ if (HUF_isError(hSize)) return hSize;
965
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong); /* nothing would be left after the table description */
966
+ ip += hSize; cSrcSize -= hSize; /* skip the bytes consumed by the table description */
967
+
968
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
969
+ }
970
+ /* HUF_decompress4X2_DCtx_wksp() :
+  * Forwards all arguments to HUF_decompress4X2_DCtx_wksp_bmi2() with `bmi2` fixed to 0. */
971
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
972
+ const void* cSrc, size_t cSrcSize,
973
+ void* workSpace, size_t wkspSize)
974
+ {
975
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
976
+ }
977
+
978
+
979
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
980
+
981
+
982
+ /* ***********************************/
983
+ /* Universal decompression selectors */
984
+ /* ***********************************/
985
+ /* HUF_decompress1X_usingDTable() :
+  * Picks the single-stream decoder matching the tableType recorded in `DTable` :
+  * non-zero selects the X2 decoder, zero the X1 decoder (bmi2 disabled in both cases).
+  * When a HUF_FORCE_DECOMPRESS_* macro is defined, only that decoder is compiled in
+  * and the table type is merely asserted. */
986
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
987
+ const void* cSrc, size_t cSrcSize,
988
+ const HUF_DTable* DTable)
989
+ {
990
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
991
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
992
+ (void)dtd;
993
+ assert(dtd.tableType == 0);
994
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
995
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
996
+ (void)dtd;
997
+ assert(dtd.tableType == 1);
998
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
999
+ #else
1000
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1001
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1002
+ #endif
1003
+ }
1004
+ /* HUF_decompress4X_usingDTable() :
+  * Picks the 4-streams decoder matching the tableType recorded in `DTable` :
+  * non-zero selects the X2 decoder, zero the X1 decoder (bmi2 disabled in both cases).
+  * When a HUF_FORCE_DECOMPRESS_* macro is defined, only that decoder is compiled in
+  * and the table type is merely asserted. */
1005
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1006
+ const void* cSrc, size_t cSrcSize,
1007
+ const HUF_DTable* DTable)
1008
+ {
1009
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1010
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1011
+ (void)dtd;
1012
+ assert(dtd.tableType == 0);
1013
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1014
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1015
+ (void)dtd;
1016
+ assert(dtd.tableType == 1);
1017
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1018
+ #else
1019
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1020
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1021
+ #endif
1022
+ }
1023
+
1024
+ /* algoTime : cost table read by HUF_selectDecoder(). Rows are indexed by the quantized compression ratio Q (0..15); each entry holds a fixed `tableTime` plus a `decode256Time` that is multiplied by (dstSize >> 8). Only columns 0 and 1 are read by HUF_selectDecoder(). Compiled only when no decoder is forced. */
1025
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1026
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1027
+ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1028
+ {
1029
+ /* single, double, quad */
1030
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1031
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1032
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1033
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1034
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1035
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1036
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1037
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1038
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1039
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1040
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1041
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1042
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1043
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1044
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1045
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1046
+ };
1047
+ #endif
1048
+
1049
+ /** HUF_selectDecoder() :
1050
+ * Tells which decoder is likely to decode faster,
1051
+ * based on a set of pre-computed metrics.
1052
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1053
+ * Assumption : 0 < dstSize <= 128 KB (only enforced through assert()).
+ * When a HUF_FORCE_DECOMPRESS_* macro is defined, the answer is the constant matching the forced decoder. */
1054
+ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1055
+ {
1056
+ assert(dstSize > 0);
1057
+ assert(dstSize <= 128*1024);
1058
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1059
+ (void)dstSize;
1060
+ (void)cSrcSize;
1061
+ return 0;
1062
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1063
+ (void)dstSize;
1064
+ (void)cSrcSize;
1065
+ return 1;
1066
+ #else
1067
+ /* decoder timing evaluation */
1068
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1069
+ U32 const D256 = (U32)(dstSize >> 8); /* output size in units of 256 bytes */
1070
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1071
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1072
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1073
+ return DTime1 < DTime0; /* 1 only when the penalized double-symbols estimate is still lower */
1074
+ }
1075
+ #endif
1076
+ }
1077
+
1078
+ /* HUF_decompress4X_hufOnly_wksp() :
+  * Rejects dstSize == 0 (dstSize_tooSmall) and cSrcSize == 0 (corruption_detected), then decodes with the
+  * 4-streams decoder chosen by HUF_selectDecoder(). This function has no raw-copy or RLE shortcut :
+  * the input always goes through one of the two decoders. */
1079
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1080
+ size_t dstSize, const void* cSrc,
1081
+ size_t cSrcSize, void* workSpace,
1082
+ size_t wkspSize)
1083
+ {
1084
+ /* validation checks */
1085
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1086
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1087
+
1088
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1089
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1090
+ (void)algoNb;
1091
+ assert(algoNb == 0);
1092
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1093
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1094
+ (void)algoNb;
1095
+ assert(algoNb == 1);
1096
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1097
+ #else
1098
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1099
+ cSrcSize, workSpace, wkspSize):
1100
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1101
+ #endif
1102
+ }
1103
+ }
1104
+ /* HUF_decompress1X_DCtx_wksp() :
+  * Single-stream decompression with automatic decoder selection.
+  * Shortcuts handled before any decoding : cSrcSize == dstSize is copied verbatim,
+  * cSrcSize == 1 fills `dst` with that single byte.
+  * @return : dstSize for the shortcuts, otherwise the selected decoder's result, or an error code
+  *           (dstSize_tooSmall when dstSize == 0, corruption_detected when cSrcSize > dstSize). */
1105
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1106
+ const void* cSrc, size_t cSrcSize,
1107
+ void* workSpace, size_t wkspSize)
1108
+ {
1109
+ /* validation checks */
1110
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1111
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1112
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1113
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1114
+
1115
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1116
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1117
+ (void)algoNb;
1118
+ assert(algoNb == 0);
1119
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1120
+ cSrcSize, workSpace, wkspSize);
1121
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1122
+ (void)algoNb;
1123
+ assert(algoNb == 1);
1124
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1125
+ cSrcSize, workSpace, wkspSize);
1126
+ #else
1127
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1128
+ cSrcSize, workSpace, wkspSize):
1129
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1130
+ cSrcSize, workSpace, wkspSize);
1131
+ #endif
1132
+ }
1133
+ }
1134
+
1135
+ /* HUF_decompress1X_usingDTable_bmi2() :
+  * Same dispatch on DTable's tableType as HUF_decompress1X_usingDTable(),
+  * but forwards the caller-provided `bmi2` flag to the selected single-stream decoder. */
1136
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1137
+ {
1138
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1139
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1140
+ (void)dtd;
1141
+ assert(dtd.tableType == 0);
1142
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1143
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1144
+ (void)dtd;
1145
+ assert(dtd.tableType == 1);
1146
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1147
+ #else
1148
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1149
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1150
+ #endif
1151
+ }
1152
+ /* HUF_decompress1X1_DCtx_wksp_bmi2() : reads a table description from the start of `cSrc` into `dctx` with HUF_readDTableX1_wksp_bmi2(), then decodes the remaining bytes with HUF_decompress1X1_usingDTable_internal(); `bmi2` is forwarded to both. Compiled out when HUF_FORCE_DECOMPRESS_X2 is defined. */
1153
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1154
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1155
+ {
1156
+ const BYTE* ip = (const BYTE*) cSrc;
1157
+
1158
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1159
+ if (HUF_isError(hSize)) return hSize;
1160
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong); /* nothing would be left after the table description */
1161
+ ip += hSize; cSrcSize -= hSize; /* skip the bytes consumed by the table description */
1162
+
1163
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1164
+ }
1165
+ #endif
1166
+ /* HUF_decompress4X_usingDTable_bmi2() :
+  * Same dispatch on DTable's tableType as HUF_decompress4X_usingDTable(),
+  * but forwards the caller-provided `bmi2` flag to the selected 4-streams decoder. */
1167
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1168
+ {
1169
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1170
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1171
+ (void)dtd;
1172
+ assert(dtd.tableType == 0);
1173
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1174
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1175
+ (void)dtd;
1176
+ assert(dtd.tableType == 1);
1177
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178
+ #else
1179
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1180
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1181
+ #endif
1182
+ }
1183
+ /* HUF_decompress4X_hufOnly_wksp_bmi2() :
+  * Same validation and decoder selection as HUF_decompress4X_hufOnly_wksp(),
+  * but calls the *_bmi2 variants and forwards the caller-provided `bmi2` flag. */
1184
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1185
+ {
1186
+ /* validation checks */
1187
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1188
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1189
+
1190
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1191
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1192
+ (void)algoNb;
1193
+ assert(algoNb == 0);
1194
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1195
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1196
+ (void)algoNb;
1197
+ assert(algoNb == 1);
1198
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1199
+ #else
1200
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1201
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1202
+ #endif
1203
+ }
1204
+ }
1205
+
1206
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1207
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1208
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
1209
+ { /* Convenience wrapper : supplies a local workSpace array to HUF_readDTableX1_wksp() and returns its result. */
1210
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1211
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
1212
+ workSpace, sizeof(workSpace));
1213
+ }
1214
+ /* HUF_decompress1X1_DCtx() : convenience wrapper supplying a local workSpace array to HUF_decompress1X1_DCtx_wksp(). */
1215
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1216
+ const void* cSrc, size_t cSrcSize)
1217
+ {
1218
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1219
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1220
+ workSpace, sizeof(workSpace));
1221
+ }
1222
+ /* HUF_decompress1X1() : declares a local DTable through HUF_CREATE_STATIC_DTABLEX1 (sized with HUF_TABLELOG_MAX) and decodes with HUF_decompress1X1_DCtx(). */
1223
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1224
+ {
1225
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1226
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1227
+ }
1228
+ #endif
1229
+
1230
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1231
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
1232
+ { /* Convenience wrapper : supplies a local workSpace array to HUF_readDTableX2_wksp() and returns its result. */
1233
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1234
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
1235
+ workSpace, sizeof(workSpace));
1236
+ }
1237
+ /* HUF_decompress1X2_DCtx() : convenience wrapper supplying a local workSpace array to HUF_decompress1X2_DCtx_wksp(). */
1238
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1239
+ const void* cSrc, size_t cSrcSize)
1240
+ {
1241
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1242
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1243
+ workSpace, sizeof(workSpace));
1244
+ }
1245
+ /* HUF_decompress1X2() : declares a local DTable through HUF_CREATE_STATIC_DTABLEX2 (sized with HUF_TABLELOG_MAX) and decodes with HUF_decompress1X2_DCtx(). */
1246
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1247
+ {
1248
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1249
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1250
+ }
1251
+ #endif
1252
+
1253
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1254
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1255
+ { /* Convenience wrapper : supplies a local workSpace array to HUF_decompress4X1_DCtx_wksp() and returns its result. */
1256
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1257
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1258
+ workSpace, sizeof(workSpace));
1259
+ }
1260
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1261
+ { /* Declares a local DTable through HUF_CREATE_STATIC_DTABLEX1 (sized with HUF_TABLELOG_MAX) and decodes with HUF_decompress4X1_DCtx(). */
1262
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1263
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1264
+ }
1265
+ #endif
1266
+
1267
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1268
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1269
+ const void* cSrc, size_t cSrcSize)
1270
+ { /* Convenience wrapper : supplies a local workSpace array to HUF_decompress4X2_DCtx_wksp() and returns its result. */
1271
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1272
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1273
+ workSpace, sizeof(workSpace));
1274
+ }
1275
+ /* HUF_decompress4X2() : declares a local DTable through HUF_CREATE_STATIC_DTABLEX2 (sized with HUF_TABLELOG_MAX) and decodes with HUF_decompress4X2_DCtx(). */
1276
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1277
+ {
1278
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1279
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1280
+ }
1281
+ #endif
1282
+ /* decompressionAlgo : signature shared by HUF_decompress4X1() and HUF_decompress4X2(), so that HUF_decompress() can pick one from an array indexed by HUF_selectDecoder()'s result. */
1283
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1284
+ /* HUF_decompress() :
+  * One-shot entry point. Handles the raw-copy (cSrcSize == dstSize) and RLE (cSrcSize == 1) shortcuts,
+  * otherwise calls HUF_decompress4X1() or HUF_decompress4X2() according to HUF_selectDecoder().
+  * @return : dstSize for the shortcuts, otherwise the selected decoder's result, or an error code
+  *           (dstSize_tooSmall when dstSize == 0, corruption_detected when cSrcSize > dstSize). */
1285
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1286
+ {
1287
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1288
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1289
+ #endif
1290
+
1291
+ /* validation checks */
1292
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1293
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1294
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1295
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1296
+
1297
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1298
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1299
+ (void)algoNb;
1300
+ assert(algoNb == 0);
1301
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1302
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1303
+ (void)algoNb;
1304
+ assert(algoNb == 1);
1305
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1306
+ #else
1307
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); /* algoNb is 0 or 1 : index into the table declared above */
1308
+ #endif
1309
+ }
1310
+ }
1311
+ /* HUF_decompress4X_DCtx() :
+  * Same shortcuts and decoder selection as HUF_decompress(), but builds the table inside the caller-provided `dctx`
+  * by calling HUF_decompress4X1_DCtx() or HUF_decompress4X2_DCtx(). */
1312
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1313
+ {
1314
+ /* validation checks */
1315
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1316
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1317
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1318
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1319
+
1320
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1321
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1322
+ (void)algoNb;
1323
+ assert(algoNb == 0);
1324
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1325
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1326
+ (void)algoNb;
1327
+ assert(algoNb == 1);
1328
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1329
+ #else
1330
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1331
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1332
+ #endif
1333
+ }
1334
+ }
1335
+ /* HUF_decompress4X_hufOnly() : convenience wrapper supplying a local workSpace array to HUF_decompress4X_hufOnly_wksp(). */
1336
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1337
+ {
1338
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1339
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1340
+ workSpace, sizeof(workSpace));
1341
+ }
1342
+ /* HUF_decompress1X_DCtx() : convenience wrapper supplying a local workSpace array to HUF_decompress1X_DCtx_wksp(). */
1343
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1344
+ const void* cSrc, size_t cSrcSize)
1345
+ {
1346
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1347
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1348
+ workSpace, sizeof(workSpace));
1349
+ }
1350
+ #endif