extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * Huffman encoder, part of New Generation Entropy library
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -29,9 +29,9 @@
29
29
  #include "hist.h"
30
30
  #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
31
31
  #include "../common/fse.h" /* header compression */
32
- #define HUF_STATIC_LINKING_ONLY
33
32
  #include "../common/huf.h"
34
33
  #include "../common/error_private.h"
34
+ #include "../common/bits.h" /* ZSTD_highbit32 */
35
35
 
36
36
 
37
37
  /* **************************************************************
@@ -42,24 +42,111 @@
42
42
 
43
43
 
44
44
  /* **************************************************************
45
- * Utils
45
+ * Required declarations
46
46
  ****************************************************************/
47
- unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
47
+ typedef struct nodeElt_s {
48
+ U32 count;
49
+ U16 parent;
50
+ BYTE byte;
51
+ BYTE nbBits;
52
+ } nodeElt;
53
+
54
+
55
+ /* **************************************************************
56
+ * Debug Traces
57
+ ****************************************************************/
58
+
59
+ #if DEBUGLEVEL >= 2
60
+
61
+ static size_t showU32(const U32* arr, size_t size)
62
+ {
63
+ size_t u;
64
+ for (u=0; u<size; u++) {
65
+ RAWLOG(6, " %u", arr[u]); (void)arr;
66
+ }
67
+ RAWLOG(6, " \n");
68
+ return size;
69
+ }
70
+
71
+ static size_t HUF_getNbBits(HUF_CElt elt);
72
+
73
+ static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
48
74
  {
49
- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
75
+ size_t u;
76
+ for (u=0; u<size; u++) {
77
+ RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
78
+ }
79
+ RAWLOG(6, " \n");
80
+ return size;
81
+
50
82
  }
51
83
 
84
+ static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
85
+ {
86
+ size_t u;
87
+ for (u=0; u<size; u++) {
88
+ RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
89
+ }
90
+ RAWLOG(6, " \n");
91
+ return size;
92
+ }
93
+
94
+ static size_t showHNodeBits(const nodeElt* hnode, size_t size)
95
+ {
96
+ size_t u;
97
+ for (u=0; u<size; u++) {
98
+ RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
99
+ }
100
+ RAWLOG(6, " \n");
101
+ return size;
102
+ }
103
+
104
+ #endif
105
+
52
106
 
53
107
  /* *******************************************************
54
108
  * HUF : Huffman block compression
55
109
  *********************************************************/
110
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
111
+
112
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
113
+ {
114
+ size_t const mask = align - 1;
115
+ size_t const rem = (size_t)workspace & mask;
116
+ size_t const add = (align - rem) & mask;
117
+ BYTE* const aligned = (BYTE*)workspace + add;
118
+ assert((align & (align - 1)) == 0); /* pow 2 */
119
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
120
+ if (*workspaceSizePtr >= add) {
121
+ assert(add < align);
122
+ assert(((size_t)aligned & mask) == 0);
123
+ *workspaceSizePtr -= add;
124
+ return aligned;
125
+ } else {
126
+ *workspaceSizePtr = 0;
127
+ return NULL;
128
+ }
129
+ }
130
+
131
+
56
132
  /* HUF_compressWeights() :
57
133
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
58
134
  * The use case needs much less stack memory.
59
135
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
60
136
  */
61
137
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
62
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
138
+
139
+ typedef struct {
140
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
141
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
142
+ unsigned count[HUF_TABLELOG_MAX+1];
143
+ S16 norm[HUF_TABLELOG_MAX+1];
144
+ } HUF_CompressWeightsWksp;
145
+
146
+ static size_t
147
+ HUF_compressWeights(void* dst, size_t dstSize,
148
+ const void* weightTable, size_t wtSize,
149
+ void* workspace, size_t workspaceSize)
63
150
  {
64
151
  BYTE* const ostart = (BYTE*) dst;
65
152
  BYTE* op = ostart;
@@ -67,33 +154,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
67
154
 
68
155
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
69
156
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
157
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
70
158
 
71
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
72
- BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
73
-
74
- unsigned count[HUF_TABLELOG_MAX+1];
75
- S16 norm[HUF_TABLELOG_MAX+1];
159
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
76
160
 
77
161
  /* init conditions */
78
162
  if (wtSize <= 1) return 0; /* Not compressible */
79
163
 
80
164
  /* Scan input and build symbol stats */
81
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
165
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
82
166
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
83
167
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
84
168
  }
85
169
 
86
170
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
87
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
171
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
88
172
 
89
173
  /* Write table description header */
90
- { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
174
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
91
175
  op += hSize;
92
176
  }
93
177
 
94
178
  /* Compress */
95
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
96
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
179
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
180
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
97
181
  if (cSize == 0) return 0; /* not enough space for compressed data */
98
182
  op += cSize;
99
183
  }
@@ -101,30 +185,94 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
101
185
  return (size_t)(op-ostart);
102
186
  }
103
187
 
188
+ static size_t HUF_getNbBits(HUF_CElt elt)
189
+ {
190
+ return elt & 0xFF;
191
+ }
192
+
193
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
194
+ {
195
+ return elt;
196
+ }
197
+
198
+ static size_t HUF_getValue(HUF_CElt elt)
199
+ {
200
+ return elt & ~(size_t)0xFF;
201
+ }
202
+
203
+ static size_t HUF_getValueFast(HUF_CElt elt)
204
+ {
205
+ return elt;
206
+ }
207
+
208
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
209
+ {
210
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
211
+ *elt = nbBits;
212
+ }
104
213
 
105
- /*! HUF_writeCTable() :
106
- `CTable` : Huffman tree to save, using huf representation.
107
- @return : size of saved CTable */
108
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
109
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
214
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
110
215
  {
216
+ size_t const nbBits = HUF_getNbBits(*elt);
217
+ if (nbBits > 0) {
218
+ assert((value >> nbBits) == 0);
219
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
220
+ }
221
+ }
222
+
223
+ HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
224
+ {
225
+ HUF_CTableHeader header;
226
+ ZSTD_memcpy(&header, ctable, sizeof(header));
227
+ return header;
228
+ }
229
+
230
+ static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
231
+ {
232
+ HUF_CTableHeader header;
233
+ HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
234
+ ZSTD_memset(&header, 0, sizeof(header));
235
+ assert(tableLog < 256);
236
+ header.tableLog = (BYTE)tableLog;
237
+ assert(maxSymbolValue < 256);
238
+ header.maxSymbolValue = (BYTE)maxSymbolValue;
239
+ ZSTD_memcpy(ctable, &header, sizeof(header));
240
+ }
241
+
242
+ typedef struct {
243
+ HUF_CompressWeightsWksp wksp;
111
244
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
112
245
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
246
+ } HUF_WriteCTableWksp;
247
+
248
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
249
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
250
+ void* workspace, size_t workspaceSize)
251
+ {
252
+ HUF_CElt const* const ct = CTable + 1;
113
253
  BYTE* op = (BYTE*)dst;
114
254
  U32 n;
255
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
256
+
257
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
115
258
 
116
- /* check conditions */
259
+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
260
+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
261
+
262
+ /* check conditions */
263
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
117
264
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
118
265
 
119
266
  /* convert to weight */
120
- bitsToWeight[0] = 0;
267
+ wksp->bitsToWeight[0] = 0;
121
268
  for (n=1; n<huffLog+1; n++)
122
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
269
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
123
270
  for (n=0; n<maxSymbolValue; n++)
124
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
271
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
125
272
 
126
273
  /* attempt weights compression by FSE */
127
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
274
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
275
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
128
276
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
129
277
  op[0] = (BYTE)hSize;
130
278
  return hSize+1;
@@ -134,9 +282,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
134
282
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
135
283
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
136
284
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
137
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
285
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
138
286
  for (n=0; n<maxSymbolValue; n+=2)
139
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
287
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
140
288
  return ((maxSymbolValue+1)/2) + 1;
141
289
  }
142
290
 
@@ -147,6 +295,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
147
295
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
148
296
  U32 tableLog = 0;
149
297
  U32 nbSymbols = 0;
298
+ HUF_CElt* const ct = CTable + 1;
150
299
 
151
300
  /* get symbol weights */
152
301
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@@ -156,6 +305,10 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
156
305
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
157
306
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
158
307
 
308
+ *maxSymbolValuePtr = nbSymbols - 1;
309
+
310
+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
311
+
159
312
  /* Prepare base value per rank */
160
313
  { U32 n, nextRankStart = 0;
161
314
  for (n=1; n<=tableLog; n++) {
@@ -167,13 +320,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
167
320
  /* fill nbBits */
168
321
  { U32 n; for (n=0; n<nbSymbols; n++) {
169
322
  const U32 w = huffWeight[n];
170
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
323
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
171
324
  } }
172
325
 
173
326
  /* fill val */
174
327
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
175
328
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
176
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
329
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
177
330
  /* determine stating value per rank */
178
331
  valPerRank[tableLog+1] = 0; /* for w==0 */
179
332
  { U16 min = 0;
@@ -183,77 +336,74 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
183
336
  min >>= 1;
184
337
  } }
185
338
  /* assign value within rank, symbol order */
186
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
339
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
187
340
  }
188
341
 
189
- *maxSymbolValuePtr = nbSymbols - 1;
190
342
  return readSize;
191
343
  }
192
344
 
193
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
345
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
194
346
  {
195
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
347
+ const HUF_CElt* const ct = CTable + 1;
196
348
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
197
- return table[symbolValue].nbBits;
349
+ if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
350
+ return 0;
351
+ return (U32)HUF_getNbBits(ct[symbolValue]);
198
352
  }
199
353
 
200
354
 
201
- typedef struct nodeElt_s {
202
- U32 count;
203
- U16 parent;
204
- BYTE byte;
205
- BYTE nbBits;
206
- } nodeElt;
207
-
208
355
  /**
209
356
  * HUF_setMaxHeight():
210
- * Enforces maxNbBits on the Huffman tree described in huffNode.
357
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
211
358
  *
212
- * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
213
- * the tree to so that it is a valid canonical Huffman tree.
359
+ * It attempts to convert all nodes with nbBits > @targetNbBits
360
+ * to employ @targetNbBits instead. Then it adjusts the tree
361
+ * so that it remains a valid canonical Huffman tree.
214
362
  *
215
363
  * @pre The sum of the ranks of each symbol == 2^largestBits,
216
364
  * where largestBits == huffNode[lastNonNull].nbBits.
217
365
  * @post The sum of the ranks of each symbol == 2^largestBits,
218
- * where largestBits is the return value <= maxNbBits.
366
+ * where largestBits is the return value (expected <= targetNbBits).
219
367
  *
220
- * @param huffNode The Huffman tree modified in place to enforce maxNbBits.
368
+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
369
+ * It's presumed sorted, from most frequent to rarest symbol.
221
370
  * @param lastNonNull The symbol with the lowest count in the Huffman tree.
222
- * @param maxNbBits The maximum allowed number of bits, which the Huffman tree
371
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
223
372
  * may not respect. After this function the Huffman tree will
224
- * respect maxNbBits.
225
- * @return The maximum number of bits of the Huffman tree after adjustment,
226
- * necessarily no more than maxNbBits.
373
+ * respect targetNbBits.
374
+ * @return The maximum number of bits of the Huffman tree after adjustment.
227
375
  */
228
- static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
376
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
229
377
  {
230
378
  const U32 largestBits = huffNode[lastNonNull].nbBits;
231
- /* early exit : no elt > maxNbBits, so the tree is already valid. */
232
- if (largestBits <= maxNbBits) return largestBits;
379
+ /* early exit : no elt > targetNbBits, so the tree is already valid. */
380
+ if (largestBits <= targetNbBits) return largestBits;
381
+
382
+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
233
383
 
234
384
  /* there are several too large elements (at least >= 2) */
235
385
  { int totalCost = 0;
236
- const U32 baseCost = 1 << (largestBits - maxNbBits);
386
+ const U32 baseCost = 1 << (largestBits - targetNbBits);
237
387
  int n = (int)lastNonNull;
238
388
 
239
- /* Adjust any ranks > maxNbBits to maxNbBits.
389
+ /* Adjust any ranks > targetNbBits to targetNbBits.
240
390
  * Compute totalCost, which is how far the sum of the ranks is
241
391
  * we are over 2^largestBits after adjust the offending ranks.
242
392
  */
243
- while (huffNode[n].nbBits > maxNbBits) {
393
+ while (huffNode[n].nbBits > targetNbBits) {
244
394
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
245
- huffNode[n].nbBits = (BYTE)maxNbBits;
395
+ huffNode[n].nbBits = (BYTE)targetNbBits;
246
396
  n--;
247
397
  }
248
- /* n stops at huffNode[n].nbBits <= maxNbBits */
249
- assert(huffNode[n].nbBits <= maxNbBits);
250
- /* n end at index of smallest symbol using < maxNbBits */
251
- while (huffNode[n].nbBits == maxNbBits) --n;
398
+ /* n stops at huffNode[n].nbBits <= targetNbBits */
399
+ assert(huffNode[n].nbBits <= targetNbBits);
400
+ /* n end at index of smallest symbol using < targetNbBits */
401
+ while (huffNode[n].nbBits == targetNbBits) --n;
252
402
 
253
- /* renorm totalCost from 2^largestBits to 2^maxNbBits
403
+ /* renorm totalCost from 2^largestBits to 2^targetNbBits
254
404
  * note : totalCost is necessarily a multiple of baseCost */
255
- assert((totalCost & (baseCost - 1)) == 0);
256
- totalCost >>= (largestBits - maxNbBits);
405
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
406
+ totalCost >>= (largestBits - targetNbBits);
257
407
  assert(totalCost > 0);
258
408
 
259
409
  /* repay normalized cost */
@@ -262,19 +412,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
262
412
 
263
413
  /* Get pos of last (smallest = lowest cum. count) symbol per rank */
264
414
  ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
265
- { U32 currentNbBits = maxNbBits;
415
+ { U32 currentNbBits = targetNbBits;
266
416
  int pos;
267
417
  for (pos=n ; pos >= 0; pos--) {
268
418
  if (huffNode[pos].nbBits >= currentNbBits) continue;
269
- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
270
- rankLast[maxNbBits-currentNbBits] = (U32)pos;
419
+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
420
+ rankLast[targetNbBits-currentNbBits] = (U32)pos;
271
421
  } }
272
422
 
273
423
  while (totalCost > 0) {
274
424
  /* Try to reduce the next power of 2 above totalCost because we
275
425
  * gain back half the rank.
276
426
  */
277
- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
427
+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
278
428
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
279
429
  U32 const highPos = rankLast[nBitsToDecrease];
280
430
  U32 const lowPos = rankLast[nBitsToDecrease-1];
@@ -314,7 +464,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
314
464
  rankLast[nBitsToDecrease] = noSymbol;
315
465
  else {
316
466
  rankLast[nBitsToDecrease]--;
317
- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
467
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
318
468
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
319
469
  }
320
470
  } /* while (totalCost > 0) */
@@ -326,11 +476,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
326
476
  * TODO.
327
477
  */
328
478
  while (totalCost < 0) { /* Sometimes, cost correction overshoot */
329
- /* special case : no rank 1 symbol (using maxNbBits-1);
330
- * let's create one from largest rank 0 (using maxNbBits).
479
+ /* special case : no rank 1 symbol (using targetNbBits-1);
480
+ * let's create one from largest rank 0 (using targetNbBits).
331
481
  */
332
482
  if (rankLast[1] == noSymbol) {
333
- while (huffNode[n].nbBits == maxNbBits) n--;
483
+ while (huffNode[n].nbBits == targetNbBits) n--;
334
484
  huffNode[n+1].nbBits--;
335
485
  assert(n >= 0);
336
486
  rankLast[1] = (U32)(n+1);
@@ -344,26 +494,122 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
344
494
  } /* repay normalized cost */
345
495
  } /* there are several too large elements (at least >= 2) */
346
496
 
347
- return maxNbBits;
497
+ return targetNbBits;
348
498
  }
349
499
 
350
500
  typedef struct {
351
- U32 base;
352
- U32 curr;
501
+ U16 base;
502
+ U16 curr;
353
503
  } rankPos;
354
504
 
355
- typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
505
+ typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
356
506
 
357
- #define RANK_POSITION_TABLE_SIZE 32
507
+ /* Number of buckets available for HUF_sort() */
508
+ #define RANK_POSITION_TABLE_SIZE 192
358
509
 
359
510
  typedef struct {
360
511
  huffNodeTable huffNodeTbl;
361
512
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
362
513
  } HUF_buildCTable_wksp_tables;
363
514
 
515
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
516
+ * Strategy is to use as many buckets as possible for representing distinct
517
+ * counts while using the remainder to represent all "large" counts.
518
+ *
519
+ * To satisfy this requirement for 192 buckets, we can do the following:
520
+ * Let buckets 0-166 represent distinct counts of [0, 166]
521
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
522
+ */
523
+ #define RANK_POSITION_MAX_COUNT_LOG 32
524
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
525
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
526
+
527
+ /* Return the appropriate bucket index for a given count. See definition of
528
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
529
+ */
530
+ static U32 HUF_getIndex(U32 const count) {
531
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
532
+ ? count
533
+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
534
+ }
535
+
536
+ /* Helper swap function for HUF_quickSortPartition() */
537
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
538
+ nodeElt tmp = *a;
539
+ *a = *b;
540
+ *b = tmp;
541
+ }
542
+
543
+ /* Returns 0 if the huffNode array is not sorted by descending count */
544
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
545
+ U32 i;
546
+ for (i = 1; i < maxSymbolValue1; ++i) {
547
+ if (huffNode[i].count > huffNode[i-1].count) {
548
+ return 0;
549
+ }
550
+ }
551
+ return 1;
552
+ }
553
+
554
+ /* Insertion sort by descending order */
555
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
556
+ int i;
557
+ int const size = high-low+1;
558
+ huffNode += low;
559
+ for (i = 1; i < size; ++i) {
560
+ nodeElt const key = huffNode[i];
561
+ int j = i - 1;
562
+ while (j >= 0 && huffNode[j].count < key.count) {
563
+ huffNode[j + 1] = huffNode[j];
564
+ j--;
565
+ }
566
+ huffNode[j + 1] = key;
567
+ }
568
+ }
569
+
570
+ /* Pivot helper function for quicksort. */
571
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
572
+ /* Simply select rightmost element as pivot. "Better" selectors like
573
+ * median-of-three don't experimentally appear to have any benefit.
574
+ */
575
+ U32 const pivot = arr[high].count;
576
+ int i = low - 1;
577
+ int j = low;
578
+ for ( ; j < high; j++) {
579
+ if (arr[j].count > pivot) {
580
+ i++;
581
+ HUF_swapNodes(&arr[i], &arr[j]);
582
+ }
583
+ }
584
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
585
+ return i + 1;
586
+ }
587
+
588
+ /* Classic quicksort by descending with partially iterative calls
589
+ * to reduce worst case callstack size.
590
+ */
591
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
592
+ int const kInsertionSortThreshold = 8;
593
+ if (high - low < kInsertionSortThreshold) {
594
+ HUF_insertionSort(arr, low, high);
595
+ return;
596
+ }
597
+ while (low < high) {
598
+ int const idx = HUF_quickSortPartition(arr, low, high);
599
+ if (idx - low < high - idx) {
600
+ HUF_simpleQuickSort(arr, low, idx - 1);
601
+ low = idx + 1;
602
+ } else {
603
+ HUF_simpleQuickSort(arr, idx + 1, high);
604
+ high = idx - 1;
605
+ }
606
+ }
607
+ }
608
+
364
609
  /**
365
610
  * HUF_sort():
366
611
  * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
612
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
367
613
  *
368
614
  * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
369
615
  * Must have (maxSymbolValue + 1) entries.
@@ -371,42 +617,51 @@ typedef struct {
371
617
  * @param[in] maxSymbolValue Maximum symbol value.
372
618
  * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
373
619
  */
374
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
375
- {
376
- int n;
377
- int const maxSymbolValue1 = (int)maxSymbolValue + 1;
620
+ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
621
+ U32 n;
622
+ U32 const maxSymbolValue1 = maxSymbolValue+1;
378
623
 
379
624
  /* Compute base and set curr to base.
380
- * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
381
- * Then 2^lowerRank <= count[n]+1 <= 2^rank.
625
+ * For symbol n let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
626
+ * See HUF_getIndex to see bucketing strategy.
382
627
  * We attribute each symbol to lowerRank's base value, because we want to know where
383
628
  * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
384
629
  */
385
630
  ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
386
631
  for (n = 0; n < maxSymbolValue1; ++n) {
387
- U32 lowerRank = BIT_highbit32(count[n] + 1);
632
+ U32 lowerRank = HUF_getIndex(count[n]);
633
+ assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
388
634
  rankPosition[lowerRank].base++;
389
635
  }
636
+
390
637
  assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
638
+ /* Set up the rankPosition table */
391
639
  for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
392
640
  rankPosition[n-1].base += rankPosition[n].base;
393
641
  rankPosition[n-1].curr = rankPosition[n-1].base;
394
642
  }
395
- /* Sort */
643
+
644
+ /* Insert each symbol into its appropriate bucket, advancing rankPosition[r].curr as slots are filled. */
396
645
  for (n = 0; n < maxSymbolValue1; ++n) {
397
646
  U32 const c = count[n];
398
- U32 const r = BIT_highbit32(c+1) + 1;
399
- U32 pos = rankPosition[r].curr++;
400
- /* Insert into the correct position in the rank.
401
- * We have at most 256 symbols, so this insertion should be fine.
402
- */
403
- while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
404
- huffNode[pos] = huffNode[pos-1];
405
- pos--;
406
- }
647
+ U32 const r = HUF_getIndex(c) + 1;
648
+ U32 const pos = rankPosition[r].curr++;
649
+ assert(pos < maxSymbolValue1);
407
650
  huffNode[pos].count = c;
408
651
  huffNode[pos].byte = (BYTE)n;
409
652
  }
653
+
654
+ /* Sort each bucket. */
655
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
656
+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
657
+ U32 const bucketStartIdx = rankPosition[n].base;
658
+ if (bucketSize > 1) {
659
+ assert(bucketStartIdx < maxSymbolValue1);
660
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
661
+ }
662
+ }
663
+
664
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
410
665
  }
411
666
 
412
667
 
@@ -430,6 +685,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
430
685
  int lowS, lowN;
431
686
  int nodeNb = STARTNODE;
432
687
  int n, nodeRoot;
688
+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
433
689
  /* init for parents */
434
690
  nonNullRank = (int)maxSymbolValue;
435
691
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -456,6 +712,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
456
712
  for (n=0; n<=nonNullRank; n++)
457
713
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
458
714
 
715
+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
716
+
459
717
  return nonNullRank;
460
718
  }
461
719
 
@@ -471,6 +729,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
471
729
  */
472
730
  static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
473
731
  {
732
+ HUF_CElt* const ct = CTable + 1;
474
733
  /* fill result into ctable (val, nbBits) */
475
734
  int n;
476
735
  U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -486,127 +745,381 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
486
745
  min >>= 1;
487
746
  } }
488
747
  for (n=0; n<alphabetSize; n++)
489
- CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
748
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
490
749
  for (n=0; n<alphabetSize; n++)
491
- CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
750
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
751
+
752
+ HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
492
753
  }
493
754
 
494
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
755
+ size_t
756
+ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
757
+ void* workSpace, size_t wkspSize)
495
758
  {
496
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
759
+ HUF_buildCTable_wksp_tables* const wksp_tables =
760
+ (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
497
761
  nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
498
762
  nodeElt* const huffNode = huffNode0+1;
499
763
  int nonNullRank;
500
764
 
765
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
766
+
767
+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
768
+
501
769
  /* safety checks */
502
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
503
770
  if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
504
- return ERROR(workSpace_tooSmall);
771
+ return ERROR(workSpace_tooSmall);
505
772
  if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
506
773
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
507
- return ERROR(maxSymbolValue_tooLarge);
774
+ return ERROR(maxSymbolValue_tooLarge);
508
775
  ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
509
776
 
510
777
  /* sort, decreasing order */
511
778
  HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
779
+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
512
780
 
513
781
  /* build tree */
514
782
  nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
515
783
 
516
- /* enforce maxTableLog */
784
+ /* determine and enforce maxTableLog */
517
785
  maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
518
786
  if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
519
787
 
520
- HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
788
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
521
789
 
522
790
  return maxNbBits;
523
791
  }
524
792
 
525
793
  size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
526
794
  {
795
+ HUF_CElt const* ct = CTable + 1;
527
796
  size_t nbBits = 0;
528
797
  int s;
529
798
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
530
- nbBits += CTable[s].nbBits * count[s];
799
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
531
800
  }
532
801
  return nbBits >> 3;
533
802
  }
534
803
 
535
804
  int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
536
- int bad = 0;
537
- int s;
538
- for (s = 0; s <= (int)maxSymbolValue; ++s) {
539
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
540
- }
541
- return !bad;
805
+ HUF_CTableHeader header = HUF_readCTableHeader(CTable);
806
+ HUF_CElt const* ct = CTable + 1;
807
+ int bad = 0;
808
+ int s;
809
+
810
+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
811
+
812
+ if (header.maxSymbolValue < maxSymbolValue)
813
+ return 0;
814
+
815
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
816
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
817
+ }
818
+ return !bad;
542
819
  }
543
820
 
544
821
  size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
545
822
 
823
+ /** HUF_CStream_t:
824
+ * Huffman uses its own BIT_CStream_t implementation.
825
+ * There are three major differences from BIT_CStream_t:
826
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
827
+ * the pair (nbBits, value), packed in the following
828
+ * format:
829
+ * - Bits [0, 4) = nbBits
830
+ * - Bits [4, 64 - nbBits) = 0
831
+ * - Bits [64 - nbBits, 64) = value
832
+ * 2. The bitContainer is built from the upper bits and
833
+ * right shifted. E.g. to add a new value of N bits
834
+ * you right shift the bitContainer by N, then or in
835
+ * the new value into the N upper bits.
836
+ * 3. The bitstream has two bit containers. You can add
837
+ * bits to the second container and merge them into
838
+ * the first container.
839
+ */
840
+
841
+ #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
842
+
843
+ typedef struct {
844
+ size_t bitContainer[2];
845
+ size_t bitPos[2];
846
+
847
+ BYTE* startPtr;
848
+ BYTE* ptr;
849
+ BYTE* endPtr;
850
+ } HUF_CStream_t;
851
+
852
+ /**! HUF_initCStream():
853
+ * Initializes the bitstream.
854
+ * @returns 0 or an error code.
855
+ */
856
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
857
+ void* startPtr, size_t dstCapacity)
858
+ {
859
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
860
+ bitC->startPtr = (BYTE*)startPtr;
861
+ bitC->ptr = bitC->startPtr;
862
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
863
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
864
+ return 0;
865
+ }
866
+
867
+ /*! HUF_addBits():
868
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
869
+ *
870
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
871
+ * See the HUF_CStream_t docs for the format.
872
+ * @param idx Insert into the bitstream at this idx.
873
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
874
+ * to have at least 4 unused bits after this call it may be 1,
875
+ * otherwise it must be 0. HUF_addBits() is faster when kFast is set.
876
+ */
877
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
878
+ {
879
+ assert(idx <= 1);
880
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
881
+ /* This is efficient on x86-64 with BMI2 because shrx
882
+ * only reads the low 6 bits of the register. The compiler
883
+ * knows this and elides the mask. When fast is set,
884
+ * every operation can use the same value loaded from elt.
885
+ */
886
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
887
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
888
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
889
+ * doesn't matter that the high bits have noise from the value.
890
+ */
891
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
892
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
893
+ /* The last 4-bits of elt are dirty if fast is set,
894
+ * so we must not be overwriting bits that have already been
895
+ * inserted into the bit container.
896
+ */
897
+ #if DEBUGLEVEL >= 1
898
+ {
899
+ size_t const nbBits = HUF_getNbBits(elt);
900
+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
901
+ (void)dirtyBits;
902
+ /* Middle bits are 0. */
903
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
904
+ /* We didn't overwrite any bits in the bit container. */
905
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
906
+ (void)dirtyBits;
907
+ }
908
+ #endif
909
+ }
910
+
911
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
912
+ {
913
+ bitC->bitContainer[1] = 0;
914
+ bitC->bitPos[1] = 0;
915
+ }
916
+
917
+ /*! HUF_mergeIndex1() :
918
+ * Merges the bit container @ index 1 into the bit container @ index 0
919
+ * and zeros the bit container @ index 1.
920
+ */
921
+ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
922
+ {
923
+ assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
924
+ bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
925
+ bitC->bitContainer[0] |= bitC->bitContainer[1];
926
+ bitC->bitPos[0] += bitC->bitPos[1];
927
+ assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
928
+ }
929
+
930
+ /*! HUF_flushBits() :
931
+ * Flushes the bits in the bit container @ index 0.
932
+ *
933
+ * @post bitPos will be < 8.
934
+ * @param kFast If kFast is set then we must know a-priori that
935
+ * the bit container will not overflow.
936
+ */
937
+ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
938
+ {
939
+ /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
940
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
941
+ size_t const nbBytes = nbBits >> 3;
942
+ /* The top nbBits bits of bitContainer are the ones we need. */
943
+ size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
944
+ /* Mask bitPos to account for the bytes we consumed. */
945
+ bitC->bitPos[0] &= 7;
946
+ assert(nbBits > 0);
947
+ assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
948
+ assert(bitC->ptr <= bitC->endPtr);
949
+ MEM_writeLEST(bitC->ptr, bitContainer);
950
+ bitC->ptr += nbBytes;
951
+ assert(!kFast || bitC->ptr <= bitC->endPtr);
952
+ if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
953
+ /* bitContainer doesn't need to be modified because the leftover
954
+ * bits are already the top bitPos bits. And we don't care about
955
+ * noise in the lower values.
956
+ */
957
+ }
958
+
959
+ /*! HUF_endMark()
960
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
961
+ */
962
+ static HUF_CElt HUF_endMark(void)
963
+ {
964
+ HUF_CElt endMark;
965
+ HUF_setNbBits(&endMark, 1);
966
+ HUF_setValue(&endMark, 1);
967
+ return endMark;
968
+ }
969
+
970
+ /*! HUF_closeCStream() :
971
+ * @return Size of CStream, in bytes,
972
+ * or 0 if it could not fit into dstBuffer */
973
+ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
974
+ {
975
+ HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
976
+ HUF_flushBits(bitC, /* kFast */ 0);
977
+ {
978
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
979
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
980
+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
981
+ }
982
+ }
983
+
546
984
  FORCE_INLINE_TEMPLATE void
547
- HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
985
+ HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
548
986
  {
549
- BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
987
+ HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
550
988
  }
551
989
 
552
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
990
+ FORCE_INLINE_TEMPLATE void
991
+ HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
992
+ const BYTE* ip, size_t srcSize,
993
+ const HUF_CElt* ct,
994
+ int kUnroll, int kFastFlush, int kLastFast)
995
+ {
996
+ /* Join to kUnroll */
997
+ int n = (int)srcSize;
998
+ int rem = n % kUnroll;
999
+ if (rem > 0) {
1000
+ for (; rem > 0; --rem) {
1001
+ HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
1002
+ }
1003
+ HUF_flushBits(bitC, kFastFlush);
1004
+ }
1005
+ assert(n % kUnroll == 0);
1006
+
1007
+ /* Join to 2 * kUnroll */
1008
+ if (n % (2 * kUnroll)) {
1009
+ int u;
1010
+ for (u = 1; u < kUnroll; ++u) {
1011
+ HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
1012
+ }
1013
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
1014
+ HUF_flushBits(bitC, kFastFlush);
1015
+ n -= kUnroll;
1016
+ }
1017
+ assert(n % (2 * kUnroll) == 0);
1018
+
1019
+ for (; n>0; n-= 2 * kUnroll) {
1020
+ /* Encode kUnroll symbols into the bitstream @ index 0. */
1021
+ int u;
1022
+ for (u = 1; u < kUnroll; ++u) {
1023
+ HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
1024
+ }
1025
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
1026
+ HUF_flushBits(bitC, kFastFlush);
1027
+ /* Encode kUnroll symbols into the bitstream @ index 1.
1028
+ * This allows us to start filling the bit container
1029
+ * without any data dependencies.
1030
+ */
1031
+ HUF_zeroIndex1(bitC);
1032
+ for (u = 1; u < kUnroll; ++u) {
1033
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
1034
+ }
1035
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
1036
+ /* Merge bitstream @ index 1 into the bitstream @ index 0 */
1037
+ HUF_mergeIndex1(bitC);
1038
+ HUF_flushBits(bitC, kFastFlush);
1039
+ }
1040
+ assert(n == 0);
1041
+
1042
+ }
553
1043
 
554
- #define HUF_FLUSHBITS_1(stream) \
555
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
1044
+ /**
1045
+ * Returns a tight upper bound on the output space needed by Huffman
1046
+ * plus an 8-byte margin to absorb over-writes. If the output is at least
1047
+ * this large we don't need to do bounds checks during Huffman encoding.
1048
+ */
1049
+ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
1050
+ {
1051
+ return ((srcSize * tableLog) >> 3) + 8;
1052
+ }
556
1053
 
557
- #define HUF_FLUSHBITS_2(stream) \
558
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
559
1054
 
560
1055
  FORCE_INLINE_TEMPLATE size_t
561
1056
  HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
562
1057
  const void* src, size_t srcSize,
563
1058
  const HUF_CElt* CTable)
564
1059
  {
1060
+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
1061
+ HUF_CElt const* ct = CTable + 1;
565
1062
  const BYTE* ip = (const BYTE*) src;
566
1063
  BYTE* const ostart = (BYTE*)dst;
567
1064
  BYTE* const oend = ostart + dstSize;
568
- BYTE* op = ostart;
569
- size_t n;
570
- BIT_CStream_t bitC;
1065
+ HUF_CStream_t bitC;
571
1066
 
572
1067
  /* init */
573
1068
  if (dstSize < 8) return 0; /* not enough space to compress */
574
- { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
1069
+ { BYTE* op = ostart;
1070
+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
575
1071
  if (HUF_isError(initErr)) return 0; }
576
1072
 
577
- n = srcSize & ~3; /* join to mod 4 */
578
- switch (srcSize & 3)
579
- {
580
- case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
581
- HUF_FLUSHBITS_2(&bitC);
582
- /* fall-through */
583
- case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
584
- HUF_FLUSHBITS_1(&bitC);
585
- /* fall-through */
586
- case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
587
- HUF_FLUSHBITS(&bitC);
588
- /* fall-through */
589
- case 0 : /* fall-through */
590
- default: break;
591
- }
592
-
593
- for (; n>0; n-=4) { /* note : n&3==0 at this stage */
594
- HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
595
- HUF_FLUSHBITS_1(&bitC);
596
- HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
597
- HUF_FLUSHBITS_2(&bitC);
598
- HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
599
- HUF_FLUSHBITS_1(&bitC);
600
- HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
601
- HUF_FLUSHBITS(&bitC);
1073
+ if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
1074
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
1075
+ else {
1076
+ if (MEM_32bits()) {
1077
+ switch (tableLog) {
1078
+ case 11:
1079
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
1080
+ break;
1081
+ case 10: ZSTD_FALLTHROUGH;
1082
+ case 9: ZSTD_FALLTHROUGH;
1083
+ case 8:
1084
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
1085
+ break;
1086
+ case 7: ZSTD_FALLTHROUGH;
1087
+ default:
1088
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
1089
+ break;
1090
+ }
1091
+ } else {
1092
+ switch (tableLog) {
1093
+ case 11:
1094
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
1095
+ break;
1096
+ case 10:
1097
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
1098
+ break;
1099
+ case 9:
1100
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
1101
+ break;
1102
+ case 8:
1103
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
1104
+ break;
1105
+ case 7:
1106
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
1107
+ break;
1108
+ case 6: ZSTD_FALLTHROUGH;
1109
+ default:
1110
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
1111
+ break;
1112
+ }
1113
+ }
602
1114
  }
1115
+ assert(bitC.ptr <= bitC.endPtr);
603
1116
 
604
- return BIT_closeCStream(&bitC);
1117
+ return HUF_closeCStream(&bitC);
605
1118
  }
606
1119
 
607
1120
  #if DYNAMIC_BMI2
608
1121
 
609
- static TARGET_ATTRIBUTE("bmi2") size_t
1122
+ static BMI2_TARGET_ATTRIBUTE size_t
610
1123
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
611
1124
  const void* src, size_t srcSize,
612
1125
  const HUF_CElt* CTable)
@@ -625,9 +1138,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
625
1138
  static size_t
626
1139
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
627
1140
  const void* src, size_t srcSize,
628
- const HUF_CElt* CTable, const int bmi2)
1141
+ const HUF_CElt* CTable, const int flags)
629
1142
  {
630
- if (bmi2) {
1143
+ if (flags & HUF_flags_bmi2) {
631
1144
  return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
632
1145
  }
633
1146
  return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -638,24 +1151,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
638
1151
  static size_t
639
1152
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
640
1153
  const void* src, size_t srcSize,
641
- const HUF_CElt* CTable, const int bmi2)
1154
+ const HUF_CElt* CTable, const int flags)
642
1155
  {
643
- (void)bmi2;
1156
+ (void)flags;
644
1157
  return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
645
1158
  }
646
1159
 
647
1160
  #endif
648
1161
 
649
- size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1162
+ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
650
1163
  {
651
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1164
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
652
1165
  }
653
1166
 
654
-
655
1167
  static size_t
656
1168
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
657
1169
  const void* src, size_t srcSize,
658
- const HUF_CElt* CTable, int bmi2)
1170
+ const HUF_CElt* CTable, int flags)
659
1171
  {
660
1172
  size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
661
1173
  const BYTE* ip = (const BYTE*) src;
@@ -669,27 +1181,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
669
1181
  op += 6; /* jumpTable */
670
1182
 
671
1183
  assert(op <= oend);
672
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
673
- if (cSize==0) return 0;
674
- assert(cSize <= 65535);
1184
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1185
+ if (cSize == 0 || cSize > 65535) return 0;
675
1186
  MEM_writeLE16(ostart, (U16)cSize);
676
1187
  op += cSize;
677
1188
  }
678
1189
 
679
1190
  ip += segmentSize;
680
1191
  assert(op <= oend);
681
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
682
- if (cSize==0) return 0;
683
- assert(cSize <= 65535);
1192
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1193
+ if (cSize == 0 || cSize > 65535) return 0;
684
1194
  MEM_writeLE16(ostart+2, (U16)cSize);
685
1195
  op += cSize;
686
1196
  }
687
1197
 
688
1198
  ip += segmentSize;
689
1199
  assert(op <= oend);
690
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
691
- if (cSize==0) return 0;
692
- assert(cSize <= 65535);
1200
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1201
+ if (cSize == 0 || cSize > 65535) return 0;
693
1202
  MEM_writeLE16(ostart+4, (U16)cSize);
694
1203
  op += cSize;
695
1204
  }
@@ -697,17 +1206,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
697
1206
  ip += segmentSize;
698
1207
  assert(op <= oend);
699
1208
  assert(ip <= iend);
700
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
701
- if (cSize==0) return 0;
1209
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
1210
+ if (cSize == 0 || cSize > 65535) return 0;
702
1211
  op += cSize;
703
1212
  }
704
1213
 
705
1214
  return (size_t)(op-ostart);
706
1215
  }
707
1216
 
708
- size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1217
+ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
709
1218
  {
710
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1219
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
711
1220
  }
712
1221
 
713
1222
  typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -715,11 +1224,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
715
1224
  static size_t HUF_compressCTable_internal(
716
1225
  BYTE* const ostart, BYTE* op, BYTE* const oend,
717
1226
  const void* src, size_t srcSize,
718
- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
1227
+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
719
1228
  {
720
1229
  size_t const cSize = (nbStreams==HUF_singleStream) ?
721
- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
722
- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
1230
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
1231
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
723
1232
  if (HUF_isError(cSize)) { return cSize; }
724
1233
  if (cSize==0) { return 0; } /* uncompressible */
725
1234
  op += cSize;
@@ -731,31 +1240,113 @@ static size_t HUF_compressCTable_internal(
731
1240
 
732
1241
  typedef struct {
733
1242
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
734
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
735
- HUF_buildCTable_wksp_tables buildCTable_wksp;
1243
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1244
+ union {
1245
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1246
+ HUF_WriteCTableWksp writeCTable_wksp;
1247
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1248
+ } wksps;
736
1249
  } HUF_compress_tables_t;
737
1250
 
1251
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1252
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1253
+
1254
+ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
1255
+ {
1256
+ unsigned cardinality = 0;
1257
+ unsigned i;
1258
+
1259
+ for (i = 0; i < maxSymbolValue + 1; i++) {
1260
+ if (count[i] != 0) cardinality += 1;
1261
+ }
1262
+
1263
+ return cardinality;
1264
+ }
1265
+
1266
+ unsigned HUF_minTableLog(unsigned symbolCardinality)
1267
+ {
1268
+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
1269
+ return minBitsSymbols;
1270
+ }
1271
+
1272
+ unsigned HUF_optimalTableLog(
1273
+ unsigned maxTableLog,
1274
+ size_t srcSize,
1275
+ unsigned maxSymbolValue,
1276
+ void* workSpace, size_t wkspSize,
1277
+ HUF_CElt* table,
1278
+ const unsigned* count,
1279
+ int flags)
1280
+ {
1281
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
1282
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
1283
+
1284
+ if (!(flags & HUF_flags_optimalDepth)) {
1285
+ /* cheap evaluation, based on FSE */
1286
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
1287
+ }
1288
+
1289
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
1290
+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
1291
+ size_t hSize, newSize;
1292
+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
1293
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
1294
+ size_t optSize = ((size_t) ~0) - 1;
1295
+ unsigned optLog = maxTableLog, optLogGuess;
1296
+
1297
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
1298
+
1299
+ /* Search until size increases */
1300
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
1301
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
1302
+
1303
+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
1304
+ if (ERR_isError(maxBits)) continue;
1305
+
1306
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
1307
+
1308
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
1309
+ }
1310
+
1311
+ if (ERR_isError(hSize)) continue;
1312
+
1313
+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
1314
+
1315
+ if (newSize > optSize + 1) {
1316
+ break;
1317
+ }
1318
+
1319
+ if (newSize < optSize) {
1320
+ optSize = newSize;
1321
+ optLog = optLogGuess;
1322
+ }
1323
+ }
1324
+ assert(optLog <= HUF_TABLELOG_MAX);
1325
+ return optLog;
1326
+ }
1327
+ }
1328
+
738
1329
  /* HUF_compress_internal() :
739
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1330
+ * `workSpace` is passed through HUF_alignUpWorkspace() before use (NOTE: presumably so it need not be pre-aligned - confirm),
1331
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
740
1332
  static size_t
741
1333
  HUF_compress_internal (void* dst, size_t dstSize,
742
1334
  const void* src, size_t srcSize,
743
1335
  unsigned maxSymbolValue, unsigned huffLog,
744
1336
  HUF_nbStreams_e nbStreams,
745
1337
  void* workSpace, size_t wkspSize,
746
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
747
- const int bmi2)
1338
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
748
1339
  {
749
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1340
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
750
1341
  BYTE* const ostart = (BYTE*)dst;
751
1342
  BYTE* const oend = ostart + dstSize;
752
1343
  BYTE* op = ostart;
753
1344
 
754
- HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
1345
+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
1346
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
755
1347
 
756
1348
  /* checks & inits */
757
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
758
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1349
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
759
1350
  if (!srcSize) return 0; /* Uncompressed */
760
1351
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
761
1352
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -765,17 +1356,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
765
1356
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
766
1357
 
767
1358
  /* Heuristic : If old table is valid, use it for small inputs */
768
- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
1359
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
769
1360
  return HUF_compressCTable_internal(ostart, op, oend,
770
1361
  src, srcSize,
771
- nbStreams, oldHufTable, bmi2);
1362
+ nbStreams, oldHufTable, flags);
1363
+ }
1364
+
1365
+ /* If uncompressible data is suspected, do a smaller sampling first */
1366
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1367
+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1368
+ size_t largestTotal = 0;
1369
+ DEBUGLOG(5, "input suspected incompressible : sampling to check");
1370
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1371
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1372
+ largestTotal += largestBegin;
1373
+ }
1374
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1375
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1376
+ largestTotal += largestEnd;
1377
+ }
1378
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
772
1379
  }
773
1380
 
774
1381
  /* Scan input and build symbol stats */
775
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1382
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
776
1383
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
777
1384
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
778
1385
  }
1386
+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
779
1387
 
780
1388
  /* Check validity of previous table */
781
1389
  if ( repeat
@@ -784,26 +1392,25 @@ HUF_compress_internal (void* dst, size_t dstSize,
784
1392
  *repeat = HUF_repeat_none;
785
1393
  }
786
1394
  /* Heuristic : use existing table for small inputs */
787
- if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
1395
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
788
1396
  return HUF_compressCTable_internal(ostart, op, oend,
789
1397
  src, srcSize,
790
- nbStreams, oldHufTable, bmi2);
1398
+ nbStreams, oldHufTable, flags);
791
1399
  }
792
1400
 
793
1401
  /* Build Huffman Tree */
794
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
1402
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
795
1403
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
796
1404
  maxSymbolValue, huffLog,
797
- &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
1405
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
798
1406
  CHECK_F(maxBits);
799
1407
  huffLog = (U32)maxBits;
800
- /* Zero unused symbols in CTable, so we can check it for validity */
801
- ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
802
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1408
+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
803
1409
  }
804
1410
 
805
1411
  /* Write table description header */
806
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1412
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1413
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
807
1414
  /* Check if using previous huffman table is beneficial */
808
1415
  if (repeat && *repeat != HUF_repeat_none) {
809
1416
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -811,7 +1418,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
811
1418
  if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
812
1419
  return HUF_compressCTable_internal(ostart, op, oend,
813
1420
  src, srcSize,
814
- nbStreams, oldHufTable, bmi2);
1421
+ nbStreams, oldHufTable, flags);
815
1422
  } }
816
1423
 
817
1424
  /* Use the new huffman table */
@@ -823,91 +1430,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
823
1430
  }
824
1431
  return HUF_compressCTable_internal(ostart, op, oend,
825
1432
  src, srcSize,
826
- nbStreams, table->CTable, bmi2);
827
- }
828
-
829
-
830
- size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
831
- const void* src, size_t srcSize,
832
- unsigned maxSymbolValue, unsigned huffLog,
833
- void* workSpace, size_t wkspSize)
834
- {
835
- return HUF_compress_internal(dst, dstSize, src, srcSize,
836
- maxSymbolValue, huffLog, HUF_singleStream,
837
- workSpace, wkspSize,
838
- NULL, NULL, 0, 0 /*bmi2*/);
1433
+ nbStreams, table->CTable, flags);
839
1434
  }
840
1435
 
841
1436
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
842
1437
  const void* src, size_t srcSize,
843
1438
  unsigned maxSymbolValue, unsigned huffLog,
844
1439
  void* workSpace, size_t wkspSize,
845
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1440
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
846
1441
  {
1442
+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
847
1443
  return HUF_compress_internal(dst, dstSize, src, srcSize,
848
1444
  maxSymbolValue, huffLog, HUF_singleStream,
849
1445
  workSpace, wkspSize, hufTable,
850
- repeat, preferRepeat, bmi2);
851
- }
852
-
853
- /* HUF_compress4X_repeat():
854
- * compress input using 4 streams.
855
- * provide workspace to generate compression tables */
856
- size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
857
- const void* src, size_t srcSize,
858
- unsigned maxSymbolValue, unsigned huffLog,
859
- void* workSpace, size_t wkspSize)
860
- {
861
- return HUF_compress_internal(dst, dstSize, src, srcSize,
862
- maxSymbolValue, huffLog, HUF_fourStreams,
863
- workSpace, wkspSize,
864
- NULL, NULL, 0, 0 /*bmi2*/);
1446
+ repeat, flags);
865
1447
  }
866
1448
 
867
1449
  /* HUF_compress4X_repeat():
868
1450
  * compress input using 4 streams.
869
- * re-use an existing huffman compression table */
1451
+ * consider skipping quickly
1452
+ * reuse an existing huffman compression table */
870
1453
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
871
1454
  const void* src, size_t srcSize,
872
1455
  unsigned maxSymbolValue, unsigned huffLog,
873
1456
  void* workSpace, size_t wkspSize,
874
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1457
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
875
1458
  {
1459
+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
876
1460
  return HUF_compress_internal(dst, dstSize, src, srcSize,
877
1461
  maxSymbolValue, huffLog, HUF_fourStreams,
878
1462
  workSpace, wkspSize,
879
- hufTable, repeat, preferRepeat, bmi2);
1463
+ hufTable, repeat, flags);
880
1464
  }
881
-
882
- #ifndef ZSTD_NO_UNUSED_FUNCTIONS
883
- /** HUF_buildCTable() :
884
- * @return : maxNbBits
885
- * Note : count is used before tree is written, so they can safely overlap
886
- */
887
- size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
888
- {
889
- HUF_buildCTable_wksp_tables workspace;
890
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
891
- }
892
-
893
- size_t HUF_compress1X (void* dst, size_t dstSize,
894
- const void* src, size_t srcSize,
895
- unsigned maxSymbolValue, unsigned huffLog)
896
- {
897
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
898
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
899
- }
900
-
901
- size_t HUF_compress2 (void* dst, size_t dstSize,
902
- const void* src, size_t srcSize,
903
- unsigned maxSymbolValue, unsigned huffLog)
904
- {
905
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
906
- return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
907
- }
908
-
909
- size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
910
- {
911
- return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
912
- }
913
- #endif