zstd-ruby 1.4.5.0 → 1.5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +2 -1
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +225 -222
  8. data/ext/zstdruby/libzstd/README.md +43 -5
  9. data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
  10. data/ext/zstdruby/libzstd/common/compiler.h +182 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
  15. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +41 -12
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
  19. data/ext/zstdruby/libzstd/common/huf.h +47 -23
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  25. data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
  26. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  27. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  28. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  29. data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
  30. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  31. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  32. data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
  33. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  34. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  35. data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
  36. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
  45. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  47. data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  54. data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  56. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
  58. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  60. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
  66. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  67. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  72. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  73. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
  74. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
  75. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  76. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  77. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  86. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  90. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  92. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  93. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  94. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  95. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
  96. data/ext/zstdruby/libzstd/zstd.h +699 -214
  97. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
  98. data/ext/zstdruby/zstdruby.c +2 -2
  99. data/lib/zstd-ruby/version.rb +1 -1
  100. metadata +15 -6
  101. data/.travis.yml +0 -14
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * Huffman encoder, part of New Generation Entropy library
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Yann Collet, Facebook, Inc.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,8 +23,7 @@
23
23
  /* **************************************************************
24
24
  * Includes
25
25
  ****************************************************************/
26
- #include <string.h> /* memcpy, memset */
27
- #include <stdio.h> /* printf (debug) */
26
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
28
27
  #include "../common/compiler.h"
29
28
  #include "../common/bitstream.h"
30
29
  #include "hist.h"
@@ -54,13 +53,43 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
54
53
  /* *******************************************************
55
54
  * HUF : Huffman block compression
56
55
  *********************************************************/
56
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
57
+
58
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
59
+ {
60
+ size_t const mask = align - 1;
61
+ size_t const rem = (size_t)workspace & mask;
62
+ size_t const add = (align - rem) & mask;
63
+ BYTE* const aligned = (BYTE*)workspace + add;
64
+ assert((align & (align - 1)) == 0); /* pow 2 */
65
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
66
+ if (*workspaceSizePtr >= add) {
67
+ assert(add < align);
68
+ assert(((size_t)aligned & mask) == 0);
69
+ *workspaceSizePtr -= add;
70
+ return aligned;
71
+ } else {
72
+ *workspaceSizePtr = 0;
73
+ return NULL;
74
+ }
75
+ }
76
+
77
+
57
78
  /* HUF_compressWeights() :
58
79
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
59
80
  * The use case needs much less stack memory.
60
81
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
61
82
  */
62
83
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
63
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
84
+
85
+ typedef struct {
86
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
87
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
88
+ unsigned count[HUF_TABLELOG_MAX+1];
89
+ S16 norm[HUF_TABLELOG_MAX+1];
90
+ } HUF_CompressWeightsWksp;
91
+
92
+ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
64
93
  {
65
94
  BYTE* const ostart = (BYTE*) dst;
66
95
  BYTE* op = ostart;
@@ -68,33 +97,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
68
97
 
69
98
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
70
99
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
100
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
71
101
 
72
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
73
- BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
74
-
75
- unsigned count[HUF_TABLELOG_MAX+1];
76
- S16 norm[HUF_TABLELOG_MAX+1];
102
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
77
103
 
78
104
  /* init conditions */
79
105
  if (wtSize <= 1) return 0; /* Not compressible */
80
106
 
81
107
  /* Scan input and build symbol stats */
82
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
108
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
83
109
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
84
110
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
85
111
  }
86
112
 
87
113
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
88
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
114
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
89
115
 
90
116
  /* Write table description header */
91
- { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
117
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
92
118
  op += hSize;
93
119
  }
94
120
 
95
121
  /* Compress */
96
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
97
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
122
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
123
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
98
124
  if (cSize == 0) return 0; /* not enough space for compressed data */
99
125
  op += cSize;
100
126
  }
@@ -102,35 +128,70 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
102
128
  return (size_t)(op-ostart);
103
129
  }
104
130
 
131
+ static size_t HUF_getNbBits(HUF_CElt elt)
132
+ {
133
+ return elt & 0xFF;
134
+ }
105
135
 
106
- struct HUF_CElt_s {
107
- U16 val;
108
- BYTE nbBits;
109
- }; /* typedef'd to HUF_CElt within "huf.h" */
136
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
137
+ {
138
+ return elt;
139
+ }
110
140
 
111
- /*! HUF_writeCTable() :
112
- `CTable` : Huffman tree to save, using huf representation.
113
- @return : size of saved CTable */
114
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
115
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
141
+ static size_t HUF_getValue(HUF_CElt elt)
142
+ {
143
+ return elt & ~0xFF;
144
+ }
145
+
146
+ static size_t HUF_getValueFast(HUF_CElt elt)
147
+ {
148
+ return elt;
149
+ }
150
+
151
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
116
152
  {
153
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
154
+ *elt = nbBits;
155
+ }
156
+
157
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
158
+ {
159
+ size_t const nbBits = HUF_getNbBits(*elt);
160
+ if (nbBits > 0) {
161
+ assert((value >> nbBits) == 0);
162
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
163
+ }
164
+ }
165
+
166
+ typedef struct {
167
+ HUF_CompressWeightsWksp wksp;
117
168
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
118
169
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
170
+ } HUF_WriteCTableWksp;
171
+
172
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
173
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
174
+ void* workspace, size_t workspaceSize)
175
+ {
176
+ HUF_CElt const* const ct = CTable + 1;
119
177
  BYTE* op = (BYTE*)dst;
120
178
  U32 n;
179
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
121
180
 
122
- /* check conditions */
181
+ /* check conditions */
182
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
123
183
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
124
184
 
125
185
  /* convert to weight */
126
- bitsToWeight[0] = 0;
186
+ wksp->bitsToWeight[0] = 0;
127
187
  for (n=1; n<huffLog+1; n++)
128
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
188
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
129
189
  for (n=0; n<maxSymbolValue; n++)
130
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
190
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
131
191
 
132
192
  /* attempt weights compression by FSE */
133
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
193
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
194
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
134
195
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
135
196
  op[0] = (BYTE)hSize;
136
197
  return hSize+1;
@@ -140,12 +201,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
140
201
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
141
202
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
142
203
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
143
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
204
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
144
205
  for (n=0; n<maxSymbolValue; n+=2)
145
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
206
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
146
207
  return ((maxSymbolValue+1)/2) + 1;
147
208
  }
148
209
 
210
+ /*! HUF_writeCTable() :
211
+ `CTable` : Huffman tree to save, using huf representation.
212
+ @return : size of saved CTable */
213
+ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
214
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
215
+ {
216
+ HUF_WriteCTableWksp wksp;
217
+ return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
218
+ }
219
+
149
220
 
150
221
  size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
151
222
  {
@@ -153,34 +224,36 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
153
224
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
154
225
  U32 tableLog = 0;
155
226
  U32 nbSymbols = 0;
227
+ HUF_CElt* const ct = CTable + 1;
156
228
 
157
229
  /* get symbol weights */
158
230
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
231
+ *hasZeroWeights = (rankVal[0] > 0);
159
232
 
160
233
  /* check result */
161
234
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
162
235
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
163
236
 
237
+ CTable[0] = tableLog;
238
+
164
239
  /* Prepare base value per rank */
165
240
  { U32 n, nextRankStart = 0;
166
241
  for (n=1; n<=tableLog; n++) {
167
- U32 current = nextRankStart;
242
+ U32 curr = nextRankStart;
168
243
  nextRankStart += (rankVal[n] << (n-1));
169
- rankVal[n] = current;
244
+ rankVal[n] = curr;
170
245
  } }
171
246
 
172
247
  /* fill nbBits */
173
- *hasZeroWeights = 0;
174
248
  { U32 n; for (n=0; n<nbSymbols; n++) {
175
249
  const U32 w = huffWeight[n];
176
- *hasZeroWeights |= (w == 0);
177
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
250
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
178
251
  } }
179
252
 
180
253
  /* fill val */
181
254
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
182
255
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
183
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
256
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
184
257
  /* determine starting value per rank */
185
258
  valPerRank[tableLog+1] = 0; /* for w==0 */
186
259
  { U16 min = 0;
@@ -190,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
190
263
  min >>= 1;
191
264
  } }
192
265
  /* assign value within rank, symbol order */
193
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
266
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
194
267
  }
195
268
 
196
269
  *maxSymbolValuePtr = nbSymbols - 1;
197
270
  return readSize;
198
271
  }
199
272
 
200
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
273
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
201
274
  {
202
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
275
+ const HUF_CElt* ct = CTable + 1;
203
276
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
204
- return table[symbolValue].nbBits;
277
+ return (U32)HUF_getNbBits(ct[symbolValue]);
205
278
  }
206
279
 
207
280
 
@@ -212,32 +285,63 @@ typedef struct nodeElt_s {
212
285
  BYTE nbBits;
213
286
  } nodeElt;
214
287
 
288
+ /**
289
+ * HUF_setMaxHeight():
290
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
291
+ *
292
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
293
+ * the tree so that it is a valid canonical Huffman tree.
294
+ *
295
+ * @pre The sum of the ranks of each symbol == 2^largestBits,
296
+ * where largestBits == huffNode[lastNonNull].nbBits.
297
+ * @post The sum of the ranks of each symbol == 2^largestBits,
298
+ * where largestBits is the return value <= maxNbBits.
299
+ *
300
+ * @param huffNode The Huffman tree modified in place to enforce maxNbBits.
301
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
302
+ * @param maxNbBits The maximum allowed number of bits, which the Huffman tree
303
+ * may not respect. After this function the Huffman tree will
304
+ * respect maxNbBits.
305
+ * @return The maximum number of bits of the Huffman tree after adjustment,
306
+ * necessarily no more than maxNbBits.
307
+ */
215
308
  static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
216
309
  {
217
310
  const U32 largestBits = huffNode[lastNonNull].nbBits;
218
- if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
311
+ /* early exit : no elt > maxNbBits, so the tree is already valid. */
312
+ if (largestBits <= maxNbBits) return largestBits;
219
313
 
220
314
  /* there are several too large elements (at least >= 2) */
221
315
  { int totalCost = 0;
222
316
  const U32 baseCost = 1 << (largestBits - maxNbBits);
223
317
  int n = (int)lastNonNull;
224
318
 
319
+ /* Adjust any ranks > maxNbBits to maxNbBits.
320
+ * Compute totalCost, which is how far the sum of the ranks is
321
+ * over 2^largestBits after adjusting the offending ranks.
322
+ */
225
323
  while (huffNode[n].nbBits > maxNbBits) {
226
324
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
227
325
  huffNode[n].nbBits = (BYTE)maxNbBits;
228
- n --;
229
- } /* n stops at huffNode[n].nbBits <= maxNbBits */
230
- while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
326
+ n--;
327
+ }
328
+ /* n stops at huffNode[n].nbBits <= maxNbBits */
329
+ assert(huffNode[n].nbBits <= maxNbBits);
330
+ /* n ends at index of smallest symbol using < maxNbBits */
331
+ while (huffNode[n].nbBits == maxNbBits) --n;
231
332
 
232
- /* renorm totalCost */
233
- totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
333
+ /* renorm totalCost from 2^largestBits to 2^maxNbBits
334
+ * note : totalCost is necessarily a multiple of baseCost */
335
+ assert((totalCost & (baseCost - 1)) == 0);
336
+ totalCost >>= (largestBits - maxNbBits);
337
+ assert(totalCost > 0);
234
338
 
235
339
  /* repay normalized cost */
236
340
  { U32 const noSymbol = 0xF0F0F0F0;
237
341
  U32 rankLast[HUF_TABLELOG_MAX+2];
238
342
 
239
- /* Get pos of last (smallest) symbol per rank */
240
- memset(rankLast, 0xF0, sizeof(rankLast));
343
+ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
344
+ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
241
345
  { U32 currentNbBits = maxNbBits;
242
346
  int pos;
243
347
  for (pos=n ; pos >= 0; pos--) {
@@ -247,34 +351,65 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
247
351
  } }
248
352
 
249
353
  while (totalCost > 0) {
354
+ /* Try to reduce the next power of 2 above totalCost because we
355
+ * gain back half the rank.
356
+ */
250
357
  U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
251
358
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
252
359
  U32 const highPos = rankLast[nBitsToDecrease];
253
360
  U32 const lowPos = rankLast[nBitsToDecrease-1];
254
361
  if (highPos == noSymbol) continue;
362
+ /* Decrease highPos if no symbols of lowPos or if it is
363
+ * not cheaper to remove 2 lowPos than highPos.
364
+ */
255
365
  if (lowPos == noSymbol) break;
256
366
  { U32 const highTotal = huffNode[highPos].count;
257
367
  U32 const lowTotal = 2 * huffNode[lowPos].count;
258
368
  if (highTotal <= lowTotal) break;
259
369
  } }
260
370
  /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
371
+ assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
261
372
  /* HUF_TABLELOG_MAX test just to please gcc 5+; but it should not be necessary */
262
373
  while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
263
- nBitsToDecrease ++;
374
+ nBitsToDecrease++;
375
+ assert(rankLast[nBitsToDecrease] != noSymbol);
376
+ /* Increase the number of bits to gain back half the rank cost. */
264
377
  totalCost -= 1 << (nBitsToDecrease-1);
378
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
379
+
380
+ /* Fix up the new rank.
381
+ * If the new rank was empty, this symbol is now its smallest.
382
+ * Otherwise, this symbol will be the largest in the new rank so no adjustment.
383
+ */
265
384
  if (rankLast[nBitsToDecrease-1] == noSymbol)
266
- rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
267
- huffNode[rankLast[nBitsToDecrease]].nbBits ++;
385
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
386
+ /* Fix up the old rank.
387
+ * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
388
+ * it must be the only symbol in its rank, so the old rank now has no symbols.
389
+ * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
390
+ * the smallest node in the rank. If the previous position belongs to a different rank,
391
+ * then the rank is now empty.
392
+ */
268
393
  if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
269
394
  rankLast[nBitsToDecrease] = noSymbol;
270
395
  else {
271
396
  rankLast[nBitsToDecrease]--;
272
397
  if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
273
398
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
274
- } } /* while (totalCost > 0) */
275
-
399
+ }
400
+ } /* while (totalCost > 0) */
401
+
402
+ /* If we've removed too much weight, then we have to add it back.
403
+ * To avoid overshooting again, we only adjust the smallest rank.
404
+ * We take the largest nodes from the lowest rank 0 and move them
405
+ * to rank 1. There's guaranteed to be enough rank 0 symbols because
406
+ * TODO.
407
+ */
276
408
  while (totalCost < 0) { /* Sometimes, cost correction overshoot */
277
- if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
409
+ /* special case : no rank 1 symbol (using maxNbBits-1);
410
+ * let's create one from largest rank 0 (using maxNbBits).
411
+ */
412
+ if (rankLast[1] == noSymbol) {
278
413
  while (huffNode[n].nbBits == maxNbBits) n--;
279
414
  huffNode[n+1].nbBits--;
280
415
  assert(n >= 0);
@@ -285,49 +420,179 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
285
420
  huffNode[ rankLast[1] + 1 ].nbBits--;
286
421
  rankLast[1]++;
287
422
  totalCost ++;
288
- } } } /* there are several too large elements (at least >= 2) */
423
+ }
424
+ } /* repay normalized cost */
425
+ } /* there are several too large elements (at least >= 2) */
289
426
 
290
427
  return maxNbBits;
291
428
  }
292
429
 
293
430
  typedef struct {
294
- U32 base;
295
- U32 current;
431
+ U16 base;
432
+ U16 curr;
296
433
  } rankPos;
297
434
 
298
435
  typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
299
436
 
300
- #define RANK_POSITION_TABLE_SIZE 32
437
+ /* Number of buckets available for HUF_sort() */
438
+ #define RANK_POSITION_TABLE_SIZE 192
301
439
 
302
440
  typedef struct {
303
441
  huffNodeTable huffNodeTbl;
304
442
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
305
443
  } HUF_buildCTable_wksp_tables;
306
444
 
307
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
308
- {
445
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
446
+ * Strategy is to use as many buckets as possible for representing distinct
447
+ * counts while using the remainder to represent all "large" counts.
448
+ *
449
+ * To satisfy this requirement for 192 buckets, we can do the following:
450
+ * Let buckets 0-166 represent distinct counts of [0, 166]
451
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
452
+ */
453
+ #define RANK_POSITION_MAX_COUNT_LOG 32
454
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
455
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
456
+
457
+ /* Return the appropriate bucket index for a given count. See definition of
458
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
459
+ */
460
+ static U32 HUF_getIndex(U32 const count) {
461
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
462
+ ? count
463
+ : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
464
+ }
465
+
466
+ /* Helper swap function for HUF_quickSortPartition() */
467
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
468
+ nodeElt tmp = *a;
469
+ *a = *b;
470
+ *b = tmp;
471
+ }
472
+
473
+ /* Returns 0 if the huffNode array is not sorted by descending count */
474
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
475
+ U32 i;
476
+ for (i = 1; i < maxSymbolValue1; ++i) {
477
+ if (huffNode[i].count > huffNode[i-1].count) {
478
+ return 0;
479
+ }
480
+ }
481
+ return 1;
482
+ }
483
+
484
+ /* Insertion sort by descending order */
485
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
486
+ int i;
487
+ int const size = high-low+1;
488
+ huffNode += low;
489
+ for (i = 1; i < size; ++i) {
490
+ nodeElt const key = huffNode[i];
491
+ int j = i - 1;
492
+ while (j >= 0 && huffNode[j].count < key.count) {
493
+ huffNode[j + 1] = huffNode[j];
494
+ j--;
495
+ }
496
+ huffNode[j + 1] = key;
497
+ }
498
+ }
499
+
500
+ /* Pivot helper function for quicksort. */
501
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
502
+ /* Simply select rightmost element as pivot. "Better" selectors like
503
+ * median-of-three don't experimentally appear to have any benefit.
504
+ */
505
+ U32 const pivot = arr[high].count;
506
+ int i = low - 1;
507
+ int j = low;
508
+ for ( ; j < high; j++) {
509
+ if (arr[j].count > pivot) {
510
+ i++;
511
+ HUF_swapNodes(&arr[i], &arr[j]);
512
+ }
513
+ }
514
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
515
+ return i + 1;
516
+ }
517
+
518
+ /* Classic quicksort by descending with partially iterative calls
519
+ * to reduce worst case callstack size.
520
+ */
521
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
522
+ int const kInsertionSortThreshold = 8;
523
+ if (high - low < kInsertionSortThreshold) {
524
+ HUF_insertionSort(arr, low, high);
525
+ return;
526
+ }
527
+ while (low < high) {
528
+ int const idx = HUF_quickSortPartition(arr, low, high);
529
+ if (idx - low < high - idx) {
530
+ HUF_simpleQuickSort(arr, low, idx - 1);
531
+ low = idx + 1;
532
+ } else {
533
+ HUF_simpleQuickSort(arr, idx + 1, high);
534
+ high = idx - 1;
535
+ }
536
+ }
537
+ }
538
+
539
+ /**
540
+ * HUF_sort():
541
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
542
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
543
+ *
544
+ * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
545
+ * Must have (maxSymbolValue + 1) entries.
546
+ * @param[in] count Histogram of the symbols.
547
+ * @param[in] maxSymbolValue Maximum symbol value.
548
+ * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
549
+ */
550
+ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
309
551
  U32 n;
552
+ U32 const maxSymbolValue1 = maxSymbolValue+1;
553
+
554
+ /* Compute base and set curr to base.
555
+ * For symbol n let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
556
+ * See HUF_getIndex to see bucketing strategy.
557
+ * We attribute each symbol to lowerRank's base value, because we want to know where
558
+ * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
559
+ */
560
+ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
561
+ for (n = 0; n < maxSymbolValue1; ++n) {
562
+ U32 lowerRank = HUF_getIndex(count[n]);
563
+ assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
564
+ rankPosition[lowerRank].base++;
565
+ }
310
566
 
311
- memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
312
- for (n=0; n<=maxSymbolValue; n++) {
313
- U32 r = BIT_highbit32(count[n] + 1);
314
- rankPosition[r].base ++;
567
+ assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
568
+ /* Set up the rankPosition table */
569
+ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
570
+ rankPosition[n-1].base += rankPosition[n].base;
571
+ rankPosition[n-1].curr = rankPosition[n-1].base;
315
572
  }
316
- for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
317
- for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
318
- for (n=0; n<=maxSymbolValue; n++) {
573
+
574
+ /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
575
+ for (n = 0; n < maxSymbolValue1; ++n) {
319
576
  U32 const c = count[n];
320
- U32 const r = BIT_highbit32(c+1) + 1;
321
- U32 pos = rankPosition[r].current++;
322
- while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
323
- huffNode[pos] = huffNode[pos-1];
324
- pos--;
325
- }
577
+ U32 const r = HUF_getIndex(c) + 1;
578
+ U32 const pos = rankPosition[r].curr++;
579
+ assert(pos < maxSymbolValue1);
326
580
  huffNode[pos].count = c;
327
581
  huffNode[pos].byte = (BYTE)n;
328
582
  }
329
- }
330
583
 
584
+ /* Sort each bucket. */
585
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
586
+ U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
587
+ U32 const bucketStartIdx = rankPosition[n].base;
588
+ if (bucketSize > 1) {
589
+ assert(bucketStartIdx < maxSymbolValue1);
590
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
591
+ }
592
+ }
593
+
594
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
595
+ }
331
596
 
332
597
  /** HUF_buildCTable_wksp() :
333
598
  * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@@ -335,28 +600,20 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
335
600
  */
336
601
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
337
602
 
338
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
603
+ /* HUF_buildTree():
604
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
605
+ *
606
+ * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
607
+ * @param maxSymbolValue The maximum symbol value.
608
+ * @return The smallest node in the Huffman tree (by count).
609
+ */
610
+ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
339
611
  {
340
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
341
- nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
342
- nodeElt* const huffNode = huffNode0+1;
612
+ nodeElt* const huffNode0 = huffNode - 1;
343
613
  int nonNullRank;
344
614
  int lowS, lowN;
345
615
  int nodeNb = STARTNODE;
346
616
  int n, nodeRoot;
347
-
348
- /* safety checks */
349
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
350
- if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
351
- return ERROR(workSpace_tooSmall);
352
- if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
353
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
354
- return ERROR(maxSymbolValue_tooLarge);
355
- memset(huffNode0, 0, sizeof(huffNodeTable));
356
-
357
- /* sort, decreasing order */
358
- HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
359
-
360
617
  /* init for parents */
361
618
  nonNullRank = (int)maxSymbolValue;
362
619
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -383,127 +640,396 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
383
640
  for (n=0; n<=nonNullRank; n++)
384
641
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
385
642
 
643
+ return nonNullRank;
644
+ }
645
+
646
+ /**
647
+ * HUF_buildCTableFromTree():
648
+ * Build the CTable given the Huffman tree in huffNode.
649
+ *
650
+ * @param[out] CTable The output Huffman CTable.
651
+ * @param huffNode The Huffman tree.
652
+ * @param nonNullRank The last and smallest node in the Huffman tree.
653
+ * @param maxSymbolValue The maximum symbol value.
654
+ * @param maxNbBits The exact maximum number of bits used in the Huffman tree.
655
+ */
656
+ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
657
+ {
658
+ HUF_CElt* const ct = CTable + 1;
659
+ /* fill result into ctable (val, nbBits) */
660
+ int n;
661
+ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
662
+ U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
663
+ int const alphabetSize = (int)(maxSymbolValue + 1);
664
+ for (n=0; n<=nonNullRank; n++)
665
+ nbPerRank[huffNode[n].nbBits]++;
666
+ /* determine starting value per rank */
667
+ { U16 min = 0;
668
+ for (n=(int)maxNbBits; n>0; n--) {
669
+ valPerRank[n] = min; /* get starting value within each rank */
670
+ min += nbPerRank[n];
671
+ min >>= 1;
672
+ } }
673
+ for (n=0; n<alphabetSize; n++)
674
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
675
+ for (n=0; n<alphabetSize; n++)
676
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
677
+ CTable[0] = maxNbBits;
678
+ }
679
+
680
+ size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
681
+ {
682
+ HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
683
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
684
+ nodeElt* const huffNode = huffNode0+1;
685
+ int nonNullRank;
686
+
687
+ /* safety checks */
688
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
689
+ return ERROR(workSpace_tooSmall);
690
+ if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
691
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
692
+ return ERROR(maxSymbolValue_tooLarge);
693
+ ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
694
+
695
+ /* sort, decreasing order */
696
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
697
+
698
+ /* build tree */
699
+ nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
700
+
386
701
  /* enforce maxTableLog */
387
702
  maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
703
+ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
388
704
 
389
- /* fill result into tree (val, nbBits) */
390
- { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
391
- U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
392
- int const alphabetSize = (int)(maxSymbolValue + 1);
393
- if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
394
- for (n=0; n<=nonNullRank; n++)
395
- nbPerRank[huffNode[n].nbBits]++;
396
- /* determine stating value per rank */
397
- { U16 min = 0;
398
- for (n=(int)maxNbBits; n>0; n--) {
399
- valPerRank[n] = min; /* get starting value within each rank */
400
- min += nbPerRank[n];
401
- min >>= 1;
402
- } }
403
- for (n=0; n<alphabetSize; n++)
404
- tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
405
- for (n=0; n<alphabetSize; n++)
406
- tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
407
- }
705
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
408
706
 
409
707
  return maxNbBits;
410
708
  }
411
709
 
412
- /** HUF_buildCTable() :
413
- * @return : maxNbBits
414
- * Note : count is used before tree is written, so they can safely overlap
415
- */
416
- size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
417
- {
418
- HUF_buildCTable_wksp_tables workspace;
419
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
420
- }
421
-
422
710
  size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
423
711
  {
712
+ HUF_CElt const* ct = CTable + 1;
424
713
  size_t nbBits = 0;
425
714
  int s;
426
715
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
427
- nbBits += CTable[s].nbBits * count[s];
716
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
428
717
  }
429
718
  return nbBits >> 3;
430
719
  }
431
720
 
432
721
  int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
722
+ HUF_CElt const* ct = CTable + 1;
433
723
  int bad = 0;
434
724
  int s;
435
725
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
436
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
726
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
437
727
  }
438
728
  return !bad;
439
729
  }
440
730
 
441
731
  size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
442
732
 
733
+ /** HUF_CStream_t:
734
+ * Huffman uses its own BIT_CStream_t implementation.
735
+ * There are three major differences from BIT_CStream_t:
736
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
737
+ * the pair (nbBits, value) in the format:
738
+ * format:
739
+ * - Bits [0, 4) = nbBits
740
+ * - Bits [4, 64 - nbBits) = 0
741
+ * - Bits [64 - nbBits, 64) = value
742
+ * 2. The bitContainer is built from the upper bits and
743
+ * right shifted. E.g. to add a new value of N bits
744
+ * you right shift the bitContainer by N, then or in
745
+ * the new value into the N upper bits.
746
+ * 3. The bitstream has two bit containers. You can add
747
+ * bits to the second container and merge them into
748
+ * the first container.
749
+ */
750
+
751
+ #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
752
+
753
+ typedef struct {
754
+ size_t bitContainer[2];
755
+ size_t bitPos[2];
756
+
757
+ BYTE* startPtr;
758
+ BYTE* ptr;
759
+ BYTE* endPtr;
760
+ } HUF_CStream_t;
761
+
762
+ /**! HUF_initCStream():
763
+ * Initializes the bitstream.
764
+ * @returns 0 or an error code.
765
+ */
766
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
767
+ void* startPtr, size_t dstCapacity)
768
+ {
769
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
770
+ bitC->startPtr = (BYTE*)startPtr;
771
+ bitC->ptr = bitC->startPtr;
772
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
773
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
774
+ return 0;
775
+ }
776
+
777
+ /*! HUF_addBits():
778
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
779
+ *
780
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
781
+ * See the HUF_CStream_t docs for the format.
782
+ * @param idx Insert into the bitstream at this idx.
783
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
784
+ * to have at least 4 unused bits after this call it may be 1,
785
+ * otherwise it must be 0. HUF_addBits() is faster when fast is set.
786
+ */
787
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
788
+ {
789
+ assert(idx <= 1);
790
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
791
+ /* This is efficient on x86-64 with BMI2 because shrx
792
+ * only reads the low 6 bits of the register. The compiler
793
+ * knows this and elides the mask. When fast is set,
794
+ * every operation can use the same value loaded from elt.
795
+ */
796
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
797
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
798
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
799
+ * doesn't matter that the high bits have noise from the value.
800
+ */
801
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
802
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
803
+ /* The last 4-bits of elt are dirty if fast is set,
804
+ * so we must not be overwriting bits that have already been
805
+ * inserted into the bit container.
806
+ */
807
+ #if DEBUGLEVEL >= 1
808
+ {
809
+ size_t const nbBits = HUF_getNbBits(elt);
810
+ size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
811
+ (void)dirtyBits;
812
+ /* Middle bits are 0. */
813
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
814
+ /* We didn't overwrite any bits in the bit container. */
815
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
816
+ (void)dirtyBits;
817
+ }
818
+ #endif
819
+ }
820
+
821
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
822
+ {
823
+ bitC->bitContainer[1] = 0;
824
+ bitC->bitPos[1] = 0;
825
+ }
826
+
827
+ /*! HUF_mergeIndex1() :
828
+ * Merges the bit container @ index 1 into the bit container @ index 0
829
+ * and zeros the bit container @ index 1.
830
+ */
831
+ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
832
+ {
833
+ assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
834
+ bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
835
+ bitC->bitContainer[0] |= bitC->bitContainer[1];
836
+ bitC->bitPos[0] += bitC->bitPos[1];
837
+ assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
838
+ }
839
+
840
+ /*! HUF_flushBits() :
841
+ * Flushes the bits in the bit container @ index 0.
842
+ *
843
+ * @post bitPos will be < 8.
844
+ * @param kFast If kFast is set then we must know a-priori that
845
+ * the bit container will not overflow.
846
+ */
847
+ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
848
+ {
849
+ /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
850
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
851
+ size_t const nbBytes = nbBits >> 3;
852
+ /* The top nbBits bits of bitContainer are the ones we need. */
853
+ size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
854
+ /* Mask bitPos to account for the bytes we consumed. */
855
+ bitC->bitPos[0] &= 7;
856
+ assert(nbBits > 0);
857
+ assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
858
+ assert(bitC->ptr <= bitC->endPtr);
859
+ MEM_writeLEST(bitC->ptr, bitContainer);
860
+ bitC->ptr += nbBytes;
861
+ assert(!kFast || bitC->ptr <= bitC->endPtr);
862
+ if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
863
+ /* bitContainer doesn't need to be modified because the leftover
864
+ * bits are already the top bitPos bits. And we don't care about
865
+ * noise in the lower values.
866
+ */
867
+ }
868
+
869
+ /*! HUF_endMark()
870
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
871
+ */
872
+ static HUF_CElt HUF_endMark(void)
873
+ {
874
+ HUF_CElt endMark;
875
+ HUF_setNbBits(&endMark, 1);
876
+ HUF_setValue(&endMark, 1);
877
+ return endMark;
878
+ }
879
+
880
+ /*! HUF_closeCStream() :
881
+ * @return Size of CStream, in bytes,
882
+ * or 0 if it could not fit into dstBuffer */
883
+ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
884
+ {
885
+ HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
886
+ HUF_flushBits(bitC, /* kFast */ 0);
887
+ {
888
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
889
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
890
+ return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
891
+ }
892
+ }
893
+
443
894
  FORCE_INLINE_TEMPLATE void
444
- HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
895
+ HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
445
896
  {
446
- BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
897
+ HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
447
898
  }
448
899
 
449
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
900
+ FORCE_INLINE_TEMPLATE void
901
+ HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
902
+ const BYTE* ip, size_t srcSize,
903
+ const HUF_CElt* ct,
904
+ int kUnroll, int kFastFlush, int kLastFast)
905
+ {
906
+ /* Join to kUnroll */
907
+ int n = (int)srcSize;
908
+ int rem = n % kUnroll;
909
+ if (rem > 0) {
910
+ for (; rem > 0; --rem) {
911
+ HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
912
+ }
913
+ HUF_flushBits(bitC, kFastFlush);
914
+ }
915
+ assert(n % kUnroll == 0);
916
+
917
+ /* Join to 2 * kUnroll */
918
+ if (n % (2 * kUnroll)) {
919
+ int u;
920
+ for (u = 1; u < kUnroll; ++u) {
921
+ HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
922
+ }
923
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
924
+ HUF_flushBits(bitC, kFastFlush);
925
+ n -= kUnroll;
926
+ }
927
+ assert(n % (2 * kUnroll) == 0);
450
928
 
451
- #define HUF_FLUSHBITS_1(stream) \
452
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
929
+ for (; n>0; n-= 2 * kUnroll) {
930
+ /* Encode kUnroll symbols into the bitstream @ index 0. */
931
+ int u;
932
+ for (u = 1; u < kUnroll; ++u) {
933
+ HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
934
+ }
935
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
936
+ HUF_flushBits(bitC, kFastFlush);
937
+ /* Encode kUnroll symbols into the bitstream @ index 1.
938
+ * This allows us to start filling the bit container
939
+ * without any data dependencies.
940
+ */
941
+ HUF_zeroIndex1(bitC);
942
+ for (u = 1; u < kUnroll; ++u) {
943
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
944
+ }
945
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
946
+ /* Merge bitstream @ index 1 into the bitstream @ index 0 */
947
+ HUF_mergeIndex1(bitC);
948
+ HUF_flushBits(bitC, kFastFlush);
949
+ }
950
+ assert(n == 0);
951
+
952
+ }
953
+
954
+ /**
955
+ * Returns a tight upper bound on the output space needed by Huffman
956
+ * with 8 bytes buffer to handle over-writes. If the output is at least
957
+ * this large we don't need to do bounds checks during Huffman encoding.
958
+ */
959
+ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
960
+ {
961
+ return ((srcSize * tableLog) >> 3) + 8;
962
+ }
453
963
 
454
- #define HUF_FLUSHBITS_2(stream) \
455
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
456
964
 
457
965
  FORCE_INLINE_TEMPLATE size_t
458
966
  HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
459
967
  const void* src, size_t srcSize,
460
968
  const HUF_CElt* CTable)
461
969
  {
970
+ U32 const tableLog = (U32)CTable[0];
971
+ HUF_CElt const* ct = CTable + 1;
462
972
  const BYTE* ip = (const BYTE*) src;
463
973
  BYTE* const ostart = (BYTE*)dst;
464
974
  BYTE* const oend = ostart + dstSize;
465
975
  BYTE* op = ostart;
466
- size_t n;
467
- BIT_CStream_t bitC;
976
+ HUF_CStream_t bitC;
468
977
 
469
978
  /* init */
470
979
  if (dstSize < 8) return 0; /* not enough space to compress */
471
- { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
980
+ { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
472
981
  if (HUF_isError(initErr)) return 0; }
473
982
 
474
- n = srcSize & ~3; /* join to mod 4 */
475
- switch (srcSize & 3)
476
- {
477
- case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
478
- HUF_FLUSHBITS_2(&bitC);
479
- /* fall-through */
480
- case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
481
- HUF_FLUSHBITS_1(&bitC);
482
- /* fall-through */
483
- case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
484
- HUF_FLUSHBITS(&bitC);
485
- /* fall-through */
486
- case 0 : /* fall-through */
487
- default: break;
488
- }
489
-
490
- for (; n>0; n-=4) { /* note : n&3==0 at this stage */
491
- HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
492
- HUF_FLUSHBITS_1(&bitC);
493
- HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
494
- HUF_FLUSHBITS_2(&bitC);
495
- HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
496
- HUF_FLUSHBITS_1(&bitC);
497
- HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
498
- HUF_FLUSHBITS(&bitC);
983
+ if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
984
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
985
+ else {
986
+ if (MEM_32bits()) {
987
+ switch (tableLog) {
988
+ case 11:
989
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
990
+ break;
991
+ case 10: ZSTD_FALLTHROUGH;
992
+ case 9: ZSTD_FALLTHROUGH;
993
+ case 8:
994
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
995
+ break;
996
+ case 7: ZSTD_FALLTHROUGH;
997
+ default:
998
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
999
+ break;
1000
+ }
1001
+ } else {
1002
+ switch (tableLog) {
1003
+ case 11:
1004
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
1005
+ break;
1006
+ case 10:
1007
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
1008
+ break;
1009
+ case 9:
1010
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
1011
+ break;
1012
+ case 8:
1013
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
1014
+ break;
1015
+ case 7:
1016
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
1017
+ break;
1018
+ case 6: ZSTD_FALLTHROUGH;
1019
+ default:
1020
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
1021
+ break;
1022
+ }
1023
+ }
499
1024
  }
1025
+ assert(bitC.ptr <= bitC.endPtr);
500
1026
 
501
- return BIT_closeCStream(&bitC);
1027
+ return HUF_closeCStream(&bitC);
502
1028
  }
503
1029
 
504
1030
  #if DYNAMIC_BMI2
505
1031
 
506
- static TARGET_ATTRIBUTE("bmi2") size_t
1032
+ static BMI2_TARGET_ATTRIBUTE size_t
507
1033
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
508
1034
  const void* src, size_t srcSize,
509
1035
  const HUF_CElt* CTable)
@@ -545,9 +1071,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
545
1071
 
546
1072
  size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
547
1073
  {
548
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1074
+ return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
549
1075
  }
550
1076
 
1077
+ size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
1078
+ {
1079
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
1080
+ }
551
1081
 
552
1082
  static size_t
553
1083
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@@ -567,8 +1097,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
567
1097
 
568
1098
  assert(op <= oend);
569
1099
  { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
570
- if (cSize==0) return 0;
571
- assert(cSize <= 65535);
1100
+ if (cSize == 0 || cSize > 65535) return 0;
572
1101
  MEM_writeLE16(ostart, (U16)cSize);
573
1102
  op += cSize;
574
1103
  }
@@ -576,8 +1105,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
576
1105
  ip += segmentSize;
577
1106
  assert(op <= oend);
578
1107
  { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
579
- if (cSize==0) return 0;
580
- assert(cSize <= 65535);
1108
+ if (cSize == 0 || cSize > 65535) return 0;
581
1109
  MEM_writeLE16(ostart+2, (U16)cSize);
582
1110
  op += cSize;
583
1111
  }
@@ -585,8 +1113,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
585
1113
  ip += segmentSize;
586
1114
  assert(op <= oend);
587
1115
  { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
588
- if (cSize==0) return 0;
589
- assert(cSize <= 65535);
1116
+ if (cSize == 0 || cSize > 65535) return 0;
590
1117
  MEM_writeLE16(ostart+4, (U16)cSize);
591
1118
  op += cSize;
592
1119
  }
@@ -595,7 +1122,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
595
1122
  assert(op <= oend);
596
1123
  assert(ip <= iend);
597
1124
  { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
598
- if (cSize==0) return 0;
1125
+ if (cSize == 0 || cSize > 65535) return 0;
599
1126
  op += cSize;
600
1127
  }
601
1128
 
@@ -604,7 +1131,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
604
1131
 
605
1132
  size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
606
1133
  {
607
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1134
+ return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1135
+ }
1136
+
1137
+ size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
1138
+ {
1139
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
608
1140
  }
609
1141
 
610
1142
  typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -628,12 +1160,20 @@ static size_t HUF_compressCTable_internal(
628
1160
 
629
1161
  typedef struct {
630
1162
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
631
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
632
- HUF_buildCTable_wksp_tables buildCTable_wksp;
1163
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1164
+ union {
1165
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1166
+ HUF_WriteCTableWksp writeCTable_wksp;
1167
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1168
+ } wksps;
633
1169
  } HUF_compress_tables_t;
634
1170
 
1171
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1172
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1173
+
635
1174
  /* HUF_compress_internal() :
636
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1175
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
1176
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
637
1177
  static size_t
638
1178
  HUF_compress_internal (void* dst, size_t dstSize,
639
1179
  const void* src, size_t srcSize,
@@ -641,18 +1181,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
641
1181
  HUF_nbStreams_e nbStreams,
642
1182
  void* workSpace, size_t wkspSize,
643
1183
  HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
644
- const int bmi2)
1184
+ const int bmi2, unsigned suspectUncompressible)
645
1185
  {
646
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1186
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
647
1187
  BYTE* const ostart = (BYTE*)dst;
648
1188
  BYTE* const oend = ostart + dstSize;
649
1189
  BYTE* op = ostart;
650
1190
 
651
- HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
1191
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
652
1192
 
653
1193
  /* checks & inits */
654
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1194
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
656
1195
  if (!srcSize) return 0; /* Uncompressed */
657
1196
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
658
1197
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -668,8 +1207,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
668
1207
  nbStreams, oldHufTable, bmi2);
669
1208
  }
670
1209
 
1210
+ /* If uncompressible data is suspected, do a smaller sampling first */
1211
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1212
+ if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1213
+ size_t largestTotal = 0;
1214
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1215
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1216
+ largestTotal += largestBegin;
1217
+ }
1218
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1219
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1220
+ largestTotal += largestEnd;
1221
+ }
1222
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
1223
+ }
1224
+
671
1225
  /* Scan input and build symbol stats */
672
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1226
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
673
1227
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
674
1228
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
675
1229
  }
@@ -691,16 +1245,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
691
1245
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
692
1246
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693
1247
  maxSymbolValue, huffLog,
694
- &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
1248
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
695
1249
  CHECK_F(maxBits);
696
1250
  huffLog = (U32)maxBits;
697
- /* Zero unused symbols in CTable, so we can check it for validity */
698
- memset(table->CTable + (maxSymbolValue + 1), 0,
699
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1251
+ }
1252
+ /* Zero unused symbols in CTable, so we can check it for validity */
1253
+ {
1254
+ size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
1255
+ size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
1256
+ ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
700
1257
  }
701
1258
 
702
1259
  /* Write table description header */
703
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1260
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1261
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
704
1262
  /* Check if using previous huffman table is beneficial */
705
1263
  if (repeat && *repeat != HUF_repeat_none) {
706
1264
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -716,7 +1274,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
716
1274
  op += hSize;
717
1275
  if (repeat) { *repeat = HUF_repeat_none; }
718
1276
  if (oldHufTable)
719
- memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
1277
+ ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720
1278
  }
721
1279
  return HUF_compressCTable_internal(ostart, op, oend,
722
1280
  src, srcSize,
@@ -732,27 +1290,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
732
1290
  return HUF_compress_internal(dst, dstSize, src, srcSize,
733
1291
  maxSymbolValue, huffLog, HUF_singleStream,
734
1292
  workSpace, wkspSize,
735
- NULL, NULL, 0, 0 /*bmi2*/);
1293
+ NULL, NULL, 0, 0 /*bmi2*/, 0);
736
1294
  }
737
1295
 
738
1296
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739
1297
  const void* src, size_t srcSize,
740
1298
  unsigned maxSymbolValue, unsigned huffLog,
741
1299
  void* workSpace, size_t wkspSize,
742
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1300
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
1301
+ int bmi2, unsigned suspectUncompressible)
743
1302
  {
744
1303
  return HUF_compress_internal(dst, dstSize, src, srcSize,
745
1304
  maxSymbolValue, huffLog, HUF_singleStream,
746
1305
  workSpace, wkspSize, hufTable,
747
- repeat, preferRepeat, bmi2);
748
- }
749
-
750
- size_t HUF_compress1X (void* dst, size_t dstSize,
751
- const void* src, size_t srcSize,
752
- unsigned maxSymbolValue, unsigned huffLog)
753
- {
754
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
1306
+ repeat, preferRepeat, bmi2, suspectUncompressible);
756
1307
  }
757
1308
 
758
1309
  /* HUF_compress4X_repeat():
@@ -766,29 +1317,49 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
766
1317
  return HUF_compress_internal(dst, dstSize, src, srcSize,
767
1318
  maxSymbolValue, huffLog, HUF_fourStreams,
768
1319
  workSpace, wkspSize,
769
- NULL, NULL, 0, 0 /*bmi2*/);
1320
+ NULL, NULL, 0, 0 /*bmi2*/, 0);
770
1321
  }
771
1322
 
772
1323
  /* HUF_compress4X_repeat():
773
1324
  * compress input using 4 streams.
1325
+ * consider skipping quickly
774
1326
  * re-use an existing huffman compression table */
775
1327
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776
1328
  const void* src, size_t srcSize,
777
1329
  unsigned maxSymbolValue, unsigned huffLog,
778
1330
  void* workSpace, size_t wkspSize,
779
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1331
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
780
1332
  {
781
1333
  return HUF_compress_internal(dst, dstSize, src, srcSize,
782
1334
  maxSymbolValue, huffLog, HUF_fourStreams,
783
1335
  workSpace, wkspSize,
784
- hufTable, repeat, preferRepeat, bmi2);
1336
+ hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
1337
+ }
1338
+
1339
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1340
+ /** HUF_buildCTable() :
1341
+ * @return : maxNbBits
1342
+ * Note : count is used before tree is written, so they can safely overlap
1343
+ */
1344
+ size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
1345
+ {
1346
+ HUF_buildCTable_wksp_tables workspace;
1347
+ return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
1348
+ }
1349
+
1350
+ size_t HUF_compress1X (void* dst, size_t dstSize,
1351
+ const void* src, size_t srcSize,
1352
+ unsigned maxSymbolValue, unsigned huffLog)
1353
+ {
1354
+ U64 workSpace[HUF_WORKSPACE_SIZE_U64];
1355
+ return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
785
1356
  }
786
1357
 
787
1358
  size_t HUF_compress2 (void* dst, size_t dstSize,
788
1359
  const void* src, size_t srcSize,
789
1360
  unsigned maxSymbolValue, unsigned huffLog)
790
1361
  {
791
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
1362
+ U64 workSpace[HUF_WORKSPACE_SIZE_U64];
792
1363
  return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793
1364
  }
794
1365
 
@@ -796,3 +1367,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
796
1367
  {
797
1368
  return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
798
1369
  }
1370
+ #endif