zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * Huffman encoder, part of New Generation Entropy library
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,16 +23,15 @@
23
23
  /* **************************************************************
24
24
  * Includes
25
25
  ****************************************************************/
26
- #include <string.h> /* memcpy, memset */
27
- #include <stdio.h> /* printf (debug) */
26
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
28
27
  #include "../common/compiler.h"
29
28
  #include "../common/bitstream.h"
30
29
  #include "hist.h"
31
30
  #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
32
31
  #include "../common/fse.h" /* header compression */
33
- #define HUF_STATIC_LINKING_ONLY
34
32
  #include "../common/huf.h"
35
33
  #include "../common/error_private.h"
34
+ #include "../common/bits.h" /* ZSTD_highbit32 */
36
35
 
37
36
 
38
37
  /* **************************************************************
@@ -43,24 +42,111 @@
43
42
 
44
43
 
45
44
  /* **************************************************************
46
- * Utils
45
+ * Required declarations
47
46
  ****************************************************************/
48
- unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
47
+ typedef struct nodeElt_s {
48
+ U32 count;
49
+ U16 parent;
50
+ BYTE byte;
51
+ BYTE nbBits;
52
+ } nodeElt;
53
+
54
+
55
+ /* **************************************************************
56
+ * Debug Traces
57
+ ****************************************************************/
58
+
59
+ #if DEBUGLEVEL >= 2
60
+
61
+ static size_t showU32(const U32* arr, size_t size)
62
+ {
63
+ size_t u;
64
+ for (u=0; u<size; u++) {
65
+ RAWLOG(6, " %u", arr[u]); (void)arr;
66
+ }
67
+ RAWLOG(6, " \n");
68
+ return size;
69
+ }
70
+
71
+ static size_t HUF_getNbBits(HUF_CElt elt);
72
+
73
+ static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
74
+ {
75
+ size_t u;
76
+ for (u=0; u<size; u++) {
77
+ RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
78
+ }
79
+ RAWLOG(6, " \n");
80
+ return size;
81
+
82
+ }
83
+
84
+ static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
49
85
  {
50
- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
86
+ size_t u;
87
+ for (u=0; u<size; u++) {
88
+ RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
89
+ }
90
+ RAWLOG(6, " \n");
91
+ return size;
92
+ }
93
+
94
+ static size_t showHNodeBits(const nodeElt* hnode, size_t size)
95
+ {
96
+ size_t u;
97
+ for (u=0; u<size; u++) {
98
+ RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
99
+ }
100
+ RAWLOG(6, " \n");
101
+ return size;
51
102
  }
52
103
 
104
+ #endif
105
+
53
106
 
54
107
  /* *******************************************************
55
108
  * HUF : Huffman block compression
56
109
  *********************************************************/
110
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
111
+
112
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
113
+ {
114
+ size_t const mask = align - 1;
115
+ size_t const rem = (size_t)workspace & mask;
116
+ size_t const add = (align - rem) & mask;
117
+ BYTE* const aligned = (BYTE*)workspace + add;
118
+ assert((align & (align - 1)) == 0); /* pow 2 */
119
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
120
+ if (*workspaceSizePtr >= add) {
121
+ assert(add < align);
122
+ assert(((size_t)aligned & mask) == 0);
123
+ *workspaceSizePtr -= add;
124
+ return aligned;
125
+ } else {
126
+ *workspaceSizePtr = 0;
127
+ return NULL;
128
+ }
129
+ }
130
+
131
+
57
132
  /* HUF_compressWeights() :
58
133
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
59
134
  * The use case needs much less stack memory.
60
135
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
61
136
  */
62
137
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
63
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
138
+
139
+ typedef struct {
140
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
141
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
142
+ unsigned count[HUF_TABLELOG_MAX+1];
143
+ S16 norm[HUF_TABLELOG_MAX+1];
144
+ } HUF_CompressWeightsWksp;
145
+
146
+ static size_t
147
+ HUF_compressWeights(void* dst, size_t dstSize,
148
+ const void* weightTable, size_t wtSize,
149
+ void* workspace, size_t workspaceSize)
64
150
  {
65
151
  BYTE* const ostart = (BYTE*) dst;
66
152
  BYTE* op = ostart;
@@ -68,33 +154,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
68
154
 
69
155
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
70
156
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
157
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
71
158
 
72
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
73
- BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
74
-
75
- unsigned count[HUF_TABLELOG_MAX+1];
76
- S16 norm[HUF_TABLELOG_MAX+1];
159
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
77
160
 
78
161
  /* init conditions */
79
162
  if (wtSize <= 1) return 0; /* Not compressible */
80
163
 
81
164
  /* Scan input and build symbol stats */
82
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
165
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
83
166
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
84
167
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
85
168
  }
86
169
 
87
170
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
88
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
171
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
89
172
 
90
173
  /* Write table description header */
91
- { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
174
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
92
175
  op += hSize;
93
176
  }
94
177
 
95
178
  /* Compress */
96
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
97
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
179
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
180
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
98
181
  if (cSize == 0) return 0; /* not enough space for compressed data */
99
182
  op += cSize;
100
183
  }
@@ -102,35 +185,72 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
102
185
  return (size_t)(op-ostart);
103
186
  }
104
187
 
188
+ static size_t HUF_getNbBits(HUF_CElt elt)
189
+ {
190
+ return elt & 0xFF;
191
+ }
192
+
193
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
194
+ {
195
+ return elt;
196
+ }
197
+
198
+ static size_t HUF_getValue(HUF_CElt elt)
199
+ {
200
+ return elt & ~(size_t)0xFF;
201
+ }
202
+
203
+ static size_t HUF_getValueFast(HUF_CElt elt)
204
+ {
205
+ return elt;
206
+ }
105
207
 
106
- struct HUF_CElt_s {
107
- U16 val;
108
- BYTE nbBits;
109
- }; /* typedef'd to HUF_CElt within "huf.h" */
208
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
209
+ {
210
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
211
+ *elt = nbBits;
212
+ }
110
213
 
111
- /*! HUF_writeCTable() :
112
- `CTable` : Huffman tree to save, using huf representation.
113
- @return : size of saved CTable */
114
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
115
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
214
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
116
215
  {
216
+ size_t const nbBits = HUF_getNbBits(*elt);
217
+ if (nbBits > 0) {
218
+ assert((value >> nbBits) == 0);
219
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
220
+ }
221
+ }
222
+
223
+ typedef struct {
224
+ HUF_CompressWeightsWksp wksp;
117
225
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
118
226
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
227
+ } HUF_WriteCTableWksp;
228
+
229
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
230
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
231
+ void* workspace, size_t workspaceSize)
232
+ {
233
+ HUF_CElt const* const ct = CTable + 1;
119
234
  BYTE* op = (BYTE*)dst;
120
235
  U32 n;
236
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
237
+
238
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
121
239
 
122
- /* check conditions */
240
+ /* check conditions */
241
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
123
242
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
124
243
 
125
244
  /* convert to weight */
126
- bitsToWeight[0] = 0;
245
+ wksp->bitsToWeight[0] = 0;
127
246
  for (n=1; n<huffLog+1; n++)
128
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
247
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
129
248
  for (n=0; n<maxSymbolValue; n++)
130
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
249
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
131
250
 
132
251
  /* attempt weights compression by FSE */
133
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
252
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
253
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
134
254
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
135
255
  op[0] = (BYTE)hSize;
136
256
  return hSize+1;
@@ -140,9 +260,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
140
260
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
141
261
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
142
262
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
143
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
263
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
144
264
  for (n=0; n<maxSymbolValue; n+=2)
145
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
265
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
146
266
  return ((maxSymbolValue+1)/2) + 1;
147
267
  }
148
268
 
@@ -153,34 +273,36 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
153
273
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
154
274
  U32 tableLog = 0;
155
275
  U32 nbSymbols = 0;
276
+ HUF_CElt* const ct = CTable + 1;
156
277
 
157
278
  /* get symbol weights */
158
279
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
280
+ *hasZeroWeights = (rankVal[0] > 0);
159
281
 
160
282
  /* check result */
161
283
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
162
284
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
163
285
 
286
+ CTable[0] = tableLog;
287
+
164
288
  /* Prepare base value per rank */
165
289
  { U32 n, nextRankStart = 0;
166
290
  for (n=1; n<=tableLog; n++) {
167
- U32 current = nextRankStart;
291
+ U32 curr = nextRankStart;
168
292
  nextRankStart += (rankVal[n] << (n-1));
169
- rankVal[n] = current;
293
+ rankVal[n] = curr;
170
294
  } }
171
295
 
172
296
  /* fill nbBits */
173
- *hasZeroWeights = 0;
174
297
  { U32 n; for (n=0; n<nbSymbols; n++) {
175
298
  const U32 w = huffWeight[n];
176
- *hasZeroWeights |= (w == 0);
177
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
299
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
178
300
  } }
179
301
 
180
302
  /* fill val */
181
303
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
182
304
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
183
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
305
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
184
306
  /* determine stating value per rank */
185
307
  valPerRank[tableLog+1] = 0; /* for w==0 */
186
308
  { U16 min = 0;
@@ -190,92 +312,150 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
190
312
  min >>= 1;
191
313
  } }
192
314
  /* assign value within rank, symbol order */
193
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
315
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
194
316
  }
195
317
 
196
318
  *maxSymbolValuePtr = nbSymbols - 1;
197
319
  return readSize;
198
320
  }
199
321
 
200
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
322
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
201
323
  {
202
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
324
+ const HUF_CElt* const ct = CTable + 1;
203
325
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
204
- return table[symbolValue].nbBits;
326
+ return (U32)HUF_getNbBits(ct[symbolValue]);
205
327
  }
206
328
 
207
329
 
208
- typedef struct nodeElt_s {
209
- U32 count;
210
- U16 parent;
211
- BYTE byte;
212
- BYTE nbBits;
213
- } nodeElt;
214
-
215
- static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
330
+ /**
331
+ * HUF_setMaxHeight():
332
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
333
+ *
334
+ * It attempts to convert all nodes with nbBits > @targetNbBits
335
+ * to employ @targetNbBits instead. Then it adjusts the tree
336
+ * so that it remains a valid canonical Huffman tree.
337
+ *
338
+ * @pre The sum of the ranks of each symbol == 2^largestBits,
339
+ * where largestBits == huffNode[lastNonNull].nbBits.
340
+ * @post The sum of the ranks of each symbol == 2^largestBits,
341
+ * where largestBits is the return value (expected <= targetNbBits).
342
+ *
343
+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
344
+ * It's presumed sorted, from most frequent to rarest symbol.
345
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
346
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
347
+ * may not respect. After this function the Huffman tree will
348
+ * respect targetNbBits.
349
+ * @return The maximum number of bits of the Huffman tree after adjustment.
350
+ */
351
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
216
352
  {
217
353
  const U32 largestBits = huffNode[lastNonNull].nbBits;
218
- if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
354
+ /* early exit : no elt > targetNbBits, so the tree is already valid. */
355
+ if (largestBits <= targetNbBits) return largestBits;
356
+
357
+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
219
358
 
220
359
  /* there are several too large elements (at least >= 2) */
221
360
  { int totalCost = 0;
222
- const U32 baseCost = 1 << (largestBits - maxNbBits);
361
+ const U32 baseCost = 1 << (largestBits - targetNbBits);
223
362
  int n = (int)lastNonNull;
224
363
 
225
- while (huffNode[n].nbBits > maxNbBits) {
364
+ /* Adjust any ranks > targetNbBits to targetNbBits.
365
+ * Compute totalCost, which is how far the sum of the ranks is
366
+ * we are over 2^largestBits after adjust the offending ranks.
367
+ */
368
+ while (huffNode[n].nbBits > targetNbBits) {
226
369
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
227
- huffNode[n].nbBits = (BYTE)maxNbBits;
228
- n --;
229
- } /* n stops at huffNode[n].nbBits <= maxNbBits */
230
- while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
370
+ huffNode[n].nbBits = (BYTE)targetNbBits;
371
+ n--;
372
+ }
373
+ /* n stops at huffNode[n].nbBits <= targetNbBits */
374
+ assert(huffNode[n].nbBits <= targetNbBits);
375
+ /* n end at index of smallest symbol using < targetNbBits */
376
+ while (huffNode[n].nbBits == targetNbBits) --n;
231
377
 
232
- /* renorm totalCost */
233
- totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
378
+ /* renorm totalCost from 2^largestBits to 2^targetNbBits
379
+ * note : totalCost is necessarily a multiple of baseCost */
380
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
381
+ totalCost >>= (largestBits - targetNbBits);
382
+ assert(totalCost > 0);
234
383
 
235
384
  /* repay normalized cost */
236
385
  { U32 const noSymbol = 0xF0F0F0F0;
237
386
  U32 rankLast[HUF_TABLELOG_MAX+2];
238
387
 
239
- /* Get pos of last (smallest) symbol per rank */
240
- memset(rankLast, 0xF0, sizeof(rankLast));
241
- { U32 currentNbBits = maxNbBits;
388
+ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
389
+ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
390
+ { U32 currentNbBits = targetNbBits;
242
391
  int pos;
243
392
  for (pos=n ; pos >= 0; pos--) {
244
393
  if (huffNode[pos].nbBits >= currentNbBits) continue;
245
- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
246
- rankLast[maxNbBits-currentNbBits] = (U32)pos;
394
+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
395
+ rankLast[targetNbBits-currentNbBits] = (U32)pos;
247
396
  } }
248
397
 
249
398
  while (totalCost > 0) {
250
- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
399
+ /* Try to reduce the next power of 2 above totalCost because we
400
+ * gain back half the rank.
401
+ */
402
+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
251
403
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
252
404
  U32 const highPos = rankLast[nBitsToDecrease];
253
405
  U32 const lowPos = rankLast[nBitsToDecrease-1];
254
406
  if (highPos == noSymbol) continue;
407
+ /* Decrease highPos if no symbols of lowPos or if it is
408
+ * not cheaper to remove 2 lowPos than highPos.
409
+ */
255
410
  if (lowPos == noSymbol) break;
256
411
  { U32 const highTotal = huffNode[highPos].count;
257
412
  U32 const lowTotal = 2 * huffNode[lowPos].count;
258
413
  if (highTotal <= lowTotal) break;
259
414
  } }
260
415
  /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
416
+ assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
261
417
  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
262
418
  while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
263
- nBitsToDecrease ++;
419
+ nBitsToDecrease++;
420
+ assert(rankLast[nBitsToDecrease] != noSymbol);
421
+ /* Increase the number of bits to gain back half the rank cost. */
264
422
  totalCost -= 1 << (nBitsToDecrease-1);
423
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
424
+
425
+ /* Fix up the new rank.
426
+ * If the new rank was empty, this symbol is now its smallest.
427
+ * Otherwise, this symbol will be the largest in the new rank so no adjustment.
428
+ */
265
429
  if (rankLast[nBitsToDecrease-1] == noSymbol)
266
- rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
267
- huffNode[rankLast[nBitsToDecrease]].nbBits ++;
430
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
431
+ /* Fix up the old rank.
432
+ * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
433
+ * it must be the only symbol in its rank, so the old rank now has no symbols.
434
+ * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
435
+ * the smallest node in the rank. If the previous position belongs to a different rank,
436
+ * then the rank is now empty.
437
+ */
268
438
  if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
269
439
  rankLast[nBitsToDecrease] = noSymbol;
270
440
  else {
271
441
  rankLast[nBitsToDecrease]--;
272
- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
442
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
273
443
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
274
- } } /* while (totalCost > 0) */
275
-
444
+ }
445
+ } /* while (totalCost > 0) */
446
+
447
+ /* If we've removed too much weight, then we have to add it back.
448
+ * To avoid overshooting again, we only adjust the smallest rank.
449
+ * We take the largest nodes from the lowest rank 0 and move them
450
+ * to rank 1. There's guaranteed to be enough rank 0 symbols because
451
+ * TODO.
452
+ */
276
453
  while (totalCost < 0) { /* Sometimes, cost correction overshoot */
277
- if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
278
- while (huffNode[n].nbBits == maxNbBits) n--;
454
+ /* special case : no rank 1 symbol (using targetNbBits-1);
455
+ * let's create one from largest rank 0 (using targetNbBits).
456
+ */
457
+ if (rankLast[1] == noSymbol) {
458
+ while (huffNode[n].nbBits == targetNbBits) n--;
279
459
  huffNode[n+1].nbBits--;
280
460
  assert(n >= 0);
281
461
  rankLast[1] = (U32)(n+1);
@@ -285,47 +465,178 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
285
465
  huffNode[ rankLast[1] + 1 ].nbBits--;
286
466
  rankLast[1]++;
287
467
  totalCost ++;
288
- } } } /* there are several too large elements (at least >= 2) */
468
+ }
469
+ } /* repay normalized cost */
470
+ } /* there are several too large elements (at least >= 2) */
289
471
 
290
- return maxNbBits;
472
+ return targetNbBits;
291
473
  }
292
474
 
293
475
  typedef struct {
294
- U32 base;
295
- U32 current;
476
+ U16 base;
477
+ U16 curr;
296
478
  } rankPos;
297
479
 
298
- typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
480
+ typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
299
481
 
300
- #define RANK_POSITION_TABLE_SIZE 32
482
+ /* Number of buckets available for HUF_sort() */
483
+ #define RANK_POSITION_TABLE_SIZE 192
301
484
 
302
485
  typedef struct {
303
486
  huffNodeTable huffNodeTbl;
304
487
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
305
488
  } HUF_buildCTable_wksp_tables;
306
489
 
307
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
308
- {
490
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
491
+ * Strategy is to use as many buckets as possible for representing distinct
492
+ * counts while using the remainder to represent all "large" counts.
493
+ *
494
+ * To satisfy this requirement for 192 buckets, we can do the following:
495
+ * Let buckets 0-166 represent distinct counts of [0, 166]
496
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
497
+ */
498
+ #define RANK_POSITION_MAX_COUNT_LOG 32
499
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
500
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
501
+
502
+ /* Return the appropriate bucket index for a given count. See definition of
503
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
504
+ */
505
+ static U32 HUF_getIndex(U32 const count) {
506
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
507
+ ? count
508
+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
509
+ }
510
+
511
+ /* Helper swap function for HUF_quickSortPartition() */
512
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
513
+ nodeElt tmp = *a;
514
+ *a = *b;
515
+ *b = tmp;
516
+ }
517
+
518
+ /* Returns 0 if the huffNode array is not sorted by descending count */
519
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
520
+ U32 i;
521
+ for (i = 1; i < maxSymbolValue1; ++i) {
522
+ if (huffNode[i].count > huffNode[i-1].count) {
523
+ return 0;
524
+ }
525
+ }
526
+ return 1;
527
+ }
528
+
529
+ /* Insertion sort by descending order */
530
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
531
+ int i;
532
+ int const size = high-low+1;
533
+ huffNode += low;
534
+ for (i = 1; i < size; ++i) {
535
+ nodeElt const key = huffNode[i];
536
+ int j = i - 1;
537
+ while (j >= 0 && huffNode[j].count < key.count) {
538
+ huffNode[j + 1] = huffNode[j];
539
+ j--;
540
+ }
541
+ huffNode[j + 1] = key;
542
+ }
543
+ }
544
+
545
+ /* Pivot helper function for quicksort. */
546
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
547
+ /* Simply select rightmost element as pivot. "Better" selectors like
548
+ * median-of-three don't experimentally appear to have any benefit.
549
+ */
550
+ U32 const pivot = arr[high].count;
551
+ int i = low - 1;
552
+ int j = low;
553
+ for ( ; j < high; j++) {
554
+ if (arr[j].count > pivot) {
555
+ i++;
556
+ HUF_swapNodes(&arr[i], &arr[j]);
557
+ }
558
+ }
559
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
560
+ return i + 1;
561
+ }
562
+
563
+ /* Classic quicksort by descending with partially iterative calls
564
+ * to reduce worst case callstack size.
565
+ */
566
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
567
+ int const kInsertionSortThreshold = 8;
568
+ if (high - low < kInsertionSortThreshold) {
569
+ HUF_insertionSort(arr, low, high);
570
+ return;
571
+ }
572
+ while (low < high) {
573
+ int const idx = HUF_quickSortPartition(arr, low, high);
574
+ if (idx - low < high - idx) {
575
+ HUF_simpleQuickSort(arr, low, idx - 1);
576
+ low = idx + 1;
577
+ } else {
578
+ HUF_simpleQuickSort(arr, idx + 1, high);
579
+ high = idx - 1;
580
+ }
581
+ }
582
+ }
583
+
584
+ /**
585
+ * HUF_sort():
586
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
587
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
588
+ *
589
+ * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
590
+ * Must have (maxSymbolValue + 1) entries.
591
+ * @param[in] count Histogram of the symbols.
592
+ * @param[in] maxSymbolValue Maximum symbol value.
593
+ * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
594
+ */
595
+ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
309
596
  U32 n;
597
+ U32 const maxSymbolValue1 = maxSymbolValue+1;
598
+
599
+ /* Compute base and set curr to base.
600
+ * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
601
+ * See HUF_getIndex to see bucketing strategy.
602
+ * We attribute each symbol to lowerRank's base value, because we want to know where
603
+ * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
604
+ */
605
+ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
606
+ for (n = 0; n < maxSymbolValue1; ++n) {
607
+ U32 lowerRank = HUF_getIndex(count[n]);
608
+ assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
609
+ rankPosition[lowerRank].base++;
610
+ }
310
611
 
311
- memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
312
- for (n=0; n<=maxSymbolValue; n++) {
313
- U32 r = BIT_highbit32(count[n] + 1);
314
- rankPosition[r].base ++;
612
+ assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
613
+ /* Set up the rankPosition table */
614
+ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
615
+ rankPosition[n-1].base += rankPosition[n].base;
616
+ rankPosition[n-1].curr = rankPosition[n-1].base;
315
617
  }
316
- for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
317
- for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
318
- for (n=0; n<=maxSymbolValue; n++) {
618
+
619
+ /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
620
+ for (n = 0; n < maxSymbolValue1; ++n) {
319
621
  U32 const c = count[n];
320
- U32 const r = BIT_highbit32(c+1) + 1;
321
- U32 pos = rankPosition[r].current++;
322
- while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
323
- huffNode[pos] = huffNode[pos-1];
324
- pos--;
325
- }
622
+ U32 const r = HUF_getIndex(c) + 1;
623
+ U32 const pos = rankPosition[r].curr++;
624
+ assert(pos < maxSymbolValue1);
326
625
  huffNode[pos].count = c;
327
626
  huffNode[pos].byte = (BYTE)n;
328
627
  }
628
+
629
+ /* Sort each bucket. */
630
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
631
+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
632
+ U32 const bucketStartIdx = rankPosition[n].base;
633
+ if (bucketSize > 1) {
634
+ assert(bucketStartIdx < maxSymbolValue1);
635
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
636
+ }
637
+ }
638
+
639
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
329
640
  }
330
641
 
331
642
 
@@ -335,28 +646,21 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
335
646
  */
336
647
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
337
648
 
338
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
649
+ /* HUF_buildTree():
650
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
651
+ *
652
+ * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
653
+ * @param maxSymbolValue The maximum symbol value.
654
+ * @return The smallest node in the Huffman tree (by count).
655
+ */
656
+ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
339
657
  {
340
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
341
- nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
342
- nodeElt* const huffNode = huffNode0+1;
658
+ nodeElt* const huffNode0 = huffNode - 1;
343
659
  int nonNullRank;
344
660
  int lowS, lowN;
345
661
  int nodeNb = STARTNODE;
346
662
  int n, nodeRoot;
347
-
348
- /* safety checks */
349
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
350
- if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
351
- return ERROR(workSpace_tooSmall);
352
- if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
353
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
354
- return ERROR(maxSymbolValue_tooLarge);
355
- memset(huffNode0, 0, sizeof(huffNodeTable));
356
-
357
- /* sort, decreasing order */
358
- HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
359
-
663
+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
360
664
  /* init for parents */
361
665
  nonNullRank = (int)maxSymbolValue;
362
666
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -383,127 +687,406 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
383
687
  for (n=0; n<=nonNullRank; n++)
384
688
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
385
689
 
386
- /* enforce maxTableLog */
387
- maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
388
-
389
- /* fill result into tree (val, nbBits) */
390
- { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
391
- U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
392
- int const alphabetSize = (int)(maxSymbolValue + 1);
393
- if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
394
- for (n=0; n<=nonNullRank; n++)
395
- nbPerRank[huffNode[n].nbBits]++;
396
- /* determine stating value per rank */
397
- { U16 min = 0;
398
- for (n=(int)maxNbBits; n>0; n--) {
399
- valPerRank[n] = min; /* get starting value within each rank */
400
- min += nbPerRank[n];
401
- min >>= 1;
402
- } }
403
- for (n=0; n<alphabetSize; n++)
404
- tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
405
- for (n=0; n<alphabetSize; n++)
406
- tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
407
- }
690
+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
408
691
 
409
- return maxNbBits;
692
+ return nonNullRank;
410
693
  }
411
694
 
412
- /** HUF_buildCTable() :
413
- * @return : maxNbBits
414
- * Note : count is used before tree is written, so they can safely overlap
695
+ /**
696
+ * HUF_buildCTableFromTree():
697
+ * Build the CTable given the Huffman tree in huffNode.
698
+ *
699
+ * @param[out] CTable The output Huffman CTable.
700
+ * @param huffNode The Huffman tree.
701
+ * @param nonNullRank The last and smallest node in the Huffman tree.
702
+ * @param maxSymbolValue The maximum symbol value.
703
+ * @param maxNbBits The exact maximum number of bits used in the Huffman tree.
415
704
  */
416
- size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
705
+ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
417
706
  {
418
- HUF_buildCTable_wksp_tables workspace;
419
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
707
+ HUF_CElt* const ct = CTable + 1;
708
+ /* fill result into ctable (val, nbBits) */
709
+ int n;
710
+ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
711
+ U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
712
+ int const alphabetSize = (int)(maxSymbolValue + 1);
713
+ for (n=0; n<=nonNullRank; n++)
714
+ nbPerRank[huffNode[n].nbBits]++;
715
+ /* determine starting value per rank */
716
+ { U16 min = 0;
717
+ for (n=(int)maxNbBits; n>0; n--) {
718
+ valPerRank[n] = min; /* get starting value within each rank */
719
+ min += nbPerRank[n];
720
+ min >>= 1;
721
+ } }
722
+ for (n=0; n<alphabetSize; n++)
723
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
724
+ for (n=0; n<alphabetSize; n++)
725
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
726
+ CTable[0] = maxNbBits;
727
+ }
728
+
729
+ size_t
730
+ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
731
+ void* workSpace, size_t wkspSize)
732
+ {
733
+ HUF_buildCTable_wksp_tables* const wksp_tables =
734
+ (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
735
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
736
+ nodeElt* const huffNode = huffNode0+1;
737
+ int nonNullRank;
738
+
739
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
740
+
741
+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
742
+
743
+ /* safety checks */
744
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
745
+ return ERROR(workSpace_tooSmall);
746
+ if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
747
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
748
+ return ERROR(maxSymbolValue_tooLarge);
749
+ ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
750
+
751
+ /* sort, decreasing order */
752
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
753
+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
754
+
755
+ /* build tree */
756
+ nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
757
+
758
+ /* determine and enforce maxTableLog */
759
+ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
760
+ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
761
+
762
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
763
+
764
+ return maxNbBits;
420
765
  }
421
766
 
422
767
  size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
423
768
  {
769
+ HUF_CElt const* ct = CTable + 1;
424
770
  size_t nbBits = 0;
425
771
  int s;
426
772
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
427
- nbBits += CTable[s].nbBits * count[s];
773
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
428
774
  }
429
775
  return nbBits >> 3;
430
776
  }
431
777
 
432
778
  int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
779
+ HUF_CElt const* ct = CTable + 1;
433
780
  int bad = 0;
434
781
  int s;
435
782
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
436
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
783
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
437
784
  }
438
785
  return !bad;
439
786
  }
440
787
 
441
788
  size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
442
789
 
790
+ /** HUF_CStream_t:
791
+ * Huffman uses its own BIT_CStream_t implementation.
792
+ * There are three major differences from BIT_CStream_t:
793
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
794
+ * the pair (nbBits, value) in the format:
795
+ * format:
796
+ * - Bits [0, 4) = nbBits
797
+ * - Bits [4, 64 - nbBits) = 0
798
+ * - Bits [64 - nbBits, 64) = value
799
+ * 2. The bitContainer is built from the upper bits and
800
+ * right shifted. E.g. to add a new value of N bits
801
+ * you right shift the bitContainer by N, then or in
802
+ * the new value into the N upper bits.
803
+ * 3. The bitstream has two bit containers. You can add
804
+ * bits to the second container and merge them into
805
+ * the first container.
806
+ */
807
+
808
+ #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
809
+
810
+ typedef struct {
811
+ size_t bitContainer[2];
812
+ size_t bitPos[2];
813
+
814
+ BYTE* startPtr;
815
+ BYTE* ptr;
816
+ BYTE* endPtr;
817
+ } HUF_CStream_t;
818
+
819
+ /**! HUF_initCStream():
820
+ * Initializes the bitstream.
821
+ * @returns 0 or an error code.
822
+ */
823
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
824
+ void* startPtr, size_t dstCapacity)
825
+ {
826
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
827
+ bitC->startPtr = (BYTE*)startPtr;
828
+ bitC->ptr = bitC->startPtr;
829
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
830
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
831
+ return 0;
832
+ }
833
+
834
+ /*! HUF_addBits():
835
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
836
+ *
837
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
838
+ * See the HUF_CStream_t docs for the format.
839
+ * @param idx Insert into the bitstream at this idx.
840
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
841
+ * to have at least 4 unused bits after this call it may be 1,
842
+ * otherwise it must be 0. HUF_addBits() is faster when fast is set.
843
+ */
844
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
845
+ {
846
+ assert(idx <= 1);
847
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
848
+ /* This is efficient on x86-64 with BMI2 because shrx
849
+ * only reads the low 6 bits of the register. The compiler
850
+ * knows this and elides the mask. When fast is set,
851
+ * every operation can use the same value loaded from elt.
852
+ */
853
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
854
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
855
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
856
+ * doesn't matter that the high bits have noise from the value.
857
+ */
858
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
859
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
860
+ /* The last 4-bits of elt are dirty if fast is set,
861
+ * so we must not be overwriting bits that have already been
862
+ * inserted into the bit container.
863
+ */
864
+ #if DEBUGLEVEL >= 1
865
+ {
866
+ size_t const nbBits = HUF_getNbBits(elt);
867
+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
868
+ (void)dirtyBits;
869
+ /* Middle bits are 0. */
870
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
871
+ /* We didn't overwrite any bits in the bit container. */
872
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
873
+ (void)dirtyBits;
874
+ }
875
+ #endif
876
+ }
877
+
878
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
879
+ {
880
+ bitC->bitContainer[1] = 0;
881
+ bitC->bitPos[1] = 0;
882
+ }
883
+
884
+ /*! HUF_mergeIndex1() :
885
+ * Merges the bit container @ index 1 into the bit container @ index 0
886
+ * and zeros the bit container @ index 1.
887
+ */
888
+ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
889
+ {
890
+ assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
891
+ bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
892
+ bitC->bitContainer[0] |= bitC->bitContainer[1];
893
+ bitC->bitPos[0] += bitC->bitPos[1];
894
+ assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
895
+ }
896
+
897
+ /*! HUF_flushBits() :
898
+ * Flushes the bits in the bit container @ index 0.
899
+ *
900
+ * @post bitPos will be < 8.
901
+ * @param kFast If kFast is set then we must know a-priori that
902
+ * the bit container will not overflow.
903
+ */
904
+ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
905
+ {
906
+ /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
907
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
908
+ size_t const nbBytes = nbBits >> 3;
909
+ /* The top nbBits bits of bitContainer are the ones we need. */
910
+ size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
911
+ /* Mask bitPos to account for the bytes we consumed. */
912
+ bitC->bitPos[0] &= 7;
913
+ assert(nbBits > 0);
914
+ assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
915
+ assert(bitC->ptr <= bitC->endPtr);
916
+ MEM_writeLEST(bitC->ptr, bitContainer);
917
+ bitC->ptr += nbBytes;
918
+ assert(!kFast || bitC->ptr <= bitC->endPtr);
919
+ if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
920
+ /* bitContainer doesn't need to be modified because the leftover
921
+ * bits are already the top bitPos bits. And we don't care about
922
+ * noise in the lower values.
923
+ */
924
+ }
925
+
926
+ /*! HUF_endMark()
927
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
928
+ */
929
+ static HUF_CElt HUF_endMark(void)
930
+ {
931
+ HUF_CElt endMark;
932
+ HUF_setNbBits(&endMark, 1);
933
+ HUF_setValue(&endMark, 1);
934
+ return endMark;
935
+ }
936
+
937
+ /*! HUF_closeCStream() :
938
+ * @return Size of CStream, in bytes,
939
+ * or 0 if it could not fit into dstBuffer */
940
+ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
941
+ {
942
+ HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
943
+ HUF_flushBits(bitC, /* kFast */ 0);
944
+ {
945
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
946
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
947
+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
948
+ }
949
+ }
950
+
443
951
  FORCE_INLINE_TEMPLATE void
444
- HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
952
+ HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
445
953
  {
446
- BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
954
+ HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
447
955
  }
448
956
 
449
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
957
+ FORCE_INLINE_TEMPLATE void
958
+ HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
959
+ const BYTE* ip, size_t srcSize,
960
+ const HUF_CElt* ct,
961
+ int kUnroll, int kFastFlush, int kLastFast)
962
+ {
963
+ /* Join to kUnroll */
964
+ int n = (int)srcSize;
965
+ int rem = n % kUnroll;
966
+ if (rem > 0) {
967
+ for (; rem > 0; --rem) {
968
+ HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
969
+ }
970
+ HUF_flushBits(bitC, kFastFlush);
971
+ }
972
+ assert(n % kUnroll == 0);
973
+
974
+ /* Join to 2 * kUnroll */
975
+ if (n % (2 * kUnroll)) {
976
+ int u;
977
+ for (u = 1; u < kUnroll; ++u) {
978
+ HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
979
+ }
980
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
981
+ HUF_flushBits(bitC, kFastFlush);
982
+ n -= kUnroll;
983
+ }
984
+ assert(n % (2 * kUnroll) == 0);
450
985
 
451
- #define HUF_FLUSHBITS_1(stream) \
452
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
986
+ for (; n>0; n-= 2 * kUnroll) {
987
+ /* Encode kUnroll symbols into the bitstream @ index 0. */
988
+ int u;
989
+ for (u = 1; u < kUnroll; ++u) {
990
+ HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
991
+ }
992
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
993
+ HUF_flushBits(bitC, kFastFlush);
994
+ /* Encode kUnroll symbols into the bitstream @ index 1.
995
+ * This allows us to start filling the bit container
996
+ * without any data dependencies.
997
+ */
998
+ HUF_zeroIndex1(bitC);
999
+ for (u = 1; u < kUnroll; ++u) {
1000
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
1001
+ }
1002
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
1003
+ /* Merge bitstream @ index 1 into the bitstream @ index 0 */
1004
+ HUF_mergeIndex1(bitC);
1005
+ HUF_flushBits(bitC, kFastFlush);
1006
+ }
1007
+ assert(n == 0);
1008
+
1009
+ }
1010
+
1011
+ /**
1012
+ * Returns a tight upper bound on the output space needed by Huffman
1013
+ * with 8 bytes buffer to handle over-writes. If the output is at least
1014
+ * this large we don't need to do bounds checks during Huffman encoding.
1015
+ */
1016
+ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
1017
+ {
1018
+ return ((srcSize * tableLog) >> 3) + 8;
1019
+ }
453
1020
 
454
- #define HUF_FLUSHBITS_2(stream) \
455
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
456
1021
 
457
1022
  FORCE_INLINE_TEMPLATE size_t
458
1023
  HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
459
1024
  const void* src, size_t srcSize,
460
1025
  const HUF_CElt* CTable)
461
1026
  {
1027
+ U32 const tableLog = (U32)CTable[0];
1028
+ HUF_CElt const* ct = CTable + 1;
462
1029
  const BYTE* ip = (const BYTE*) src;
463
1030
  BYTE* const ostart = (BYTE*)dst;
464
1031
  BYTE* const oend = ostart + dstSize;
465
1032
  BYTE* op = ostart;
466
- size_t n;
467
- BIT_CStream_t bitC;
1033
+ HUF_CStream_t bitC;
468
1034
 
469
1035
  /* init */
470
1036
  if (dstSize < 8) return 0; /* not enough space to compress */
471
- { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
1037
+ { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
472
1038
  if (HUF_isError(initErr)) return 0; }
473
1039
 
474
- n = srcSize & ~3; /* join to mod 4 */
475
- switch (srcSize & 3)
476
- {
477
- case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
478
- HUF_FLUSHBITS_2(&bitC);
479
- /* fall-through */
480
- case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
481
- HUF_FLUSHBITS_1(&bitC);
482
- /* fall-through */
483
- case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
484
- HUF_FLUSHBITS(&bitC);
485
- /* fall-through */
486
- case 0 : /* fall-through */
487
- default: break;
488
- }
489
-
490
- for (; n>0; n-=4) { /* note : n&3==0 at this stage */
491
- HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
492
- HUF_FLUSHBITS_1(&bitC);
493
- HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
494
- HUF_FLUSHBITS_2(&bitC);
495
- HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
496
- HUF_FLUSHBITS_1(&bitC);
497
- HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
498
- HUF_FLUSHBITS(&bitC);
1040
+ if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
1041
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
1042
+ else {
1043
+ if (MEM_32bits()) {
1044
+ switch (tableLog) {
1045
+ case 11:
1046
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
1047
+ break;
1048
+ case 10: ZSTD_FALLTHROUGH;
1049
+ case 9: ZSTD_FALLTHROUGH;
1050
+ case 8:
1051
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
1052
+ break;
1053
+ case 7: ZSTD_FALLTHROUGH;
1054
+ default:
1055
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
1056
+ break;
1057
+ }
1058
+ } else {
1059
+ switch (tableLog) {
1060
+ case 11:
1061
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
1062
+ break;
1063
+ case 10:
1064
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
1065
+ break;
1066
+ case 9:
1067
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
1068
+ break;
1069
+ case 8:
1070
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
1071
+ break;
1072
+ case 7:
1073
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
1074
+ break;
1075
+ case 6: ZSTD_FALLTHROUGH;
1076
+ default:
1077
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
1078
+ break;
1079
+ }
1080
+ }
499
1081
  }
1082
+ assert(bitC.ptr <= bitC.endPtr);
500
1083
 
501
- return BIT_closeCStream(&bitC);
1084
+ return HUF_closeCStream(&bitC);
502
1085
  }
503
1086
 
504
1087
  #if DYNAMIC_BMI2
505
1088
 
506
- static TARGET_ATTRIBUTE("bmi2") size_t
1089
+ static BMI2_TARGET_ATTRIBUTE size_t
507
1090
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
508
1091
  const void* src, size_t srcSize,
509
1092
  const HUF_CElt* CTable)
@@ -522,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
522
1105
  static size_t
523
1106
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
524
1107
  const void* src, size_t srcSize,
525
- const HUF_CElt* CTable, const int bmi2)
1108
+ const HUF_CElt* CTable, const int flags)
526
1109
  {
527
- if (bmi2) {
1110
+ if (flags & HUF_flags_bmi2) {
528
1111
  return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
529
1112
  }
530
1113
  return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -535,24 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
535
1118
  static size_t
536
1119
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
537
1120
  const void* src, size_t srcSize,
538
- const HUF_CElt* CTable, const int bmi2)
1121
+ const HUF_CElt* CTable, const int flags)
539
1122
  {
540
- (void)bmi2;
1123
+ (void)flags;
541
1124
  return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
542
1125
  }
543
1126
 
544
1127
  #endif
545
1128
 
546
- size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1129
+ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
547
1130
  {
548
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1131
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
549
1132
  }
550
1133
 
551
-
552
1134
  static size_t
553
1135
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
554
1136
  const void* src, size_t srcSize,
555
- const HUF_CElt* CTable, int bmi2)
1137
+ const HUF_CElt* CTable, int flags)
556
1138
  {
557
1139
  size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
558
1140
  const BYTE* ip = (const BYTE*) src;
@@ -566,27 +1148,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
566
1148
  op += 6; /* jumpTable */
567
1149
 
568
1150
  assert(op <= oend);
569
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
570
- if (cSize==0) return 0;
571
- assert(cSize <= 65535);
1151
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1152
+ if (cSize == 0 || cSize > 65535) return 0;
572
1153
  MEM_writeLE16(ostart, (U16)cSize);
573
1154
  op += cSize;
574
1155
  }
575
1156
 
576
1157
  ip += segmentSize;
577
1158
  assert(op <= oend);
578
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
579
- if (cSize==0) return 0;
580
- assert(cSize <= 65535);
1159
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1160
+ if (cSize == 0 || cSize > 65535) return 0;
581
1161
  MEM_writeLE16(ostart+2, (U16)cSize);
582
1162
  op += cSize;
583
1163
  }
584
1164
 
585
1165
  ip += segmentSize;
586
1166
  assert(op <= oend);
587
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
588
- if (cSize==0) return 0;
589
- assert(cSize <= 65535);
1167
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1168
+ if (cSize == 0 || cSize > 65535) return 0;
590
1169
  MEM_writeLE16(ostart+4, (U16)cSize);
591
1170
  op += cSize;
592
1171
  }
@@ -594,17 +1173,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
594
1173
  ip += segmentSize;
595
1174
  assert(op <= oend);
596
1175
  assert(ip <= iend);
597
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
598
- if (cSize==0) return 0;
1176
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
1177
+ if (cSize == 0 || cSize > 65535) return 0;
599
1178
  op += cSize;
600
1179
  }
601
1180
 
602
1181
  return (size_t)(op-ostart);
603
1182
  }
604
1183
 
605
- size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1184
+ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
606
1185
  {
607
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1186
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
608
1187
  }
609
1188
 
610
1189
  typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -612,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
612
1191
  static size_t HUF_compressCTable_internal(
613
1192
  BYTE* const ostart, BYTE* op, BYTE* const oend,
614
1193
  const void* src, size_t srcSize,
615
- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
1194
+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
616
1195
  {
617
1196
  size_t const cSize = (nbStreams==HUF_singleStream) ?
618
- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
619
- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
1197
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
1198
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
620
1199
  if (HUF_isError(cSize)) { return cSize; }
621
1200
  if (cSize==0) { return 0; } /* uncompressible */
622
1201
  op += cSize;
@@ -628,31 +1207,111 @@ static size_t HUF_compressCTable_internal(
628
1207
 
629
1208
  typedef struct {
630
1209
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
631
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
632
- HUF_buildCTable_wksp_tables buildCTable_wksp;
1210
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1211
+ union {
1212
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1213
+ HUF_WriteCTableWksp writeCTable_wksp;
1214
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1215
+ } wksps;
633
1216
  } HUF_compress_tables_t;
634
1217
 
1218
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1219
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1220
+
1221
+ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
1222
+ {
1223
+ unsigned cardinality = 0;
1224
+ unsigned i;
1225
+
1226
+ for (i = 0; i < maxSymbolValue + 1; i++) {
1227
+ if (count[i] != 0) cardinality += 1;
1228
+ }
1229
+
1230
+ return cardinality;
1231
+ }
1232
+
1233
+ unsigned HUF_minTableLog(unsigned symbolCardinality)
1234
+ {
1235
+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
1236
+ return minBitsSymbols;
1237
+ }
1238
+
1239
+ unsigned HUF_optimalTableLog(
1240
+ unsigned maxTableLog,
1241
+ size_t srcSize,
1242
+ unsigned maxSymbolValue,
1243
+ void* workSpace, size_t wkspSize,
1244
+ HUF_CElt* table,
1245
+ const unsigned* count,
1246
+ int flags)
1247
+ {
1248
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
1249
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
1250
+
1251
+ if (!(flags & HUF_flags_optimalDepth)) {
1252
+ /* cheap evaluation, based on FSE */
1253
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
1254
+ }
1255
+
1256
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
1257
+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
1258
+ size_t maxBits, hSize, newSize;
1259
+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
1260
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
1261
+ size_t optSize = ((size_t) ~0) - 1;
1262
+ unsigned optLog = maxTableLog, optLogGuess;
1263
+
1264
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
1265
+
1266
+ /* Search until size increases */
1267
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
1268
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
1269
+ maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
1270
+ if (ERR_isError(maxBits)) continue;
1271
+
1272
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
1273
+
1274
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
1275
+
1276
+ if (ERR_isError(hSize)) continue;
1277
+
1278
+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
1279
+
1280
+ if (newSize > optSize + 1) {
1281
+ break;
1282
+ }
1283
+
1284
+ if (newSize < optSize) {
1285
+ optSize = newSize;
1286
+ optLog = optLogGuess;
1287
+ }
1288
+ }
1289
+ assert(optLog <= HUF_TABLELOG_MAX);
1290
+ return optLog;
1291
+ }
1292
+ }
1293
+
635
1294
  /* HUF_compress_internal() :
636
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1295
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
1296
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
637
1297
  static size_t
638
1298
  HUF_compress_internal (void* dst, size_t dstSize,
639
1299
  const void* src, size_t srcSize,
640
1300
  unsigned maxSymbolValue, unsigned huffLog,
641
1301
  HUF_nbStreams_e nbStreams,
642
1302
  void* workSpace, size_t wkspSize,
643
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
644
- const int bmi2)
1303
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
645
1304
  {
646
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1305
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
647
1306
  BYTE* const ostart = (BYTE*)dst;
648
1307
  BYTE* const oend = ostart + dstSize;
649
1308
  BYTE* op = ostart;
650
1309
 
651
- HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
1310
+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
1311
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
652
1312
 
653
1313
  /* checks & inits */
654
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1314
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
656
1315
  if (!srcSize) return 0; /* Uncompressed */
657
1316
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
658
1317
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -662,17 +1321,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
662
1321
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
663
1322
 
664
1323
  /* Heuristic : If old table is valid, use it for small inputs */
665
- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
1324
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
666
1325
  return HUF_compressCTable_internal(ostart, op, oend,
667
1326
  src, srcSize,
668
- nbStreams, oldHufTable, bmi2);
1327
+ nbStreams, oldHufTable, flags);
1328
+ }
1329
+
1330
+ /* If uncompressible data is suspected, do a smaller sampling first */
1331
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1332
+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1333
+ size_t largestTotal = 0;
1334
+ DEBUGLOG(5, "input suspected incompressible : sampling to check");
1335
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1336
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1337
+ largestTotal += largestBegin;
1338
+ }
1339
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1340
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1341
+ largestTotal += largestEnd;
1342
+ }
1343
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
669
1344
  }
670
1345
 
671
1346
  /* Scan input and build symbol stats */
672
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1347
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
673
1348
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
674
1349
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
675
1350
  }
1351
+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
676
1352
 
677
1353
  /* Check validity of previous table */
678
1354
  if ( repeat
@@ -681,26 +1357,31 @@ HUF_compress_internal (void* dst, size_t dstSize,
681
1357
  *repeat = HUF_repeat_none;
682
1358
  }
683
1359
  /* Heuristic : use existing table for small inputs */
684
- if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
1360
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
685
1361
  return HUF_compressCTable_internal(ostart, op, oend,
686
1362
  src, srcSize,
687
- nbStreams, oldHufTable, bmi2);
1363
+ nbStreams, oldHufTable, flags);
688
1364
  }
689
1365
 
690
1366
  /* Build Huffman Tree */
691
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
1367
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
692
1368
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693
1369
  maxSymbolValue, huffLog,
694
- &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
1370
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
695
1371
  CHECK_F(maxBits);
696
1372
  huffLog = (U32)maxBits;
697
- /* Zero unused symbols in CTable, so we can check it for validity */
698
- memset(table->CTable + (maxSymbolValue + 1), 0,
699
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1373
+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
1374
+ }
1375
+ /* Zero unused symbols in CTable, so we can check it for validity */
1376
+ {
1377
+ size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
1378
+ size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
1379
+ ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
700
1380
  }
701
1381
 
702
1382
  /* Write table description header */
703
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1383
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1384
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
704
1385
  /* Check if using previous huffman table is beneficial */
705
1386
  if (repeat && *repeat != HUF_repeat_none) {
706
1387
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -708,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
708
1389
  if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
709
1390
  return HUF_compressCTable_internal(ostart, op, oend,
710
1391
  src, srcSize,
711
- nbStreams, oldHufTable, bmi2);
1392
+ nbStreams, oldHufTable, flags);
712
1393
  } }
713
1394
 
714
1395
  /* Use the new huffman table */
@@ -716,83 +1397,39 @@ HUF_compress_internal (void* dst, size_t dstSize,
716
1397
  op += hSize;
717
1398
  if (repeat) { *repeat = HUF_repeat_none; }
718
1399
  if (oldHufTable)
719
- memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
1400
+ ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720
1401
  }
721
1402
  return HUF_compressCTable_internal(ostart, op, oend,
722
1403
  src, srcSize,
723
- nbStreams, table->CTable, bmi2);
724
- }
725
-
726
-
727
- size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
728
- const void* src, size_t srcSize,
729
- unsigned maxSymbolValue, unsigned huffLog,
730
- void* workSpace, size_t wkspSize)
731
- {
732
- return HUF_compress_internal(dst, dstSize, src, srcSize,
733
- maxSymbolValue, huffLog, HUF_singleStream,
734
- workSpace, wkspSize,
735
- NULL, NULL, 0, 0 /*bmi2*/);
1404
+ nbStreams, table->CTable, flags);
736
1405
  }
737
1406
 
738
1407
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739
1408
  const void* src, size_t srcSize,
740
1409
  unsigned maxSymbolValue, unsigned huffLog,
741
1410
  void* workSpace, size_t wkspSize,
742
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1411
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
743
1412
  {
1413
+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
744
1414
  return HUF_compress_internal(dst, dstSize, src, srcSize,
745
1415
  maxSymbolValue, huffLog, HUF_singleStream,
746
1416
  workSpace, wkspSize, hufTable,
747
- repeat, preferRepeat, bmi2);
748
- }
749
-
750
- size_t HUF_compress1X (void* dst, size_t dstSize,
751
- const void* src, size_t srcSize,
752
- unsigned maxSymbolValue, unsigned huffLog)
753
- {
754
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
756
- }
757
-
758
- /* HUF_compress4X_repeat():
759
- * compress input using 4 streams.
760
- * provide workspace to generate compression tables */
761
- size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
762
- const void* src, size_t srcSize,
763
- unsigned maxSymbolValue, unsigned huffLog,
764
- void* workSpace, size_t wkspSize)
765
- {
766
- return HUF_compress_internal(dst, dstSize, src, srcSize,
767
- maxSymbolValue, huffLog, HUF_fourStreams,
768
- workSpace, wkspSize,
769
- NULL, NULL, 0, 0 /*bmi2*/);
1417
+ repeat, flags);
770
1418
  }
771
1419
 
772
1420
  /* HUF_compress4X_repeat():
773
1421
  * compress input using 4 streams.
1422
+ * consider skipping quickly
774
1423
  * re-use an existing huffman compression table */
775
1424
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776
1425
  const void* src, size_t srcSize,
777
1426
  unsigned maxSymbolValue, unsigned huffLog,
778
1427
  void* workSpace, size_t wkspSize,
779
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1428
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
780
1429
  {
1430
+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
781
1431
  return HUF_compress_internal(dst, dstSize, src, srcSize,
782
1432
  maxSymbolValue, huffLog, HUF_fourStreams,
783
1433
  workSpace, wkspSize,
784
- hufTable, repeat, preferRepeat, bmi2);
785
- }
786
-
787
- size_t HUF_compress2 (void* dst, size_t dstSize,
788
- const void* src, size_t srcSize,
789
- unsigned maxSymbolValue, unsigned huffLog)
790
- {
791
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
792
- return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793
- }
794
-
795
- size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
796
- {
797
- return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
1434
+ hufTable, repeat, flags);
798
1435
  }