zstd-ruby 1.4.4.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- Huffman encoder, part of New Generation Entropy library
3
- Copyright (C) 2013-2016, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * Huffman encoder, part of New Generation Entropy library
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
@@ -43,16 +23,15 @@
43
23
  /* **************************************************************
44
24
  * Includes
45
25
  ****************************************************************/
46
- #include <string.h> /* memcpy, memset */
47
- #include <stdio.h> /* printf (debug) */
48
- #include "compiler.h"
49
- #include "bitstream.h"
26
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
27
+ #include "../common/compiler.h"
28
+ #include "../common/bitstream.h"
50
29
  #include "hist.h"
51
30
  #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
52
- #include "fse.h" /* header compression */
53
- #define HUF_STATIC_LINKING_ONLY
54
- #include "huf.h"
55
- #include "error_private.h"
31
+ #include "../common/fse.h" /* header compression */
32
+ #include "../common/huf.h"
33
+ #include "../common/error_private.h"
34
+ #include "../common/bits.h" /* ZSTD_highbit32 */
56
35
 
57
36
 
58
37
  /* **************************************************************
@@ -60,29 +39,114 @@
60
39
  ****************************************************************/
61
40
  #define HUF_isError ERR_isError
62
41
  #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
63
- #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
64
- #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
65
42
 
66
43
 
67
44
  /* **************************************************************
68
- * Utils
45
+ * Required declarations
69
46
  ****************************************************************/
70
- unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
47
+ typedef struct nodeElt_s {
48
+ U32 count;
49
+ U16 parent;
50
+ BYTE byte;
51
+ BYTE nbBits;
52
+ } nodeElt;
53
+
54
+
55
+ /* **************************************************************
56
+ * Debug Traces
57
+ ****************************************************************/
58
+
59
+ #if DEBUGLEVEL >= 2
60
+
61
+ static size_t showU32(const U32* arr, size_t size)
71
62
  {
72
- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
63
+ size_t u;
64
+ for (u=0; u<size; u++) {
65
+ RAWLOG(6, " %u", arr[u]); (void)arr;
66
+ }
67
+ RAWLOG(6, " \n");
68
+ return size;
73
69
  }
74
70
 
71
+ static size_t HUF_getNbBits(HUF_CElt elt);
72
+
73
+ static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
74
+ {
75
+ size_t u;
76
+ for (u=0; u<size; u++) {
77
+ RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
78
+ }
79
+ RAWLOG(6, " \n");
80
+ return size;
81
+
82
+ }
83
+
84
+ static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
85
+ {
86
+ size_t u;
87
+ for (u=0; u<size; u++) {
88
+ RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
89
+ }
90
+ RAWLOG(6, " \n");
91
+ return size;
92
+ }
93
+
94
+ static size_t showHNodeBits(const nodeElt* hnode, size_t size)
95
+ {
96
+ size_t u;
97
+ for (u=0; u<size; u++) {
98
+ RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
99
+ }
100
+ RAWLOG(6, " \n");
101
+ return size;
102
+ }
103
+
104
+ #endif
105
+
75
106
 
76
107
  /* *******************************************************
77
108
  * HUF : Huffman block compression
78
109
  *********************************************************/
110
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
111
+
112
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
113
+ {
114
+ size_t const mask = align - 1;
115
+ size_t const rem = (size_t)workspace & mask;
116
+ size_t const add = (align - rem) & mask;
117
+ BYTE* const aligned = (BYTE*)workspace + add;
118
+ assert((align & (align - 1)) == 0); /* pow 2 */
119
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
120
+ if (*workspaceSizePtr >= add) {
121
+ assert(add < align);
122
+ assert(((size_t)aligned & mask) == 0);
123
+ *workspaceSizePtr -= add;
124
+ return aligned;
125
+ } else {
126
+ *workspaceSizePtr = 0;
127
+ return NULL;
128
+ }
129
+ }
130
+
131
+
79
132
  /* HUF_compressWeights() :
80
133
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
81
134
  * The use case needs much less stack memory.
82
135
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83
136
  */
84
137
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
85
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
138
+
139
+ typedef struct {
140
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
141
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
142
+ unsigned count[HUF_TABLELOG_MAX+1];
143
+ S16 norm[HUF_TABLELOG_MAX+1];
144
+ } HUF_CompressWeightsWksp;
145
+
146
+ static size_t
147
+ HUF_compressWeights(void* dst, size_t dstSize,
148
+ const void* weightTable, size_t wtSize,
149
+ void* workspace, size_t workspaceSize)
86
150
  {
87
151
  BYTE* const ostart = (BYTE*) dst;
88
152
  BYTE* op = ostart;
@@ -90,69 +154,103 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
90
154
 
91
155
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
92
156
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
157
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
93
158
 
94
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95
- BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96
-
97
- unsigned count[HUF_TABLELOG_MAX+1];
98
- S16 norm[HUF_TABLELOG_MAX+1];
159
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
99
160
 
100
161
  /* init conditions */
101
162
  if (wtSize <= 1) return 0; /* Not compressible */
102
163
 
103
164
  /* Scan input and build symbol stats */
104
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
165
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
105
166
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
106
167
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
107
168
  }
108
169
 
109
170
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
110
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
171
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
111
172
 
112
173
  /* Write table description header */
113
- { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
174
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
114
175
  op += hSize;
115
176
  }
116
177
 
117
178
  /* Compress */
118
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
119
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
179
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
180
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
120
181
  if (cSize == 0) return 0; /* not enough space for compressed data */
121
182
  op += cSize;
122
183
  }
123
184
 
124
- return op-ostart;
185
+ return (size_t)(op-ostart);
186
+ }
187
+
188
+ static size_t HUF_getNbBits(HUF_CElt elt)
189
+ {
190
+ return elt & 0xFF;
191
+ }
192
+
193
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
194
+ {
195
+ return elt;
196
+ }
197
+
198
+ static size_t HUF_getValue(HUF_CElt elt)
199
+ {
200
+ return elt & ~(size_t)0xFF;
125
201
  }
126
202
 
203
+ static size_t HUF_getValueFast(HUF_CElt elt)
204
+ {
205
+ return elt;
206
+ }
127
207
 
128
- struct HUF_CElt_s {
129
- U16 val;
130
- BYTE nbBits;
131
- }; /* typedef'd to HUF_CElt within "huf.h" */
208
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
209
+ {
210
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
211
+ *elt = nbBits;
212
+ }
132
213
 
133
- /*! HUF_writeCTable() :
134
- `CTable` : Huffman tree to save, using huf representation.
135
- @return : size of saved CTable */
136
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
137
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
214
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
138
215
  {
216
+ size_t const nbBits = HUF_getNbBits(*elt);
217
+ if (nbBits > 0) {
218
+ assert((value >> nbBits) == 0);
219
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
220
+ }
221
+ }
222
+
223
+ typedef struct {
224
+ HUF_CompressWeightsWksp wksp;
139
225
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
140
226
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
227
+ } HUF_WriteCTableWksp;
228
+
229
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
230
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
231
+ void* workspace, size_t workspaceSize)
232
+ {
233
+ HUF_CElt const* const ct = CTable + 1;
141
234
  BYTE* op = (BYTE*)dst;
142
235
  U32 n;
236
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
143
237
 
144
- /* check conditions */
238
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
239
+
240
+ /* check conditions */
241
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
145
242
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
146
243
 
147
244
  /* convert to weight */
148
- bitsToWeight[0] = 0;
245
+ wksp->bitsToWeight[0] = 0;
149
246
  for (n=1; n<huffLog+1; n++)
150
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
247
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
151
248
  for (n=0; n<maxSymbolValue; n++)
152
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
249
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
153
250
 
154
251
  /* attempt weights compression by FSE */
155
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
252
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
253
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
156
254
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
157
255
  op[0] = (BYTE)hSize;
158
256
  return hSize+1;
@@ -162,45 +260,49 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
162
260
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
163
261
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
164
262
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
165
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
263
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
166
264
  for (n=0; n<maxSymbolValue; n+=2)
167
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
265
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
168
266
  return ((maxSymbolValue+1)/2) + 1;
169
267
  }
170
268
 
171
269
 
172
- size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
270
+ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
173
271
  {
174
272
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
175
273
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
176
274
  U32 tableLog = 0;
177
275
  U32 nbSymbols = 0;
276
+ HUF_CElt* const ct = CTable + 1;
178
277
 
179
278
  /* get symbol weights */
180
279
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
280
+ *hasZeroWeights = (rankVal[0] > 0);
181
281
 
182
282
  /* check result */
183
283
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
184
284
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
185
285
 
286
+ CTable[0] = tableLog;
287
+
186
288
  /* Prepare base value per rank */
187
289
  { U32 n, nextRankStart = 0;
188
290
  for (n=1; n<=tableLog; n++) {
189
- U32 current = nextRankStart;
291
+ U32 curr = nextRankStart;
190
292
  nextRankStart += (rankVal[n] << (n-1));
191
- rankVal[n] = current;
293
+ rankVal[n] = curr;
192
294
  } }
193
295
 
194
296
  /* fill nbBits */
195
297
  { U32 n; for (n=0; n<nbSymbols; n++) {
196
298
  const U32 w = huffWeight[n];
197
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
299
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
198
300
  } }
199
301
 
200
302
  /* fill val */
201
303
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
202
304
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
203
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
305
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
204
306
  /* determine starting value per rank */
205
307
  valPerRank[tableLog+1] = 0; /* for w==0 */
206
308
  { U16 min = 0;
@@ -210,178 +312,371 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
210
312
  min >>= 1;
211
313
  } }
212
314
  /* assign value within rank, symbol order */
213
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
315
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
214
316
  }
215
317
 
216
318
  *maxSymbolValuePtr = nbSymbols - 1;
217
319
  return readSize;
218
320
  }
219
321
 
220
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
322
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
221
323
  {
222
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
324
+ const HUF_CElt* const ct = CTable + 1;
223
325
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224
- return table[symbolValue].nbBits;
326
+ return (U32)HUF_getNbBits(ct[symbolValue]);
225
327
  }
226
328
 
227
329
 
228
- typedef struct nodeElt_s {
229
- U32 count;
230
- U16 parent;
231
- BYTE byte;
232
- BYTE nbBits;
233
- } nodeElt;
234
-
235
- static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
330
+ /**
331
+ * HUF_setMaxHeight():
332
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
333
+ *
334
+ * It attempts to convert all nodes with nbBits > @targetNbBits
335
+ * to employ @targetNbBits instead. Then it adjusts the tree
336
+ * so that it remains a valid canonical Huffman tree.
337
+ *
338
+ * @pre The sum of the ranks of each symbol == 2^largestBits,
339
+ * where largestBits == huffNode[lastNonNull].nbBits.
340
+ * @post The sum of the ranks of each symbol == 2^largestBits,
341
+ * where largestBits is the return value (expected <= targetNbBits).
342
+ *
343
+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
344
+ * It's presumed sorted, from most frequent to rarest symbol.
345
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
346
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
347
+ * may not respect. After this function the Huffman tree will
348
+ * respect targetNbBits.
349
+ * @return The maximum number of bits of the Huffman tree after adjustment.
350
+ */
351
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
236
352
  {
237
353
  const U32 largestBits = huffNode[lastNonNull].nbBits;
238
- if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
354
+ /* early exit : no elt > targetNbBits, so the tree is already valid. */
355
+ if (largestBits <= targetNbBits) return largestBits;
356
+
357
+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
239
358
 
240
359
  /* there are several too large elements (at least >= 2) */
241
360
  { int totalCost = 0;
242
- const U32 baseCost = 1 << (largestBits - maxNbBits);
243
- U32 n = lastNonNull;
244
-
245
- while (huffNode[n].nbBits > maxNbBits) {
361
+ const U32 baseCost = 1 << (largestBits - targetNbBits);
362
+ int n = (int)lastNonNull;
363
+
364
+ /* Adjust any ranks > targetNbBits to targetNbBits.
365
+ * Compute totalCost, which is how far the sum of the ranks
366
+ * exceeds 2^largestBits after adjusting the offending ranks.
367
+ */
368
+ while (huffNode[n].nbBits > targetNbBits) {
246
369
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
247
- huffNode[n].nbBits = (BYTE)maxNbBits;
248
- n --;
249
- } /* n stops at huffNode[n].nbBits <= maxNbBits */
250
- while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
370
+ huffNode[n].nbBits = (BYTE)targetNbBits;
371
+ n--;
372
+ }
373
+ /* n stops at huffNode[n].nbBits <= targetNbBits */
374
+ assert(huffNode[n].nbBits <= targetNbBits);
375
+ /* n ends at the index of the smallest symbol using < targetNbBits */
376
+ while (huffNode[n].nbBits == targetNbBits) --n;
251
377
 
252
- /* renorm totalCost */
253
- totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
378
+ /* renorm totalCost from 2^largestBits to 2^targetNbBits
379
+ * note : totalCost is necessarily a multiple of baseCost */
380
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
381
+ totalCost >>= (largestBits - targetNbBits);
382
+ assert(totalCost > 0);
254
383
 
255
384
  /* repay normalized cost */
256
385
  { U32 const noSymbol = 0xF0F0F0F0;
257
386
  U32 rankLast[HUF_TABLELOG_MAX+2];
258
- int pos;
259
387
 
260
- /* Get pos of last (smallest) symbol per rank */
261
- memset(rankLast, 0xF0, sizeof(rankLast));
262
- { U32 currentNbBits = maxNbBits;
388
+ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
389
+ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
390
+ { U32 currentNbBits = targetNbBits;
391
+ int pos;
263
392
  for (pos=n ; pos >= 0; pos--) {
264
393
  if (huffNode[pos].nbBits >= currentNbBits) continue;
265
- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
266
- rankLast[maxNbBits-currentNbBits] = pos;
394
+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
395
+ rankLast[targetNbBits-currentNbBits] = (U32)pos;
267
396
  } }
268
397
 
269
398
  while (totalCost > 0) {
270
- U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
399
+ /* Try to reduce the next power of 2 above totalCost because we
400
+ * gain back half the rank.
401
+ */
402
+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
271
403
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
272
- U32 highPos = rankLast[nBitsToDecrease];
273
- U32 lowPos = rankLast[nBitsToDecrease-1];
404
+ U32 const highPos = rankLast[nBitsToDecrease];
405
+ U32 const lowPos = rankLast[nBitsToDecrease-1];
274
406
  if (highPos == noSymbol) continue;
407
+ /* Decrease highPos if no symbols of lowPos or if it is
408
+ * not cheaper to remove 2 lowPos than highPos.
409
+ */
275
410
  if (lowPos == noSymbol) break;
276
411
  { U32 const highTotal = huffNode[highPos].count;
277
412
  U32 const lowTotal = 2 * huffNode[lowPos].count;
278
413
  if (highTotal <= lowTotal) break;
279
414
  } }
280
415
  /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
416
+ assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
281
417
  /* HUF_TABLELOG_MAX test just to please gcc 5+; but it should not be necessary */
282
418
  while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
283
- nBitsToDecrease ++;
419
+ nBitsToDecrease++;
420
+ assert(rankLast[nBitsToDecrease] != noSymbol);
421
+ /* Increase the number of bits to gain back half the rank cost. */
284
422
  totalCost -= 1 << (nBitsToDecrease-1);
423
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
424
+
425
+ /* Fix up the new rank.
426
+ * If the new rank was empty, this symbol is now its smallest.
427
+ * Otherwise, this symbol will be the largest in the new rank so no adjustment.
428
+ */
285
429
  if (rankLast[nBitsToDecrease-1] == noSymbol)
286
- rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
287
- huffNode[rankLast[nBitsToDecrease]].nbBits ++;
430
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
431
+ /* Fix up the old rank.
432
+ * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
433
+ * it must be the only symbol in its rank, so the old rank now has no symbols.
434
+ * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
435
+ * the smallest node in the rank. If the previous position belongs to a different rank,
436
+ * then the rank is now empty.
437
+ */
288
438
  if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
289
439
  rankLast[nBitsToDecrease] = noSymbol;
290
440
  else {
291
441
  rankLast[nBitsToDecrease]--;
292
- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
442
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
293
443
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
294
- } } /* while (totalCost > 0) */
295
-
444
+ }
445
+ } /* while (totalCost > 0) */
446
+
447
+ /* If we've removed too much weight, then we have to add it back.
448
+ * To avoid overshooting again, we only adjust the smallest rank.
449
+ * We take the largest nodes from the lowest rank 0 and move them
450
+ * to rank 1. There's guaranteed to be enough rank 0 symbols because
451
+ * TODO.
452
+ */
296
453
  while (totalCost < 0) { /* Sometimes, cost correction overshoots */
297
- if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
298
- while (huffNode[n].nbBits == maxNbBits) n--;
454
+ /* special case : no rank 1 symbol (using targetNbBits-1);
455
+ * let's create one from largest rank 0 (using targetNbBits).
456
+ */
457
+ if (rankLast[1] == noSymbol) {
458
+ while (huffNode[n].nbBits == targetNbBits) n--;
299
459
  huffNode[n+1].nbBits--;
300
- rankLast[1] = n+1;
460
+ assert(n >= 0);
461
+ rankLast[1] = (U32)(n+1);
301
462
  totalCost++;
302
463
  continue;
303
464
  }
304
465
  huffNode[ rankLast[1] + 1 ].nbBits--;
305
466
  rankLast[1]++;
306
467
  totalCost ++;
307
- } } } /* there are several too large elements (at least >= 2) */
468
+ }
469
+ } /* repay normalized cost */
470
+ } /* there are several too large elements (at least >= 2) */
308
471
 
309
- return maxNbBits;
472
+ return targetNbBits;
310
473
  }
311
474
 
312
-
313
475
  typedef struct {
314
- U32 base;
315
- U32 current;
476
+ U16 base;
477
+ U16 curr;
316
478
  } rankPos;
317
479
 
318
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
319
- {
320
- rankPos rank[32];
480
+ typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
481
+
482
+ /* Number of buckets available for HUF_sort() */
483
+ #define RANK_POSITION_TABLE_SIZE 192
484
+
485
+ typedef struct {
486
+ huffNodeTable huffNodeTbl;
487
+ rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
488
+ } HUF_buildCTable_wksp_tables;
489
+
490
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
491
+ * Strategy is to use as many buckets as possible for representing distinct
492
+ * counts while using the remainder to represent all "large" counts.
493
+ *
494
+ * To satisfy this requirement for 192 buckets, we can do the following:
495
+ * Let buckets 0-166 represent distinct counts of [0, 166]
496
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
497
+ */
498
+ #define RANK_POSITION_MAX_COUNT_LOG 32
499
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
500
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
501
+
502
+ /* Return the appropriate bucket index for a given count. See definition of
503
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
504
+ */
505
+ static U32 HUF_getIndex(U32 const count) {
506
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
507
+ ? count
508
+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
509
+ }
510
+
511
+ /* Helper swap function for HUF_quickSortPartition() */
512
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
513
+ nodeElt tmp = *a;
514
+ *a = *b;
515
+ *b = tmp;
516
+ }
517
+
518
+ /* Returns 0 if the huffNode array is not sorted by descending count */
519
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
520
+ U32 i;
521
+ for (i = 1; i < maxSymbolValue1; ++i) {
522
+ if (huffNode[i].count > huffNode[i-1].count) {
523
+ return 0;
524
+ }
525
+ }
526
+ return 1;
527
+ }
528
+
529
+ /* Insertion sort by descending order */
530
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
531
+ int i;
532
+ int const size = high-low+1;
533
+ huffNode += low;
534
+ for (i = 1; i < size; ++i) {
535
+ nodeElt const key = huffNode[i];
536
+ int j = i - 1;
537
+ while (j >= 0 && huffNode[j].count < key.count) {
538
+ huffNode[j + 1] = huffNode[j];
539
+ j--;
540
+ }
541
+ huffNode[j + 1] = key;
542
+ }
543
+ }
544
+
545
+ /* Pivot helper function for quicksort. */
546
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
547
+ /* Simply select rightmost element as pivot. "Better" selectors like
548
+ * median-of-three don't experimentally appear to have any benefit.
549
+ */
550
+ U32 const pivot = arr[high].count;
551
+ int i = low - 1;
552
+ int j = low;
553
+ for ( ; j < high; j++) {
554
+ if (arr[j].count > pivot) {
555
+ i++;
556
+ HUF_swapNodes(&arr[i], &arr[j]);
557
+ }
558
+ }
559
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
560
+ return i + 1;
561
+ }
562
+
563
+ /* Classic quicksort by descending with partially iterative calls
564
+ * to reduce worst case callstack size.
565
+ */
566
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
567
+ int const kInsertionSortThreshold = 8;
568
+ if (high - low < kInsertionSortThreshold) {
569
+ HUF_insertionSort(arr, low, high);
570
+ return;
571
+ }
572
+ while (low < high) {
573
+ int const idx = HUF_quickSortPartition(arr, low, high);
574
+ if (idx - low < high - idx) {
575
+ HUF_simpleQuickSort(arr, low, idx - 1);
576
+ low = idx + 1;
577
+ } else {
578
+ HUF_simpleQuickSort(arr, idx + 1, high);
579
+ high = idx - 1;
580
+ }
581
+ }
582
+ }
583
+
584
+ /**
585
+ * HUF_sort():
586
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
587
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
588
+ *
589
+ * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
590
+ * Must have (maxSymbolValue + 1) entries.
591
+ * @param[in] count Histogram of the symbols.
592
+ * @param[in] maxSymbolValue Maximum symbol value.
593
+ * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
594
+ */
595
+ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
321
596
  U32 n;
597
+ U32 const maxSymbolValue1 = maxSymbolValue+1;
598
+
599
+ /* Compute base and set curr to base.
600
+ * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
601
+ * See HUF_getIndex to see bucketing strategy.
602
+ * We attribute each symbol to lowerRank's base value, because we want to know where
603
+ * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
604
+ */
605
+ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
606
+ for (n = 0; n < maxSymbolValue1; ++n) {
607
+ U32 lowerRank = HUF_getIndex(count[n]);
608
+ assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
609
+ rankPosition[lowerRank].base++;
610
+ }
322
611
 
323
- memset(rank, 0, sizeof(rank));
324
- for (n=0; n<=maxSymbolValue; n++) {
325
- U32 r = BIT_highbit32(count[n] + 1);
326
- rank[r].base ++;
612
+ assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
613
+ /* Set up the rankPosition table */
614
+ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
615
+ rankPosition[n-1].base += rankPosition[n].base;
616
+ rankPosition[n-1].curr = rankPosition[n-1].base;
327
617
  }
328
- for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
329
- for (n=0; n<32; n++) rank[n].current = rank[n].base;
330
- for (n=0; n<=maxSymbolValue; n++) {
618
+
619
+ /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
620
+ for (n = 0; n < maxSymbolValue1; ++n) {
331
621
  U32 const c = count[n];
332
- U32 const r = BIT_highbit32(c+1) + 1;
333
- U32 pos = rank[r].current++;
334
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
335
- huffNode[pos] = huffNode[pos-1];
336
- pos--;
337
- }
622
+ U32 const r = HUF_getIndex(c) + 1;
623
+ U32 const pos = rankPosition[r].curr++;
624
+ assert(pos < maxSymbolValue1);
338
625
  huffNode[pos].count = c;
339
626
  huffNode[pos].byte = (BYTE)n;
340
627
  }
628
+
629
+ /* Sort each bucket. */
630
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
631
+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
632
+ U32 const bucketStartIdx = rankPosition[n].base;
633
+ if (bucketSize > 1) {
634
+ assert(bucketStartIdx < maxSymbolValue1);
635
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
636
+ }
637
+ }
638
+
639
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
341
640
  }
342
641
 
343
642
 
344
643
  /** HUF_buildCTable_wksp() :
345
644
  * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
346
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
645
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
347
646
  */
348
647
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
349
- typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
648
+
649
+ /* HUF_buildTree():
650
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
651
+ *
652
+ * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
653
+ * @param maxSymbolValue The maximum symbol value.
654
+ * @return The smallest node in the Huffman tree (by count).
655
+ */
656
+ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
351
657
  {
352
- nodeElt* const huffNode0 = (nodeElt*)workSpace;
353
- nodeElt* const huffNode = huffNode0+1;
354
- U32 n, nonNullRank;
658
+ nodeElt* const huffNode0 = huffNode - 1;
659
+ int nonNullRank;
355
660
  int lowS, lowN;
356
- U16 nodeNb = STARTNODE;
357
- U32 nodeRoot;
358
-
359
- /* safety checks */
360
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
361
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
362
- if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
363
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
364
- memset(huffNode0, 0, sizeof(huffNodeTable));
365
-
366
- /* sort, decreasing order */
367
- HUF_sort(huffNode, count, maxSymbolValue);
368
-
661
+ int nodeNb = STARTNODE;
662
+ int n, nodeRoot;
663
+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
369
664
  /* init for parents */
370
- nonNullRank = maxSymbolValue;
665
+ nonNullRank = (int)maxSymbolValue;
371
666
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
372
667
  lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
373
668
  huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
374
- huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
669
+ huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
375
670
  nodeNb++; lowS-=2;
376
671
  for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
377
672
  huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
378
673
 
379
674
  /* create parents */
380
675
  while (nodeNb <= nodeRoot) {
381
- U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
382
- U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
676
+ int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
677
+ int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
383
678
  huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
384
- huffNode[n1].parent = huffNode[n2].parent = nodeNb;
679
+ huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
385
680
  nodeNb++;
386
681
  }
387
682
 
@@ -392,126 +687,406 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
392
687
  for (n=0; n<=nonNullRank; n++)
393
688
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
394
689
 
395
- /* enforce maxTableLog */
396
- maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
690
+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
397
691
 
398
- /* fill result into tree (val, nbBits) */
399
- { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
400
- U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
401
- if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
402
- for (n=0; n<=nonNullRank; n++)
403
- nbPerRank[huffNode[n].nbBits]++;
404
- /* determine stating value per rank */
405
- { U16 min = 0;
406
- for (n=maxNbBits; n>0; n--) {
407
- valPerRank[n] = min; /* get starting value within each rank */
408
- min += nbPerRank[n];
409
- min >>= 1;
410
- } }
411
- for (n=0; n<=maxSymbolValue; n++)
412
- tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
413
- for (n=0; n<=maxSymbolValue; n++)
414
- tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
415
- }
416
-
417
- return maxNbBits;
692
+ return nonNullRank;
418
693
  }
419
694
 
420
- /** HUF_buildCTable() :
421
- * @return : maxNbBits
422
- * Note : count is used before tree is written, so they can safely overlap
695
+ /**
696
+ * HUF_buildCTableFromTree():
697
+ * Build the CTable given the Huffman tree in huffNode.
698
+ *
699
+ * @param[out] CTable The output Huffman CTable.
700
+ * @param huffNode The Huffman tree.
701
+ * @param nonNullRank The last and smallest node in the Huffman tree.
702
+ * @param maxSymbolValue The maximum symbol value.
703
+ * @param maxNbBits The exact maximum number of bits used in the Huffman tree.
423
704
  */
424
- size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
705
+ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
425
706
  {
426
- huffNodeTable nodeTable;
427
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
707
+ HUF_CElt* const ct = CTable + 1;
708
+ /* fill result into ctable (val, nbBits) */
709
+ int n;
710
+ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
711
+ U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
712
+ int const alphabetSize = (int)(maxSymbolValue + 1);
713
+ for (n=0; n<=nonNullRank; n++)
714
+ nbPerRank[huffNode[n].nbBits]++;
715
+ /* determine starting value per rank */
716
+ { U16 min = 0;
717
+ for (n=(int)maxNbBits; n>0; n--) {
718
+ valPerRank[n] = min; /* get starting value within each rank */
719
+ min += nbPerRank[n];
720
+ min >>= 1;
721
+ } }
722
+ for (n=0; n<alphabetSize; n++)
723
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
724
+ for (n=0; n<alphabetSize; n++)
725
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
726
+ CTable[0] = maxNbBits;
727
+ }
728
+
729
+ size_t
730
+ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
731
+ void* workSpace, size_t wkspSize)
732
+ {
733
+ HUF_buildCTable_wksp_tables* const wksp_tables =
734
+ (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
735
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
736
+ nodeElt* const huffNode = huffNode0+1;
737
+ int nonNullRank;
738
+
739
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
740
+
741
+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
742
+
743
+ /* safety checks */
744
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
745
+ return ERROR(workSpace_tooSmall);
746
+ if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
747
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
748
+ return ERROR(maxSymbolValue_tooLarge);
749
+ ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
750
+
751
+ /* sort, decreasing order */
752
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
753
+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
754
+
755
+ /* build tree */
756
+ nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
757
+
758
+ /* determine and enforce maxTableLog */
759
+ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
760
+ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
761
+
762
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
763
+
764
+ return maxNbBits;
428
765
  }
429
766
 
430
- static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
767
+ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
431
768
  {
769
+ HUF_CElt const* ct = CTable + 1;
432
770
  size_t nbBits = 0;
433
771
  int s;
434
772
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
435
- nbBits += CTable[s].nbBits * count[s];
773
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
436
774
  }
437
775
  return nbBits >> 3;
438
776
  }
439
777
 
440
- static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
778
+ int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
779
+ HUF_CElt const* ct = CTable + 1;
441
780
  int bad = 0;
442
781
  int s;
443
782
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
444
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
783
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
445
784
  }
446
785
  return !bad;
447
786
  }
448
787
 
449
788
  size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450
789
 
790
+ /** HUF_CStream_t:
791
+ * Huffman uses its own BIT_CStream_t implementation.
792
+ * There are three major differences from BIT_CStream_t:
793
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
794
+ * the pair (nbBits, value) in the format:
795
+ * format:
796
+ * - Bits [0, 4) = nbBits
797
+ * - Bits [4, 64 - nbBits) = 0
798
+ * - Bits [64 - nbBits, 64) = value
799
+ * 2. The bitContainer is built from the upper bits and
800
+ * right shifted. E.g. to add a new value of N bits
801
+ * you right shift the bitContainer by N, then or in
802
+ * the new value into the N upper bits.
803
+ * 3. The bitstream has two bit containers. You can add
804
+ * bits to the second container and merge them into
805
+ * the first container.
806
+ */
807
+
808
+ #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
809
+
810
+ typedef struct {
811
+ size_t bitContainer[2];
812
+ size_t bitPos[2];
813
+
814
+ BYTE* startPtr;
815
+ BYTE* ptr;
816
+ BYTE* endPtr;
817
+ } HUF_CStream_t;
818
+
819
+ /**! HUF_initCStream():
820
+ * Initializes the bitstream.
821
+ * @returns 0 or an error code.
822
+ */
823
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
824
+ void* startPtr, size_t dstCapacity)
825
+ {
826
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
827
+ bitC->startPtr = (BYTE*)startPtr;
828
+ bitC->ptr = bitC->startPtr;
829
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
830
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
831
+ return 0;
832
+ }
833
+
834
+ /*! HUF_addBits():
835
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
836
+ *
837
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
838
+ * See the HUF_CStream_t docs for the format.
839
+ * @param idx Insert into the bitstream at this idx.
840
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
841
+ * to have at least 4 unused bits after this call it may be 1,
842
+ * otherwise it must be 0. HUF_addBits() is faster when fast is set.
843
+ */
844
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
845
+ {
846
+ assert(idx <= 1);
847
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
848
+ /* This is efficient on x86-64 with BMI2 because shrx
849
+ * only reads the low 6 bits of the register. The compiler
850
+ * knows this and elides the mask. When fast is set,
851
+ * every operation can use the same value loaded from elt.
852
+ */
853
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
854
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
855
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
856
+ * doesn't matter that the high bits have noise from the value.
857
+ */
858
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
859
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
860
+ /* The last 4-bits of elt are dirty if fast is set,
861
+ * so we must not be overwriting bits that have already been
862
+ * inserted into the bit container.
863
+ */
864
+ #if DEBUGLEVEL >= 1
865
+ {
866
+ size_t const nbBits = HUF_getNbBits(elt);
867
+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
868
+ (void)dirtyBits;
869
+ /* Middle bits are 0. */
870
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
871
+ /* We didn't overwrite any bits in the bit container. */
872
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
873
+ (void)dirtyBits;
874
+ }
875
+ #endif
876
+ }
877
+
878
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
879
+ {
880
+ bitC->bitContainer[1] = 0;
881
+ bitC->bitPos[1] = 0;
882
+ }
883
+
884
+ /*! HUF_mergeIndex1() :
885
+ * Merges the bit container @ index 1 into the bit container @ index 0
886
+ * and zeros the bit container @ index 1.
887
+ */
888
+ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
889
+ {
890
+ assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
891
+ bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
892
+ bitC->bitContainer[0] |= bitC->bitContainer[1];
893
+ bitC->bitPos[0] += bitC->bitPos[1];
894
+ assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
895
+ }
896
+
897
+ /*! HUF_flushBits() :
898
+ * Flushes the bits in the bit container @ index 0.
899
+ *
900
+ * @post bitPos will be < 8.
901
+ * @param kFast If kFast is set then we must know a-priori that
902
+ * the bit container will not overflow.
903
+ */
904
+ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
905
+ {
906
+ /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
907
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
908
+ size_t const nbBytes = nbBits >> 3;
909
+ /* The top nbBits bits of bitContainer are the ones we need. */
910
+ size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
911
+ /* Mask bitPos to account for the bytes we consumed. */
912
+ bitC->bitPos[0] &= 7;
913
+ assert(nbBits > 0);
914
+ assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
915
+ assert(bitC->ptr <= bitC->endPtr);
916
+ MEM_writeLEST(bitC->ptr, bitContainer);
917
+ bitC->ptr += nbBytes;
918
+ assert(!kFast || bitC->ptr <= bitC->endPtr);
919
+ if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
920
+ /* bitContainer doesn't need to be modified because the leftover
921
+ * bits are already the top bitPos bits. And we don't care about
922
+ * noise in the lower values.
923
+ */
924
+ }
925
+
926
+ /*! HUF_endMark()
927
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
928
+ */
929
+ static HUF_CElt HUF_endMark(void)
930
+ {
931
+ HUF_CElt endMark;
932
+ HUF_setNbBits(&endMark, 1);
933
+ HUF_setValue(&endMark, 1);
934
+ return endMark;
935
+ }
936
+
937
+ /*! HUF_closeCStream() :
938
+ * @return Size of CStream, in bytes,
939
+ * or 0 if it could not fit into dstBuffer */
940
+ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
941
+ {
942
+ HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
943
+ HUF_flushBits(bitC, /* kFast */ 0);
944
+ {
945
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
946
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
947
+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
948
+ }
949
+ }
950
+
451
951
  FORCE_INLINE_TEMPLATE void
452
- HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
952
+ HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
453
953
  {
454
- BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
954
+ HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
455
955
  }
456
956
 
457
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
957
+ FORCE_INLINE_TEMPLATE void
958
+ HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
959
+ const BYTE* ip, size_t srcSize,
960
+ const HUF_CElt* ct,
961
+ int kUnroll, int kFastFlush, int kLastFast)
962
+ {
963
+ /* Join to kUnroll */
964
+ int n = (int)srcSize;
965
+ int rem = n % kUnroll;
966
+ if (rem > 0) {
967
+ for (; rem > 0; --rem) {
968
+ HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
969
+ }
970
+ HUF_flushBits(bitC, kFastFlush);
971
+ }
972
+ assert(n % kUnroll == 0);
973
+
974
+ /* Join to 2 * kUnroll */
975
+ if (n % (2 * kUnroll)) {
976
+ int u;
977
+ for (u = 1; u < kUnroll; ++u) {
978
+ HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
979
+ }
980
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
981
+ HUF_flushBits(bitC, kFastFlush);
982
+ n -= kUnroll;
983
+ }
984
+ assert(n % (2 * kUnroll) == 0);
458
985
 
459
- #define HUF_FLUSHBITS_1(stream) \
460
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
986
+ for (; n>0; n-= 2 * kUnroll) {
987
+ /* Encode kUnroll symbols into the bitstream @ index 0. */
988
+ int u;
989
+ for (u = 1; u < kUnroll; ++u) {
990
+ HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
991
+ }
992
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
993
+ HUF_flushBits(bitC, kFastFlush);
994
+ /* Encode kUnroll symbols into the bitstream @ index 1.
995
+ * This allows us to start filling the bit container
996
+ * without any data dependencies.
997
+ */
998
+ HUF_zeroIndex1(bitC);
999
+ for (u = 1; u < kUnroll; ++u) {
1000
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
1001
+ }
1002
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
1003
+ /* Merge bitstream @ index 1 into the bitstream @ index 0 */
1004
+ HUF_mergeIndex1(bitC);
1005
+ HUF_flushBits(bitC, kFastFlush);
1006
+ }
1007
+ assert(n == 0);
1008
+
1009
+ }
1010
+
1011
+ /**
1012
+ * Returns a tight upper bound on the output space needed by Huffman
1013
+ * with 8 bytes buffer to handle over-writes. If the output is at least
1014
+ * this large we don't need to do bounds checks during Huffman encoding.
1015
+ */
1016
+ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
1017
+ {
1018
+ return ((srcSize * tableLog) >> 3) + 8;
1019
+ }
461
1020
 
462
- #define HUF_FLUSHBITS_2(stream) \
463
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464
1021
 
465
1022
  FORCE_INLINE_TEMPLATE size_t
466
1023
  HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
467
1024
  const void* src, size_t srcSize,
468
1025
  const HUF_CElt* CTable)
469
1026
  {
1027
+ U32 const tableLog = (U32)CTable[0];
1028
+ HUF_CElt const* ct = CTable + 1;
470
1029
  const BYTE* ip = (const BYTE*) src;
471
1030
  BYTE* const ostart = (BYTE*)dst;
472
1031
  BYTE* const oend = ostart + dstSize;
473
1032
  BYTE* op = ostart;
474
- size_t n;
475
- BIT_CStream_t bitC;
1033
+ HUF_CStream_t bitC;
476
1034
 
477
1035
  /* init */
478
1036
  if (dstSize < 8) return 0; /* not enough space to compress */
479
- { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
1037
+ { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
480
1038
  if (HUF_isError(initErr)) return 0; }
481
1039
 
482
- n = srcSize & ~3; /* join to mod 4 */
483
- switch (srcSize & 3)
484
- {
485
- case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
486
- HUF_FLUSHBITS_2(&bitC);
487
- /* fall-through */
488
- case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
489
- HUF_FLUSHBITS_1(&bitC);
490
- /* fall-through */
491
- case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
492
- HUF_FLUSHBITS(&bitC);
493
- /* fall-through */
494
- case 0 : /* fall-through */
495
- default: break;
496
- }
497
-
498
- for (; n>0; n-=4) { /* note : n&3==0 at this stage */
499
- HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
500
- HUF_FLUSHBITS_1(&bitC);
501
- HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
502
- HUF_FLUSHBITS_2(&bitC);
503
- HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
504
- HUF_FLUSHBITS_1(&bitC);
505
- HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
506
- HUF_FLUSHBITS(&bitC);
1040
+ if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
1041
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
1042
+ else {
1043
+ if (MEM_32bits()) {
1044
+ switch (tableLog) {
1045
+ case 11:
1046
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
1047
+ break;
1048
+ case 10: ZSTD_FALLTHROUGH;
1049
+ case 9: ZSTD_FALLTHROUGH;
1050
+ case 8:
1051
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
1052
+ break;
1053
+ case 7: ZSTD_FALLTHROUGH;
1054
+ default:
1055
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
1056
+ break;
1057
+ }
1058
+ } else {
1059
+ switch (tableLog) {
1060
+ case 11:
1061
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
1062
+ break;
1063
+ case 10:
1064
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
1065
+ break;
1066
+ case 9:
1067
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
1068
+ break;
1069
+ case 8:
1070
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
1071
+ break;
1072
+ case 7:
1073
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
1074
+ break;
1075
+ case 6: ZSTD_FALLTHROUGH;
1076
+ default:
1077
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
1078
+ break;
1079
+ }
1080
+ }
507
1081
  }
1082
+ assert(bitC.ptr <= bitC.endPtr);
508
1083
 
509
- return BIT_closeCStream(&bitC);
1084
+ return HUF_closeCStream(&bitC);
510
1085
  }
511
1086
 
512
1087
  #if DYNAMIC_BMI2
513
1088
 
514
- static TARGET_ATTRIBUTE("bmi2") size_t
1089
+ static BMI2_TARGET_ATTRIBUTE size_t
515
1090
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
516
1091
  const void* src, size_t srcSize,
517
1092
  const HUF_CElt* CTable)
@@ -530,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
530
1105
  static size_t
531
1106
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
532
1107
  const void* src, size_t srcSize,
533
- const HUF_CElt* CTable, const int bmi2)
1108
+ const HUF_CElt* CTable, const int flags)
534
1109
  {
535
- if (bmi2) {
1110
+ if (flags & HUF_flags_bmi2) {
536
1111
  return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
537
1112
  }
538
1113
  return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -543,24 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
543
1118
  static size_t
544
1119
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
545
1120
  const void* src, size_t srcSize,
546
- const HUF_CElt* CTable, const int bmi2)
1121
+ const HUF_CElt* CTable, const int flags)
547
1122
  {
548
- (void)bmi2;
1123
+ (void)flags;
549
1124
  return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
550
1125
  }
551
1126
 
552
1127
  #endif
553
1128
 
554
- size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1129
+ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
555
1130
  {
556
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1131
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
557
1132
  }
558
1133
 
559
-
560
1134
  static size_t
561
1135
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
562
1136
  const void* src, size_t srcSize,
563
- const HUF_CElt* CTable, int bmi2)
1137
+ const HUF_CElt* CTable, int flags)
564
1138
  {
565
1139
  size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
566
1140
  const BYTE* ip = (const BYTE*) src;
@@ -573,41 +1147,43 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
573
1147
  if (srcSize < 12) return 0; /* no saving possible : too small input */
574
1148
  op += 6; /* jumpTable */
575
1149
 
576
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577
- if (cSize==0) return 0;
578
- assert(cSize <= 65535);
1150
+ assert(op <= oend);
1151
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1152
+ if (cSize == 0 || cSize > 65535) return 0;
579
1153
  MEM_writeLE16(ostart, (U16)cSize);
580
1154
  op += cSize;
581
1155
  }
582
1156
 
583
1157
  ip += segmentSize;
584
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585
- if (cSize==0) return 0;
586
- assert(cSize <= 65535);
1158
+ assert(op <= oend);
1159
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1160
+ if (cSize == 0 || cSize > 65535) return 0;
587
1161
  MEM_writeLE16(ostart+2, (U16)cSize);
588
1162
  op += cSize;
589
1163
  }
590
1164
 
591
1165
  ip += segmentSize;
592
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
593
- if (cSize==0) return 0;
594
- assert(cSize <= 65535);
1166
+ assert(op <= oend);
1167
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1168
+ if (cSize == 0 || cSize > 65535) return 0;
595
1169
  MEM_writeLE16(ostart+4, (U16)cSize);
596
1170
  op += cSize;
597
1171
  }
598
1172
 
599
1173
  ip += segmentSize;
600
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
601
- if (cSize==0) return 0;
1174
+ assert(op <= oend);
1175
+ assert(ip <= iend);
1176
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
1177
+ if (cSize == 0 || cSize > 65535) return 0;
602
1178
  op += cSize;
603
1179
  }
604
1180
 
605
- return op-ostart;
1181
+ return (size_t)(op-ostart);
606
1182
  }
607
1183
 
608
- size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1184
+ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
609
1185
  {
610
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1186
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
611
1187
  }
612
1188
 
613
1189
  typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -615,44 +1191,127 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
615
1191
  static size_t HUF_compressCTable_internal(
616
1192
  BYTE* const ostart, BYTE* op, BYTE* const oend,
617
1193
  const void* src, size_t srcSize,
618
- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
1194
+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
619
1195
  {
620
1196
  size_t const cSize = (nbStreams==HUF_singleStream) ?
621
- HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
622
- HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
1197
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
1198
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
623
1199
  if (HUF_isError(cSize)) { return cSize; }
624
1200
  if (cSize==0) { return 0; } /* uncompressible */
625
1201
  op += cSize;
626
1202
  /* check compressibility */
1203
+ assert(op >= ostart);
627
1204
  if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
628
- return op-ostart;
1205
+ return (size_t)(op-ostart);
629
1206
  }
630
1207
 
631
1208
  typedef struct {
632
1209
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
633
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
634
- huffNodeTable nodeTable;
1210
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1211
+ union {
1212
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1213
+ HUF_WriteCTableWksp writeCTable_wksp;
1214
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1215
+ } wksps;
635
1216
  } HUF_compress_tables_t;
636
1217
 
1218
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1219
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1220
+
1221
+ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
1222
+ {
1223
+ unsigned cardinality = 0;
1224
+ unsigned i;
1225
+
1226
+ for (i = 0; i < maxSymbolValue + 1; i++) {
1227
+ if (count[i] != 0) cardinality += 1;
1228
+ }
1229
+
1230
+ return cardinality;
1231
+ }
1232
+
1233
+ unsigned HUF_minTableLog(unsigned symbolCardinality)
1234
+ {
1235
+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
1236
+ return minBitsSymbols;
1237
+ }
1238
+
1239
+ unsigned HUF_optimalTableLog(
1240
+ unsigned maxTableLog,
1241
+ size_t srcSize,
1242
+ unsigned maxSymbolValue,
1243
+ void* workSpace, size_t wkspSize,
1244
+ HUF_CElt* table,
1245
+ const unsigned* count,
1246
+ int flags)
1247
+ {
1248
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
1249
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
1250
+
1251
+ if (!(flags & HUF_flags_optimalDepth)) {
1252
+ /* cheap evaluation, based on FSE */
1253
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
1254
+ }
1255
+
1256
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
1257
+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
1258
+ size_t maxBits, hSize, newSize;
1259
+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
1260
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
1261
+ size_t optSize = ((size_t) ~0) - 1;
1262
+ unsigned optLog = maxTableLog, optLogGuess;
1263
+
1264
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
1265
+
1266
+ /* Search until size increases */
1267
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
1268
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
1269
+ maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
1270
+ if (ERR_isError(maxBits)) continue;
1271
+
1272
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
1273
+
1274
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
1275
+
1276
+ if (ERR_isError(hSize)) continue;
1277
+
1278
+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
1279
+
1280
+ if (newSize > optSize + 1) {
1281
+ break;
1282
+ }
1283
+
1284
+ if (newSize < optSize) {
1285
+ optSize = newSize;
1286
+ optLog = optLogGuess;
1287
+ }
1288
+ }
1289
+ assert(optLog <= HUF_TABLELOG_MAX);
1290
+ return optLog;
1291
+ }
1292
+ }
1293
+
637
1294
  /* HUF_compress_internal() :
638
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1295
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
1296
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
639
1297
  static size_t
640
1298
  HUF_compress_internal (void* dst, size_t dstSize,
641
1299
  const void* src, size_t srcSize,
642
1300
  unsigned maxSymbolValue, unsigned huffLog,
643
1301
  HUF_nbStreams_e nbStreams,
644
1302
  void* workSpace, size_t wkspSize,
645
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
646
- const int bmi2)
1303
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
647
1304
  {
648
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1305
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
649
1306
  BYTE* const ostart = (BYTE*)dst;
650
1307
  BYTE* const oend = ostart + dstSize;
651
1308
  BYTE* op = ostart;
652
1309
 
1310
+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
1311
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
1312
+
653
1313
  /* checks & inits */
654
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1314
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
656
1315
  if (!srcSize) return 0; /* Uncompressed */
657
1316
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
658
1317
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -662,17 +1321,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
662
1321
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
663
1322
 
664
1323
  /* Heuristic : If old table is valid, use it for small inputs */
665
- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
1324
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
666
1325
  return HUF_compressCTable_internal(ostart, op, oend,
667
1326
  src, srcSize,
668
- nbStreams, oldHufTable, bmi2);
1327
+ nbStreams, oldHufTable, flags);
1328
+ }
1329
+
1330
+ /* If uncompressible data is suspected, do a smaller sampling first */
1331
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1332
+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1333
+ size_t largestTotal = 0;
1334
+ DEBUGLOG(5, "input suspected incompressible : sampling to check");
1335
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1336
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1337
+ largestTotal += largestBegin;
1338
+ }
1339
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1340
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1341
+ largestTotal += largestEnd;
1342
+ }
1343
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
669
1344
  }
670
1345
 
671
1346
  /* Scan input and build symbol stats */
672
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1347
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
673
1348
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
674
1349
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
675
1350
  }
1351
+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
676
1352
 
677
1353
  /* Check validity of previous table */
678
1354
  if ( repeat
@@ -681,26 +1357,31 @@ HUF_compress_internal (void* dst, size_t dstSize,
681
1357
  *repeat = HUF_repeat_none;
682
1358
  }
683
1359
  /* Heuristic : use existing table for small inputs */
684
- if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
1360
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
685
1361
  return HUF_compressCTable_internal(ostart, op, oend,
686
1362
  src, srcSize,
687
- nbStreams, oldHufTable, bmi2);
1363
+ nbStreams, oldHufTable, flags);
688
1364
  }
689
1365
 
690
1366
  /* Build Huffman Tree */
691
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
1367
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
692
1368
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693
1369
  maxSymbolValue, huffLog,
694
- table->nodeTable, sizeof(table->nodeTable));
1370
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
695
1371
  CHECK_F(maxBits);
696
1372
  huffLog = (U32)maxBits;
697
- /* Zero unused symbols in CTable, so we can check it for validity */
698
- memset(table->CTable + (maxSymbolValue + 1), 0,
699
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1373
+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
1374
+ }
1375
+ /* Zero unused symbols in CTable, so we can check it for validity */
1376
+ {
1377
+ size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
1378
+ size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
1379
+ ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
700
1380
  }
701
1381
 
702
1382
  /* Write table description header */
703
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1383
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1384
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
704
1385
  /* Check if using previous huffman table is beneficial */
705
1386
  if (repeat && *repeat != HUF_repeat_none) {
706
1387
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -708,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
708
1389
  if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
709
1390
  return HUF_compressCTable_internal(ostart, op, oend,
710
1391
  src, srcSize,
711
- nbStreams, oldHufTable, bmi2);
1392
+ nbStreams, oldHufTable, flags);
712
1393
  } }
713
1394
 
714
1395
  /* Use the new huffman table */
@@ -716,83 +1397,39 @@ HUF_compress_internal (void* dst, size_t dstSize,
716
1397
  op += hSize;
717
1398
  if (repeat) { *repeat = HUF_repeat_none; }
718
1399
  if (oldHufTable)
719
- memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
1400
+ ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720
1401
  }
721
1402
  return HUF_compressCTable_internal(ostart, op, oend,
722
1403
  src, srcSize,
723
- nbStreams, table->CTable, bmi2);
724
- }
725
-
726
-
727
- size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
728
- const void* src, size_t srcSize,
729
- unsigned maxSymbolValue, unsigned huffLog,
730
- void* workSpace, size_t wkspSize)
731
- {
732
- return HUF_compress_internal(dst, dstSize, src, srcSize,
733
- maxSymbolValue, huffLog, HUF_singleStream,
734
- workSpace, wkspSize,
735
- NULL, NULL, 0, 0 /*bmi2*/);
1404
+ nbStreams, table->CTable, flags);
736
1405
  }
737
1406
 
738
1407
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739
1408
  const void* src, size_t srcSize,
740
1409
  unsigned maxSymbolValue, unsigned huffLog,
741
1410
  void* workSpace, size_t wkspSize,
742
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1411
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
743
1412
  {
1413
+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
744
1414
  return HUF_compress_internal(dst, dstSize, src, srcSize,
745
1415
  maxSymbolValue, huffLog, HUF_singleStream,
746
1416
  workSpace, wkspSize, hufTable,
747
- repeat, preferRepeat, bmi2);
748
- }
749
-
750
- size_t HUF_compress1X (void* dst, size_t dstSize,
751
- const void* src, size_t srcSize,
752
- unsigned maxSymbolValue, unsigned huffLog)
753
- {
754
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
756
- }
757
-
758
- /* HUF_compress4X_repeat():
759
- * compress input using 4 streams.
760
- * provide workspace to generate compression tables */
761
- size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
762
- const void* src, size_t srcSize,
763
- unsigned maxSymbolValue, unsigned huffLog,
764
- void* workSpace, size_t wkspSize)
765
- {
766
- return HUF_compress_internal(dst, dstSize, src, srcSize,
767
- maxSymbolValue, huffLog, HUF_fourStreams,
768
- workSpace, wkspSize,
769
- NULL, NULL, 0, 0 /*bmi2*/);
1417
+ repeat, flags);
770
1418
  }
771
1419
 
772
1420
  /* HUF_compress4X_repeat():
773
1421
  * compress input using 4 streams.
1422
+ * consider skipping quickly
774
1423
  * re-use an existing huffman compression table */
775
1424
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776
1425
  const void* src, size_t srcSize,
777
1426
  unsigned maxSymbolValue, unsigned huffLog,
778
1427
  void* workSpace, size_t wkspSize,
779
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1428
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
780
1429
  {
1430
+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
781
1431
  return HUF_compress_internal(dst, dstSize, src, srcSize,
782
1432
  maxSymbolValue, huffLog, HUF_fourStreams,
783
1433
  workSpace, wkspSize,
784
- hufTable, repeat, preferRepeat, bmi2);
785
- }
786
-
787
- size_t HUF_compress2 (void* dst, size_t dstSize,
788
- const void* src, size_t srcSize,
789
- unsigned maxSymbolValue, unsigned huffLog)
790
- {
791
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
792
- return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793
- }
794
-
795
- size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
796
- {
797
- return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
1434
+ hufTable, repeat, flags);
798
1435
  }