zstd-ruby 1.4.4.0 → 1.5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- Huffman encoder, part of New Generation Entropy library
3
- Copyright (C) 2013-2016, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * Huffman encoder, part of New Generation Entropy library
3
+ * Copyright (c) Yann Collet, Facebook, Inc.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
@@ -43,16 +23,15 @@
43
23
  /* **************************************************************
44
24
  * Includes
45
25
  ****************************************************************/
46
- #include <string.h> /* memcpy, memset */
47
- #include <stdio.h> /* printf (debug) */
48
- #include "compiler.h"
49
- #include "bitstream.h"
26
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
27
+ #include "../common/compiler.h"
28
+ #include "../common/bitstream.h"
50
29
  #include "hist.h"
51
30
  #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
52
- #include "fse.h" /* header compression */
31
+ #include "../common/fse.h" /* header compression */
53
32
  #define HUF_STATIC_LINKING_ONLY
54
- #include "huf.h"
55
- #include "error_private.h"
33
+ #include "../common/huf.h"
34
+ #include "../common/error_private.h"
56
35
 
57
36
 
58
37
  /* **************************************************************
@@ -60,8 +39,6 @@
60
39
  ****************************************************************/
61
40
  #define HUF_isError ERR_isError
62
41
  #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
63
- #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
64
- #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
65
42
 
66
43
 
67
44
  /* **************************************************************
@@ -76,13 +53,43 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
76
53
  /* *******************************************************
77
54
  * HUF : Huffman block compression
78
55
  *********************************************************/
56
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
57
+
58
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
59
+ {
60
+ size_t const mask = align - 1;
61
+ size_t const rem = (size_t)workspace & mask;
62
+ size_t const add = (align - rem) & mask;
63
+ BYTE* const aligned = (BYTE*)workspace + add;
64
+ assert((align & (align - 1)) == 0); /* pow 2 */
65
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
66
+ if (*workspaceSizePtr >= add) {
67
+ assert(add < align);
68
+ assert(((size_t)aligned & mask) == 0);
69
+ *workspaceSizePtr -= add;
70
+ return aligned;
71
+ } else {
72
+ *workspaceSizePtr = 0;
73
+ return NULL;
74
+ }
75
+ }
76
+
77
+
79
78
  /* HUF_compressWeights() :
80
79
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
81
80
  * The use case needs much less stack memory.
82
81
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83
82
  */
84
83
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
85
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
84
+
85
+ typedef struct {
86
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
87
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
88
+ unsigned count[HUF_TABLELOG_MAX+1];
89
+ S16 norm[HUF_TABLELOG_MAX+1];
90
+ } HUF_CompressWeightsWksp;
91
+
92
+ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
86
93
  {
87
94
  BYTE* const ostart = (BYTE*) dst;
88
95
  BYTE* op = ostart;
@@ -90,69 +97,101 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
90
97
 
91
98
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
92
99
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
100
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
93
101
 
94
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95
- BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96
-
97
- unsigned count[HUF_TABLELOG_MAX+1];
98
- S16 norm[HUF_TABLELOG_MAX+1];
102
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
99
103
 
100
104
  /* init conditions */
101
105
  if (wtSize <= 1) return 0; /* Not compressible */
102
106
 
103
107
  /* Scan input and build symbol stats */
104
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
108
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
105
109
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
106
110
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
107
111
  }
108
112
 
109
113
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
110
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
114
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
111
115
 
112
116
  /* Write table description header */
113
- { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
117
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
114
118
  op += hSize;
115
119
  }
116
120
 
117
121
  /* Compress */
118
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
119
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
122
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
123
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
120
124
  if (cSize == 0) return 0; /* not enough space for compressed data */
121
125
  op += cSize;
122
126
  }
123
127
 
124
- return op-ostart;
128
+ return (size_t)(op-ostart);
125
129
  }
126
130
 
131
+ static size_t HUF_getNbBits(HUF_CElt elt)
132
+ {
133
+ return elt & 0xFF;
134
+ }
127
135
 
128
- struct HUF_CElt_s {
129
- U16 val;
130
- BYTE nbBits;
131
- }; /* typedef'd to HUF_CElt within "huf.h" */
136
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
137
+ {
138
+ return elt;
139
+ }
132
140
 
133
- /*! HUF_writeCTable() :
134
- `CTable` : Huffman tree to save, using huf representation.
135
- @return : size of saved CTable */
136
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
137
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
141
+ static size_t HUF_getValue(HUF_CElt elt)
138
142
  {
143
+ return elt & ~0xFF;
144
+ }
145
+
146
+ static size_t HUF_getValueFast(HUF_CElt elt)
147
+ {
148
+ return elt;
149
+ }
150
+
151
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
152
+ {
153
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
154
+ *elt = nbBits;
155
+ }
156
+
157
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
158
+ {
159
+ size_t const nbBits = HUF_getNbBits(*elt);
160
+ if (nbBits > 0) {
161
+ assert((value >> nbBits) == 0);
162
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
163
+ }
164
+ }
165
+
166
+ typedef struct {
167
+ HUF_CompressWeightsWksp wksp;
139
168
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
140
169
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
170
+ } HUF_WriteCTableWksp;
171
+
172
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
173
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
174
+ void* workspace, size_t workspaceSize)
175
+ {
176
+ HUF_CElt const* const ct = CTable + 1;
141
177
  BYTE* op = (BYTE*)dst;
142
178
  U32 n;
179
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
143
180
 
144
- /* check conditions */
181
+ /* check conditions */
182
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
145
183
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
146
184
 
147
185
  /* convert to weight */
148
- bitsToWeight[0] = 0;
186
+ wksp->bitsToWeight[0] = 0;
149
187
  for (n=1; n<huffLog+1; n++)
150
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
188
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
151
189
  for (n=0; n<maxSymbolValue; n++)
152
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
190
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
153
191
 
154
192
  /* attempt weights compression by FSE */
155
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
193
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
194
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
156
195
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
157
196
  op[0] = (BYTE)hSize;
158
197
  return hSize+1;
@@ -162,45 +201,59 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
162
201
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
163
202
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
164
203
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
165
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
204
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
166
205
  for (n=0; n<maxSymbolValue; n+=2)
167
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
206
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
168
207
  return ((maxSymbolValue+1)/2) + 1;
169
208
  }
170
209
 
210
+ /*! HUF_writeCTable() :
211
+ `CTable` : Huffman tree to save, using huf representation.
212
+ @return : size of saved CTable */
213
+ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
214
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
215
+ {
216
+ HUF_WriteCTableWksp wksp;
217
+ return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
218
+ }
219
+
171
220
 
172
- size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
221
+ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
173
222
  {
174
223
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
175
224
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
176
225
  U32 tableLog = 0;
177
226
  U32 nbSymbols = 0;
227
+ HUF_CElt* const ct = CTable + 1;
178
228
 
179
229
  /* get symbol weights */
180
230
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
231
+ *hasZeroWeights = (rankVal[0] > 0);
181
232
 
182
233
  /* check result */
183
234
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
184
235
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
185
236
 
237
+ CTable[0] = tableLog;
238
+
186
239
  /* Prepare base value per rank */
187
240
  { U32 n, nextRankStart = 0;
188
241
  for (n=1; n<=tableLog; n++) {
189
- U32 current = nextRankStart;
242
+ U32 curr = nextRankStart;
190
243
  nextRankStart += (rankVal[n] << (n-1));
191
- rankVal[n] = current;
244
+ rankVal[n] = curr;
192
245
  } }
193
246
 
194
247
  /* fill nbBits */
195
248
  { U32 n; for (n=0; n<nbSymbols; n++) {
196
249
  const U32 w = huffWeight[n];
197
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
250
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
198
251
  } }
199
252
 
200
253
  /* fill val */
201
254
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
202
255
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
203
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
256
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
204
257
  /* determine stating value per rank */
205
258
  valPerRank[tableLog+1] = 0; /* for w==0 */
206
259
  { U16 min = 0;
@@ -210,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
210
263
  min >>= 1;
211
264
  } }
212
265
  /* assign value within rank, symbol order */
213
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
266
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
214
267
  }
215
268
 
216
269
  *maxSymbolValuePtr = nbSymbols - 1;
217
270
  return readSize;
218
271
  }
219
272
 
220
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
273
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
221
274
  {
222
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
275
+ const HUF_CElt* ct = CTable + 1;
223
276
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224
- return table[symbolValue].nbBits;
277
+ return (U32)HUF_getNbBits(ct[symbolValue]);
225
278
  }
226
279
 
227
280
 
@@ -232,156 +285,351 @@ typedef struct nodeElt_s {
232
285
  BYTE nbBits;
233
286
  } nodeElt;
234
287
 
288
+ /**
289
+ * HUF_setMaxHeight():
290
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
291
+ *
292
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
293
+ * the tree to so that it is a valid canonical Huffman tree.
294
+ *
295
+ * @pre The sum of the ranks of each symbol == 2^largestBits,
296
+ * where largestBits == huffNode[lastNonNull].nbBits.
297
+ * @post The sum of the ranks of each symbol == 2^largestBits,
298
+ * where largestBits is the return value <= maxNbBits.
299
+ *
300
+ * @param huffNode The Huffman tree modified in place to enforce maxNbBits.
301
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
302
+ * @param maxNbBits The maximum allowed number of bits, which the Huffman tree
303
+ * may not respect. After this function the Huffman tree will
304
+ * respect maxNbBits.
305
+ * @return The maximum number of bits of the Huffman tree after adjustment,
306
+ * necessarily no more than maxNbBits.
307
+ */
235
308
  static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
236
309
  {
237
310
  const U32 largestBits = huffNode[lastNonNull].nbBits;
238
- if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
311
+ /* early exit : no elt > maxNbBits, so the tree is already valid. */
312
+ if (largestBits <= maxNbBits) return largestBits;
239
313
 
240
314
  /* there are several too large elements (at least >= 2) */
241
315
  { int totalCost = 0;
242
316
  const U32 baseCost = 1 << (largestBits - maxNbBits);
243
- U32 n = lastNonNull;
317
+ int n = (int)lastNonNull;
244
318
 
319
+ /* Adjust any ranks > maxNbBits to maxNbBits.
320
+ * Compute totalCost, which is how far the sum of the ranks is
321
+ * we are over 2^largestBits after adjust the offending ranks.
322
+ */
245
323
  while (huffNode[n].nbBits > maxNbBits) {
246
324
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
247
325
  huffNode[n].nbBits = (BYTE)maxNbBits;
248
- n --;
249
- } /* n stops at huffNode[n].nbBits <= maxNbBits */
250
- while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
326
+ n--;
327
+ }
328
+ /* n stops at huffNode[n].nbBits <= maxNbBits */
329
+ assert(huffNode[n].nbBits <= maxNbBits);
330
+ /* n end at index of smallest symbol using < maxNbBits */
331
+ while (huffNode[n].nbBits == maxNbBits) --n;
251
332
 
252
- /* renorm totalCost */
253
- totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
333
+ /* renorm totalCost from 2^largestBits to 2^maxNbBits
334
+ * note : totalCost is necessarily a multiple of baseCost */
335
+ assert((totalCost & (baseCost - 1)) == 0);
336
+ totalCost >>= (largestBits - maxNbBits);
337
+ assert(totalCost > 0);
254
338
 
255
339
  /* repay normalized cost */
256
340
  { U32 const noSymbol = 0xF0F0F0F0;
257
341
  U32 rankLast[HUF_TABLELOG_MAX+2];
258
- int pos;
259
342
 
260
- /* Get pos of last (smallest) symbol per rank */
261
- memset(rankLast, 0xF0, sizeof(rankLast));
343
+ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
344
+ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
262
345
  { U32 currentNbBits = maxNbBits;
346
+ int pos;
263
347
  for (pos=n ; pos >= 0; pos--) {
264
348
  if (huffNode[pos].nbBits >= currentNbBits) continue;
265
349
  currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
266
- rankLast[maxNbBits-currentNbBits] = pos;
350
+ rankLast[maxNbBits-currentNbBits] = (U32)pos;
267
351
  } }
268
352
 
269
353
  while (totalCost > 0) {
270
- U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
354
+ /* Try to reduce the next power of 2 above totalCost because we
355
+ * gain back half the rank.
356
+ */
357
+ U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
271
358
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
272
- U32 highPos = rankLast[nBitsToDecrease];
273
- U32 lowPos = rankLast[nBitsToDecrease-1];
359
+ U32 const highPos = rankLast[nBitsToDecrease];
360
+ U32 const lowPos = rankLast[nBitsToDecrease-1];
274
361
  if (highPos == noSymbol) continue;
362
+ /* Decrease highPos if no symbols of lowPos or if it is
363
+ * not cheaper to remove 2 lowPos than highPos.
364
+ */
275
365
  if (lowPos == noSymbol) break;
276
366
  { U32 const highTotal = huffNode[highPos].count;
277
367
  U32 const lowTotal = 2 * huffNode[lowPos].count;
278
368
  if (highTotal <= lowTotal) break;
279
369
  } }
280
370
  /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
371
+ assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
281
372
  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
282
373
  while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
283
- nBitsToDecrease ++;
374
+ nBitsToDecrease++;
375
+ assert(rankLast[nBitsToDecrease] != noSymbol);
376
+ /* Increase the number of bits to gain back half the rank cost. */
284
377
  totalCost -= 1 << (nBitsToDecrease-1);
378
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
379
+
380
+ /* Fix up the new rank.
381
+ * If the new rank was empty, this symbol is now its smallest.
382
+ * Otherwise, this symbol will be the largest in the new rank so no adjustment.
383
+ */
285
384
  if (rankLast[nBitsToDecrease-1] == noSymbol)
286
- rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
287
- huffNode[rankLast[nBitsToDecrease]].nbBits ++;
385
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
386
+ /* Fix up the old rank.
387
+ * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
388
+ * it must be the only symbol in its rank, so the old rank now has no symbols.
389
+ * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
390
+ * the smallest node in the rank. If the previous position belongs to a different rank,
391
+ * then the rank is now empty.
392
+ */
288
393
  if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
289
394
  rankLast[nBitsToDecrease] = noSymbol;
290
395
  else {
291
396
  rankLast[nBitsToDecrease]--;
292
397
  if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
293
398
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
294
- } } /* while (totalCost > 0) */
295
-
399
+ }
400
+ } /* while (totalCost > 0) */
401
+
402
+ /* If we've removed too much weight, then we have to add it back.
403
+ * To avoid overshooting again, we only adjust the smallest rank.
404
+ * We take the largest nodes from the lowest rank 0 and move them
405
+ * to rank 1. There's guaranteed to be enough rank 0 symbols because
406
+ * TODO.
407
+ */
296
408
  while (totalCost < 0) { /* Sometimes, cost correction overshoot */
297
- if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
409
+ /* special case : no rank 1 symbol (using maxNbBits-1);
410
+ * let's create one from largest rank 0 (using maxNbBits).
411
+ */
412
+ if (rankLast[1] == noSymbol) {
298
413
  while (huffNode[n].nbBits == maxNbBits) n--;
299
414
  huffNode[n+1].nbBits--;
300
- rankLast[1] = n+1;
415
+ assert(n >= 0);
416
+ rankLast[1] = (U32)(n+1);
301
417
  totalCost++;
302
418
  continue;
303
419
  }
304
420
  huffNode[ rankLast[1] + 1 ].nbBits--;
305
421
  rankLast[1]++;
306
422
  totalCost ++;
307
- } } } /* there are several too large elements (at least >= 2) */
423
+ }
424
+ } /* repay normalized cost */
425
+ } /* there are several too large elements (at least >= 2) */
308
426
 
309
427
  return maxNbBits;
310
428
  }
311
429
 
312
-
313
430
  typedef struct {
314
- U32 base;
315
- U32 current;
431
+ U16 base;
432
+ U16 curr;
316
433
  } rankPos;
317
434
 
318
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
319
- {
320
- rankPos rank[32];
435
+ typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
436
+
437
+ /* Number of buckets available for HUF_sort() */
438
+ #define RANK_POSITION_TABLE_SIZE 192
439
+
440
+ typedef struct {
441
+ huffNodeTable huffNodeTbl;
442
+ rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
443
+ } HUF_buildCTable_wksp_tables;
444
+
445
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
446
+ * Strategy is to use as many buckets as possible for representing distinct
447
+ * counts while using the remainder to represent all "large" counts.
448
+ *
449
+ * To satisfy this requirement for 192 buckets, we can do the following:
450
+ * Let buckets 0-166 represent distinct counts of [0, 166]
451
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
452
+ */
453
+ #define RANK_POSITION_MAX_COUNT_LOG 32
454
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
455
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
456
+
457
+ /* Return the appropriate bucket index for a given count. See definition of
458
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
459
+ */
460
+ static U32 HUF_getIndex(U32 const count) {
461
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
462
+ ? count
463
+ : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
464
+ }
465
+
466
+ /* Helper swap function for HUF_quickSortPartition() */
467
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
468
+ nodeElt tmp = *a;
469
+ *a = *b;
470
+ *b = tmp;
471
+ }
472
+
473
+ /* Returns 0 if the huffNode array is not sorted by descending count */
474
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
475
+ U32 i;
476
+ for (i = 1; i < maxSymbolValue1; ++i) {
477
+ if (huffNode[i].count > huffNode[i-1].count) {
478
+ return 0;
479
+ }
480
+ }
481
+ return 1;
482
+ }
483
+
484
+ /* Insertion sort by descending order */
485
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
486
+ int i;
487
+ int const size = high-low+1;
488
+ huffNode += low;
489
+ for (i = 1; i < size; ++i) {
490
+ nodeElt const key = huffNode[i];
491
+ int j = i - 1;
492
+ while (j >= 0 && huffNode[j].count < key.count) {
493
+ huffNode[j + 1] = huffNode[j];
494
+ j--;
495
+ }
496
+ huffNode[j + 1] = key;
497
+ }
498
+ }
499
+
500
+ /* Pivot helper function for quicksort. */
501
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
502
+ /* Simply select rightmost element as pivot. "Better" selectors like
503
+ * median-of-three don't experimentally appear to have any benefit.
504
+ */
505
+ U32 const pivot = arr[high].count;
506
+ int i = low - 1;
507
+ int j = low;
508
+ for ( ; j < high; j++) {
509
+ if (arr[j].count > pivot) {
510
+ i++;
511
+ HUF_swapNodes(&arr[i], &arr[j]);
512
+ }
513
+ }
514
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
515
+ return i + 1;
516
+ }
517
+
518
+ /* Classic quicksort by descending with partially iterative calls
519
+ * to reduce worst case callstack size.
520
+ */
521
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
522
+ int const kInsertionSortThreshold = 8;
523
+ if (high - low < kInsertionSortThreshold) {
524
+ HUF_insertionSort(arr, low, high);
525
+ return;
526
+ }
527
+ while (low < high) {
528
+ int const idx = HUF_quickSortPartition(arr, low, high);
529
+ if (idx - low < high - idx) {
530
+ HUF_simpleQuickSort(arr, low, idx - 1);
531
+ low = idx + 1;
532
+ } else {
533
+ HUF_simpleQuickSort(arr, idx + 1, high);
534
+ high = idx - 1;
535
+ }
536
+ }
537
+ }
538
+
539
/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table: suffix-sum the per-bucket counts so that
     * base becomes the start offset of each bucket in descending-rank order. */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }

    /* Sort each bucket. Only the log2 buckets need sorting: in a
     * distinct-count bucket (index < cutoff) every entry has the same count
     * by construction of HUF_getIndex(), so it is already sorted. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
343
596
 
344
597
  /** HUF_buildCTable_wksp() :
345
598
  * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
346
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
599
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
347
600
  */
348
601
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
349
- typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
350
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
602
+
603
+ /* HUF_buildTree():
604
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
605
+ *
606
+ * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
607
+ * @param maxSymbolValue The maximum symbol value.
608
+ * @return The smallest node in the Huffman tree (by count).
609
+ */
610
+ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
351
611
  {
352
- nodeElt* const huffNode0 = (nodeElt*)workSpace;
353
- nodeElt* const huffNode = huffNode0+1;
354
- U32 n, nonNullRank;
612
+ nodeElt* const huffNode0 = huffNode - 1;
613
+ int nonNullRank;
355
614
  int lowS, lowN;
356
- U16 nodeNb = STARTNODE;
357
- U32 nodeRoot;
358
-
359
- /* safety checks */
360
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
361
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
362
- if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
363
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
364
- memset(huffNode0, 0, sizeof(huffNodeTable));
365
-
366
- /* sort, decreasing order */
367
- HUF_sort(huffNode, count, maxSymbolValue);
368
-
615
+ int nodeNb = STARTNODE;
616
+ int n, nodeRoot;
369
617
  /* init for parents */
370
- nonNullRank = maxSymbolValue;
618
+ nonNullRank = (int)maxSymbolValue;
371
619
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
372
620
  lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
373
621
  huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
374
- huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
622
+ huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
375
623
  nodeNb++; lowS-=2;
376
624
  for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
377
625
  huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
378
626
 
379
627
  /* create parents */
380
628
  while (nodeNb <= nodeRoot) {
381
- U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
382
- U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
629
+ int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
630
+ int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
383
631
  huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
384
- huffNode[n1].parent = huffNode[n2].parent = nodeNb;
632
+ huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
385
633
  nodeNb++;
386
634
  }
387
635
 
@@ -392,126 +640,396 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
392
640
  for (n=0; n<=nonNullRank; n++)
393
641
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
394
642
 
395
- /* enforce maxTableLog */
396
- maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
397
-
398
- /* fill result into tree (val, nbBits) */
399
- { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
400
- U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
401
- if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
402
- for (n=0; n<=nonNullRank; n++)
403
- nbPerRank[huffNode[n].nbBits]++;
404
- /* determine stating value per rank */
405
- { U16 min = 0;
406
- for (n=maxNbBits; n>0; n--) {
407
- valPerRank[n] = min; /* get starting value within each rank */
408
- min += nbPerRank[n];
409
- min >>= 1;
410
- } }
411
- for (n=0; n<=maxSymbolValue; n++)
412
- tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
413
- for (n=0; n<=maxSymbolValue; n++)
414
- tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
415
- }
416
-
417
- return maxNbBits;
643
+ return nonNullRank;
418
644
  }
419
645
 
420
- /** HUF_buildCTable() :
421
- * @return : maxNbBits
422
- * Note : count is used before tree is written, so they can safely overlap
646
/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    /* CTable[0] is a header slot holding the table log; entries start at +1. */
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    /* Histogram code lengths over the non-null symbols. */
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank (canonical code assignment) */
    { U16 min = 0;
      for (n=(int)maxNbBits; n>0; n--) {
          valPerRank[n] = min;      /* get starting value within each rank */
          min += nbPerRank[n];
          min >>= 1;
      } }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}
429
679
 
430
- static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
680
/* HUF_buildCTable_wksp() :
 * Builds a Huffman CTable from the symbol histogram, using the externally
 * allocated scratch buffer `workSpace` (must be 4-byte aligned and at least
 * sizeof(HUF_buildCTable_wksp_tables) large).
 * @return the resulting table log, or an error code. */
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    /* huffNode0[0] is reserved as a sentinel ("fake entry, strong barrier"
     * set by HUF_buildTree); real nodes start at index 1. */
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
      return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
      return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}
709
+
710
+ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
711
+ {
712
+ HUF_CElt const* ct = CTable + 1;
432
713
  size_t nbBits = 0;
433
714
  int s;
434
715
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
435
- nbBits += CTable[s].nbBits * count[s];
716
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
436
717
  }
437
718
  return nbBits >> 3;
438
719
  }
439
720
 
440
- static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
721
+ int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
722
+ HUF_CElt const* ct = CTable + 1;
441
723
  int bad = 0;
442
724
  int s;
443
725
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
444
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
726
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
445
727
  }
446
728
  return !bad;
447
729
  }
448
730
 
449
731
/* Worst-case size of the compressed output for a srcSize-byte input. */
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450
732
 
733
/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];   /* container [1] is filled independently, then merged into [0] */
    size_t bitPos[2];         /* bits held per container; only the low 8 bits are meaningful */

    BYTE* startPtr;           /* beginning of the output buffer */
    BYTE* ptr;                /* current write position */
    BYTE* endPtr;             /* last position where a full container write is safe */
} HUF_CStream_t;
761
+
762
+ /**! HUF_initCStream():
763
+ * Initializes the bitstream.
764
+ * @returns 0 or an error code.
765
+ */
766
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
767
+ void* startPtr, size_t dstCapacity)
768
+ {
769
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
770
+ bitC->startPtr = (BYTE*)startPtr;
771
+ bitC->ptr = bitC->startPtr;
772
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
773
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
774
+ return 0;
775
+ }
776
+
777
+ /*! HUF_addBits():
778
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
779
+ *
780
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
781
+ * See the HUF_CStream_t docs for the format.
782
+ * @param idx Insert into the bitstream at this idx.
783
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
784
+ * to have at least 4 unused bits after this call it may be 1,
785
+ * otherwise it must be 0. HUF_addBits() is faster when fast is set.
786
+ */
787
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
788
+ {
789
+ assert(idx <= 1);
790
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
791
+ /* This is efficient on x86-64 with BMI2 because shrx
792
+ * only reads the low 6 bits of the register. The compiler
793
+ * knows this and elides the mask. When fast is set,
794
+ * every operation can use the same value loaded from elt.
795
+ */
796
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
797
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
798
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
799
+ * doesn't matter that the high bits have noise from the value.
800
+ */
801
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
802
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
803
+ /* The last 4-bits of elt are dirty if fast is set,
804
+ * so we must not be overwriting bits that have already been
805
+ * inserted into the bit container.
806
+ */
807
+ #if DEBUGLEVEL >= 1
808
+ {
809
+ size_t const nbBits = HUF_getNbBits(elt);
810
+ size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
811
+ (void)dirtyBits;
812
+ /* Middle bits are 0. */
813
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
814
+ /* We didn't overwrite any bits in the bit container. */
815
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
816
+ (void)dirtyBits;
817
+ }
818
+ #endif
819
+ }
820
+
821
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
822
+ {
823
+ bitC->bitContainer[1] = 0;
824
+ bitC->bitPos[1] = 0;
825
+ }
826
+
827
/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    /* Make room in container 0, then splice container 1's bits on top. */
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}
839
+
840
/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    /* A full container is always written; ptr advances by whole bytes only,
     * so leftover sub-byte bits are rewritten on the next flush. */
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    /* Slow path clamps ptr at endPtr so HUF_closeCStream() can detect overflow. */
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}
868
+
869
/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    /* NOTE(review): the setter order looks significant — HUF_setValue
     * presumably positions the value based on the already-set nbBits
     * field (per the HUF_CStream_t layout). Keep nbBits first; confirm
     * against the setter definitions before reordering. */
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}
879
+
880
/*! HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    /* Terminate the stream with the 1-bit end mark, then flush. */
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0;   /* overflow detected (ptr was clamped by HUF_flushBits) */
        /* +1 byte when leftover sub-byte bits remain to be accounted for */
        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}
893
+
451
894
/* Encodes one symbol: looks up its (nbBits, value) element in the CTable
 * and appends it to bit container `idx` of the stream. `fast` is the
 * HUF_addBits() template parameter (see its docs). */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}
456
899
 
457
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
900
/* Core single-stream encoding loop. Encodes ip[0..srcSize) back to front,
 * kUnroll symbols per flush, alternating between the stream's two bit
 * containers to break data dependencies between groups.
 * @param kUnroll    number of symbols encoded per flush (template parameter)
 * @param kFastFlush 1 when the output buffer is known large enough to skip
 *                   bounds checks in HUF_flushBits()
 * @param kLastFast  1 when the last symbol of each group may use the fast
 *                   HUF_addBits() path
 */
FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);

}
953
+
954
/**
 * Tight upper bound (in bytes) on the Huffman output for srcSize symbols
 * coded with at most tableLog bits each, plus 8 bytes of slack so the
 * encoder's over-writes stay in bounds. If the destination is at least
 * this large, no bounds checks are needed while encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    size_t const maxTotalBits = srcSize * tableLog;
    return (maxTotalBits >> 3) + 8;
}
461
963
 
462
- #define HUF_FLUSHBITS_2(stream) \
463
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464
964
 
465
965
/* Compresses src as a single Huffman bitstream using CTable.
 * Returns the compressed size in bytes, or 0 when dst is too small.
 * Dispatches to HUF_compress1X_usingCTable_internal_body_loop with
 * tableLog-specific unroll factors; the safe (bounds-checked) variant is
 * used when dst may be smaller than the tight bound or tableLog > 11. */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];    /* CTable[0] holds the table log (see HUF_buildCTableFromTree) */
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }

    /* Fast paths below skip bounds checks (kFastFlush=1): only valid when
     * dst is provably large enough. Unroll factors per tableLog are tuned;
     * presumably chosen so a group fits the 64-bit container — see upstream
     * benchmarks to confirm before changing. */
    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}
511
1029
 
512
1030
  #if DYNAMIC_BMI2
513
1031
 
514
- static TARGET_ATTRIBUTE("bmi2") size_t
1032
+ static BMI2_TARGET_ATTRIBUTE size_t
515
1033
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
516
1034
  const void* src, size_t srcSize,
517
1035
  const HUF_CElt* CTable)
@@ -553,9 +1071,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
553
1071
 
554
1072
/* HUF_compress1X_usingCTable() :
 * Single-stream compression with a prebuilt CTable; BMI2 path disabled. */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

/* Same as above, with an explicit `bmi2` flag forwarded to the internal
 * dispatcher (selects the BMI2-targeted body when DYNAMIC_BMI2 is enabled). */
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}
559
1081
 
560
1082
  static size_t
561
1083
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@@ -573,41 +1095,48 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
573
1095
  if (srcSize < 12) return 0; /* no saving possible : too small input */
574
1096
  op += 6; /* jumpTable */
575
1097
 
576
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577
- if (cSize==0) return 0;
578
- assert(cSize <= 65535);
1098
+ assert(op <= oend);
1099
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1100
+ if (cSize == 0 || cSize > 65535) return 0;
579
1101
  MEM_writeLE16(ostart, (U16)cSize);
580
1102
  op += cSize;
581
1103
  }
582
1104
 
583
1105
  ip += segmentSize;
584
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585
- if (cSize==0) return 0;
586
- assert(cSize <= 65535);
1106
+ assert(op <= oend);
1107
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1108
+ if (cSize == 0 || cSize > 65535) return 0;
587
1109
  MEM_writeLE16(ostart+2, (U16)cSize);
588
1110
  op += cSize;
589
1111
  }
590
1112
 
591
1113
  ip += segmentSize;
592
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
593
- if (cSize==0) return 0;
594
- assert(cSize <= 65535);
1114
+ assert(op <= oend);
1115
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
1116
+ if (cSize == 0 || cSize > 65535) return 0;
595
1117
  MEM_writeLE16(ostart+4, (U16)cSize);
596
1118
  op += cSize;
597
1119
  }
598
1120
 
599
1121
  ip += segmentSize;
600
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
601
- if (cSize==0) return 0;
1122
+ assert(op <= oend);
1123
+ assert(ip <= iend);
1124
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
1125
+ if (cSize == 0 || cSize > 65535) return 0;
602
1126
  op += cSize;
603
1127
  }
604
1128
 
605
- return op-ostart;
1129
+ return (size_t)(op-ostart);
606
1130
  }
607
1131
 
608
1132
/* HUF_compress4X_usingCTable() :
 * Four-stream compression with a prebuilt CTable; BMI2 path disabled. */
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

/* Same as above, with an explicit `bmi2` flag forwarded to the internal
 * dispatcher. */
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}
612
1141
 
613
1142
/* Selects between the single-bitstream and the four-stream output layout. */
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -618,24 +1147,33 @@ static size_t HUF_compressCTable_internal(
618
1147
  HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
619
1148
  {
620
1149
  size_t const cSize = (nbStreams==HUF_singleStream) ?
621
- HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
622
- HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
1150
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
1151
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
623
1152
  if (HUF_isError(cSize)) { return cSize; }
624
1153
  if (cSize==0) { return 0; } /* uncompressible */
625
1154
  op += cSize;
626
1155
  /* check compressibility */
1156
+ assert(op >= ostart);
627
1157
  if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
628
- return op-ostart;
1158
+ return (size_t)(op-ostart);
629
1159
  }
630
1160
 
631
1161
  typedef struct {
632
1162
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
633
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
634
- huffNodeTable nodeTable;
1163
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1164
+ union {
1165
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1166
+ HUF_WriteCTableWksp writeCTable_wksp;
1167
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1168
+ } wksps;
635
1169
  } HUF_compress_tables_t;
636
1170
 
1171
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1172
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1173
+
637
1174
  /* HUF_compress_internal() :
638
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1175
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
1176
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
639
1177
  static size_t
640
1178
  HUF_compress_internal (void* dst, size_t dstSize,
641
1179
  const void* src, size_t srcSize,
@@ -643,16 +1181,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
643
1181
  HUF_nbStreams_e nbStreams,
644
1182
  void* workSpace, size_t wkspSize,
645
1183
  HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
646
- const int bmi2)
1184
+ const int bmi2, unsigned suspectUncompressible)
647
1185
  {
648
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1186
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
649
1187
  BYTE* const ostart = (BYTE*)dst;
650
1188
  BYTE* const oend = ostart + dstSize;
651
1189
  BYTE* op = ostart;
652
1190
 
1191
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
1192
+
653
1193
  /* checks & inits */
654
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1194
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
656
1195
  if (!srcSize) return 0; /* Uncompressed */
657
1196
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
658
1197
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -668,8 +1207,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
668
1207
  nbStreams, oldHufTable, bmi2);
669
1208
  }
670
1209
 
1210
+ /* If uncompressible data is suspected, do a smaller sampling first */
1211
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1212
+ if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1213
+ size_t largestTotal = 0;
1214
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1215
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1216
+ largestTotal += largestBegin;
1217
+ }
1218
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1219
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1220
+ largestTotal += largestEnd;
1221
+ }
1222
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
1223
+ }
1224
+
671
1225
  /* Scan input and build symbol stats */
672
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1226
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
673
1227
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
674
1228
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
675
1229
  }
@@ -691,16 +1245,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
691
1245
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
692
1246
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693
1247
  maxSymbolValue, huffLog,
694
- table->nodeTable, sizeof(table->nodeTable));
1248
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
695
1249
  CHECK_F(maxBits);
696
1250
  huffLog = (U32)maxBits;
697
- /* Zero unused symbols in CTable, so we can check it for validity */
698
- memset(table->CTable + (maxSymbolValue + 1), 0,
699
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1251
+ }
1252
+ /* Zero unused symbols in CTable, so we can check it for validity */
1253
+ {
1254
+ size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
1255
+ size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
1256
+ ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
700
1257
  }
701
1258
 
702
1259
  /* Write table description header */
703
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1260
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1261
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
704
1262
  /* Check if using previous huffman table is beneficial */
705
1263
  if (repeat && *repeat != HUF_repeat_none) {
706
1264
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -716,7 +1274,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
716
1274
  op += hSize;
717
1275
  if (repeat) { *repeat = HUF_repeat_none; }
718
1276
  if (oldHufTable)
719
- memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
1277
+ ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720
1278
  }
721
1279
  return HUF_compressCTable_internal(ostart, op, oend,
722
1280
  src, srcSize,
@@ -732,27 +1290,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
732
1290
  return HUF_compress_internal(dst, dstSize, src, srcSize,
733
1291
  maxSymbolValue, huffLog, HUF_singleStream,
734
1292
  workSpace, wkspSize,
735
- NULL, NULL, 0, 0 /*bmi2*/);
1293
+ NULL, NULL, 0, 0 /*bmi2*/, 0);
736
1294
  }
737
1295
 
738
1296
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739
1297
  const void* src, size_t srcSize,
740
1298
  unsigned maxSymbolValue, unsigned huffLog,
741
1299
  void* workSpace, size_t wkspSize,
742
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1300
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
1301
+ int bmi2, unsigned suspectUncompressible)
743
1302
  {
744
1303
  return HUF_compress_internal(dst, dstSize, src, srcSize,
745
1304
  maxSymbolValue, huffLog, HUF_singleStream,
746
1305
  workSpace, wkspSize, hufTable,
747
- repeat, preferRepeat, bmi2);
748
- }
749
-
750
- size_t HUF_compress1X (void* dst, size_t dstSize,
751
- const void* src, size_t srcSize,
752
- unsigned maxSymbolValue, unsigned huffLog)
753
- {
754
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
1306
+ repeat, preferRepeat, bmi2, suspectUncompressible);
756
1307
  }
757
1308
 
758
1309
  /* HUF_compress4X_repeat():
@@ -766,29 +1317,49 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
766
1317
  return HUF_compress_internal(dst, dstSize, src, srcSize,
767
1318
  maxSymbolValue, huffLog, HUF_fourStreams,
768
1319
  workSpace, wkspSize,
769
- NULL, NULL, 0, 0 /*bmi2*/);
1320
+ NULL, NULL, 0, 0 /*bmi2*/, 0);
770
1321
  }
771
1322
 
772
1323
  /* HUF_compress4X_repeat():
773
1324
  * compress input using 4 streams.
1325
+ * consider skipping quickly
774
1326
  * re-use an existing huffman compression table */
775
1327
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776
1328
  const void* src, size_t srcSize,
777
1329
  unsigned maxSymbolValue, unsigned huffLog,
778
1330
  void* workSpace, size_t wkspSize,
779
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1331
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
780
1332
  {
781
1333
  return HUF_compress_internal(dst, dstSize, src, srcSize,
782
1334
  maxSymbolValue, huffLog, HUF_fourStreams,
783
1335
  workSpace, wkspSize,
784
- hufTable, repeat, preferRepeat, bmi2);
1336
+ hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
1337
+ }
1338
+
1339
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1340
+ /** HUF_buildCTable() :
1341
+ * @return : maxNbBits
1342
+ * Note : count is used before tree is written, so they can safely overlap
1343
+ */
1344
+ size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
1345
+ {
1346
+ HUF_buildCTable_wksp_tables workspace;
1347
+ return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
1348
+ }
1349
+
1350
+ size_t HUF_compress1X (void* dst, size_t dstSize,
1351
+ const void* src, size_t srcSize,
1352
+ unsigned maxSymbolValue, unsigned huffLog)
1353
+ {
1354
+ U64 workSpace[HUF_WORKSPACE_SIZE_U64];
1355
+ return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
785
1356
  }
786
1357
 
787
1358
  size_t HUF_compress2 (void* dst, size_t dstSize,
788
1359
  const void* src, size_t srcSize,
789
1360
  unsigned maxSymbolValue, unsigned huffLog)
790
1361
  {
791
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
1362
+ U64 workSpace[HUF_WORKSPACE_SIZE_U64];
792
1363
  return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793
1364
  }
794
1365
 
@@ -796,3 +1367,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
796
1367
  {
797
1368
  return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
798
1369
  }
1370
+ #endif