extzstd 0.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/README.md +38 -56
  4. data/contrib/zstd/CHANGELOG +613 -0
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/CONTRIBUTING.md +406 -0
  7. data/contrib/zstd/COPYING +339 -0
  8. data/contrib/zstd/Makefile +420 -0
  9. data/contrib/zstd/README.md +179 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +292 -0
  12. data/contrib/zstd/lib/BUCK +234 -0
  13. data/contrib/zstd/lib/Makefile +451 -0
  14. data/contrib/zstd/lib/README.md +207 -0
  15. data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
  16. data/contrib/zstd/lib/common/compiler.h +288 -0
  17. data/contrib/zstd/lib/common/cpu.h +213 -0
  18. data/contrib/zstd/lib/common/debug.c +24 -0
  19. data/contrib/zstd/lib/common/debug.h +107 -0
  20. data/contrib/zstd/lib/common/entropy_common.c +362 -0
  21. data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
  22. data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
  23. data/contrib/zstd/{common → lib/common}/fse.h +173 -92
  24. data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
  25. data/contrib/zstd/lib/common/huf.h +361 -0
  26. data/contrib/zstd/{common → lib/common}/mem.h +115 -59
  27. data/contrib/zstd/lib/common/pool.c +350 -0
  28. data/contrib/zstd/lib/common/pool.h +84 -0
  29. data/contrib/zstd/lib/common/threading.c +122 -0
  30. data/contrib/zstd/lib/common/threading.h +155 -0
  31. data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
  32. data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
  33. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  34. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  35. data/contrib/zstd/lib/common/zstd_errors.h +95 -0
  36. data/contrib/zstd/lib/common/zstd_internal.h +478 -0
  37. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
  38. data/contrib/zstd/lib/compress/hist.c +181 -0
  39. data/contrib/zstd/lib/compress/hist.h +75 -0
  40. data/contrib/zstd/lib/compress/huf_compress.c +913 -0
  41. data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  49. data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
  50. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  52. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  54. data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
  56. data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
  58. data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  60. data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
  62. data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
  63. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  65. data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
  69. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
  70. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  71. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  73. data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  75. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  77. data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
  78. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
  79. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  80. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
  81. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
  94. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
  95. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  96. data/contrib/zstd/lib/zstd.h +2391 -0
  97. data/ext/depend +2 -0
  98. data/ext/extconf.rb +15 -6
  99. data/ext/extzstd.c +76 -145
  100. data/ext/extzstd.h +80 -31
  101. data/ext/extzstd_stream.c +417 -142
  102. data/ext/libzstd_conf.h +8 -0
  103. data/ext/zstd_common.c +10 -7
  104. data/ext/zstd_compress.c +14 -5
  105. data/ext/zstd_decompress.c +5 -4
  106. data/ext/zstd_dictbuilder.c +9 -4
  107. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  108. data/ext/zstd_legacy_v01.c +3 -1
  109. data/ext/zstd_legacy_v02.c +3 -1
  110. data/ext/zstd_legacy_v03.c +3 -1
  111. data/ext/zstd_legacy_v04.c +3 -1
  112. data/ext/zstd_legacy_v05.c +3 -1
  113. data/ext/zstd_legacy_v06.c +3 -1
  114. data/ext/zstd_legacy_v07.c +3 -1
  115. data/gemstub.rb +10 -24
  116. data/lib/extzstd.rb +64 -179
  117. data/lib/extzstd/version.rb +6 -1
  118. data/test/test_basic.rb +9 -6
  119. metadata +113 -57
  120. data/HISTORY.ja +0 -5
  121. data/contrib/zstd/common/entropy_common.c +0 -225
  122. data/contrib/zstd/common/huf.h +0 -228
  123. data/contrib/zstd/common/zstd_common.c +0 -83
  124. data/contrib/zstd/common/zstd_errors.h +0 -60
  125. data/contrib/zstd/common/zstd_internal.h +0 -267
  126. data/contrib/zstd/compress/huf_compress.c +0 -533
  127. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  128. data/contrib/zstd/compress/zstd_compress.c +0 -3264
  129. data/contrib/zstd/compress/zstd_opt.h +0 -900
  130. data/contrib/zstd/decompress/huf_decompress.c +0 -883
  131. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  132. data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
  133. data/contrib/zstd/dictBuilder/zdict.h +0 -111
  134. data/contrib/zstd/zstd.h +0 -640
@@ -1,79 +1,37 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy encoder
3
- Copyright (C) 2013-2015, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * FSE : Finite State Entropy encoder
3
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
- /* **************************************************************
36
- * Compiler specifics
37
- ****************************************************************/
38
- #ifdef _MSC_VER /* Visual Studio */
39
- # define FORCE_INLINE static __forceinline
40
- # include <intrin.h> /* For Visual 2005 */
41
- # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
42
- # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
43
- #else
44
- # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
45
- # ifdef __GNUC__
46
- # define FORCE_INLINE static inline __attribute__((always_inline))
47
- # else
48
- # define FORCE_INLINE static inline
49
- # endif
50
- # else
51
- # define FORCE_INLINE static
52
- # endif /* __STDC_VERSION__ */
53
- #endif
54
-
55
-
56
15
  /* **************************************************************
57
16
  * Includes
58
17
  ****************************************************************/
59
- #include <stdlib.h> /* malloc, free, qsort */
60
- #include <string.h> /* memcpy, memset */
61
- #include <stdio.h> /* printf (debug) */
62
- #include "bitstream.h"
18
+ #include "../common/compiler.h"
19
+ #include "../common/mem.h" /* U32, U16, etc. */
20
+ #include "../common/debug.h" /* assert, DEBUGLOG */
21
+ #include "hist.h" /* HIST_count_wksp */
22
+ #include "../common/bitstream.h"
63
23
  #define FSE_STATIC_LINKING_ONLY
64
- #include "fse.h"
24
+ #include "../common/fse.h"
25
+ #include "../common/error_private.h"
26
+ #define ZSTD_DEPS_NEED_MALLOC
27
+ #define ZSTD_DEPS_NEED_MATH64
28
+ #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
65
29
 
66
30
 
67
31
  /* **************************************************************
68
32
  * Error Management
69
33
  ****************************************************************/
70
- #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
71
-
72
-
73
- /* **************************************************************
74
- * Complex types
75
- ****************************************************************/
76
- typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
34
+ #define FSE_isError ERR_isError
77
35
 
78
36
 
79
37
  /* **************************************************************
@@ -100,7 +58,15 @@ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VA
100
58
 
101
59
 
102
60
  /* Function templates */
103
- size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
61
+
62
+ /* FSE_buildCTable_wksp() :
63
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
64
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
65
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
66
+ */
67
+ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
68
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
69
+ void* workSpace, size_t wkspSize)
104
70
  {
105
71
  U32 const tableSize = 1 << tableLog;
106
72
  U32 const tableMask = tableSize - 1;
@@ -109,22 +75,30 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
109
75
  void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
110
76
  FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
111
77
  U32 const step = FSE_TABLESTEP(tableSize);
112
- U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
113
78
 
114
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
79
+ U32* cumul = (U32*)workSpace;
80
+ FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
81
+
115
82
  U32 highThreshold = tableSize-1;
116
83
 
84
+ if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
85
+ if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
117
86
  /* CTable header */
118
87
  tableU16[-2] = (U16) tableLog;
119
88
  tableU16[-1] = (U16) maxSymbolValue;
89
+ assert(tableLog < 16); /* required for threshold strategy to work */
120
90
 
121
91
  /* For explanations on how to distribute symbol values over the table :
122
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
92
+ * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
93
+
94
+ #ifdef __clang_analyzer__
95
+ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
96
+ #endif
123
97
 
124
98
  /* symbol start positions */
125
99
  { U32 u;
126
100
  cumul[0] = 0;
127
- for (u=1; u<=maxSymbolValue+1; u++) {
101
+ for (u=1; u <= maxSymbolValue+1; u++) {
128
102
  if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
129
103
  cumul[u] = cumul[u-1] + 1;
130
104
  tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
@@ -138,14 +112,16 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
138
112
  { U32 position = 0;
139
113
  U32 symbol;
140
114
  for (symbol=0; symbol<=maxSymbolValue; symbol++) {
141
- int nbOccurences;
142
- for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
115
+ int nbOccurrences;
116
+ int const freq = normalizedCounter[symbol];
117
+ for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
143
118
  tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
144
119
  position = (position + step) & tableMask;
145
- while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
120
+ while (position > highThreshold)
121
+ position = (position + step) & tableMask; /* Low proba area */
146
122
  } }
147
123
 
148
- if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
124
+ assert(position==0); /* Must have initialized all positions */
149
125
  }
150
126
 
151
127
  /* Build table */
@@ -160,7 +136,10 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
160
136
  for (s=0; s<=maxSymbolValue; s++) {
161
137
  switch (normalizedCounter[s])
162
138
  {
163
- case 0: break;
139
+ case 0:
140
+ /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
141
+ symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
142
+ break;
164
143
 
165
144
  case -1:
166
145
  case 1:
@@ -177,27 +156,47 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
177
156
  total += normalizedCounter[s];
178
157
  } } } }
179
158
 
159
+ #if 0 /* debug : symbol costs */
160
+ DEBUGLOG(5, "\n --- table statistics : ");
161
+ { U32 symbol;
162
+ for (symbol=0; symbol<=maxSymbolValue; symbol++) {
163
+ DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
164
+ symbol, normalizedCounter[symbol],
165
+ FSE_getMaxNbBits(symbolTT, symbol),
166
+ (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
167
+ }
168
+ }
169
+ #endif
170
+
180
171
  return 0;
181
172
  }
182
173
 
174
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
175
+ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
176
+ {
177
+ FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
178
+ return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
179
+ }
180
+ #endif
181
+
183
182
 
184
183
 
185
184
  #ifndef FSE_COMMONDEFS_ONLY
186
185
 
186
+
187
187
  /*-**************************************************************
188
- * FSE NCount encoding-decoding
188
+ * FSE NCount encoding
189
189
  ****************************************************************/
190
190
  size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
191
191
  {
192
- size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
192
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
193
193
  return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
194
194
  }
195
195
 
196
- static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
197
-
198
- static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
199
- const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
200
- unsigned writeIsSafe)
196
+ static size_t
197
+ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
198
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
199
+ unsigned writeIsSafe)
201
200
  {
202
201
  BYTE* const ostart = (BYTE*) header;
203
202
  BYTE* out = ostart;
@@ -206,13 +205,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
206
205
  const int tableSize = 1 << tableLog;
207
206
  int remaining;
208
207
  int threshold;
209
- U32 bitStream;
210
- int bitCount;
211
- unsigned charnum = 0;
212
- int previous0 = 0;
208
+ U32 bitStream = 0;
209
+ int bitCount = 0;
210
+ unsigned symbol = 0;
211
+ unsigned const alphabetSize = maxSymbolValue + 1;
212
+ int previousIs0 = 0;
213
213
 
214
- bitStream = 0;
215
- bitCount = 0;
216
214
  /* Table Size */
217
215
  bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
218
216
  bitCount += 4;
@@ -222,48 +220,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
222
220
  threshold = tableSize;
223
221
  nbBits = tableLog+1;
224
222
 
225
- while (remaining>1) { /* stops at 1 */
226
- if (previous0) {
227
- unsigned start = charnum;
228
- while (!normalizedCounter[charnum]) charnum++;
229
- while (charnum >= start+24) {
223
+ while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
224
+ if (previousIs0) {
225
+ unsigned start = symbol;
226
+ while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
227
+ if (symbol == alphabetSize) break; /* incorrect distribution */
228
+ while (symbol >= start+24) {
230
229
  start+=24;
231
230
  bitStream += 0xFFFFU << bitCount;
232
- if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
231
+ if ((!writeIsSafe) && (out > oend-2))
232
+ return ERROR(dstSize_tooSmall); /* Buffer overflow */
233
233
  out[0] = (BYTE) bitStream;
234
234
  out[1] = (BYTE)(bitStream>>8);
235
235
  out+=2;
236
236
  bitStream>>=16;
237
237
  }
238
- while (charnum >= start+3) {
238
+ while (symbol >= start+3) {
239
239
  start+=3;
240
240
  bitStream += 3 << bitCount;
241
241
  bitCount += 2;
242
242
  }
243
- bitStream += (charnum-start) << bitCount;
243
+ bitStream += (symbol-start) << bitCount;
244
244
  bitCount += 2;
245
245
  if (bitCount>16) {
246
- if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
246
+ if ((!writeIsSafe) && (out > oend - 2))
247
+ return ERROR(dstSize_tooSmall); /* Buffer overflow */
247
248
  out[0] = (BYTE)bitStream;
248
249
  out[1] = (BYTE)(bitStream>>8);
249
250
  out += 2;
250
251
  bitStream >>= 16;
251
252
  bitCount -= 16;
252
253
  } }
253
- { short count = normalizedCounter[charnum++];
254
- const short max = (short)((2*threshold-1)-remaining);
255
- remaining -= FSE_abs(count);
256
- if (remaining<1) return ERROR(GENERIC);
254
+ { int count = normalizedCounter[symbol++];
255
+ int const max = (2*threshold-1) - remaining;
256
+ remaining -= count < 0 ? -count : count;
257
257
  count++; /* +1 for extra accuracy */
258
- if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
258
+ if (count>=threshold)
259
+ count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
259
260
  bitStream += count << bitCount;
260
261
  bitCount += nbBits;
261
262
  bitCount -= (count<max);
262
- previous0 = (count==1);
263
- while (remaining<threshold) nbBits--, threshold>>=1;
263
+ previousIs0 = (count==1);
264
+ if (remaining<1) return ERROR(GENERIC);
265
+ while (remaining<threshold) { nbBits--; threshold>>=1; }
264
266
  }
265
267
  if (bitCount>16) {
266
- if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
268
+ if ((!writeIsSafe) && (out > oend - 2))
269
+ return ERROR(dstSize_tooSmall); /* Buffer overflow */
267
270
  out[0] = (BYTE)bitStream;
268
271
  out[1] = (BYTE)(bitStream>>8);
269
272
  out += 2;
@@ -271,200 +274,67 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
271
274
  bitCount -= 16;
272
275
  } }
273
276
 
277
+ if (remaining != 1)
278
+ return ERROR(GENERIC); /* incorrect normalized distribution */
279
+ assert(symbol <= alphabetSize);
280
+
274
281
  /* flush remaining bitStream */
275
- if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
282
+ if ((!writeIsSafe) && (out > oend - 2))
283
+ return ERROR(dstSize_tooSmall); /* Buffer overflow */
276
284
  out[0] = (BYTE)bitStream;
277
285
  out[1] = (BYTE)(bitStream>>8);
278
286
  out+= (bitCount+7) /8;
279
287
 
280
- if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
281
-
282
288
  return (out-ostart);
283
289
  }
284
290
 
285
291
 
286
- size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
292
+ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
293
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
287
294
  {
288
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */
295
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
289
296
  if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
290
297
 
291
298
  if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
292
299
  return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
293
300
 
294
- return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
301
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
295
302
  }
296
303
 
297
304
 
298
-
299
- /*-**************************************************************
300
- * Counting histogram
301
- ****************************************************************/
302
- /*! FSE_count_simple
303
- This function just counts byte values within `src`,
304
- and store the histogram into table `count`.
305
- This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
306
- For this reason, prefer using a table `count` with 256 elements.
307
- @return : count of most numerous element
308
- */
309
- static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
310
- const void* src, size_t srcSize)
311
- {
312
- const BYTE* ip = (const BYTE*)src;
313
- const BYTE* const end = ip + srcSize;
314
- unsigned maxSymbolValue = *maxSymbolValuePtr;
315
- unsigned max=0;
316
-
317
-
318
- memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
319
- if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
320
-
321
- while (ip<end) count[*ip++]++;
322
-
323
- while (!count[maxSymbolValue]) maxSymbolValue--;
324
- *maxSymbolValuePtr = maxSymbolValue;
325
-
326
- { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
327
-
328
- return (size_t)max;
329
- }
330
-
331
-
332
- static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
333
- const void* source, size_t sourceSize,
334
- unsigned checkMax)
335
- {
336
- const BYTE* ip = (const BYTE*)source;
337
- const BYTE* const iend = ip+sourceSize;
338
- unsigned maxSymbolValue = *maxSymbolValuePtr;
339
- unsigned max=0;
340
-
341
-
342
- U32 Counting1[256] = { 0 };
343
- U32 Counting2[256] = { 0 };
344
- U32 Counting3[256] = { 0 };
345
- U32 Counting4[256] = { 0 };
346
-
347
- /* safety checks */
348
- if (!sourceSize) {
349
- memset(count, 0, maxSymbolValue + 1);
350
- *maxSymbolValuePtr = 0;
351
- return 0;
352
- }
353
- if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
354
-
355
- /* by stripes of 16 bytes */
356
- { U32 cached = MEM_read32(ip); ip += 4;
357
- while (ip < iend-15) {
358
- U32 c = cached; cached = MEM_read32(ip); ip += 4;
359
- Counting1[(BYTE) c ]++;
360
- Counting2[(BYTE)(c>>8) ]++;
361
- Counting3[(BYTE)(c>>16)]++;
362
- Counting4[ c>>24 ]++;
363
- c = cached; cached = MEM_read32(ip); ip += 4;
364
- Counting1[(BYTE) c ]++;
365
- Counting2[(BYTE)(c>>8) ]++;
366
- Counting3[(BYTE)(c>>16)]++;
367
- Counting4[ c>>24 ]++;
368
- c = cached; cached = MEM_read32(ip); ip += 4;
369
- Counting1[(BYTE) c ]++;
370
- Counting2[(BYTE)(c>>8) ]++;
371
- Counting3[(BYTE)(c>>16)]++;
372
- Counting4[ c>>24 ]++;
373
- c = cached; cached = MEM_read32(ip); ip += 4;
374
- Counting1[(BYTE) c ]++;
375
- Counting2[(BYTE)(c>>8) ]++;
376
- Counting3[(BYTE)(c>>16)]++;
377
- Counting4[ c>>24 ]++;
378
- }
379
- ip-=4;
380
- }
381
-
382
- /* finish last symbols */
383
- while (ip<iend) Counting1[*ip++]++;
384
-
385
- if (checkMax) { /* verify stats will fit into destination table */
386
- U32 s; for (s=255; s>maxSymbolValue; s--) {
387
- Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
388
- if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
389
- } }
390
-
391
- { U32 s; for (s=0; s<=maxSymbolValue; s++) {
392
- count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
393
- if (count[s] > max) max = count[s];
394
- }}
395
-
396
- while (!count[maxSymbolValue]) maxSymbolValue--;
397
- *maxSymbolValuePtr = maxSymbolValue;
398
- return (size_t)max;
399
- }
400
-
401
- /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
402
- size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
403
- const void* source, size_t sourceSize)
404
- {
405
- if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
406
- return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
407
- }
408
-
409
- size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
410
- const void* source, size_t sourceSize)
411
- {
412
- if (*maxSymbolValuePtr <255)
413
- return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
414
- *maxSymbolValuePtr = 255;
415
- return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
416
- }
417
-
418
-
419
-
420
305
  /*-**************************************************************
421
306
  * FSE Compression Code
422
307
  ****************************************************************/
423
- /*! FSE_sizeof_CTable() :
424
- FSE_CTable is a variable size structure which contains :
425
- `U16 tableLog;`
426
- `U16 maxSymbolValue;`
427
- `U16 nextStateNumber[1 << tableLog];` // This size is variable
428
- `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
429
- Allocation is manual (C standard does not support variable-size structures).
430
- */
431
-
432
- size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
433
- {
434
- size_t size;
435
- FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
436
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
437
- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
438
- return size;
439
- }
440
308
 
441
309
  FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
442
310
  {
443
311
  size_t size;
444
312
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
445
313
  size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
446
- return (FSE_CTable*)malloc(size);
314
+ return (FSE_CTable*)ZSTD_malloc(size);
447
315
  }
448
316
 
449
- void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
317
+ void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
450
318
 
451
319
  /* provides the minimum logSize to safely represent a distribution */
452
320
  static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
453
321
  {
454
- U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
455
- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
456
- U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
457
- return minBits;
322
+ U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
323
+ U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
324
+ U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
325
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
326
+ return minBits;
458
327
  }
459
328
 
460
329
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
461
330
  {
462
- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
331
+ U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
463
332
  U32 tableLog = maxTableLog;
464
- U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
333
+ U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
334
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
465
335
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
466
- if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
467
- if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
336
+ if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
337
+ if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
468
338
  if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
469
339
  if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
470
340
  return tableLog;
@@ -475,18 +345,18 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
475
345
  return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
476
346
  }
477
347
 
478
-
479
348
  /* Secondary normalization method.
480
349
  To be used when primary method fails. */
481
350
 
482
- static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
351
+ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
483
352
  {
353
+ short const NOT_YET_ASSIGNED = -2;
484
354
  U32 s;
485
355
  U32 distributed = 0;
486
356
  U32 ToDistribute;
487
357
 
488
358
  /* Init */
489
- U32 lowThreshold = (U32)(total >> tableLog);
359
+ U32 const lowThreshold = (U32)(total >> tableLog);
490
360
  U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
491
361
 
492
362
  for (s=0; s<=maxSymbolValue; s++) {
@@ -495,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
495
365
  continue;
496
366
  }
497
367
  if (count[s] <= lowThreshold) {
498
- norm[s] = -1;
368
+ norm[s] = lowProbCount;
499
369
  distributed++;
500
370
  total -= count[s];
501
371
  continue;
@@ -506,15 +376,19 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
506
376
  total -= count[s];
507
377
  continue;
508
378
  }
509
- norm[s]=-2;
379
+
380
+ norm[s]=NOT_YET_ASSIGNED;
510
381
  }
511
382
  ToDistribute = (1 << tableLog) - distributed;
512
383
 
384
+ if (ToDistribute == 0)
385
+ return 0;
386
+
513
387
  if ((total / ToDistribute) > lowOne) {
514
388
  /* risk of rounding to zero */
515
389
  lowOne = (U32)((total * 3) / (ToDistribute * 2));
516
390
  for (s=0; s<=maxSymbolValue; s++) {
517
- if ((norm[s] == -2) && (count[s] <= lowOne)) {
391
+ if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
518
392
  norm[s] = 1;
519
393
  distributed++;
520
394
  total -= count[s];
@@ -529,22 +403,28 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
529
403
  find max, then give all remaining points to max */
530
404
  U32 maxV = 0, maxC = 0;
531
405
  for (s=0; s<=maxSymbolValue; s++)
532
- if (count[s] > maxC) maxV=s, maxC=count[s];
406
+ if (count[s] > maxC) { maxV=s; maxC=count[s]; }
533
407
  norm[maxV] += (short)ToDistribute;
534
408
  return 0;
535
409
  }
536
410
 
537
- {
538
- U64 const vStepLog = 62 - tableLog;
411
+ if (total == 0) {
412
+ /* all of the symbols were low enough for the lowOne or lowThreshold */
413
+ for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
414
+ if (norm[s] > 0) { ToDistribute--; norm[s]++; }
415
+ return 0;
416
+ }
417
+
418
+ { U64 const vStepLog = 62 - tableLog;
539
419
  U64 const mid = (1ULL << (vStepLog-1)) - 1;
540
- U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
420
+ U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
541
421
  U64 tmpTotal = mid;
542
422
  for (s=0; s<=maxSymbolValue; s++) {
543
- if (norm[s]==-2) {
544
- U64 end = tmpTotal + (count[s] * rStep);
545
- U32 sStart = (U32)(tmpTotal >> vStepLog);
546
- U32 sEnd = (U32)(end >> vStepLog);
547
- U32 weight = sEnd - sStart;
423
+ if (norm[s]==NOT_YET_ASSIGNED) {
424
+ U64 const end = tmpTotal + (count[s] * rStep);
425
+ U32 const sStart = (U32)(tmpTotal >> vStepLog);
426
+ U32 const sEnd = (U32)(end >> vStepLog);
427
+ U32 const weight = sEnd - sStart;
548
428
  if (weight < 1)
549
429
  return ERROR(GENERIC);
550
430
  norm[s] = (short)weight;
@@ -554,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
554
434
  return 0;
555
435
  }
556
436
 
557
-
558
437
  size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
559
438
  const unsigned* count, size_t total,
560
- unsigned maxSymbolValue)
439
+ unsigned maxSymbolValue, unsigned useLowProbCount)
561
440
  {
562
441
  /* Sanity checks */
563
442
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -565,10 +444,10 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
565
444
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
566
445
  if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
567
446
 
568
- { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
569
-
447
+ { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
448
+ short const lowProbCount = useLowProbCount ? -1 : 1;
570
449
  U64 const scale = 62 - tableLog;
571
- U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
450
+ U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
572
451
  U64 const vStep = 1ULL<<(scale-20);
573
452
  int stillToDistribute = 1<<tableLog;
574
453
  unsigned s;
@@ -580,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
580
459
  if (count[s] == total) return 0; /* rle special case */
581
460
  if (count[s] == 0) { normalizedCounter[s]=0; continue; }
582
461
  if (count[s] <= lowThreshold) {
583
- normalizedCounter[s] = -1;
462
+ normalizedCounter[s] = lowProbCount;
584
463
  stillToDistribute--;
585
464
  } else {
586
465
  short proba = (short)((count[s]*step) >> scale);
@@ -588,13 +467,13 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
588
467
  U64 restToBeat = vStep * rtbTable[proba];
589
468
  proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
590
469
  }
591
- if (proba > largestP) largestP=proba, largest=s;
470
+ if (proba > largestP) { largestP=proba; largest=s; }
592
471
  normalizedCounter[s] = proba;
593
472
  stillToDistribute -= proba;
594
473
  } }
595
474
  if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
596
475
  /* corner case, need another normalization method */
597
- size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
476
+ size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
598
477
  if (FSE_isError(errorCode)) return errorCode;
599
478
  }
600
479
  else normalizedCounter[largest] += (short)stillToDistribute;
@@ -605,11 +484,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
605
484
  U32 s;
606
485
  U32 nTotal = 0;
607
486
  for (s=0; s<=maxSymbolValue; s++)
608
- printf("%3i: %4i \n", s, normalizedCounter[s]);
487
+ RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
609
488
  for (s=0; s<=maxSymbolValue; s++)
610
489
  nTotal += abs(normalizedCounter[s]);
611
490
  if (nTotal != (1U<<tableLog))
612
- printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
491
+ RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
613
492
  getchar();
614
493
  }
615
494
  #endif
@@ -643,17 +522,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
643
522
 
644
523
  /* Build Symbol Transformation Table */
645
524
  { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
646
-
647
525
  for (s=0; s<=maxSymbolValue; s++) {
648
526
  symbolTT[s].deltaNbBits = deltaNbBits;
649
527
  symbolTT[s].deltaFindState = s-1;
650
528
  } }
651
529
 
652
-
653
530
  return 0;
654
531
  }
655
532
 
656
- /* fake FSE_CTable, for rle (100% always same symbol) input */
533
+ /* fake FSE_CTable, for rle input (always same symbol) */
657
534
  size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
658
535
  {
659
536
  void* ptr = ct;
@@ -685,14 +562,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
685
562
  const BYTE* const iend = istart + srcSize;
686
563
  const BYTE* ip=iend;
687
564
 
688
-
689
565
  BIT_CStream_t bitC;
690
566
  FSE_CState_t CState1, CState2;
691
567
 
692
568
  /* init */
693
569
  if (srcSize <= 2) return 0;
694
- { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
695
- if (FSE_isError(errorCode)) return 0; }
570
+ { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
571
+ if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
696
572
 
697
573
  #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
698
574
 
@@ -715,7 +591,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
715
591
  }
716
592
 
717
593
  /* 2 or 4 encoding per loop */
718
- for ( ; ip>istart ; ) {
594
+ while ( ip>istart ) {
719
595
 
720
596
  FSE_encodeSymbol(&bitC, &CState2, *--ip);
721
597
 
@@ -741,7 +617,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
741
617
  const void* src, size_t srcSize,
742
618
  const FSE_CTable* ct)
743
619
  {
744
- const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
620
+ unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
745
621
 
746
622
  if (fast)
747
623
  return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
@@ -752,59 +628,78 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
752
628
 
753
629
  size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
754
630
 
755
- size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
631
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
632
+ /* FSE_compress_wksp() :
633
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
634
+ * `wkspSize` size must be `(1<<tableLog)`.
635
+ */
636
+ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
756
637
  {
757
- const BYTE* const istart = (const BYTE*) src;
758
- const BYTE* ip = istart;
759
-
760
638
  BYTE* const ostart = (BYTE*) dst;
761
639
  BYTE* op = ostart;
762
640
  BYTE* const oend = ostart + dstSize;
763
641
 
764
- U32 count[FSE_MAX_SYMBOL_VALUE+1];
642
+ unsigned count[FSE_MAX_SYMBOL_VALUE+1];
765
643
  S16 norm[FSE_MAX_SYMBOL_VALUE+1];
766
- CTable_max_t ct;
767
- size_t errorCode;
644
+ FSE_CTable* CTable = (FSE_CTable*)workSpace;
645
+ size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
646
+ void* scratchBuffer = (void*)(CTable + CTableSize);
647
+ size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
768
648
 
769
649
  /* init conditions */
770
- if (srcSize <= 1) return 0; /* Uncompressible */
650
+ if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
651
+ if (srcSize <= 1) return 0; /* Not compressible */
771
652
  if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
772
653
  if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
773
654
 
774
655
  /* Scan input and build symbol stats */
775
- errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
776
- if (FSE_isError(errorCode)) return errorCode;
777
- if (errorCode == srcSize) return 1;
778
- if (errorCode == 1) return 0; /* each symbol only present once */
779
- if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
656
+ { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
657
+ if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
658
+ if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
659
+ if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
660
+ }
780
661
 
781
662
  tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
782
- errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
783
- if (FSE_isError(errorCode)) return errorCode;
663
+ CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
784
664
 
785
665
  /* Write table description header */
786
- errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
787
- if (FSE_isError(errorCode)) return errorCode;
788
- op += errorCode;
666
+ { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
667
+ op += nc_err;
668
+ }
789
669
 
790
670
  /* Compress */
791
- errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
792
- if (FSE_isError(errorCode)) return errorCode;
793
- errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
794
- if (errorCode == 0) return 0; /* not enough space for compressed data */
795
- op += errorCode;
671
+ CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
672
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
673
+ if (cSize == 0) return 0; /* not enough space for compressed data */
674
+ op += cSize;
675
+ }
796
676
 
797
677
  /* check compressibility */
798
- if ( (size_t)(op-ostart) >= srcSize-1 )
799
- return 0;
678
+ if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
800
679
 
801
680
  return op-ostart;
802
681
  }
803
682
 
804
- size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
683
+ typedef struct {
684
+ FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
685
+ union {
686
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
687
+ BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
688
+ } workspace;
689
+ } fseWkspMax_t;
690
+
691
+ size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
805
692
  {
806
- return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
693
+ fseWkspMax_t scratchBuffer;
694
+ DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
695
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
696
+ return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
807
697
  }
808
698
 
699
+ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
700
+ {
701
+ return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
702
+ }
703
+ #endif
809
704
 
810
705
  #endif /* FSE_COMMONDEFS_ONLY */