extzstd 0.0.2.CONCEPT → 0.0.3.CONCEPT

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +72 -22
  3. data/Rakefile +73 -19
  4. data/contrib/zstd/README.md +68 -0
  5. data/contrib/zstd/common/bitstream.h +414 -0
  6. data/contrib/zstd/common/entropy_common.c +231 -0
  7. data/contrib/zstd/common/error_private.h +125 -0
  8. data/contrib/zstd/common/error_public.h +77 -0
  9. data/contrib/zstd/common/fse.h +628 -0
  10. data/contrib/zstd/common/fse_decompress.c +331 -0
  11. data/contrib/zstd/common/huf.h +228 -0
  12. data/contrib/zstd/common/mem.h +377 -0
  13. data/contrib/zstd/common/xxhash.c +854 -0
  14. data/contrib/zstd/common/xxhash.h +273 -0
  15. data/contrib/zstd/common/zbuff.h +197 -0
  16. data/contrib/zstd/common/zstd.h +475 -0
  17. data/contrib/zstd/common/zstd_common.c +91 -0
  18. data/contrib/zstd/common/zstd_internal.h +238 -0
  19. data/contrib/zstd/compress/fse_compress.c +807 -0
  20. data/contrib/zstd/compress/huf_compress.c +577 -0
  21. data/contrib/zstd/compress/zbuff_compress.c +327 -0
  22. data/contrib/zstd/compress/zstd_compress.c +3074 -0
  23. data/contrib/zstd/compress/zstd_opt.h +1046 -0
  24. data/contrib/zstd/decompress/huf_decompress.c +894 -0
  25. data/contrib/zstd/decompress/zbuff_decompress.c +294 -0
  26. data/contrib/zstd/decompress/zstd_decompress.c +1362 -0
  27. data/contrib/zstd/dictBuilder/divsufsort.c +1913 -0
  28. data/contrib/zstd/dictBuilder/divsufsort.h +67 -0
  29. data/contrib/zstd/dictBuilder/zdict.c +1045 -0
  30. data/contrib/zstd/dictBuilder/zdict.h +113 -0
  31. data/contrib/zstd/legacy/zstd_legacy.h +140 -0
  32. data/contrib/zstd/legacy/zstd_v01.c +2178 -0
  33. data/contrib/zstd/{zstd.h → legacy/zstd_v01.h} +46 -39
  34. data/contrib/zstd/legacy/zstd_v02.c +3748 -0
  35. data/contrib/zstd/legacy/zstd_v02.h +99 -0
  36. data/contrib/zstd/legacy/zstd_v03.c +3389 -0
  37. data/contrib/zstd/legacy/zstd_v03.h +99 -0
  38. data/contrib/zstd/legacy/zstd_v04.c +4056 -0
  39. data/contrib/zstd/legacy/zstd_v04.h +148 -0
  40. data/contrib/zstd/legacy/zstd_v05.c +4325 -0
  41. data/contrib/zstd/legacy/zstd_v05.h +171 -0
  42. data/contrib/zstd/legacy/zstd_v06.c +4581 -0
  43. data/contrib/zstd/legacy/zstd_v06.h +185 -0
  44. data/ext/extconf.rb +10 -12
  45. data/ext/extzstd.c +497 -144
  46. data/ext/extzstd.h +127 -22
  47. data/ext/extzstd_buffered.c +265 -0
  48. data/ext/extzstd_nogvls.h +174 -0
  49. data/ext/zstd_amalgam.c +18 -0
  50. data/ext/zstd_legacy_v01.c +1 -0
  51. data/ext/zstd_legacy_v02.c +1 -0
  52. data/ext/zstd_legacy_v03.c +1 -0
  53. data/ext/zstd_legacy_v04.c +1 -0
  54. data/ext/zstd_legacy_v05.c +1 -0
  55. data/ext/zstd_legacy_v06.c +1 -0
  56. data/gemstub.rb +17 -1
  57. data/lib/extzstd.rb +197 -77
  58. data/lib/extzstd/version.rb +1 -1
  59. data/test/test_basic.rb +41 -0
  60. metadata +70 -20
  61. data/contrib/zstd/Makefile +0 -115
  62. data/contrib/zstd/fse.c +0 -2466
  63. data/contrib/zstd/fse.h +0 -320
  64. data/contrib/zstd/fse_static.h +0 -282
  65. data/contrib/zstd/libzstd.pc.in +0 -14
  66. data/contrib/zstd/zstd.c +0 -1768
  67. data/contrib/zstd/zstd_static.h +0 -89
  68. data/ext/extzstd-stream.c +0 -398
@@ -0,0 +1,91 @@
1
+ /*
2
+ Common functions of Zstd compression library
3
+ Copyright (C) 2015-2016, Yann Collet.
4
+
5
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+ * Redistributions of source code must retain the above copyright
11
+ notice, this list of conditions and the following disclaimer.
12
+ * Redistributions in binary form must reproduce the above
13
+ copyright notice, this list of conditions and the following disclaimer
14
+ in the documentation and/or other materials provided with the
15
+ distribution.
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ You can contact the author at :
29
+ - zstd homepage : http://www.zstd.net/
30
+ */
31
+
32
+
33
+ /*-*************************************
34
+ * Dependencies
35
+ ***************************************/
36
+ #include <stdlib.h> /* malloc */
37
+ #include "error_private.h"
38
+ #define ZSTD_STATIC_LINKING_ONLY
39
+ #include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
40
+ #include "zbuff.h" /* declaration of ZBUFF_isError, ZBUFF_getErrorName */
41
+
42
+
43
+ /*-****************************************
44
+ * Version
45
+ ******************************************/
46
+ unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
47
+
48
+
49
+ /*-****************************************
50
+ * ZSTD Error Management
51
+ ******************************************/
52
+ /*! ZSTD_isError() :
53
+ * tells if a return value is an error code */
54
+ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
55
+
56
+ /*! ZSTD_getErrorName() :
57
+ * provides error code string from function result (useful for debugging) */
58
+ const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
59
+
60
+ /*! ZSTD_getError() :
61
+ * convert a `size_t` function result into a proper ZSTD_errorCode enum */
62
+ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
63
+
64
+ /*! ZSTD_getErrorString() :
65
+ * provides error code string from enum */
66
+ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
67
+
68
+
69
+ /* **************************************************************
70
+ * ZBUFF Error Management
71
+ ****************************************************************/
72
+ unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
73
+
74
+ const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
75
+
76
+
77
+
78
+ void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
79
+ {
80
+ void* address = malloc(size);
81
+ (void)opaque;
82
+ /* printf("alloc %p, %d opaque=%p \n", address, (int)size, opaque); */
83
+ return address;
84
+ }
85
+
86
+ void ZSTD_defaultFreeFunction(void* opaque, void* address)
87
+ {
88
+ (void)opaque;
89
+ /* if (address) printf("free %p opaque=%p \n", address, opaque); */
90
+ free(address);
91
+ }
@@ -0,0 +1,238 @@
1
+ /*
2
+ zstd_internal - common functions to include
3
+ Header File for include
4
+ Copyright (C) 2014-2016, Yann Collet.
5
+
6
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
+
8
+ Redistribution and use in source and binary forms, with or without
9
+ modification, are permitted provided that the following conditions are
10
+ met:
11
+ * Redistributions of source code must retain the above copyright
12
+ notice, this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above
14
+ copyright notice, this list of conditions and the following disclaimer
15
+ in the documentation and/or other materials provided with the
16
+ distribution.
17
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ You can contact the author at :
30
+ - zstd homepage : https://www.zstd.net
31
+ */
32
+ #ifndef ZSTD_CCOMMON_H_MODULE
33
+ #define ZSTD_CCOMMON_H_MODULE
34
+
35
+ /*-*************************************
36
+ * Dependencies
37
+ ***************************************/
38
+ #include "mem.h"
39
+ #include "error_private.h"
40
+ #define ZSTD_STATIC_LINKING_ONLY
41
+ #include "zstd.h"
42
+
43
+
44
+ /*-*************************************
45
+ * Common macros
46
+ ***************************************/
47
+ #define MIN(a,b) ((a)<(b) ? (a) : (b))
48
+ #define MAX(a,b) ((a)>(b) ? (a) : (b))
49
+
50
+
51
+ /*-*************************************
52
+ * Common constants
53
+ ***************************************/
54
+ #define ZSTD_OPT_DEBUG 0 /* 3 = compression stats; 5 = check encoded sequences; 9 = full logs */
55
+ #include <stdio.h>
56
+ #if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
57
+ #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
58
+ #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
59
+ #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
60
+ #else
61
+ #define ZSTD_LOG_PARSER(...)
62
+ #define ZSTD_LOG_ENCODE(...)
63
+ #define ZSTD_LOG_BLOCK(...)
64
+ #endif
65
+
66
+ #define ZSTD_OPT_NUM (1<<12)
67
+ #define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7 */
68
+
69
+ #define ZSTD_REP_NUM 3
70
+ #define ZSTD_REP_INIT ZSTD_REP_NUM
71
+ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
72
+ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
73
+
74
+ #define KB *(1 <<10)
75
+ #define MB *(1 <<20)
76
+ #define GB *(1U<<30)
77
+
78
+ #define BIT7 128
79
+ #define BIT6 64
80
+ #define BIT5 32
81
+ #define BIT4 16
82
+ #define BIT1 2
83
+ #define BIT0 1
84
+
85
+ #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
86
+ static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
87
+ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
88
+
89
+ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
90
+ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
91
+ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
92
+
93
+ #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
94
+ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
95
+
96
+ #define HufLog 12
97
+ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
98
+
99
+ #define LONGNBSEQ 0x7F00
100
+
101
+ #define MINMATCH 3
102
+ #define EQUAL_READ32 4
103
+
104
+ #define Litbits 8
105
+ #define MaxLit ((1<<Litbits) - 1)
106
+ #define MaxML 52
107
+ #define MaxLL 35
108
+ #define MaxOff 28
109
+ #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
110
+ #define MLFSELog 9
111
+ #define LLFSELog 9
112
+ #define OffFSELog 8
113
+
114
+ #define FSE_ENCODING_RAW 0
115
+ #define FSE_ENCODING_RLE 1
116
+ #define FSE_ENCODING_STATIC 2
117
+ #define FSE_ENCODING_DYNAMIC 3
118
+
119
+ static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120
+ 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
121
+ 13,14,15,16 };
122
+ static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
123
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
124
+ -1,-1,-1,-1 };
125
+ static const U32 LL_defaultNormLog = 6;
126
+
127
+ static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129
+ 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
130
+ 12,13,14,15,16 };
131
+ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
132
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
134
+ -1,-1,-1,-1,-1 };
135
+ static const U32 ML_defaultNormLog = 6;
136
+
137
+ static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
138
+ 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
139
+ static const U32 OF_defaultNormLog = 5;
140
+
141
+
142
+ /*-*******************************************
143
+ * Shared functions to include for inlining
144
+ *********************************************/
145
+ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
146
+ #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
147
+
148
+ /*! ZSTD_wildcopy() :
149
+ * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
150
+ #define WILDCOPY_OVERLENGTH 8
151
+ MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
152
+ {
153
+ const BYTE* ip = (const BYTE*)src;
154
+ BYTE* op = (BYTE*)dst;
155
+ BYTE* const oend = op + length;
156
+ do
157
+ COPY8(op, ip)
158
+ while (op < oend);
159
+ }
160
+
161
+
162
+ /*-*******************************************
163
+ * Private interfaces
164
+ *********************************************/
165
+ typedef struct ZSTD_stats_s ZSTD_stats_t;
166
+
167
+ typedef struct {
168
+ U32 off;
169
+ U32 len;
170
+ } ZSTD_match_t;
171
+
172
+ typedef struct {
173
+ U32 price;
174
+ U32 off;
175
+ U32 mlen;
176
+ U32 litlen;
177
+ U32 rep[ZSTD_REP_INIT];
178
+ } ZSTD_optimal_t;
179
+
180
+ #if ZSTD_OPT_DEBUG == 3
181
+ #include ".debug/zstd_stats.h"
182
+ #else
183
+ struct ZSTD_stats_s { U32 unused; };
184
+ MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; }
185
+ MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; }
186
+ MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; }
187
+ MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; }
188
+ #endif /* #if ZSTD_OPT_DEBUG == 3 */
189
+
190
+ typedef struct {
191
+ void* buffer;
192
+ U32* offsetStart;
193
+ U32* offset;
194
+ BYTE* offCodeStart;
195
+ BYTE* litStart;
196
+ BYTE* lit;
197
+ U16* litLengthStart;
198
+ U16* litLength;
199
+ BYTE* llCodeStart;
200
+ U16* matchLengthStart;
201
+ U16* matchLength;
202
+ BYTE* mlCodeStart;
203
+ U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
204
+ U32 longLengthPos;
205
+ /* opt */
206
+ ZSTD_optimal_t* priceTable;
207
+ ZSTD_match_t* matchTable;
208
+ U32* matchLengthFreq;
209
+ U32* litLengthFreq;
210
+ U32* litFreq;
211
+ U32* offCodeFreq;
212
+ U32 matchLengthSum;
213
+ U32 matchSum;
214
+ U32 litLengthSum;
215
+ U32 litSum;
216
+ U32 offCodeSum;
217
+ U32 log2matchLengthSum;
218
+ U32 log2matchSum;
219
+ U32 log2litLengthSum;
220
+ U32 log2litSum;
221
+ U32 log2offCodeSum;
222
+ U32 factor;
223
+ U32 cachedPrice;
224
+ U32 cachedLitLength;
225
+ const BYTE* cachedLiterals;
226
+ ZSTD_stats_t stats;
227
+ } seqStore_t;
228
+
229
+ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
230
+ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
231
+ int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
232
+
233
+ /* custom memory allocation functions */
234
+ void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
235
+ void ZSTD_defaultFreeFunction(void* opaque, void* address);
236
+ static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
237
+
238
+ #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -0,0 +1,807 @@
1
+ /* ******************************************************************
2
+ FSE : Finite State Entropy encoder
3
+ Copyright (C) 2013-2015, Yann Collet.
4
+
5
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+
11
+ * Redistributions of source code must retain the above copyright
12
+ notice, this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above
14
+ copyright notice, this list of conditions and the following disclaimer
15
+ in the documentation and/or other materials provided with the
16
+ distribution.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ You can contact the author at :
31
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
33
+ ****************************************************************** */
34
+
35
+ /* **************************************************************
36
+ * Compiler specifics
37
+ ****************************************************************/
38
+ #ifdef _MSC_VER /* Visual Studio */
39
+ # define FORCE_INLINE static __forceinline
40
+ # include <intrin.h> /* For Visual 2005 */
41
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
42
+ # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
43
+ #else
44
+ # ifdef __GNUC__
45
+ # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
46
+ # define FORCE_INLINE static inline __attribute__((always_inline))
47
+ # else
48
+ # define FORCE_INLINE static inline
49
+ # endif
50
+ #endif
51
+
52
+
53
+ /* **************************************************************
54
+ * Includes
55
+ ****************************************************************/
56
+ #include <stdlib.h> /* malloc, free, qsort */
57
+ #include <string.h> /* memcpy, memset */
58
+ #include <stdio.h> /* printf (debug) */
59
+ #include "bitstream.h"
60
+ #define FSE_STATIC_LINKING_ONLY
61
+ #include "fse.h"
62
+
63
+
64
+ /* **************************************************************
65
+ * Error Management
66
+ ****************************************************************/
67
+ #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
68
+
69
+
70
+ /* **************************************************************
71
+ * Complex types
72
+ ****************************************************************/
73
+ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
74
+
75
+
76
+ /* **************************************************************
77
+ * Templates
78
+ ****************************************************************/
79
+ /*
80
+ designed to be included
81
+ for type-specific functions (template emulation in C)
82
+ Objective is to write these functions only once, for improved maintenance
83
+ */
84
+
85
+ /* safety checks */
86
+ #ifndef FSE_FUNCTION_EXTENSION
87
+ # error "FSE_FUNCTION_EXTENSION must be defined"
88
+ #endif
89
+ #ifndef FSE_FUNCTION_TYPE
90
+ # error "FSE_FUNCTION_TYPE must be defined"
91
+ #endif
92
+
93
+ /* Function names */
94
+ #define FSE_CAT(X,Y) X##Y
95
+ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
96
+ #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
97
+
98
+
99
+ /* Function templates */
100
+ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
101
+ {
102
+ U32 const tableSize = 1 << tableLog;
103
+ U32 const tableMask = tableSize - 1;
104
+ void* const ptr = ct;
105
+ U16* const tableU16 = ( (U16*) ptr) + 2;
106
+ void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
107
+ FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
108
+ U32 const step = FSE_TABLESTEP(tableSize);
109
+ U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
110
+
111
+ FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
112
+ U32 highThreshold = tableSize-1;
113
+
114
+ /* CTable header */
115
+ tableU16[-2] = (U16) tableLog;
116
+ tableU16[-1] = (U16) maxSymbolValue;
117
+
118
+ /* For explanations on how to distribute symbol values over the table :
119
+ * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
120
+
121
+ /* symbol start positions */
122
+ { U32 u;
123
+ cumul[0] = 0;
124
+ for (u=1; u<=maxSymbolValue+1; u++) {
125
+ if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
126
+ cumul[u] = cumul[u-1] + 1;
127
+ tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
128
+ } else {
129
+ cumul[u] = cumul[u-1] + normalizedCounter[u-1];
130
+ } }
131
+ cumul[maxSymbolValue+1] = tableSize+1;
132
+ }
133
+
134
+ /* Spread symbols */
135
+ { U32 position = 0;
136
+ U32 symbol;
137
+ for (symbol=0; symbol<=maxSymbolValue; symbol++) {
138
+ int nbOccurences;
139
+ for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
140
+ tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
141
+ position = (position + step) & tableMask;
142
+ while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
143
+ } }
144
+
145
+ if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
146
+ }
147
+
148
+ /* Build table */
149
+ { U32 u; for (u=0; u<tableSize; u++) {
150
+ FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
151
+ tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
152
+ } }
153
+
154
+ /* Build Symbol Transformation Table */
155
+ { unsigned total = 0;
156
+ unsigned s;
157
+ for (s=0; s<=maxSymbolValue; s++) {
158
+ switch (normalizedCounter[s])
159
+ {
160
+ case 0: break;
161
+
162
+ case -1:
163
+ case 1:
164
+ symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
165
+ symbolTT[s].deltaFindState = total - 1;
166
+ total ++;
167
+ break;
168
+ default :
169
+ {
170
+ U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
171
+ U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
172
+ symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
173
+ symbolTT[s].deltaFindState = total - normalizedCounter[s];
174
+ total += normalizedCounter[s];
175
+ } } } }
176
+
177
+ return 0;
178
+ }
179
+
180
+
181
+
182
+ #ifndef FSE_COMMONDEFS_ONLY
183
+
184
+ /*-**************************************************************
185
+ * FSE NCount encoding-decoding
186
+ ****************************************************************/
187
+ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
188
+ {
189
+ size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
190
+ return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
191
+ }
192
+
193
+ static short FSE_abs(short a) { return a<0 ? -a : a; }
194
+
195
+ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
196
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
197
+ unsigned writeIsSafe)
198
+ {
199
+ BYTE* const ostart = (BYTE*) header;
200
+ BYTE* out = ostart;
201
+ BYTE* const oend = ostart + headerBufferSize;
202
+ int nbBits;
203
+ const int tableSize = 1 << tableLog;
204
+ int remaining;
205
+ int threshold;
206
+ U32 bitStream;
207
+ int bitCount;
208
+ unsigned charnum = 0;
209
+ int previous0 = 0;
210
+
211
+ bitStream = 0;
212
+ bitCount = 0;
213
+ /* Table Size */
214
+ bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
215
+ bitCount += 4;
216
+
217
+ /* Init */
218
+ remaining = tableSize+1; /* +1 for extra accuracy */
219
+ threshold = tableSize;
220
+ nbBits = tableLog+1;
221
+
222
+ while (remaining>1) { /* stops at 1 */
223
+ if (previous0) {
224
+ unsigned start = charnum;
225
+ while (!normalizedCounter[charnum]) charnum++;
226
+ while (charnum >= start+24) {
227
+ start+=24;
228
+ bitStream += 0xFFFFU << bitCount;
229
+ if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
230
+ out[0] = (BYTE) bitStream;
231
+ out[1] = (BYTE)(bitStream>>8);
232
+ out+=2;
233
+ bitStream>>=16;
234
+ }
235
+ while (charnum >= start+3) {
236
+ start+=3;
237
+ bitStream += 3 << bitCount;
238
+ bitCount += 2;
239
+ }
240
+ bitStream += (charnum-start) << bitCount;
241
+ bitCount += 2;
242
+ if (bitCount>16) {
243
+ if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
244
+ out[0] = (BYTE)bitStream;
245
+ out[1] = (BYTE)(bitStream>>8);
246
+ out += 2;
247
+ bitStream >>= 16;
248
+ bitCount -= 16;
249
+ } }
250
+ { short count = normalizedCounter[charnum++];
251
+ const short max = (short)((2*threshold-1)-remaining);
252
+ remaining -= FSE_abs(count);
253
+ if (remaining<1) return ERROR(GENERIC);
254
+ count++; /* +1 for extra accuracy */
255
+ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
256
+ bitStream += count << bitCount;
257
+ bitCount += nbBits;
258
+ bitCount -= (count<max);
259
+ previous0 = (count==1);
260
+ while (remaining<threshold) nbBits--, threshold>>=1;
261
+ }
262
+ if (bitCount>16) {
263
+ if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
264
+ out[0] = (BYTE)bitStream;
265
+ out[1] = (BYTE)(bitStream>>8);
266
+ out += 2;
267
+ bitStream >>= 16;
268
+ bitCount -= 16;
269
+ } }
270
+
271
+ /* flush remaining bitStream */
272
+ if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
273
+ out[0] = (BYTE)bitStream;
274
+ out[1] = (BYTE)(bitStream>>8);
275
+ out+= (bitCount+7) /8;
276
+
277
+ if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
278
+
279
+ return (out-ostart);
280
+ }
281
+
282
+
283
+ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
284
+ {
285
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */
286
+ if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
287
+
288
+ if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
289
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
290
+
291
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
292
+ }
293
+
294
+
295
+
296
+ /*-**************************************************************
297
+ * Counting histogram
298
+ ****************************************************************/
299
+ /*! FSE_count_simple
300
+ This function just counts byte values within `src`,
301
+ and store the histogram into table `count`.
302
+ This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
303
+ For this reason, prefer using a table `count` with 256 elements.
304
+ @return : count of most numerous element
305
+ */
306
+ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
307
+ const void* src, size_t srcSize)
308
+ {
309
+ const BYTE* ip = (const BYTE*)src;
310
+ const BYTE* const end = ip + srcSize;
311
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
312
+ unsigned max=0;
313
+
314
+
315
+ memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
316
+ if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
317
+
318
+ while (ip<end) count[*ip++]++;
319
+
320
+ while (!count[maxSymbolValue]) maxSymbolValue--;
321
+ *maxSymbolValuePtr = maxSymbolValue;
322
+
323
+ { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
324
+
325
+ return (size_t)max;
326
+ }
327
+
328
+
329
+ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
330
+ const void* source, size_t sourceSize,
331
+ unsigned checkMax)
332
+ {
333
+ const BYTE* ip = (const BYTE*)source;
334
+ const BYTE* const iend = ip+sourceSize;
335
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
336
+ unsigned max=0;
337
+
338
+
339
+ U32 Counting1[256] = { 0 };
340
+ U32 Counting2[256] = { 0 };
341
+ U32 Counting3[256] = { 0 };
342
+ U32 Counting4[256] = { 0 };
343
+
344
+ /* safety checks */
345
+ if (!sourceSize) {
346
+ memset(count, 0, maxSymbolValue + 1);
347
+ *maxSymbolValuePtr = 0;
348
+ return 0;
349
+ }
350
+ if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
351
+
352
+ /* by stripes of 16 bytes */
353
+ { U32 cached = MEM_read32(ip); ip += 4;
354
+ while (ip < iend-15) {
355
+ U32 c = cached; cached = MEM_read32(ip); ip += 4;
356
+ Counting1[(BYTE) c ]++;
357
+ Counting2[(BYTE)(c>>8) ]++;
358
+ Counting3[(BYTE)(c>>16)]++;
359
+ Counting4[ c>>24 ]++;
360
+ c = cached; cached = MEM_read32(ip); ip += 4;
361
+ Counting1[(BYTE) c ]++;
362
+ Counting2[(BYTE)(c>>8) ]++;
363
+ Counting3[(BYTE)(c>>16)]++;
364
+ Counting4[ c>>24 ]++;
365
+ c = cached; cached = MEM_read32(ip); ip += 4;
366
+ Counting1[(BYTE) c ]++;
367
+ Counting2[(BYTE)(c>>8) ]++;
368
+ Counting3[(BYTE)(c>>16)]++;
369
+ Counting4[ c>>24 ]++;
370
+ c = cached; cached = MEM_read32(ip); ip += 4;
371
+ Counting1[(BYTE) c ]++;
372
+ Counting2[(BYTE)(c>>8) ]++;
373
+ Counting3[(BYTE)(c>>16)]++;
374
+ Counting4[ c>>24 ]++;
375
+ }
376
+ ip-=4;
377
+ }
378
+
379
+ /* finish last symbols */
380
+ while (ip<iend) Counting1[*ip++]++;
381
+
382
+ if (checkMax) { /* verify stats will fit into destination table */
383
+ U32 s; for (s=255; s>maxSymbolValue; s--) {
384
+ Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
385
+ if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
386
+ } }
387
+
388
+ { U32 s; for (s=0; s<=maxSymbolValue; s++) {
389
+ count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390
+ if (count[s] > max) max = count[s];
391
+ }}
392
+
393
+ while (!count[maxSymbolValue]) maxSymbolValue--;
394
+ *maxSymbolValuePtr = maxSymbolValue;
395
+ return (size_t)max;
396
+ }
397
+
398
+ /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
399
+ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
400
+ const void* source, size_t sourceSize)
401
+ {
402
+ if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
403
+ return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
404
+ }
405
+
406
+ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
407
+ const void* source, size_t sourceSize)
408
+ {
409
+ if (*maxSymbolValuePtr <255)
410
+ return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
411
+ *maxSymbolValuePtr = 255;
412
+ return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
413
+ }
414
+
415
+
416
+
417
+ /*-**************************************************************
418
+ * FSE Compression Code
419
+ ****************************************************************/
420
+ /*! FSE_sizeof_CTable() :
421
+ FSE_CTable is a variable size structure which contains :
422
+ `U16 tableLog;`
423
+ `U16 maxSymbolValue;`
424
+ `U16 nextStateNumber[1 << tableLog];` // This size is variable
425
+ `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
426
+ Allocation is manual (C standard does not support variable-size structures).
427
+ */
428
+
429
+ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
430
+ {
431
+ size_t size;
432
+ FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
433
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
434
+ size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
435
+ return size;
436
+ }
437
+
438
+ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
439
+ {
440
+ size_t size;
441
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
442
+ size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
443
+ return (FSE_CTable*)malloc(size);
444
+ }
445
+
446
+ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
447
+
448
+ /* provides the minimum logSize to safely represent a distribution */
449
+ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
450
+ {
451
+ U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
452
+ U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
453
+ U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
454
+ return minBits;
455
+ }
456
+
457
+ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
458
+ {
459
+ U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
460
+ U32 tableLog = maxTableLog;
461
+ U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
462
+ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
463
+ if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
464
+ if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
465
+ if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
466
+ if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
467
+ return tableLog;
468
+ }
469
+
470
+ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
471
+ {
472
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
473
+ }
474
+
475
+
476
+ /* Secondary normalization method.
477
+ To be used when primary method fails. */
478
+
479
+ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
480
+ {
481
+ U32 s;
482
+ U32 distributed = 0;
483
+ U32 ToDistribute;
484
+
485
+ /* Init */
486
+ U32 lowThreshold = (U32)(total >> tableLog);
487
+ U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
488
+
489
+ for (s=0; s<=maxSymbolValue; s++) {
490
+ if (count[s] == 0) {
491
+ norm[s]=0;
492
+ continue;
493
+ }
494
+ if (count[s] <= lowThreshold) {
495
+ norm[s] = -1;
496
+ distributed++;
497
+ total -= count[s];
498
+ continue;
499
+ }
500
+ if (count[s] <= lowOne) {
501
+ norm[s] = 1;
502
+ distributed++;
503
+ total -= count[s];
504
+ continue;
505
+ }
506
+ norm[s]=-2;
507
+ }
508
+ ToDistribute = (1 << tableLog) - distributed;
509
+
510
+ if ((total / ToDistribute) > lowOne) {
511
+ /* risk of rounding to zero */
512
+ lowOne = (U32)((total * 3) / (ToDistribute * 2));
513
+ for (s=0; s<=maxSymbolValue; s++) {
514
+ if ((norm[s] == -2) && (count[s] <= lowOne)) {
515
+ norm[s] = 1;
516
+ distributed++;
517
+ total -= count[s];
518
+ continue;
519
+ } }
520
+ ToDistribute = (1 << tableLog) - distributed;
521
+ }
522
+
523
+ if (distributed == maxSymbolValue+1) {
524
+ /* all values are pretty poor;
525
+ probably incompressible data (should have already been detected);
526
+ find max, then give all remaining points to max */
527
+ U32 maxV = 0, maxC = 0;
528
+ for (s=0; s<=maxSymbolValue; s++)
529
+ if (count[s] > maxC) maxV=s, maxC=count[s];
530
+ norm[maxV] += (short)ToDistribute;
531
+ return 0;
532
+ }
533
+
534
+ {
535
+ U64 const vStepLog = 62 - tableLog;
536
+ U64 const mid = (1ULL << (vStepLog-1)) - 1;
537
+ U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
538
+ U64 tmpTotal = mid;
539
+ for (s=0; s<=maxSymbolValue; s++) {
540
+ if (norm[s]==-2) {
541
+ U64 end = tmpTotal + (count[s] * rStep);
542
+ U32 sStart = (U32)(tmpTotal >> vStepLog);
543
+ U32 sEnd = (U32)(end >> vStepLog);
544
+ U32 weight = sEnd - sStart;
545
+ if (weight < 1)
546
+ return ERROR(GENERIC);
547
+ norm[s] = (short)weight;
548
+ tmpTotal = end;
549
+ } } }
550
+
551
+ return 0;
552
+ }
553
+
554
+
555
+ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
556
+ const unsigned* count, size_t total,
557
+ unsigned maxSymbolValue)
558
+ {
559
+ /* Sanity checks */
560
+ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
561
+ if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
562
+ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
563
+ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
564
+
565
+ { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
566
+
567
+ U64 const scale = 62 - tableLog;
568
+ U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
569
+ U64 const vStep = 1ULL<<(scale-20);
570
+ int stillToDistribute = 1<<tableLog;
571
+ unsigned s;
572
+ unsigned largest=0;
573
+ short largestP=0;
574
+ U32 lowThreshold = (U32)(total >> tableLog);
575
+
576
+ for (s=0; s<=maxSymbolValue; s++) {
577
+ if (count[s] == total) return 0; /* rle special case */
578
+ if (count[s] == 0) { normalizedCounter[s]=0; continue; }
579
+ if (count[s] <= lowThreshold) {
580
+ normalizedCounter[s] = -1;
581
+ stillToDistribute--;
582
+ } else {
583
+ short proba = (short)((count[s]*step) >> scale);
584
+ if (proba<8) {
585
+ U64 restToBeat = vStep * rtbTable[proba];
586
+ proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
587
+ }
588
+ if (proba > largestP) largestP=proba, largest=s;
589
+ normalizedCounter[s] = proba;
590
+ stillToDistribute -= proba;
591
+ } }
592
+ if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
593
+ /* corner case, need another normalization method */
594
+ size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
595
+ if (FSE_isError(errorCode)) return errorCode;
596
+ }
597
+ else normalizedCounter[largest] += (short)stillToDistribute;
598
+ }
599
+
600
+ #if 0
601
+ { /* Print Table (debug) */
602
+ U32 s;
603
+ U32 nTotal = 0;
604
+ for (s=0; s<=maxSymbolValue; s++)
605
+ printf("%3i: %4i \n", s, normalizedCounter[s]);
606
+ for (s=0; s<=maxSymbolValue; s++)
607
+ nTotal += abs(normalizedCounter[s]);
608
+ if (nTotal != (1U<<tableLog))
609
+ printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
610
+ getchar();
611
+ }
612
+ #endif
613
+
614
+ return tableLog;
615
+ }
616
+
617
+
618
+ /* fake FSE_CTable, for raw (uncompressed) input */
619
+ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
620
+ {
621
+ const unsigned tableSize = 1 << nbBits;
622
+ const unsigned tableMask = tableSize - 1;
623
+ const unsigned maxSymbolValue = tableMask;
624
+ void* const ptr = ct;
625
+ U16* const tableU16 = ( (U16*) ptr) + 2;
626
+ void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
627
+ FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
628
+ unsigned s;
629
+
630
+ /* Sanity checks */
631
+ if (nbBits < 1) return ERROR(GENERIC); /* min size */
632
+
633
+ /* header */
634
+ tableU16[-2] = (U16) nbBits;
635
+ tableU16[-1] = (U16) maxSymbolValue;
636
+
637
+ /* Build table */
638
+ for (s=0; s<tableSize; s++)
639
+ tableU16[s] = (U16)(tableSize + s);
640
+
641
+ /* Build Symbol Transformation Table */
642
+ { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
643
+
644
+ for (s=0; s<=maxSymbolValue; s++) {
645
+ symbolTT[s].deltaNbBits = deltaNbBits;
646
+ symbolTT[s].deltaFindState = s-1;
647
+ } }
648
+
649
+
650
+ return 0;
651
+ }
652
+
653
+ /* fake FSE_CTable, for rle (100% always same symbol) input */
654
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
655
+ {
656
+ void* ptr = ct;
657
+ U16* tableU16 = ( (U16*) ptr) + 2;
658
+ void* FSCTptr = (U32*)ptr + 2;
659
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
660
+
661
+ /* header */
662
+ tableU16[-2] = (U16) 0;
663
+ tableU16[-1] = (U16) symbolValue;
664
+
665
+ /* Build table */
666
+ tableU16[0] = 0;
667
+ tableU16[1] = 0; /* just in case */
668
+
669
+ /* Build Symbol Transformation Table */
670
+ symbolTT[symbolValue].deltaNbBits = 0;
671
+ symbolTT[symbolValue].deltaFindState = 0;
672
+
673
+ return 0;
674
+ }
675
+
676
+
677
+ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
678
+ const void* src, size_t srcSize,
679
+ const FSE_CTable* ct, const unsigned fast)
680
+ {
681
+ const BYTE* const istart = (const BYTE*) src;
682
+ const BYTE* const iend = istart + srcSize;
683
+ const BYTE* ip=iend;
684
+
685
+
686
+ BIT_CStream_t bitC;
687
+ FSE_CState_t CState1, CState2;
688
+
689
+ /* init */
690
+ if (srcSize <= 2) return 0;
691
+ { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
692
+ if (FSE_isError(errorCode)) return 0; }
693
+
694
+ #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
695
+
696
+ if (srcSize & 1) {
697
+ FSE_initCState2(&CState1, ct, *--ip);
698
+ FSE_initCState2(&CState2, ct, *--ip);
699
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
700
+ FSE_FLUSHBITS(&bitC);
701
+ } else {
702
+ FSE_initCState2(&CState2, ct, *--ip);
703
+ FSE_initCState2(&CState1, ct, *--ip);
704
+ }
705
+
706
+ /* join to mod 4 */
707
+ srcSize -= 2;
708
+ if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
709
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
710
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
711
+ FSE_FLUSHBITS(&bitC);
712
+ }
713
+
714
+ /* 2 or 4 encoding per loop */
715
+ for ( ; ip>istart ; ) {
716
+
717
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
718
+
719
+ if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
720
+ FSE_FLUSHBITS(&bitC);
721
+
722
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
723
+
724
+ if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
725
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
726
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
727
+ }
728
+
729
+ FSE_FLUSHBITS(&bitC);
730
+ }
731
+
732
+ FSE_flushCState(&bitC, &CState2);
733
+ FSE_flushCState(&bitC, &CState1);
734
+ return BIT_closeCStream(&bitC);
735
+ }
736
+
737
+ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
738
+ const void* src, size_t srcSize,
739
+ const FSE_CTable* ct)
740
+ {
741
+ const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
742
+
743
+ if (fast)
744
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
745
+ else
746
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
747
+ }
748
+
749
+
750
+ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
751
+
752
+ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
753
+ {
754
+ const BYTE* const istart = (const BYTE*) src;
755
+ const BYTE* ip = istart;
756
+
757
+ BYTE* const ostart = (BYTE*) dst;
758
+ BYTE* op = ostart;
759
+ BYTE* const oend = ostart + dstSize;
760
+
761
+ U32 count[FSE_MAX_SYMBOL_VALUE+1];
762
+ S16 norm[FSE_MAX_SYMBOL_VALUE+1];
763
+ CTable_max_t ct;
764
+ size_t errorCode;
765
+
766
+ /* init conditions */
767
+ if (srcSize <= 1) return 0; /* Uncompressible */
768
+ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
769
+ if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
770
+
771
+ /* Scan input and build symbol stats */
772
+ errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
773
+ if (FSE_isError(errorCode)) return errorCode;
774
+ if (errorCode == srcSize) return 1;
775
+ if (errorCode == 1) return 0; /* each symbol only present once */
776
+ if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
777
+
778
+ tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
779
+ errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
780
+ if (FSE_isError(errorCode)) return errorCode;
781
+
782
+ /* Write table description header */
783
+ errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
784
+ if (FSE_isError(errorCode)) return errorCode;
785
+ op += errorCode;
786
+
787
+ /* Compress */
788
+ errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
789
+ if (FSE_isError(errorCode)) return errorCode;
790
+ errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
791
+ if (errorCode == 0) return 0; /* not enough space for compressed data */
792
+ op += errorCode;
793
+
794
+ /* check compressibility */
795
+ if ( (size_t)(op-ostart) >= srcSize-1 )
796
+ return 0;
797
+
798
+ return op-ostart;
799
+ }
800
+
801
+ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
802
+ {
803
+ return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
804
+ }
805
+
806
+
807
+ #endif /* FSE_COMMONDEFS_ONLY */