extzstd 0.0.1.CONCEPT

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1521 @@
1
+ /* ******************************************************************
2
+ FSE : Finite State Entropy coder
3
+ Copyright (C) 2013-2015, Yann Collet.
4
+
5
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+
11
+ * Redistributions of source code must retain the above copyright
12
+ notice, this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above
14
+ copyright notice, this list of conditions and the following disclaimer
15
+ in the documentation and/or other materials provided with the
16
+ distribution.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ You can contact the author at :
31
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
33
+ ****************************************************************** */
34
+
35
+ #ifndef FSE_COMMONDEFS_ONLY
36
+
37
+ /****************************************************************
38
+ * Tuning parameters
39
+ ****************************************************************/
40
+ /* MEMORY_USAGE :
41
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
42
+ * Increasing memory usage improves compression ratio
43
+ * Reduced memory usage can improve speed, due to cache effect
44
+ * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
45
+ #define FSE_MAX_MEMORY_USAGE 14
46
+ #define FSE_DEFAULT_MEMORY_USAGE 13
47
+
48
+ /* FSE_MAX_SYMBOL_VALUE :
49
+ * Maximum symbol value authorized.
50
+ * Required for proper stack allocation */
51
+ #define FSE_MAX_SYMBOL_VALUE 255
52
+
53
+
54
+ /****************************************************************
55
+ * Generic function type & suffix (C template emulation)
56
+ ****************************************************************/
57
+ #define FSE_FUNCTION_TYPE BYTE
58
+ #define FSE_FUNCTION_EXTENSION
59
+
60
+ #endif /* !FSE_COMMONDEFS_ONLY */
61
+
62
+
63
+ /****************************************************************
64
+ * Compiler specifics
65
+ ****************************************************************/
66
+ #ifdef _MSC_VER /* Visual Studio */
67
+ # define FORCE_INLINE static __forceinline
68
+ # include <intrin.h> /* For Visual 2005 */
69
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
70
+ # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
71
+ #else
72
+ # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
73
+ # ifdef __GNUC__
74
+ # define FORCE_INLINE static inline __attribute__((always_inline))
75
+ # else
76
+ # define FORCE_INLINE static inline
77
+ # endif
78
+ #endif
79
+
80
+
81
+ /****************************************************************
82
+ * Includes
83
+ ****************************************************************/
84
+ #include <stdlib.h> /* malloc, free, qsort */
85
+ #include <string.h> /* memcpy, memset */
86
+ #include <stdio.h> /* printf (debug) */
87
+ #include "fse_static.h"
88
+
89
+
90
+ /****************************************************************
91
+ * Basic Types
92
+ *****************************************************************/
93
+ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
94
+ # include <stdint.h>
95
+ typedef uint8_t BYTE;
96
+ typedef uint16_t U16;
97
+ typedef int16_t S16;
98
+ typedef uint32_t U32;
99
+ typedef int32_t S32;
100
+ typedef uint64_t U64;
101
+ typedef int64_t S64;
102
+ #else
103
+ typedef unsigned char BYTE;
104
+ typedef unsigned short U16;
105
+ typedef signed short S16;
106
+ typedef unsigned int U32;
107
+ typedef signed int S32;
108
+ typedef unsigned long long U64;
109
+ typedef signed long long S64;
110
+ #endif
111
+
112
+
113
+ /****************************************************************
114
+ * Memory I/O
115
+ *****************************************************************/
116
+ static unsigned FSE_isLittleEndian(void)
117
+ {
118
+ const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
119
+ return one.c[0];
120
+ }
121
+
122
+ static U32 FSE_read32(const void* memPtr)
123
+ {
124
+ U32 val32;
125
+ memcpy(&val32, memPtr, 4);
126
+ return val32;
127
+ }
128
+
129
+ static U32 FSE_readLE32(const void* memPtr)
130
+ {
131
+ if (FSE_isLittleEndian())
132
+ return FSE_read32(memPtr);
133
+ else
134
+ {
135
+ const BYTE* p = (const BYTE*)memPtr;
136
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
137
+ }
138
+ }
139
+
140
+ static void FSE_writeLE32(void* memPtr, U32 val32)
141
+ {
142
+ if (FSE_isLittleEndian())
143
+ {
144
+ memcpy(memPtr, &val32, 4);
145
+ }
146
+ else
147
+ {
148
+ BYTE* p = (BYTE*)memPtr;
149
+ p[0] = (BYTE)val32;
150
+ p[1] = (BYTE)(val32>>8);
151
+ p[2] = (BYTE)(val32>>16);
152
+ p[3] = (BYTE)(val32>>24);
153
+ }
154
+ }
155
+
156
+ static U64 FSE_read64(const void* memPtr)
157
+ {
158
+ U64 val64;
159
+ memcpy(&val64, memPtr, 8);
160
+ return val64;
161
+ }
162
+
163
+ static U64 FSE_readLE64(const void* memPtr)
164
+ {
165
+ if (FSE_isLittleEndian())
166
+ return FSE_read64(memPtr);
167
+ else
168
+ {
169
+ const BYTE* p = (const BYTE*)memPtr;
170
+ return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
171
+ + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
172
+ }
173
+ }
174
+
175
+ static void FSE_writeLE64(void* memPtr, U64 val64)
176
+ {
177
+ if (FSE_isLittleEndian())
178
+ {
179
+ memcpy(memPtr, &val64, 8);
180
+ }
181
+ else
182
+ {
183
+ BYTE* p = (BYTE*)memPtr;
184
+ p[0] = (BYTE)val64;
185
+ p[1] = (BYTE)(val64>>8);
186
+ p[2] = (BYTE)(val64>>16);
187
+ p[3] = (BYTE)(val64>>24);
188
+ p[4] = (BYTE)(val64>>32);
189
+ p[5] = (BYTE)(val64>>40);
190
+ p[6] = (BYTE)(val64>>48);
191
+ p[7] = (BYTE)(val64>>56);
192
+ }
193
+ }
194
+
195
/* FSE_readLEST :
 * Reads a little-endian value as wide as size_t :
 * 4 bytes when sizeof(size_t)==4, 8 bytes otherwise. */
static size_t FSE_readLEST(const void* memPtr)
{
    return (sizeof(size_t)==4) ? (size_t)FSE_readLE32(memPtr)
                               : (size_t)FSE_readLE64(memPtr);
}
202
+
203
+ static void FSE_writeLEST(void* memPtr, size_t val)
204
+ {
205
+ if (sizeof(size_t)==4)
206
+ FSE_writeLE32(memPtr, (U32)val);
207
+ else
208
+ FSE_writeLE64(memPtr, (U64)val);
209
+ }
210
+
211
+
212
+ /****************************************************************
213
+ * Constants
214
+ *****************************************************************/
215
+ #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
216
+ #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
217
+ #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
218
+ #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
219
+ #define FSE_MIN_TABLELOG 5
220
+
221
+ #define FSE_TABLELOG_ABSOLUTE_MAX 15
222
+ #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
223
+ #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
224
+ #endif
225
+
226
+
227
+ /****************************************************************
228
+ * Error Management
229
+ ****************************************************************/
230
+ #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
231
+
232
+
233
+ /****************************************************************
234
+ * Complex types
235
+ ****************************************************************/
236
+ typedef struct /* per-symbol encoding parameters, read by FSE_encodeByte() */
237
+ {
238
+ int deltaFindState; /* added to (state >> nbBitsOut) to index the state table */
239
+ U16 maxState; /* a state above this value makes the encoder write one bit more than minBitsOut */
240
+ BYTE minBitsOut; /* baseline number of state bits written when encoding this symbol */
241
+ /* one byte padding */
242
+ } FSE_symbolCompressionTransform;
243
+
244
+ typedef struct /* fixed-size CTable storage, dimensioned for FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE; used as a local in FSE_compress2() */
245
+ {
246
+ U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; /* compatible with FSE_compressU16() */
247
+ } CTable_max_t;
248
+
249
+
250
+ /****************************************************************
251
+ * Internal functions
252
+ ****************************************************************/
253
+ FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
254
+ {
255
+ # if defined(_MSC_VER) /* Visual */
256
+ unsigned long r;
257
+ _BitScanReverse ( &r, val );
258
+ return (unsigned) r;
259
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
260
+ return 31 - __builtin_clz (val);
261
+ # else /* Software version */
262
+ static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
263
+ U32 v = val;
264
+ unsigned r;
265
+ v |= v >> 1;
266
+ v |= v >> 2;
267
+ v |= v >> 4;
268
+ v |= v >> 8;
269
+ v |= v >> 16;
270
+ r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
271
+ return r;
272
+ # endif
273
+ }
274
+
275
+
276
+ #ifndef FSE_COMMONDEFS_ONLY
277
+
278
+ unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
279
+
280
+ #define FSE_GENERATE_STRING(STRING) #STRING,
281
+ static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
282
+
283
+ const char* FSE_getErrorName(size_t code)
284
+ {
285
+ static const char* codeError = "Unspecified error code";
286
+ if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
287
+ return codeError;
288
+ }
289
+
290
/* FSE_abs :
 * Absolute value of a short. */
static short FSE_abs(short a)
{
    if (a < 0) return -a;
    return a;
}
294
+
295
+
296
+ /****************************************************************
297
+ * Header bitstream management
298
+ ****************************************************************/
299
+ size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)
300
+ {
301
+ size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
302
+ return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
303
+ }
304
+
305
+ static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
306
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
307
+ unsigned safeWrite)
308
+ {
309
+ BYTE* const ostart = (BYTE*) header;
310
+ BYTE* out = ostart;
311
+ BYTE* const oend = ostart + headerBufferSize;
312
+ int nbBits;
313
+ const int tableSize = 1 << tableLog;
314
+ int remaining;
315
+ int threshold;
316
+ U32 bitStream;
317
+ int bitCount;
318
+ unsigned charnum = 0;
319
+ int previous0 = 0;
320
+
321
+ bitStream = 0;
322
+ bitCount = 0;
323
+ /* Table Size */
324
+ bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
325
+ bitCount += 4;
326
+
327
+ /* Init */
328
+ remaining = tableSize+1; /* +1 for extra accuracy */
329
+ threshold = tableSize;
330
+ nbBits = tableLog+1;
331
+
332
+ while (remaining>1) /* stops at 1 */
333
+ {
334
+ if (previous0)
335
+ {
336
+ unsigned start = charnum;
337
+ while (!normalizedCounter[charnum]) charnum++;
338
+ while (charnum >= start+24)
339
+ {
340
+ start+=24;
341
+ bitStream += 0xFFFF<<bitCount;
342
+ if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
343
+ out[0] = (BYTE)bitStream;
344
+ out[1] = (BYTE)(bitStream>>8);
345
+ out+=2;
346
+ bitStream>>=16;
347
+ }
348
+ while (charnum >= start+3)
349
+ {
350
+ start+=3;
351
+ bitStream += 3 << bitCount;
352
+ bitCount += 2;
353
+ }
354
+ bitStream += (charnum-start) << bitCount;
355
+ bitCount += 2;
356
+ if (bitCount>16)
357
+ {
358
+ if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
359
+ out[0] = (BYTE)bitStream;
360
+ out[1] = (BYTE)(bitStream>>8);
361
+ out += 2;
362
+ bitStream >>= 16;
363
+ bitCount -= 16;
364
+ }
365
+ }
366
+ {
367
+ short count = normalizedCounter[charnum++];
368
+ const short max = (short)((2*threshold-1)-remaining);
369
+ remaining -= FSE_abs(count);
370
+ if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;
371
+ count++; /* +1 for extra accuracy */
372
+ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
373
+ bitStream += count << bitCount;
374
+ bitCount += nbBits;
375
+ bitCount -= (count<max);
376
+ previous0 = (count==1);
377
+ while (remaining<threshold) nbBits--, threshold>>=1;
378
+ }
379
+ if (bitCount>16)
380
+ {
381
+ if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
382
+ out[0] = (BYTE)bitStream;
383
+ out[1] = (BYTE)(bitStream>>8);
384
+ out += 2;
385
+ bitStream >>= 16;
386
+ bitCount -= 16;
387
+ }
388
+ }
389
+
390
+ /* flush remaining bitStream */
391
+ if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
392
+ out[0] = (BYTE)bitStream;
393
+ out[1] = (BYTE)(bitStream>>8);
394
+ out+= (bitCount+7) /8;
395
+
396
+ if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC; /* Too many symbols written (a bit too late?) */
397
+
398
+ return (out-ostart);
399
+ }
400
+
401
+
402
+ size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
403
+ {
404
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
405
+ if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
406
+
407
+ if (headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog))
408
+ return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
409
+
410
+ return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
411
+ }
412
+
413
+
414
+ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
415
+ const void* headerBuffer, size_t hbSize)
416
+ {
417
+ const BYTE* const istart = (const BYTE*) headerBuffer;
418
+ const BYTE* ip = istart;
419
+ int nbBits;
420
+ int remaining;
421
+ int threshold;
422
+ U32 bitStream;
423
+ int bitCount;
424
+ unsigned charnum = 0;
425
+ int previous0 = 0;
426
+
427
+ bitStream = FSE_readLE32(ip);
428
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
429
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
430
+ bitStream >>= 4;
431
+ bitCount = 4;
432
+ *tableLogPtr = nbBits;
433
+ remaining = (1<<nbBits)+1;
434
+ threshold = 1<<nbBits;
435
+ nbBits++;
436
+
437
+ while ((remaining>1) && (charnum<=*maxSVPtr))
438
+ {
439
+ if (previous0)
440
+ {
441
+ unsigned n0 = charnum;
442
+ while ((bitStream & 0xFFFF) == 0xFFFF)
443
+ {
444
+ n0+=24;
445
+ ip+=2;
446
+ bitStream = FSE_readLE32(ip) >> bitCount;
447
+ }
448
+ while ((bitStream & 3) == 3)
449
+ {
450
+ n0+=3;
451
+ bitStream>>=2;
452
+ bitCount+=2;
453
+ }
454
+ n0 += bitStream & 3;
455
+ bitCount += 2;
456
+ if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;
457
+ while (charnum < n0) normalizedCounter[charnum++] = 0;
458
+ ip += bitCount>>3;
459
+ bitCount &= 7;
460
+ bitStream = FSE_readLE32(ip) >> bitCount;
461
+ }
462
+ {
463
+ const short max = (short)((2*threshold-1)-remaining);
464
+ short count;
465
+
466
+ if ((bitStream & (threshold-1)) < (U32)max)
467
+ {
468
+ count = (short)(bitStream & (threshold-1));
469
+ bitCount += nbBits-1;
470
+ }
471
+ else
472
+ {
473
+ count = (short)(bitStream & (2*threshold-1));
474
+ if (count >= threshold) count -= max;
475
+ bitCount += nbBits;
476
+ }
477
+
478
+ count--; /* extra accuracy */
479
+ remaining -= FSE_abs(count);
480
+ normalizedCounter[charnum++] = count;
481
+ previous0 = !count;
482
+ while (remaining < threshold)
483
+ {
484
+ nbBits--;
485
+ threshold >>= 1;
486
+ }
487
+
488
+ ip += bitCount>>3;
489
+ bitCount &= 7;
490
+ bitStream = FSE_readLE32(ip) >> bitCount;
491
+ }
492
+ }
493
+ if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
494
+ *maxSVPtr = charnum-1;
495
+
496
+ ip += bitCount>0;
497
+ if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* arguably a bit late , tbd */
498
+ return ip-istart;
499
+ }
500
+
501
+
502
+ /****************************************************************
503
+ * FSE Compression Code
504
+ ****************************************************************/
505
+ /*
506
+ CTable is a variable size structure which contains :
507
+ U16 tableLog;
508
+ U16 maxSymbolValue;
509
+ U16 nextStateNumber[1 << tableLog]; // This size is variable
510
+ FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1]; // This size is variable
511
+ Allocation is manual, since C standard does not support variable-size structures.
512
+ */
513
+
514
+ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
515
+ {
516
+ size_t size;
517
+ FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
518
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC;
519
+ size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
520
+ return size;
521
+ }
522
+
523
+ void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
524
+ {
525
+ size_t size;
526
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
527
+ size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
528
+ return malloc(size);
529
+ }
530
+
531
/* FSE_freeCTable :
 * Releases a table by handing the pointer straight to free(). */
void FSE_freeCTable (void* CTable)
{
    free(CTable);
}
535
+
536
+
537
+ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
538
+ {
539
+ U32 tableLog = maxTableLog;
540
+ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
541
+ if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
542
+ if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */
543
+ if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
544
+ if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
545
+ return tableLog;
546
+ }
547
+
548
+
549
+ typedef struct /* (symbol, weight) pair sorted and re-ranked by FSE_adjustNormSlow() */
550
+ {
551
+ U32 id; /* symbol value; used as index into norm[] */
552
+ U32 count; /* ranking weight : taken from count[], or forced to 0 */
553
+ } rank_t;
554
+
555
+ int FSE_compareRankT(const void* r1, const void* r2)
556
+ {
557
+ const rank_t* R1 = (const rank_t*)r1;
558
+ const rank_t* R2 = (const rank_t*)r2;
559
+
560
+ return 2 * (R1->count < R2->count) - 1;
561
+ }
562
+
563
+ static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
564
+ {
565
+ rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
566
+ U32 s;
567
+
568
+ /* Init */
569
+ for (s=0; s<=maxSymbolValue; s++)
570
+ {
571
+ rank[s].id = s;
572
+ rank[s].count = count[s];
573
+ if (norm[s] <= 1) rank[s].count = 0;
574
+ }
575
+ rank[maxSymbolValue+1].id = 0;
576
+ rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
577
+
578
+ /* Sort according to count */
579
+ qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
580
+
581
+ while(pointsToRemove)
582
+ {
583
+ int newRank = 1;
584
+ rank_t savedR;
585
+ if (norm[rank[0].id] == 1)
586
+ return (size_t)-FSE_ERROR_GENERIC;
587
+ norm[rank[0].id]--;
588
+ pointsToRemove--;
589
+ rank[0].count -= (rank[0].count + 6) >> 3;
590
+ if (norm[rank[0].id] == 1)
591
+ rank[0].count=0;
592
+ savedR = rank[0];
593
+ while (rank[newRank].count > savedR.count)
594
+ {
595
+ rank[newRank-1] = rank[newRank];
596
+ newRank++;
597
+ }
598
+ rank[newRank-1] = savedR;
599
+ }
600
+
601
+ return 0;
602
+ }
603
+
604
+
605
+ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
606
+ const unsigned* count, size_t total,
607
+ unsigned maxSymbolValue)
608
+ {
609
+ /* Sanity checks */
610
+ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
611
+ if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
612
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
613
+ if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
614
+
615
+ {
616
+ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
617
+ U64 const scale = 62 - tableLog;
618
+ U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
619
+ U64 const vStep = 1ULL<<(scale-20);
620
+ int stillToDistribute = 1<<tableLog;
621
+ unsigned s;
622
+ unsigned largest=0;
623
+ short largestP=0;
624
+ U32 lowThreshold = (U32)(total >> tableLog);
625
+
626
+ for (s=0; s<=maxSymbolValue; s++)
627
+ {
628
+ if (count[s] == total) return 0;
629
+ if (count[s] == 0)
630
+ {
631
+ normalizedCounter[s]=0;
632
+ continue;
633
+ }
634
+ if (count[s] <= lowThreshold)
635
+ {
636
+ normalizedCounter[s] = -1;
637
+ stillToDistribute--;
638
+ }
639
+ else
640
+ {
641
+ short proba = (short)((count[s]*step) >> scale);
642
+ if (proba<8)
643
+ {
644
+ U64 restToBeat = vStep * rtbTable[proba];
645
+ proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
646
+ }
647
+ if (proba > largestP)
648
+ {
649
+ largestP=proba;
650
+ largest=s;
651
+ }
652
+ normalizedCounter[s] = proba;
653
+ stillToDistribute -= proba;
654
+ }
655
+ }
656
+ if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
657
+ {
658
+ /* corner case, need to converge towards normalization with caution */
659
+ size_t errorCode = FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
660
+ if (FSE_isError(errorCode)) return errorCode;
661
+ //FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
662
+ }
663
+ else normalizedCounter[largest] += (short)stillToDistribute;
664
+ }
665
+
666
+ #if 0
667
+ { /* Print Table (debug) */
668
+ U32 s;
669
+ U32 nTotal = 0;
670
+ for (s=0; s<=maxSymbolValue; s++)
671
+ printf("%3i: %4i \n", s, normalizedCounter[s]);
672
+ for (s=0; s<=maxSymbolValue; s++)
673
+ nTotal += abs(normalizedCounter[s]);
674
+ if (nTotal != (1U<<tableLog))
675
+ printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
676
+ getchar();
677
+ }
678
+ #endif
679
+
680
+ return tableLog;
681
+ }
682
+
683
+
684
+ /* fake CTable, for raw (uncompressed) input */
685
+ size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
686
+ {
687
+ const unsigned tableSize = 1 << nbBits;
688
+ const unsigned tableMask = tableSize - 1;
689
+ const unsigned maxSymbolValue = tableMask;
690
+ U16* tableU16 = ( (U16*) CTable) + 2;
691
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (tableSize>>1));
692
+ unsigned s;
693
+
694
+ /* Sanity checks */
695
+ if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
696
+ if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
697
+
698
+ /* header */
699
+ tableU16[-2] = (U16) nbBits;
700
+ tableU16[-1] = (U16) maxSymbolValue;
701
+
702
+ /* Build table */
703
+ for (s=0; s<tableSize; s++)
704
+ tableU16[s] = (U16)(tableSize + s);
705
+
706
+ /* Build Symbol Transformation Table */
707
+ for (s=0; s<=maxSymbolValue; s++)
708
+ {
709
+ symbolTT[s].minBitsOut = (BYTE)nbBits;
710
+ symbolTT[s].deltaFindState = s-1;
711
+ symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
712
+ }
713
+
714
+ return 0;
715
+ }
716
+
717
+
718
+ /* fake CTable, for rle (100% always same symbol) input */
719
+ size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
720
+ {
721
+ const unsigned tableSize = 1;
722
+ U16* tableU16 = ( (U16*) CTable) + 2;
723
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)CTable + 2);
724
+
725
+ /* safety checks */
726
+ if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be 4 bytes aligned */
727
+
728
+ /* header */
729
+ tableU16[-2] = (U16) 0;
730
+ tableU16[-1] = (U16) symbolValue;
731
+
732
+ /* Build table */
733
+ tableU16[0] = 0;
734
+ tableU16[1] = 0; /* just in case */
735
+
736
+ /* Build Symbol Transformation Table */
737
+ {
738
+ symbolTT[symbolValue].minBitsOut = 0;
739
+ symbolTT[symbolValue].deltaFindState = 0;
740
+ symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
741
+ }
742
+
743
+ return 0;
744
+ }
745
+
746
+
747
+ void FSE_initCStream(FSE_CStream_t* bitC, void* start)
748
+ {
749
+ bitC->bitContainer = 0;
750
+ bitC->bitPos = 0; /* reserved for unusedBits */
751
+ bitC->startPtr = (char*)start;
752
+ bitC->ptr = bitC->startPtr;
753
+ }
754
+
755
+ void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)
756
+ {
757
+ const U32 tableLog = ( (U16*) CTable) [0];
758
+ statePtr->value = (ptrdiff_t)1<<tableLog;
759
+ statePtr->stateTable = ((const U16*) CTable) + 2;
760
+ statePtr->symbolTT = (const U32*)CTable + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
761
+ statePtr->stateLog = tableLog;
762
+ }
763
+
764
+ void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
765
+ {
766
+ static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
767
+ bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos;
768
+ bitC->bitPos += nbBits;
769
+ }
770
+
771
+ void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
772
+ {
773
+ const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
774
+ const U16* const stateTable = (const U16*) statePtr->stateTable;
775
+ int nbBitsOut = symbolTT[symbol].minBitsOut;
776
+ nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);
777
+ FSE_addBits(bitC, statePtr->value, nbBitsOut);
778
+ statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];
779
+ }
780
+
781
+ void FSE_flushBits(FSE_CStream_t* bitC)
782
+ {
783
+ size_t nbBytes = bitC->bitPos >> 3;
784
+ FSE_writeLEST(bitC->ptr, bitC->bitContainer);
785
+ bitC->bitPos &= 7;
786
+ bitC->ptr += nbBytes;
787
+ bitC->bitContainer >>= nbBytes*8;
788
+ }
789
+
790
+ void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr)
791
+ {
792
+ FSE_addBits(bitC, statePtr->value, statePtr->stateLog);
793
+ FSE_flushBits(bitC);
794
+ }
795
+
796
+
797
+ size_t FSE_closeCStream(FSE_CStream_t* bitC)
798
+ {
799
+ char* endPtr;
800
+
801
+ FSE_addBits(bitC, 1, 1);
802
+ FSE_flushBits(bitC);
803
+
804
+ endPtr = bitC->ptr;
805
+ endPtr += bitC->bitPos > 0;
806
+
807
+ return (endPtr - bitC->startPtr);
808
+ }
809
+
810
+
811
+ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
812
+ const void* src, size_t srcSize,
813
+ const void* CTable)
814
+ {
815
+ const BYTE* const istart = (const BYTE*) src;
816
+ const BYTE* ip;
817
+ const BYTE* const iend = istart + srcSize;
818
+
819
+ FSE_CStream_t bitC;
820
+ FSE_CState_t CState1, CState2;
821
+
822
+
823
+ /* init */
824
+ (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
825
+ FSE_initCStream(&bitC, dst);
826
+ FSE_initCState(&CState1, CTable);
827
+ CState2 = CState1;
828
+
829
+ ip=iend;
830
+
831
+ /* join to even */
832
+ if (srcSize & 1)
833
+ {
834
+ FSE_encodeByte(&bitC, &CState1, *--ip);
835
+ FSE_flushBits(&bitC);
836
+ }
837
+
838
+ /* join to mod 4 */
839
+ if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
840
+ {
841
+ FSE_encodeByte(&bitC, &CState2, *--ip);
842
+ FSE_encodeByte(&bitC, &CState1, *--ip);
843
+ FSE_flushBits(&bitC);
844
+ }
845
+
846
+ /* 2 or 4 encoding per loop */
847
+ while (ip>istart)
848
+ {
849
+ FSE_encodeByte(&bitC, &CState2, *--ip);
850
+
851
+ if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
852
+ FSE_flushBits(&bitC);
853
+
854
+ FSE_encodeByte(&bitC, &CState1, *--ip);
855
+
856
+ if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
857
+ {
858
+ FSE_encodeByte(&bitC, &CState2, *--ip);
859
+ FSE_encodeByte(&bitC, &CState1, *--ip);
860
+ }
861
+
862
+ FSE_flushBits(&bitC);
863
+ }
864
+
865
+ FSE_flushCState(&bitC, &CState2);
866
+ FSE_flushCState(&bitC, &CState1);
867
+ return FSE_closeCStream(&bitC);
868
+ }
869
+
870
+
871
+ static size_t FSE_compressRLE (BYTE *out, BYTE symbol)
872
+ {
873
+ *out=symbol;
874
+ return 1;
875
+ }
876
+
877
/* FSE_compressBound :
 * Function form of the FSE_COMPRESSBOUND() macro.
 * FSE_compress2() refuses any destination buffer smaller than this value. */
size_t FSE_compressBound(size_t size)
{
    return FSE_COMPRESSBOUND(size);
}
878
+
879
+
880
+ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
881
+ {
882
+ const BYTE* const istart = (const BYTE*) src;
883
+ const BYTE* ip = istart;
884
+
885
+ BYTE* const ostart = (BYTE*) dst;
886
+ BYTE* op = ostart;
887
+ BYTE* const oend = ostart + dstSize;
888
+
889
+ U32 count[FSE_MAX_SYMBOL_VALUE+1];
890
+ S16 norm[FSE_MAX_SYMBOL_VALUE+1];
891
+ CTable_max_t CTable;
892
+ size_t errorCode;
893
+
894
+ /* early out */
895
+ if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
896
+ if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
897
+ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
898
+ if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
899
+
900
+ /* Scan input and build symbol stats */
901
+ errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
902
+ if (FSE_isError(errorCode)) return errorCode;
903
+ if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);
904
+ if (errorCode < ((srcSize * 7) >> 10)) return 0; /* Heuristic : not compressible enough */
905
+
906
+ tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
907
+ errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
908
+ if (FSE_isError(errorCode)) return errorCode;
909
+
910
+ /* Write table description header */
911
+ errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
912
+ if (FSE_isError(errorCode)) return errorCode;
913
+ op += errorCode;
914
+
915
+ /* Compress */
916
+ errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
917
+ if (FSE_isError(errorCode)) return errorCode;
918
+ op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);
919
+
920
+ /* check compressibility */
921
+ if ( (size_t)(op-ostart) >= srcSize-1 )
922
+ return 0;
923
+
924
+ return op-ostart;
925
+ }
926
+
927
+
928
+ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
929
+ {
930
+ return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
931
+ }
932
+
933
+
934
+ /*********************************************************
935
+ * Decompression (Byte symbols)
936
+ *********************************************************/
937
+ typedef struct /* one decoding table cell, as filled by FSE_buildDTable_rle() and FSE_buildDTable_raw() */
938
+ {
939
+ U16 newState; /* set to 0 by both fake-table builders */
940
+ BYTE symbol; /* symbol value stored in this cell */
941
+ BYTE nbBits; /* bit count stored in this cell : 0 for rle, nbBits for raw */
942
+ } FSE_decode_t; /* size == U32 */
943
+
944
+ /* Specific corner case : RLE compression */
945
+ size_t FSE_decompressRLE(void* dst, size_t originalSize,
946
+ const void* cSrc, size_t cSrcSize)
947
+ {
948
+ if (cSrcSize != 1) return (size_t)-FSE_ERROR_srcSize_wrong;
949
+ memset(dst, *(BYTE*)cSrc, originalSize);
950
+ return originalSize;
951
+ }
952
+
953
+
954
+ size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
955
+ {
956
+ U32* const base32 = (U32*)DTable;
957
+ FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
958
+
959
+ /* Sanity check */
960
+ if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
961
+
962
+ base32[0] = 0;
963
+
964
+ cell->newState = 0;
965
+ cell->symbol = symbolValue;
966
+ cell->nbBits = 0;
967
+
968
+ return 0;
969
+ }
970
+
971
+
972
+ size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
973
+ {
974
+ U32* const base32 = (U32*)DTable;
975
+ FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
976
+ const unsigned tableSize = 1 << nbBits;
977
+ const unsigned tableMask = tableSize - 1;
978
+ const unsigned maxSymbolValue = tableMask;
979
+ unsigned s;
980
+
981
+ /* Sanity checks */
982
+ if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
983
+ if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
984
+
985
+ /* Build Decoding Table */
986
+ base32[0] = nbBits;
987
+ for (s=0; s<=maxSymbolValue; s++)
988
+ {
989
+ dinfo[s].newState = 0;
990
+ dinfo[s].symbol = (BYTE)s;
991
+ dinfo[s].nbBits = (BYTE)nbBits;
992
+ }
993
+
994
+ return 0;
995
+ }
996
+
997
+
998
+ /* FSE_initDStream
999
+ * Initialize a FSE_DStream_t.
1000
+ * srcBuffer must point at the beginning of an FSE block.
1001
+ * The function result is the size of the FSE_block (== srcSize).
1002
+ * If srcSize is too small, the function will return an errorCode;
1003
+ */
1004
+ size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1005
+ {
1006
+ if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1007
+
1008
+ if (srcSize >= sizeof(bitD_t))
1009
+ {
1010
+ U32 contain32;
1011
+ bitD->start = (char*)srcBuffer;
1012
+ bitD->ptr = (char*)srcBuffer + srcSize - sizeof(bitD_t);
1013
+ bitD->bitContainer = FSE_readLEST(bitD->ptr);
1014
+ contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1015
+ if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1016
+ bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1017
+ }
1018
+ else
1019
+ {
1020
+ U32 contain32;
1021
+ bitD->start = (char*)srcBuffer;
1022
+ bitD->ptr = bitD->start;
1023
+ bitD->bitContainer = *(BYTE*)(bitD->start);
1024
+ switch(srcSize)
1025
+ {
1026
+ case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
1027
+ case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
1028
+ case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
1029
+ case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
1030
+ case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
1031
+ case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) << 8;
1032
+ default:;
1033
+ }
1034
+ contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1035
+ if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1036
+ bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1037
+ bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;
1038
+ }
1039
+
1040
+ return srcSize;
1041
+ }
1042
+
1043
+
1044
+ /* FSE_readBits
1045
+ * Read next n bits from the bitContainer.
1046
+ * Use the fast variant *only* if n > 0.
1047
+ * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
1048
+ * return : value extracted.
1049
+ */
1050
+ bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1051
+ {
1052
+ bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);
1053
+ bitD->bitsConsumed += nbBits;
1054
+ return value;
1055
+ }
1056
+
1057
+ bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 */
1058
+ {
1059
+ bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);
1060
+ bitD->bitsConsumed += nbBits;
1061
+ return value;
1062
+ }
1063
+
1064
+ unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1065
+ {
1066
+ if (bitD->ptr >= bitD->start + sizeof(bitD_t))
1067
+ {
1068
+ bitD->ptr -= bitD->bitsConsumed >> 3;
1069
+ bitD->bitsConsumed &= 7;
1070
+ bitD->bitContainer = FSE_readLEST(bitD->ptr);
1071
+ return 0;
1072
+ }
1073
+ if (bitD->ptr == bitD->start)
1074
+ {
1075
+ if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
1076
+ if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
1077
+ return 3;
1078
+ }
1079
+ {
1080
+ U32 nbBytes = bitD->bitsConsumed >> 3;
1081
+ if (bitD->ptr - nbBytes < bitD->start)
1082
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
1083
+ bitD->ptr -= nbBytes;
1084
+ bitD->bitsConsumed -= nbBytes*8;
1085
+ bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
1086
+ return (bitD->ptr == bitD->start);
1087
+ }
1088
+ }
1089
+
1090
+
1091
+ void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)
1092
+ {
1093
+ const U32* const base32 = (const U32*)DTable;
1094
+ DStatePtr->state = FSE_readBits(bitD, base32[0]);
1095
+ FSE_reloadDStream(bitD);
1096
+ DStatePtr->table = base32 + 1;
1097
+ }
1098
+
1099
+ BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1100
+ {
1101
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1102
+ const U32 nbBits = DInfo.nbBits;
1103
+ BYTE symbol = DInfo.symbol;
1104
+ bitD_t lowBits = FSE_readBits(bitD, nbBits);
1105
+
1106
+ DStatePtr->state = DInfo.newState + lowBits;
1107
+ return symbol;
1108
+ }
1109
+
1110
+ BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1111
+ {
1112
+ const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1113
+ const U32 nbBits = DInfo.nbBits;
1114
+ BYTE symbol = DInfo.symbol;
1115
+ bitD_t lowBits = FSE_readBitsFast(bitD, nbBits);
1116
+
1117
+ DStatePtr->state = DInfo.newState + lowBits;
1118
+ return symbol;
1119
+ }
1120
+
1121
+ /* FSE_endOfDStream
1122
+ Tells if bitD has reached end of bitStream or not */
1123
+
1124
+ unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1125
+ {
1126
+ return FSE_reloadDStream((FSE_DStream_t*)bitD)==2;
1127
+ }
1128
+
1129
+ unsigned FSE_endOfDState(const FSE_DState_t* statePtr)
1130
+ {
1131
+ return statePtr->state == 0;
1132
+ }
1133
+
1134
+
1135
+ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1136
+ void* dst, size_t maxDstSize,
1137
+ const void* cSrc, size_t cSrcSize,
1138
+ const void* DTable, unsigned fast)
1139
+ {
1140
+ BYTE* const ostart = (BYTE*) dst;
1141
+ BYTE* op = ostart;
1142
+ BYTE* const omax = op + maxDstSize;
1143
+ BYTE* const olimit = omax-3;
1144
+
1145
+ FSE_DStream_t bitD;
1146
+ FSE_DState_t state1, state2;
1147
+ size_t errorCode;
1148
+
1149
+ /* Init */
1150
+ errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1151
+ if (FSE_isError(errorCode)) return errorCode;
1152
+
1153
+ FSE_initDState(&state1, &bitD, DTable);
1154
+ FSE_initDState(&state2, &bitD, DTable);
1155
+
1156
+
1157
+ /* 2 symbols per loop */
1158
+ while (!FSE_reloadDStream(&bitD) && (op<olimit))
1159
+ {
1160
+ *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1161
+
1162
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8) /* This test must be static */
1163
+ FSE_reloadDStream(&bitD);
1164
+
1165
+ *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1166
+
1167
+ if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8) /* This test must be static */
1168
+ {
1169
+ *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1170
+ *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1171
+ }
1172
+ }
1173
+
1174
+ /* tail */
1175
+ while (1)
1176
+ {
1177
+ if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1178
+ break;
1179
+
1180
+ *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1181
+
1182
+ if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1183
+ break;
1184
+
1185
+ *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1186
+ }
1187
+
1188
+ /* end ? */
1189
+ if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1190
+ return op-ostart;
1191
+
1192
+ if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1193
+
1194
+ return (size_t)-FSE_ERROR_corruptionDetected;
1195
+ }
1196
+
1197
+
1198
/* FSE_decompress_usingDTable
 * Decodes cSrc into dst using an already built DTable.
 * fastMode : non-zero selects the fast symbol decoder.
 * Each branch passes a literal constant, so that the generic function
 * can be specialized at each call site.
 * return : value returned by FSE_decompress_usingDTable_generic()
 */
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                            const void* cSrc, size_t cSrcSize,
                            const void* DTable, size_t fastMode)
{
    /* select fast mode (static) */
    if (!fastMode)
        return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);

    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1);
}
1206
+
1207
+
1208
+ size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1209
+ {
1210
+ const BYTE* const istart = (const BYTE*)cSrc;
1211
+ const BYTE* ip = istart;
1212
+ short counting[FSE_MAX_SYMBOL_VALUE+1];
1213
+ FSE_decode_t DTable[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
1214
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1215
+ unsigned tableLog;
1216
+ size_t errorCode, fastMode;
1217
+
1218
+ if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1219
+
1220
+ /* normal FSE decoding mode */
1221
+ errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1222
+ if (FSE_isError(errorCode)) return errorCode;
1223
+ if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1224
+ ip += errorCode;
1225
+ cSrcSize -= errorCode;
1226
+
1227
+ fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);
1228
+ if (FSE_isError(fastMode)) return fastMode;
1229
+
1230
+ /* always return, even if it is an error code */
1231
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
1232
+ }
1233
+
1234
+
1235
+ #endif /* FSE_COMMONDEFS_ONLY */
1236
+
1237
+ /*
1238
+ 2nd part of the file
1239
+ designed to be included
1240
+ for type-specific functions (template equivalent in C)
1241
+ Objective is to write such functions only once, for better maintenance
1242
+ */
1243
+
1244
+ /* safety checks */
1245
+ #ifndef FSE_FUNCTION_EXTENSION
1246
+ # error "FSE_FUNCTION_EXTENSION must be defined"
1247
+ #endif
1248
+ #ifndef FSE_FUNCTION_TYPE
1249
+ # error "FSE_FUNCTION_TYPE must be defined"
1250
+ #endif
1251
+
1252
+ /* Function names */
1253
+ #define FSE_CAT(X,Y) X##Y
1254
+ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
1255
+ #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
1256
+
1257
+
1258
+ /* Function templates */
1259
+ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
1260
+ {
1261
+ const FSE_FUNCTION_TYPE* ip = source;
1262
+ const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
1263
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
1264
+ unsigned max=0;
1265
+ int s;
1266
+
1267
+ U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1268
+ U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1269
+ U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1270
+ U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1271
+
1272
+ /* safety checks */
1273
+ if (!sourceSize)
1274
+ {
1275
+ memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
1276
+ *maxSymbolValuePtr = 0;
1277
+ return 0;
1278
+ }
1279
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
1280
+ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
1281
+
1282
+ if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
1283
+ {
1284
+ /* check input values, to avoid count table overflow */
1285
+ while (ip < iend-3)
1286
+ {
1287
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
1288
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
1289
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
1290
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
1291
+ }
1292
+ }
1293
+ else
1294
+ {
1295
+ U32 cached = FSE_read32(ip); ip += 4;
1296
+ while (ip < iend-15)
1297
+ {
1298
+ U32 c = cached; cached = FSE_read32(ip); ip += 4;
1299
+ Counting1[(BYTE) c ]++;
1300
+ Counting2[(BYTE)(c>>8) ]++;
1301
+ Counting3[(BYTE)(c>>16)]++;
1302
+ Counting4[ c>>24 ]++;
1303
+ c = cached; cached = FSE_read32(ip); ip += 4;
1304
+ Counting1[(BYTE) c ]++;
1305
+ Counting2[(BYTE)(c>>8) ]++;
1306
+ Counting3[(BYTE)(c>>16)]++;
1307
+ Counting4[ c>>24 ]++;
1308
+ c = cached; cached = FSE_read32(ip); ip += 4;
1309
+ Counting1[(BYTE) c ]++;
1310
+ Counting2[(BYTE)(c>>8) ]++;
1311
+ Counting3[(BYTE)(c>>16)]++;
1312
+ Counting4[ c>>24 ]++;
1313
+ c = cached; cached = FSE_read32(ip); ip += 4;
1314
+ Counting1[(BYTE) c ]++;
1315
+ Counting2[(BYTE)(c>>8) ]++;
1316
+ Counting3[(BYTE)(c>>16)]++;
1317
+ Counting4[ c>>24 ]++;
1318
+ }
1319
+ ip-=4;
1320
+ }
1321
+
1322
+ /* finish last symbols */
1323
+ while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
1324
+
1325
+ for (s=0; s<=(int)maxSymbolValue; s++)
1326
+ {
1327
+ count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
1328
+ if (count[s] > max) max = count[s];
1329
+ }
1330
+
1331
+ while (!count[maxSymbolValue]) maxSymbolValue--;
1332
+ *maxSymbolValuePtr = maxSymbolValue;
1333
+ return (int)max;
1334
+ }
1335
+
1336
+ /* hidden fast variant (unsafe) */
1337
+ size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
1338
+ {
1339
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
1340
+ }
1341
+
1342
+ size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
1343
+ {
1344
+ if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
1345
+ {
1346
+ *maxSymbolValuePtr = 255;
1347
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
1348
+ }
1349
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 1);
1350
+ }
1351
+
1352
+
1353
+ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
1354
+
1355
+ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
1356
+ (void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1357
+ {
1358
+ const unsigned tableSize = 1 << tableLog;
1359
+ const unsigned tableMask = tableSize - 1;
1360
+ U16* tableU16 = ( (U16*) CTable) + 2;
1361
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );
1362
+ const unsigned step = FSE_tableStep(tableSize);
1363
+ unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
1364
+ U32 position = 0;
1365
+ FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];
1366
+ U32 highThreshold = tableSize-1;
1367
+ unsigned symbol;
1368
+ unsigned i;
1369
+
1370
+ /* safety checks */
1371
+ if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
1372
+
1373
+ /* header */
1374
+ tableU16[-2] = (U16) tableLog;
1375
+ tableU16[-1] = (U16) maxSymbolValue;
1376
+
1377
+ /* For explanations on how to distribute symbol values over the table :
1378
+ * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
1379
+
1380
+ /* symbol start positions */
1381
+ cumul[0] = 0;
1382
+ for (i=1; i<=maxSymbolValue+1; i++)
1383
+ {
1384
+ if (normalizedCounter[i-1]==-1) /* Low prob symbol */
1385
+ {
1386
+ cumul[i] = cumul[i-1] + 1;
1387
+ tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
1388
+ }
1389
+ else
1390
+ cumul[i] = cumul[i-1] + normalizedCounter[i-1];
1391
+ }
1392
+ cumul[maxSymbolValue+1] = tableSize+1;
1393
+
1394
+ /* Spread symbols */
1395
+ for (symbol=0; symbol<=maxSymbolValue; symbol++)
1396
+ {
1397
+ int nbOccurences;
1398
+ for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
1399
+ {
1400
+ tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1401
+ position = (position + step) & tableMask;
1402
+ while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
1403
+ }
1404
+ }
1405
+
1406
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
1407
+
1408
+ /* Build table */
1409
+ for (i=0; i<tableSize; i++)
1410
+ {
1411
+ FSE_FUNCTION_TYPE s = tableSymbol[i];
1412
+ tableU16[cumul[s]++] = (U16) (tableSize+i); // Table U16 : sorted by symbol order; gives next state value
1413
+ }
1414
+
1415
+ // Build Symbol Transformation Table
1416
+ {
1417
+ unsigned s;
1418
+ unsigned total = 0;
1419
+ for (s=0; s<=maxSymbolValue; s++)
1420
+ {
1421
+ switch (normalizedCounter[s])
1422
+ {
1423
+ case 0:
1424
+ break;
1425
+ case -1:
1426
+ case 1:
1427
+ symbolTT[s].minBitsOut = (BYTE)tableLog;
1428
+ symbolTT[s].deltaFindState = total - 1;
1429
+ total ++;
1430
+ symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1431
+ break;
1432
+ default :
1433
+ symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
1434
+ symbolTT[s].deltaFindState = total - normalizedCounter[s];
1435
+ total += normalizedCounter[s];
1436
+ symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
1437
+ }
1438
+ }
1439
+ }
1440
+
1441
+ return 0;
1442
+ }
1443
+
1444
+
1445
+ #define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
1446
+
1447
+ void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
1448
+ {
1449
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
1450
+ return malloc( ((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE) );
1451
+ }
1452
+
1453
+ void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
1454
+ {
1455
+ free(DTable);
1456
+ }
1457
+
1458
+
1459
+ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
1460
+ (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1461
+ {
1462
+ U32* const base32 = (U32*)DTable;
1463
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
1464
+ const U32 tableSize = 1 << tableLog;
1465
+ const U32 tableMask = tableSize-1;
1466
+ const U32 step = FSE_tableStep(tableSize);
1467
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
1468
+ U32 position = 0;
1469
+ U32 highThreshold = tableSize-1;
1470
+ const S16 largeLimit= 1 << (tableLog-1);
1471
+ U32 noLarge = 1;
1472
+ U32 s;
1473
+
1474
+ /* Sanity Checks */
1475
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
1476
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
1477
+
1478
+ /* Init, lay down lowprob symbols */
1479
+ base32[0] = tableLog;
1480
+ for (s=0; s<=maxSymbolValue; s++)
1481
+ {
1482
+ if (normalizedCounter[s]==-1)
1483
+ {
1484
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
1485
+ symbolNext[s] = 1;
1486
+ }
1487
+ else
1488
+ {
1489
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
1490
+ symbolNext[s] = normalizedCounter[s];
1491
+ }
1492
+ }
1493
+
1494
+ /* Spread symbols */
1495
+ for (s=0; s<=maxSymbolValue; s++)
1496
+ {
1497
+ int i;
1498
+ for (i=0; i<normalizedCounter[s]; i++)
1499
+ {
1500
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
1501
+ position = (position + step) & tableMask;
1502
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
1503
+ }
1504
+ }
1505
+
1506
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
1507
+
1508
+ /* Build Decoding table */
1509
+ {
1510
+ U32 i;
1511
+ for (i=0; i<tableSize; i++)
1512
+ {
1513
+ FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
1514
+ U16 nextState = symbolNext[symbol]++;
1515
+ tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
1516
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
1517
+ }
1518
+ }
1519
+
1520
+ return noLarge;
1521
+ }