extzstd 0.0.1.CONCEPT → 0.0.2.CONCEPT

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 714080450979b0ec369f0afc948a7018882da0bd
4
- data.tar.gz: a7b33f5732cab823a73f9b31f2e0a711e1976caa
3
+ metadata.gz: e5563734d016a497235ac43fed1b14a41d32e1c3
4
+ data.tar.gz: 63389454aa8f607e5f823b27314be3ae7efe45ed
5
5
  SHA512:
6
- metadata.gz: cbcf17649efbb2a49ecd26e2059ea74f2e9bb34eda0b6fff8694f522fd2487bbceedc6cc76fd7e2c674cab7cfa7a173d7c26c54b849cd010f9e98162bee5f434
7
- data.tar.gz: e9eba393e111181cae14f787672abd2fe24fdf316cf03158a0e0c696287501334437ef89f0634eca71a5e589e17dc32acd6bc5cb8b2341c233e029dc04fe923c
6
+ metadata.gz: a7dd05b9080beec1707db4e76948154309052fb85c615a211bcb4c82147fc84a666aadeddf670dbb1932a406ef88448e133ed33967cd06b27af2f1536c6ab042
7
+ data.tar.gz: 53e088c35e916c5bb21e10fe849e57a20e1e933b9f97cc5ad43be6c0e8531fe3984682bbeb2fe7b542fadccffb07ac9e2b1a8a978696fd43ba80a6155d7bbc6c
data/README.md CHANGED
@@ -1,26 +1,38 @@
1
1
  # encoding:utf-8 ;
2
2
 
3
- # extzstd - ruby binding for Zstandard (zstd)
3
+ # extzstd - ruby bindings for Zstd (Zstandard)
4
4
 
5
- This is ruby binding for compression library
5
+ This is ruby bindings for compression library
6
6
  [Zstd (https://github.com/Cyan4973/zstd)](https://github.com/Cyan4973/zstd).
7
7
 
8
- * PACKAGE NAME: extzstd
9
- * AUTHOR: dearblue <dearblue@users.sourceforge.jp>
10
- * VERSION: 0.0.1.CONCEPT
11
- * LICENSING: 2-clause BSD License
12
- * REPORT ISSUE TO: <http://sourceforge.jp/projects/rutsubo/ticket/>
13
- * DEPENDENCY RUBY: ruby-2.0+
14
- * DEPENDENCY RUBY GEMS: (none)
15
- * DEPENDENCY LIBRARY: (none)
16
- * BUNDLED EXTERNAL LIBRARIES:
17
- * zstd <https://github.com/Cyan4973/zstd>
18
- (commit-e739b273f95902b7616e11338a4ef04bebc9d07b (Mon Feb 9 01:53:12 2015 +0100))
8
+ * package name: extzstd
9
+ * author: dearblue (mailto:dearblue@users.osdn.me)
10
+ * version: 0.0.2.CONCEPT
11
+ * software quality: EXPERIMENTAL
12
+ * license: 2-clause BSD License
13
+ * report issue to: https://osdn.jp/projects/rutsubo/ticket/
14
+ * dependency ruby: ruby-2.0+
15
+ * dependency ruby gems: (none)
16
+ * dependency library: (none)
17
+ * bundled external libraries:
18
+ * zstd-0.1.2 (https://github.com/Cyan4973/zstd/tree/zstd-0.1.2)
19
+
20
+
21
+ ## ***WARNING***
22
+
23
+ Zstd data format compatibility is not guaranteed in future versions
24
+ (There is a possibility that it becomes impossible to future use).
25
+
26
+ Written in [zstd/README.md](https://github.com/Cyan4973/zstd/blob/zstd-0.1.2/README.md):
27
+
28
+ > Zstd has not yet reached "stable" status. Specifically, it doesn't
29
+ > guarantee yet that its current compressed format will remain stable
30
+ > and supported in future versions.
19
31
 
20
32
 
21
33
  ## HOW TO USE
22
34
 
23
- ### Simply process
35
+ ### basic usage (one pass encode/decode)
24
36
 
25
37
  ``` ruby:ruby
26
38
  # First, load library
@@ -34,7 +46,7 @@ encdata = Zstd.encode(source)
34
46
  puts "encdata.bytesize=#{encdata.bytesize}"
35
47
 
36
48
  # Directly decompression
37
- maxdestsize = source.bytesize # MUST BE ORIGINAL SIZE OR MORE! If given a smaller size, crash ruby interpreter.
49
+ maxdestsize = source.bytesize
38
50
  decdata = Zstd.decode(encdata, maxdestsize)
39
51
  puts "decdata.bytesize=#{decdata.bytesize}"
40
52
 
data/Rakefile CHANGED
@@ -2,13 +2,15 @@
2
2
  require "rake/clean"
3
3
 
4
4
  DOC = FileList["{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"] +
5
- FileList["ext/**/{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"]
5
+ FileList["{contrib,ext}/**/{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"] +
6
+ FileList["ext/**/*.{c,C,cc,cxx,cpp,h,H,hh}"]
6
7
  #EXT = FileList["ext/**/*.{h,hh,c,cc,cpp,cxx}"] +
7
8
  # FileList["ext/externals/**/*"]
8
9
  EXT = FileList["ext/**/*"]
9
10
  BIN = FileList["bin/*"]
10
11
  LIB = FileList["lib/**/*.rb"]
11
12
  SPEC = FileList["spec/**/*"]
13
+ TEST = FileList["test/**/*"]
12
14
  EXAMPLE = FileList["examples/**/*"]
13
15
  GEMSTUB_SRC = "gemstub.rb"
14
16
  RAKEFILE = [File.basename(__FILE__), GEMSTUB_SRC]
@@ -22,12 +24,16 @@ GEMSTUB.extensions += EXTCONF
22
24
  GEMSTUB.executables += FileList["bin/*"].map { |n| File.basename n }
23
25
  GEMSTUB.executables.sort!
24
26
 
25
- GEMFILE = "#{GEMSTUB.name}-#{GEMSTUB.version}.gem"
27
+ PACKAGENAME = "#{GEMSTUB.name}-#{GEMSTUB.version}"
28
+ GEMFILE = "#{PACKAGENAME}.gem"
26
29
  GEMSPEC = "#{GEMSTUB.name}.gemspec"
27
30
 
28
- GEMSTUB.files += DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + RAKEFILE + EXTRA
31
+ GEMSTUB.files += DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + RAKEFILE + EXTRA
29
32
  GEMSTUB.files.sort!
30
- GEMSTUB.rdoc_options ||= %w(--charset UTF-8)
33
+ if GEMSTUB.rdoc_options.nil? || GEMSTUB.rdoc_options.empty?
34
+ readme = %W(.md .markdown .rd .rdoc .txt #{""}).map { |ext| "README#{ext}" }.find { |m| DOC.find { |n| n == m } }
35
+ GEMSTUB.rdoc_options = %w(--charset UTF-8) + (readme ? %W(-m #{readme}) : [])
36
+ end
31
37
  GEMSTUB.extra_rdoc_files += DOC + LIB + EXT.reject { |n| n.include?("/externals/") || !%w(.h .hh .c .cc .cpp .cxx).include?(File.extname(n)) }
32
38
  GEMSTUB.extra_rdoc_files.sort!
33
39
 
@@ -82,7 +88,7 @@ unless EXTCONF.empty?
82
88
  desc "generate binary gemspec"
83
89
  task "native-gemspec" => GEMSPEC_NATIVE
84
90
 
85
- file GEMFILE_NATIVE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + SOFILES + RAKEFILE + [GEMSPEC_NATIVE] do
91
+ file GEMFILE_NATIVE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + SOFILES + RAKEFILE + [GEMSPEC_NATIVE] do
86
92
  sh "gem build #{GEMSPEC_NATIVE}"
87
93
  end
88
94
 
@@ -123,8 +129,8 @@ end
123
129
  task :all => GEMFILE
124
130
 
125
131
  desc "generate local rdoc"
126
- task :rdoc => DOC + EXT + LIB do
127
- sh *(%w(rdoc) + GEMSTUB.rdoc_options + DOC + EXT + LIB)
132
+ task :rdoc => DOC + LIB do
133
+ sh *(%w(rdoc) + GEMSTUB.rdoc_options + DOC + LIB)
128
134
  end
129
135
 
130
136
  desc "launch rspec"
@@ -138,7 +144,12 @@ task gem: GEMFILE
138
144
  desc "generate gemspec"
139
145
  task gemspec: GEMSPEC
140
146
 
141
- file GEMFILE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + RAKEFILE + [GEMSPEC] do
147
+ desc "print package name"
148
+ task "package-name" do
149
+ puts PACKAGENAME
150
+ end
151
+
152
+ file GEMFILE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + RAKEFILE + [GEMSPEC] do
142
153
  sh "gem build #{GEMSPEC}"
143
154
  end
144
155
 
@@ -32,16 +32,17 @@
32
32
  # ################################################################
33
33
 
34
34
  # Version numbers
35
- VERSION?= 0
35
+ VERSION?= 0.1.2
36
36
  LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
37
37
  LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
38
38
  LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
39
39
  LIBVER = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
40
40
 
41
41
  DESTDIR?=
42
- PREFIX ?= /usr
42
+ PREFIX ?= /usr/local
43
43
  CFLAGS ?= -O3
44
44
  CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
45
+ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
45
46
 
46
47
  LIBDIR ?= $(PREFIX)/lib
47
48
  INCLUDEDIR=$(PREFIX)/include
@@ -67,10 +68,10 @@ all: libzstd
67
68
 
68
69
  libzstd: zstd.c
69
70
  @echo compiling static library
70
- @$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
71
+ @$(CC) $(FLAGS) -c $^
71
72
  @$(AR) rcs libzstd.a zstd.o
72
73
  @echo compiling dynamic library $(LIBVER)
73
- @$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
74
+ @$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
74
75
  @echo creating versioned links
75
76
  @ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
76
77
  @ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
@@ -52,11 +52,22 @@
52
52
 
53
53
 
54
54
  /****************************************************************
55
- * Generic function type & suffix (C template emulation)
55
+ * template functions type & suffix
56
56
  ****************************************************************/
57
57
  #define FSE_FUNCTION_TYPE BYTE
58
58
  #define FSE_FUNCTION_EXTENSION
59
59
 
60
+
61
+ /****************************************************************
62
+ * Byte symbol type
63
+ ****************************************************************/
64
+ typedef struct
65
+ {
66
+ unsigned short newState;
67
+ unsigned char symbol;
68
+ unsigned char nbBits;
69
+ } FSE_decode_t; /* size == U32 */
70
+
60
71
  #endif /* !FSE_COMMONDEFS_ONLY */
61
72
 
62
73
 
@@ -87,6 +98,8 @@
87
98
  #include "fse_static.h"
88
99
 
89
100
 
101
+ #ifndef MEM_ACCESS_MODULE
102
+ #define MEM_ACCESS_MODULE
90
103
  /****************************************************************
91
104
  * Basic Types
92
105
  *****************************************************************/
@@ -109,21 +122,126 @@ typedef unsigned long long U64;
109
122
  typedef signed long long S64;
110
123
  #endif
111
124
 
125
+ #endif /* MEM_ACCESS_MODULE */
112
126
 
113
127
  /****************************************************************
114
128
  * Memory I/O
115
129
  *****************************************************************/
130
+ /* FSE_FORCE_MEMORY_ACCESS
131
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
132
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
133
+ * The below switch allow to select different access method for improved performance.
134
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
135
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
136
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
137
+ * Method 2 : direct access. This method is portable but violate C standard.
138
+ * It can generate buggy code on targets generating assembly depending on alignment.
139
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
140
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
141
+ * Prefer these methods in priority order (0 > 1 > 2)
142
+ */
143
+ #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
144
+ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
145
+ # define FSE_FORCE_MEMORY_ACCESS 2
146
+ # elif defined(__INTEL_COMPILER) || \
147
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
148
+ # define FSE_FORCE_MEMORY_ACCESS 1
149
+ # endif
150
+ #endif
151
+
152
+
153
+ static unsigned FSE_32bits(void)
154
+ {
155
+ return sizeof(void*)==4;
156
+ }
157
+
116
158
  static unsigned FSE_isLittleEndian(void)
117
159
  {
118
160
  const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
119
161
  return one.c[0];
120
162
  }
121
163
 
164
+ #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
165
+
166
+ static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
167
+ static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
168
+ static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
169
+
170
+ static void FSE_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
171
+ static void FSE_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
172
+ static void FSE_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
173
+
174
+ #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
175
+
176
+ /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
177
+ /* currently only defined for gcc and icc */
178
+ typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
179
+
180
+ static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
181
+ static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
182
+ static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
183
+
184
+ static void FSE_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
185
+ static void FSE_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
186
+ static void FSE_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
187
+
188
+ #else
189
+
190
+ static U16 FSE_read16(const void* memPtr)
191
+ {
192
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
193
+ }
194
+
122
195
  static U32 FSE_read32(const void* memPtr)
123
196
  {
124
- U32 val32;
125
- memcpy(&val32, memPtr, 4);
126
- return val32;
197
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
198
+ }
199
+
200
+ static U64 FSE_read64(const void* memPtr)
201
+ {
202
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
203
+ }
204
+
205
+ static void FSE_write16(void* memPtr, U16 value)
206
+ {
207
+ memcpy(memPtr, &value, sizeof(value));
208
+ }
209
+
210
+ static void FSE_write32(void* memPtr, U32 value)
211
+ {
212
+ memcpy(memPtr, &value, sizeof(value));
213
+ }
214
+
215
+ static void FSE_write64(void* memPtr, U64 value)
216
+ {
217
+ memcpy(memPtr, &value, sizeof(value));
218
+ }
219
+
220
+ #endif // FSE_FORCE_MEMORY_ACCESS
221
+
222
+ static U16 FSE_readLE16(const void* memPtr)
223
+ {
224
+ if (FSE_isLittleEndian())
225
+ return FSE_read16(memPtr);
226
+ else
227
+ {
228
+ const BYTE* p = (const BYTE*)memPtr;
229
+ return (U16)(p[0] + (p[1]<<8));
230
+ }
231
+ }
232
+
233
+ static void FSE_writeLE16(void* memPtr, U16 val)
234
+ {
235
+ if (FSE_isLittleEndian())
236
+ {
237
+ FSE_write16(memPtr, val);
238
+ }
239
+ else
240
+ {
241
+ BYTE* p = (BYTE*)memPtr;
242
+ p[0] = (BYTE)val;
243
+ p[1] = (BYTE)(val>>8);
244
+ }
127
245
  }
128
246
 
129
247
  static U32 FSE_readLE32(const void* memPtr)
@@ -141,7 +259,7 @@ static void FSE_writeLE32(void* memPtr, U32 val32)
141
259
  {
142
260
  if (FSE_isLittleEndian())
143
261
  {
144
- memcpy(memPtr, &val32, 4);
262
+ FSE_write32(memPtr, val32);
145
263
  }
146
264
  else
147
265
  {
@@ -153,13 +271,6 @@ static void FSE_writeLE32(void* memPtr, U32 val32)
153
271
  }
154
272
  }
155
273
 
156
- static U64 FSE_read64(const void* memPtr)
157
- {
158
- U64 val64;
159
- memcpy(&val64, memPtr, 8);
160
- return val64;
161
- }
162
-
163
274
  static U64 FSE_readLE64(const void* memPtr)
164
275
  {
165
276
  if (FSE_isLittleEndian())
@@ -176,7 +287,7 @@ static void FSE_writeLE64(void* memPtr, U64 val64)
176
287
  {
177
288
  if (FSE_isLittleEndian())
178
289
  {
179
- memcpy(memPtr, &val64, 8);
290
+ FSE_write64(memPtr, val64);
180
291
  }
181
292
  else
182
293
  {
@@ -194,7 +305,7 @@ static void FSE_writeLE64(void* memPtr, U64 val64)
194
305
 
195
306
  static size_t FSE_readLEST(const void* memPtr)
196
307
  {
197
- if (sizeof(size_t)==4)
308
+ if (FSE_32bits())
198
309
  return (size_t)FSE_readLE32(memPtr);
199
310
  else
200
311
  return (size_t)FSE_readLE64(memPtr);
@@ -202,7 +313,7 @@ static size_t FSE_readLEST(const void* memPtr)
202
313
 
203
314
  static void FSE_writeLEST(void* memPtr, size_t val)
204
315
  {
205
- if (sizeof(size_t)==4)
316
+ if (FSE_32bits())
206
317
  FSE_writeLE32(memPtr, (U32)val);
207
318
  else
208
319
  FSE_writeLE64(memPtr, (U64)val);
@@ -235,17 +346,12 @@ static void FSE_writeLEST(void* memPtr, size_t val)
235
346
  ****************************************************************/
236
347
  typedef struct
237
348
  {
238
- int deltaFindState;
239
- U16 maxState;
240
- BYTE minBitsOut;
241
- /* one byte padding */
242
- } FSE_symbolCompressionTransform;
243
-
244
- typedef struct
245
- {
246
- U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; /* compatible with FSE_compressU16() */
247
- } CTable_max_t;
349
+ int deltaFindState;
350
+ U32 deltaNbBits;
351
+ } FSE_symbolCompressionTransform; /* total 8 bytes */
248
352
 
353
+ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
354
+ typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
249
355
 
250
356
  /****************************************************************
251
357
  * Internal functions
@@ -273,157 +379,458 @@ FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
273
379
  }
274
380
 
275
381
 
276
- #ifndef FSE_COMMONDEFS_ONLY
277
-
278
- unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
279
-
280
- #define FSE_GENERATE_STRING(STRING) #STRING,
281
- static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
282
-
283
- const char* FSE_getErrorName(size_t code)
284
- {
285
- static const char* codeError = "Unspecified error code";
286
- if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
287
- return codeError;
288
- }
382
+ /****************************************************************
383
+ * Templates
384
+ ****************************************************************/
385
+ /*
386
+ designed to be included
387
+ for type-specific functions (template emulation in C)
388
+ Objective is to write these functions only once, for improved maintenance
389
+ */
289
390
 
290
- static short FSE_abs(short a)
291
- {
292
- return a<0? -a : a;
293
- }
391
+ /* safety checks */
392
+ #ifndef FSE_FUNCTION_EXTENSION
393
+ # error "FSE_FUNCTION_EXTENSION must be defined"
394
+ #endif
395
+ #ifndef FSE_FUNCTION_TYPE
396
+ # error "FSE_FUNCTION_TYPE must be defined"
397
+ #endif
294
398
 
399
+ /* Function names */
400
+ #define FSE_CAT(X,Y) X##Y
401
+ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
402
+ #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
295
403
 
296
- /****************************************************************
297
- * Header bitstream management
298
- ****************************************************************/
299
- size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)
300
- {
301
- size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
302
- return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
303
- }
304
404
 
305
- static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
306
- const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
307
- unsigned safeWrite)
405
+ /* Function templates */
406
+ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
407
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
308
408
  {
309
- BYTE* const ostart = (BYTE*) header;
310
- BYTE* out = ostart;
311
- BYTE* const oend = ostart + headerBufferSize;
312
- int nbBits;
313
- const int tableSize = 1 << tableLog;
314
- int remaining;
315
- int threshold;
316
- U32 bitStream;
317
- int bitCount;
318
- unsigned charnum = 0;
319
- int previous0 = 0;
409
+ const FSE_FUNCTION_TYPE* ip = source;
410
+ const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
411
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
412
+ unsigned max=0;
413
+ int s;
320
414
 
321
- bitStream = 0;
322
- bitCount = 0;
323
- /* Table Size */
324
- bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
325
- bitCount += 4;
415
+ U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
416
+ U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
417
+ U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
418
+ U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
326
419
 
327
- /* Init */
328
- remaining = tableSize+1; /* +1 for extra accuracy */
329
- threshold = tableSize;
330
- nbBits = tableLog+1;
420
+ /* safety checks */
421
+ if (!sourceSize)
422
+ {
423
+ memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
424
+ *maxSymbolValuePtr = 0;
425
+ return 0;
426
+ }
427
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
428
+ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
331
429
 
332
- while (remaining>1) /* stops at 1 */
430
+ if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
333
431
  {
334
- if (previous0)
335
- {
336
- unsigned start = charnum;
337
- while (!normalizedCounter[charnum]) charnum++;
338
- while (charnum >= start+24)
339
- {
340
- start+=24;
341
- bitStream += 0xFFFF<<bitCount;
342
- if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
343
- out[0] = (BYTE)bitStream;
344
- out[1] = (BYTE)(bitStream>>8);
345
- out+=2;
346
- bitStream>>=16;
347
- }
348
- while (charnum >= start+3)
349
- {
350
- start+=3;
351
- bitStream += 3 << bitCount;
352
- bitCount += 2;
353
- }
354
- bitStream += (charnum-start) << bitCount;
355
- bitCount += 2;
356
- if (bitCount>16)
357
- {
358
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
359
- out[0] = (BYTE)bitStream;
360
- out[1] = (BYTE)(bitStream>>8);
361
- out += 2;
362
- bitStream >>= 16;
363
- bitCount -= 16;
364
- }
365
- }
432
+ /* check input values, to avoid count table overflow */
433
+ while (ip < iend-3)
366
434
  {
367
- short count = normalizedCounter[charnum++];
368
- const short max = (short)((2*threshold-1)-remaining);
369
- remaining -= FSE_abs(count);
370
- if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;
371
- count++; /* +1 for extra accuracy */
372
- if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
373
- bitStream += count << bitCount;
374
- bitCount += nbBits;
375
- bitCount -= (count<max);
376
- previous0 = (count==1);
377
- while (remaining<threshold) nbBits--, threshold>>=1;
435
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
436
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
437
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
438
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
378
439
  }
379
- if (bitCount>16)
440
+ }
441
+ else
442
+ {
443
+ U32 cached = FSE_read32(ip); ip += 4;
444
+ while (ip < iend-15)
380
445
  {
381
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
382
- out[0] = (BYTE)bitStream;
383
- out[1] = (BYTE)(bitStream>>8);
384
- out += 2;
385
- bitStream >>= 16;
386
- bitCount -= 16;
446
+ U32 c = cached; cached = FSE_read32(ip); ip += 4;
447
+ Counting1[(BYTE) c ]++;
448
+ Counting2[(BYTE)(c>>8) ]++;
449
+ Counting3[(BYTE)(c>>16)]++;
450
+ Counting4[ c>>24 ]++;
451
+ c = cached; cached = FSE_read32(ip); ip += 4;
452
+ Counting1[(BYTE) c ]++;
453
+ Counting2[(BYTE)(c>>8) ]++;
454
+ Counting3[(BYTE)(c>>16)]++;
455
+ Counting4[ c>>24 ]++;
456
+ c = cached; cached = FSE_read32(ip); ip += 4;
457
+ Counting1[(BYTE) c ]++;
458
+ Counting2[(BYTE)(c>>8) ]++;
459
+ Counting3[(BYTE)(c>>16)]++;
460
+ Counting4[ c>>24 ]++;
461
+ c = cached; cached = FSE_read32(ip); ip += 4;
462
+ Counting1[(BYTE) c ]++;
463
+ Counting2[(BYTE)(c>>8) ]++;
464
+ Counting3[(BYTE)(c>>16)]++;
465
+ Counting4[ c>>24 ]++;
387
466
  }
467
+ ip-=4;
388
468
  }
389
469
 
390
- /* flush remaining bitStream */
391
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
392
- out[0] = (BYTE)bitStream;
393
- out[1] = (BYTE)(bitStream>>8);
394
- out+= (bitCount+7) /8;
470
+ /* finish last symbols */
471
+ while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
395
472
 
396
- if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC; /* Too many symbols written (a bit too late?) */
473
+ for (s=0; s<=(int)maxSymbolValue; s++)
474
+ {
475
+ count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
476
+ if (count[s] > max) max = count[s];
477
+ }
397
478
 
398
- return (out-ostart);
479
+ while (!count[maxSymbolValue]) maxSymbolValue--;
480
+ *maxSymbolValuePtr = maxSymbolValue;
481
+ return (size_t)max;
399
482
  }
400
483
 
401
-
402
- size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
484
+ /* hidden fast variant (unsafe) */
485
+ size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
486
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
403
487
  {
404
- if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
405
- if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
406
-
407
- if (headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog))
408
- return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
488
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
489
+ }
409
490
 
410
- return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
491
+ size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
492
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
493
+ {
494
+ if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
495
+ {
496
+ *maxSymbolValuePtr = 255;
497
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
498
+ }
499
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
411
500
  }
412
501
 
413
502
 
414
- size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
415
- const void* headerBuffer, size_t hbSize)
503
+ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
504
+
505
+ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
506
+ (FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
416
507
  {
417
- const BYTE* const istart = (const BYTE*) headerBuffer;
418
- const BYTE* ip = istart;
419
- int nbBits;
420
- int remaining;
508
+ const unsigned tableSize = 1 << tableLog;
509
+ const unsigned tableMask = tableSize - 1;
510
+ U16* tableU16 = ( (U16*) ct) + 2;
511
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
512
+ const unsigned step = FSE_tableStep(tableSize);
513
+ unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
514
+ U32 position = 0;
515
+ FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */
516
+ U32 highThreshold = tableSize-1;
517
+ unsigned symbol;
518
+ unsigned i;
519
+
520
+ /* header */
521
+ tableU16[-2] = (U16) tableLog;
522
+ tableU16[-1] = (U16) maxSymbolValue;
523
+
524
+ /* For explanations on how to distribute symbol values over the table :
525
+ * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
526
+
527
+ /* symbol start positions */
528
+ cumul[0] = 0;
529
+ for (i=1; i<=maxSymbolValue+1; i++)
530
+ {
531
+ if (normalizedCounter[i-1]==-1) /* Low prob symbol */
532
+ {
533
+ cumul[i] = cumul[i-1] + 1;
534
+ tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
535
+ }
536
+ else
537
+ cumul[i] = cumul[i-1] + normalizedCounter[i-1];
538
+ }
539
+ cumul[maxSymbolValue+1] = tableSize+1;
540
+
541
+ /* Spread symbols */
542
+ for (symbol=0; symbol<=maxSymbolValue; symbol++)
543
+ {
544
+ int nbOccurences;
545
+ for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
546
+ {
547
+ tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
548
+ position = (position + step) & tableMask;
549
+ while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
550
+ }
551
+ }
552
+
553
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
554
+
555
+ /* Build table */
556
+ for (i=0; i<tableSize; i++)
557
+ {
558
+ FSE_FUNCTION_TYPE s = tableSymbol[i]; /* static analyzer doesn't understand tableSymbol is properly initialized */
559
+ tableU16[cumul[s]++] = (U16) (tableSize+i); /* TableU16 : sorted by symbol order; gives next state value */
560
+ }
561
+
562
+ /* Build Symbol Transformation Table */
563
+ {
564
+ unsigned s;
565
+ unsigned total = 0;
566
+ for (s=0; s<=maxSymbolValue; s++)
567
+ {
568
+ switch (normalizedCounter[s])
569
+ {
570
+ case 0:
571
+ break;
572
+ case -1:
573
+ case 1:
574
+ symbolTT[s].deltaNbBits = tableLog << 16;
575
+ symbolTT[s].deltaFindState = total - 1;
576
+ total ++;
577
+ break;
578
+ default :
579
+ {
580
+ U32 maxBitsOut = tableLog - FSE_highbit32 (normalizedCounter[s]-1);
581
+ U32 minStatePlus = normalizedCounter[s] << maxBitsOut;
582
+ symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
583
+ symbolTT[s].deltaFindState = total - normalizedCounter[s];
584
+ total += normalizedCounter[s];
585
+ }
586
+ }
587
+ }
588
+ }
589
+
590
+ return 0;
591
+ }
592
+
593
+
594
+ #define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
595
+
596
+ FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
597
+ {
598
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
599
+ return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
600
+ }
601
+
602
+ void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
603
+ {
604
+ free(dt);
605
+ }
606
+
607
+ typedef struct {
608
+ U16 tableLog;
609
+ U16 fastMode;
610
+ } FSE_DTableHeader; /* sizeof U32 */
611
+
612
+ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
613
+ (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
614
+ {
615
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
616
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1); /* because dt is unsigned, 32-bits aligned on 32-bits */
617
+ const U32 tableSize = 1 << tableLog;
618
+ const U32 tableMask = tableSize-1;
619
+ const U32 step = FSE_tableStep(tableSize);
620
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
621
+ U32 position = 0;
622
+ U32 highThreshold = tableSize-1;
623
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
624
+ U32 noLarge = 1;
625
+ U32 s;
626
+
627
+ /* Sanity Checks */
628
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
629
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
630
+
631
+ /* Init, lay down lowprob symbols */
632
+ DTableH[0].tableLog = (U16)tableLog;
633
+ for (s=0; s<=maxSymbolValue; s++)
634
+ {
635
+ if (normalizedCounter[s]==-1)
636
+ {
637
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
638
+ symbolNext[s] = 1;
639
+ }
640
+ else
641
+ {
642
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
643
+ symbolNext[s] = normalizedCounter[s];
644
+ }
645
+ }
646
+
647
+ /* Spread symbols */
648
+ for (s=0; s<=maxSymbolValue; s++)
649
+ {
650
+ int i;
651
+ for (i=0; i<normalizedCounter[s]; i++)
652
+ {
653
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
654
+ position = (position + step) & tableMask;
655
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
656
+ }
657
+ }
658
+
659
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
660
+
661
+ /* Build Decoding table */
662
+ {
663
+ U32 i;
664
+ for (i=0; i<tableSize; i++)
665
+ {
666
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
667
+ U16 nextState = symbolNext[symbol]++;
668
+ tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
669
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
670
+ }
671
+ }
672
+
673
+ DTableH->fastMode = (U16)noLarge;
674
+ return 0;
675
+ }
676
+
677
+
678
+ /******************************************
679
+ * FSE byte symbol
680
+ ******************************************/
681
+ #ifndef FSE_COMMONDEFS_ONLY
682
+
683
+ unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
684
+
685
+ #define FSE_GENERATE_STRING(STRING) #STRING,
686
+ static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
687
+
688
+ const char* FSE_getErrorName(size_t code)
689
+ {
690
+ static const char* codeError = "Unspecified error code";
691
+ if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
692
+ return codeError;
693
+ }
694
+
695
+ static short FSE_abs(short a)
696
+ {
697
+ return a<0? -a : a;
698
+ }
699
+
700
+
701
+ /****************************************************************
702
+ * Header bitstream management
703
+ ****************************************************************/
704
+ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
705
+ {
706
+ size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
707
+ return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
708
+ }
709
+
710
+ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
711
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
712
+ unsigned writeIsSafe)
713
+ {
714
+ BYTE* const ostart = (BYTE*) header;
715
+ BYTE* out = ostart;
716
+ BYTE* const oend = ostart + headerBufferSize;
717
+ int nbBits;
718
+ const int tableSize = 1 << tableLog;
719
+ int remaining;
421
720
  int threshold;
422
721
  U32 bitStream;
423
722
  int bitCount;
424
723
  unsigned charnum = 0;
425
724
  int previous0 = 0;
426
725
 
726
+ bitStream = 0;
727
+ bitCount = 0;
728
+ /* Table Size */
729
+ bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
730
+ bitCount += 4;
731
+
732
+ /* Init */
733
+ remaining = tableSize+1; /* +1 for extra accuracy */
734
+ threshold = tableSize;
735
+ nbBits = tableLog+1;
736
+
737
+ while (remaining>1) /* stops at 1 */
738
+ {
739
+ if (previous0)
740
+ {
741
+ unsigned start = charnum;
742
+ while (!normalizedCounter[charnum]) charnum++;
743
+ while (charnum >= start+24)
744
+ {
745
+ start+=24;
746
+ bitStream += 0xFFFFU << bitCount;
747
+ if ((!writeIsSafe) && (out > oend-2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
748
+ out[0] = (BYTE) bitStream;
749
+ out[1] = (BYTE)(bitStream>>8);
750
+ out+=2;
751
+ bitStream>>=16;
752
+ }
753
+ while (charnum >= start+3)
754
+ {
755
+ start+=3;
756
+ bitStream += 3 << bitCount;
757
+ bitCount += 2;
758
+ }
759
+ bitStream += (charnum-start) << bitCount;
760
+ bitCount += 2;
761
+ if (bitCount>16)
762
+ {
763
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
764
+ out[0] = (BYTE)bitStream;
765
+ out[1] = (BYTE)(bitStream>>8);
766
+ out += 2;
767
+ bitStream >>= 16;
768
+ bitCount -= 16;
769
+ }
770
+ }
771
+ {
772
+ short count = normalizedCounter[charnum++];
773
+ const short max = (short)((2*threshold-1)-remaining);
774
+ remaining -= FSE_abs(count);
775
+ if (remaining<1) return (size_t)-FSE_ERROR_GENERIC;
776
+ count++; /* +1 for extra accuracy */
777
+ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
778
+ bitStream += count << bitCount;
779
+ bitCount += nbBits;
780
+ bitCount -= (count<max);
781
+ previous0 = (count==1);
782
+ while (remaining<threshold) nbBits--, threshold>>=1;
783
+ }
784
+ if (bitCount>16)
785
+ {
786
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
787
+ out[0] = (BYTE)bitStream;
788
+ out[1] = (BYTE)(bitStream>>8);
789
+ out += 2;
790
+ bitStream >>= 16;
791
+ bitCount -= 16;
792
+ }
793
+ }
794
+
795
+ /* flush remaining bitStream */
796
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
797
+ out[0] = (BYTE)bitStream;
798
+ out[1] = (BYTE)(bitStream>>8);
799
+ out+= (bitCount+7) /8;
800
+
801
+ if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;
802
+
803
+ return (out-ostart);
804
+ }
805
+
806
+
807
+ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
808
+ {
809
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
810
+ if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
811
+
812
+ if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
813
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
814
+
815
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
816
+ }
817
+
818
+
819
+ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
820
+ const void* headerBuffer, size_t hbSize)
821
+ {
822
+ const BYTE* const istart = (const BYTE*) headerBuffer;
823
+ const BYTE* const iend = istart + hbSize;
824
+ const BYTE* ip = istart;
825
+ int nbBits;
826
+ int remaining;
827
+ int threshold;
828
+ U32 bitStream;
829
+ int bitCount;
830
+ unsigned charnum = 0;
831
+ int previous0 = 0;
832
+
833
+ if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
427
834
  bitStream = FSE_readLE32(ip);
428
835
  nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
429
836
  if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
@@ -442,8 +849,16 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
442
849
  while ((bitStream & 0xFFFF) == 0xFFFF)
443
850
  {
444
851
  n0+=24;
445
- ip+=2;
446
- bitStream = FSE_readLE32(ip) >> bitCount;
852
+ if (ip < iend-5)
853
+ {
854
+ ip+=2;
855
+ bitStream = FSE_readLE32(ip) >> bitCount;
856
+ }
857
+ else
858
+ {
859
+ bitStream >>= 16;
860
+ bitCount+=16;
861
+ }
447
862
  }
448
863
  while ((bitStream & 3) == 3)
449
864
  {
@@ -453,11 +868,16 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
453
868
  }
454
869
  n0 += bitStream & 3;
455
870
  bitCount += 2;
456
- if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;
871
+ if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
457
872
  while (charnum < n0) normalizedCounter[charnum++] = 0;
458
- ip += bitCount>>3;
459
- bitCount &= 7;
460
- bitStream = FSE_readLE32(ip) >> bitCount;
873
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
874
+ {
875
+ ip += bitCount>>3;
876
+ bitCount &= 7;
877
+ bitStream = FSE_readLE32(ip) >> bitCount;
878
+ }
879
+ else
880
+ bitStream >>= 2;
461
881
  }
462
882
  {
463
883
  const short max = (short)((2*threshold-1)-remaining);
@@ -485,16 +905,26 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
485
905
  threshold >>= 1;
486
906
  }
487
907
 
488
- ip += bitCount>>3;
489
- bitCount &= 7;
490
- bitStream = FSE_readLE32(ip) >> bitCount;
908
+ {
909
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
910
+ {
911
+ ip += bitCount>>3;
912
+ bitCount &= 7;
913
+ }
914
+ else
915
+ {
916
+ bitCount -= (int)(8 * (iend - 4 - ip));
917
+ ip = iend - 4;
918
+ }
919
+ bitStream = FSE_readLE32(ip) >> (bitCount & 31);
920
+ }
491
921
  }
492
922
  }
493
923
  if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
494
924
  *maxSVPtr = charnum-1;
495
925
 
496
- ip += bitCount>0;
497
- if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* arguably a bit late , tbd */
926
+ ip += (bitCount+7)>>3;
927
+ if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
498
928
  return ip-istart;
499
929
  }
500
930
 
@@ -503,7 +933,7 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
503
933
  * FSE Compression Code
504
934
  ****************************************************************/
505
935
  /*
506
- CTable is a variable size structure which contains :
936
+ FSE_CTable[0] is a variable size structure which contains :
507
937
  U16 tableLog;
508
938
  U16 maxSymbolValue;
509
939
  U16 nextStateNumber[1 << tableLog]; // This size is variable
@@ -520,82 +950,129 @@ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
520
950
  return size;
521
951
  }
522
952
 
523
- void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
953
+ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
524
954
  {
525
955
  size_t size;
526
956
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
527
957
  size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
528
- return malloc(size);
958
+ return (FSE_CTable*)malloc(size);
529
959
  }
530
960
 
531
- void FSE_freeCTable (void* CTable)
961
+ void FSE_freeCTable (FSE_CTable* ct)
532
962
  {
533
- free(CTable);
963
+ free(ct);
534
964
  }
535
965
 
536
966
 
967
+ /* provides the minimum logSize to safely represent a distribution */
968
+ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
969
+ {
970
+ U32 minBitsSrc = FSE_highbit32((U32)(srcSize - 1)) + 1;
971
+ U32 minBitsSymbols = FSE_highbit32(maxSymbolValue) + 2;
972
+ U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
973
+ return minBits;
974
+ }
975
+
537
976
  unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
538
977
  {
978
+ U32 maxBitsSrc = FSE_highbit32((U32)(srcSize - 1)) - 2;
539
979
  U32 tableLog = maxTableLog;
980
+ U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
540
981
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
541
- if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
542
- if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */
982
+ if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
983
+ if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
543
984
  if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
544
985
  if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
545
986
  return tableLog;
546
987
  }
547
988
 
548
989
 
549
- typedef struct
550
- {
551
- U32 id;
552
- U32 count;
553
- } rank_t;
554
-
555
- int FSE_compareRankT(const void* r1, const void* r2)
556
- {
557
- const rank_t* R1 = (const rank_t*)r1;
558
- const rank_t* R2 = (const rank_t*)r2;
559
-
560
- return 2 * (R1->count < R2->count) - 1;
561
- }
990
+ /* Secondary normalization method.
991
+ To be used when primary method fails. */
562
992
 
563
- static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
993
+ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
564
994
  {
565
- rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
566
995
  U32 s;
996
+ U32 distributed = 0;
997
+ U32 ToDistribute;
567
998
 
568
999
  /* Init */
1000
+ U32 lowThreshold = (U32)(total >> tableLog);
1001
+ U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
1002
+
569
1003
  for (s=0; s<=maxSymbolValue; s++)
570
1004
  {
571
- rank[s].id = s;
572
- rank[s].count = count[s];
573
- if (norm[s] <= 1) rank[s].count = 0;
1005
+ if (count[s] == 0)
1006
+ {
1007
+ norm[s]=0;
1008
+ continue;
1009
+ }
1010
+ if (count[s] <= lowThreshold)
1011
+ {
1012
+ norm[s] = -1;
1013
+ distributed++;
1014
+ total -= count[s];
1015
+ continue;
1016
+ }
1017
+ if (count[s] <= lowOne)
1018
+ {
1019
+ norm[s] = 1;
1020
+ distributed++;
1021
+ total -= count[s];
1022
+ continue;
1023
+ }
1024
+ norm[s]=-2;
574
1025
  }
575
- rank[maxSymbolValue+1].id = 0;
576
- rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
1026
+ ToDistribute = (1 << tableLog) - distributed;
577
1027
 
578
- /* Sort according to count */
579
- qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
1028
+ if ((total / ToDistribute) > lowOne)
1029
+ {
1030
+ /* risk of rounding to zero */
1031
+ lowOne = (U32)((total * 3) / (ToDistribute * 2));
1032
+ for (s=0; s<=maxSymbolValue; s++)
1033
+ {
1034
+ if ((norm[s] == -2) && (count[s] <= lowOne))
1035
+ {
1036
+ norm[s] = 1;
1037
+ distributed++;
1038
+ total -= count[s];
1039
+ continue;
1040
+ }
1041
+ }
1042
+ ToDistribute = (1 << tableLog) - distributed;
1043
+ }
1044
+
1045
+ if (distributed == maxSymbolValue+1)
1046
+ {
1047
+ /* all values are pretty poor;
1048
+ probably incompressible data (should have already been detected);
1049
+ find max, then give all remaining points to max */
1050
+ U32 maxV = 0, maxC =0;
1051
+ for (s=0; s<=maxSymbolValue; s++)
1052
+ if (count[s] > maxC) maxV=s, maxC=count[s];
1053
+ norm[maxV] += (short)ToDistribute;
1054
+ return 0;
1055
+ }
580
1056
 
581
- while(pointsToRemove)
582
1057
  {
583
- int newRank = 1;
584
- rank_t savedR;
585
- if (norm[rank[0].id] == 1)
586
- return (size_t)-FSE_ERROR_GENERIC;
587
- norm[rank[0].id]--;
588
- pointsToRemove--;
589
- rank[0].count -= (rank[0].count + 6) >> 3;
590
- if (norm[rank[0].id] == 1)
591
- rank[0].count=0;
592
- savedR = rank[0];
593
- while (rank[newRank].count > savedR.count)
1058
+ U64 const vStepLog = 62 - tableLog;
1059
+ U64 const mid = (1ULL << (vStepLog-1)) - 1;
1060
+ U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
1061
+ U64 tmpTotal = mid;
1062
+ for (s=0; s<=maxSymbolValue; s++)
594
1063
  {
595
- rank[newRank-1] = rank[newRank];
596
- newRank++;
1064
+ if (norm[s]==-2)
1065
+ {
1066
+ U64 end = tmpTotal + (count[s] * rStep);
1067
+ U32 sStart = (U32)(tmpTotal >> vStepLog);
1068
+ U32 sEnd = (U32)(end >> vStepLog);
1069
+ U32 weight = sEnd - sStart;
1070
+ if (weight < 1)
1071
+ return (size_t)-FSE_ERROR_GENERIC;
1072
+ norm[s] = (short)weight;
1073
+ tmpTotal = end;
1074
+ }
597
1075
  }
598
- rank[newRank-1] = savedR;
599
1076
  }
600
1077
 
601
1078
  return 0;
@@ -610,7 +1087,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
610
1087
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
611
1088
  if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
612
1089
  if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
613
- if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
1090
+ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
614
1091
 
615
1092
  {
616
1093
  U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
@@ -655,10 +1132,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
655
1132
  }
656
1133
  if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
657
1134
  {
658
- /* corner case, need to converge towards normalization with caution */
659
- size_t errorCode = FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
1135
+ /* corner case, need another normalization method */
1136
+ size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
660
1137
  if (FSE_isError(errorCode)) return errorCode;
661
- //FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
662
1138
  }
663
1139
  else normalizedCounter[largest] += (short)stillToDistribute;
664
1140
  }
@@ -681,19 +1157,18 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
681
1157
  }
682
1158
 
683
1159
 
684
- /* fake CTable, for raw (uncompressed) input */
685
- size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
1160
+ /* fake FSE_CTable, for raw (uncompressed) input */
1161
+ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
686
1162
  {
687
1163
  const unsigned tableSize = 1 << nbBits;
688
1164
  const unsigned tableMask = tableSize - 1;
689
1165
  const unsigned maxSymbolValue = tableMask;
690
- U16* tableU16 = ( (U16*) CTable) + 2;
691
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (tableSize>>1));
1166
+ U16* tableU16 = ( (U16*) ct) + 2;
1167
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
692
1168
  unsigned s;
693
1169
 
694
1170
  /* Sanity checks */
695
1171
  if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
696
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
697
1172
 
698
1173
  /* header */
699
1174
  tableU16[-2] = (U16) nbBits;
@@ -706,24 +1181,19 @@ size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
706
1181
  /* Build Symbol Transformation Table */
707
1182
  for (s=0; s<=maxSymbolValue; s++)
708
1183
  {
709
- symbolTT[s].minBitsOut = (BYTE)nbBits;
1184
+ symbolTT[s].deltaNbBits = nbBits << 16;
710
1185
  symbolTT[s].deltaFindState = s-1;
711
- symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
712
1186
  }
713
1187
 
714
1188
  return 0;
715
1189
  }
716
1190
 
717
1191
 
718
- /* fake CTable, for rle (100% always same symbol) input */
719
- size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
1192
+ /* fake FSE_CTable, for rle (100% always same symbol) input */
1193
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
720
1194
  {
721
- const unsigned tableSize = 1;
722
- U16* tableU16 = ( (U16*) CTable) + 2;
723
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)CTable + 2);
724
-
725
- /* safety checks */
726
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be 4 bytes aligned */
1195
+ U16* tableU16 = ( (U16*) ct) + 2;
1196
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
727
1197
 
728
1198
  /* header */
729
1199
  tableU16[-2] = (U16) 0;
@@ -735,32 +1205,40 @@ size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
735
1205
 
736
1206
  /* Build Symbol Transformation Table */
737
1207
  {
738
- symbolTT[symbolValue].minBitsOut = 0;
1208
+ symbolTT[symbolValue].deltaNbBits = 0;
739
1209
  symbolTT[symbolValue].deltaFindState = 0;
740
- symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
741
1210
  }
742
1211
 
743
1212
  return 0;
744
1213
  }
745
1214
 
746
1215
 
747
- void FSE_initCStream(FSE_CStream_t* bitC, void* start)
1216
+ size_t FSE_initCStream(FSE_CStream_t* bitC, void* start, size_t maxSize)
748
1217
  {
1218
+ if (maxSize < sizeof(bitC->ptr)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
749
1219
  bitC->bitContainer = 0;
750
- bitC->bitPos = 0; /* reserved for unusedBits */
1220
+ bitC->bitPos = 0;
751
1221
  bitC->startPtr = (char*)start;
752
1222
  bitC->ptr = bitC->startPtr;
1223
+ bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr);
1224
+ return 0;
753
1225
  }
754
1226
 
755
- void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)
1227
+ void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
756
1228
  {
757
- const U32 tableLog = ( (U16*) CTable) [0];
1229
+ const U32 tableLog = ( (const U16*) ct) [0];
758
1230
  statePtr->value = (ptrdiff_t)1<<tableLog;
759
- statePtr->stateTable = ((const U16*) CTable) + 2;
760
- statePtr->symbolTT = (const U32*)CTable + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
1231
+ statePtr->stateTable = ((const U16*) ct) + 2;
1232
+ statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
761
1233
  statePtr->stateLog = tableLog;
762
1234
  }
763
1235
 
1236
+ void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */
1237
+ {
1238
+ bitC->bitContainer |= value << bitC->bitPos;
1239
+ bitC->bitPos += nbBits;
1240
+ }
1241
+
764
1242
  void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
765
1243
  {
766
1244
  static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
@@ -768,22 +1246,31 @@ void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
768
1246
  bitC->bitPos += nbBits;
769
1247
  }
770
1248
 
771
- void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
1249
+ void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
772
1250
  {
773
- const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
774
- const U16* const stateTable = (const U16*) statePtr->stateTable;
775
- int nbBitsOut = symbolTT[symbol].minBitsOut;
776
- nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);
1251
+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1252
+ const U16* const stateTable = (const U16*)(statePtr->stateTable);
1253
+ U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
777
1254
  FSE_addBits(bitC, statePtr->value, nbBitsOut);
778
- statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];
1255
+ statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
779
1256
  }
780
1257
 
781
- void FSE_flushBits(FSE_CStream_t* bitC)
1258
+ void FSE_flushBitsFast(FSE_CStream_t* bitC) /* only if dst buffer is large enough ( >= FSE_compressBound()) */
782
1259
  {
783
1260
  size_t nbBytes = bitC->bitPos >> 3;
784
1261
  FSE_writeLEST(bitC->ptr, bitC->bitContainer);
1262
+ bitC->ptr += nbBytes;
785
1263
  bitC->bitPos &= 7;
1264
+ bitC->bitContainer >>= nbBytes*8;
1265
+ }
1266
+
1267
+ void FSE_flushBits(FSE_CStream_t* bitC)
1268
+ {
1269
+ size_t nbBytes = bitC->bitPos >> 3;
1270
+ FSE_writeLEST(bitC->ptr, bitC->bitContainer);
786
1271
  bitC->ptr += nbBytes;
1272
+ if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
1273
+ bitC->bitPos &= 7;
787
1274
  bitC->bitContainer >>= nbBytes*8;
788
1275
  }
789
1276
 
@@ -798,9 +1285,12 @@ size_t FSE_closeCStream(FSE_CStream_t* bitC)
798
1285
  {
799
1286
  char* endPtr;
800
1287
 
801
- FSE_addBits(bitC, 1, 1);
1288
+ FSE_addBitsFast(bitC, 1, 1);
802
1289
  FSE_flushBits(bitC);
803
1290
 
1291
+ if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */
1292
+ return 0; /* not compressible */
1293
+
804
1294
  endPtr = bitC->ptr;
805
1295
  endPtr += bitC->bitPos > 0;
806
1296
 
@@ -808,58 +1298,61 @@ size_t FSE_closeCStream(FSE_CStream_t* bitC)
808
1298
  }
809
1299
 
810
1300
 
811
- size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1301
+ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
812
1302
  const void* src, size_t srcSize,
813
- const void* CTable)
1303
+ const FSE_CTable* ct, const unsigned fast)
814
1304
  {
815
1305
  const BYTE* const istart = (const BYTE*) src;
816
1306
  const BYTE* ip;
817
1307
  const BYTE* const iend = istart + srcSize;
818
1308
 
1309
+ size_t errorCode;
819
1310
  FSE_CStream_t bitC;
820
1311
  FSE_CState_t CState1, CState2;
821
1312
 
822
1313
 
823
1314
  /* init */
824
- (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
825
- FSE_initCStream(&bitC, dst);
826
- FSE_initCState(&CState1, CTable);
1315
+ errorCode = FSE_initCStream(&bitC, dst, dstSize);
1316
+ if (FSE_isError(errorCode)) return 0;
1317
+ FSE_initCState(&CState1, ct);
827
1318
  CState2 = CState1;
828
1319
 
829
1320
  ip=iend;
830
1321
 
1322
+ #define FSE_FLUSHBITS(s) (fast ? FSE_flushBitsFast(s) : FSE_flushBits(s))
1323
+
831
1324
  /* join to even */
832
1325
  if (srcSize & 1)
833
1326
  {
834
- FSE_encodeByte(&bitC, &CState1, *--ip);
835
- FSE_flushBits(&bitC);
1327
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
1328
+ FSE_FLUSHBITS(&bitC);
836
1329
  }
837
1330
 
838
1331
  /* join to mod 4 */
839
- if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
1332
+ if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
840
1333
  {
841
- FSE_encodeByte(&bitC, &CState2, *--ip);
842
- FSE_encodeByte(&bitC, &CState1, *--ip);
843
- FSE_flushBits(&bitC);
1334
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
1335
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
1336
+ FSE_FLUSHBITS(&bitC);
844
1337
  }
845
1338
 
846
1339
  /* 2 or 4 encoding per loop */
847
- while (ip>istart)
1340
+ for ( ; ip>istart ; )
848
1341
  {
849
- FSE_encodeByte(&bitC, &CState2, *--ip);
1342
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
850
1343
 
851
- if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
852
- FSE_flushBits(&bitC);
1344
+ if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
1345
+ FSE_FLUSHBITS(&bitC);
853
1346
 
854
- FSE_encodeByte(&bitC, &CState1, *--ip);
1347
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
855
1348
 
856
- if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
1349
+ if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
857
1350
  {
858
- FSE_encodeByte(&bitC, &CState2, *--ip);
859
- FSE_encodeByte(&bitC, &CState1, *--ip);
1351
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
1352
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
860
1353
  }
861
1354
 
862
- FSE_flushBits(&bitC);
1355
+ FSE_FLUSHBITS(&bitC);
863
1356
  }
864
1357
 
865
1358
  FSE_flushCState(&bitC, &CState2);
@@ -867,15 +1360,20 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
867
1360
  return FSE_closeCStream(&bitC);
868
1361
  }
869
1362
 
870
-
871
- static size_t FSE_compressRLE (BYTE *out, BYTE symbol)
1363
+ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1364
+ const void* src, size_t srcSize,
1365
+ const FSE_CTable* ct)
872
1366
  {
873
- *out=symbol;
874
- return 1;
1367
+ const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
1368
+
1369
+ if (fast)
1370
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
1371
+ else
1372
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
875
1373
  }
876
1374
 
877
- size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
878
1375
 
1376
+ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
879
1377
 
880
1378
  size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
881
1379
  {
@@ -888,34 +1386,36 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
888
1386
 
889
1387
  U32 count[FSE_MAX_SYMBOL_VALUE+1];
890
1388
  S16 norm[FSE_MAX_SYMBOL_VALUE+1];
891
- CTable_max_t CTable;
1389
+ CTable_max_t ct;
892
1390
  size_t errorCode;
893
1391
 
894
- /* early out */
895
- if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
896
- if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
1392
+ /* init conditions */
1393
+ if (srcSize <= 1) return 0; /* Uncompressible */
897
1394
  if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
898
1395
  if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
899
1396
 
900
1397
  /* Scan input and build symbol stats */
901
- errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
1398
+ errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
902
1399
  if (FSE_isError(errorCode)) return errorCode;
903
- if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);
904
- if (errorCode < ((srcSize * 7) >> 10)) return 0; /* Heuristic : not compressible enough */
1400
+ if (errorCode == srcSize) return 1;
1401
+ if (errorCode == 1) return 0; /* each symbol only present once */
1402
+ if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
905
1403
 
906
1404
  tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
907
1405
  errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
908
1406
  if (FSE_isError(errorCode)) return errorCode;
909
1407
 
910
1408
  /* Write table description header */
911
- errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
1409
+ errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
912
1410
  if (FSE_isError(errorCode)) return errorCode;
913
1411
  op += errorCode;
914
1412
 
915
1413
  /* Compress */
916
- errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
1414
+ errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
917
1415
  if (FSE_isError(errorCode)) return errorCode;
918
- op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);
1416
+ errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
1417
+ if (errorCode == 0) return 0; /* not enough space for compressed data */
1418
+ op += errorCode;
919
1419
 
920
1420
  /* check compressibility */
921
1421
  if ( (size_t)(op-ostart) >= srcSize-1 )
@@ -924,7 +1424,6 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
924
1424
  return op-ostart;
925
1425
  }
926
1426
 
927
-
928
1427
  size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
929
1428
  {
930
1429
  return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
@@ -934,32 +1433,13 @@ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
934
1433
  /*********************************************************
935
1434
  * Decompression (Byte symbols)
936
1435
  *********************************************************/
937
- typedef struct
938
- {
939
- U16 newState;
940
- BYTE symbol;
941
- BYTE nbBits;
942
- } FSE_decode_t; /* size == U32 */
943
-
944
- /* Specific corner case : RLE compression */
945
- size_t FSE_decompressRLE(void* dst, size_t originalSize,
946
- const void* cSrc, size_t cSrcSize)
1436
+ size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
947
1437
  {
948
- if (cSrcSize != 1) return (size_t)-FSE_ERROR_srcSize_wrong;
949
- memset(dst, *(BYTE*)cSrc, originalSize);
950
- return originalSize;
951
- }
1438
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1439
+ FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
952
1440
 
953
-
954
- size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
955
- {
956
- U32* const base32 = (U32*)DTable;
957
- FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
958
-
959
- /* Sanity check */
960
- if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
961
-
962
- base32[0] = 0;
1441
+ DTableH->tableLog = 0;
1442
+ DTableH->fastMode = 0;
963
1443
 
964
1444
  cell->newState = 0;
965
1445
  cell->symbol = symbolValue;
@@ -969,10 +1449,10 @@ size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
969
1449
  }
970
1450
 
971
1451
 
972
- size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
1452
+ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
973
1453
  {
974
- U32* const base32 = (U32*)DTable;
975
- FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
1454
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1455
+ FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
976
1456
  const unsigned tableSize = 1 << nbBits;
977
1457
  const unsigned tableMask = tableSize - 1;
978
1458
  const unsigned maxSymbolValue = tableMask;
@@ -980,10 +1460,10 @@ size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
980
1460
 
981
1461
  /* Sanity checks */
982
1462
  if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
983
- if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
984
1463
 
985
1464
  /* Build Decoding Table */
986
- base32[0] = nbBits;
1465
+ DTableH->tableLog = (U16)nbBits;
1466
+ DTableH->fastMode = 1;
987
1467
  for (s=0; s<=maxSymbolValue; s++)
988
1468
  {
989
1469
  dinfo[s].newState = 0;
@@ -1005,95 +1485,127 @@ size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSiz
1005
1485
  {
1006
1486
  if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1007
1487
 
1008
- if (srcSize >= sizeof(bitD_t))
1488
+ if (srcSize >= sizeof(size_t))
1009
1489
  {
1010
1490
  U32 contain32;
1011
- bitD->start = (char*)srcBuffer;
1012
- bitD->ptr = (char*)srcBuffer + srcSize - sizeof(bitD_t);
1491
+ bitD->start = (const char*)srcBuffer;
1492
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
1013
1493
  bitD->bitContainer = FSE_readLEST(bitD->ptr);
1014
- contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1494
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
1015
1495
  if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1016
1496
  bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1017
1497
  }
1018
1498
  else
1019
1499
  {
1020
1500
  U32 contain32;
1021
- bitD->start = (char*)srcBuffer;
1501
+ bitD->start = (const char*)srcBuffer;
1022
1502
  bitD->ptr = bitD->start;
1023
- bitD->bitContainer = *(BYTE*)(bitD->start);
1503
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
1024
1504
  switch(srcSize)
1025
1505
  {
1026
- case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
1027
- case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
1028
- case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
1029
- case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
1030
- case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
1031
- case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) << 8;
1506
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
1507
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
1508
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
1509
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
1510
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
1511
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
1032
1512
  default:;
1033
1513
  }
1034
- contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1514
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
1035
1515
  if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1036
1516
  bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1037
- bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;
1517
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
1038
1518
  }
1039
1519
 
1040
1520
  return srcSize;
1041
1521
  }
1042
1522
 
1043
1523
 
1524
+ /* FSE_lookBits
1525
+ * Provides next n bits from the bitContainer.
1526
+ * bitContainer is not modified (bits are still present for next read/look)
1527
+ * On 32-bits, maxNbBits==25
1528
+ * On 64-bits, maxNbBits==57
1529
+ * return : value extracted.
1530
+ */
1531
+ static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
1532
+ {
1533
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
1534
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
1535
+ }
1536
+
1537
+ static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1538
+ {
1539
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
1540
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
1541
+ }
1542
+
1543
+ static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
1544
+ {
1545
+ bitD->bitsConsumed += nbBits;
1546
+ }
1547
+
1548
+
1044
1549
  /* FSE_readBits
1045
1550
  * Read next n bits from the bitContainer.
1046
- * Use the fast variant *only* if n > 0.
1047
- * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
1551
+ * On 32-bits, don't read more than maxNbBits==25
1552
+ * On 64-bits, don't read more than maxNbBits==57
1553
+ * Use the fast variant *only* if n >= 1.
1048
1554
  * return : value extracted.
1049
1555
  */
1050
- bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1556
+ size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1051
1557
  {
1052
- bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);
1053
- bitD->bitsConsumed += nbBits;
1558
+ size_t value = FSE_lookBits(bitD, nbBits);
1559
+ FSE_skipBits(bitD, nbBits);
1054
1560
  return value;
1055
1561
  }
1056
1562
 
1057
- bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 */
1563
+ size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1058
1564
  {
1059
- bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);
1060
- bitD->bitsConsumed += nbBits;
1565
+ size_t value = FSE_lookBitsFast(bitD, nbBits);
1566
+ FSE_skipBits(bitD, nbBits);
1061
1567
  return value;
1062
1568
  }
1063
1569
 
1064
1570
  unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1065
1571
  {
1066
- if (bitD->ptr >= bitD->start + sizeof(bitD_t))
1572
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
1573
+ return FSE_DStream_tooFar;
1574
+
1575
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
1067
1576
  {
1068
1577
  bitD->ptr -= bitD->bitsConsumed >> 3;
1069
1578
  bitD->bitsConsumed &= 7;
1070
1579
  bitD->bitContainer = FSE_readLEST(bitD->ptr);
1071
- return 0;
1580
+ return FSE_DStream_unfinished;
1072
1581
  }
1073
1582
  if (bitD->ptr == bitD->start)
1074
1583
  {
1075
- if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
1076
- if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
1077
- return 3;
1584
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
1585
+ return FSE_DStream_completed;
1078
1586
  }
1079
1587
  {
1080
1588
  U32 nbBytes = bitD->bitsConsumed >> 3;
1589
+ U32 result = FSE_DStream_unfinished;
1081
1590
  if (bitD->ptr - nbBytes < bitD->start)
1082
- nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
1591
+ {
1592
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
1593
+ result = FSE_DStream_endOfBuffer;
1594
+ }
1083
1595
  bitD->ptr -= nbBytes;
1084
1596
  bitD->bitsConsumed -= nbBytes*8;
1085
- bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
1086
- return (bitD->ptr == bitD->start);
1597
+ bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
1598
+ return result;
1087
1599
  }
1088
1600
  }
1089
1601
 
1090
1602
 
1091
- void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)
1603
+ void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
1092
1604
  {
1093
- const U32* const base32 = (const U32*)DTable;
1094
- DStatePtr->state = FSE_readBits(bitD, base32[0]);
1605
+ const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
1606
+ DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
1095
1607
  FSE_reloadDStream(bitD);
1096
- DStatePtr->table = base32 + 1;
1608
+ DStatePtr->table = dt + 1;
1097
1609
  }
1098
1610
 
1099
1611
  BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
@@ -1101,7 +1613,7 @@ BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1101
1613
  const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1102
1614
  const U32 nbBits = DInfo.nbBits;
1103
1615
  BYTE symbol = DInfo.symbol;
1104
- bitD_t lowBits = FSE_readBits(bitD, nbBits);
1616
+ size_t lowBits = FSE_readBits(bitD, nbBits);
1105
1617
 
1106
1618
  DStatePtr->state = DInfo.newState + lowBits;
1107
1619
  return symbol;
@@ -1112,7 +1624,7 @@ BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1112
1624
  const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1113
1625
  const U32 nbBits = DInfo.nbBits;
1114
1626
  BYTE symbol = DInfo.symbol;
1115
- bitD_t lowBits = FSE_readBitsFast(bitD, nbBits);
1627
+ size_t lowBits = FSE_readBitsFast(bitD, nbBits);
1116
1628
 
1117
1629
  DStatePtr->state = DInfo.newState + lowBits;
1118
1630
  return symbol;
@@ -1123,19 +1635,19 @@ BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1123
1635
 
1124
1636
  unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1125
1637
  {
1126
- return FSE_reloadDStream((FSE_DStream_t*)bitD)==2;
1638
+ return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
1127
1639
  }
1128
1640
 
1129
- unsigned FSE_endOfDState(const FSE_DState_t* statePtr)
1641
+ unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
1130
1642
  {
1131
- return statePtr->state == 0;
1643
+ return DStatePtr->state == 0;
1132
1644
  }
1133
1645
 
1134
1646
 
1135
1647
  FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1136
1648
  void* dst, size_t maxDstSize,
1137
1649
  const void* cSrc, size_t cSrcSize,
1138
- const void* DTable, unsigned fast)
1650
+ const FSE_DTable* dt, const unsigned fast)
1139
1651
  {
1140
1652
  BYTE* const ostart = (BYTE*) dst;
1141
1653
  BYTE* op = ostart;
@@ -1143,50 +1655,57 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1143
1655
  BYTE* const olimit = omax-3;
1144
1656
 
1145
1657
  FSE_DStream_t bitD;
1146
- FSE_DState_t state1, state2;
1658
+ FSE_DState_t state1;
1659
+ FSE_DState_t state2;
1147
1660
  size_t errorCode;
1148
1661
 
1149
1662
  /* Init */
1150
1663
  errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1151
1664
  if (FSE_isError(errorCode)) return errorCode;
1152
1665
 
1153
- FSE_initDState(&state1, &bitD, DTable);
1154
- FSE_initDState(&state2, &bitD, DTable);
1666
+ FSE_initDState(&state1, &bitD, dt);
1667
+ FSE_initDState(&state2, &bitD, dt);
1155
1668
 
1669
+ #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
1156
1670
 
1157
- /* 2 symbols per loop */
1158
- while (!FSE_reloadDStream(&bitD) && (op<olimit))
1671
+ /* 4 symbols per loop */
1672
+ for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
1159
1673
  {
1160
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1674
+ op[0] = FSE_GETSYMBOL(&state1);
1161
1675
 
1162
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8) /* This test must be static */
1676
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1163
1677
  FSE_reloadDStream(&bitD);
1164
1678
 
1165
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1679
+ op[1] = FSE_GETSYMBOL(&state2);
1166
1680
 
1167
- if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8) /* This test must be static */
1168
- {
1169
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1170
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1171
- }
1681
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1682
+ { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
1683
+
1684
+ op[2] = FSE_GETSYMBOL(&state1);
1685
+
1686
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1687
+ FSE_reloadDStream(&bitD);
1688
+
1689
+ op[3] = FSE_GETSYMBOL(&state2);
1172
1690
  }
1173
1691
 
1174
1692
  /* tail */
1693
+ /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
1175
1694
  while (1)
1176
1695
  {
1177
- if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1696
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
1178
1697
  break;
1179
1698
 
1180
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1699
+ *op++ = FSE_GETSYMBOL(&state1);
1181
1700
 
1182
- if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1701
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
1183
1702
  break;
1184
1703
 
1185
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1704
+ *op++ = FSE_GETSYMBOL(&state2);
1186
1705
  }
1187
1706
 
1188
1707
  /* end ? */
1189
- if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1708
+ if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
1190
1709
  return op-ostart;
1191
1710
 
1192
1711
  if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
@@ -1197,11 +1716,14 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1197
1716
 
1198
1717
  size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1199
1718
  const void* cSrc, size_t cSrcSize,
1200
- const void* DTable, size_t fastMode)
1719
+ const FSE_DTable* dt)
1201
1720
  {
1721
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
1722
+ const U32 fastMode = DTableH->fastMode;
1723
+
1202
1724
  /* select fast mode (static) */
1203
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1);
1204
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);
1725
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
1726
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
1205
1727
  }
1206
1728
 
1207
1729
 
@@ -1210,312 +1732,735 @@ size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSr
1210
1732
  const BYTE* const istart = (const BYTE*)cSrc;
1211
1733
  const BYTE* ip = istart;
1212
1734
  short counting[FSE_MAX_SYMBOL_VALUE+1];
1213
- FSE_decode_t DTable[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
1214
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1735
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
1215
1736
  unsigned tableLog;
1216
- size_t errorCode, fastMode;
1737
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1738
+ size_t errorCode;
1217
1739
 
1218
1740
  if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1219
1741
 
1220
1742
  /* normal FSE decoding mode */
1221
- errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1743
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1222
1744
  if (FSE_isError(errorCode)) return errorCode;
1223
1745
  if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1224
1746
  ip += errorCode;
1225
1747
  cSrcSize -= errorCode;
1226
1748
 
1227
- fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);
1228
- if (FSE_isError(fastMode)) return fastMode;
1749
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
1750
+ if (FSE_isError(errorCode)) return errorCode;
1229
1751
 
1230
1752
  /* always return, even if it is an error code */
1231
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
1753
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
1232
1754
  }
1233
1755
 
1234
1756
 
1235
- #endif /* FSE_COMMONDEFS_ONLY */
1236
1757
 
1237
- /*
1238
- 2nd part of the file
1239
- designed to be included
1240
- for type-specific functions (template equivalent in C)
1241
- Objective is to write such functions only once, for better maintenance
1242
- */
1243
-
1244
- /* safety checks */
1245
- #ifndef FSE_FUNCTION_EXTENSION
1246
- # error "FSE_FUNCTION_EXTENSION must be defined"
1247
- #endif
1248
- #ifndef FSE_FUNCTION_TYPE
1249
- # error "FSE_FUNCTION_TYPE must be defined"
1758
+ /*********************************************************
1759
+ * Huff0 : Huffman block compression
1760
+ *********************************************************/
1761
+ #define HUF_MAX_SYMBOL_VALUE 255
1762
+ #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
1763
+ #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
1764
+ #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
1765
+ #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
1766
+ # error "HUF_MAX_TABLELOG is too large !"
1250
1767
  #endif
1251
1768
 
1252
- /* Function names */
1253
- #define FSE_CAT(X,Y) X##Y
1254
- #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
1255
- #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
1769
+ typedef struct HUF_CElt_s {
1770
+ U16 val;
1771
+ BYTE nbBits;
1772
+ } HUF_CElt ;
1256
1773
 
1774
+ typedef struct nodeElt_s {
1775
+ U32 count;
1776
+ U16 parent;
1777
+ BYTE byte;
1778
+ BYTE nbBits;
1779
+ } nodeElt;
1257
1780
 
1258
- /* Function templates */
1259
- size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
1781
+ /* HUF_writeCTable() :
1782
+ return : size of saved CTable */
1783
+ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog)
1260
1784
  {
1261
- const FSE_FUNCTION_TYPE* ip = source;
1262
- const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
1263
- unsigned maxSymbolValue = *maxSymbolValuePtr;
1264
- unsigned max=0;
1265
- int s;
1785
+ BYTE bitsToWeight[HUF_ABSOLUTEMAX_TABLELOG + 1];
1786
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
1787
+ U32 n;
1788
+ BYTE* op = (BYTE*)dst;
1789
+ size_t size;
1266
1790
 
1267
- U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1268
- U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1269
- U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1270
- U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1791
+ /* check conditions */
1792
+ if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
1793
+ return (size_t)-FSE_ERROR_GENERIC;
1794
+
1795
+ /* convert to weight */
1796
+ bitsToWeight[0] = 0;
1797
+ for (n=1; n<=huffLog; n++)
1798
+ bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
1799
+ for (n=0; n<maxSymbolValue; n++)
1800
+ huffWeight[n] = bitsToWeight[tree[n].nbBits];
1801
+
1802
+ size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); /* don't need last symbol stat : implied */
1803
+ if (FSE_isError(size)) return size;
1804
+ if (size >= 128) return (size_t)-FSE_ERROR_GENERIC; /* should never happen, since maxSymbolValue <= 255 */
1805
+ if ((size <= 1) || (size >= maxSymbolValue/2))
1806
+ {
1807
+ if (size==1) /* RLE */
1808
+ {
1809
+ /* only possible case : serie of 1 (because there are at least 2) */
1810
+ /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
1811
+ BYTE code;
1812
+ switch(maxSymbolValue)
1813
+ {
1814
+ case 1: code = 0; break;
1815
+ case 2: code = 1; break;
1816
+ case 3: code = 2; break;
1817
+ case 4: code = 3; break;
1818
+ case 7: code = 4; break;
1819
+ case 8: code = 5; break;
1820
+ case 15: code = 6; break;
1821
+ case 16: code = 7; break;
1822
+ case 31: code = 8; break;
1823
+ case 32: code = 9; break;
1824
+ case 63: code = 10; break;
1825
+ case 64: code = 11; break;
1826
+ case 127: code = 12; break;
1827
+ case 128: code = 13; break;
1828
+ default : return (size_t)-FSE_ERROR_corruptionDetected;
1829
+ }
1830
+ op[0] = (BYTE)(255-13 + code);
1831
+ return 1;
1832
+ }
1833
+ /* Not compressible */
1834
+ if (maxSymbolValue > (241-128)) return (size_t)-FSE_ERROR_GENERIC; /* not implemented (not possible with current format) */
1835
+ if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* not enough space within dst buffer */
1836
+ op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
1837
+ huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */
1838
+ for (n=0; n<maxSymbolValue; n+=2)
1839
+ op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
1840
+ return ((maxSymbolValue+1)/2) + 1;
1841
+ }
1842
+
1843
+ /* normal header case */
1844
+ op[0] = (BYTE)size;
1845
+ return size+1;
1846
+ }
1847
+
1848
+
1849
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
1850
+ {
1851
+ int totalCost = 0;
1852
+ const U32 largestBits = huffNode[lastNonNull].nbBits;
1853
+
1854
+ /* early exit : all is fine */
1855
+ if (largestBits <= maxNbBits) return largestBits;
1856
+
1857
+ // now we have a few too large elements (at least >= 2)
1858
+ {
1859
+ const U32 baseCost = 1 << (largestBits - maxNbBits);
1860
+ U32 n = lastNonNull;
1861
+
1862
+ while (huffNode[n].nbBits > maxNbBits)
1863
+ {
1864
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
1865
+ huffNode[n].nbBits = (BYTE)maxNbBits;
1866
+ n --;
1867
+ }
1868
+
1869
+ /* renorm totalCost */
1870
+ totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */
1871
+
1872
+ // repay cost
1873
+ while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1)
1874
+
1875
+ {
1876
+ const U32 noOne = 0xF0F0F0F0;
1877
+ // Get pos of last (smallest) symbol per rank
1878
+ U32 rankLast[HUF_MAX_TABLELOG];
1879
+ U32 currentNbBits = maxNbBits;
1880
+ int pos;
1881
+ memset(rankLast, 0xF0, sizeof(rankLast));
1882
+ for (pos=n ; pos >= 0; pos--)
1883
+ {
1884
+ if (huffNode[pos].nbBits >= currentNbBits) continue;
1885
+ currentNbBits = huffNode[pos].nbBits;
1886
+ rankLast[maxNbBits-currentNbBits] = pos;
1887
+ }
1888
+
1889
+ while (totalCost > 0)
1890
+ {
1891
+ U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1;
1892
+ for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
1893
+ {
1894
+ U32 highPos = rankLast[nBitsToDecrease];
1895
+ U32 lowPos = rankLast[nBitsToDecrease-1];
1896
+ if (highPos == noOne) continue;
1897
+ if (lowPos == noOne) break;
1898
+ {
1899
+ U32 highTotal = huffNode[highPos].count;
1900
+ U32 lowTotal = 2 * huffNode[lowPos].count;
1901
+ if (highTotal <= lowTotal) break;
1902
+ }
1903
+ }
1904
+ while (rankLast[nBitsToDecrease] == noOne)
1905
+ nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest
1906
+ totalCost -= 1 << (nBitsToDecrease-1);
1907
+ if (rankLast[nBitsToDecrease-1] == noOne)
1908
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt
1909
+ huffNode[rankLast[nBitsToDecrease]].nbBits ++;
1910
+ if (rankLast[nBitsToDecrease] == 0)
1911
+ rankLast[nBitsToDecrease] = noOne;
1912
+ else
1913
+ {
1914
+ rankLast[nBitsToDecrease]--;
1915
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
1916
+ rankLast[nBitsToDecrease] = noOne; // rank list emptied
1917
+ }
1918
+ }
1919
+
1920
+ while (totalCost < 0) /* Sometimes, cost correction overshoot */
1921
+ {
1922
+ if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */
1923
+ {
1924
+ while (huffNode[n].nbBits == maxNbBits) n--;
1925
+ huffNode[n+1].nbBits--;
1926
+ rankLast[1] = n+1;
1927
+ totalCost++;
1928
+ continue;
1929
+ }
1930
+ huffNode[ rankLast[1] + 1 ].nbBits--;
1931
+ rankLast[1]++;
1932
+ totalCost ++;
1933
+ }
1934
+ }
1935
+ }
1936
+
1937
+ return maxNbBits;
1938
+ }
1939
+
1940
+
1941
+ typedef struct {
1942
+ U32 base;
1943
+ U32 current;
1944
+ } rankPos;
1945
+
1946
+ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
1947
+ {
1948
+ rankPos rank[32];
1949
+ U32 n;
1271
1950
 
1272
- /* safety checks */
1273
- if (!sourceSize)
1951
+ memset(rank, 0, sizeof(rank));
1952
+ for (n=0; n<=maxSymbolValue; n++)
1274
1953
  {
1275
- memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
1276
- *maxSymbolValuePtr = 0;
1277
- return 0;
1954
+ U32 r = FSE_highbit32(count[n] + 1);
1955
+ rank[r].base ++;
1278
1956
  }
1279
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
1280
- if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
1281
-
1282
- if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
1957
+ for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
1958
+ for (n=0; n<32; n++) rank[n].current = rank[n].base;
1959
+ for (n=0; n<=maxSymbolValue; n++)
1283
1960
  {
1284
- /* check input values, to avoid count table overflow */
1285
- while (ip < iend-3)
1286
- {
1287
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
1288
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
1289
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
1290
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
1291
- }
1961
+ U32 c = count[n];
1962
+ U32 r = FSE_highbit32(c+1) + 1;
1963
+ U32 pos = rank[r].current++;
1964
+ while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
1965
+ huffNode[pos].count = c;
1966
+ huffNode[pos].byte = (BYTE)n;
1292
1967
  }
1293
- else
1968
+ }
1969
+
1970
+
1971
+ #define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
1972
+ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
1973
+ {
1974
+ nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
1975
+ nodeElt* huffNode = huffNode0 + 1;
1976
+ U32 n, nonNullRank;
1977
+ int lowS, lowN;
1978
+ U16 nodeNb = STARTNODE;
1979
+ U32 nodeRoot;
1980
+
1981
+ /* safety checks */
1982
+ if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
1983
+ if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC;
1984
+ memset(huffNode0, 0, sizeof(huffNode0));
1985
+
1986
+ // sort, decreasing order
1987
+ HUF_sort(huffNode, count, maxSymbolValue);
1988
+
1989
+ // init for parents
1990
+ nonNullRank = maxSymbolValue;
1991
+ while(huffNode[nonNullRank].count == 0) nonNullRank--;
1992
+ lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
1993
+ huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
1994
+ huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
1995
+ nodeNb++; lowS-=2;
1996
+ for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
1997
+ huffNode0[0].count = (U32)(1U<<31);
1998
+
1999
+ // create parents
2000
+ while (nodeNb <= nodeRoot)
1294
2001
  {
1295
- U32 cached = FSE_read32(ip); ip += 4;
1296
- while (ip < iend-15)
1297
- {
1298
- U32 c = cached; cached = FSE_read32(ip); ip += 4;
1299
- Counting1[(BYTE) c ]++;
1300
- Counting2[(BYTE)(c>>8) ]++;
1301
- Counting3[(BYTE)(c>>16)]++;
1302
- Counting4[ c>>24 ]++;
1303
- c = cached; cached = FSE_read32(ip); ip += 4;
1304
- Counting1[(BYTE) c ]++;
1305
- Counting2[(BYTE)(c>>8) ]++;
1306
- Counting3[(BYTE)(c>>16)]++;
1307
- Counting4[ c>>24 ]++;
1308
- c = cached; cached = FSE_read32(ip); ip += 4;
1309
- Counting1[(BYTE) c ]++;
1310
- Counting2[(BYTE)(c>>8) ]++;
1311
- Counting3[(BYTE)(c>>16)]++;
1312
- Counting4[ c>>24 ]++;
1313
- c = cached; cached = FSE_read32(ip); ip += 4;
1314
- Counting1[(BYTE) c ]++;
1315
- Counting2[(BYTE)(c>>8) ]++;
1316
- Counting3[(BYTE)(c>>16)]++;
1317
- Counting4[ c>>24 ]++;
1318
- }
1319
- ip-=4;
2002
+ U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
2003
+ U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
2004
+ huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
2005
+ huffNode[n1].parent = huffNode[n2].parent = nodeNb;
2006
+ nodeNb++;
1320
2007
  }
1321
2008
 
1322
- /* finish last symbols */
1323
- while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
2009
+ // distribute weights (unlimited tree height)
2010
+ huffNode[nodeRoot].nbBits = 0;
2011
+ for (n=nodeRoot-1; n>=STARTNODE; n--)
2012
+ huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
2013
+ for (n=0; n<=nonNullRank; n++)
2014
+ huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1324
2015
 
1325
- for (s=0; s<=(int)maxSymbolValue; s++)
2016
+ // enforce maxTableLog
2017
+ maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
2018
+
2019
+ // fill result into tree (val, nbBits)
1326
2020
  {
1327
- count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
1328
- if (count[s] > max) max = count[s];
2021
+ U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0};
2022
+ U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1];
2023
+ if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check
2024
+ for (n=0; n<=nonNullRank; n++)
2025
+ nbPerRank[huffNode[n].nbBits]++;
2026
+ {
2027
+ // determine stating value per rank
2028
+ U16 min = 0;
2029
+ for (n=maxNbBits; n>0; n--)
2030
+ {
2031
+ valPerRank[n] = min; // get starting value within each rank
2032
+ min += nbPerRank[n];
2033
+ min >>= 1;
2034
+ }
2035
+ }
2036
+ for (n=0; n<=maxSymbolValue; n++)
2037
+ tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order
2038
+ for (n=0; n<=maxSymbolValue; n++)
2039
+ tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order
1329
2040
  }
1330
2041
 
1331
- while (!count[maxSymbolValue]) maxSymbolValue--;
1332
- *maxSymbolValuePtr = maxSymbolValue;
1333
- return (int)max;
2042
+ return maxNbBits;
1334
2043
  }
1335
2044
 
1336
- /* hidden fast variant (unsafe) */
1337
- size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
2045
+ static void HUF_encodeSymbol(FSE_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
1338
2046
  {
1339
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
2047
+ FSE_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
1340
2048
  }
1341
2049
 
1342
- size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
2050
+ #define FSE_FLUSHBITS_1(stream) \
2051
+ if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) FSE_FLUSHBITS(stream)
2052
+
2053
+ #define FSE_FLUSHBITS_2(stream) \
2054
+ if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) FSE_FLUSHBITS(stream)
2055
+
2056
+ size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable)
1343
2057
  {
1344
- if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
2058
+ const BYTE* ip = (const BYTE*) src;
2059
+ BYTE* const ostart = (BYTE*)dst;
2060
+ BYTE* op = (BYTE*) ostart;
2061
+ BYTE* const oend = ostart + dstSize;
2062
+ U16* jumpTable = (U16*) dst;
2063
+ size_t n, streamSize;
2064
+ const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
2065
+ size_t errorCode;
2066
+ FSE_CStream_t bitC;
2067
+
2068
+ /* init */
2069
+ if (dstSize < 8) return 0;
2070
+ op += 6; /* jump Table -- could be optimized by delta / deviation */
2071
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2072
+ if (FSE_isError(errorCode)) return 0;
2073
+
2074
+ n = srcSize & ~15; // mod 16
2075
+ switch (srcSize & 15)
1345
2076
  {
1346
- *maxSymbolValuePtr = 255;
1347
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
2077
+ case 15: HUF_encodeSymbol(&bitC, ip[n+14], CTable);
2078
+ FSE_FLUSHBITS_1(&bitC);
2079
+ case 14: HUF_encodeSymbol(&bitC, ip[n+13], CTable);
2080
+ FSE_FLUSHBITS_2(&bitC);
2081
+ case 13: HUF_encodeSymbol(&bitC, ip[n+12], CTable);
2082
+ FSE_FLUSHBITS_1(&bitC);
2083
+ case 12: HUF_encodeSymbol(&bitC, ip[n+11], CTable);
2084
+ FSE_FLUSHBITS(&bitC);
2085
+ case 11: HUF_encodeSymbol(&bitC, ip[n+10], CTable);
2086
+ FSE_FLUSHBITS_1(&bitC);
2087
+ case 10: HUF_encodeSymbol(&bitC, ip[n+ 9], CTable);
2088
+ FSE_FLUSHBITS_2(&bitC);
2089
+ case 9 : HUF_encodeSymbol(&bitC, ip[n+ 8], CTable);
2090
+ FSE_FLUSHBITS_1(&bitC);
2091
+ case 8 : HUF_encodeSymbol(&bitC, ip[n+ 7], CTable);
2092
+ FSE_FLUSHBITS(&bitC);
2093
+ case 7 : HUF_encodeSymbol(&bitC, ip[n+ 6], CTable);
2094
+ FSE_FLUSHBITS_1(&bitC);
2095
+ case 6 : HUF_encodeSymbol(&bitC, ip[n+ 5], CTable);
2096
+ FSE_FLUSHBITS_2(&bitC);
2097
+ case 5 : HUF_encodeSymbol(&bitC, ip[n+ 4], CTable);
2098
+ FSE_FLUSHBITS_1(&bitC);
2099
+ case 4 : HUF_encodeSymbol(&bitC, ip[n+ 3], CTable);
2100
+ FSE_FLUSHBITS(&bitC);
2101
+ case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
2102
+ FSE_FLUSHBITS_2(&bitC);
2103
+ case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
2104
+ FSE_FLUSHBITS_1(&bitC);
2105
+ case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
2106
+ FSE_FLUSHBITS(&bitC);
2107
+ case 0 :
2108
+ default: ;
1348
2109
  }
1349
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 1);
1350
- }
1351
2110
 
2111
+ for (; n>0; n-=16)
2112
+ {
2113
+ HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
2114
+ FSE_FLUSHBITS_1(&bitC);
2115
+ HUF_encodeSymbol(&bitC, ip[n- 8], CTable);
2116
+ FSE_FLUSHBITS_2(&bitC);
2117
+ HUF_encodeSymbol(&bitC, ip[n-12], CTable);
2118
+ FSE_FLUSHBITS_1(&bitC);
2119
+ HUF_encodeSymbol(&bitC, ip[n-16], CTable);
2120
+ FSE_FLUSHBITS(&bitC);
2121
+ }
2122
+ streamSize = FSE_closeCStream(&bitC);
2123
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2124
+ FSE_writeLE16(jumpTable, (U16)streamSize);
2125
+ op += streamSize;
2126
+
2127
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2128
+ if (FSE_isError(errorCode)) return 0;
2129
+ n = srcSize & ~15; // mod 16
2130
+ for (; n>0; n-=16)
2131
+ {
2132
+ HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
2133
+ FSE_FLUSHBITS_1(&bitC);
2134
+ HUF_encodeSymbol(&bitC, ip[n- 7], CTable);
2135
+ FSE_FLUSHBITS_2(&bitC);
2136
+ HUF_encodeSymbol(&bitC, ip[n-11], CTable);
2137
+ FSE_FLUSHBITS_1(&bitC);
2138
+ HUF_encodeSymbol(&bitC, ip[n-15], CTable);
2139
+ FSE_FLUSHBITS(&bitC);
2140
+ }
2141
+ streamSize = FSE_closeCStream(&bitC);
2142
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2143
+ FSE_writeLE16(jumpTable+1, (U16)streamSize);
2144
+ op += streamSize;
2145
+
2146
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2147
+ if (FSE_isError(errorCode)) return 0;
2148
+ n = srcSize & ~15; // mod 16
2149
+ for (; n>0; n-=16)
2150
+ {
2151
+ HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
2152
+ FSE_FLUSHBITS_1(&bitC);
2153
+ HUF_encodeSymbol(&bitC, ip[n- 6], CTable);
2154
+ FSE_FLUSHBITS_2(&bitC);
2155
+ HUF_encodeSymbol(&bitC, ip[n-10], CTable);
2156
+ FSE_FLUSHBITS_1(&bitC);
2157
+ HUF_encodeSymbol(&bitC, ip[n-14], CTable);
2158
+ FSE_FLUSHBITS(&bitC);
2159
+ }
2160
+ streamSize = FSE_closeCStream(&bitC);
2161
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2162
+ FSE_writeLE16(jumpTable+2, (U16)streamSize);
2163
+ op += streamSize;
2164
+
2165
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2166
+ if (FSE_isError(errorCode)) return 0;
2167
+ n = srcSize & ~15; // mod 16
2168
+ for (; n>0; n-=16)
2169
+ {
2170
+ HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
2171
+ FSE_FLUSHBITS_1(&bitC);
2172
+ HUF_encodeSymbol(&bitC, ip[n- 5], CTable);
2173
+ FSE_FLUSHBITS_2(&bitC);
2174
+ HUF_encodeSymbol(&bitC, ip[n- 9], CTable);
2175
+ FSE_FLUSHBITS_1(&bitC);
2176
+ HUF_encodeSymbol(&bitC, ip[n-13], CTable);
2177
+ FSE_FLUSHBITS(&bitC);
2178
+ }
2179
+ streamSize = FSE_closeCStream(&bitC);
2180
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2181
+ op += streamSize;
2182
+
2183
+ return op-ostart;
2184
+ }
1352
2185
 
1353
- static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
1354
2186
 
1355
- size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
1356
- (void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
2187
+ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog)
1357
2188
  {
1358
- const unsigned tableSize = 1 << tableLog;
1359
- const unsigned tableMask = tableSize - 1;
1360
- U16* tableU16 = ( (U16*) CTable) + 2;
1361
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );
1362
- const unsigned step = FSE_tableStep(tableSize);
1363
- unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
1364
- U32 position = 0;
1365
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];
1366
- U32 highThreshold = tableSize-1;
1367
- unsigned symbol;
1368
- unsigned i;
2189
+ BYTE* const ostart = (BYTE*)dst;
2190
+ BYTE* op = ostart;
2191
+ BYTE* const oend = ostart + dstSize;
1369
2192
 
1370
- /* safety checks */
1371
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
2193
+ U32 count[HUF_MAX_SYMBOL_VALUE+1];
2194
+ HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
2195
+ size_t errorCode;
1372
2196
 
1373
- /* header */
1374
- tableU16[-2] = (U16) tableLog;
1375
- tableU16[-1] = (U16) maxSymbolValue;
2197
+ /* early out */
2198
+ if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
2199
+ if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
2200
+ if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
1376
2201
 
1377
- /* For explanations on how to distribute symbol values over the table :
1378
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
2202
+ /* Scan input and build symbol stats */
2203
+ errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
2204
+ if (FSE_isError(errorCode)) return errorCode;
2205
+ if (errorCode == srcSize) return 1;
2206
+ if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
1379
2207
 
1380
- /* symbol start positions */
1381
- cumul[0] = 0;
1382
- for (i=1; i<=maxSymbolValue+1; i++)
2208
+ /* Build Huffman Tree */
2209
+ errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
2210
+ if (FSE_isError(errorCode)) return errorCode;
2211
+ huffLog = (U32)errorCode;
2212
+
2213
+ /* Write table description header */
2214
+ errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */
2215
+ if (FSE_isError(errorCode)) return errorCode;
2216
+ op += errorCode;
2217
+
2218
+ /* Compress */
2219
+ errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);
2220
+ if (FSE_isError(errorCode)) return errorCode;
2221
+ if (errorCode==0) return 0;
2222
+ op += errorCode;
2223
+
2224
+ /* check compressibility */
2225
+ if ((size_t)(op-ostart) >= srcSize-1)
2226
+ return op-ostart;
2227
+
2228
+ return op-ostart;
2229
+ }
2230
+
2231
+ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2232
+ {
2233
+ return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
2234
+ }
2235
+
2236
+
2237
+ /*********************************************************
2238
+ * Huff0 : Huffman block decompression
2239
+ *********************************************************/
2240
+ typedef struct {
2241
+ BYTE byte;
2242
+ BYTE nbBits;
2243
+ } HUF_DElt;
2244
+
2245
+ size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
2246
+ {
2247
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
2248
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
2249
+ U32 weightTotal;
2250
+ U32 maxBits;
2251
+ const BYTE* ip = (const BYTE*) src;
2252
+ size_t iSize = ip[0];
2253
+ size_t oSize;
2254
+ U32 n;
2255
+ U32 nextRankStart;
2256
+ HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
2257
+
2258
+ FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
2259
+ //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
2260
+ if (iSize >= 128) /* special header */
1383
2261
  {
1384
- if (normalizedCounter[i-1]==-1) /* Low prob symbol */
2262
+ if (iSize >= (242)) /* RLE */
1385
2263
  {
1386
- cumul[i] = cumul[i-1] + 1;
1387
- tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
2264
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
2265
+ oSize = l[iSize-242];
2266
+ memset(huffWeight, 1, oSize);
2267
+ iSize = 0;
2268
+ }
2269
+ else /* Incompressible */
2270
+ {
2271
+ oSize = iSize - 127;
2272
+ iSize = ((oSize+1)/2);
2273
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2274
+ ip += 1;
2275
+ for (n=0; n<oSize; n+=2)
2276
+ {
2277
+ huffWeight[n] = ip[n/2] >> 4;
2278
+ huffWeight[n+1] = ip[n/2] & 15;
2279
+ }
1388
2280
  }
1389
- else
1390
- cumul[i] = cumul[i-1] + normalizedCounter[i-1];
1391
2281
  }
1392
- cumul[maxSymbolValue+1] = tableSize+1;
2282
+ else /* header compressed with FSE (normal case) */
2283
+ {
2284
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2285
+ oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
2286
+ if (FSE_isError(oSize)) return oSize;
2287
+ }
1393
2288
 
1394
- /* Spread symbols */
1395
- for (symbol=0; symbol<=maxSymbolValue; symbol++)
2289
+ /* collect weight stats */
2290
+ memset(rankVal, 0, sizeof(rankVal));
2291
+ weightTotal = 0;
2292
+ for (n=0; n<oSize; n++)
1396
2293
  {
1397
- int nbOccurences;
1398
- for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
1399
- {
1400
- tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1401
- position = (position + step) & tableMask;
1402
- while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
1403
- }
2294
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
2295
+ rankVal[huffWeight[n]]++;
2296
+ weightTotal += (1 << huffWeight[n]) >> 1;
1404
2297
  }
1405
2298
 
1406
- if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
2299
+ /* get last non-null symbol weight (implied, total must be 2^n) */
2300
+ maxBits = FSE_highbit32(weightTotal) + 1;
2301
+ if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
2302
+ DTable[0] = (U16)maxBits;
2303
+ {
2304
+ U32 total = 1 << maxBits;
2305
+ U32 rest = total - weightTotal;
2306
+ U32 verif = 1 << FSE_highbit32(rest);
2307
+ U32 lastWeight = FSE_highbit32(rest) + 1;
2308
+ if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
2309
+ huffWeight[oSize] = (BYTE)lastWeight;
2310
+ rankVal[lastWeight]++;
2311
+ }
1407
2312
 
1408
- /* Build table */
1409
- for (i=0; i<tableSize; i++)
2313
+ /* check tree construction validity */
2314
+ if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
2315
+
2316
+ /* Prepare ranks */
2317
+ nextRankStart = 0;
2318
+ for (n=1; n<=maxBits; n++)
1410
2319
  {
1411
- FSE_FUNCTION_TYPE s = tableSymbol[i];
1412
- tableU16[cumul[s]++] = (U16) (tableSize+i); // Table U16 : sorted by symbol order; gives next state value
2320
+ U32 current = nextRankStart;
2321
+ nextRankStart += (rankVal[n] << (n-1));
2322
+ rankVal[n] = current;
1413
2323
  }
1414
2324
 
1415
- // Build Symbol Transformation Table
2325
+ /* fill DTable */
2326
+ for (n=0; n<=oSize; n++)
1416
2327
  {
1417
- unsigned s;
1418
- unsigned total = 0;
1419
- for (s=0; s<=maxSymbolValue; s++)
1420
- {
1421
- switch (normalizedCounter[s])
1422
- {
1423
- case 0:
1424
- break;
1425
- case -1:
1426
- case 1:
1427
- symbolTT[s].minBitsOut = (BYTE)tableLog;
1428
- symbolTT[s].deltaFindState = total - 1;
1429
- total ++;
1430
- symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1431
- break;
1432
- default :
1433
- symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
1434
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
1435
- total += normalizedCounter[s];
1436
- symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
1437
- }
1438
- }
2328
+ const U32 w = huffWeight[n];
2329
+ const U32 length = (1 << w) >> 1;
2330
+ U32 i;
2331
+ HUF_DElt D;
2332
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
2333
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
2334
+ dt[i] = D;
2335
+ rankVal[w] += length;
1439
2336
  }
1440
2337
 
1441
- return 0;
2338
+ return iSize+1;
1442
2339
  }
1443
2340
 
1444
2341
 
1445
- #define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
1446
-
1447
- void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
2342
+ static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1448
2343
  {
1449
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
1450
- return malloc( ((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE) );
2344
+ const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
2345
+ const BYTE c = dt[val].byte;
2346
+ FSE_skipBits(Dstream, dt[val].nbBits);
2347
+ return c;
1451
2348
  }
1452
2349
 
1453
- void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
2350
+ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
2351
+ void* dst, size_t maxDstSize,
2352
+ const void* cSrc, size_t cSrcSize,
2353
+ const U16* DTable)
1454
2354
  {
1455
- free(DTable);
1456
- }
2355
+ BYTE* const ostart = (BYTE*) dst;
2356
+ BYTE* op = ostart;
2357
+ BYTE* const omax = op + maxDstSize;
2358
+ BYTE* const olimit = omax-15;
1457
2359
 
2360
+ const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
2361
+ const U32 dtLog = DTable[0];
2362
+ size_t errorCode;
2363
+ U32 reloadStatus;
1458
2364
 
1459
- size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
1460
- (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1461
- {
1462
- U32* const base32 = (U32*)DTable;
1463
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
1464
- const U32 tableSize = 1 << tableLog;
1465
- const U32 tableMask = tableSize-1;
1466
- const U32 step = FSE_tableStep(tableSize);
1467
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
1468
- U32 position = 0;
1469
- U32 highThreshold = tableSize-1;
1470
- const S16 largeLimit= 1 << (tableLog-1);
1471
- U32 noLarge = 1;
1472
- U32 s;
2365
+ /* Init */
1473
2366
 
1474
- /* Sanity Checks */
1475
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
1476
- if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
2367
+ const U16* jumpTable = (const U16*)cSrc;
2368
+ const size_t length1 = FSE_readLE16(jumpTable);
2369
+ const size_t length2 = FSE_readLE16(jumpTable+1);
2370
+ const size_t length3 = FSE_readLE16(jumpTable+2);
2371
+ const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
2372
+ const char* const start1 = (const char*)(cSrc) + 6;
2373
+ const char* const start2 = start1 + length1;
2374
+ const char* const start3 = start2 + length2;
2375
+ const char* const start4 = start3 + length3;
2376
+ FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1477
2377
 
1478
- /* Init, lay down lowprob symbols */
1479
- base32[0] = tableLog;
1480
- for (s=0; s<=maxSymbolValue; s++)
1481
- {
1482
- if (normalizedCounter[s]==-1)
1483
- {
1484
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
1485
- symbolNext[s] = 1;
1486
- }
1487
- else
1488
- {
1489
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
1490
- symbolNext[s] = normalizedCounter[s];
1491
- }
1492
- }
2378
+ if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1493
2379
 
1494
- /* Spread symbols */
1495
- for (s=0; s<=maxSymbolValue; s++)
2380
+ errorCode = FSE_initDStream(&bitD1, start1, length1);
2381
+ if (FSE_isError(errorCode)) return errorCode;
2382
+ errorCode = FSE_initDStream(&bitD2, start2, length2);
2383
+ if (FSE_isError(errorCode)) return errorCode;
2384
+ errorCode = FSE_initDStream(&bitD3, start3, length3);
2385
+ if (FSE_isError(errorCode)) return errorCode;
2386
+ errorCode = FSE_initDStream(&bitD4, start4, length4);
2387
+ if (FSE_isError(errorCode)) return errorCode;
2388
+
2389
+ reloadStatus=FSE_reloadDStream(&bitD2);
2390
+
2391
+ /* 16 symbols per loop */
2392
+ for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
2393
+ op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1496
2394
  {
1497
- int i;
1498
- for (i=0; i<normalizedCounter[s]; i++)
1499
- {
1500
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
1501
- position = (position + step) & tableMask;
1502
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
1503
- }
2395
+ #define HUF_DECODE_SYMBOL_0(n, Dstream) \
2396
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
2397
+
2398
+ #define HUF_DECODE_SYMBOL_1(n, Dstream) \
2399
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
2400
+ if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
2401
+
2402
+ #define HUF_DECODE_SYMBOL_2(n, Dstream) \
2403
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
2404
+ if (FSE_32bits()) FSE_reloadDStream(&Dstream)
2405
+
2406
+ HUF_DECODE_SYMBOL_1( 0, bitD1);
2407
+ HUF_DECODE_SYMBOL_1( 1, bitD2);
2408
+ HUF_DECODE_SYMBOL_1( 2, bitD3);
2409
+ HUF_DECODE_SYMBOL_1( 3, bitD4);
2410
+ HUF_DECODE_SYMBOL_2( 4, bitD1);
2411
+ HUF_DECODE_SYMBOL_2( 5, bitD2);
2412
+ HUF_DECODE_SYMBOL_2( 6, bitD3);
2413
+ HUF_DECODE_SYMBOL_2( 7, bitD4);
2414
+ HUF_DECODE_SYMBOL_1( 8, bitD1);
2415
+ HUF_DECODE_SYMBOL_1( 9, bitD2);
2416
+ HUF_DECODE_SYMBOL_1(10, bitD3);
2417
+ HUF_DECODE_SYMBOL_1(11, bitD4);
2418
+ HUF_DECODE_SYMBOL_0(12, bitD1);
2419
+ HUF_DECODE_SYMBOL_0(13, bitD2);
2420
+ HUF_DECODE_SYMBOL_0(14, bitD3);
2421
+ HUF_DECODE_SYMBOL_0(15, bitD4);
1504
2422
  }
1505
2423
 
1506
- if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
2424
+ if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
2425
+ return (size_t)-FSE_ERROR_corruptionDetected;
1507
2426
 
1508
- /* Build Decoding table */
2427
+ /* tail */
1509
2428
  {
1510
- U32 i;
1511
- for (i=0; i<tableSize; i++)
2429
+ // bitTail = bitD1; // *much* slower : -20% !??!
2430
+ FSE_DStream_t bitTail;
2431
+ bitTail.ptr = bitD1.ptr;
2432
+ bitTail.bitsConsumed = bitD1.bitsConsumed;
2433
+ bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
2434
+ bitTail.start = start1;
2435
+ for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1512
2436
  {
1513
- FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
1514
- U16 nextState = symbolNext[symbol]++;
1515
- tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
1516
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
2437
+ HUF_DECODE_SYMBOL_0(0, bitTail);
1517
2438
  }
2439
+
2440
+ if (FSE_endOfDStream(&bitTail))
2441
+ return op-ostart;
1518
2442
  }
1519
2443
 
1520
- return noLarge;
2444
+ if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
2445
+
2446
+ return (size_t)-FSE_ERROR_corruptionDetected;
2447
+ }
2448
+
2449
+
2450
+ size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
2451
+ {
2452
+ HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
2453
+ const BYTE* ip = (const BYTE*) cSrc;
2454
+ size_t errorCode;
2455
+
2456
+ errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
2457
+ if (FSE_isError(errorCode)) return errorCode;
2458
+ if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2459
+ ip += errorCode;
2460
+ cSrcSize -= errorCode;
2461
+
2462
+ return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1521
2463
  }
2464
+
2465
+
2466
+ #endif /* FSE_COMMONDEFS_ONLY */