extzstd 0.0.1.CONCEPT → 0.0.2.CONCEPT

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 714080450979b0ec369f0afc948a7018882da0bd
4
- data.tar.gz: a7b33f5732cab823a73f9b31f2e0a711e1976caa
3
+ metadata.gz: e5563734d016a497235ac43fed1b14a41d32e1c3
4
+ data.tar.gz: 63389454aa8f607e5f823b27314be3ae7efe45ed
5
5
  SHA512:
6
- metadata.gz: cbcf17649efbb2a49ecd26e2059ea74f2e9bb34eda0b6fff8694f522fd2487bbceedc6cc76fd7e2c674cab7cfa7a173d7c26c54b849cd010f9e98162bee5f434
7
- data.tar.gz: e9eba393e111181cae14f787672abd2fe24fdf316cf03158a0e0c696287501334437ef89f0634eca71a5e589e17dc32acd6bc5cb8b2341c233e029dc04fe923c
6
+ metadata.gz: a7dd05b9080beec1707db4e76948154309052fb85c615a211bcb4c82147fc84a666aadeddf670dbb1932a406ef88448e133ed33967cd06b27af2f1536c6ab042
7
+ data.tar.gz: 53e088c35e916c5bb21e10fe849e57a20e1e933b9f97cc5ad43be6c0e8531fe3984682bbeb2fe7b542fadccffb07ac9e2b1a8a978696fd43ba80a6155d7bbc6c
data/README.md CHANGED
@@ -1,26 +1,38 @@
1
1
  # encoding:utf-8 ;
2
2
 
3
- # extzstd - ruby binding for Zstandard (zstd)
3
+ # extzstd - ruby bindings for Zstd (Zstandard)
4
4
 
5
- This is ruby binding for compression library
5
+ This is a set of Ruby bindings for the compression library
6
6
  [Zstd (https://github.com/Cyan4973/zstd)](https://github.com/Cyan4973/zstd).
7
7
 
8
- * PACKAGE NAME: extzstd
9
- * AUTHOR: dearblue <dearblue@users.sourceforge.jp>
10
- * VERSION: 0.0.1.CONCEPT
11
- * LICENSING: 2-clause BSD License
12
- * REPORT ISSUE TO: <http://sourceforge.jp/projects/rutsubo/ticket/>
13
- * DEPENDENCY RUBY: ruby-2.0+
14
- * DEPENDENCY RUBY GEMS: (none)
15
- * DEPENDENCY LIBRARY: (none)
16
- * BUNDLED EXTERNAL LIBRARIES:
17
- * zstd <https://github.com/Cyan4973/zstd>
18
- (commit-e739b273f95902b7616e11338a4ef04bebc9d07b (Mon Feb 9 01:53:12 2015 +0100))
8
+ * package name: extzstd
9
+ * author: dearblue (mailto:dearblue@users.osdn.me)
10
+ * version: 0.0.2.CONCEPT
11
+ * software quality: EXPERIMENTAL
12
+ * license: 2-clause BSD License
13
+ * report issue to: https://osdn.jp/projects/rutsubo/ticket/
14
+ * dependency ruby: ruby-2.0+
15
+ * dependency ruby gems: (none)
16
+ * dependency library: (none)
17
+ * bundled external libraries:
18
+ * zstd-0.1.2 (https://github.com/Cyan4973/zstd/tree/zstd-0.1.2)
19
+
20
+
21
+ ## ***WARNING***
22
+
23
+ Zstd data format compatibility is not guaranteed in future versions
24
+ (data compressed with this version may become unreadable by future versions).
25
+
26
+ Written in [zstd/README.md](https://github.com/Cyan4973/zstd/blob/zstd-0.1.2/README.md):
27
+
28
+ > Zstd has not yet reached "stable" status. Specifically, it doesn't
29
+ > guarantee yet that its current compressed format will remain stable
30
+ > and supported in future versions.
19
31
 
20
32
 
21
33
  ## HOW TO USE
22
34
 
23
- ### Simply process
35
+ ### basic usage (one pass encode/decode)
24
36
 
25
37
  ``` ruby:ruby
26
38
  # First, load library
@@ -34,7 +46,7 @@ encdata = Zstd.encode(source)
34
46
  puts "encdata.bytesize=#{encdata.bytesize}"
35
47
 
36
48
  # Direct decompression
37
- maxdestsize = source.bytesize # MUST BE ORIGINAL SIZE OR MORE! If given a smaller size, crash ruby interpreter.
49
+ maxdestsize = source.bytesize
38
50
  decdata = Zstd.decode(encdata, maxdestsize)
39
51
  puts "decdata.bytesize=#{decdata.bytesize}"
40
52
 
data/Rakefile CHANGED
@@ -2,13 +2,15 @@
2
2
  require "rake/clean"
3
3
 
4
4
  DOC = FileList["{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"] +
5
- FileList["ext/**/{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"]
5
+ FileList["{contrib,ext}/**/{README,LICENSE,CHANGELOG,Changelog,HISTORY}{,.ja}{,.txt,.rd,.rdoc,.md,.markdown}"] +
6
+ FileList["ext/**/*.{c,C,cc,cxx,cpp,h,H,hh}"]
6
7
  #EXT = FileList["ext/**/*.{h,hh,c,cc,cpp,cxx}"] +
7
8
  # FileList["ext/externals/**/*"]
8
9
  EXT = FileList["ext/**/*"]
9
10
  BIN = FileList["bin/*"]
10
11
  LIB = FileList["lib/**/*.rb"]
11
12
  SPEC = FileList["spec/**/*"]
13
+ TEST = FileList["test/**/*"]
12
14
  EXAMPLE = FileList["examples/**/*"]
13
15
  GEMSTUB_SRC = "gemstub.rb"
14
16
  RAKEFILE = [File.basename(__FILE__), GEMSTUB_SRC]
@@ -22,12 +24,16 @@ GEMSTUB.extensions += EXTCONF
22
24
  GEMSTUB.executables += FileList["bin/*"].map { |n| File.basename n }
23
25
  GEMSTUB.executables.sort!
24
26
 
25
- GEMFILE = "#{GEMSTUB.name}-#{GEMSTUB.version}.gem"
27
+ PACKAGENAME = "#{GEMSTUB.name}-#{GEMSTUB.version}"
28
+ GEMFILE = "#{PACKAGENAME}.gem"
26
29
  GEMSPEC = "#{GEMSTUB.name}.gemspec"
27
30
 
28
- GEMSTUB.files += DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + RAKEFILE + EXTRA
31
+ GEMSTUB.files += DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + RAKEFILE + EXTRA
29
32
  GEMSTUB.files.sort!
30
- GEMSTUB.rdoc_options ||= %w(--charset UTF-8)
33
+ if GEMSTUB.rdoc_options.nil? || GEMSTUB.rdoc_options.empty?
34
+ readme = %W(.md .markdown .rd .rdoc .txt #{""}).map { |ext| "README#{ext}" }.find { |m| DOC.find { |n| n == m } }
35
+ GEMSTUB.rdoc_options = %w(--charset UTF-8) + (readme ? %W(-m #{readme}) : [])
36
+ end
31
37
  GEMSTUB.extra_rdoc_files += DOC + LIB + EXT.reject { |n| n.include?("/externals/") || !%w(.h .hh .c .cc .cpp .cxx).include?(File.extname(n)) }
32
38
  GEMSTUB.extra_rdoc_files.sort!
33
39
 
@@ -82,7 +88,7 @@ unless EXTCONF.empty?
82
88
  desc "generate binary gemspec"
83
89
  task "native-gemspec" => GEMSPEC_NATIVE
84
90
 
85
- file GEMFILE_NATIVE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + SOFILES + RAKEFILE + [GEMSPEC_NATIVE] do
91
+ file GEMFILE_NATIVE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + SOFILES + RAKEFILE + [GEMSPEC_NATIVE] do
86
92
  sh "gem build #{GEMSPEC_NATIVE}"
87
93
  end
88
94
 
@@ -123,8 +129,8 @@ end
123
129
  task :all => GEMFILE
124
130
 
125
131
  desc "generate local rdoc"
126
- task :rdoc => DOC + EXT + LIB do
127
- sh *(%w(rdoc) + GEMSTUB.rdoc_options + DOC + EXT + LIB)
132
+ task :rdoc => DOC + LIB do
133
+ sh *(%w(rdoc) + GEMSTUB.rdoc_options + DOC + LIB)
128
134
  end
129
135
 
130
136
  desc "launch rspec"
@@ -138,7 +144,12 @@ task gem: GEMFILE
138
144
  desc "generate gemspec"
139
145
  task gemspec: GEMSPEC
140
146
 
141
- file GEMFILE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + EXAMPLE + RAKEFILE + [GEMSPEC] do
147
+ desc "print package name"
148
+ task "package-name" do
149
+ puts PACKAGENAME
150
+ end
151
+
152
+ file GEMFILE => DOC + EXT + EXTCONF + BIN + LIB + SPEC + TEST + EXAMPLE + RAKEFILE + [GEMSPEC] do
142
153
  sh "gem build #{GEMSPEC}"
143
154
  end
144
155
 
@@ -32,16 +32,17 @@
32
32
  # ################################################################
33
33
 
34
34
  # Version numbers
35
- VERSION?= 0
35
+ VERSION?= 0.1.2
36
36
  LIBVER_MAJOR=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
37
37
  LIBVER_MINOR=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
38
38
  LIBVER_PATCH=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < zstd.h`
39
39
  LIBVER = $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
40
40
 
41
41
  DESTDIR?=
42
- PREFIX ?= /usr
42
+ PREFIX ?= /usr/local
43
43
  CFLAGS ?= -O3
44
44
  CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes
45
+ FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
45
46
 
46
47
  LIBDIR ?= $(PREFIX)/lib
47
48
  INCLUDEDIR=$(PREFIX)/include
@@ -67,10 +68,10 @@ all: libzstd
67
68
 
68
69
  libzstd: zstd.c
69
70
  @echo compiling static library
70
- @$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
71
+ @$(CC) $(FLAGS) -c $^
71
72
  @$(AR) rcs libzstd.a zstd.o
72
73
  @echo compiling dynamic library $(LIBVER)
73
- @$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
74
+ @$(CC) $(FLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
74
75
  @echo creating versioned links
75
76
  @ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
76
77
  @ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
@@ -52,11 +52,22 @@
52
52
 
53
53
 
54
54
  /****************************************************************
55
- * Generic function type & suffix (C template emulation)
55
+ * template functions type & suffix
56
56
  ****************************************************************/
57
57
  #define FSE_FUNCTION_TYPE BYTE
58
58
  #define FSE_FUNCTION_EXTENSION
59
59
 
60
+
61
+ /****************************************************************
62
+ * Byte symbol type
63
+ ****************************************************************/
64
+ typedef struct
65
+ {
66
+ unsigned short newState;
67
+ unsigned char symbol;
68
+ unsigned char nbBits;
69
+ } FSE_decode_t; /* size == U32 */
70
+
60
71
  #endif /* !FSE_COMMONDEFS_ONLY */
61
72
 
62
73
 
@@ -87,6 +98,8 @@
87
98
  #include "fse_static.h"
88
99
 
89
100
 
101
+ #ifndef MEM_ACCESS_MODULE
102
+ #define MEM_ACCESS_MODULE
90
103
  /****************************************************************
91
104
  * Basic Types
92
105
  *****************************************************************/
@@ -109,21 +122,126 @@ typedef unsigned long long U64;
109
122
  typedef signed long long S64;
110
123
  #endif
111
124
 
125
+ #endif /* MEM_ACCESS_MODULE */
112
126
 
113
127
  /****************************************************************
114
128
  * Memory I/O
115
129
  *****************************************************************/
130
+ /* FSE_FORCE_MEMORY_ACCESS
131
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
132
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
133
+ * The switch below allows selecting a different access method for improved performance.
134
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
135
+ * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
136
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
137
+ * Method 2 : direct access. This method is portable but violates the C standard.
138
+ * It can generate buggy code on targets generating assembly depending on alignment.
139
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
140
+ * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
141
+ * Prefer these methods in priority order (0 > 1 > 2)
142
+ */
143
+ #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
144
+ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
145
+ # define FSE_FORCE_MEMORY_ACCESS 2
146
+ # elif defined(__INTEL_COMPILER) || \
147
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
148
+ # define FSE_FORCE_MEMORY_ACCESS 1
149
+ # endif
150
+ #endif
151
+
152
+
153
+ static unsigned FSE_32bits(void)
154
+ {
155
+ return sizeof(void*)==4;
156
+ }
157
+
116
158
  static unsigned FSE_isLittleEndian(void)
117
159
  {
118
160
  const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
119
161
  return one.c[0];
120
162
  }
121
163
 
164
+ #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
165
+
166
+ static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
167
+ static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
168
+ static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
169
+
170
+ static void FSE_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
171
+ static void FSE_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
172
+ static void FSE_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
173
+
174
+ #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
175
+
176
+ /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
177
+ /* currently only defined for gcc and icc */
178
+ typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
179
+
180
+ static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
181
+ static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
182
+ static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
183
+
184
+ static void FSE_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
185
+ static void FSE_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
186
+ static void FSE_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
187
+
188
+ #else
189
+
190
+ static U16 FSE_read16(const void* memPtr)
191
+ {
192
+ U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
193
+ }
194
+
122
195
  static U32 FSE_read32(const void* memPtr)
123
196
  {
124
- U32 val32;
125
- memcpy(&val32, memPtr, 4);
126
- return val32;
197
+ U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
198
+ }
199
+
200
+ static U64 FSE_read64(const void* memPtr)
201
+ {
202
+ U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
203
+ }
204
+
205
+ static void FSE_write16(void* memPtr, U16 value)
206
+ {
207
+ memcpy(memPtr, &value, sizeof(value));
208
+ }
209
+
210
+ static void FSE_write32(void* memPtr, U32 value)
211
+ {
212
+ memcpy(memPtr, &value, sizeof(value));
213
+ }
214
+
215
+ static void FSE_write64(void* memPtr, U64 value)
216
+ {
217
+ memcpy(memPtr, &value, sizeof(value));
218
+ }
219
+
220
+ #endif // FSE_FORCE_MEMORY_ACCESS
221
+
222
+ static U16 FSE_readLE16(const void* memPtr)
223
+ {
224
+ if (FSE_isLittleEndian())
225
+ return FSE_read16(memPtr);
226
+ else
227
+ {
228
+ const BYTE* p = (const BYTE*)memPtr;
229
+ return (U16)(p[0] + (p[1]<<8));
230
+ }
231
+ }
232
+
233
+ static void FSE_writeLE16(void* memPtr, U16 val)
234
+ {
235
+ if (FSE_isLittleEndian())
236
+ {
237
+ FSE_write16(memPtr, val);
238
+ }
239
+ else
240
+ {
241
+ BYTE* p = (BYTE*)memPtr;
242
+ p[0] = (BYTE)val;
243
+ p[1] = (BYTE)(val>>8);
244
+ }
127
245
  }
128
246
 
129
247
  static U32 FSE_readLE32(const void* memPtr)
@@ -141,7 +259,7 @@ static void FSE_writeLE32(void* memPtr, U32 val32)
141
259
  {
142
260
  if (FSE_isLittleEndian())
143
261
  {
144
- memcpy(memPtr, &val32, 4);
262
+ FSE_write32(memPtr, val32);
145
263
  }
146
264
  else
147
265
  {
@@ -153,13 +271,6 @@ static void FSE_writeLE32(void* memPtr, U32 val32)
153
271
  }
154
272
  }
155
273
 
156
- static U64 FSE_read64(const void* memPtr)
157
- {
158
- U64 val64;
159
- memcpy(&val64, memPtr, 8);
160
- return val64;
161
- }
162
-
163
274
  static U64 FSE_readLE64(const void* memPtr)
164
275
  {
165
276
  if (FSE_isLittleEndian())
@@ -176,7 +287,7 @@ static void FSE_writeLE64(void* memPtr, U64 val64)
176
287
  {
177
288
  if (FSE_isLittleEndian())
178
289
  {
179
- memcpy(memPtr, &val64, 8);
290
+ FSE_write64(memPtr, val64);
180
291
  }
181
292
  else
182
293
  {
@@ -194,7 +305,7 @@ static void FSE_writeLE64(void* memPtr, U64 val64)
194
305
 
195
306
  static size_t FSE_readLEST(const void* memPtr)
196
307
  {
197
- if (sizeof(size_t)==4)
308
+ if (FSE_32bits())
198
309
  return (size_t)FSE_readLE32(memPtr);
199
310
  else
200
311
  return (size_t)FSE_readLE64(memPtr);
@@ -202,7 +313,7 @@ static size_t FSE_readLEST(const void* memPtr)
202
313
 
203
314
  static void FSE_writeLEST(void* memPtr, size_t val)
204
315
  {
205
- if (sizeof(size_t)==4)
316
+ if (FSE_32bits())
206
317
  FSE_writeLE32(memPtr, (U32)val);
207
318
  else
208
319
  FSE_writeLE64(memPtr, (U64)val);
@@ -235,17 +346,12 @@ static void FSE_writeLEST(void* memPtr, size_t val)
235
346
  ****************************************************************/
236
347
  typedef struct
237
348
  {
238
- int deltaFindState;
239
- U16 maxState;
240
- BYTE minBitsOut;
241
- /* one byte padding */
242
- } FSE_symbolCompressionTransform;
243
-
244
- typedef struct
245
- {
246
- U32 fakeTable[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; /* compatible with FSE_compressU16() */
247
- } CTable_max_t;
349
+ int deltaFindState;
350
+ U32 deltaNbBits;
351
+ } FSE_symbolCompressionTransform; /* total 8 bytes */
248
352
 
353
+ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
354
+ typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
249
355
 
250
356
  /****************************************************************
251
357
  * Internal functions
@@ -273,157 +379,458 @@ FORCE_INLINE unsigned FSE_highbit32 (register U32 val)
273
379
  }
274
380
 
275
381
 
276
- #ifndef FSE_COMMONDEFS_ONLY
277
-
278
- unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
279
-
280
- #define FSE_GENERATE_STRING(STRING) #STRING,
281
- static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
282
-
283
- const char* FSE_getErrorName(size_t code)
284
- {
285
- static const char* codeError = "Unspecified error code";
286
- if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
287
- return codeError;
288
- }
382
+ /****************************************************************
383
+ * Templates
384
+ ****************************************************************/
385
+ /*
386
+ designed to be included
387
+ for type-specific functions (template emulation in C)
388
+ Objective is to write these functions only once, for improved maintenance
389
+ */
289
390
 
290
- static short FSE_abs(short a)
291
- {
292
- return a<0? -a : a;
293
- }
391
+ /* safety checks */
392
+ #ifndef FSE_FUNCTION_EXTENSION
393
+ # error "FSE_FUNCTION_EXTENSION must be defined"
394
+ #endif
395
+ #ifndef FSE_FUNCTION_TYPE
396
+ # error "FSE_FUNCTION_TYPE must be defined"
397
+ #endif
294
398
 
399
+ /* Function names */
400
+ #define FSE_CAT(X,Y) X##Y
401
+ #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
402
+ #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
295
403
 
296
- /****************************************************************
297
- * Header bitstream management
298
- ****************************************************************/
299
- size_t FSE_headerBound(unsigned maxSymbolValue, unsigned tableLog)
300
- {
301
- size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 1;
302
- return maxSymbolValue ? maxHeaderSize : FSE_MAX_HEADERSIZE;
303
- }
304
404
 
305
- static size_t FSE_writeHeader_generic (void* header, size_t headerBufferSize,
306
- const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
307
- unsigned safeWrite)
405
+ /* Function templates */
406
+ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION)
407
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned safe)
308
408
  {
309
- BYTE* const ostart = (BYTE*) header;
310
- BYTE* out = ostart;
311
- BYTE* const oend = ostart + headerBufferSize;
312
- int nbBits;
313
- const int tableSize = 1 << tableLog;
314
- int remaining;
315
- int threshold;
316
- U32 bitStream;
317
- int bitCount;
318
- unsigned charnum = 0;
319
- int previous0 = 0;
409
+ const FSE_FUNCTION_TYPE* ip = source;
410
+ const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
411
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
412
+ unsigned max=0;
413
+ int s;
320
414
 
321
- bitStream = 0;
322
- bitCount = 0;
323
- /* Table Size */
324
- bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
325
- bitCount += 4;
415
+ U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
416
+ U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
417
+ U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
418
+ U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
326
419
 
327
- /* Init */
328
- remaining = tableSize+1; /* +1 for extra accuracy */
329
- threshold = tableSize;
330
- nbBits = tableLog+1;
420
+ /* safety checks */
421
+ if (!sourceSize)
422
+ {
423
+ memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
424
+ *maxSymbolValuePtr = 0;
425
+ return 0;
426
+ }
427
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
428
+ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
331
429
 
332
- while (remaining>1) /* stops at 1 */
430
+ if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
333
431
  {
334
- if (previous0)
335
- {
336
- unsigned start = charnum;
337
- while (!normalizedCounter[charnum]) charnum++;
338
- while (charnum >= start+24)
339
- {
340
- start+=24;
341
- bitStream += 0xFFFF<<bitCount;
342
- if ((!safeWrite) && (out > oend-2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
343
- out[0] = (BYTE)bitStream;
344
- out[1] = (BYTE)(bitStream>>8);
345
- out+=2;
346
- bitStream>>=16;
347
- }
348
- while (charnum >= start+3)
349
- {
350
- start+=3;
351
- bitStream += 3 << bitCount;
352
- bitCount += 2;
353
- }
354
- bitStream += (charnum-start) << bitCount;
355
- bitCount += 2;
356
- if (bitCount>16)
357
- {
358
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
359
- out[0] = (BYTE)bitStream;
360
- out[1] = (BYTE)(bitStream>>8);
361
- out += 2;
362
- bitStream >>= 16;
363
- bitCount -= 16;
364
- }
365
- }
432
+ /* check input values, to avoid count table overflow */
433
+ while (ip < iend-3)
366
434
  {
367
- short count = normalizedCounter[charnum++];
368
- const short max = (short)((2*threshold-1)-remaining);
369
- remaining -= FSE_abs(count);
370
- if (remaining<0) return (size_t)-FSE_ERROR_GENERIC;
371
- count++; /* +1 for extra accuracy */
372
- if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
373
- bitStream += count << bitCount;
374
- bitCount += nbBits;
375
- bitCount -= (count<max);
376
- previous0 = (count==1);
377
- while (remaining<threshold) nbBits--, threshold>>=1;
435
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
436
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
437
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
438
+ if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
378
439
  }
379
- if (bitCount>16)
440
+ }
441
+ else
442
+ {
443
+ U32 cached = FSE_read32(ip); ip += 4;
444
+ while (ip < iend-15)
380
445
  {
381
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
382
- out[0] = (BYTE)bitStream;
383
- out[1] = (BYTE)(bitStream>>8);
384
- out += 2;
385
- bitStream >>= 16;
386
- bitCount -= 16;
446
+ U32 c = cached; cached = FSE_read32(ip); ip += 4;
447
+ Counting1[(BYTE) c ]++;
448
+ Counting2[(BYTE)(c>>8) ]++;
449
+ Counting3[(BYTE)(c>>16)]++;
450
+ Counting4[ c>>24 ]++;
451
+ c = cached; cached = FSE_read32(ip); ip += 4;
452
+ Counting1[(BYTE) c ]++;
453
+ Counting2[(BYTE)(c>>8) ]++;
454
+ Counting3[(BYTE)(c>>16)]++;
455
+ Counting4[ c>>24 ]++;
456
+ c = cached; cached = FSE_read32(ip); ip += 4;
457
+ Counting1[(BYTE) c ]++;
458
+ Counting2[(BYTE)(c>>8) ]++;
459
+ Counting3[(BYTE)(c>>16)]++;
460
+ Counting4[ c>>24 ]++;
461
+ c = cached; cached = FSE_read32(ip); ip += 4;
462
+ Counting1[(BYTE) c ]++;
463
+ Counting2[(BYTE)(c>>8) ]++;
464
+ Counting3[(BYTE)(c>>16)]++;
465
+ Counting4[ c>>24 ]++;
387
466
  }
467
+ ip-=4;
388
468
  }
389
469
 
390
- /* flush remaining bitStream */
391
- if ((!safeWrite) && (out > oend - 2)) return (size_t)-FSE_ERROR_GENERIC; /* Buffer overflow */
392
- out[0] = (BYTE)bitStream;
393
- out[1] = (BYTE)(bitStream>>8);
394
- out+= (bitCount+7) /8;
470
+ /* finish last symbols */
471
+ while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
395
472
 
396
- if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC; /* Too many symbols written (a bit too late?) */
473
+ for (s=0; s<=(int)maxSymbolValue; s++)
474
+ {
475
+ count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
476
+ if (count[s] > max) max = count[s];
477
+ }
397
478
 
398
- return (out-ostart);
479
+ while (!count[maxSymbolValue]) maxSymbolValue--;
480
+ *maxSymbolValuePtr = maxSymbolValue;
481
+ return (size_t)max;
399
482
  }
400
483
 
401
-
402
- size_t FSE_writeHeader (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
484
+ /* hidden fast variant (unsafe) */
485
+ size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION)
486
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
403
487
  {
404
- if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
405
- if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
406
-
407
- if (headerBufferSize < FSE_headerBound(maxSymbolValue, tableLog))
408
- return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
488
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
489
+ }
409
490
 
410
- return FSE_writeHeader_generic(header, headerBufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
491
+ size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION)
492
+ (unsigned* count, unsigned* maxSymbolValuePtr, const FSE_FUNCTION_TYPE* source, size_t sourceSize)
493
+ {
494
+ if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
495
+ {
496
+ *maxSymbolValuePtr = 255;
497
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 0);
498
+ }
499
+ return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, maxSymbolValuePtr, source, sourceSize, 1);
411
500
  }
412
501
 
413
502
 
414
- size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
415
- const void* headerBuffer, size_t hbSize)
503
+ static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
504
+
505
+ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
506
+ (FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
416
507
  {
417
- const BYTE* const istart = (const BYTE*) headerBuffer;
418
- const BYTE* ip = istart;
419
- int nbBits;
420
- int remaining;
508
+ const unsigned tableSize = 1 << tableLog;
509
+ const unsigned tableMask = tableSize - 1;
510
+ U16* tableU16 = ( (U16*) ct) + 2;
511
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)ct) + 1 + (tableLog ? tableSize>>1 : 1) );
512
+ const unsigned step = FSE_tableStep(tableSize);
513
+ unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
514
+ U32 position = 0;
515
+ FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */
516
+ U32 highThreshold = tableSize-1;
517
+ unsigned symbol;
518
+ unsigned i;
519
+
520
+ /* header */
521
+ tableU16[-2] = (U16) tableLog;
522
+ tableU16[-1] = (U16) maxSymbolValue;
523
+
524
+ /* For explanations on how to distribute symbol values over the table :
525
+ * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
526
+
527
+ /* symbol start positions */
528
+ cumul[0] = 0;
529
+ for (i=1; i<=maxSymbolValue+1; i++)
530
+ {
531
+ if (normalizedCounter[i-1]==-1) /* Low prob symbol */
532
+ {
533
+ cumul[i] = cumul[i-1] + 1;
534
+ tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
535
+ }
536
+ else
537
+ cumul[i] = cumul[i-1] + normalizedCounter[i-1];
538
+ }
539
+ cumul[maxSymbolValue+1] = tableSize+1;
540
+
541
+ /* Spread symbols */
542
+ for (symbol=0; symbol<=maxSymbolValue; symbol++)
543
+ {
544
+ int nbOccurences;
545
+ for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
546
+ {
547
+ tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
548
+ position = (position + step) & tableMask;
549
+ while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
550
+ }
551
+ }
552
+
553
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
554
+
555
+ /* Build table */
556
+ for (i=0; i<tableSize; i++)
557
+ {
558
+ FSE_FUNCTION_TYPE s = tableSymbol[i]; /* static analyzer doesn't understand tableSymbol is properly initialized */
559
+ tableU16[cumul[s]++] = (U16) (tableSize+i); /* TableU16 : sorted by symbol order; gives next state value */
560
+ }
561
+
562
+ /* Build Symbol Transformation Table */
563
+ {
564
+ unsigned s;
565
+ unsigned total = 0;
566
+ for (s=0; s<=maxSymbolValue; s++)
567
+ {
568
+ switch (normalizedCounter[s])
569
+ {
570
+ case 0:
571
+ break;
572
+ case -1:
573
+ case 1:
574
+ symbolTT[s].deltaNbBits = tableLog << 16;
575
+ symbolTT[s].deltaFindState = total - 1;
576
+ total ++;
577
+ break;
578
+ default :
579
+ {
580
+ U32 maxBitsOut = tableLog - FSE_highbit32 (normalizedCounter[s]-1);
581
+ U32 minStatePlus = normalizedCounter[s] << maxBitsOut;
582
+ symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
583
+ symbolTT[s].deltaFindState = total - normalizedCounter[s];
584
+ total += normalizedCounter[s];
585
+ }
586
+ }
587
+ }
588
+ }
589
+
590
+ return 0;
591
+ }
592
+
593
+
594
+ #define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
595
+
596
+ FSE_DTable* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
597
+ {
598
+ if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
599
+ return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
600
+ }
601
+
602
+ void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (FSE_DTable* dt)
603
+ {
604
+ free(dt);
605
+ }
606
+
607
+ typedef struct {
608
+ U16 tableLog;
609
+ U16 fastMode;
610
+ } FSE_DTableHeader; /* sizeof U32 */
611
+
612
+ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
613
+ (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
614
+ {
615
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
616
+ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (dt+1); /* because dt is unsigned, 32-bits aligned on 32-bits */
617
+ const U32 tableSize = 1 << tableLog;
618
+ const U32 tableMask = tableSize-1;
619
+ const U32 step = FSE_tableStep(tableSize);
620
+ U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
621
+ U32 position = 0;
622
+ U32 highThreshold = tableSize-1;
623
+ const S16 largeLimit= (S16)(1 << (tableLog-1));
624
+ U32 noLarge = 1;
625
+ U32 s;
626
+
627
+ /* Sanity Checks */
628
+ if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
629
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
630
+
631
+ /* Init, lay down lowprob symbols */
632
+ DTableH[0].tableLog = (U16)tableLog;
633
+ for (s=0; s<=maxSymbolValue; s++)
634
+ {
635
+ if (normalizedCounter[s]==-1)
636
+ {
637
+ tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
638
+ symbolNext[s] = 1;
639
+ }
640
+ else
641
+ {
642
+ if (normalizedCounter[s] >= largeLimit) noLarge=0;
643
+ symbolNext[s] = normalizedCounter[s];
644
+ }
645
+ }
646
+
647
+ /* Spread symbols */
648
+ for (s=0; s<=maxSymbolValue; s++)
649
+ {
650
+ int i;
651
+ for (i=0; i<normalizedCounter[s]; i++)
652
+ {
653
+ tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
654
+ position = (position + step) & tableMask;
655
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
656
+ }
657
+ }
658
+
659
+ if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
660
+
661
+ /* Build Decoding table */
662
+ {
663
+ U32 i;
664
+ for (i=0; i<tableSize; i++)
665
+ {
666
+ FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
667
+ U16 nextState = symbolNext[symbol]++;
668
+ tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
669
+ tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
670
+ }
671
+ }
672
+
673
+ DTableH->fastMode = (U16)noLarge;
674
+ return 0;
675
+ }
676
+
677
+
678
+ /******************************************
679
+ * FSE byte symbol
680
+ ******************************************/
681
+ #ifndef FSE_COMMONDEFS_ONLY
682
+
683
+ unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
684
+
685
+ #define FSE_GENERATE_STRING(STRING) #STRING,
686
+ static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) };
687
+
688
+ const char* FSE_getErrorName(size_t code)
689
+ {
690
+ static const char* codeError = "Unspecified error code";
691
+ if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)];
692
+ return codeError;
693
+ }
694
+
695
+ static short FSE_abs(short a)
696
+ {
697
+ return a<0? -a : a;
698
+ }
699
+
700
+
701
+ /****************************************************************
702
+ * Header bitstream management
703
+ ****************************************************************/
704
+ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
705
+ {
706
+ size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
707
+ return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
708
+ }
709
+
710
+ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
711
+ const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
712
+ unsigned writeIsSafe)
713
+ {
714
+ BYTE* const ostart = (BYTE*) header;
715
+ BYTE* out = ostart;
716
+ BYTE* const oend = ostart + headerBufferSize;
717
+ int nbBits;
718
+ const int tableSize = 1 << tableLog;
719
+ int remaining;
421
720
  int threshold;
422
721
  U32 bitStream;
423
722
  int bitCount;
424
723
  unsigned charnum = 0;
425
724
  int previous0 = 0;
426
725
 
726
+ bitStream = 0;
727
+ bitCount = 0;
728
+ /* Table Size */
729
+ bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
730
+ bitCount += 4;
731
+
732
+ /* Init */
733
+ remaining = tableSize+1; /* +1 for extra accuracy */
734
+ threshold = tableSize;
735
+ nbBits = tableLog+1;
736
+
737
+ while (remaining>1) /* stops at 1 */
738
+ {
739
+ if (previous0)
740
+ {
741
+ unsigned start = charnum;
742
+ while (!normalizedCounter[charnum]) charnum++;
743
+ while (charnum >= start+24)
744
+ {
745
+ start+=24;
746
+ bitStream += 0xFFFFU << bitCount;
747
+ if ((!writeIsSafe) && (out > oend-2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
748
+ out[0] = (BYTE) bitStream;
749
+ out[1] = (BYTE)(bitStream>>8);
750
+ out+=2;
751
+ bitStream>>=16;
752
+ }
753
+ while (charnum >= start+3)
754
+ {
755
+ start+=3;
756
+ bitStream += 3 << bitCount;
757
+ bitCount += 2;
758
+ }
759
+ bitStream += (charnum-start) << bitCount;
760
+ bitCount += 2;
761
+ if (bitCount>16)
762
+ {
763
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
764
+ out[0] = (BYTE)bitStream;
765
+ out[1] = (BYTE)(bitStream>>8);
766
+ out += 2;
767
+ bitStream >>= 16;
768
+ bitCount -= 16;
769
+ }
770
+ }
771
+ {
772
+ short count = normalizedCounter[charnum++];
773
+ const short max = (short)((2*threshold-1)-remaining);
774
+ remaining -= FSE_abs(count);
775
+ if (remaining<1) return (size_t)-FSE_ERROR_GENERIC;
776
+ count++; /* +1 for extra accuracy */
777
+ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
778
+ bitStream += count << bitCount;
779
+ bitCount += nbBits;
780
+ bitCount -= (count<max);
781
+ previous0 = (count==1);
782
+ while (remaining<threshold) nbBits--, threshold>>=1;
783
+ }
784
+ if (bitCount>16)
785
+ {
786
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
787
+ out[0] = (BYTE)bitStream;
788
+ out[1] = (BYTE)(bitStream>>8);
789
+ out += 2;
790
+ bitStream >>= 16;
791
+ bitCount -= 16;
792
+ }
793
+ }
794
+
795
+ /* flush remaining bitStream */
796
+ if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */
797
+ out[0] = (BYTE)bitStream;
798
+ out[1] = (BYTE)(bitStream>>8);
799
+ out+= (bitCount+7) /8;
800
+
801
+ if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC;
802
+
803
+ return (out-ostart);
804
+ }
805
+
806
+
807
+ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
808
+ {
809
+ if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
810
+ if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */
811
+
812
+ if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
813
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
814
+
815
+ return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
816
+ }
817
+
818
+
819
+ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
820
+ const void* headerBuffer, size_t hbSize)
821
+ {
822
+ const BYTE* const istart = (const BYTE*) headerBuffer;
823
+ const BYTE* const iend = istart + hbSize;
824
+ const BYTE* ip = istart;
825
+ int nbBits;
826
+ int remaining;
827
+ int threshold;
828
+ U32 bitStream;
829
+ int bitCount;
830
+ unsigned charnum = 0;
831
+ int previous0 = 0;
832
+
833
+ if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
427
834
  bitStream = FSE_readLE32(ip);
428
835
  nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
429
836
  if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
@@ -442,8 +849,16 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
442
849
  while ((bitStream & 0xFFFF) == 0xFFFF)
443
850
  {
444
851
  n0+=24;
445
- ip+=2;
446
- bitStream = FSE_readLE32(ip) >> bitCount;
852
+ if (ip < iend-5)
853
+ {
854
+ ip+=2;
855
+ bitStream = FSE_readLE32(ip) >> bitCount;
856
+ }
857
+ else
858
+ {
859
+ bitStream >>= 16;
860
+ bitCount+=16;
861
+ }
447
862
  }
448
863
  while ((bitStream & 3) == 3)
449
864
  {
@@ -453,11 +868,16 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
453
868
  }
454
869
  n0 += bitStream & 3;
455
870
  bitCount += 2;
456
- if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_GENERIC;
871
+ if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
457
872
  while (charnum < n0) normalizedCounter[charnum++] = 0;
458
- ip += bitCount>>3;
459
- bitCount &= 7;
460
- bitStream = FSE_readLE32(ip) >> bitCount;
873
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
874
+ {
875
+ ip += bitCount>>3;
876
+ bitCount &= 7;
877
+ bitStream = FSE_readLE32(ip) >> bitCount;
878
+ }
879
+ else
880
+ bitStream >>= 2;
461
881
  }
462
882
  {
463
883
  const short max = (short)((2*threshold-1)-remaining);
@@ -485,16 +905,26 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
485
905
  threshold >>= 1;
486
906
  }
487
907
 
488
- ip += bitCount>>3;
489
- bitCount &= 7;
490
- bitStream = FSE_readLE32(ip) >> bitCount;
908
+ {
909
+ if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
910
+ {
911
+ ip += bitCount>>3;
912
+ bitCount &= 7;
913
+ }
914
+ else
915
+ {
916
+ bitCount -= (int)(8 * (iend - 4 - ip));
917
+ ip = iend - 4;
918
+ }
919
+ bitStream = FSE_readLE32(ip) >> (bitCount & 31);
920
+ }
491
921
  }
492
922
  }
493
923
  if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
494
924
  *maxSVPtr = charnum-1;
495
925
 
496
- ip += bitCount>0;
497
- if ((size_t)(ip-istart) >= hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* arguably a bit late , tbd */
926
+ ip += (bitCount+7)>>3;
927
+ if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
498
928
  return ip-istart;
499
929
  }
500
930
 
@@ -503,7 +933,7 @@ size_t FSE_readHeader (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
503
933
  * FSE Compression Code
504
934
  ****************************************************************/
505
935
  /*
506
- CTable is a variable size structure which contains :
936
+ FSE_CTable[0] is a variable size structure which contains :
507
937
  U16 tableLog;
508
938
  U16 maxSymbolValue;
509
939
  U16 nextStateNumber[1 << tableLog]; // This size is variable
@@ -520,82 +950,129 @@ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
520
950
  return size;
521
951
  }
522
952
 
523
- void* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
953
+ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
524
954
  {
525
955
  size_t size;
526
956
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
527
957
  size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
528
- return malloc(size);
958
+ return (FSE_CTable*)malloc(size);
529
959
  }
530
960
 
531
- void FSE_freeCTable (void* CTable)
961
+ void FSE_freeCTable (FSE_CTable* ct)
532
962
  {
533
- free(CTable);
963
+ free(ct);
534
964
  }
535
965
 
536
966
 
967
+ /* provides the minimum logSize to safely represent a distribution */
968
+ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
969
+ {
970
+ U32 minBitsSrc = FSE_highbit32((U32)(srcSize - 1)) + 1;
971
+ U32 minBitsSymbols = FSE_highbit32(maxSymbolValue) + 2;
972
+ U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
973
+ return minBits;
974
+ }
975
+
537
976
  unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
538
977
  {
978
+ U32 maxBitsSrc = FSE_highbit32((U32)(srcSize - 1)) - 2;
539
979
  U32 tableLog = maxTableLog;
980
+ U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
540
981
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
541
- if ((FSE_highbit32((U32)(srcSize - 1)) - 2) < tableLog) tableLog = FSE_highbit32((U32)(srcSize - 1)) - 2; /* Accuracy can be reduced */
542
- if ((FSE_highbit32(maxSymbolValue+1)+1) > tableLog) tableLog = FSE_highbit32(maxSymbolValue+1)+1; /* Need a minimum to safely represent all symbol values */
982
+ if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
983
+ if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
543
984
  if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
544
985
  if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
545
986
  return tableLog;
546
987
  }
547
988
 
548
989
 
549
- typedef struct
550
- {
551
- U32 id;
552
- U32 count;
553
- } rank_t;
554
-
555
- int FSE_compareRankT(const void* r1, const void* r2)
556
- {
557
- const rank_t* R1 = (const rank_t*)r1;
558
- const rank_t* R2 = (const rank_t*)r2;
559
-
560
- return 2 * (R1->count < R2->count) - 1;
561
- }
990
+ /* Secondary normalization method.
991
+ To be used when primary method fails. */
562
992
 
563
- static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
993
+ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
564
994
  {
565
- rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
566
995
  U32 s;
996
+ U32 distributed = 0;
997
+ U32 ToDistribute;
567
998
 
568
999
  /* Init */
1000
+ U32 lowThreshold = (U32)(total >> tableLog);
1001
+ U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
1002
+
569
1003
  for (s=0; s<=maxSymbolValue; s++)
570
1004
  {
571
- rank[s].id = s;
572
- rank[s].count = count[s];
573
- if (norm[s] <= 1) rank[s].count = 0;
1005
+ if (count[s] == 0)
1006
+ {
1007
+ norm[s]=0;
1008
+ continue;
1009
+ }
1010
+ if (count[s] <= lowThreshold)
1011
+ {
1012
+ norm[s] = -1;
1013
+ distributed++;
1014
+ total -= count[s];
1015
+ continue;
1016
+ }
1017
+ if (count[s] <= lowOne)
1018
+ {
1019
+ norm[s] = 1;
1020
+ distributed++;
1021
+ total -= count[s];
1022
+ continue;
1023
+ }
1024
+ norm[s]=-2;
574
1025
  }
575
- rank[maxSymbolValue+1].id = 0;
576
- rank[maxSymbolValue+1].count = 0; /* ensures comparison ends here in worst case */
1026
+ ToDistribute = (1 << tableLog) - distributed;
577
1027
 
578
- /* Sort according to count */
579
- qsort(rank, maxSymbolValue+1, sizeof(rank_t), FSE_compareRankT);
1028
+ if ((total / ToDistribute) > lowOne)
1029
+ {
1030
+ /* risk of rounding to zero */
1031
+ lowOne = (U32)((total * 3) / (ToDistribute * 2));
1032
+ for (s=0; s<=maxSymbolValue; s++)
1033
+ {
1034
+ if ((norm[s] == -2) && (count[s] <= lowOne))
1035
+ {
1036
+ norm[s] = 1;
1037
+ distributed++;
1038
+ total -= count[s];
1039
+ continue;
1040
+ }
1041
+ }
1042
+ ToDistribute = (1 << tableLog) - distributed;
1043
+ }
1044
+
1045
+ if (distributed == maxSymbolValue+1)
1046
+ {
1047
+ /* all values are pretty poor;
1048
+ probably incompressible data (should have already been detected);
1049
+ find max, then give all remaining points to max */
1050
+ U32 maxV = 0, maxC =0;
1051
+ for (s=0; s<=maxSymbolValue; s++)
1052
+ if (count[s] > maxC) maxV=s, maxC=count[s];
1053
+ norm[maxV] += (short)ToDistribute;
1054
+ return 0;
1055
+ }
580
1056
 
581
- while(pointsToRemove)
582
1057
  {
583
- int newRank = 1;
584
- rank_t savedR;
585
- if (norm[rank[0].id] == 1)
586
- return (size_t)-FSE_ERROR_GENERIC;
587
- norm[rank[0].id]--;
588
- pointsToRemove--;
589
- rank[0].count -= (rank[0].count + 6) >> 3;
590
- if (norm[rank[0].id] == 1)
591
- rank[0].count=0;
592
- savedR = rank[0];
593
- while (rank[newRank].count > savedR.count)
1058
+ U64 const vStepLog = 62 - tableLog;
1059
+ U64 const mid = (1ULL << (vStepLog-1)) - 1;
1060
+ U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
1061
+ U64 tmpTotal = mid;
1062
+ for (s=0; s<=maxSymbolValue; s++)
594
1063
  {
595
- rank[newRank-1] = rank[newRank];
596
- newRank++;
1064
+ if (norm[s]==-2)
1065
+ {
1066
+ U64 end = tmpTotal + (count[s] * rStep);
1067
+ U32 sStart = (U32)(tmpTotal >> vStepLog);
1068
+ U32 sEnd = (U32)(end >> vStepLog);
1069
+ U32 weight = sEnd - sStart;
1070
+ if (weight < 1)
1071
+ return (size_t)-FSE_ERROR_GENERIC;
1072
+ norm[s] = (short)weight;
1073
+ tmpTotal = end;
1074
+ }
597
1075
  }
598
- rank[newRank-1] = savedR;
599
1076
  }
600
1077
 
601
1078
  return 0;
@@ -610,7 +1087,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
610
1087
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
611
1088
  if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
612
1089
  if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */
613
- if ((1U<<tableLog) <= maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
1090
+ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */
614
1091
 
615
1092
  {
616
1093
  U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
@@ -655,10 +1132,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
655
1132
  }
656
1133
  if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
657
1134
  {
658
- /* corner case, need to converge towards normalization with caution */
659
- size_t errorCode = FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
1135
+ /* corner case, need another normalization method */
1136
+ size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
660
1137
  if (FSE_isError(errorCode)) return errorCode;
661
- //FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
662
1138
  }
663
1139
  else normalizedCounter[largest] += (short)stillToDistribute;
664
1140
  }
@@ -681,19 +1157,18 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
681
1157
  }
682
1158
 
683
1159
 
684
- /* fake CTable, for raw (uncompressed) input */
685
- size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
1160
+ /* fake FSE_CTable, for raw (uncompressed) input */
1161
+ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
686
1162
  {
687
1163
  const unsigned tableSize = 1 << nbBits;
688
1164
  const unsigned tableMask = tableSize - 1;
689
1165
  const unsigned maxSymbolValue = tableMask;
690
- U16* tableU16 = ( (U16*) CTable) + 2;
691
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)CTable)+1) + (tableSize>>1));
1166
+ U16* tableU16 = ( (U16*) ct) + 2;
1167
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((((U32*)ct)+1) + (tableSize>>1));
692
1168
  unsigned s;
693
1169
 
694
1170
  /* Sanity checks */
695
1171
  if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
696
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
697
1172
 
698
1173
  /* header */
699
1174
  tableU16[-2] = (U16) nbBits;
@@ -706,24 +1181,19 @@ size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits)
706
1181
  /* Build Symbol Transformation Table */
707
1182
  for (s=0; s<=maxSymbolValue; s++)
708
1183
  {
709
- symbolTT[s].minBitsOut = (BYTE)nbBits;
1184
+ symbolTT[s].deltaNbBits = nbBits << 16;
710
1185
  symbolTT[s].deltaFindState = s-1;
711
- symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
712
1186
  }
713
1187
 
714
1188
  return 0;
715
1189
  }
716
1190
 
717
1191
 
718
- /* fake CTable, for rle (100% always same symbol) input */
719
- size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
1192
+ /* fake FSE_CTable, for rle (100% always same symbol) input */
1193
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
720
1194
  {
721
- const unsigned tableSize = 1;
722
- U16* tableU16 = ( (U16*) CTable) + 2;
723
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)CTable + 2);
724
-
725
- /* safety checks */
726
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be 4 bytes aligned */
1195
+ U16* tableU16 = ( (U16*) ct) + 2;
1196
+ FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) ((U32*)ct + 2);
727
1197
 
728
1198
  /* header */
729
1199
  tableU16[-2] = (U16) 0;
@@ -735,32 +1205,40 @@ size_t FSE_buildCTable_rle (void* CTable, BYTE symbolValue)
735
1205
 
736
1206
  /* Build Symbol Transformation Table */
737
1207
  {
738
- symbolTT[symbolValue].minBitsOut = 0;
1208
+ symbolTT[symbolValue].deltaNbBits = 0;
739
1209
  symbolTT[symbolValue].deltaFindState = 0;
740
- symbolTT[symbolValue].maxState = (U16)(2*tableSize-1); /* ensures state <= maxState */
741
1210
  }
742
1211
 
743
1212
  return 0;
744
1213
  }
745
1214
 
746
1215
 
747
- void FSE_initCStream(FSE_CStream_t* bitC, void* start)
1216
+ size_t FSE_initCStream(FSE_CStream_t* bitC, void* start, size_t maxSize)
748
1217
  {
1218
+ if (maxSize < sizeof(bitC->ptr)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
749
1219
  bitC->bitContainer = 0;
750
- bitC->bitPos = 0; /* reserved for unusedBits */
1220
+ bitC->bitPos = 0;
751
1221
  bitC->startPtr = (char*)start;
752
1222
  bitC->ptr = bitC->startPtr;
1223
+ bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr);
1224
+ return 0;
753
1225
  }
754
1226
 
755
- void FSE_initCState(FSE_CState_t* statePtr, const void* CTable)
1227
+ void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
756
1228
  {
757
- const U32 tableLog = ( (U16*) CTable) [0];
1229
+ const U32 tableLog = ( (const U16*) ct) [0];
758
1230
  statePtr->value = (ptrdiff_t)1<<tableLog;
759
- statePtr->stateTable = ((const U16*) CTable) + 2;
760
- statePtr->symbolTT = (const U32*)CTable + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
1231
+ statePtr->stateTable = ((const U16*) ct) + 2;
1232
+ statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
761
1233
  statePtr->stateLog = tableLog;
762
1234
  }
763
1235
 
1236
+ void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */
1237
+ {
1238
+ bitC->bitContainer |= value << bitC->bitPos;
1239
+ bitC->bitPos += nbBits;
1240
+ }
1241
+
764
1242
  void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
765
1243
  {
766
1244
  static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */
@@ -768,22 +1246,31 @@ void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits)
768
1246
  bitC->bitPos += nbBits;
769
1247
  }
770
1248
 
771
- void FSE_encodeByte(FSE_CStream_t* bitC, FSE_CState_t* statePtr, BYTE symbol)
1249
+ void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
772
1250
  {
773
- const FSE_symbolCompressionTransform* const symbolTT = (const FSE_symbolCompressionTransform*) statePtr->symbolTT;
774
- const U16* const stateTable = (const U16*) statePtr->stateTable;
775
- int nbBitsOut = symbolTT[symbol].minBitsOut;
776
- nbBitsOut -= (int)((symbolTT[symbol].maxState - statePtr->value) >> 31);
1251
+ const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1252
+ const U16* const stateTable = (const U16*)(statePtr->stateTable);
1253
+ U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
777
1254
  FSE_addBits(bitC, statePtr->value, nbBitsOut);
778
- statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT[symbol].deltaFindState];
1255
+ statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
779
1256
  }
780
1257
 
781
- void FSE_flushBits(FSE_CStream_t* bitC)
1258
+ void FSE_flushBitsFast(FSE_CStream_t* bitC) /* only if dst buffer is large enough ( >= FSE_compressBound()) */
782
1259
  {
783
1260
  size_t nbBytes = bitC->bitPos >> 3;
784
1261
  FSE_writeLEST(bitC->ptr, bitC->bitContainer);
1262
+ bitC->ptr += nbBytes;
785
1263
  bitC->bitPos &= 7;
1264
+ bitC->bitContainer >>= nbBytes*8;
1265
+ }
1266
+
1267
+ void FSE_flushBits(FSE_CStream_t* bitC)
1268
+ {
1269
+ size_t nbBytes = bitC->bitPos >> 3;
1270
+ FSE_writeLEST(bitC->ptr, bitC->bitContainer);
786
1271
  bitC->ptr += nbBytes;
1272
+ if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
1273
+ bitC->bitPos &= 7;
787
1274
  bitC->bitContainer >>= nbBytes*8;
788
1275
  }
789
1276
 
@@ -798,9 +1285,12 @@ size_t FSE_closeCStream(FSE_CStream_t* bitC)
798
1285
  {
799
1286
  char* endPtr;
800
1287
 
801
- FSE_addBits(bitC, 1, 1);
1288
+ FSE_addBitsFast(bitC, 1, 1);
802
1289
  FSE_flushBits(bitC);
803
1290
 
1291
+ if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */
1292
+ return 0; /* not compressible */
1293
+
804
1294
  endPtr = bitC->ptr;
805
1295
  endPtr += bitC->bitPos > 0;
806
1296
 
@@ -808,58 +1298,61 @@ size_t FSE_closeCStream(FSE_CStream_t* bitC)
808
1298
  }
809
1299
 
810
1300
 
811
- size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1301
+ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
812
1302
  const void* src, size_t srcSize,
813
- const void* CTable)
1303
+ const FSE_CTable* ct, const unsigned fast)
814
1304
  {
815
1305
  const BYTE* const istart = (const BYTE*) src;
816
1306
  const BYTE* ip;
817
1307
  const BYTE* const iend = istart + srcSize;
818
1308
 
1309
+ size_t errorCode;
819
1310
  FSE_CStream_t bitC;
820
1311
  FSE_CState_t CState1, CState2;
821
1312
 
822
1313
 
823
1314
  /* init */
824
- (void)dstSize; /* objective : ensure it fits into dstBuffer (Todo) */
825
- FSE_initCStream(&bitC, dst);
826
- FSE_initCState(&CState1, CTable);
1315
+ errorCode = FSE_initCStream(&bitC, dst, dstSize);
1316
+ if (FSE_isError(errorCode)) return 0;
1317
+ FSE_initCState(&CState1, ct);
827
1318
  CState2 = CState1;
828
1319
 
829
1320
  ip=iend;
830
1321
 
1322
+ #define FSE_FLUSHBITS(s) (fast ? FSE_flushBitsFast(s) : FSE_flushBits(s))
1323
+
831
1324
  /* join to even */
832
1325
  if (srcSize & 1)
833
1326
  {
834
- FSE_encodeByte(&bitC, &CState1, *--ip);
835
- FSE_flushBits(&bitC);
1327
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
1328
+ FSE_FLUSHBITS(&bitC);
836
1329
  }
837
1330
 
838
1331
  /* join to mod 4 */
839
- if ((sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
1332
+ if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) /* test bit 2 */
840
1333
  {
841
- FSE_encodeByte(&bitC, &CState2, *--ip);
842
- FSE_encodeByte(&bitC, &CState1, *--ip);
843
- FSE_flushBits(&bitC);
1334
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
1335
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
1336
+ FSE_FLUSHBITS(&bitC);
844
1337
  }
845
1338
 
846
1339
  /* 2 or 4 encoding per loop */
847
- while (ip>istart)
1340
+ for ( ; ip>istart ; )
848
1341
  {
849
- FSE_encodeByte(&bitC, &CState2, *--ip);
1342
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
850
1343
 
851
- if (sizeof(size_t)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
852
- FSE_flushBits(&bitC);
1344
+ if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
1345
+ FSE_FLUSHBITS(&bitC);
853
1346
 
854
- FSE_encodeByte(&bitC, &CState1, *--ip);
1347
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
855
1348
 
856
- if (sizeof(size_t)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
1349
+ if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) /* this test must be static */
857
1350
  {
858
- FSE_encodeByte(&bitC, &CState2, *--ip);
859
- FSE_encodeByte(&bitC, &CState1, *--ip);
1351
+ FSE_encodeSymbol(&bitC, &CState2, *--ip);
1352
+ FSE_encodeSymbol(&bitC, &CState1, *--ip);
860
1353
  }
861
1354
 
862
- FSE_flushBits(&bitC);
1355
+ FSE_FLUSHBITS(&bitC);
863
1356
  }
864
1357
 
865
1358
  FSE_flushCState(&bitC, &CState2);
@@ -867,15 +1360,20 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
867
1360
  return FSE_closeCStream(&bitC);
868
1361
  }
869
1362
 
870
-
871
- static size_t FSE_compressRLE (BYTE *out, BYTE symbol)
1363
+ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
1364
+ const void* src, size_t srcSize,
1365
+ const FSE_CTable* ct)
872
1366
  {
873
- *out=symbol;
874
- return 1;
1367
+ const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
1368
+
1369
+ if (fast)
1370
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
1371
+ else
1372
+ return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
875
1373
  }
876
1374
 
877
- size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
878
1375
 
1376
+ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
879
1377
 
880
1378
  size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
881
1379
  {
@@ -888,34 +1386,36 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
888
1386
 
889
1387
  U32 count[FSE_MAX_SYMBOL_VALUE+1];
890
1388
  S16 norm[FSE_MAX_SYMBOL_VALUE+1];
891
- CTable_max_t CTable;
1389
+ CTable_max_t ct;
892
1390
  size_t errorCode;
893
1391
 
894
- /* early out */
895
- if (dstSize < FSE_compressBound(srcSize)) return (size_t)-FSE_ERROR_dstSize_tooSmall;
896
- if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
1392
+ /* init conditions */
1393
+ if (srcSize <= 1) return 0; /* Uncompressible */
897
1394
  if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
898
1395
  if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
899
1396
 
900
1397
  /* Scan input and build symbol stats */
901
- errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
1398
+ errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
902
1399
  if (FSE_isError(errorCode)) return errorCode;
903
- if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);
904
- if (errorCode < ((srcSize * 7) >> 10)) return 0; /* Heuristic : not compressible enough */
1400
+ if (errorCode == srcSize) return 1;
1401
+ if (errorCode == 1) return 0; /* each symbol only present once */
1402
+ if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
905
1403
 
906
1404
  tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
907
1405
  errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
908
1406
  if (FSE_isError(errorCode)) return errorCode;
909
1407
 
910
1408
  /* Write table description header */
911
- errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
1409
+ errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
912
1410
  if (FSE_isError(errorCode)) return errorCode;
913
1411
  op += errorCode;
914
1412
 
915
1413
  /* Compress */
916
- errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
1414
+ errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
917
1415
  if (FSE_isError(errorCode)) return errorCode;
918
- op += FSE_compress_usingCTable(op, oend - op, ip, srcSize, &CTable);
1416
+ errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
1417
+ if (errorCode == 0) return 0; /* not enough space for compressed data */
1418
+ op += errorCode;
919
1419
 
920
1420
  /* check compressibility */
921
1421
  if ( (size_t)(op-ostart) >= srcSize-1 )
@@ -924,7 +1424,6 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
924
1424
  return op-ostart;
925
1425
  }
926
1426
 
927
-
928
1427
  size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
929
1428
  {
930
1429
  return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
@@ -934,32 +1433,13 @@ size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
934
1433
  /*********************************************************
935
1434
  * Decompression (Byte symbols)
936
1435
  *********************************************************/
937
- typedef struct
938
- {
939
- U16 newState;
940
- BYTE symbol;
941
- BYTE nbBits;
942
- } FSE_decode_t; /* size == U32 */
943
-
944
- /* Specific corner case : RLE compression */
945
- size_t FSE_decompressRLE(void* dst, size_t originalSize,
946
- const void* cSrc, size_t cSrcSize)
1436
+ size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
947
1437
  {
948
- if (cSrcSize != 1) return (size_t)-FSE_ERROR_srcSize_wrong;
949
- memset(dst, *(BYTE*)cSrc, originalSize);
950
- return originalSize;
951
- }
1438
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1439
+ FSE_decode_t* const cell = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
952
1440
 
953
-
954
- size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
955
- {
956
- U32* const base32 = (U32*)DTable;
957
- FSE_decode_t* const cell = (FSE_decode_t*)(base32 + 1);
958
-
959
- /* Sanity check */
960
- if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
961
-
962
- base32[0] = 0;
1441
+ DTableH->tableLog = 0;
1442
+ DTableH->fastMode = 0;
963
1443
 
964
1444
  cell->newState = 0;
965
1445
  cell->symbol = symbolValue;
@@ -969,10 +1449,10 @@ size_t FSE_buildDTable_rle (void* DTable, BYTE symbolValue)
969
1449
  }
970
1450
 
971
1451
 
972
- size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
1452
+ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
973
1453
  {
974
- U32* const base32 = (U32*)DTable;
975
- FSE_decode_t* dinfo = (FSE_decode_t*)(base32 + 1);
1454
+ FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)dt;
1455
+ FSE_decode_t* const dinfo = (FSE_decode_t*)(dt + 1); /* because dt is unsigned */
976
1456
  const unsigned tableSize = 1 << nbBits;
977
1457
  const unsigned tableMask = tableSize - 1;
978
1458
  const unsigned maxSymbolValue = tableMask;
@@ -980,10 +1460,10 @@ size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits)
980
1460
 
981
1461
  /* Sanity checks */
982
1462
  if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
983
- if (((size_t)DTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
984
1463
 
985
1464
  /* Build Decoding Table */
986
- base32[0] = nbBits;
1465
+ DTableH->tableLog = (U16)nbBits;
1466
+ DTableH->fastMode = 1;
987
1467
  for (s=0; s<=maxSymbolValue; s++)
988
1468
  {
989
1469
  dinfo[s].newState = 0;
@@ -1005,95 +1485,127 @@ size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSiz
1005
1485
  {
1006
1486
  if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
1007
1487
 
1008
- if (srcSize >= sizeof(bitD_t))
1488
+ if (srcSize >= sizeof(size_t))
1009
1489
  {
1010
1490
  U32 contain32;
1011
- bitD->start = (char*)srcBuffer;
1012
- bitD->ptr = (char*)srcBuffer + srcSize - sizeof(bitD_t);
1491
+ bitD->start = (const char*)srcBuffer;
1492
+ bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
1013
1493
  bitD->bitContainer = FSE_readLEST(bitD->ptr);
1014
- contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1494
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
1015
1495
  if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1016
1496
  bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1017
1497
  }
1018
1498
  else
1019
1499
  {
1020
1500
  U32 contain32;
1021
- bitD->start = (char*)srcBuffer;
1501
+ bitD->start = (const char*)srcBuffer;
1022
1502
  bitD->ptr = bitD->start;
1023
- bitD->bitContainer = *(BYTE*)(bitD->start);
1503
+ bitD->bitContainer = *(const BYTE*)(bitD->start);
1024
1504
  switch(srcSize)
1025
1505
  {
1026
- case 7: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[6]) << (sizeof(bitD_t)*8 - 16);
1027
- case 6: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[5]) << (sizeof(bitD_t)*8 - 24);
1028
- case 5: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[4]) << (sizeof(bitD_t)*8 - 32);
1029
- case 4: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[3]) << 24;
1030
- case 3: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[2]) << 16;
1031
- case 2: bitD->bitContainer += (bitD_t)(((BYTE*)(bitD->start))[1]) << 8;
1506
+ case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
1507
+ case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
1508
+ case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
1509
+ case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
1510
+ case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
1511
+ case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
1032
1512
  default:;
1033
1513
  }
1034
- contain32 = ((BYTE*)srcBuffer)[srcSize-1];
1514
+ contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
1035
1515
  if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
1036
1516
  bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
1037
- bitD->bitsConsumed += (U32)(sizeof(bitD_t) - srcSize)*8;
1517
+ bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
1038
1518
  }
1039
1519
 
1040
1520
  return srcSize;
1041
1521
  }
1042
1522
 
1043
1523
 
1524
+ /* FSE_lookBits
1525
+ * Provides next n bits from the bitContainer.
1526
+ * bitContainer is not modified (bits are still present for next read/look)
1527
+ * On 32-bits, maxNbBits==25
1528
+ * On 64-bits, maxNbBits==57
1529
+ * return : value extracted.
1530
+ */
1531
+ static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
1532
+ {
1533
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
1534
+ return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
1535
+ }
1536
+
1537
+ static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1538
+ {
1539
+ const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
1540
+ return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
1541
+ }
1542
+
1543
+ static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
1544
+ {
1545
+ bitD->bitsConsumed += nbBits;
1546
+ }
1547
+
1548
+
1044
1549
  /* FSE_readBits
1045
1550
  * Read next n bits from the bitContainer.
1046
- * Use the fast variant *only* if n > 0.
1047
- * Note : for this function to work properly on 32-bits, don't read more than maxNbBits==25
1551
+ * On 32-bits, don't read more than maxNbBits==25
1552
+ * On 64-bits, don't read more than maxNbBits==57
1553
+ * Use the fast variant *only* if n >= 1.
1048
1554
  * return : value extracted.
1049
1555
  */
1050
- bitD_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1556
+ size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
1051
1557
  {
1052
- bitD_t value = ((bitD->bitContainer << bitD->bitsConsumed) >> 1) >> (((sizeof(bitD_t)*8)-1)-nbBits);
1053
- bitD->bitsConsumed += nbBits;
1558
+ size_t value = FSE_lookBits(bitD, nbBits);
1559
+ FSE_skipBits(bitD, nbBits);
1054
1560
  return value;
1055
1561
  }
1056
1562
 
1057
- bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 */
1563
+ size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
1058
1564
  {
1059
- bitD_t value = (bitD->bitContainer << bitD->bitsConsumed) >> ((sizeof(bitD_t)*8)-nbBits);
1060
- bitD->bitsConsumed += nbBits;
1565
+ size_t value = FSE_lookBitsFast(bitD, nbBits);
1566
+ FSE_skipBits(bitD, nbBits);
1061
1567
  return value;
1062
1568
  }
1063
1569
 
1064
1570
  unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
1065
1571
  {
1066
- if (bitD->ptr >= bitD->start + sizeof(bitD_t))
1572
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
1573
+ return FSE_DStream_tooFar;
1574
+
1575
+ if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
1067
1576
  {
1068
1577
  bitD->ptr -= bitD->bitsConsumed >> 3;
1069
1578
  bitD->bitsConsumed &= 7;
1070
1579
  bitD->bitContainer = FSE_readLEST(bitD->ptr);
1071
- return 0;
1580
+ return FSE_DStream_unfinished;
1072
1581
  }
1073
1582
  if (bitD->ptr == bitD->start)
1074
1583
  {
1075
- if (bitD->bitsConsumed < sizeof(bitD_t)*8) return 1;
1076
- if (bitD->bitsConsumed == sizeof(bitD_t)*8) return 2;
1077
- return 3;
1584
+ if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
1585
+ return FSE_DStream_completed;
1078
1586
  }
1079
1587
  {
1080
1588
  U32 nbBytes = bitD->bitsConsumed >> 3;
1589
+ U32 result = FSE_DStream_unfinished;
1081
1590
  if (bitD->ptr - nbBytes < bitD->start)
1082
- nbBytes = (U32)(bitD->ptr - bitD->start); /* note : necessarily ptr > start */
1591
+ {
1592
+ nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
1593
+ result = FSE_DStream_endOfBuffer;
1594
+ }
1083
1595
  bitD->ptr -= nbBytes;
1084
1596
  bitD->bitsConsumed -= nbBytes*8;
1085
- bitD->bitContainer = FSE_readLEST(bitD->ptr); /* note : necessarily srcSize > sizeof(bitD) */
1086
- return (bitD->ptr == bitD->start);
1597
+ bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
1598
+ return result;
1087
1599
  }
1088
1600
  }
1089
1601
 
1090
1602
 
1091
- void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const void* DTable)
1603
+ void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
1092
1604
  {
1093
- const U32* const base32 = (const U32*)DTable;
1094
- DStatePtr->state = FSE_readBits(bitD, base32[0]);
1605
+ const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt;
1606
+ DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
1095
1607
  FSE_reloadDStream(bitD);
1096
- DStatePtr->table = base32 + 1;
1608
+ DStatePtr->table = dt + 1;
1097
1609
  }
1098
1610
 
1099
1611
  BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
@@ -1101,7 +1613,7 @@ BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1101
1613
  const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1102
1614
  const U32 nbBits = DInfo.nbBits;
1103
1615
  BYTE symbol = DInfo.symbol;
1104
- bitD_t lowBits = FSE_readBits(bitD, nbBits);
1616
+ size_t lowBits = FSE_readBits(bitD, nbBits);
1105
1617
 
1106
1618
  DStatePtr->state = DInfo.newState + lowBits;
1107
1619
  return symbol;
@@ -1112,7 +1624,7 @@ BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1112
1624
  const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
1113
1625
  const U32 nbBits = DInfo.nbBits;
1114
1626
  BYTE symbol = DInfo.symbol;
1115
- bitD_t lowBits = FSE_readBitsFast(bitD, nbBits);
1627
+ size_t lowBits = FSE_readBitsFast(bitD, nbBits);
1116
1628
 
1117
1629
  DStatePtr->state = DInfo.newState + lowBits;
1118
1630
  return symbol;
@@ -1123,19 +1635,19 @@ BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
1123
1635
 
1124
1636
  unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
1125
1637
  {
1126
- return FSE_reloadDStream((FSE_DStream_t*)bitD)==2;
1638
+ return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
1127
1639
  }
1128
1640
 
1129
- unsigned FSE_endOfDState(const FSE_DState_t* statePtr)
1641
+ unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
1130
1642
  {
1131
- return statePtr->state == 0;
1643
+ return DStatePtr->state == 0;
1132
1644
  }
1133
1645
 
1134
1646
 
1135
1647
  FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1136
1648
  void* dst, size_t maxDstSize,
1137
1649
  const void* cSrc, size_t cSrcSize,
1138
- const void* DTable, unsigned fast)
1650
+ const FSE_DTable* dt, const unsigned fast)
1139
1651
  {
1140
1652
  BYTE* const ostart = (BYTE*) dst;
1141
1653
  BYTE* op = ostart;
@@ -1143,50 +1655,57 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1143
1655
  BYTE* const olimit = omax-3;
1144
1656
 
1145
1657
  FSE_DStream_t bitD;
1146
- FSE_DState_t state1, state2;
1658
+ FSE_DState_t state1;
1659
+ FSE_DState_t state2;
1147
1660
  size_t errorCode;
1148
1661
 
1149
1662
  /* Init */
1150
1663
  errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
1151
1664
  if (FSE_isError(errorCode)) return errorCode;
1152
1665
 
1153
- FSE_initDState(&state1, &bitD, DTable);
1154
- FSE_initDState(&state2, &bitD, DTable);
1666
+ FSE_initDState(&state1, &bitD, dt);
1667
+ FSE_initDState(&state2, &bitD, dt);
1155
1668
 
1669
+ #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
1156
1670
 
1157
- /* 2 symbols per loop */
1158
- while (!FSE_reloadDStream(&bitD) && (op<olimit))
1671
+ /* 4 symbols per loop */
1672
+ for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
1159
1673
  {
1160
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1674
+ op[0] = FSE_GETSYMBOL(&state1);
1161
1675
 
1162
- if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD_t)*8) /* This test must be static */
1676
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1163
1677
  FSE_reloadDStream(&bitD);
1164
1678
 
1165
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1679
+ op[1] = FSE_GETSYMBOL(&state2);
1166
1680
 
1167
- if (FSE_MAX_TABLELOG*4+7 < sizeof(bitD_t)*8) /* This test must be static */
1168
- {
1169
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1170
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1171
- }
1681
+ if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1682
+ { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
1683
+
1684
+ op[2] = FSE_GETSYMBOL(&state1);
1685
+
1686
+ if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
1687
+ FSE_reloadDStream(&bitD);
1688
+
1689
+ op[3] = FSE_GETSYMBOL(&state2);
1172
1690
  }
1173
1691
 
1174
1692
  /* tail */
1693
+ /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
1175
1694
  while (1)
1176
1695
  {
1177
- if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1696
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
1178
1697
  break;
1179
1698
 
1180
- *op++ = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1699
+ *op++ = FSE_GETSYMBOL(&state1);
1181
1700
 
1182
- if ( (FSE_reloadDStream(&bitD)>2) || (op==omax) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1701
+ if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
1183
1702
  break;
1184
1703
 
1185
- *op++ = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1704
+ *op++ = FSE_GETSYMBOL(&state2);
1186
1705
  }
1187
1706
 
1188
1707
  /* end ? */
1189
- if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1708
+ if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
1190
1709
  return op-ostart;
1191
1710
 
1192
1711
  if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
@@ -1197,11 +1716,14 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
1197
1716
 
1198
1717
  size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
1199
1718
  const void* cSrc, size_t cSrcSize,
1200
- const void* DTable, size_t fastMode)
1719
+ const FSE_DTable* dt)
1201
1720
  {
1721
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)dt;
1722
+ const U32 fastMode = DTableH->fastMode;
1723
+
1202
1724
  /* select fast mode (static) */
1203
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 1);
1204
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, DTable, 0);
1725
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
1726
+ return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
1205
1727
  }
1206
1728
 
1207
1729
 
@@ -1210,312 +1732,735 @@ size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSr
1210
1732
  const BYTE* const istart = (const BYTE*)cSrc;
1211
1733
  const BYTE* ip = istart;
1212
1734
  short counting[FSE_MAX_SYMBOL_VALUE+1];
1213
- FSE_decode_t DTable[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
1214
- unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1735
+ DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
1215
1736
  unsigned tableLog;
1216
- size_t errorCode, fastMode;
1737
+ unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
1738
+ size_t errorCode;
1217
1739
 
1218
1740
  if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1219
1741
 
1220
1742
  /* normal FSE decoding mode */
1221
- errorCode = FSE_readHeader (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1743
+ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
1222
1744
  if (FSE_isError(errorCode)) return errorCode;
1223
1745
  if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
1224
1746
  ip += errorCode;
1225
1747
  cSrcSize -= errorCode;
1226
1748
 
1227
- fastMode = FSE_buildDTable (DTable, counting, maxSymbolValue, tableLog);
1228
- if (FSE_isError(fastMode)) return fastMode;
1749
+ errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
1750
+ if (FSE_isError(errorCode)) return errorCode;
1229
1751
 
1230
1752
  /* always return, even if it is an error code */
1231
- return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable, fastMode);
1753
+ return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
1232
1754
  }
1233
1755
 
1234
1756
 
1235
- #endif /* FSE_COMMONDEFS_ONLY */
1236
1757
 
1237
- /*
1238
- 2nd part of the file
1239
- designed to be included
1240
- for type-specific functions (template equivalent in C)
1241
- Objective is to write such functions only once, for better maintenance
1242
- */
1243
-
1244
- /* safety checks */
1245
- #ifndef FSE_FUNCTION_EXTENSION
1246
- # error "FSE_FUNCTION_EXTENSION must be defined"
1247
- #endif
1248
- #ifndef FSE_FUNCTION_TYPE
1249
- # error "FSE_FUNCTION_TYPE must be defined"
1758
+ /*********************************************************
1759
+ * Huff0 : Huffman block compression
1760
+ *********************************************************/
1761
+ #define HUF_MAX_SYMBOL_VALUE 255
1762
+ #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
1763
+ #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
1764
+ #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
1765
+ #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
1766
+ # error "HUF_MAX_TABLELOG is too large !"
1250
1767
  #endif
1251
1768
 
1252
- /* Function names */
1253
- #define FSE_CAT(X,Y) X##Y
1254
- #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
1255
- #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
1769
+ typedef struct HUF_CElt_s {
1770
+ U16 val;
1771
+ BYTE nbBits;
1772
+ } HUF_CElt ;
1256
1773
 
1774
+ typedef struct nodeElt_s {
1775
+ U32 count;
1776
+ U16 parent;
1777
+ BYTE byte;
1778
+ BYTE nbBits;
1779
+ } nodeElt;
1257
1780
 
1258
- /* Function templates */
1259
- size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr, unsigned safe)
1781
+ /* HUF_writeCTable() :
1782
+ return : size of saved CTable */
1783
+ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog)
1260
1784
  {
1261
- const FSE_FUNCTION_TYPE* ip = source;
1262
- const FSE_FUNCTION_TYPE* const iend = ip+sourceSize;
1263
- unsigned maxSymbolValue = *maxSymbolValuePtr;
1264
- unsigned max=0;
1265
- int s;
1785
+ BYTE bitsToWeight[HUF_ABSOLUTEMAX_TABLELOG + 1];
1786
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
1787
+ U32 n;
1788
+ BYTE* op = (BYTE*)dst;
1789
+ size_t size;
1266
1790
 
1267
- U32 Counting1[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1268
- U32 Counting2[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1269
- U32 Counting3[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1270
- U32 Counting4[FSE_MAX_SYMBOL_VALUE+1] = { 0 };
1791
+ /* check conditions */
1792
+ if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1)
1793
+ return (size_t)-FSE_ERROR_GENERIC;
1794
+
1795
+ /* convert to weight */
1796
+ bitsToWeight[0] = 0;
1797
+ for (n=1; n<=huffLog; n++)
1798
+ bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
1799
+ for (n=0; n<maxSymbolValue; n++)
1800
+ huffWeight[n] = bitsToWeight[tree[n].nbBits];
1801
+
1802
+ size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); /* don't need last symbol stat : implied */
1803
+ if (FSE_isError(size)) return size;
1804
+ if (size >= 128) return (size_t)-FSE_ERROR_GENERIC; /* should never happen, since maxSymbolValue <= 255 */
1805
+ if ((size <= 1) || (size >= maxSymbolValue/2))
1806
+ {
1807
+ if (size==1) /* RLE */
1808
+ {
1809
+ /* only possible case : serie of 1 (because there are at least 2) */
1810
+ /* can only be 2^n or (2^n-1), otherwise not an huffman tree */
1811
+ BYTE code;
1812
+ switch(maxSymbolValue)
1813
+ {
1814
+ case 1: code = 0; break;
1815
+ case 2: code = 1; break;
1816
+ case 3: code = 2; break;
1817
+ case 4: code = 3; break;
1818
+ case 7: code = 4; break;
1819
+ case 8: code = 5; break;
1820
+ case 15: code = 6; break;
1821
+ case 16: code = 7; break;
1822
+ case 31: code = 8; break;
1823
+ case 32: code = 9; break;
1824
+ case 63: code = 10; break;
1825
+ case 64: code = 11; break;
1826
+ case 127: code = 12; break;
1827
+ case 128: code = 13; break;
1828
+ default : return (size_t)-FSE_ERROR_corruptionDetected;
1829
+ }
1830
+ op[0] = (BYTE)(255-13 + code);
1831
+ return 1;
1832
+ }
1833
+ /* Not compressible */
1834
+ if (maxSymbolValue > (241-128)) return (size_t)-FSE_ERROR_GENERIC; /* not implemented (not possible with current format) */
1835
+ if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* not enough space within dst buffer */
1836
+ op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1));
1837
+ huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */
1838
+ for (n=0; n<maxSymbolValue; n+=2)
1839
+ op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
1840
+ return ((maxSymbolValue+1)/2) + 1;
1841
+ }
1842
+
1843
+ /* normal header case */
1844
+ op[0] = (BYTE)size;
1845
+ return size+1;
1846
+ }
1847
+
1848
+
1849
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
1850
+ {
1851
+ int totalCost = 0;
1852
+ const U32 largestBits = huffNode[lastNonNull].nbBits;
1853
+
1854
+ /* early exit : all is fine */
1855
+ if (largestBits <= maxNbBits) return largestBits;
1856
+
1857
+ // now we have a few too large elements (at least >= 2)
1858
+ {
1859
+ const U32 baseCost = 1 << (largestBits - maxNbBits);
1860
+ U32 n = lastNonNull;
1861
+
1862
+ while (huffNode[n].nbBits > maxNbBits)
1863
+ {
1864
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
1865
+ huffNode[n].nbBits = (BYTE)maxNbBits;
1866
+ n --;
1867
+ }
1868
+
1869
+ /* renorm totalCost */
1870
+ totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */
1871
+
1872
+ // repay cost
1873
+ while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1)
1874
+
1875
+ {
1876
+ const U32 noOne = 0xF0F0F0F0;
1877
+ // Get pos of last (smallest) symbol per rank
1878
+ U32 rankLast[HUF_MAX_TABLELOG];
1879
+ U32 currentNbBits = maxNbBits;
1880
+ int pos;
1881
+ memset(rankLast, 0xF0, sizeof(rankLast));
1882
+ for (pos=n ; pos >= 0; pos--)
1883
+ {
1884
+ if (huffNode[pos].nbBits >= currentNbBits) continue;
1885
+ currentNbBits = huffNode[pos].nbBits;
1886
+ rankLast[maxNbBits-currentNbBits] = pos;
1887
+ }
1888
+
1889
+ while (totalCost > 0)
1890
+ {
1891
+ U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1;
1892
+ for ( ; nBitsToDecrease > 1; nBitsToDecrease--)
1893
+ {
1894
+ U32 highPos = rankLast[nBitsToDecrease];
1895
+ U32 lowPos = rankLast[nBitsToDecrease-1];
1896
+ if (highPos == noOne) continue;
1897
+ if (lowPos == noOne) break;
1898
+ {
1899
+ U32 highTotal = huffNode[highPos].count;
1900
+ U32 lowTotal = 2 * huffNode[lowPos].count;
1901
+ if (highTotal <= lowTotal) break;
1902
+ }
1903
+ }
1904
+ while (rankLast[nBitsToDecrease] == noOne)
1905
+ nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest
1906
+ totalCost -= 1 << (nBitsToDecrease-1);
1907
+ if (rankLast[nBitsToDecrease-1] == noOne)
1908
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt
1909
+ huffNode[rankLast[nBitsToDecrease]].nbBits ++;
1910
+ if (rankLast[nBitsToDecrease] == 0)
1911
+ rankLast[nBitsToDecrease] = noOne;
1912
+ else
1913
+ {
1914
+ rankLast[nBitsToDecrease]--;
1915
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
1916
+ rankLast[nBitsToDecrease] = noOne; // rank list emptied
1917
+ }
1918
+ }
1919
+
1920
+ while (totalCost < 0) /* Sometimes, cost correction overshoot */
1921
+ {
1922
+ if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */
1923
+ {
1924
+ while (huffNode[n].nbBits == maxNbBits) n--;
1925
+ huffNode[n+1].nbBits--;
1926
+ rankLast[1] = n+1;
1927
+ totalCost++;
1928
+ continue;
1929
+ }
1930
+ huffNode[ rankLast[1] + 1 ].nbBits--;
1931
+ rankLast[1]++;
1932
+ totalCost ++;
1933
+ }
1934
+ }
1935
+ }
1936
+
1937
+ return maxNbBits;
1938
+ }
1939
+
1940
+
1941
+ typedef struct {
1942
+ U32 base;
1943
+ U32 current;
1944
+ } rankPos;
1945
+
1946
+ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
1947
+ {
1948
+ rankPos rank[32];
1949
+ U32 n;
1271
1950
 
1272
- /* safety checks */
1273
- if (!sourceSize)
1951
+ memset(rank, 0, sizeof(rank));
1952
+ for (n=0; n<=maxSymbolValue; n++)
1274
1953
  {
1275
- memset(count, 0, (maxSymbolValue + 1) * sizeof(FSE_FUNCTION_TYPE));
1276
- *maxSymbolValuePtr = 0;
1277
- return 0;
1954
+ U32 r = FSE_highbit32(count[n] + 1);
1955
+ rank[r].base ++;
1278
1956
  }
1279
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */
1280
- if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */
1281
-
1282
- if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1))
1957
+ for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
1958
+ for (n=0; n<32; n++) rank[n].current = rank[n].base;
1959
+ for (n=0; n<=maxSymbolValue; n++)
1283
1960
  {
1284
- /* check input values, to avoid count table overflow */
1285
- while (ip < iend-3)
1286
- {
1287
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++;
1288
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++;
1289
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++;
1290
- if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++;
1291
- }
1961
+ U32 c = count[n];
1962
+ U32 r = FSE_highbit32(c+1) + 1;
1963
+ U32 pos = rank[r].current++;
1964
+ while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
1965
+ huffNode[pos].count = c;
1966
+ huffNode[pos].byte = (BYTE)n;
1292
1967
  }
1293
- else
1968
+ }
1969
+
1970
+
1971
+ #define STARTNODE (HUF_MAX_SYMBOL_VALUE+1)
1972
+ size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
1973
+ {
1974
+ nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1];
1975
+ nodeElt* huffNode = huffNode0 + 1;
1976
+ U32 n, nonNullRank;
1977
+ int lowS, lowN;
1978
+ U16 nodeNb = STARTNODE;
1979
+ U32 nodeRoot;
1980
+
1981
+ /* safety checks */
1982
+ if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG;
1983
+ if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC;
1984
+ memset(huffNode0, 0, sizeof(huffNode0));
1985
+
1986
+ // sort, decreasing order
1987
+ HUF_sort(huffNode, count, maxSymbolValue);
1988
+
1989
+ // init for parents
1990
+ nonNullRank = maxSymbolValue;
1991
+ while(huffNode[nonNullRank].count == 0) nonNullRank--;
1992
+ lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
1993
+ huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
1994
+ huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
1995
+ nodeNb++; lowS-=2;
1996
+ for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
1997
+ huffNode0[0].count = (U32)(1U<<31);
1998
+
1999
+ // create parents
2000
+ while (nodeNb <= nodeRoot)
1294
2001
  {
1295
- U32 cached = FSE_read32(ip); ip += 4;
1296
- while (ip < iend-15)
1297
- {
1298
- U32 c = cached; cached = FSE_read32(ip); ip += 4;
1299
- Counting1[(BYTE) c ]++;
1300
- Counting2[(BYTE)(c>>8) ]++;
1301
- Counting3[(BYTE)(c>>16)]++;
1302
- Counting4[ c>>24 ]++;
1303
- c = cached; cached = FSE_read32(ip); ip += 4;
1304
- Counting1[(BYTE) c ]++;
1305
- Counting2[(BYTE)(c>>8) ]++;
1306
- Counting3[(BYTE)(c>>16)]++;
1307
- Counting4[ c>>24 ]++;
1308
- c = cached; cached = FSE_read32(ip); ip += 4;
1309
- Counting1[(BYTE) c ]++;
1310
- Counting2[(BYTE)(c>>8) ]++;
1311
- Counting3[(BYTE)(c>>16)]++;
1312
- Counting4[ c>>24 ]++;
1313
- c = cached; cached = FSE_read32(ip); ip += 4;
1314
- Counting1[(BYTE) c ]++;
1315
- Counting2[(BYTE)(c>>8) ]++;
1316
- Counting3[(BYTE)(c>>16)]++;
1317
- Counting4[ c>>24 ]++;
1318
- }
1319
- ip-=4;
2002
+ U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
2003
+ U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
2004
+ huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
2005
+ huffNode[n1].parent = huffNode[n2].parent = nodeNb;
2006
+ nodeNb++;
1320
2007
  }
1321
2008
 
1322
- /* finish last symbols */
1323
- while (ip<iend) { if ((safe) && (*ip>maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; }
2009
+ // distribute weights (unlimited tree height)
2010
+ huffNode[nodeRoot].nbBits = 0;
2011
+ for (n=nodeRoot-1; n>=STARTNODE; n--)
2012
+ huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
2013
+ for (n=0; n<=nonNullRank; n++)
2014
+ huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
1324
2015
 
1325
- for (s=0; s<=(int)maxSymbolValue; s++)
2016
+ // enforce maxTableLog
2017
+ maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
2018
+
2019
+ // fill result into tree (val, nbBits)
1326
2020
  {
1327
- count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
1328
- if (count[s] > max) max = count[s];
2021
+ U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0};
2022
+ U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1];
2023
+ if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check
2024
+ for (n=0; n<=nonNullRank; n++)
2025
+ nbPerRank[huffNode[n].nbBits]++;
2026
+ {
2027
+ // determine stating value per rank
2028
+ U16 min = 0;
2029
+ for (n=maxNbBits; n>0; n--)
2030
+ {
2031
+ valPerRank[n] = min; // get starting value within each rank
2032
+ min += nbPerRank[n];
2033
+ min >>= 1;
2034
+ }
2035
+ }
2036
+ for (n=0; n<=maxSymbolValue; n++)
2037
+ tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order
2038
+ for (n=0; n<=maxSymbolValue; n++)
2039
+ tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order
1329
2040
  }
1330
2041
 
1331
- while (!count[maxSymbolValue]) maxSymbolValue--;
1332
- *maxSymbolValuePtr = maxSymbolValue;
1333
- return (int)max;
2042
+ return maxNbBits;
1334
2043
  }
1335
2044
 
1336
- /* hidden fast variant (unsafe) */
1337
- size_t FSE_FUNCTION_NAME(FSE_countFast, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
2045
+ static void HUF_encodeSymbol(FSE_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
1338
2046
  {
1339
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
2047
+ FSE_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
1340
2048
  }
1341
2049
 
1342
- size_t FSE_FUNCTION_NAME(FSE_count, FSE_FUNCTION_EXTENSION) (unsigned* count, const FSE_FUNCTION_TYPE* source, size_t sourceSize, unsigned* maxSymbolValuePtr)
2050
+ #define FSE_FLUSHBITS_1(stream) \
2051
+ if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) FSE_FLUSHBITS(stream)
2052
+
2053
+ #define FSE_FLUSHBITS_2(stream) \
2054
+ if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) FSE_FLUSHBITS(stream)
2055
+
2056
+ size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable)
1343
2057
  {
1344
- if ((sizeof(FSE_FUNCTION_TYPE)==1) && (*maxSymbolValuePtr >= 255))
2058
+ const BYTE* ip = (const BYTE*) src;
2059
+ BYTE* const ostart = (BYTE*)dst;
2060
+ BYTE* op = (BYTE*) ostart;
2061
+ BYTE* const oend = ostart + dstSize;
2062
+ U16* jumpTable = (U16*) dst;
2063
+ size_t n, streamSize;
2064
+ const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
2065
+ size_t errorCode;
2066
+ FSE_CStream_t bitC;
2067
+
2068
+ /* init */
2069
+ if (dstSize < 8) return 0;
2070
+ op += 6; /* jump Table -- could be optimized by delta / deviation */
2071
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2072
+ if (FSE_isError(errorCode)) return 0;
2073
+
2074
+ n = srcSize & ~15; // round down to a multiple of 16
2075
+ switch (srcSize & 15)
1345
2076
  {
1346
- *maxSymbolValuePtr = 255;
1347
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 0);
2077
+ case 15: HUF_encodeSymbol(&bitC, ip[n+14], CTable);
2078
+ FSE_FLUSHBITS_1(&bitC);
2079
+ case 14: HUF_encodeSymbol(&bitC, ip[n+13], CTable);
2080
+ FSE_FLUSHBITS_2(&bitC);
2081
+ case 13: HUF_encodeSymbol(&bitC, ip[n+12], CTable);
2082
+ FSE_FLUSHBITS_1(&bitC);
2083
+ case 12: HUF_encodeSymbol(&bitC, ip[n+11], CTable);
2084
+ FSE_FLUSHBITS(&bitC);
2085
+ case 11: HUF_encodeSymbol(&bitC, ip[n+10], CTable);
2086
+ FSE_FLUSHBITS_1(&bitC);
2087
+ case 10: HUF_encodeSymbol(&bitC, ip[n+ 9], CTable);
2088
+ FSE_FLUSHBITS_2(&bitC);
2089
+ case 9 : HUF_encodeSymbol(&bitC, ip[n+ 8], CTable);
2090
+ FSE_FLUSHBITS_1(&bitC);
2091
+ case 8 : HUF_encodeSymbol(&bitC, ip[n+ 7], CTable);
2092
+ FSE_FLUSHBITS(&bitC);
2093
+ case 7 : HUF_encodeSymbol(&bitC, ip[n+ 6], CTable);
2094
+ FSE_FLUSHBITS_1(&bitC);
2095
+ case 6 : HUF_encodeSymbol(&bitC, ip[n+ 5], CTable);
2096
+ FSE_FLUSHBITS_2(&bitC);
2097
+ case 5 : HUF_encodeSymbol(&bitC, ip[n+ 4], CTable);
2098
+ FSE_FLUSHBITS_1(&bitC);
2099
+ case 4 : HUF_encodeSymbol(&bitC, ip[n+ 3], CTable);
2100
+ FSE_FLUSHBITS(&bitC);
2101
+ case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
2102
+ FSE_FLUSHBITS_2(&bitC);
2103
+ case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
2104
+ FSE_FLUSHBITS_1(&bitC);
2105
+ case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
2106
+ FSE_FLUSHBITS(&bitC);
2107
+ case 0 :
2108
+ default: ;
1348
2109
  }
1349
- return FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) (count, source, sourceSize, maxSymbolValuePtr, 1);
1350
- }
1351
2110
 
2111
+ for (; n>0; n-=16)
2112
+ {
2113
+ HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
2114
+ FSE_FLUSHBITS_1(&bitC);
2115
+ HUF_encodeSymbol(&bitC, ip[n- 8], CTable);
2116
+ FSE_FLUSHBITS_2(&bitC);
2117
+ HUF_encodeSymbol(&bitC, ip[n-12], CTable);
2118
+ FSE_FLUSHBITS_1(&bitC);
2119
+ HUF_encodeSymbol(&bitC, ip[n-16], CTable);
2120
+ FSE_FLUSHBITS(&bitC);
2121
+ }
2122
+ streamSize = FSE_closeCStream(&bitC);
2123
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2124
+ FSE_writeLE16(jumpTable, (U16)streamSize);
2125
+ op += streamSize;
2126
+
2127
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2128
+ if (FSE_isError(errorCode)) return 0;
2129
+ n = srcSize & ~15; // round down to a multiple of 16
2130
+ for (; n>0; n-=16)
2131
+ {
2132
+ HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
2133
+ FSE_FLUSHBITS_1(&bitC);
2134
+ HUF_encodeSymbol(&bitC, ip[n- 7], CTable);
2135
+ FSE_FLUSHBITS_2(&bitC);
2136
+ HUF_encodeSymbol(&bitC, ip[n-11], CTable);
2137
+ FSE_FLUSHBITS_1(&bitC);
2138
+ HUF_encodeSymbol(&bitC, ip[n-15], CTable);
2139
+ FSE_FLUSHBITS(&bitC);
2140
+ }
2141
+ streamSize = FSE_closeCStream(&bitC);
2142
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2143
+ FSE_writeLE16(jumpTable+1, (U16)streamSize);
2144
+ op += streamSize;
2145
+
2146
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2147
+ if (FSE_isError(errorCode)) return 0;
2148
+ n = srcSize & ~15; // round down to a multiple of 16
2149
+ for (; n>0; n-=16)
2150
+ {
2151
+ HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
2152
+ FSE_FLUSHBITS_1(&bitC);
2153
+ HUF_encodeSymbol(&bitC, ip[n- 6], CTable);
2154
+ FSE_FLUSHBITS_2(&bitC);
2155
+ HUF_encodeSymbol(&bitC, ip[n-10], CTable);
2156
+ FSE_FLUSHBITS_1(&bitC);
2157
+ HUF_encodeSymbol(&bitC, ip[n-14], CTable);
2158
+ FSE_FLUSHBITS(&bitC);
2159
+ }
2160
+ streamSize = FSE_closeCStream(&bitC);
2161
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2162
+ FSE_writeLE16(jumpTable+2, (U16)streamSize);
2163
+ op += streamSize;
2164
+
2165
+ errorCode = FSE_initCStream(&bitC, op, oend-op);
2166
+ if (FSE_isError(errorCode)) return 0;
2167
+ n = srcSize & ~15; // round down to a multiple of 16
2168
+ for (; n>0; n-=16)
2169
+ {
2170
+ HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
2171
+ FSE_FLUSHBITS_1(&bitC);
2172
+ HUF_encodeSymbol(&bitC, ip[n- 5], CTable);
2173
+ FSE_FLUSHBITS_2(&bitC);
2174
+ HUF_encodeSymbol(&bitC, ip[n- 9], CTable);
2175
+ FSE_FLUSHBITS_1(&bitC);
2176
+ HUF_encodeSymbol(&bitC, ip[n-13], CTable);
2177
+ FSE_FLUSHBITS(&bitC);
2178
+ }
2179
+ streamSize = FSE_closeCStream(&bitC);
2180
+ if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */
2181
+ op += streamSize;
2182
+
2183
+ return op-ostart;
2184
+ }
1352
2185
 
1353
- static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
1354
2186
 
1355
- size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION)
1356
- (void* CTable, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
2187
+ size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog)
1357
2188
  {
1358
- const unsigned tableSize = 1 << tableLog;
1359
- const unsigned tableMask = tableSize - 1;
1360
- U16* tableU16 = ( (U16*) CTable) + 2;
1361
- FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) (((U32*)CTable) + 1 + (tableLog ? tableSize>>1 : 1) );
1362
- const unsigned step = FSE_tableStep(tableSize);
1363
- unsigned cumul[FSE_MAX_SYMBOL_VALUE+2];
1364
- U32 position = 0;
1365
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];
1366
- U32 highThreshold = tableSize-1;
1367
- unsigned symbol;
1368
- unsigned i;
2189
+ BYTE* const ostart = (BYTE*)dst;
2190
+ BYTE* op = ostart;
2191
+ BYTE* const oend = ostart + dstSize;
1369
2192
 
1370
- /* safety checks */
1371
- if (((size_t)CTable) & 3) return (size_t)-FSE_ERROR_GENERIC; /* Must be allocated of 4 bytes boundaries */
2193
+ U32 count[HUF_MAX_SYMBOL_VALUE+1];
2194
+ HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1];
2195
+ size_t errorCode;
1372
2196
 
1373
- /* header */
1374
- tableU16[-2] = (U16) tableLog;
1375
- tableU16[-1] = (U16) maxSymbolValue;
2197
+ /* early out */
2198
+ if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */
2199
+ if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE;
2200
+ if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG;
1376
2201
 
1377
- /* For explanations on how to distribute symbol values over the table :
1378
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
2202
+ /* Scan input and build symbol stats */
2203
+ errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
2204
+ if (FSE_isError(errorCode)) return errorCode;
2205
+ if (errorCode == srcSize) return 1;
2206
+ if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
1379
2207
 
1380
- /* symbol start positions */
1381
- cumul[0] = 0;
1382
- for (i=1; i<=maxSymbolValue+1; i++)
2208
+ /* Build Huffman Tree */
2209
+ errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
2210
+ if (FSE_isError(errorCode)) return errorCode;
2211
+ huffLog = (U32)errorCode;
2212
+
2213
+ /* Write table description header */
2214
+ errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */
2215
+ if (FSE_isError(errorCode)) return errorCode;
2216
+ op += errorCode;
2217
+
2218
+ /* Compress */
2219
+ errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable);
2220
+ if (FSE_isError(errorCode)) return errorCode;
2221
+ if (errorCode==0) return 0;
2222
+ op += errorCode;
2223
+
2224
+ /* check compressibility */
2225
+ if ((size_t)(op-ostart) >= srcSize-1)
2226
+ return op-ostart;
2227
+
2228
+ return op-ostart;
2229
+ }
2230
+
2231
+ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2232
+ {
2233
+ return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG);
2234
+ }
2235
+
2236
+
2237
+ /*********************************************************
2238
+ * Huff0 : Huffman block decompression
2239
+ *********************************************************/
2240
+ typedef struct {
2241
+ BYTE byte;
2242
+ BYTE nbBits;
2243
+ } HUF_DElt;
2244
+
2245
+ size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
2246
+ {
2247
+ BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
2248
+ U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
2249
+ U32 weightTotal;
2250
+ U32 maxBits;
2251
+ const BYTE* ip = (const BYTE*) src;
2252
+ size_t iSize = ip[0];
2253
+ size_t oSize;
2254
+ U32 n;
2255
+ U32 nextRankStart;
2256
+ HUF_DElt* const dt = (HUF_DElt*)(DTable + 1);
2257
+
2258
+ FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
2259
+ //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzers complain ... */
2260
+ if (iSize >= 128) /* special header */
1383
2261
  {
1384
- if (normalizedCounter[i-1]==-1) /* Low prob symbol */
2262
+ if (iSize >= (242)) /* RLE */
1385
2263
  {
1386
- cumul[i] = cumul[i-1] + 1;
1387
- tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(i-1);
2264
+ static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
2265
+ oSize = l[iSize-242];
2266
+ memset(huffWeight, 1, oSize);
2267
+ iSize = 0;
2268
+ }
2269
+ else /* Incompressible */
2270
+ {
2271
+ oSize = iSize - 127;
2272
+ iSize = ((oSize+1)/2);
2273
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2274
+ ip += 1;
2275
+ for (n=0; n<oSize; n+=2)
2276
+ {
2277
+ huffWeight[n] = ip[n/2] >> 4;
2278
+ huffWeight[n+1] = ip[n/2] & 15;
2279
+ }
1388
2280
  }
1389
- else
1390
- cumul[i] = cumul[i-1] + normalizedCounter[i-1];
1391
2281
  }
1392
- cumul[maxSymbolValue+1] = tableSize+1;
2282
+ else /* header compressed with FSE (normal case) */
2283
+ {
2284
+ if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2285
+ oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
2286
+ if (FSE_isError(oSize)) return oSize;
2287
+ }
1393
2288
 
1394
- /* Spread symbols */
1395
- for (symbol=0; symbol<=maxSymbolValue; symbol++)
2289
+ /* collect weight stats */
2290
+ memset(rankVal, 0, sizeof(rankVal));
2291
+ weightTotal = 0;
2292
+ for (n=0; n<oSize; n++)
1396
2293
  {
1397
- int nbOccurences;
1398
- for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++)
1399
- {
1400
- tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1401
- position = (position + step) & tableMask;
1402
- while (position > highThreshold) position = (position + step) & tableMask; /* Lowprob area */
1403
- }
2294
+ if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
2295
+ rankVal[huffWeight[n]]++;
2296
+ weightTotal += (1 << huffWeight[n]) >> 1;
1404
2297
  }
1405
2298
 
1406
- if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */
2299
+ /* get last non-null symbol weight (implied, total must be 2^n) */
2300
+ maxBits = FSE_highbit32(weightTotal) + 1;
2301
+ if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
2302
+ DTable[0] = (U16)maxBits;
2303
+ {
2304
+ U32 total = 1 << maxBits;
2305
+ U32 rest = total - weightTotal;
2306
+ U32 verif = 1 << FSE_highbit32(rest);
2307
+ U32 lastWeight = FSE_highbit32(rest) + 1;
2308
+ if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
2309
+ huffWeight[oSize] = (BYTE)lastWeight;
2310
+ rankVal[lastWeight]++;
2311
+ }
1407
2312
 
1408
- /* Build table */
1409
- for (i=0; i<tableSize; i++)
2313
+ /* check tree construction validity */
2314
+ if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
2315
+
2316
+ /* Prepare ranks */
2317
+ nextRankStart = 0;
2318
+ for (n=1; n<=maxBits; n++)
1410
2319
  {
1411
- FSE_FUNCTION_TYPE s = tableSymbol[i];
1412
- tableU16[cumul[s]++] = (U16) (tableSize+i); // Table U16 : sorted by symbol order; gives next state value
2320
+ U32 current = nextRankStart;
2321
+ nextRankStart += (rankVal[n] << (n-1));
2322
+ rankVal[n] = current;
1413
2323
  }
1414
2324
 
1415
- // Build Symbol Transformation Table
2325
+ /* fill DTable */
2326
+ for (n=0; n<=oSize; n++)
1416
2327
  {
1417
- unsigned s;
1418
- unsigned total = 0;
1419
- for (s=0; s<=maxSymbolValue; s++)
1420
- {
1421
- switch (normalizedCounter[s])
1422
- {
1423
- case 0:
1424
- break;
1425
- case -1:
1426
- case 1:
1427
- symbolTT[s].minBitsOut = (BYTE)tableLog;
1428
- symbolTT[s].deltaFindState = total - 1;
1429
- total ++;
1430
- symbolTT[s].maxState = (U16)( (tableSize*2) - 1); /* ensures state <= maxState */
1431
- break;
1432
- default :
1433
- symbolTT[s].minBitsOut = (BYTE)( (tableLog-1) - FSE_highbit32 (normalizedCounter[s]-1) );
1434
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
1435
- total += normalizedCounter[s];
1436
- symbolTT[s].maxState = (U16)( (normalizedCounter[s] << (symbolTT[s].minBitsOut+1)) - 1);
1437
- }
1438
- }
2328
+ const U32 w = huffWeight[n];
2329
+ const U32 length = (1 << w) >> 1;
2330
+ U32 i;
2331
+ HUF_DElt D;
2332
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
2333
+ for (i = rankVal[w]; i < rankVal[w] + length; i++)
2334
+ dt[i] = D;
2335
+ rankVal[w] += length;
1439
2336
  }
1440
2337
 
1441
- return 0;
2338
+ return iSize+1;
1442
2339
  }
1443
2340
 
1444
2341
 
1445
- #define FSE_DECODE_TYPE FSE_TYPE_NAME(FSE_decode_t, FSE_FUNCTION_EXTENSION)
1446
-
1447
- void* FSE_FUNCTION_NAME(FSE_createDTable, FSE_FUNCTION_EXTENSION) (unsigned tableLog)
2342
+ static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1448
2343
  {
1449
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
1450
- return malloc( ((size_t)1<<tableLog) * sizeof (FSE_DECODE_TYPE) );
2344
+ const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
2345
+ const BYTE c = dt[val].byte;
2346
+ FSE_skipBits(Dstream, dt[val].nbBits);
2347
+ return c;
1451
2348
  }
1452
2349
 
1453
- void FSE_FUNCTION_NAME(FSE_freeDTable, FSE_FUNCTION_EXTENSION) (void* DTable)
2350
+ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
2351
+ void* dst, size_t maxDstSize,
2352
+ const void* cSrc, size_t cSrcSize,
2353
+ const U16* DTable)
1454
2354
  {
1455
- free(DTable);
1456
- }
2355
+ BYTE* const ostart = (BYTE*) dst;
2356
+ BYTE* op = ostart;
2357
+ BYTE* const omax = op + maxDstSize;
2358
+ BYTE* const olimit = omax-15;
1457
2359
 
2360
+ const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1);
2361
+ const U32 dtLog = DTable[0];
2362
+ size_t errorCode;
2363
+ U32 reloadStatus;
1458
2364
 
1459
- size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION)
1460
- (void* DTable, const short* const normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
1461
- {
1462
- U32* const base32 = (U32*)DTable;
1463
- FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (base32+1);
1464
- const U32 tableSize = 1 << tableLog;
1465
- const U32 tableMask = tableSize-1;
1466
- const U32 step = FSE_tableStep(tableSize);
1467
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
1468
- U32 position = 0;
1469
- U32 highThreshold = tableSize-1;
1470
- const S16 largeLimit= 1 << (tableLog-1);
1471
- U32 noLarge = 1;
1472
- U32 s;
2365
+ /* Init */
1473
2366
 
1474
- /* Sanity Checks */
1475
- if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
1476
- if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
2367
+ const U16* jumpTable = (const U16*)cSrc;
2368
+ const size_t length1 = FSE_readLE16(jumpTable);
2369
+ const size_t length2 = FSE_readLE16(jumpTable+1);
2370
+ const size_t length3 = FSE_readLE16(jumpTable+2);
2371
+ const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
2372
+ const char* const start1 = (const char*)(cSrc) + 6;
2373
+ const char* const start2 = start1 + length1;
2374
+ const char* const start3 = start2 + length2;
2375
+ const char* const start4 = start3 + length3;
2376
+ FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1477
2377
 
1478
- /* Init, lay down lowprob symbols */
1479
- base32[0] = tableLog;
1480
- for (s=0; s<=maxSymbolValue; s++)
1481
- {
1482
- if (normalizedCounter[s]==-1)
1483
- {
1484
- tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
1485
- symbolNext[s] = 1;
1486
- }
1487
- else
1488
- {
1489
- if (normalizedCounter[s] >= largeLimit) noLarge=0;
1490
- symbolNext[s] = normalizedCounter[s];
1491
- }
1492
- }
2378
+ if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1493
2379
 
1494
- /* Spread symbols */
1495
- for (s=0; s<=maxSymbolValue; s++)
2380
+ errorCode = FSE_initDStream(&bitD1, start1, length1);
2381
+ if (FSE_isError(errorCode)) return errorCode;
2382
+ errorCode = FSE_initDStream(&bitD2, start2, length2);
2383
+ if (FSE_isError(errorCode)) return errorCode;
2384
+ errorCode = FSE_initDStream(&bitD3, start3, length3);
2385
+ if (FSE_isError(errorCode)) return errorCode;
2386
+ errorCode = FSE_initDStream(&bitD4, start4, length4);
2387
+ if (FSE_isError(errorCode)) return errorCode;
2388
+
2389
+ reloadStatus=FSE_reloadDStream(&bitD2);
2390
+
2391
+ /* 16 symbols per loop */
2392
+ for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
2393
+ op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1496
2394
  {
1497
- int i;
1498
- for (i=0; i<normalizedCounter[s]; i++)
1499
- {
1500
- tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
1501
- position = (position + step) & tableMask;
1502
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
1503
- }
2395
+ #define HUF_DECODE_SYMBOL_0(n, Dstream) \
2396
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
2397
+
2398
+ #define HUF_DECODE_SYMBOL_1(n, Dstream) \
2399
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
2400
+ if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
2401
+
2402
+ #define HUF_DECODE_SYMBOL_2(n, Dstream) \
2403
+ op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
2404
+ if (FSE_32bits()) FSE_reloadDStream(&Dstream)
2405
+
2406
+ HUF_DECODE_SYMBOL_1( 0, bitD1);
2407
+ HUF_DECODE_SYMBOL_1( 1, bitD2);
2408
+ HUF_DECODE_SYMBOL_1( 2, bitD3);
2409
+ HUF_DECODE_SYMBOL_1( 3, bitD4);
2410
+ HUF_DECODE_SYMBOL_2( 4, bitD1);
2411
+ HUF_DECODE_SYMBOL_2( 5, bitD2);
2412
+ HUF_DECODE_SYMBOL_2( 6, bitD3);
2413
+ HUF_DECODE_SYMBOL_2( 7, bitD4);
2414
+ HUF_DECODE_SYMBOL_1( 8, bitD1);
2415
+ HUF_DECODE_SYMBOL_1( 9, bitD2);
2416
+ HUF_DECODE_SYMBOL_1(10, bitD3);
2417
+ HUF_DECODE_SYMBOL_1(11, bitD4);
2418
+ HUF_DECODE_SYMBOL_0(12, bitD1);
2419
+ HUF_DECODE_SYMBOL_0(13, bitD2);
2420
+ HUF_DECODE_SYMBOL_0(14, bitD3);
2421
+ HUF_DECODE_SYMBOL_0(15, bitD4);
1504
2422
  }
1505
2423
 
1506
- if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
2424
+ if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
2425
+ return (size_t)-FSE_ERROR_corruptionDetected;
1507
2426
 
1508
- /* Build Decoding table */
2427
+ /* tail */
1509
2428
  {
1510
- U32 i;
1511
- for (i=0; i<tableSize; i++)
2429
+ // bitTail = bitD1; // *much* slower : -20% !??!
2430
+ FSE_DStream_t bitTail;
2431
+ bitTail.ptr = bitD1.ptr;
2432
+ bitTail.bitsConsumed = bitD1.bitsConsumed;
2433
+ bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
2434
+ bitTail.start = start1;
2435
+ for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1512
2436
  {
1513
- FSE_FUNCTION_TYPE symbol = tableDecode[i].symbol;
1514
- U16 nextState = symbolNext[symbol]++;
1515
- tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
1516
- tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
2437
+ HUF_DECODE_SYMBOL_0(0, bitTail);
1517
2438
  }
2439
+
2440
+ if (FSE_endOfDStream(&bitTail))
2441
+ return op-ostart;
1518
2442
  }
1519
2443
 
1520
- return noLarge;
2444
+ if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
2445
+
2446
+ return (size_t)-FSE_ERROR_corruptionDetected;
2447
+ }
2448
+
2449
+
2450
+ size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
2451
+ {
2452
+ HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
2453
+ const BYTE* ip = (const BYTE*) cSrc;
2454
+ size_t errorCode;
2455
+
2456
+ errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
2457
+ if (FSE_isError(errorCode)) return errorCode;
2458
+ if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
2459
+ ip += errorCode;
2460
+ cSrcSize -= errorCode;
2461
+
2462
+ return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1521
2463
  }
2464
+
2465
+
2466
+ #endif /* FSE_COMMONDEFS_ONLY */