extzstd 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
@@ -45,15 +45,15 @@ extern "C" {
|
|
45
45
|
|
46
46
|
/* mutex */
|
47
47
|
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
|
48
|
-
#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
|
48
|
+
#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0)
|
49
49
|
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
|
50
50
|
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
|
51
51
|
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
|
52
52
|
|
53
53
|
/* condition variable */
|
54
54
|
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
|
55
|
-
#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
|
56
|
-
#define ZSTD_pthread_cond_destroy(a)
|
55
|
+
#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0)
|
56
|
+
#define ZSTD_pthread_cond_destroy(a) ((void)(a))
|
57
57
|
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
|
58
58
|
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
|
59
59
|
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
|
@@ -100,17 +100,17 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
|
100
100
|
/* No multithreading support */
|
101
101
|
|
102
102
|
typedef int ZSTD_pthread_mutex_t;
|
103
|
-
#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
|
104
|
-
#define ZSTD_pthread_mutex_destroy(a)
|
105
|
-
#define ZSTD_pthread_mutex_lock(a)
|
106
|
-
#define ZSTD_pthread_mutex_unlock(a)
|
103
|
+
#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0)
|
104
|
+
#define ZSTD_pthread_mutex_destroy(a) ((void)(a))
|
105
|
+
#define ZSTD_pthread_mutex_lock(a) ((void)(a))
|
106
|
+
#define ZSTD_pthread_mutex_unlock(a) ((void)(a))
|
107
107
|
|
108
108
|
typedef int ZSTD_pthread_cond_t;
|
109
|
-
#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
|
110
|
-
#define ZSTD_pthread_cond_destroy(a)
|
111
|
-
#define ZSTD_pthread_cond_wait(a, b)
|
112
|
-
#define ZSTD_pthread_cond_signal(a)
|
113
|
-
#define ZSTD_pthread_cond_broadcast(a)
|
109
|
+
#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0)
|
110
|
+
#define ZSTD_pthread_cond_destroy(a) ((void)(a))
|
111
|
+
#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b))
|
112
|
+
#define ZSTD_pthread_cond_signal(a) ((void)(a))
|
113
|
+
#define ZSTD_pthread_cond_broadcast(a) ((void)(a))
|
114
114
|
|
115
115
|
/* do not use ZSTD_pthread_t */
|
116
116
|
|
@@ -66,10 +66,10 @@
|
|
66
66
|
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
67
67
|
|
68
68
|
/*!XXH_FORCE_NATIVE_FORMAT :
|
69
|
-
* By default, xxHash library provides endian-
|
69
|
+
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
70
70
|
* Results are therefore identical for little-endian and big-endian CPU.
|
71
71
|
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
72
|
-
* Should endian-
|
72
|
+
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
73
73
|
* to improve speed for Big-endian CPU.
|
74
74
|
* This option has no impact on little-endian CPU.
|
75
75
|
*/
|
@@ -98,6 +98,7 @@
|
|
98
98
|
/* Modify the local functions below should you wish to use some other memory routines */
|
99
99
|
/* for malloc(), free() */
|
100
100
|
#include <stdlib.h>
|
101
|
+
#include <stddef.h> /* size_t */
|
101
102
|
static void* XXH_malloc(size_t s) { return malloc(s); }
|
102
103
|
static void XXH_free (void* p) { free(p); }
|
103
104
|
/* for memcpy() */
|
@@ -30,8 +30,10 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
|
|
30
30
|
/*-****************************************
|
31
31
|
* ZSTD Error Management
|
32
32
|
******************************************/
|
33
|
+
#undef ZSTD_isError /* defined within zstd_internal.h */
|
33
34
|
/*! ZSTD_isError() :
|
34
|
-
* tells if a return value is an error code
|
35
|
+
* tells if a return value is an error code
|
36
|
+
* symbol is required for external callers */
|
35
37
|
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
|
36
38
|
|
37
39
|
/*! ZSTD_getErrorName() :
|
@@ -46,11 +48,6 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
|
|
46
48
|
* provides error code string from enum */
|
47
49
|
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
|
48
50
|
|
49
|
-
/*! g_debuglog_enable :
|
50
|
-
* turn on/off debug traces (global switch) */
|
51
|
-
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
|
52
|
-
int g_debuglog_enable = 1;
|
53
|
-
#endif
|
54
51
|
|
55
52
|
|
56
53
|
/*=**************************************************************
|
@@ -35,12 +35,20 @@ extern "C" {
|
|
35
35
|
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
|
36
36
|
#endif
|
37
37
|
|
38
|
-
|
39
|
-
*
|
40
|
-
|
41
|
-
*
|
42
|
-
*
|
43
|
-
|
38
|
+
/*-*********************************************
|
39
|
+
* Error codes list
|
40
|
+
*-*********************************************
|
41
|
+
* Error codes _values_ are pinned down since v1.3.1 only.
|
42
|
+
* Therefore, don't rely on values if you may link to any version < v1.3.1.
|
43
|
+
*
|
44
|
+
* Only values < 100 are considered stable.
|
45
|
+
*
|
46
|
+
* note 1 : this API shall be used with static linking only.
|
47
|
+
* dynamic linking is not yet officially supported.
|
48
|
+
* note 2 : Prefer relying on the enum rather than on its value whenever possible
|
49
|
+
* This is the only supported way to use the error list < v1.3.1
|
50
|
+
* note 3 : ZSTD_isError() is always correct, whatever the library version.
|
51
|
+
**********************************************/
|
44
52
|
typedef enum {
|
45
53
|
ZSTD_error_no_error = 0,
|
46
54
|
ZSTD_error_GENERIC = 1,
|
@@ -61,9 +69,11 @@ typedef enum {
|
|
61
69
|
ZSTD_error_stage_wrong = 60,
|
62
70
|
ZSTD_error_init_missing = 62,
|
63
71
|
ZSTD_error_memory_allocation = 64,
|
72
|
+
ZSTD_error_workSpace_tooSmall= 66,
|
64
73
|
ZSTD_error_dstSize_tooSmall = 70,
|
65
74
|
ZSTD_error_srcSize_wrong = 72,
|
66
|
-
|
75
|
+
ZSTD_error_dstBuffer_null = 74,
|
76
|
+
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
|
67
77
|
ZSTD_error_frameIndex_tooLarge = 100,
|
68
78
|
ZSTD_error_seekableIO = 102,
|
69
79
|
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
@@ -21,6 +21,7 @@
|
|
21
21
|
***************************************/
|
22
22
|
#include "compiler.h"
|
23
23
|
#include "mem.h"
|
24
|
+
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
24
25
|
#include "error_private.h"
|
25
26
|
#define ZSTD_STATIC_LINKING_ONLY
|
26
27
|
#include "zstd.h"
|
@@ -38,43 +39,11 @@
|
|
38
39
|
extern "C" {
|
39
40
|
#endif
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
#
|
46
|
-
# include <assert.h>
|
47
|
-
#else
|
48
|
-
# ifndef assert
|
49
|
-
# define assert(condition) ((void)0)
|
50
|
-
# endif
|
51
|
-
#endif
|
52
|
-
|
53
|
-
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
|
54
|
-
|
55
|
-
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
|
56
|
-
# include <stdio.h>
|
57
|
-
extern int g_debuglog_enable;
|
58
|
-
/* recommended values for ZSTD_DEBUG display levels :
|
59
|
-
* 1 : no display, enables assert() only
|
60
|
-
* 2 : reserved for currently active debug path
|
61
|
-
* 3 : events once per object lifetime (CCtx, CDict, etc.)
|
62
|
-
* 4 : events once per frame
|
63
|
-
* 5 : events once per block
|
64
|
-
* 6 : events once per sequence (*very* verbose) */
|
65
|
-
# define RAWLOG(l, ...) { \
|
66
|
-
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
|
67
|
-
fprintf(stderr, __VA_ARGS__); \
|
68
|
-
} }
|
69
|
-
# define DEBUGLOG(l, ...) { \
|
70
|
-
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
|
71
|
-
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
|
72
|
-
fprintf(stderr, " \n"); \
|
73
|
-
} }
|
74
|
-
#else
|
75
|
-
# define RAWLOG(l, ...) {} /* disabled */
|
76
|
-
# define DEBUGLOG(l, ...) {} /* disabled */
|
77
|
-
#endif
|
42
|
+
/* ---- static assert (debug) --- */
|
43
|
+
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
|
44
|
+
#define ZSTD_isError ERR_isError /* for inlining */
|
45
|
+
#define FSE_isError ERR_isError
|
46
|
+
#define HUF_isError ERR_isError
|
78
47
|
|
79
48
|
|
80
49
|
/*-*************************************
|
@@ -84,8 +53,50 @@ extern int g_debuglog_enable;
|
|
84
53
|
#undef MAX
|
85
54
|
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
86
55
|
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
87
|
-
|
88
|
-
|
56
|
+
|
57
|
+
/**
|
58
|
+
* Return the specified error if the condition evaluates to true.
|
59
|
+
*
|
60
|
+
* In debug modes, prints additional information. In order to do that
|
61
|
+
* (particularly, printing the conditional that failed), this can't just wrap
|
62
|
+
* RETURN_ERROR().
|
63
|
+
*/
|
64
|
+
#define RETURN_ERROR_IF(cond, err, ...) \
|
65
|
+
if (cond) { \
|
66
|
+
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
67
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
68
|
+
RAWLOG(3, "\n"); \
|
69
|
+
return ERROR(err); \
|
70
|
+
}
|
71
|
+
|
72
|
+
/**
|
73
|
+
* Unconditionally return the specified error.
|
74
|
+
*
|
75
|
+
* In debug modes, prints additional information.
|
76
|
+
*/
|
77
|
+
#define RETURN_ERROR(err, ...) \
|
78
|
+
do { \
|
79
|
+
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
80
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
81
|
+
RAWLOG(3, "\n"); \
|
82
|
+
return ERROR(err); \
|
83
|
+
} while(0);
|
84
|
+
|
85
|
+
/**
|
86
|
+
* If the provided expression evaluates to an error code, returns that error code.
|
87
|
+
*
|
88
|
+
* In debug modes, prints additional information.
|
89
|
+
*/
|
90
|
+
#define FORWARD_IF_ERROR(err, ...) \
|
91
|
+
do { \
|
92
|
+
size_t const err_code = (err); \
|
93
|
+
if (ERR_isError(err_code)) { \
|
94
|
+
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
95
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
96
|
+
RAWLOG(3, "\n"); \
|
97
|
+
return err_code; \
|
98
|
+
} \
|
99
|
+
} while(0);
|
89
100
|
|
90
101
|
|
91
102
|
/*-*************************************
|
@@ -109,12 +120,10 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
|
109
120
|
#define BIT0 1
|
110
121
|
|
111
122
|
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
|
112
|
-
#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
|
113
123
|
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
114
124
|
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
115
125
|
|
116
|
-
#define ZSTD_FRAMEIDSIZE 4
|
117
|
-
static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
|
126
|
+
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
118
127
|
|
119
128
|
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
120
129
|
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
@@ -132,14 +141,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|
132
141
|
|
133
142
|
#define Litbits 8
|
134
143
|
#define MaxLit ((1<<Litbits) - 1)
|
135
|
-
#define MaxML
|
136
|
-
#define MaxLL
|
144
|
+
#define MaxML 52
|
145
|
+
#define MaxLL 35
|
137
146
|
#define DefaultMaxOff 28
|
138
|
-
#define MaxOff
|
147
|
+
#define MaxOff 31
|
139
148
|
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
|
140
149
|
#define MLFSELog 9
|
141
150
|
#define LLFSELog 9
|
142
151
|
#define OffFSELog 8
|
152
|
+
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
|
143
153
|
|
144
154
|
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
145
155
|
0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -226,12 +236,23 @@ typedef struct {
|
|
226
236
|
BYTE* llCode;
|
227
237
|
BYTE* mlCode;
|
228
238
|
BYTE* ofCode;
|
239
|
+
size_t maxNbSeq;
|
240
|
+
size_t maxNbLit;
|
229
241
|
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
230
242
|
U32 longLengthPos;
|
231
|
-
U32 rep[ZSTD_REP_NUM];
|
232
|
-
U32 repToConfirm[ZSTD_REP_NUM];
|
233
243
|
} seqStore_t;
|
234
244
|
|
245
|
+
/**
|
246
|
+
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
|
247
|
+
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
|
248
|
+
* similarly, before using `decompressedBound`, check for errors using:
|
249
|
+
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
|
250
|
+
*/
|
251
|
+
typedef struct {
|
252
|
+
size_t compressedSize;
|
253
|
+
unsigned long long decompressedBound;
|
254
|
+
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
255
|
+
|
235
256
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
236
257
|
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
237
258
|
|
@@ -276,7 +297,7 @@ typedef struct {
|
|
276
297
|
blockType_e blockType;
|
277
298
|
U32 lastBlock;
|
278
299
|
U32 origSize;
|
279
|
-
} blockProperties_t;
|
300
|
+
} blockProperties_t; /* declared here for decompress and fullbench */
|
280
301
|
|
281
302
|
/*! ZSTD_getcBlockSize() :
|
282
303
|
* Provides the size of compressed block from block header `src` */
|
@@ -284,6 +305,13 @@ typedef struct {
|
|
284
305
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
285
306
|
blockProperties_t* bpPtr);
|
286
307
|
|
308
|
+
/*! ZSTD_decodeSeqHeaders() :
|
309
|
+
* decode sequence header from src */
|
310
|
+
/* Used by: decompress, fullbench (does not get its definition from here) */
|
311
|
+
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
312
|
+
const void* src, size_t srcSize);
|
313
|
+
|
314
|
+
|
287
315
|
#if defined (__cplusplus)
|
288
316
|
}
|
289
317
|
#endif
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
FSE : Finite State Entropy encoder
|
3
|
-
Copyright (C) 2013-
|
3
|
+
Copyright (C) 2013-present, Yann Collet.
|
4
4
|
|
5
5
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
6
6
|
|
@@ -37,9 +37,11 @@
|
|
37
37
|
****************************************************************/
|
38
38
|
#include <stdlib.h> /* malloc, free, qsort */
|
39
39
|
#include <string.h> /* memcpy, memset */
|
40
|
-
#include <stdio.h> /* printf (debug) */
|
41
|
-
#include "bitstream.h"
|
42
40
|
#include "compiler.h"
|
41
|
+
#include "mem.h" /* U32, U16, etc. */
|
42
|
+
#include "debug.h" /* assert, DEBUGLOG */
|
43
|
+
#include "hist.h" /* HIST_count_wksp */
|
44
|
+
#include "bitstream.h"
|
43
45
|
#define FSE_STATIC_LINKING_ONLY
|
44
46
|
#include "fse.h"
|
45
47
|
#include "error_private.h"
|
@@ -49,7 +51,6 @@
|
|
49
51
|
* Error Management
|
50
52
|
****************************************************************/
|
51
53
|
#define FSE_isError ERR_isError
|
52
|
-
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
53
54
|
|
54
55
|
|
55
56
|
/* **************************************************************
|
@@ -82,7 +83,9 @@
|
|
82
83
|
* wkspSize should be sized to handle the worst-case situation, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)`
|
83
84
|
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
|
84
85
|
*/
|
85
|
-
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
86
|
+
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
87
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
88
|
+
void* workSpace, size_t wkspSize)
|
86
89
|
{
|
87
90
|
U32 const tableSize = 1 << tableLog;
|
88
91
|
U32 const tableMask = tableSize - 1;
|
@@ -100,14 +103,19 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
100
103
|
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
101
104
|
tableU16[-2] = (U16) tableLog;
|
102
105
|
tableU16[-1] = (U16) maxSymbolValue;
|
106
|
+
assert(tableLog < 16); /* required for threshold strategy to work */
|
103
107
|
|
104
108
|
/* For explanations on how to distribute symbol values over the table :
|
105
|
-
|
109
|
+
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
110
|
+
|
111
|
+
#ifdef __clang_analyzer__
|
112
|
+
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
113
|
+
#endif
|
106
114
|
|
107
115
|
/* symbol start positions */
|
108
116
|
{ U32 u;
|
109
117
|
cumul[0] = 0;
|
110
|
-
for (u=1; u<=maxSymbolValue+1; u++) {
|
118
|
+
for (u=1; u <= maxSymbolValue+1; u++) {
|
111
119
|
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
112
120
|
cumul[u] = cumul[u-1] + 1;
|
113
121
|
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
@@ -121,14 +129,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
121
129
|
{ U32 position = 0;
|
122
130
|
U32 symbol;
|
123
131
|
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
124
|
-
int
|
125
|
-
|
132
|
+
int nbOccurrences;
|
133
|
+
int const freq = normalizedCounter[symbol];
|
134
|
+
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
126
135
|
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
127
136
|
position = (position + step) & tableMask;
|
128
|
-
while (position > highThreshold)
|
137
|
+
while (position > highThreshold)
|
138
|
+
position = (position + step) & tableMask; /* Low proba area */
|
129
139
|
} }
|
130
140
|
|
131
|
-
|
141
|
+
assert(position==0); /* Must have initialized all positions */
|
132
142
|
}
|
133
143
|
|
134
144
|
/* Build table */
|
@@ -143,7 +153,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
143
153
|
for (s=0; s<=maxSymbolValue; s++) {
|
144
154
|
switch (normalizedCounter[s])
|
145
155
|
{
|
146
|
-
case 0:
|
156
|
+
case 0:
|
157
|
+
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
|
158
|
+
symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
|
159
|
+
break;
|
147
160
|
|
148
161
|
case -1:
|
149
162
|
case 1:
|
@@ -160,6 +173,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
160
173
|
total += normalizedCounter[s];
|
161
174
|
} } } }
|
162
175
|
|
176
|
+
#if 0 /* debug : symbol costs */
|
177
|
+
DEBUGLOG(5, "\n --- table statistics : ");
|
178
|
+
{ U32 symbol;
|
179
|
+
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
180
|
+
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
|
181
|
+
symbol, normalizedCounter[symbol],
|
182
|
+
FSE_getMaxNbBits(symbolTT, symbol),
|
183
|
+
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
#endif
|
187
|
+
|
163
188
|
return 0;
|
164
189
|
}
|
165
190
|
|
@@ -174,8 +199,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
|
174
199
|
|
175
200
|
#ifndef FSE_COMMONDEFS_ONLY
|
176
201
|
|
202
|
+
|
177
203
|
/*-**************************************************************
|
178
|
-
* FSE NCount encoding
|
204
|
+
* FSE NCount encoding
|
179
205
|
****************************************************************/
|
180
206
|
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
181
207
|
{
|
@@ -183,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
183
209
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
184
210
|
}
|
185
211
|
|
186
|
-
static size_t
|
187
|
-
|
188
|
-
|
212
|
+
static size_t
|
213
|
+
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
214
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
215
|
+
unsigned writeIsSafe)
|
189
216
|
{
|
190
217
|
BYTE* const ostart = (BYTE*) header;
|
191
218
|
BYTE* out = ostart;
|
@@ -194,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
194
221
|
const int tableSize = 1 << tableLog;
|
195
222
|
int remaining;
|
196
223
|
int threshold;
|
197
|
-
U32 bitStream;
|
198
|
-
int bitCount;
|
199
|
-
unsigned
|
200
|
-
|
224
|
+
U32 bitStream = 0;
|
225
|
+
int bitCount = 0;
|
226
|
+
unsigned symbol = 0;
|
227
|
+
unsigned const alphabetSize = maxSymbolValue + 1;
|
228
|
+
int previousIs0 = 0;
|
201
229
|
|
202
|
-
bitStream = 0;
|
203
|
-
bitCount = 0;
|
204
230
|
/* Table Size */
|
205
231
|
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
|
206
232
|
bitCount += 4;
|
@@ -210,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
210
236
|
threshold = tableSize;
|
211
237
|
nbBits = tableLog+1;
|
212
238
|
|
213
|
-
while (remaining>1) { /* stops at 1 */
|
214
|
-
if (
|
215
|
-
unsigned start =
|
216
|
-
while (!normalizedCounter[
|
217
|
-
|
239
|
+
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
|
240
|
+
if (previousIs0) {
|
241
|
+
unsigned start = symbol;
|
242
|
+
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
|
243
|
+
if (symbol == alphabetSize) break; /* incorrect distribution */
|
244
|
+
while (symbol >= start+24) {
|
218
245
|
start+=24;
|
219
246
|
bitStream += 0xFFFFU << bitCount;
|
220
|
-
if ((!writeIsSafe) && (out > oend-2))
|
247
|
+
if ((!writeIsSafe) && (out > oend-2))
|
248
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
221
249
|
out[0] = (BYTE) bitStream;
|
222
250
|
out[1] = (BYTE)(bitStream>>8);
|
223
251
|
out+=2;
|
224
252
|
bitStream>>=16;
|
225
253
|
}
|
226
|
-
while (
|
254
|
+
while (symbol >= start+3) {
|
227
255
|
start+=3;
|
228
256
|
bitStream += 3 << bitCount;
|
229
257
|
bitCount += 2;
|
230
258
|
}
|
231
|
-
bitStream += (
|
259
|
+
bitStream += (symbol-start) << bitCount;
|
232
260
|
bitCount += 2;
|
233
261
|
if (bitCount>16) {
|
234
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
262
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
263
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
235
264
|
out[0] = (BYTE)bitStream;
|
236
265
|
out[1] = (BYTE)(bitStream>>8);
|
237
266
|
out += 2;
|
238
267
|
bitStream >>= 16;
|
239
268
|
bitCount -= 16;
|
240
269
|
} }
|
241
|
-
{ int count = normalizedCounter[
|
242
|
-
int const max = (2*threshold-1)-remaining;
|
270
|
+
{ int count = normalizedCounter[symbol++];
|
271
|
+
int const max = (2*threshold-1) - remaining;
|
243
272
|
remaining -= count < 0 ? -count : count;
|
244
273
|
count++; /* +1 for extra accuracy */
|
245
|
-
if (count>=threshold)
|
274
|
+
if (count>=threshold)
|
275
|
+
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
246
276
|
bitStream += count << bitCount;
|
247
277
|
bitCount += nbBits;
|
248
278
|
bitCount -= (count<max);
|
249
|
-
|
279
|
+
previousIs0 = (count==1);
|
250
280
|
if (remaining<1) return ERROR(GENERIC);
|
251
|
-
while (remaining<threshold) nbBits
|
281
|
+
while (remaining<threshold) { nbBits--; threshold>>=1; }
|
252
282
|
}
|
253
283
|
if (bitCount>16) {
|
254
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
284
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
285
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
255
286
|
out[0] = (BYTE)bitStream;
|
256
287
|
out[1] = (BYTE)(bitStream>>8);
|
257
288
|
out += 2;
|
@@ -259,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
259
290
|
bitCount -= 16;
|
260
291
|
} }
|
261
292
|
|
293
|
+
if (remaining != 1)
|
294
|
+
return ERROR(GENERIC); /* incorrect normalized distribution */
|
295
|
+
assert(symbol <= alphabetSize);
|
296
|
+
|
262
297
|
/* flush remaining bitStream */
|
263
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
298
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
299
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
264
300
|
out[0] = (BYTE)bitStream;
|
265
301
|
out[1] = (BYTE)(bitStream>>8);
|
266
302
|
out+= (bitCount+7) /8;
|
267
303
|
|
268
|
-
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
|
269
|
-
|
270
304
|
return (out-ostart);
|
271
305
|
}
|
272
306
|
|
273
307
|
|
274
|
-
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
308
|
+
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
309
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
275
310
|
{
|
276
311
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
|
277
312
|
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
|
@@ -279,171 +314,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
|
279
314
|
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
|
280
315
|
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
|
281
316
|
|
282
|
-
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
|
283
|
-
}
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
/*-**************************************************************
|
288
|
-
* Counting histogram
|
289
|
-
****************************************************************/
|
290
|
-
/*! FSE_count_simple
|
291
|
-
This function counts byte values within `src`, and stores the histogram into table `count`.
|
292
|
-
It doesn't use any additional memory.
|
293
|
-
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
|
294
|
-
For this reason, prefer using a table `count` with 256 elements.
|
295
|
-
@return : count of most numerous element
|
296
|
-
*/
|
297
|
-
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
298
|
-
const void* src, size_t srcSize)
|
299
|
-
{
|
300
|
-
const BYTE* ip = (const BYTE*)src;
|
301
|
-
const BYTE* const end = ip + srcSize;
|
302
|
-
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
303
|
-
unsigned max=0;
|
304
|
-
|
305
|
-
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
|
306
|
-
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
307
|
-
|
308
|
-
while (ip<end) count[*ip++]++;
|
309
|
-
|
310
|
-
while (!count[maxSymbolValue]) maxSymbolValue--;
|
311
|
-
*maxSymbolValuePtr = maxSymbolValue;
|
312
|
-
|
313
|
-
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
|
314
|
-
|
315
|
-
return (size_t)max;
|
316
|
-
}
|
317
|
-
|
318
|
-
|
319
|
-
/* FSE_count_parallel_wksp() :
|
320
|
-
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
|
321
|
-
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)` */
|
322
|
-
static size_t FSE_count_parallel_wksp(
|
323
|
-
unsigned* count, unsigned* maxSymbolValuePtr,
|
324
|
-
const void* source, size_t sourceSize,
|
325
|
-
unsigned checkMax, unsigned* const workSpace)
|
326
|
-
{
|
327
|
-
const BYTE* ip = (const BYTE*)source;
|
328
|
-
const BYTE* const iend = ip+sourceSize;
|
329
|
-
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
330
|
-
unsigned max=0;
|
331
|
-
U32* const Counting1 = workSpace;
|
332
|
-
U32* const Counting2 = Counting1 + 256;
|
333
|
-
U32* const Counting3 = Counting2 + 256;
|
334
|
-
U32* const Counting4 = Counting3 + 256;
|
335
|
-
|
336
|
-
memset(Counting1, 0, 4*256*sizeof(unsigned));
|
337
|
-
|
338
|
-
/* safety checks */
|
339
|
-
if (!sourceSize) {
|
340
|
-
memset(count, 0, maxSymbolValue + 1);
|
341
|
-
*maxSymbolValuePtr = 0;
|
342
|
-
return 0;
|
343
|
-
}
|
344
|
-
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
345
|
-
|
346
|
-
/* by stripes of 16 bytes */
|
347
|
-
{ U32 cached = MEM_read32(ip); ip += 4;
|
348
|
-
while (ip < iend-15) {
|
349
|
-
U32 c = cached; cached = MEM_read32(ip); ip += 4;
|
350
|
-
Counting1[(BYTE) c ]++;
|
351
|
-
Counting2[(BYTE)(c>>8) ]++;
|
352
|
-
Counting3[(BYTE)(c>>16)]++;
|
353
|
-
Counting4[ c>>24 ]++;
|
354
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
355
|
-
Counting1[(BYTE) c ]++;
|
356
|
-
Counting2[(BYTE)(c>>8) ]++;
|
357
|
-
Counting3[(BYTE)(c>>16)]++;
|
358
|
-
Counting4[ c>>24 ]++;
|
359
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
360
|
-
Counting1[(BYTE) c ]++;
|
361
|
-
Counting2[(BYTE)(c>>8) ]++;
|
362
|
-
Counting3[(BYTE)(c>>16)]++;
|
363
|
-
Counting4[ c>>24 ]++;
|
364
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
365
|
-
Counting1[(BYTE) c ]++;
|
366
|
-
Counting2[(BYTE)(c>>8) ]++;
|
367
|
-
Counting3[(BYTE)(c>>16)]++;
|
368
|
-
Counting4[ c>>24 ]++;
|
369
|
-
}
|
370
|
-
ip-=4;
|
371
|
-
}
|
372
|
-
|
373
|
-
/* finish last symbols */
|
374
|
-
while (ip<iend) Counting1[*ip++]++;
|
375
|
-
|
376
|
-
if (checkMax) { /* verify stats will fit into destination table */
|
377
|
-
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
378
|
-
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
379
|
-
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
380
|
-
} }
|
381
|
-
|
382
|
-
{ U32 s; for (s=0; s<=maxSymbolValue; s++) {
|
383
|
-
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
384
|
-
if (count[s] > max) max = count[s];
|
385
|
-
} }
|
386
|
-
|
387
|
-
while (!count[maxSymbolValue]) maxSymbolValue--;
|
388
|
-
*maxSymbolValuePtr = maxSymbolValue;
|
389
|
-
return (size_t)max;
|
317
|
+
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
|
390
318
|
}
|
391
319
|
|
392
|
-
/* FSE_countFast_wksp() :
|
393
|
-
* Same as FSE_countFast(), but using an externally provided scratch buffer.
|
394
|
-
* `workSpace` size must be table of >= `1024` unsigned */
|
395
|
-
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
396
|
-
const void* source, size_t sourceSize, unsigned* workSpace)
|
397
|
-
{
|
398
|
-
if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
399
|
-
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
400
|
-
}
|
401
|
-
|
402
|
-
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
403
|
-
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
404
|
-
const void* source, size_t sourceSize)
|
405
|
-
{
|
406
|
-
unsigned tmpCounters[1024];
|
407
|
-
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
408
|
-
}
|
409
|
-
|
410
|
-
/* FSE_count_wksp() :
|
411
|
-
* Same as FSE_count(), but using an externally provided scratch buffer.
|
412
|
-
* `workSpace` size must be table of >= `1024` unsigned */
|
413
|
-
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
414
|
-
const void* source, size_t sourceSize, unsigned* workSpace)
|
415
|
-
{
|
416
|
-
if (*maxSymbolValuePtr < 255)
|
417
|
-
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
418
|
-
*maxSymbolValuePtr = 255;
|
419
|
-
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
420
|
-
}
|
421
|
-
|
422
|
-
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
423
|
-
const void* src, size_t srcSize)
|
424
|
-
{
|
425
|
-
unsigned tmpCounters[1024];
|
426
|
-
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
427
|
-
}
|
428
|
-
|
429
|
-
|
430
320
|
|
431
321
|
/*-**************************************************************
|
432
322
|
* FSE Compression Code
|
433
323
|
****************************************************************/
|
434
|
-
/*! FSE_sizeof_CTable() :
|
435
|
-
FSE_CTable is a variable size structure which contains :
|
436
|
-
`U16 tableLog;`
|
437
|
-
`U16 maxSymbolValue;`
|
438
|
-
`U16 nextStateNumber[1 << tableLog];` // This size is variable
|
439
|
-
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
|
440
|
-
Allocation is manual (C standard does not support variable-size structures).
|
441
|
-
*/
|
442
|
-
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
|
443
|
-
{
|
444
|
-
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
445
|
-
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
446
|
-
}
|
447
324
|
|
448
325
|
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
449
326
|
{
|
@@ -458,7 +335,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
|
458
335
|
/* provides the minimum logSize to safely represent a distribution */
|
459
336
|
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
460
337
|
{
|
461
|
-
U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
|
338
|
+
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
462
339
|
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
463
340
|
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
464
341
|
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
@@ -521,6 +398,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
521
398
|
}
|
522
399
|
ToDistribute = (1 << tableLog) - distributed;
|
523
400
|
|
401
|
+
if (ToDistribute == 0)
|
402
|
+
return 0;
|
403
|
+
|
524
404
|
if ((total / ToDistribute) > lowOne) {
|
525
405
|
/* risk of rounding to zero */
|
526
406
|
lowOne = (U32)((total * 3) / (ToDistribute * 2));
|
@@ -540,7 +420,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
540
420
|
find max, then give all remaining points to max */
|
541
421
|
U32 maxV = 0, maxC = 0;
|
542
422
|
for (s=0; s<=maxSymbolValue; s++)
|
543
|
-
if (count[s] > maxC) maxV=s, maxC=count[s];
|
423
|
+
if (count[s] > maxC) { maxV=s; maxC=count[s]; }
|
544
424
|
norm[maxV] += (short)ToDistribute;
|
545
425
|
return 0;
|
546
426
|
}
|
@@ -548,7 +428,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
548
428
|
if (total == 0) {
|
549
429
|
/* all of the symbols were low enough for the lowOne or lowThreshold */
|
550
430
|
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
|
551
|
-
if (norm[s] > 0) ToDistribute--, norm[s]++;
|
431
|
+
if (norm[s] > 0) { ToDistribute--; norm[s]++; }
|
552
432
|
return 0;
|
553
433
|
}
|
554
434
|
|
@@ -604,7 +484,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
604
484
|
U64 restToBeat = vStep * rtbTable[proba];
|
605
485
|
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
|
606
486
|
}
|
607
|
-
if (proba > largestP) largestP=proba, largest=s;
|
487
|
+
if (proba > largestP) { largestP=proba; largest=s; }
|
608
488
|
normalizedCounter[s] = proba;
|
609
489
|
stillToDistribute -= proba;
|
610
490
|
} }
|
@@ -621,11 +501,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
621
501
|
U32 s;
|
622
502
|
U32 nTotal = 0;
|
623
503
|
for (s=0; s<=maxSymbolValue; s++)
|
624
|
-
|
504
|
+
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
|
625
505
|
for (s=0; s<=maxSymbolValue; s++)
|
626
506
|
nTotal += abs(normalizedCounter[s]);
|
627
507
|
if (nTotal != (1U<<tableLog))
|
628
|
-
|
508
|
+
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
|
629
509
|
getchar();
|
630
510
|
}
|
631
511
|
#endif
|
@@ -778,7 +658,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
|
778
658
|
BYTE* op = ostart;
|
779
659
|
BYTE* const oend = ostart + dstSize;
|
780
660
|
|
781
|
-
|
661
|
+
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
|
782
662
|
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
783
663
|
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
784
664
|
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
@@ -792,7 +672,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
|
792
672
|
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
793
673
|
|
794
674
|
/* Scan input and build symbol stats */
|
795
|
-
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
|
675
|
+
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
|
796
676
|
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
797
677
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
798
678
|
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
@@ -827,7 +707,7 @@ typedef struct {
|
|
827
707
|
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
828
708
|
{
|
829
709
|
fseWkspMax_t scratchBuffer;
|
830
|
-
|
710
|
+
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
831
711
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
832
712
|
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
833
713
|
}
|