extzstd 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +13 -0
- data/README.md +17 -14
- data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/Makefile +99 -53
- data/contrib/zstd/README.md +59 -39
- data/contrib/zstd/TESTING.md +1 -1
- data/contrib/zstd/appveyor.yml +17 -6
- data/contrib/zstd/lib/BUCK +29 -2
- data/contrib/zstd/lib/Makefile +118 -21
- data/contrib/zstd/lib/README.md +84 -44
- data/contrib/zstd/lib/common/bitstream.h +17 -33
- data/contrib/zstd/lib/common/compiler.h +62 -8
- data/contrib/zstd/lib/common/cpu.h +215 -0
- data/contrib/zstd/lib/common/debug.c +44 -0
- data/contrib/zstd/lib/common/debug.h +134 -0
- data/contrib/zstd/lib/common/entropy_common.c +16 -1
- data/contrib/zstd/lib/common/error_private.c +7 -0
- data/contrib/zstd/lib/common/fse.h +48 -44
- data/contrib/zstd/lib/common/fse_decompress.c +3 -3
- data/contrib/zstd/lib/common/huf.h +169 -113
- data/contrib/zstd/lib/common/mem.h +20 -2
- data/contrib/zstd/lib/common/pool.c +135 -49
- data/contrib/zstd/lib/common/pool.h +40 -21
- data/contrib/zstd/lib/common/threading.c +2 -2
- data/contrib/zstd/lib/common/threading.h +12 -12
- data/contrib/zstd/lib/common/xxhash.c +3 -2
- data/contrib/zstd/lib/common/zstd_common.c +3 -6
- data/contrib/zstd/lib/common/zstd_errors.h +17 -7
- data/contrib/zstd/lib/common/zstd_internal.h +76 -48
- data/contrib/zstd/lib/compress/fse_compress.c +89 -209
- data/contrib/zstd/lib/compress/hist.c +203 -0
- data/contrib/zstd/lib/compress/hist.h +95 -0
- data/contrib/zstd/lib/compress/huf_compress.c +188 -80
- data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
- data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
- data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
- data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
- data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
- data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
- data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
- data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
- data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
- data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
- data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
- data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
- data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
- data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
- data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
- data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
- data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
- data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
- data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
- data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
- data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
- data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
- data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
- data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
- data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
- data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
- data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
- data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
- data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
- data/contrib/zstd/lib/zstd.h +1346 -832
- data/ext/extzstd.c +27 -19
- data/ext/extzstd_stream.c +20 -4
- data/ext/zstd_compress.c +1 -0
- data/ext/zstd_decompress.c +4 -0
- data/ext/zstd_dictbuilder.c +4 -0
- data/ext/zstd_dictbuilder_fastcover.c +5 -0
- data/lib/extzstd.rb +52 -220
- data/lib/extzstd/version.rb +1 -1
- metadata +21 -7
- data/contrib/zstd/circle.yml +0 -63
|
@@ -45,15 +45,15 @@ extern "C" {
|
|
|
45
45
|
|
|
46
46
|
/* mutex */
|
|
47
47
|
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
|
|
48
|
-
#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
|
|
48
|
+
#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0)
|
|
49
49
|
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
|
|
50
50
|
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
|
|
51
51
|
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
|
|
52
52
|
|
|
53
53
|
/* condition variable */
|
|
54
54
|
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
|
|
55
|
-
#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
|
|
56
|
-
#define ZSTD_pthread_cond_destroy(a)
|
|
55
|
+
#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0)
|
|
56
|
+
#define ZSTD_pthread_cond_destroy(a) ((void)(a))
|
|
57
57
|
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
|
|
58
58
|
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
|
|
59
59
|
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
|
|
@@ -100,17 +100,17 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
|
|
100
100
|
/* No multithreading support */
|
|
101
101
|
|
|
102
102
|
typedef int ZSTD_pthread_mutex_t;
|
|
103
|
-
#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
|
|
104
|
-
#define ZSTD_pthread_mutex_destroy(a)
|
|
105
|
-
#define ZSTD_pthread_mutex_lock(a)
|
|
106
|
-
#define ZSTD_pthread_mutex_unlock(a)
|
|
103
|
+
#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0)
|
|
104
|
+
#define ZSTD_pthread_mutex_destroy(a) ((void)(a))
|
|
105
|
+
#define ZSTD_pthread_mutex_lock(a) ((void)(a))
|
|
106
|
+
#define ZSTD_pthread_mutex_unlock(a) ((void)(a))
|
|
107
107
|
|
|
108
108
|
typedef int ZSTD_pthread_cond_t;
|
|
109
|
-
#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
|
|
110
|
-
#define ZSTD_pthread_cond_destroy(a)
|
|
111
|
-
#define ZSTD_pthread_cond_wait(a, b)
|
|
112
|
-
#define ZSTD_pthread_cond_signal(a)
|
|
113
|
-
#define ZSTD_pthread_cond_broadcast(a)
|
|
109
|
+
#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0)
|
|
110
|
+
#define ZSTD_pthread_cond_destroy(a) ((void)(a))
|
|
111
|
+
#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b))
|
|
112
|
+
#define ZSTD_pthread_cond_signal(a) ((void)(a))
|
|
113
|
+
#define ZSTD_pthread_cond_broadcast(a) ((void)(a))
|
|
114
114
|
|
|
115
115
|
/* do not use ZSTD_pthread_t */
|
|
116
116
|
|
|
@@ -66,10 +66,10 @@
|
|
|
66
66
|
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
|
|
67
67
|
|
|
68
68
|
/*!XXH_FORCE_NATIVE_FORMAT :
|
|
69
|
-
* By default, xxHash library provides endian-
|
|
69
|
+
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
|
|
70
70
|
* Results are therefore identical for little-endian and big-endian CPU.
|
|
71
71
|
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
|
|
72
|
-
* Should endian-
|
|
72
|
+
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
|
|
73
73
|
* to improve speed for Big-endian CPU.
|
|
74
74
|
* This option has no impact on Little_Endian CPU.
|
|
75
75
|
*/
|
|
@@ -98,6 +98,7 @@
|
|
|
98
98
|
/* Modify the local functions below should you wish to use some other memory routines */
|
|
99
99
|
/* for malloc(), free() */
|
|
100
100
|
#include <stdlib.h>
|
|
101
|
+
#include <stddef.h> /* size_t */
|
|
101
102
|
static void* XXH_malloc(size_t s) { return malloc(s); }
|
|
102
103
|
static void XXH_free (void* p) { free(p); }
|
|
103
104
|
/* for memcpy() */
|
|
@@ -30,8 +30,10 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
|
|
|
30
30
|
/*-****************************************
|
|
31
31
|
* ZSTD Error Management
|
|
32
32
|
******************************************/
|
|
33
|
+
#undef ZSTD_isError /* defined within zstd_internal.h */
|
|
33
34
|
/*! ZSTD_isError() :
|
|
34
|
-
* tells if a return value is an error code
|
|
35
|
+
* tells if a return value is an error code
|
|
36
|
+
* symbol is required for external callers */
|
|
35
37
|
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
|
|
36
38
|
|
|
37
39
|
/*! ZSTD_getErrorName() :
|
|
@@ -46,11 +48,6 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
|
|
|
46
48
|
* provides error code string from enum */
|
|
47
49
|
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
|
|
48
50
|
|
|
49
|
-
/*! g_debuglog_enable :
|
|
50
|
-
* turn on/off debug traces (global switch) */
|
|
51
|
-
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
|
|
52
|
-
int g_debuglog_enable = 1;
|
|
53
|
-
#endif
|
|
54
51
|
|
|
55
52
|
|
|
56
53
|
/*=**************************************************************
|
|
@@ -35,12 +35,20 @@ extern "C" {
|
|
|
35
35
|
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
|
|
36
36
|
#endif
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
*
|
|
40
|
-
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
|
|
38
|
+
/*-*********************************************
|
|
39
|
+
* Error codes list
|
|
40
|
+
*-*********************************************
|
|
41
|
+
* Error codes _values_ are pinned down since v1.3.1 only.
|
|
42
|
+
* Therefore, don't rely on values if you may link to any version < v1.3.1.
|
|
43
|
+
*
|
|
44
|
+
* Only values < 100 are considered stable.
|
|
45
|
+
*
|
|
46
|
+
* note 1 : this API shall be used with static linking only.
|
|
47
|
+
* dynamic linking is not yet officially supported.
|
|
48
|
+
* note 2 : Prefer relying on the enum than on its value whenever possible
|
|
49
|
+
* This is the only supported way to use the error list < v1.3.1
|
|
50
|
+
* note 3 : ZSTD_isError() is always correct, whatever the library version.
|
|
51
|
+
**********************************************/
|
|
44
52
|
typedef enum {
|
|
45
53
|
ZSTD_error_no_error = 0,
|
|
46
54
|
ZSTD_error_GENERIC = 1,
|
|
@@ -61,9 +69,11 @@ typedef enum {
|
|
|
61
69
|
ZSTD_error_stage_wrong = 60,
|
|
62
70
|
ZSTD_error_init_missing = 62,
|
|
63
71
|
ZSTD_error_memory_allocation = 64,
|
|
72
|
+
ZSTD_error_workSpace_tooSmall= 66,
|
|
64
73
|
ZSTD_error_dstSize_tooSmall = 70,
|
|
65
74
|
ZSTD_error_srcSize_wrong = 72,
|
|
66
|
-
|
|
75
|
+
ZSTD_error_dstBuffer_null = 74,
|
|
76
|
+
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
|
|
67
77
|
ZSTD_error_frameIndex_tooLarge = 100,
|
|
68
78
|
ZSTD_error_seekableIO = 102,
|
|
69
79
|
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
***************************************/
|
|
22
22
|
#include "compiler.h"
|
|
23
23
|
#include "mem.h"
|
|
24
|
+
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
|
24
25
|
#include "error_private.h"
|
|
25
26
|
#define ZSTD_STATIC_LINKING_ONLY
|
|
26
27
|
#include "zstd.h"
|
|
@@ -38,43 +39,11 @@
|
|
|
38
39
|
extern "C" {
|
|
39
40
|
#endif
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
#
|
|
46
|
-
# include <assert.h>
|
|
47
|
-
#else
|
|
48
|
-
# ifndef assert
|
|
49
|
-
# define assert(condition) ((void)0)
|
|
50
|
-
# endif
|
|
51
|
-
#endif
|
|
52
|
-
|
|
53
|
-
#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
|
|
54
|
-
|
|
55
|
-
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
|
|
56
|
-
# include <stdio.h>
|
|
57
|
-
extern int g_debuglog_enable;
|
|
58
|
-
/* recommended values for ZSTD_DEBUG display levels :
|
|
59
|
-
* 1 : no display, enables assert() only
|
|
60
|
-
* 2 : reserved for currently active debug path
|
|
61
|
-
* 3 : events once per object lifetime (CCtx, CDict, etc.)
|
|
62
|
-
* 4 : events once per frame
|
|
63
|
-
* 5 : events once per block
|
|
64
|
-
* 6 : events once per sequence (*very* verbose) */
|
|
65
|
-
# define RAWLOG(l, ...) { \
|
|
66
|
-
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
|
|
67
|
-
fprintf(stderr, __VA_ARGS__); \
|
|
68
|
-
} }
|
|
69
|
-
# define DEBUGLOG(l, ...) { \
|
|
70
|
-
if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
|
|
71
|
-
fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
|
|
72
|
-
fprintf(stderr, " \n"); \
|
|
73
|
-
} }
|
|
74
|
-
#else
|
|
75
|
-
# define RAWLOG(l, ...) {} /* disabled */
|
|
76
|
-
# define DEBUGLOG(l, ...) {} /* disabled */
|
|
77
|
-
#endif
|
|
42
|
+
/* ---- static assert (debug) --- */
|
|
43
|
+
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
|
|
44
|
+
#define ZSTD_isError ERR_isError /* for inlining */
|
|
45
|
+
#define FSE_isError ERR_isError
|
|
46
|
+
#define HUF_isError ERR_isError
|
|
78
47
|
|
|
79
48
|
|
|
80
49
|
/*-*************************************
|
|
@@ -84,8 +53,50 @@ extern int g_debuglog_enable;
|
|
|
84
53
|
#undef MAX
|
|
85
54
|
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
|
86
55
|
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
|
87
|
-
|
|
88
|
-
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Return the specified error if the condition evaluates to true.
|
|
59
|
+
*
|
|
60
|
+
* In debug modes, prints additional information. In order to do that
|
|
61
|
+
* (particularly, printing the conditional that failed), this can't just wrap
|
|
62
|
+
* RETURN_ERROR().
|
|
63
|
+
*/
|
|
64
|
+
#define RETURN_ERROR_IF(cond, err, ...) \
|
|
65
|
+
if (cond) { \
|
|
66
|
+
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
|
67
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
|
68
|
+
RAWLOG(3, "\n"); \
|
|
69
|
+
return ERROR(err); \
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Unconditionally return the specified error.
|
|
74
|
+
*
|
|
75
|
+
* In debug modes, prints additional information.
|
|
76
|
+
*/
|
|
77
|
+
#define RETURN_ERROR(err, ...) \
|
|
78
|
+
do { \
|
|
79
|
+
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
|
80
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
|
81
|
+
RAWLOG(3, "\n"); \
|
|
82
|
+
return ERROR(err); \
|
|
83
|
+
} while(0);
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* If the provided expression evaluates to an error code, returns that error code.
|
|
87
|
+
*
|
|
88
|
+
* In debug modes, prints additional information.
|
|
89
|
+
*/
|
|
90
|
+
#define FORWARD_IF_ERROR(err, ...) \
|
|
91
|
+
do { \
|
|
92
|
+
size_t const err_code = (err); \
|
|
93
|
+
if (ERR_isError(err_code)) { \
|
|
94
|
+
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
|
95
|
+
RAWLOG(3, ": " __VA_ARGS__); \
|
|
96
|
+
RAWLOG(3, "\n"); \
|
|
97
|
+
return err_code; \
|
|
98
|
+
} \
|
|
99
|
+
} while(0);
|
|
89
100
|
|
|
90
101
|
|
|
91
102
|
/*-*************************************
|
|
@@ -109,12 +120,10 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
|
|
109
120
|
#define BIT0 1
|
|
110
121
|
|
|
111
122
|
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
|
|
112
|
-
#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
|
|
113
123
|
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
|
114
124
|
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
|
115
125
|
|
|
116
|
-
#define ZSTD_FRAMEIDSIZE 4
|
|
117
|
-
static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
|
|
126
|
+
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
|
118
127
|
|
|
119
128
|
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
|
120
129
|
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
|
@@ -132,14 +141,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|
|
132
141
|
|
|
133
142
|
#define Litbits 8
|
|
134
143
|
#define MaxLit ((1<<Litbits) - 1)
|
|
135
|
-
#define MaxML
|
|
136
|
-
#define MaxLL
|
|
144
|
+
#define MaxML 52
|
|
145
|
+
#define MaxLL 35
|
|
137
146
|
#define DefaultMaxOff 28
|
|
138
|
-
#define MaxOff
|
|
147
|
+
#define MaxOff 31
|
|
139
148
|
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
|
|
140
149
|
#define MLFSELog 9
|
|
141
150
|
#define LLFSELog 9
|
|
142
151
|
#define OffFSELog 8
|
|
152
|
+
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
|
|
143
153
|
|
|
144
154
|
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
|
145
155
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
@@ -226,12 +236,23 @@ typedef struct {
|
|
|
226
236
|
BYTE* llCode;
|
|
227
237
|
BYTE* mlCode;
|
|
228
238
|
BYTE* ofCode;
|
|
239
|
+
size_t maxNbSeq;
|
|
240
|
+
size_t maxNbLit;
|
|
229
241
|
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
|
230
242
|
U32 longLengthPos;
|
|
231
|
-
U32 rep[ZSTD_REP_NUM];
|
|
232
|
-
U32 repToConfirm[ZSTD_REP_NUM];
|
|
233
243
|
} seqStore_t;
|
|
234
244
|
|
|
245
|
+
/**
|
|
246
|
+
* Contains the compressed frame size and an upper-bound for the decompressed frame size.
|
|
247
|
+
* Note: before using `compressedSize`, check for errors using ZSTD_isError().
|
|
248
|
+
* similarly, before using `decompressedBound`, check for errors using:
|
|
249
|
+
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
|
|
250
|
+
*/
|
|
251
|
+
typedef struct {
|
|
252
|
+
size_t compressedSize;
|
|
253
|
+
unsigned long long decompressedBound;
|
|
254
|
+
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
|
255
|
+
|
|
235
256
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
|
236
257
|
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
|
237
258
|
|
|
@@ -276,7 +297,7 @@ typedef struct {
|
|
|
276
297
|
blockType_e blockType;
|
|
277
298
|
U32 lastBlock;
|
|
278
299
|
U32 origSize;
|
|
279
|
-
} blockProperties_t;
|
|
300
|
+
} blockProperties_t; /* declared here for decompress and fullbench */
|
|
280
301
|
|
|
281
302
|
/*! ZSTD_getcBlockSize() :
|
|
282
303
|
* Provides the size of compressed block from block header `src` */
|
|
@@ -284,6 +305,13 @@ typedef struct {
|
|
|
284
305
|
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
285
306
|
blockProperties_t* bpPtr);
|
|
286
307
|
|
|
308
|
+
/*! ZSTD_decodeSeqHeaders() :
|
|
309
|
+
* decode sequence header from src */
|
|
310
|
+
/* Used by: decompress, fullbench (does not get its definition from here) */
|
|
311
|
+
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
312
|
+
const void* src, size_t srcSize);
|
|
313
|
+
|
|
314
|
+
|
|
287
315
|
#if defined (__cplusplus)
|
|
288
316
|
}
|
|
289
317
|
#endif
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
FSE : Finite State Entropy encoder
|
|
3
|
-
Copyright (C) 2013-
|
|
3
|
+
Copyright (C) 2013-present, Yann Collet.
|
|
4
4
|
|
|
5
5
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
6
6
|
|
|
@@ -37,9 +37,11 @@
|
|
|
37
37
|
****************************************************************/
|
|
38
38
|
#include <stdlib.h> /* malloc, free, qsort */
|
|
39
39
|
#include <string.h> /* memcpy, memset */
|
|
40
|
-
#include <stdio.h> /* printf (debug) */
|
|
41
|
-
#include "bitstream.h"
|
|
42
40
|
#include "compiler.h"
|
|
41
|
+
#include "mem.h" /* U32, U16, etc. */
|
|
42
|
+
#include "debug.h" /* assert, DEBUGLOG */
|
|
43
|
+
#include "hist.h" /* HIST_count_wksp */
|
|
44
|
+
#include "bitstream.h"
|
|
43
45
|
#define FSE_STATIC_LINKING_ONLY
|
|
44
46
|
#include "fse.h"
|
|
45
47
|
#include "error_private.h"
|
|
@@ -49,7 +51,6 @@
|
|
|
49
51
|
* Error Management
|
|
50
52
|
****************************************************************/
|
|
51
53
|
#define FSE_isError ERR_isError
|
|
52
|
-
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
|
53
54
|
|
|
54
55
|
|
|
55
56
|
/* **************************************************************
|
|
@@ -82,7 +83,9 @@
|
|
|
82
83
|
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
|
|
83
84
|
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
|
|
84
85
|
*/
|
|
85
|
-
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
86
|
+
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
87
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
|
88
|
+
void* workSpace, size_t wkspSize)
|
|
86
89
|
{
|
|
87
90
|
U32 const tableSize = 1 << tableLog;
|
|
88
91
|
U32 const tableMask = tableSize - 1;
|
|
@@ -100,14 +103,19 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
|
100
103
|
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
101
104
|
tableU16[-2] = (U16) tableLog;
|
|
102
105
|
tableU16[-1] = (U16) maxSymbolValue;
|
|
106
|
+
assert(tableLog < 16); /* required for threshold strategy to work */
|
|
103
107
|
|
|
104
108
|
/* For explanations on how to distribute symbol values over the table :
|
|
105
|
-
|
|
109
|
+
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
|
110
|
+
|
|
111
|
+
#ifdef __clang_analyzer__
|
|
112
|
+
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
|
113
|
+
#endif
|
|
106
114
|
|
|
107
115
|
/* symbol start positions */
|
|
108
116
|
{ U32 u;
|
|
109
117
|
cumul[0] = 0;
|
|
110
|
-
for (u=1; u<=maxSymbolValue+1; u++) {
|
|
118
|
+
for (u=1; u <= maxSymbolValue+1; u++) {
|
|
111
119
|
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
|
112
120
|
cumul[u] = cumul[u-1] + 1;
|
|
113
121
|
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
|
@@ -121,14 +129,16 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
|
121
129
|
{ U32 position = 0;
|
|
122
130
|
U32 symbol;
|
|
123
131
|
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
|
124
|
-
int
|
|
125
|
-
|
|
132
|
+
int nbOccurrences;
|
|
133
|
+
int const freq = normalizedCounter[symbol];
|
|
134
|
+
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
|
126
135
|
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
|
127
136
|
position = (position + step) & tableMask;
|
|
128
|
-
while (position > highThreshold)
|
|
137
|
+
while (position > highThreshold)
|
|
138
|
+
position = (position + step) & tableMask; /* Low proba area */
|
|
129
139
|
} }
|
|
130
140
|
|
|
131
|
-
|
|
141
|
+
assert(position==0); /* Must have initialized all positions */
|
|
132
142
|
}
|
|
133
143
|
|
|
134
144
|
/* Build table */
|
|
@@ -143,7 +153,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
|
143
153
|
for (s=0; s<=maxSymbolValue; s++) {
|
|
144
154
|
switch (normalizedCounter[s])
|
|
145
155
|
{
|
|
146
|
-
case 0:
|
|
156
|
+
case 0:
|
|
157
|
+
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
|
|
158
|
+
symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
|
|
159
|
+
break;
|
|
147
160
|
|
|
148
161
|
case -1:
|
|
149
162
|
case 1:
|
|
@@ -160,6 +173,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
|
160
173
|
total += normalizedCounter[s];
|
|
161
174
|
} } } }
|
|
162
175
|
|
|
176
|
+
#if 0 /* debug : symbol costs */
|
|
177
|
+
DEBUGLOG(5, "\n --- table statistics : ");
|
|
178
|
+
{ U32 symbol;
|
|
179
|
+
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
|
180
|
+
DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
|
|
181
|
+
symbol, normalizedCounter[symbol],
|
|
182
|
+
FSE_getMaxNbBits(symbolTT, symbol),
|
|
183
|
+
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
#endif
|
|
187
|
+
|
|
163
188
|
return 0;
|
|
164
189
|
}
|
|
165
190
|
|
|
@@ -174,8 +199,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
|
|
174
199
|
|
|
175
200
|
#ifndef FSE_COMMONDEFS_ONLY
|
|
176
201
|
|
|
202
|
+
|
|
177
203
|
/*-**************************************************************
|
|
178
|
-
* FSE NCount encoding
|
|
204
|
+
* FSE NCount encoding
|
|
179
205
|
****************************************************************/
|
|
180
206
|
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
181
207
|
{
|
|
@@ -183,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
|
183
209
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
|
184
210
|
}
|
|
185
211
|
|
|
186
|
-
static size_t
|
|
187
|
-
|
|
188
|
-
|
|
212
|
+
static size_t
|
|
213
|
+
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
214
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
|
215
|
+
unsigned writeIsSafe)
|
|
189
216
|
{
|
|
190
217
|
BYTE* const ostart = (BYTE*) header;
|
|
191
218
|
BYTE* out = ostart;
|
|
@@ -194,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
|
194
221
|
const int tableSize = 1 << tableLog;
|
|
195
222
|
int remaining;
|
|
196
223
|
int threshold;
|
|
197
|
-
U32 bitStream;
|
|
198
|
-
int bitCount;
|
|
199
|
-
unsigned
|
|
200
|
-
|
|
224
|
+
U32 bitStream = 0;
|
|
225
|
+
int bitCount = 0;
|
|
226
|
+
unsigned symbol = 0;
|
|
227
|
+
unsigned const alphabetSize = maxSymbolValue + 1;
|
|
228
|
+
int previousIs0 = 0;
|
|
201
229
|
|
|
202
|
-
bitStream = 0;
|
|
203
|
-
bitCount = 0;
|
|
204
230
|
/* Table Size */
|
|
205
231
|
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
|
|
206
232
|
bitCount += 4;
|
|
@@ -210,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
|
210
236
|
threshold = tableSize;
|
|
211
237
|
nbBits = tableLog+1;
|
|
212
238
|
|
|
213
|
-
while (remaining>1) { /* stops at 1 */
|
|
214
|
-
if (
|
|
215
|
-
unsigned start =
|
|
216
|
-
while (!normalizedCounter[
|
|
217
|
-
|
|
239
|
+
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
|
|
240
|
+
if (previousIs0) {
|
|
241
|
+
unsigned start = symbol;
|
|
242
|
+
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
|
|
243
|
+
if (symbol == alphabetSize) break; /* incorrect distribution */
|
|
244
|
+
while (symbol >= start+24) {
|
|
218
245
|
start+=24;
|
|
219
246
|
bitStream += 0xFFFFU << bitCount;
|
|
220
|
-
if ((!writeIsSafe) && (out > oend-2))
|
|
247
|
+
if ((!writeIsSafe) && (out > oend-2))
|
|
248
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
|
221
249
|
out[0] = (BYTE) bitStream;
|
|
222
250
|
out[1] = (BYTE)(bitStream>>8);
|
|
223
251
|
out+=2;
|
|
224
252
|
bitStream>>=16;
|
|
225
253
|
}
|
|
226
|
-
while (
|
|
254
|
+
while (symbol >= start+3) {
|
|
227
255
|
start+=3;
|
|
228
256
|
bitStream += 3 << bitCount;
|
|
229
257
|
bitCount += 2;
|
|
230
258
|
}
|
|
231
|
-
bitStream += (
|
|
259
|
+
bitStream += (symbol-start) << bitCount;
|
|
232
260
|
bitCount += 2;
|
|
233
261
|
if (bitCount>16) {
|
|
234
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
|
262
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
|
263
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
|
235
264
|
out[0] = (BYTE)bitStream;
|
|
236
265
|
out[1] = (BYTE)(bitStream>>8);
|
|
237
266
|
out += 2;
|
|
238
267
|
bitStream >>= 16;
|
|
239
268
|
bitCount -= 16;
|
|
240
269
|
} }
|
|
241
|
-
{ int count = normalizedCounter[
|
|
242
|
-
int const max = (2*threshold-1)-remaining;
|
|
270
|
+
{ int count = normalizedCounter[symbol++];
|
|
271
|
+
int const max = (2*threshold-1) - remaining;
|
|
243
272
|
remaining -= count < 0 ? -count : count;
|
|
244
273
|
count++; /* +1 for extra accuracy */
|
|
245
|
-
if (count>=threshold)
|
|
274
|
+
if (count>=threshold)
|
|
275
|
+
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
|
246
276
|
bitStream += count << bitCount;
|
|
247
277
|
bitCount += nbBits;
|
|
248
278
|
bitCount -= (count<max);
|
|
249
|
-
|
|
279
|
+
previousIs0 = (count==1);
|
|
250
280
|
if (remaining<1) return ERROR(GENERIC);
|
|
251
|
-
while (remaining<threshold) nbBits
|
|
281
|
+
while (remaining<threshold) { nbBits--; threshold>>=1; }
|
|
252
282
|
}
|
|
253
283
|
if (bitCount>16) {
|
|
254
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
|
284
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
|
285
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
|
255
286
|
out[0] = (BYTE)bitStream;
|
|
256
287
|
out[1] = (BYTE)(bitStream>>8);
|
|
257
288
|
out += 2;
|
|
@@ -259,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
|
259
290
|
bitCount -= 16;
|
|
260
291
|
} }
|
|
261
292
|
|
|
293
|
+
if (remaining != 1)
|
|
294
|
+
return ERROR(GENERIC); /* incorrect normalized distribution */
|
|
295
|
+
assert(symbol <= alphabetSize);
|
|
296
|
+
|
|
262
297
|
/* flush remaining bitStream */
|
|
263
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
|
298
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
|
299
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
|
264
300
|
out[0] = (BYTE)bitStream;
|
|
265
301
|
out[1] = (BYTE)(bitStream>>8);
|
|
266
302
|
out+= (bitCount+7) /8;
|
|
267
303
|
|
|
268
|
-
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
|
|
269
|
-
|
|
270
304
|
return (out-ostart);
|
|
271
305
|
}
|
|
272
306
|
|
|
273
307
|
|
|
274
|
-
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|
308
|
+
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|
309
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
|
275
310
|
{
|
|
276
311
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
|
|
277
312
|
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
|
|
@@ -279,171 +314,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
|
|
279
314
|
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
|
|
280
315
|
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
|
|
281
316
|
|
|
282
|
-
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
/*-**************************************************************
|
|
288
|
-
* Counting histogram
|
|
289
|
-
****************************************************************/
|
|
290
|
-
/*! FSE_count_simple
|
|
291
|
-
This function counts byte values within `src`, and store the histogram into table `count`.
|
|
292
|
-
It doesn't use any additional memory.
|
|
293
|
-
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
|
|
294
|
-
For this reason, prefer using a table `count` with 256 elements.
|
|
295
|
-
@return : count of most numerous element
|
|
296
|
-
*/
|
|
297
|
-
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
298
|
-
const void* src, size_t srcSize)
|
|
299
|
-
{
|
|
300
|
-
const BYTE* ip = (const BYTE*)src;
|
|
301
|
-
const BYTE* const end = ip + srcSize;
|
|
302
|
-
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
|
303
|
-
unsigned max=0;
|
|
304
|
-
|
|
305
|
-
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
|
|
306
|
-
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
|
307
|
-
|
|
308
|
-
while (ip<end) count[*ip++]++;
|
|
309
|
-
|
|
310
|
-
while (!count[maxSymbolValue]) maxSymbolValue--;
|
|
311
|
-
*maxSymbolValuePtr = maxSymbolValue;
|
|
312
|
-
|
|
313
|
-
{ U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
|
|
314
|
-
|
|
315
|
-
return (size_t)max;
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
/* FSE_count_parallel_wksp() :
|
|
320
|
-
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
|
|
321
|
-
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
|
|
322
|
-
static size_t FSE_count_parallel_wksp(
|
|
323
|
-
unsigned* count, unsigned* maxSymbolValuePtr,
|
|
324
|
-
const void* source, size_t sourceSize,
|
|
325
|
-
unsigned checkMax, unsigned* const workSpace)
|
|
326
|
-
{
|
|
327
|
-
const BYTE* ip = (const BYTE*)source;
|
|
328
|
-
const BYTE* const iend = ip+sourceSize;
|
|
329
|
-
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
|
330
|
-
unsigned max=0;
|
|
331
|
-
U32* const Counting1 = workSpace;
|
|
332
|
-
U32* const Counting2 = Counting1 + 256;
|
|
333
|
-
U32* const Counting3 = Counting2 + 256;
|
|
334
|
-
U32* const Counting4 = Counting3 + 256;
|
|
335
|
-
|
|
336
|
-
memset(Counting1, 0, 4*256*sizeof(unsigned));
|
|
337
|
-
|
|
338
|
-
/* safety checks */
|
|
339
|
-
if (!sourceSize) {
|
|
340
|
-
memset(count, 0, maxSymbolValue + 1);
|
|
341
|
-
*maxSymbolValuePtr = 0;
|
|
342
|
-
return 0;
|
|
343
|
-
}
|
|
344
|
-
if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
|
|
345
|
-
|
|
346
|
-
/* by stripes of 16 bytes */
|
|
347
|
-
{ U32 cached = MEM_read32(ip); ip += 4;
|
|
348
|
-
while (ip < iend-15) {
|
|
349
|
-
U32 c = cached; cached = MEM_read32(ip); ip += 4;
|
|
350
|
-
Counting1[(BYTE) c ]++;
|
|
351
|
-
Counting2[(BYTE)(c>>8) ]++;
|
|
352
|
-
Counting3[(BYTE)(c>>16)]++;
|
|
353
|
-
Counting4[ c>>24 ]++;
|
|
354
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
|
355
|
-
Counting1[(BYTE) c ]++;
|
|
356
|
-
Counting2[(BYTE)(c>>8) ]++;
|
|
357
|
-
Counting3[(BYTE)(c>>16)]++;
|
|
358
|
-
Counting4[ c>>24 ]++;
|
|
359
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
|
360
|
-
Counting1[(BYTE) c ]++;
|
|
361
|
-
Counting2[(BYTE)(c>>8) ]++;
|
|
362
|
-
Counting3[(BYTE)(c>>16)]++;
|
|
363
|
-
Counting4[ c>>24 ]++;
|
|
364
|
-
c = cached; cached = MEM_read32(ip); ip += 4;
|
|
365
|
-
Counting1[(BYTE) c ]++;
|
|
366
|
-
Counting2[(BYTE)(c>>8) ]++;
|
|
367
|
-
Counting3[(BYTE)(c>>16)]++;
|
|
368
|
-
Counting4[ c>>24 ]++;
|
|
369
|
-
}
|
|
370
|
-
ip-=4;
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
/* finish last symbols */
|
|
374
|
-
while (ip<iend) Counting1[*ip++]++;
|
|
375
|
-
|
|
376
|
-
if (checkMax) { /* verify stats will fit into destination table */
|
|
377
|
-
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
|
378
|
-
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
|
379
|
-
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
|
380
|
-
} }
|
|
381
|
-
|
|
382
|
-
{ U32 s; for (s=0; s<=maxSymbolValue; s++) {
|
|
383
|
-
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
|
384
|
-
if (count[s] > max) max = count[s];
|
|
385
|
-
} }
|
|
386
|
-
|
|
387
|
-
while (!count[maxSymbolValue]) maxSymbolValue--;
|
|
388
|
-
*maxSymbolValuePtr = maxSymbolValue;
|
|
389
|
-
return (size_t)max;
|
|
317
|
+
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
|
|
390
318
|
}
|
|
391
319
|
|
|
392
|
-
/* FSE_countFast_wksp() :
|
|
393
|
-
* Same as FSE_countFast(), but using an externally provided scratch buffer.
|
|
394
|
-
* `workSpace` size must be table of >= `1024` unsigned */
|
|
395
|
-
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
396
|
-
const void* source, size_t sourceSize, unsigned* workSpace)
|
|
397
|
-
{
|
|
398
|
-
if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
|
399
|
-
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
|
403
|
-
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
404
|
-
const void* source, size_t sourceSize)
|
|
405
|
-
{
|
|
406
|
-
unsigned tmpCounters[1024];
|
|
407
|
-
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
/* FSE_count_wksp() :
|
|
411
|
-
* Same as FSE_count(), but using an externally provided scratch buffer.
|
|
412
|
-
* `workSpace` size must be table of >= `1024` unsigned */
|
|
413
|
-
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
414
|
-
const void* source, size_t sourceSize, unsigned* workSpace)
|
|
415
|
-
{
|
|
416
|
-
if (*maxSymbolValuePtr < 255)
|
|
417
|
-
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
|
418
|
-
*maxSymbolValuePtr = 255;
|
|
419
|
-
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
|
420
|
-
}
|
|
421
|
-
|
|
422
|
-
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
423
|
-
const void* src, size_t srcSize)
|
|
424
|
-
{
|
|
425
|
-
unsigned tmpCounters[1024];
|
|
426
|
-
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
|
|
430
320
|
|
|
431
321
|
/*-**************************************************************
|
|
432
322
|
* FSE Compression Code
|
|
433
323
|
****************************************************************/
|
|
434
|
-
/*! FSE_sizeof_CTable() :
|
|
435
|
-
FSE_CTable is a variable size structure which contains :
|
|
436
|
-
`U16 tableLog;`
|
|
437
|
-
`U16 maxSymbolValue;`
|
|
438
|
-
`U16 nextStateNumber[1 << tableLog];` // This size is variable
|
|
439
|
-
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
|
|
440
|
-
Allocation is manual (C standard does not support variable-size structures).
|
|
441
|
-
*/
|
|
442
|
-
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
|
|
443
|
-
{
|
|
444
|
-
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
445
|
-
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
|
446
|
-
}
|
|
447
324
|
|
|
448
325
|
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
|
449
326
|
{
|
|
@@ -458,7 +335,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
|
|
458
335
|
/* provides the minimum logSize to safely represent a distribution */
|
|
459
336
|
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
|
460
337
|
{
|
|
461
|
-
U32 minBitsSrc = BIT_highbit32((U32)(srcSize
|
|
338
|
+
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
|
462
339
|
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
|
463
340
|
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
|
464
341
|
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
|
@@ -521,6 +398,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
521
398
|
}
|
|
522
399
|
ToDistribute = (1 << tableLog) - distributed;
|
|
523
400
|
|
|
401
|
+
if (ToDistribute == 0)
|
|
402
|
+
return 0;
|
|
403
|
+
|
|
524
404
|
if ((total / ToDistribute) > lowOne) {
|
|
525
405
|
/* risk of rounding to zero */
|
|
526
406
|
lowOne = (U32)((total * 3) / (ToDistribute * 2));
|
|
@@ -540,7 +420,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
540
420
|
find max, then give all remaining points to max */
|
|
541
421
|
U32 maxV = 0, maxC = 0;
|
|
542
422
|
for (s=0; s<=maxSymbolValue; s++)
|
|
543
|
-
if (count[s] > maxC) maxV=s, maxC=count[s];
|
|
423
|
+
if (count[s] > maxC) { maxV=s; maxC=count[s]; }
|
|
544
424
|
norm[maxV] += (short)ToDistribute;
|
|
545
425
|
return 0;
|
|
546
426
|
}
|
|
@@ -548,7 +428,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
548
428
|
if (total == 0) {
|
|
549
429
|
/* all of the symbols were low enough for the lowOne or lowThreshold */
|
|
550
430
|
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
|
|
551
|
-
if (norm[s] > 0) ToDistribute--, norm[s]++;
|
|
431
|
+
if (norm[s] > 0) { ToDistribute--; norm[s]++; }
|
|
552
432
|
return 0;
|
|
553
433
|
}
|
|
554
434
|
|
|
@@ -604,7 +484,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
|
604
484
|
U64 restToBeat = vStep * rtbTable[proba];
|
|
605
485
|
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
|
|
606
486
|
}
|
|
607
|
-
if (proba > largestP) largestP=proba, largest=s;
|
|
487
|
+
if (proba > largestP) { largestP=proba; largest=s; }
|
|
608
488
|
normalizedCounter[s] = proba;
|
|
609
489
|
stillToDistribute -= proba;
|
|
610
490
|
} }
|
|
@@ -621,11 +501,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
|
621
501
|
U32 s;
|
|
622
502
|
U32 nTotal = 0;
|
|
623
503
|
for (s=0; s<=maxSymbolValue; s++)
|
|
624
|
-
|
|
504
|
+
RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
|
|
625
505
|
for (s=0; s<=maxSymbolValue; s++)
|
|
626
506
|
nTotal += abs(normalizedCounter[s]);
|
|
627
507
|
if (nTotal != (1U<<tableLog))
|
|
628
|
-
|
|
508
|
+
RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
|
|
629
509
|
getchar();
|
|
630
510
|
}
|
|
631
511
|
#endif
|
|
@@ -778,7 +658,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
|
|
778
658
|
BYTE* op = ostart;
|
|
779
659
|
BYTE* const oend = ostart + dstSize;
|
|
780
660
|
|
|
781
|
-
|
|
661
|
+
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
|
|
782
662
|
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
|
783
663
|
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
|
784
664
|
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
|
@@ -792,7 +672,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
|
|
792
672
|
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
|
793
673
|
|
|
794
674
|
/* Scan input and build symbol stats */
|
|
795
|
-
{ CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
|
|
675
|
+
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
|
|
796
676
|
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
|
797
677
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
|
798
678
|
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
|
@@ -827,7 +707,7 @@ typedef struct {
|
|
|
827
707
|
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
|
828
708
|
{
|
|
829
709
|
fseWkspMax_t scratchBuffer;
|
|
830
|
-
|
|
710
|
+
DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
|
831
711
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
832
712
|
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
|
833
713
|
}
|