zstdlib 0.8.0-x64-mingw32 → 0.9.0-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +10 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-3.0/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +24 -9
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/compiler.h +89 -43
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/entropy_common.c +11 -5
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.h +79 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +2 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +24 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +18 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +11 -6
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_deps.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +95 -92
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_trace.h +12 -3
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +63 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/huf_compress.c +537 -104
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +307 -373
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +174 -83
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +3 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +15 -14
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +4 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +41 -27
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +295 -120
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +309 -130
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.c +482 -562
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_lazy.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +9 -7
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +1 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm_geartab.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +249 -148
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +76 -38
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.h +4 -1
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +727 -189
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +85 -22
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +744 -220
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +8 -2
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +34 -3
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zdict.h +4 -4
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +179 -136
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/lib/zstd_errors.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +7 -0
- data/ext/{zstdlib/zstd-1.5.0 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +0 -0
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +2 -2
- metadata +124 -121
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.c +0 -824
- data/ext/zstdlib/zstd-1.5.0/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd.mk +0 -14
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
File without changes
|
File without changes
|
@@ -19,10 +19,8 @@
|
|
19
19
|
/*-*************************************
|
20
20
|
* Dependencies
|
21
21
|
***************************************/
|
22
|
-
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
|
23
|
-
#include <arm_neon.h>
|
24
|
-
#endif
|
25
22
|
#include "compiler.h"
|
23
|
+
#include "cpu.h"
|
26
24
|
#include "mem.h"
|
27
25
|
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
28
26
|
#include "error_private.h"
|
@@ -60,81 +58,7 @@ extern "C" {
|
|
60
58
|
#undef MAX
|
61
59
|
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
62
60
|
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
63
|
-
|
64
|
-
/**
|
65
|
-
* Ignore: this is an internal helper.
|
66
|
-
*
|
67
|
-
* This is a helper function to help force C99-correctness during compilation.
|
68
|
-
* Under strict compilation modes, variadic macro arguments can't be empty.
|
69
|
-
* However, variadic function arguments can be. Using a function therefore lets
|
70
|
-
* us statically check that at least one (string) argument was passed,
|
71
|
-
* independent of the compilation flags.
|
72
|
-
*/
|
73
|
-
static INLINE_KEYWORD UNUSED_ATTR
|
74
|
-
void _force_has_format_string(const char *format, ...) {
|
75
|
-
(void)format;
|
76
|
-
}
|
77
|
-
|
78
|
-
/**
|
79
|
-
* Ignore: this is an internal helper.
|
80
|
-
*
|
81
|
-
* We want to force this function invocation to be syntactically correct, but
|
82
|
-
* we don't want to force runtime evaluation of its arguments.
|
83
|
-
*/
|
84
|
-
#define _FORCE_HAS_FORMAT_STRING(...) \
|
85
|
-
if (0) { \
|
86
|
-
_force_has_format_string(__VA_ARGS__); \
|
87
|
-
}
|
88
|
-
|
89
|
-
/**
|
90
|
-
* Return the specified error if the condition evaluates to true.
|
91
|
-
*
|
92
|
-
* In debug modes, prints additional information.
|
93
|
-
* In order to do that (particularly, printing the conditional that failed),
|
94
|
-
* this can't just wrap RETURN_ERROR().
|
95
|
-
*/
|
96
|
-
#define RETURN_ERROR_IF(cond, err, ...) \
|
97
|
-
if (cond) { \
|
98
|
-
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
99
|
-
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
100
|
-
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
101
|
-
RAWLOG(3, ": " __VA_ARGS__); \
|
102
|
-
RAWLOG(3, "\n"); \
|
103
|
-
return ERROR(err); \
|
104
|
-
}
|
105
|
-
|
106
|
-
/**
|
107
|
-
* Unconditionally return the specified error.
|
108
|
-
*
|
109
|
-
* In debug modes, prints additional information.
|
110
|
-
*/
|
111
|
-
#define RETURN_ERROR(err, ...) \
|
112
|
-
do { \
|
113
|
-
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
114
|
-
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
115
|
-
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
116
|
-
RAWLOG(3, ": " __VA_ARGS__); \
|
117
|
-
RAWLOG(3, "\n"); \
|
118
|
-
return ERROR(err); \
|
119
|
-
} while(0);
|
120
|
-
|
121
|
-
/**
|
122
|
-
* If the provided expression evaluates to an error code, returns that error code.
|
123
|
-
*
|
124
|
-
* In debug modes, prints additional information.
|
125
|
-
*/
|
126
|
-
#define FORWARD_IF_ERROR(err, ...) \
|
127
|
-
do { \
|
128
|
-
size_t const err_code = (err); \
|
129
|
-
if (ERR_isError(err_code)) { \
|
130
|
-
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
131
|
-
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
132
|
-
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
133
|
-
RAWLOG(3, ": " __VA_ARGS__); \
|
134
|
-
RAWLOG(3, "\n"); \
|
135
|
-
return err_code; \
|
136
|
-
} \
|
137
|
-
} while(0);
|
61
|
+
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
|
138
62
|
|
139
63
|
|
140
64
|
/*-*************************************
|
@@ -143,7 +67,6 @@ void _force_has_format_string(const char *format, ...) {
|
|
143
67
|
#define ZSTD_OPT_NUM (1<<12)
|
144
68
|
|
145
69
|
#define ZSTD_REP_NUM 3 /* number of repcodes */
|
146
|
-
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
147
70
|
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
148
71
|
|
149
72
|
#define KB *(1 <<10)
|
@@ -195,7 +118,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
|
195
118
|
/* Each table cannot take more than #symbols * FSELog bits */
|
196
119
|
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
|
197
120
|
|
198
|
-
static UNUSED_ATTR const
|
121
|
+
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
|
199
122
|
0, 0, 0, 0, 0, 0, 0, 0,
|
200
123
|
0, 0, 0, 0, 0, 0, 0, 0,
|
201
124
|
1, 1, 1, 1, 2, 2, 3, 3,
|
@@ -212,7 +135,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
|
|
212
135
|
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
|
213
136
|
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
214
137
|
|
215
|
-
static UNUSED_ATTR const
|
138
|
+
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
|
216
139
|
0, 0, 0, 0, 0, 0, 0, 0,
|
217
140
|
0, 0, 0, 0, 0, 0, 0, 0,
|
218
141
|
0, 0, 0, 0, 0, 0, 0, 0,
|
@@ -247,19 +170,30 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
|
247
170
|
* Shared functions to include for inlining
|
248
171
|
*********************************************/
|
249
172
|
static void ZSTD_copy8(void* dst, const void* src) {
|
250
|
-
#if
|
173
|
+
#if defined(ZSTD_ARCH_ARM_NEON)
|
251
174
|
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
252
175
|
#else
|
253
176
|
ZSTD_memcpy(dst, src, 8);
|
254
177
|
#endif
|
255
178
|
}
|
256
|
-
|
257
179
|
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
180
|
+
|
181
|
+
/* Need to use memmove here since the literal buffer can now be located within
|
182
|
+
the dst buffer. In circumstances where the op "catches up" to where the
|
183
|
+
literal buffer is, there can be partial overlaps in this call on the final
|
184
|
+
copy if the literal is being shifted by less than 16 bytes. */
|
258
185
|
static void ZSTD_copy16(void* dst, const void* src) {
|
259
|
-
#if
|
186
|
+
#if defined(ZSTD_ARCH_ARM_NEON)
|
260
187
|
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
188
|
+
#elif defined(ZSTD_ARCH_X86_SSE2)
|
189
|
+
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
190
|
+
#elif defined(__clang__)
|
191
|
+
ZSTD_memmove(dst, src, 16);
|
261
192
|
#else
|
262
|
-
|
193
|
+
/* ZSTD_memmove is not inlined properly by gcc */
|
194
|
+
BYTE copy16_buf[16];
|
195
|
+
ZSTD_memcpy(copy16_buf, src, 16);
|
196
|
+
ZSTD_memcpy(dst, copy16_buf, 16);
|
263
197
|
#endif
|
264
198
|
}
|
265
199
|
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
@@ -288,8 +222,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
|
288
222
|
BYTE* op = (BYTE*)dst;
|
289
223
|
BYTE* const oend = op + length;
|
290
224
|
|
291
|
-
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
|
292
|
-
|
293
225
|
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
|
294
226
|
/* Handle short offset copies. */
|
295
227
|
do {
|
@@ -352,9 +284,9 @@ typedef enum {
|
|
352
284
|
* Private declarations
|
353
285
|
*********************************************/
|
354
286
|
typedef struct seqDef_s {
|
355
|
-
U32
|
287
|
+
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
|
356
288
|
U16 litLength;
|
357
|
-
U16 matchLength
|
289
|
+
U16 mlBase; /* mlBase == matchLength - MINMATCH */
|
358
290
|
} seqDef;
|
359
291
|
|
360
292
|
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
|
@@ -396,7 +328,7 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
|
|
396
328
|
{
|
397
329
|
ZSTD_sequenceLength seqLen;
|
398
330
|
seqLen.litLength = seq->litLength;
|
399
|
-
seqLen.matchLength = seq->
|
331
|
+
seqLen.matchLength = seq->mlBase + MINMATCH;
|
400
332
|
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
|
401
333
|
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
|
402
334
|
seqLen.litLength += 0xFFFF;
|
@@ -436,8 +368,14 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
|
436
368
|
# if STATIC_BMI2 == 1
|
437
369
|
return _lzcnt_u32(val)^31;
|
438
370
|
# else
|
439
|
-
|
440
|
-
|
371
|
+
if (val != 0) {
|
372
|
+
unsigned long r;
|
373
|
+
_BitScanReverse(&r, val);
|
374
|
+
return (unsigned)r;
|
375
|
+
} else {
|
376
|
+
/* Should not reach this code path */
|
377
|
+
__assume(0);
|
378
|
+
}
|
441
379
|
# endif
|
442
380
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
443
381
|
return __builtin_clz (val) ^ 31;
|
@@ -456,6 +394,63 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
|
456
394
|
}
|
457
395
|
}
|
458
396
|
|
397
|
+
/**
|
398
|
+
* Counts the number of trailing zeros of a `size_t`.
|
399
|
+
* Most compilers should support CTZ as a builtin. A backup
|
400
|
+
* implementation is provided if the builtin isn't supported, but
|
401
|
+
* it may not be terribly efficient.
|
402
|
+
*/
|
403
|
+
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
|
404
|
+
{
|
405
|
+
if (MEM_64bits()) {
|
406
|
+
# if defined(_MSC_VER) && defined(_WIN64)
|
407
|
+
# if STATIC_BMI2
|
408
|
+
return _tzcnt_u64(val);
|
409
|
+
# else
|
410
|
+
if (val != 0) {
|
411
|
+
unsigned long r;
|
412
|
+
_BitScanForward64(&r, (U64)val);
|
413
|
+
return (unsigned)r;
|
414
|
+
} else {
|
415
|
+
/* Should not reach this code path */
|
416
|
+
__assume(0);
|
417
|
+
}
|
418
|
+
# endif
|
419
|
+
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
420
|
+
return __builtin_ctzll((U64)val);
|
421
|
+
# else
|
422
|
+
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
|
423
|
+
4, 25, 14, 28, 9, 34, 20, 56,
|
424
|
+
5, 17, 26, 54, 15, 41, 29, 43,
|
425
|
+
10, 31, 38, 35, 21, 45, 49, 57,
|
426
|
+
63, 6, 12, 18, 24, 27, 33, 55,
|
427
|
+
16, 53, 40, 42, 30, 37, 44, 48,
|
428
|
+
62, 11, 23, 32, 52, 39, 36, 47,
|
429
|
+
61, 22, 51, 46, 60, 50, 59, 58 };
|
430
|
+
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
431
|
+
# endif
|
432
|
+
} else { /* 32 bits */
|
433
|
+
# if defined(_MSC_VER)
|
434
|
+
if (val != 0) {
|
435
|
+
unsigned long r;
|
436
|
+
_BitScanForward(&r, (U32)val);
|
437
|
+
return (unsigned)r;
|
438
|
+
} else {
|
439
|
+
/* Should not reach this code path */
|
440
|
+
__assume(0);
|
441
|
+
}
|
442
|
+
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
443
|
+
return __builtin_ctz((U32)val);
|
444
|
+
# else
|
445
|
+
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
|
446
|
+
30, 22, 20, 15, 25, 17, 4, 8,
|
447
|
+
31, 27, 13, 23, 21, 19, 16, 7,
|
448
|
+
26, 12, 18, 6, 11, 5, 10, 9 };
|
449
|
+
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
450
|
+
# endif
|
451
|
+
}
|
452
|
+
}
|
453
|
+
|
459
454
|
|
460
455
|
/* ZSTD_invalidateRepCodes() :
|
461
456
|
* ensures next compression will not use repcodes from previous block.
|
@@ -482,6 +477,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
|
482
477
|
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
483
478
|
const void* src, size_t srcSize);
|
484
479
|
|
480
|
+
/**
|
481
|
+
* @returns true iff the CPU supports dynamic BMI2 dispatch.
|
482
|
+
*/
|
483
|
+
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
|
484
|
+
{
|
485
|
+
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
|
486
|
+
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
|
487
|
+
}
|
485
488
|
|
486
489
|
#if defined (__cplusplus)
|
487
490
|
}
|
@@ -17,10 +17,19 @@ extern "C" {
|
|
17
17
|
|
18
18
|
#include <stddef.h>
|
19
19
|
|
20
|
-
/* weak symbol support
|
21
|
-
|
20
|
+
/* weak symbol support
|
21
|
+
* For now, enable conservatively:
|
22
|
+
* - Only GNUC
|
23
|
+
* - Only ELF
|
24
|
+
* - Only x86-64 and i386
|
25
|
+
* Also, explicitly disable on platforms known not to work so they aren't
|
26
|
+
* forgotten in the future.
|
27
|
+
*/
|
28
|
+
#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
|
29
|
+
defined(__GNUC__) && defined(__ELF__) && \
|
30
|
+
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) && \
|
22
31
|
!defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
|
23
|
-
!defined(__CYGWIN__)
|
32
|
+
!defined(__CYGWIN__) && !defined(_AIX)
|
24
33
|
# define ZSTD_HAVE_WEAK_SYMBOLS 1
|
25
34
|
#else
|
26
35
|
# define ZSTD_HAVE_WEAK_SYMBOLS 0
|
@@ -0,0 +1,134 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_CLEVELS_H
|
12
|
+
#define ZSTD_CLEVELS_H
|
13
|
+
|
14
|
+
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
|
15
|
+
#include "../zstd.h"
|
16
|
+
|
17
|
+
/*-===== Pre-defined compression levels =====-*/
|
18
|
+
|
19
|
+
#define ZSTD_MAX_CLEVEL 22
|
20
|
+
|
21
|
+
#ifdef __GNUC__
|
22
|
+
__attribute__((__unused__))
|
23
|
+
#endif
|
24
|
+
|
25
|
+
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
|
26
|
+
{ /* "default" - for any srcSize > 256 KB */
|
27
|
+
/* W, C, H, S, L, TL, strat */
|
28
|
+
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
|
29
|
+
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
|
30
|
+
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
|
31
|
+
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
|
32
|
+
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
|
33
|
+
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
|
34
|
+
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
|
35
|
+
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
|
36
|
+
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
|
37
|
+
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
|
38
|
+
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
|
39
|
+
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
|
40
|
+
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
|
41
|
+
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
|
42
|
+
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
|
43
|
+
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
|
44
|
+
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
|
45
|
+
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
|
46
|
+
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
|
47
|
+
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
|
48
|
+
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
|
49
|
+
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
|
50
|
+
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
|
51
|
+
},
|
52
|
+
{ /* for srcSize <= 256 KB */
|
53
|
+
/* W, C, H, S, L, T, strat */
|
54
|
+
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
55
|
+
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
56
|
+
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
|
57
|
+
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
|
58
|
+
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
|
59
|
+
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
|
60
|
+
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
|
61
|
+
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
|
62
|
+
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
63
|
+
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
64
|
+
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
65
|
+
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
|
66
|
+
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
|
67
|
+
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
|
68
|
+
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
69
|
+
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
|
70
|
+
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
71
|
+
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
72
|
+
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
|
73
|
+
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
74
|
+
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
|
75
|
+
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
|
76
|
+
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
|
77
|
+
},
|
78
|
+
{ /* for srcSize <= 128 KB */
|
79
|
+
/* W, C, H, S, L, T, strat */
|
80
|
+
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
81
|
+
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
82
|
+
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
|
83
|
+
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
|
84
|
+
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
|
85
|
+
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
|
86
|
+
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
|
87
|
+
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
88
|
+
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
89
|
+
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
90
|
+
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
91
|
+
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
|
92
|
+
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
|
93
|
+
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
|
94
|
+
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
95
|
+
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
|
96
|
+
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
97
|
+
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
98
|
+
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
|
99
|
+
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
|
100
|
+
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
|
101
|
+
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
102
|
+
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
|
103
|
+
},
|
104
|
+
{ /* for srcSize <= 16 KB */
|
105
|
+
/* W, C, H, S, L, T, strat */
|
106
|
+
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
107
|
+
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
|
108
|
+
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
|
109
|
+
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
|
110
|
+
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
|
111
|
+
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
|
112
|
+
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
|
113
|
+
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
114
|
+
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
|
115
|
+
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
|
116
|
+
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
|
117
|
+
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
|
118
|
+
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
|
119
|
+
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
|
120
|
+
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
|
121
|
+
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
|
122
|
+
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
|
123
|
+
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
|
124
|
+
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
|
125
|
+
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
126
|
+
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
|
127
|
+
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
128
|
+
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
|
129
|
+
},
|
130
|
+
};
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
#endif /* ZSTD_CLEVELS_H */
|
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
75
75
|
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
76
76
|
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
77
77
|
U32 const step = FSE_TABLESTEP(tableSize);
|
78
|
+
U32 const maxSV1 = maxSymbolValue+1;
|
78
79
|
|
79
|
-
|
80
|
-
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (
|
80
|
+
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
|
81
|
+
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
|
81
82
|
|
82
83
|
U32 highThreshold = tableSize-1;
|
83
84
|
|
84
|
-
|
85
|
+
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
|
85
86
|
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
86
87
|
/* CTable header */
|
87
88
|
tableU16[-2] = (U16) tableLog;
|
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
98
99
|
/* symbol start positions */
|
99
100
|
{ U32 u;
|
100
101
|
cumul[0] = 0;
|
101
|
-
for (u=1; u <=
|
102
|
+
for (u=1; u <= maxSV1; u++) {
|
102
103
|
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
103
104
|
cumul[u] = cumul[u-1] + 1;
|
104
105
|
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
105
106
|
} else {
|
106
|
-
|
107
|
+
assert(normalizedCounter[u-1] >= 0);
|
108
|
+
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
|
109
|
+
assert(cumul[u] >= cumul[u-1]); /* no overflow */
|
107
110
|
} }
|
108
|
-
cumul[
|
111
|
+
cumul[maxSV1] = (U16)(tableSize+1);
|
109
112
|
}
|
110
113
|
|
111
114
|
/* Spread symbols */
|
112
|
-
|
115
|
+
if (highThreshold == tableSize - 1) {
|
116
|
+
/* Case for no low prob count symbols. Lay down 8 bytes at a time
|
117
|
+
* to reduce branch misses since we are operating on a small block
|
118
|
+
*/
|
119
|
+
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
|
120
|
+
{ U64 const add = 0x0101010101010101ull;
|
121
|
+
size_t pos = 0;
|
122
|
+
U64 sv = 0;
|
123
|
+
U32 s;
|
124
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
125
|
+
int i;
|
126
|
+
int const n = normalizedCounter[s];
|
127
|
+
MEM_write64(spread + pos, sv);
|
128
|
+
for (i = 8; i < n; i += 8) {
|
129
|
+
MEM_write64(spread + pos + i, sv);
|
130
|
+
}
|
131
|
+
assert(n>=0);
|
132
|
+
pos += (size_t)n;
|
133
|
+
}
|
134
|
+
}
|
135
|
+
/* Spread symbols across the table. Lack of lowprob symbols means that
|
136
|
+
* we don't need variable sized inner loop, so we can unroll the loop and
|
137
|
+
* reduce branch misses.
|
138
|
+
*/
|
139
|
+
{ size_t position = 0;
|
140
|
+
size_t s;
|
141
|
+
size_t const unroll = 2; /* Experimentally determined optimal unroll */
|
142
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
143
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
144
|
+
size_t u;
|
145
|
+
for (u = 0; u < unroll; ++u) {
|
146
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
147
|
+
tableSymbol[uPosition] = spread[s + u];
|
148
|
+
}
|
149
|
+
position = (position + (unroll * step)) & tableMask;
|
150
|
+
}
|
151
|
+
assert(position == 0); /* Must have initialized all positions */
|
152
|
+
}
|
153
|
+
} else {
|
154
|
+
U32 position = 0;
|
113
155
|
U32 symbol;
|
114
|
-
for (symbol=0; symbol
|
156
|
+
for (symbol=0; symbol<maxSV1; symbol++) {
|
115
157
|
int nbOccurrences;
|
116
158
|
int const freq = normalizedCounter[symbol];
|
117
159
|
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
120
162
|
while (position > highThreshold)
|
121
163
|
position = (position + step) & tableMask; /* Low proba area */
|
122
164
|
} }
|
123
|
-
|
124
165
|
assert(position==0); /* Must have initialized all positions */
|
125
166
|
}
|
126
167
|
|
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
144
185
|
case -1:
|
145
186
|
case 1:
|
146
187
|
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
|
147
|
-
|
188
|
+
assert(total <= INT_MAX);
|
189
|
+
symbolTT[s].deltaFindState = (int)(total - 1);
|
148
190
|
total ++;
|
149
191
|
break;
|
150
192
|
default :
|
151
|
-
|
152
|
-
|
153
|
-
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
|
193
|
+
assert(normalizedCounter[s] > 1);
|
194
|
+
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
|
195
|
+
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
|
154
196
|
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
|
155
|
-
symbolTT[s].deltaFindState = total - normalizedCounter[s];
|
156
|
-
total += normalizedCounter[s];
|
197
|
+
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
|
198
|
+
total += (unsigned)normalizedCounter[s];
|
157
199
|
} } } }
|
158
200
|
|
159
201
|
#if 0 /* debug : symbol costs */
|
@@ -164,32 +206,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
|
164
206
|
symbol, normalizedCounter[symbol],
|
165
207
|
FSE_getMaxNbBits(symbolTT, symbol),
|
166
208
|
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
167
|
-
|
168
|
-
}
|
209
|
+
} }
|
169
210
|
#endif
|
170
211
|
|
171
212
|
return 0;
|
172
213
|
}
|
173
214
|
|
174
|
-
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
175
|
-
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
176
|
-
{
|
177
|
-
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
|
178
|
-
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
|
179
|
-
}
|
180
|
-
#endif
|
181
|
-
|
182
215
|
|
183
216
|
|
184
217
|
#ifndef FSE_COMMONDEFS_ONLY
|
185
218
|
|
186
|
-
|
187
219
|
/*-**************************************************************
|
188
220
|
* FSE NCount encoding
|
189
221
|
****************************************************************/
|
190
222
|
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
191
223
|
{
|
192
|
-
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
|
224
|
+
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
|
225
|
+
+ 4 /* bitCount initialized at 4 */
|
226
|
+
+ 2 /* first two symbols may use one additional bit each */) / 8)
|
227
|
+
+ 1 /* round up to whole nb bytes */
|
228
|
+
+ 2 /* additional two bytes for bitstream flush */;
|
193
229
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
194
230
|
}
|
195
231
|
|
File without changes
|
File without changes
|