zstd-ruby 1.3.5.0 → 1.3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +2 -1
- data/ext/zstdruby/libzstd/BUCK +1 -0
- data/ext/zstdruby/libzstd/Makefile +25 -13
- data/ext/zstdruby/libzstd/README.md +11 -10
- data/ext/zstdruby/libzstd/common/bitstream.h +8 -11
- data/ext/zstdruby/libzstd/common/compiler.h +30 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/mem.h +20 -2
- data/ext/zstdruby/libzstd/common/xxhash.c +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +55 -48
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +290 -147
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +5 -2
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +63 -51
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +44 -33
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +125 -116
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -15
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +0 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +42 -36
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -9
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +96 -51
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +16 -6
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +169 -101
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +111 -87
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +83 -0
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +3 -3
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +728 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +34 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +9 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +12 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +20 -18
- data/ext/zstdruby/libzstd/zstd.h +109 -50
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35d073986861ccb9bdb6a87e3f7335dd5d08249913c668a6d6279aadcfca7551
|
4
|
+
data.tar.gz: 315406589d9487c367edd70a2dc9c090dd796b0764fb1d4200a32b4375c430bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4395106140fe83001703df637cb1632f7ef17cba187d9935c385d30b8c3a06a8688d9a169f4c9cbd1cb1d53463b895c1e414503f8030b7115ff8e612510cba9b
|
7
|
+
data.tar.gz: ab48fc82d7d1eef1179242839d93ea3cd2eb88e27b3dc4957d3457a4cf125bf0764f48b2c28ae626346bb3866c6a81114da2de6c75547f80a4e1589dc8032689
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
[Gem Version](https://badge.fury.io/rb/zstd-ruby)
|
1
2
|
[Build Status](https://travis-ci.org/SpringMT/zstd-ruby)
|
2
3
|
|
3
4
|
# zstd-ruby
|
@@ -9,7 +10,7 @@ See https://github.com/facebook/zstd
|
|
9
10
|
Fork from https://github.com/jarredholman/ruby-zstd.
|
10
11
|
|
11
12
|
## Zstd version
|
12
|
-
v1.3.
|
13
|
+
v1.3.7 (https://github.com/facebook/zstd/tree/v1.3.7)
|
13
14
|
|
14
15
|
## Installation
|
15
16
|
|
data/ext/zstdruby/libzstd/BUCK
CHANGED
@@ -19,15 +19,20 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT))
|
|
19
19
|
VERSION?= $(LIBVER)
|
20
20
|
|
21
21
|
CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
|
22
|
+
ifeq ($(OS),Windows_NT) # MinGW assumed
|
23
|
+
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting
|
24
|
+
endif
|
22
25
|
CFLAGS ?= -O3
|
23
|
-
DEBUGFLAGS
|
26
|
+
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
24
27
|
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
25
28
|
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
26
29
|
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
27
|
-
-Wredundant-decls
|
30
|
+
-Wredundant-decls -Wmissing-prototypes
|
28
31
|
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
29
32
|
FLAGS = $(CPPFLAGS) $(CFLAGS)
|
30
33
|
|
34
|
+
GREP = grep --color=never
|
35
|
+
|
31
36
|
ZSTDCOMMON_FILES := $(sort $(wildcard common/*.c))
|
32
37
|
ZSTDCOMP_FILES := $(sort $(wildcard compress/*.c))
|
33
38
|
ZSTDDECOMP_FILES := $(sort $(wildcard decompress/*.c))
|
@@ -35,7 +40,7 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
|
|
35
40
|
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
|
36
41
|
ZSTD_FILES := $(ZSTDCOMMON_FILES)
|
37
42
|
|
38
|
-
ZSTD_LEGACY_SUPPORT ?=
|
43
|
+
ZSTD_LEGACY_SUPPORT ?= 5
|
39
44
|
ZSTD_LIB_COMPRESSION ?= 1
|
40
45
|
ZSTD_LIB_DECOMPRESSION ?= 1
|
41
46
|
ZSTD_LIB_DICTBUILDER ?= 1
|
@@ -52,11 +57,11 @@ ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
|
|
52
57
|
endif
|
53
58
|
|
54
59
|
ifneq ($(ZSTD_LIB_COMPRESSION), 0)
|
55
|
-
ZSTD_FILES += $(ZSTDCOMP_FILES)
|
60
|
+
ZSTD_FILES += $(ZSTDCOMP_FILES)
|
56
61
|
endif
|
57
62
|
|
58
63
|
ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
|
59
|
-
ZSTD_FILES += $(ZSTDDECOMP_FILES)
|
64
|
+
ZSTD_FILES += $(ZSTDDECOMP_FILES)
|
60
65
|
endif
|
61
66
|
|
62
67
|
ifneq ($(ZSTD_LIB_DEPRECATED), 0)
|
@@ -69,7 +74,7 @@ endif
|
|
69
74
|
|
70
75
|
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
|
71
76
|
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
|
72
|
-
ZSTD_FILES += $(shell ls legacy/*.c |
|
77
|
+
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
|
73
78
|
endif
|
74
79
|
CPPFLAGS += -I./legacy
|
75
80
|
endif
|
@@ -91,8 +96,6 @@ else
|
|
91
96
|
SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
|
92
97
|
endif
|
93
98
|
|
94
|
-
LIBZSTD = libzstd.$(SHARED_EXT_VER)
|
95
|
-
|
96
99
|
|
97
100
|
.PHONY: default all clean install uninstall
|
98
101
|
|
@@ -108,19 +111,28 @@ libzstd.a: $(ZSTD_OBJ)
|
|
108
111
|
libzstd.a-mt: CPPFLAGS += -DZSTD_MULTITHREAD
|
109
112
|
libzstd.a-mt: libzstd.a
|
110
113
|
|
111
|
-
|
114
|
+
ifneq (,$(filter Windows%,$(OS)))
|
115
|
+
|
116
|
+
LIBZSTD = dll\libzstd.dll
|
112
117
|
$(LIBZSTD): $(ZSTD_FILES)
|
113
118
|
@echo compiling dynamic library $(LIBVER)
|
114
|
-
|
115
|
-
|
116
|
-
|
119
|
+
@$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o $@
|
120
|
+
dlltool -D $@ -d dll\libzstd.def -l dll\libzstd.lib
|
121
|
+
|
117
122
|
else
|
123
|
+
|
124
|
+
LIBZSTD = libzstd.$(SHARED_EXT_VER)
|
125
|
+
$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
|
126
|
+
$(LIBZSTD): $(ZSTD_FILES)
|
127
|
+
@echo compiling dynamic library $(LIBVER)
|
118
128
|
@$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
|
119
129
|
@echo creating versioned links
|
120
130
|
@ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
|
121
131
|
@ln -sf $@ libzstd.$(SHARED_EXT)
|
132
|
+
|
122
133
|
endif
|
123
134
|
|
135
|
+
|
124
136
|
libzstd : $(LIBZSTD)
|
125
137
|
|
126
138
|
libzstd-mt : CPPFLAGS += -DZSTD_MULTITHREAD
|
@@ -154,7 +166,7 @@ clean:
|
|
154
166
|
#-----------------------------------------------------------------------------
|
155
167
|
# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
|
156
168
|
#-----------------------------------------------------------------------------
|
157
|
-
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
|
169
|
+
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
|
158
170
|
|
159
171
|
DESTDIR ?=
|
160
172
|
# directory variables : GNU conventions prefer lowercase
|
@@ -13,7 +13,7 @@ including commands variables, staged install, directory variables and standard t
|
|
13
13
|
- `make install` : install libraries in default system directories
|
14
14
|
|
15
15
|
`libzstd` default scope includes compression, decompression, dictionary building,
|
16
|
-
and decoding support for legacy formats >= v0.
|
16
|
+
and decoding support for legacy formats >= v0.5.0.
|
17
17
|
|
18
18
|
|
19
19
|
#### API
|
@@ -48,23 +48,24 @@ It's possible to compile only a limited set of features.
|
|
48
48
|
This module depends on both `lib/common` and `lib/compress` .
|
49
49
|
- `lib/legacy` : source code to decompress legacy zstd formats, starting from `v0.1.0`.
|
50
50
|
This module depends on `lib/common` and `lib/decompress`.
|
51
|
-
To enable this feature,
|
52
|
-
|
53
|
-
Using higher number limits versions supported.
|
51
|
+
To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
|
52
|
+
Specifying a number limits versions supported to that version onward.
|
54
53
|
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
|
55
54
|
`ZSTD_LEGACY_SUPPORT=3` means : "support legacy formats >= v0.3.0", and so on.
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
Currently, the default library setting is `ZSTD_LEGACY_SUPPORT=5`.
|
56
|
+
It can be changed at build time to any other value.
|
57
|
+
Note that any number >= 8 translates into "do __not__ support legacy formats",
|
58
|
+
since all versions of `zstd` >= v0.8 are compatible with v1+ specification.
|
59
|
+
`ZSTD_LEGACY_SUPPORT=0` also means "do __not__ support legacy formats".
|
59
60
|
Once enabled, this capability is transparently triggered within decompression functions.
|
60
61
|
It's also possible to invoke the legacy API directly, as exposed in `lib/legacy/zstd_legacy.h`.
|
61
62
|
Each version also provides an additional dedicated set of advanced API.
|
62
63
|
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
63
64
|
Note : `lib/legacy` only supports _decoding_ legacy formats.
|
64
|
-
- Similarly, you can define `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
65
|
-
and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will
|
65
|
+
- Similarly, you can define `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
66
|
+
and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will
|
66
67
|
also disable compilation of all dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
|
67
|
-
dictBuilder).
|
68
|
+
dictBuilder).
|
68
69
|
|
69
70
|
|
70
71
|
#### Multithreading support
|
@@ -339,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
|
339
339
|
|
340
340
|
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
341
341
|
{
|
342
|
-
|
343
|
-
|
344
|
-
if (sizeof(bitContainer)==8)
|
345
|
-
return _bextr_u64(bitContainer, start, nbBits);
|
346
|
-
else
|
347
|
-
# endif
|
348
|
-
return _bextr_u32(bitContainer, start, nbBits);
|
349
|
-
#else
|
342
|
+
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
343
|
+
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
350
344
|
assert(nbBits < BIT_MASK_SIZE);
|
351
|
-
return (bitContainer >> start) & BIT_mask[nbBits];
|
352
|
-
#endif
|
345
|
+
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
353
346
|
}
|
354
347
|
|
355
348
|
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
@@ -366,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
366
359
|
* @return : value extracted */
|
367
360
|
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
368
361
|
{
|
369
|
-
|
362
|
+
/* arbitrate between double-shift and shift+mask */
|
363
|
+
#if 1
|
364
|
+
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
|
365
|
+
* bitstream is likely corrupted, and result is undefined */
|
370
366
|
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
371
367
|
#else
|
368
|
+
/* this code path is slower on my os-x laptop */
|
372
369
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
373
370
|
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
374
371
|
#endif
|
@@ -88,15 +88,37 @@
|
|
88
88
|
#endif
|
89
89
|
#endif
|
90
90
|
|
91
|
-
/* prefetch
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
91
|
+
/* prefetch
|
92
|
+
* can be disabled, by declaring NO_PREFETCH macro
|
93
|
+
* All prefetch invocations use a single default locality 2,
|
94
|
+
* generating instruction prefetcht1,
|
95
|
+
* which, according to Intel, means "load data into L2 cache".
|
96
|
+
* This is a good enough "middle ground" for the time being,
|
97
|
+
* though in theory, it would be better to specialize locality depending on data being prefetched.
|
98
|
+
* Tests could not determine any sensible difference based on locality value. */
|
99
|
+
#if defined(NO_PREFETCH)
|
100
|
+
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
97
101
|
#else
|
98
|
-
#
|
99
|
-
#
|
102
|
+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
103
|
+
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
104
|
+
# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
105
|
+
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
106
|
+
# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
107
|
+
# else
|
108
|
+
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
109
|
+
# endif
|
110
|
+
#endif /* NO_PREFETCH */
|
111
|
+
|
112
|
+
#define CACHELINE_SIZE 64
|
113
|
+
|
114
|
+
#define PREFETCH_AREA(p, s) { \
|
115
|
+
const char* const _ptr = (const char*)(p); \
|
116
|
+
size_t const _size = (size_t)(s); \
|
117
|
+
size_t _pos; \
|
118
|
+
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
119
|
+
PREFETCH(_ptr + _pos); \
|
120
|
+
} \
|
121
|
+
}
|
100
122
|
|
101
123
|
/* disable warnings */
|
102
124
|
#ifdef _MSC_VER /* Visual Studio */
|
@@ -39,6 +39,10 @@ extern "C" {
|
|
39
39
|
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
|
40
40
|
#endif
|
41
41
|
|
42
|
+
#ifndef __has_builtin
|
43
|
+
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
|
44
|
+
#endif
|
45
|
+
|
42
46
|
/* code only tested on 32 and 64 bits systems */
|
43
47
|
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
|
44
48
|
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
@@ -57,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
|
|
57
61
|
typedef uint64_t U64;
|
58
62
|
typedef int64_t S64;
|
59
63
|
#else
|
64
|
+
# include <limits.h>
|
65
|
+
#if CHAR_BIT != 8
|
66
|
+
# error "this implementation requires char to be exactly 8-bit type"
|
67
|
+
#endif
|
60
68
|
typedef unsigned char BYTE;
|
69
|
+
#if USHRT_MAX != 65535
|
70
|
+
# error "this implementation requires short to be exactly 16-bit type"
|
71
|
+
#endif
|
61
72
|
typedef unsigned short U16;
|
62
73
|
typedef signed short S16;
|
74
|
+
#if UINT_MAX != 4294967295
|
75
|
+
# error "this implementation requires int to be exactly 32-bit type"
|
76
|
+
#endif
|
63
77
|
typedef unsigned int U32;
|
64
78
|
typedef signed int S32;
|
79
|
+
/* note : there are no limits defined for long long type in C90.
|
80
|
+
* limits exist in C99, however, in such case, <stdint.h> is preferred */
|
65
81
|
typedef unsigned long long U64;
|
66
82
|
typedef signed long long S64;
|
67
83
|
#endif
|
@@ -186,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
|
|
186
202
|
{
|
187
203
|
#if defined(_MSC_VER) /* Visual Studio */
|
188
204
|
return _byteswap_ulong(in);
|
189
|
-
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
|
205
|
+
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|
206
|
+
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
|
190
207
|
return __builtin_bswap32(in);
|
191
208
|
#else
|
192
209
|
return ((in << 24) & 0xff000000 ) |
|
@@ -200,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|
|
200
217
|
{
|
201
218
|
#if defined(_MSC_VER) /* Visual Studio */
|
202
219
|
return _byteswap_uint64(in);
|
203
|
-
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
|
220
|
+
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|
221
|
+
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
|
204
222
|
return __builtin_bswap64(in);
|
205
223
|
#else
|
206
224
|
return ((in << 56) & 0xff00000000000000ULL) |
|
@@ -98,6 +98,7 @@
|
|
98
98
|
/* Modify the local functions below should you wish to use some other memory routines */
|
99
99
|
/* for malloc(), free() */
|
100
100
|
#include <stdlib.h>
|
101
|
+
#include <stddef.h> /* size_t */
|
101
102
|
static void* XXH_malloc(size_t s) { return malloc(s); }
|
102
103
|
static void XXH_free (void* p) { free(p); }
|
103
104
|
/* for memcpy() */
|
@@ -79,8 +79,7 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
|
79
79
|
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
80
80
|
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
81
81
|
|
82
|
-
#define ZSTD_FRAMEIDSIZE 4
|
83
|
-
static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
|
82
|
+
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
84
83
|
|
85
84
|
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
86
85
|
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
@@ -193,6 +192,8 @@ typedef struct {
|
|
193
192
|
BYTE* llCode;
|
194
193
|
BYTE* mlCode;
|
195
194
|
BYTE* ofCode;
|
195
|
+
size_t maxNbSeq;
|
196
|
+
size_t maxNbLit;
|
196
197
|
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
197
198
|
U32 longLengthPos;
|
198
199
|
} seqStore_t;
|
@@ -83,7 +83,9 @@
|
|
83
83
|
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
|
84
84
|
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
|
85
85
|
*/
|
86
|
-
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
86
|
+
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
87
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
88
|
+
void* workSpace, size_t wkspSize)
|
87
89
|
{
|
88
90
|
U32 const tableSize = 1 << tableLog;
|
89
91
|
U32 const tableMask = tableSize - 1;
|
@@ -101,10 +103,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
101
103
|
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
102
104
|
tableU16[-2] = (U16) tableLog;
|
103
105
|
tableU16[-1] = (U16) maxSymbolValue;
|
104
|
-
assert(tableLog < 16); /* required for
|
106
|
+
assert(tableLog < 16); /* required for threshold strategy to work */
|
105
107
|
|
106
108
|
/* For explanations on how to distribute symbol values over the table :
|
107
|
-
|
109
|
+
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
110
|
+
|
111
|
+
#ifdef __clang_analyzer__
|
112
|
+
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
113
|
+
#endif
|
108
114
|
|
109
115
|
/* symbol start positions */
|
110
116
|
{ U32 u;
|
@@ -124,13 +130,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
124
130
|
U32 symbol;
|
125
131
|
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
126
132
|
int nbOccurences;
|
127
|
-
|
133
|
+
int const freq = normalizedCounter[symbol];
|
134
|
+
for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
|
128
135
|
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
129
136
|
position = (position + step) & tableMask;
|
130
|
-
while (position > highThreshold)
|
137
|
+
while (position > highThreshold)
|
138
|
+
position = (position + step) & tableMask; /* Low proba area */
|
131
139
|
} }
|
132
140
|
|
133
|
-
|
141
|
+
assert(position==0); /* Must have initialized all positions */
|
134
142
|
}
|
135
143
|
|
136
144
|
/* Build table */
|
@@ -201,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
201
209
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
202
210
|
}
|
203
211
|
|
204
|
-
static size_t
|
205
|
-
|
206
|
-
|
212
|
+
static size_t
|
213
|
+
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
214
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
215
|
+
unsigned writeIsSafe)
|
207
216
|
{
|
208
217
|
BYTE* const ostart = (BYTE*) header;
|
209
218
|
BYTE* out = ostart;
|
@@ -212,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
212
221
|
const int tableSize = 1 << tableLog;
|
213
222
|
int remaining;
|
214
223
|
int threshold;
|
215
|
-
U32 bitStream;
|
216
|
-
int bitCount;
|
217
|
-
unsigned
|
218
|
-
|
224
|
+
U32 bitStream = 0;
|
225
|
+
int bitCount = 0;
|
226
|
+
unsigned symbol = 0;
|
227
|
+
unsigned const alphabetSize = maxSymbolValue + 1;
|
228
|
+
int previousIs0 = 0;
|
219
229
|
|
220
|
-
bitStream = 0;
|
221
|
-
bitCount = 0;
|
222
230
|
/* Table Size */
|
223
231
|
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
|
224
232
|
bitCount += 4;
|
@@ -228,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
228
236
|
threshold = tableSize;
|
229
237
|
nbBits = tableLog+1;
|
230
238
|
|
231
|
-
while (remaining>1) { /* stops at 1 */
|
232
|
-
if (
|
233
|
-
unsigned start =
|
234
|
-
while (!normalizedCounter[
|
235
|
-
|
239
|
+
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
|
240
|
+
if (previousIs0) {
|
241
|
+
unsigned start = symbol;
|
242
|
+
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
|
243
|
+
if (symbol == alphabetSize) break; /* incorrect distribution */
|
244
|
+
while (symbol >= start+24) {
|
236
245
|
start+=24;
|
237
246
|
bitStream += 0xFFFFU << bitCount;
|
238
|
-
if ((!writeIsSafe) && (out > oend-2))
|
247
|
+
if ((!writeIsSafe) && (out > oend-2))
|
248
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
239
249
|
out[0] = (BYTE) bitStream;
|
240
250
|
out[1] = (BYTE)(bitStream>>8);
|
241
251
|
out+=2;
|
242
252
|
bitStream>>=16;
|
243
253
|
}
|
244
|
-
while (
|
254
|
+
while (symbol >= start+3) {
|
245
255
|
start+=3;
|
246
256
|
bitStream += 3 << bitCount;
|
247
257
|
bitCount += 2;
|
248
258
|
}
|
249
|
-
bitStream += (
|
259
|
+
bitStream += (symbol-start) << bitCount;
|
250
260
|
bitCount += 2;
|
251
261
|
if (bitCount>16) {
|
252
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
262
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
263
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
253
264
|
out[0] = (BYTE)bitStream;
|
254
265
|
out[1] = (BYTE)(bitStream>>8);
|
255
266
|
out += 2;
|
256
267
|
bitStream >>= 16;
|
257
268
|
bitCount -= 16;
|
258
269
|
} }
|
259
|
-
{ int count = normalizedCounter[
|
260
|
-
int const max = (2*threshold-1)-remaining;
|
270
|
+
{ int count = normalizedCounter[symbol++];
|
271
|
+
int const max = (2*threshold-1) - remaining;
|
261
272
|
remaining -= count < 0 ? -count : count;
|
262
273
|
count++; /* +1 for extra accuracy */
|
263
|
-
if (count>=threshold)
|
274
|
+
if (count>=threshold)
|
275
|
+
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
264
276
|
bitStream += count << bitCount;
|
265
277
|
bitCount += nbBits;
|
266
278
|
bitCount -= (count<max);
|
267
|
-
|
279
|
+
previousIs0 = (count==1);
|
268
280
|
if (remaining<1) return ERROR(GENERIC);
|
269
281
|
while (remaining<threshold) { nbBits--; threshold>>=1; }
|
270
282
|
}
|
271
283
|
if (bitCount>16) {
|
272
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
284
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
285
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
273
286
|
out[0] = (BYTE)bitStream;
|
274
287
|
out[1] = (BYTE)(bitStream>>8);
|
275
288
|
out += 2;
|
@@ -277,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
277
290
|
bitCount -= 16;
|
278
291
|
} }
|
279
292
|
|
293
|
+
if (remaining != 1)
|
294
|
+
return ERROR(GENERIC); /* incorrect normalized distribution */
|
295
|
+
assert(symbol <= alphabetSize);
|
296
|
+
|
280
297
|
/* flush remaining bitStream */
|
281
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
298
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
299
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
282
300
|
out[0] = (BYTE)bitStream;
|
283
301
|
out[1] = (BYTE)(bitStream>>8);
|
284
302
|
out+= (bitCount+7) /8;
|
285
303
|
|
286
|
-
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
|
287
|
-
|
288
304
|
return (out-ostart);
|
289
305
|
}
|
290
306
|
|
291
307
|
|
292
|
-
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
308
|
+
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
309
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
293
310
|
{
|
294
311
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
|
295
312
|
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
|
@@ -297,26 +314,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
|
297
314
|
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
|
298
315
|
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
|
299
316
|
|
300
|
-
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
|
317
|
+
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
|
301
318
|
}
|
302
319
|
|
303
320
|
|
304
321
|
/*-**************************************************************
|
305
322
|
* FSE Compression Code
|
306
323
|
****************************************************************/
|
307
|
-
/*! FSE_sizeof_CTable() :
|
308
|
-
FSE_CTable is a variable size structure which contains :
|
309
|
-
`U16 tableLog;`
|
310
|
-
`U16 maxSymbolValue;`
|
311
|
-
`U16 nextStateNumber[1 << tableLog];` // This size is variable
|
312
|
-
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
|
313
|
-
Allocation is manual (C standard does not support variable-size structures).
|
314
|
-
*/
|
315
|
-
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
|
316
|
-
{
|
317
|
-
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
318
|
-
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
319
|
-
}
|
320
324
|
|
321
325
|
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
322
326
|
{
|
@@ -331,7 +335,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
|
331
335
|
/* provides the minimum logSize to safely represent a distribution */
|
332
336
|
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
333
337
|
{
|
334
|
-
U32 minBitsSrc = BIT_highbit32((U32)(srcSize
|
338
|
+
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
335
339
|
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
336
340
|
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
337
341
|
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
@@ -394,6 +398,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
394
398
|
}
|
395
399
|
ToDistribute = (1 << tableLog) - distributed;
|
396
400
|
|
401
|
+
if (ToDistribute == 0)
|
402
|
+
return 0;
|
403
|
+
|
397
404
|
if ((total / ToDistribute) > lowOne) {
|
398
405
|
/* risk of rounding to zero */
|
399
406
|
lowOne = (U32)((total * 3) / (ToDistribute * 2));
|