zstd-ruby 1.3.5.0 → 1.3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +2 -1
- data/ext/zstdruby/libzstd/BUCK +1 -0
- data/ext/zstdruby/libzstd/Makefile +25 -13
- data/ext/zstdruby/libzstd/README.md +11 -10
- data/ext/zstdruby/libzstd/common/bitstream.h +8 -11
- data/ext/zstdruby/libzstd/common/compiler.h +30 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/mem.h +20 -2
- data/ext/zstdruby/libzstd/common/xxhash.c +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +55 -48
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +290 -147
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +5 -2
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +63 -51
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +44 -33
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +125 -116
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -15
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +0 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +42 -36
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -9
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +96 -51
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +16 -6
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +169 -101
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +111 -87
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +83 -0
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +3 -3
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +728 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +34 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +9 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +12 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +20 -18
- data/ext/zstdruby/libzstd/zstd.h +109 -50
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35d073986861ccb9bdb6a87e3f7335dd5d08249913c668a6d6279aadcfca7551
|
4
|
+
data.tar.gz: 315406589d9487c367edd70a2dc9c090dd796b0764fb1d4200a32b4375c430bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4395106140fe83001703df637cb1632f7ef17cba187d9935c385d30b8c3a06a8688d9a169f4c9cbd1cb1d53463b895c1e414503f8030b7115ff8e612510cba9b
|
7
|
+
data.tar.gz: ab48fc82d7d1eef1179242839d93ea3cd2eb88e27b3dc4957d3457a4cf125bf0764f48b2c28ae626346bb3866c6a81114da2de6c75547f80a4e1589dc8032689
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/zstd-ruby.svg)](https://badge.fury.io/rb/zstd-ruby)
|
1
2
|
[![Build Status](https://travis-ci.org/SpringMT/zstd-ruby.svg?branch=master)](https://travis-ci.org/SpringMT/zstd-ruby)
|
2
3
|
|
3
4
|
# zstd-ruby
|
@@ -9,7 +10,7 @@ See https://github.com/facebook/zstd
|
|
9
10
|
Fork from https://github.com/jarredholman/ruby-zstd.
|
10
11
|
|
11
12
|
## Zstd version
|
12
|
-
v1.3.
|
13
|
+
v1.3.7 (https://github.com/facebook/zstd/tree/v1.3.7)
|
13
14
|
|
14
15
|
## Installation
|
15
16
|
|
data/ext/zstdruby/libzstd/BUCK
CHANGED
@@ -19,15 +19,20 @@ LIBVER := $(shell echo $(LIBVER_SCRIPT))
|
|
19
19
|
VERSION?= $(LIBVER)
|
20
20
|
|
21
21
|
CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
|
22
|
+
ifeq ($(OS),Windows_NT) # MinGW assumed
|
23
|
+
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting
|
24
|
+
endif
|
22
25
|
CFLAGS ?= -O3
|
23
|
-
DEBUGFLAGS
|
26
|
+
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
24
27
|
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
25
28
|
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
26
29
|
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
27
|
-
-Wredundant-decls
|
30
|
+
-Wredundant-decls -Wmissing-prototypes
|
28
31
|
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
29
32
|
FLAGS = $(CPPFLAGS) $(CFLAGS)
|
30
33
|
|
34
|
+
GREP = grep --color=never
|
35
|
+
|
31
36
|
ZSTDCOMMON_FILES := $(sort $(wildcard common/*.c))
|
32
37
|
ZSTDCOMP_FILES := $(sort $(wildcard compress/*.c))
|
33
38
|
ZSTDDECOMP_FILES := $(sort $(wildcard decompress/*.c))
|
@@ -35,7 +40,7 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
|
|
35
40
|
ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
|
36
41
|
ZSTD_FILES := $(ZSTDCOMMON_FILES)
|
37
42
|
|
38
|
-
ZSTD_LEGACY_SUPPORT ?=
|
43
|
+
ZSTD_LEGACY_SUPPORT ?= 5
|
39
44
|
ZSTD_LIB_COMPRESSION ?= 1
|
40
45
|
ZSTD_LIB_DECOMPRESSION ?= 1
|
41
46
|
ZSTD_LIB_DICTBUILDER ?= 1
|
@@ -52,11 +57,11 @@ ifeq ($(ZSTD_LIB_DECOMPRESSION), 0)
|
|
52
57
|
endif
|
53
58
|
|
54
59
|
ifneq ($(ZSTD_LIB_COMPRESSION), 0)
|
55
|
-
ZSTD_FILES += $(ZSTDCOMP_FILES)
|
60
|
+
ZSTD_FILES += $(ZSTDCOMP_FILES)
|
56
61
|
endif
|
57
62
|
|
58
63
|
ifneq ($(ZSTD_LIB_DECOMPRESSION), 0)
|
59
|
-
ZSTD_FILES += $(ZSTDDECOMP_FILES)
|
64
|
+
ZSTD_FILES += $(ZSTDDECOMP_FILES)
|
60
65
|
endif
|
61
66
|
|
62
67
|
ifneq ($(ZSTD_LIB_DEPRECATED), 0)
|
@@ -69,7 +74,7 @@ endif
|
|
69
74
|
|
70
75
|
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
|
71
76
|
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
|
72
|
-
ZSTD_FILES += $(shell ls legacy/*.c |
|
77
|
+
ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
|
73
78
|
endif
|
74
79
|
CPPFLAGS += -I./legacy
|
75
80
|
endif
|
@@ -91,8 +96,6 @@ else
|
|
91
96
|
SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
|
92
97
|
endif
|
93
98
|
|
94
|
-
LIBZSTD = libzstd.$(SHARED_EXT_VER)
|
95
|
-
|
96
99
|
|
97
100
|
.PHONY: default all clean install uninstall
|
98
101
|
|
@@ -108,19 +111,28 @@ libzstd.a: $(ZSTD_OBJ)
|
|
108
111
|
libzstd.a-mt: CPPFLAGS += -DZSTD_MULTITHREAD
|
109
112
|
libzstd.a-mt: libzstd.a
|
110
113
|
|
111
|
-
|
114
|
+
ifneq (,$(filter Windows%,$(OS)))
|
115
|
+
|
116
|
+
LIBZSTD = dll\libzstd.dll
|
112
117
|
$(LIBZSTD): $(ZSTD_FILES)
|
113
118
|
@echo compiling dynamic library $(LIBVER)
|
114
|
-
|
115
|
-
|
116
|
-
|
119
|
+
@$(CC) $(FLAGS) -DZSTD_DLL_EXPORT=1 -shared $^ -o $@
|
120
|
+
dlltool -D $@ -d dll\libzstd.def -l dll\libzstd.lib
|
121
|
+
|
117
122
|
else
|
123
|
+
|
124
|
+
LIBZSTD = libzstd.$(SHARED_EXT_VER)
|
125
|
+
$(LIBZSTD): LDFLAGS += -shared -fPIC -fvisibility=hidden
|
126
|
+
$(LIBZSTD): $(ZSTD_FILES)
|
127
|
+
@echo compiling dynamic library $(LIBVER)
|
118
128
|
@$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
|
119
129
|
@echo creating versioned links
|
120
130
|
@ln -sf $@ libzstd.$(SHARED_EXT_MAJOR)
|
121
131
|
@ln -sf $@ libzstd.$(SHARED_EXT)
|
132
|
+
|
122
133
|
endif
|
123
134
|
|
135
|
+
|
124
136
|
libzstd : $(LIBZSTD)
|
125
137
|
|
126
138
|
libzstd-mt : CPPFLAGS += -DZSTD_MULTITHREAD
|
@@ -154,7 +166,7 @@ clean:
|
|
154
166
|
#-----------------------------------------------------------------------------
|
155
167
|
# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
|
156
168
|
#-----------------------------------------------------------------------------
|
157
|
-
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
|
169
|
+
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
|
158
170
|
|
159
171
|
DESTDIR ?=
|
160
172
|
# directory variables : GNU conventions prefer lowercase
|
@@ -13,7 +13,7 @@ including commands variables, staged install, directory variables and standard t
|
|
13
13
|
- `make install` : install libraries in default system directories
|
14
14
|
|
15
15
|
`libzstd` default scope includes compression, decompression, dictionary building,
|
16
|
-
and decoding support for legacy formats >= v0.
|
16
|
+
and decoding support for legacy formats >= v0.5.0.
|
17
17
|
|
18
18
|
|
19
19
|
#### API
|
@@ -48,23 +48,24 @@ It's possible to compile only a limited set of features.
|
|
48
48
|
This module depends on both `lib/common` and `lib/compress` .
|
49
49
|
- `lib/legacy` : source code to decompress legacy zstd formats, starting from `v0.1.0`.
|
50
50
|
This module depends on `lib/common` and `lib/decompress`.
|
51
|
-
To enable this feature,
|
52
|
-
|
53
|
-
Using higher number limits versions supported.
|
51
|
+
To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
|
52
|
+
Specifying a number limits versions supported to that version onward.
|
54
53
|
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
|
55
54
|
`ZSTD_LEGACY_SUPPORT=3` means : "support legacy formats >= v0.3.0", and so on.
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
Currently, the default library setting is `ZSTD_LEGACY_SUPPORT=5`.
|
56
|
+
It can be changed at build time to any other value.
|
57
|
+
Note that any number >= 8 translates into "do __not__ support legacy formats",
|
58
|
+
since all versions of `zstd` >= v0.8 are compatible with v1+ specification.
|
59
|
+
`ZSTD_LEGACY_SUPPORT=0` also means "do __not__ support legacy formats".
|
59
60
|
Once enabled, this capability is transparently triggered within decompression functions.
|
60
61
|
It's also possible to invoke directly legacy API, as exposed in `lib/legacy/zstd_legacy.h`.
|
61
62
|
Each version also provides an additional dedicated set of advanced API.
|
62
63
|
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
63
64
|
Note : `lib/legacy` only supports _decoding_ legacy formats.
|
64
|
-
- Similarly, you can define `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
65
|
-
and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will
|
65
|
+
- Similarly, you can define `ZSTD_LIB_COMPRESSION`, `ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
66
|
+
and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will
|
66
67
|
also disable compilation of all dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
|
67
|
-
dictBuilder).
|
68
|
+
dictBuilder).
|
68
69
|
|
69
70
|
|
70
71
|
#### Multithreading support
|
@@ -339,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
|
339
339
|
|
340
340
|
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
341
341
|
{
|
342
|
-
|
343
|
-
|
344
|
-
if (sizeof(bitContainer)==8)
|
345
|
-
return _bextr_u64(bitContainer, start, nbBits);
|
346
|
-
else
|
347
|
-
# endif
|
348
|
-
return _bextr_u32(bitContainer, start, nbBits);
|
349
|
-
#else
|
342
|
+
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
343
|
+
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
350
344
|
assert(nbBits < BIT_MASK_SIZE);
|
351
|
-
return (bitContainer >> start) & BIT_mask[nbBits];
|
352
|
-
#endif
|
345
|
+
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
353
346
|
}
|
354
347
|
|
355
348
|
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
@@ -366,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|
366
359
|
* @return : value extracted */
|
367
360
|
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
368
361
|
{
|
369
|
-
|
362
|
+
/* arbitrate between double-shift and shift+mask */
|
363
|
+
#if 1
|
364
|
+
/* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
|
365
|
+
* bitstream is likely corrupted, and result is undefined */
|
370
366
|
return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
|
371
367
|
#else
|
368
|
+
/* this code path is slower on my os-x laptop */
|
372
369
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
373
370
|
return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
|
374
371
|
#endif
|
@@ -88,15 +88,37 @@
|
|
88
88
|
#endif
|
89
89
|
#endif
|
90
90
|
|
91
|
-
/* prefetch
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
91
|
+
/* prefetch
|
92
|
+
* can be disabled, by declaring NO_PREFETCH macro
|
93
|
+
* All prefetch invocations use a single default locality 2,
|
94
|
+
* generating instruction prefetcht1,
|
95
|
+
* which, according to Intel, means "load data into L2 cache".
|
96
|
+
* This is a good enough "middle ground" for the time being,
|
97
|
+
* though in theory, it would be better to specialize locality depending on data being prefetched.
|
98
|
+
* Tests could not determine any sensible difference based on locality value. */
|
99
|
+
#if defined(NO_PREFETCH)
|
100
|
+
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
97
101
|
#else
|
98
|
-
#
|
99
|
-
#
|
102
|
+
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
103
|
+
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
104
|
+
# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
105
|
+
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
106
|
+
# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
107
|
+
# else
|
108
|
+
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
109
|
+
# endif
|
110
|
+
#endif /* NO_PREFETCH */
|
111
|
+
|
112
|
+
#define CACHELINE_SIZE 64
|
113
|
+
|
114
|
+
#define PREFETCH_AREA(p, s) { \
|
115
|
+
const char* const _ptr = (const char*)(p); \
|
116
|
+
size_t const _size = (size_t)(s); \
|
117
|
+
size_t _pos; \
|
118
|
+
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
119
|
+
PREFETCH(_ptr + _pos); \
|
120
|
+
} \
|
121
|
+
}
|
100
122
|
|
101
123
|
/* disable warnings */
|
102
124
|
#ifdef _MSC_VER /* Visual Studio */
|
@@ -39,6 +39,10 @@ extern "C" {
|
|
39
39
|
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
|
40
40
|
#endif
|
41
41
|
|
42
|
+
#ifndef __has_builtin
|
43
|
+
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
|
44
|
+
#endif
|
45
|
+
|
42
46
|
/* code only tested on 32 and 64 bits systems */
|
43
47
|
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
|
44
48
|
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
@@ -57,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
|
|
57
61
|
typedef uint64_t U64;
|
58
62
|
typedef int64_t S64;
|
59
63
|
#else
|
64
|
+
# include <limits.h>
|
65
|
+
#if CHAR_BIT != 8
|
66
|
+
# error "this implementation requires char to be exactly 8-bit type"
|
67
|
+
#endif
|
60
68
|
typedef unsigned char BYTE;
|
69
|
+
#if USHRT_MAX != 65535
|
70
|
+
# error "this implementation requires short to be exactly 16-bit type"
|
71
|
+
#endif
|
61
72
|
typedef unsigned short U16;
|
62
73
|
typedef signed short S16;
|
74
|
+
#if UINT_MAX != 4294967295
|
75
|
+
# error "this implementation requires int to be exactly 32-bit type"
|
76
|
+
#endif
|
63
77
|
typedef unsigned int U32;
|
64
78
|
typedef signed int S32;
|
79
|
+
/* note : there are no limits defined for long long type in C90.
|
80
|
+
* limits exist in C99, however, in such case, <stdint.h> is preferred */
|
65
81
|
typedef unsigned long long U64;
|
66
82
|
typedef signed long long S64;
|
67
83
|
#endif
|
@@ -186,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
|
|
186
202
|
{
|
187
203
|
#if defined(_MSC_VER) /* Visual Studio */
|
188
204
|
return _byteswap_ulong(in);
|
189
|
-
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
|
205
|
+
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|
206
|
+
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
|
190
207
|
return __builtin_bswap32(in);
|
191
208
|
#else
|
192
209
|
return ((in << 24) & 0xff000000 ) |
|
@@ -200,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
|
|
200
217
|
{
|
201
218
|
#if defined(_MSC_VER) /* Visual Studio */
|
202
219
|
return _byteswap_uint64(in);
|
203
|
-
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
|
220
|
+
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|
221
|
+
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
|
204
222
|
return __builtin_bswap64(in);
|
205
223
|
#else
|
206
224
|
return ((in << 56) & 0xff00000000000000ULL) |
|
@@ -98,6 +98,7 @@
|
|
98
98
|
/* Modify the local functions below should you wish to use some other memory routines */
|
99
99
|
/* for malloc(), free() */
|
100
100
|
#include <stdlib.h>
|
101
|
+
#include <stddef.h> /* size_t */
|
101
102
|
static void* XXH_malloc(size_t s) { return malloc(s); }
|
102
103
|
static void XXH_free (void* p) { free(p); }
|
103
104
|
/* for memcpy() */
|
@@ -79,8 +79,7 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
|
79
79
|
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
80
80
|
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
81
81
|
|
82
|
-
#define ZSTD_FRAMEIDSIZE 4
|
83
|
-
static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
|
82
|
+
#define ZSTD_FRAMEIDSIZE 4 /* magic number size */
|
84
83
|
|
85
84
|
#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
|
86
85
|
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
|
@@ -193,6 +192,8 @@ typedef struct {
|
|
193
192
|
BYTE* llCode;
|
194
193
|
BYTE* mlCode;
|
195
194
|
BYTE* ofCode;
|
195
|
+
size_t maxNbSeq;
|
196
|
+
size_t maxNbLit;
|
196
197
|
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
197
198
|
U32 longLengthPos;
|
198
199
|
} seqStore_t;
|
@@ -83,7 +83,9 @@
|
|
83
83
|
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
|
84
84
|
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
|
85
85
|
*/
|
86
|
-
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
86
|
+
size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
87
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
88
|
+
void* workSpace, size_t wkspSize)
|
87
89
|
{
|
88
90
|
U32 const tableSize = 1 << tableLog;
|
89
91
|
U32 const tableMask = tableSize - 1;
|
@@ -101,10 +103,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
101
103
|
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
102
104
|
tableU16[-2] = (U16) tableLog;
|
103
105
|
tableU16[-1] = (U16) maxSymbolValue;
|
104
|
-
assert(tableLog < 16); /* required for
|
106
|
+
assert(tableLog < 16); /* required for threshold strategy to work */
|
105
107
|
|
106
108
|
/* For explanations on how to distribute symbol values over the table :
|
107
|
-
|
109
|
+
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
|
110
|
+
|
111
|
+
#ifdef __clang_analyzer__
|
112
|
+
memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
|
113
|
+
#endif
|
108
114
|
|
109
115
|
/* symbol start positions */
|
110
116
|
{ U32 u;
|
@@ -124,13 +130,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
|
124
130
|
U32 symbol;
|
125
131
|
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
126
132
|
int nbOccurences;
|
127
|
-
|
133
|
+
int const freq = normalizedCounter[symbol];
|
134
|
+
for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
|
128
135
|
tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
|
129
136
|
position = (position + step) & tableMask;
|
130
|
-
while (position > highThreshold)
|
137
|
+
while (position > highThreshold)
|
138
|
+
position = (position + step) & tableMask; /* Low proba area */
|
131
139
|
} }
|
132
140
|
|
133
|
-
|
141
|
+
assert(position==0); /* Must have initialized all positions */
|
134
142
|
}
|
135
143
|
|
136
144
|
/* Build table */
|
@@ -201,9 +209,10 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
201
209
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
202
210
|
}
|
203
211
|
|
204
|
-
static size_t
|
205
|
-
|
206
|
-
|
212
|
+
static size_t
|
213
|
+
FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
214
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
215
|
+
unsigned writeIsSafe)
|
207
216
|
{
|
208
217
|
BYTE* const ostart = (BYTE*) header;
|
209
218
|
BYTE* out = ostart;
|
@@ -212,13 +221,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
212
221
|
const int tableSize = 1 << tableLog;
|
213
222
|
int remaining;
|
214
223
|
int threshold;
|
215
|
-
U32 bitStream;
|
216
|
-
int bitCount;
|
217
|
-
unsigned
|
218
|
-
|
224
|
+
U32 bitStream = 0;
|
225
|
+
int bitCount = 0;
|
226
|
+
unsigned symbol = 0;
|
227
|
+
unsigned const alphabetSize = maxSymbolValue + 1;
|
228
|
+
int previousIs0 = 0;
|
219
229
|
|
220
|
-
bitStream = 0;
|
221
|
-
bitCount = 0;
|
222
230
|
/* Table Size */
|
223
231
|
bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
|
224
232
|
bitCount += 4;
|
@@ -228,48 +236,53 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
228
236
|
threshold = tableSize;
|
229
237
|
nbBits = tableLog+1;
|
230
238
|
|
231
|
-
while (remaining>1) { /* stops at 1 */
|
232
|
-
if (
|
233
|
-
unsigned start =
|
234
|
-
while (!normalizedCounter[
|
235
|
-
|
239
|
+
while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
|
240
|
+
if (previousIs0) {
|
241
|
+
unsigned start = symbol;
|
242
|
+
while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
|
243
|
+
if (symbol == alphabetSize) break; /* incorrect distribution */
|
244
|
+
while (symbol >= start+24) {
|
236
245
|
start+=24;
|
237
246
|
bitStream += 0xFFFFU << bitCount;
|
238
|
-
if ((!writeIsSafe) && (out > oend-2))
|
247
|
+
if ((!writeIsSafe) && (out > oend-2))
|
248
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
239
249
|
out[0] = (BYTE) bitStream;
|
240
250
|
out[1] = (BYTE)(bitStream>>8);
|
241
251
|
out+=2;
|
242
252
|
bitStream>>=16;
|
243
253
|
}
|
244
|
-
while (
|
254
|
+
while (symbol >= start+3) {
|
245
255
|
start+=3;
|
246
256
|
bitStream += 3 << bitCount;
|
247
257
|
bitCount += 2;
|
248
258
|
}
|
249
|
-
bitStream += (
|
259
|
+
bitStream += (symbol-start) << bitCount;
|
250
260
|
bitCount += 2;
|
251
261
|
if (bitCount>16) {
|
252
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
262
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
263
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
253
264
|
out[0] = (BYTE)bitStream;
|
254
265
|
out[1] = (BYTE)(bitStream>>8);
|
255
266
|
out += 2;
|
256
267
|
bitStream >>= 16;
|
257
268
|
bitCount -= 16;
|
258
269
|
} }
|
259
|
-
{ int count = normalizedCounter[
|
260
|
-
int const max = (2*threshold-1)-remaining;
|
270
|
+
{ int count = normalizedCounter[symbol++];
|
271
|
+
int const max = (2*threshold-1) - remaining;
|
261
272
|
remaining -= count < 0 ? -count : count;
|
262
273
|
count++; /* +1 for extra accuracy */
|
263
|
-
if (count>=threshold)
|
274
|
+
if (count>=threshold)
|
275
|
+
count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
264
276
|
bitStream += count << bitCount;
|
265
277
|
bitCount += nbBits;
|
266
278
|
bitCount -= (count<max);
|
267
|
-
|
279
|
+
previousIs0 = (count==1);
|
268
280
|
if (remaining<1) return ERROR(GENERIC);
|
269
281
|
while (remaining<threshold) { nbBits--; threshold>>=1; }
|
270
282
|
}
|
271
283
|
if (bitCount>16) {
|
272
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
284
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
285
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
273
286
|
out[0] = (BYTE)bitStream;
|
274
287
|
out[1] = (BYTE)(bitStream>>8);
|
275
288
|
out += 2;
|
@@ -277,19 +290,23 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
277
290
|
bitCount -= 16;
|
278
291
|
} }
|
279
292
|
|
293
|
+
if (remaining != 1)
|
294
|
+
return ERROR(GENERIC); /* incorrect normalized distribution */
|
295
|
+
assert(symbol <= alphabetSize);
|
296
|
+
|
280
297
|
/* flush remaining bitStream */
|
281
|
-
if ((!writeIsSafe) && (out > oend - 2))
|
298
|
+
if ((!writeIsSafe) && (out > oend - 2))
|
299
|
+
return ERROR(dstSize_tooSmall); /* Buffer overflow */
|
282
300
|
out[0] = (BYTE)bitStream;
|
283
301
|
out[1] = (BYTE)(bitStream>>8);
|
284
302
|
out+= (bitCount+7) /8;
|
285
303
|
|
286
|
-
if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
|
287
|
-
|
288
304
|
return (out-ostart);
|
289
305
|
}
|
290
306
|
|
291
307
|
|
292
|
-
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
308
|
+
size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
309
|
+
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
293
310
|
{
|
294
311
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
|
295
312
|
if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
|
@@ -297,26 +314,13 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
|
297
314
|
if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
|
298
315
|
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
|
299
316
|
|
300
|
-
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
|
317
|
+
return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
|
301
318
|
}
|
302
319
|
|
303
320
|
|
304
321
|
/*-**************************************************************
|
305
322
|
* FSE Compression Code
|
306
323
|
****************************************************************/
|
307
|
-
/*! FSE_sizeof_CTable() :
|
308
|
-
FSE_CTable is a variable size structure which contains :
|
309
|
-
`U16 tableLog;`
|
310
|
-
`U16 maxSymbolValue;`
|
311
|
-
`U16 nextStateNumber[1 << tableLog];` // This size is variable
|
312
|
-
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
|
313
|
-
Allocation is manual (C standard does not support variable-size structures).
|
314
|
-
*/
|
315
|
-
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
|
316
|
-
{
|
317
|
-
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
318
|
-
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
319
|
-
}
|
320
324
|
|
321
325
|
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
322
326
|
{
|
@@ -331,7 +335,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
|
|
331
335
|
/* provides the minimum logSize to safely represent a distribution */
|
332
336
|
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
|
333
337
|
{
|
334
|
-
U32 minBitsSrc = BIT_highbit32((U32)(srcSize
|
338
|
+
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
|
335
339
|
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
|
336
340
|
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
|
337
341
|
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
@@ -394,6 +398,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
394
398
|
}
|
395
399
|
ToDistribute = (1 << tableLog) - distributed;
|
396
400
|
|
401
|
+
if (ToDistribute == 0)
|
402
|
+
return 0;
|
403
|
+
|
397
404
|
if ((total / ToDistribute) > lowOne) {
|
398
405
|
/* risk of rounding to zero */
|
399
406
|
lowOne = (U32)((total * 3) / (ToDistribute * 2));
|