multi_compress 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/multi_compress/extconf.rb +6 -1
- data/ext/multi_compress/vendor/zstd/lib/Makefile +26 -6
- data/ext/multi_compress/vendor/zstd/lib/README.md +11 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bits.h +92 -87
- data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +26 -29
- data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +36 -22
- data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +1 -1
- data/ext/multi_compress/vendor/zstd/lib/common/debug.h +0 -9
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +1 -0
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +0 -10
- data/ext/multi_compress/vendor/zstd/lib/common/fse.h +2 -17
- data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +2 -0
- data/ext/multi_compress/vendor/zstd/lib/common/huf.h +0 -9
- data/ext/multi_compress/vendor/zstd/lib/common/mem.h +7 -11
- data/ext/multi_compress/vendor/zstd/lib/common/pool.h +0 -9
- data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +22 -9
- data/ext/multi_compress/vendor/zstd/lib/common/threading.h +0 -8
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +93 -19
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +12 -0
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +1 -69
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +5 -12
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +10 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +7 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +1057 -367
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +227 -125
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +7 -7
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +7 -6
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +17 -17
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +41 -24
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +58 -50
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +4 -12
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +91 -74
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +4 -12
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +64 -64
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +30 -39
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +48 -33
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +6 -14
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +55 -51
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +8 -16
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +134 -93
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +4 -15
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +10 -3
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +14 -11
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +6 -12
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +5 -5
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +60 -19
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +2 -2
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2 -2
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +1 -1
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +6 -2
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +2 -2
- data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +2 -4
- data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +2 -2
- data/ext/multi_compress/vendor/zstd/lib/zdict.h +15 -8
- data/ext/multi_compress/vendor/zstd/lib/zstd.h +241 -132
- data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +1 -8
- data/lib/multi_compress/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae5350e9b46caeca01531ebf9dad7f603bcf1a5ef6d5c3e2638ff248ad3a4bab
|
|
4
|
+
data.tar.gz: 71eb14838de738d6590bdb33f31eca25e89a236c35b3ba54a1e01d10cfc88fe3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 02c084372c04faf4b75ea6bddb0b95a63cc580ed897099e97dbff2b61ce33a2b2aa1ba50143e01badd7d109e87ce8d88b3429dc35aadb4448f41fb451530323f
|
|
7
|
+
data.tar.gz: 46579de5cf8264c15c24cc6330e01407dd59ad8501c17e35a43d3e762c29fcd06e88c3181656619d87744198247857041f2207b525dfa4a6aa33f705bd990e86
|
data/CHANGELOG.md
CHANGED
|
@@ -163,9 +163,14 @@ else
|
|
|
163
163
|
vpath_dirs = configure_vendored_libraries(VENDOR_DIR)
|
|
164
164
|
end
|
|
165
165
|
|
|
166
|
-
$CFLAGS += " -
|
|
166
|
+
$CFLAGS += " -O3"
|
|
167
167
|
$CFLAGS += " -DXXH_NAMESPACE=MULTICOMPRESS_"
|
|
168
168
|
|
|
169
|
+
case RUBY_PLATFORM
|
|
170
|
+
when /x86_64|amd64|aarch64|arm64/
|
|
171
|
+
$CFLAGS += " -DBROTLI_BUILD_LITTLE_ENDIAN"
|
|
172
|
+
end
|
|
173
|
+
|
|
169
174
|
have_header("ruby/fiber/scheduler.h")
|
|
170
175
|
|
|
171
176
|
have_library("pthread") unless RUBY_PLATFORM.include?("darwin")
|
|
@@ -63,6 +63,8 @@ CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi
|
|
|
63
63
|
LDFLAGS_DYNLIB += -pthread
|
|
64
64
|
CPPFLAGS_STATICLIB += # static library build defaults to single-threaded
|
|
65
65
|
|
|
66
|
+
# pkg-config Libs.private points to LDFLAGS_DYNLIB
|
|
67
|
+
PCLIB := $(LDFLAGS_DYNLIB)
|
|
66
68
|
|
|
67
69
|
ifeq ($(findstring GCC,$(CCVER)),GCC)
|
|
68
70
|
decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
|
|
@@ -71,13 +73,15 @@ endif
|
|
|
71
73
|
|
|
72
74
|
# macOS linker doesn't support -soname, and use different extension
|
|
73
75
|
# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
|
|
74
|
-
|
|
76
|
+
UNAME_TARGET_SYSTEM ?= $(UNAME)
|
|
77
|
+
|
|
78
|
+
ifeq ($(UNAME_TARGET_SYSTEM), Darwin)
|
|
75
79
|
SHARED_EXT = dylib
|
|
76
80
|
SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
|
|
77
81
|
SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
|
|
78
82
|
SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
|
|
79
83
|
else
|
|
80
|
-
ifeq ($(
|
|
84
|
+
ifeq ($(UNAME_TARGET_SYSTEM), AIX)
|
|
81
85
|
SONAME_FLAGS =
|
|
82
86
|
else
|
|
83
87
|
SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
|
|
@@ -186,12 +190,15 @@ lib : libzstd.a libzstd
|
|
|
186
190
|
%-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD
|
|
187
191
|
%-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD
|
|
188
192
|
%-mt : LDFLAGS_DYNLIB := -pthread
|
|
193
|
+
%-mt : PCLIB :=
|
|
194
|
+
%-mt : PCMTLIB := $(LDFLAGS_DYNLIB)
|
|
189
195
|
%-mt : %
|
|
190
196
|
@echo multi-threaded build completed
|
|
191
197
|
|
|
192
198
|
%-nomt : CPPFLAGS_DYNLIB :=
|
|
193
199
|
%-nomt : LDFLAGS_DYNLIB :=
|
|
194
200
|
%-nomt : CPPFLAGS_STATICLIB :=
|
|
201
|
+
%-nomt : PCLIB :=
|
|
195
202
|
%-nomt : %
|
|
196
203
|
@echo single-threaded build completed
|
|
197
204
|
|
|
@@ -261,7 +268,7 @@ clean:
|
|
|
261
268
|
#-----------------------------------------------------------------------------
|
|
262
269
|
# make install is validated only for below listed environments
|
|
263
270
|
#-----------------------------------------------------------------------------
|
|
264
|
-
ifneq (,$(filter
|
|
271
|
+
ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT% CYGWIN_NT%,$(UNAME)))
|
|
265
272
|
|
|
266
273
|
lib: libzstd.pc
|
|
267
274
|
|
|
@@ -292,13 +299,21 @@ PCLIBPREFIX := $(if $(findstring $(LIBDIR),$(PCLIBDIR)),,$${exec_prefix})
|
|
|
292
299
|
# to PREFIX, rather than as a resolved value.
|
|
293
300
|
PCEXEC_PREFIX := $(if $(HAS_EXPLICIT_EXEC_PREFIX),$(EXEC_PREFIX),$${prefix})
|
|
294
301
|
|
|
295
|
-
|
|
302
|
+
|
|
303
|
+
ifneq ($(MT),)
|
|
304
|
+
PCLIB :=
|
|
305
|
+
PCMTLIB := $(LDFLAGS_DYNLIB)
|
|
306
|
+
else
|
|
307
|
+
PCLIB := $(LDFLAGS_DYNLIB)
|
|
308
|
+
endif
|
|
309
|
+
|
|
310
|
+
ifneq (,$(filter FreeBSD NetBSD DragonFly,$(UNAME)))
|
|
296
311
|
PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
|
|
297
312
|
else
|
|
298
313
|
PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
|
|
299
314
|
endif
|
|
300
315
|
|
|
301
|
-
ifneq (,$(filter
|
|
316
|
+
ifneq (,$(filter SunOS,$(UNAME)))
|
|
302
317
|
INSTALL ?= ginstall
|
|
303
318
|
else
|
|
304
319
|
INSTALL ?= install
|
|
@@ -308,6 +323,10 @@ INSTALL_PROGRAM ?= $(INSTALL)
|
|
|
308
323
|
INSTALL_DATA ?= $(INSTALL) -m 644
|
|
309
324
|
|
|
310
325
|
|
|
326
|
+
# pkg-config library define.
|
|
327
|
+
# For static single-threaded library declare -pthread in Libs.private
|
|
328
|
+
# For static multi-threaded library declare -pthread in Libs and Cflags
|
|
329
|
+
.PHONY: libzstd.pc
|
|
311
330
|
libzstd.pc: libzstd.pc.in
|
|
312
331
|
@echo creating pkgconfig
|
|
313
332
|
@sed \
|
|
@@ -316,7 +335,8 @@ libzstd.pc: libzstd.pc.in
|
|
|
316
335
|
-e 's|@INCLUDEDIR@|$(PCINCPREFIX)$(PCINCDIR)|' \
|
|
317
336
|
-e 's|@LIBDIR@|$(PCLIBPREFIX)$(PCLIBDIR)|' \
|
|
318
337
|
-e 's|@VERSION@|$(VERSION)|' \
|
|
319
|
-
-e 's|@
|
|
338
|
+
-e 's|@LIBS_MT@|$(PCMTLIB)|' \
|
|
339
|
+
-e 's|@LIBS_PRIVATE@|$(PCLIB)|' \
|
|
320
340
|
$< >$@
|
|
321
341
|
|
|
322
342
|
.PHONY: install
|
|
@@ -27,12 +27,16 @@ Enabling multithreading requires 2 conditions :
|
|
|
27
27
|
|
|
28
28
|
For convenience, we provide a build target to generate multi and single threaded libraries:
|
|
29
29
|
- Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
|
|
30
|
+
Note that the `.pc` generated on calling `make lib-mt` will already include the required Libs and Cflags.
|
|
30
31
|
- Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
|
|
31
32
|
- By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
|
|
32
33
|
|
|
33
34
|
When linking a POSIX program with a multithreaded version of `libzstd`,
|
|
34
35
|
note that it's necessary to invoke the `-pthread` flag during link stage.
|
|
35
36
|
|
|
37
|
+
The `.pc` generated from `make install` or `make install-pc` always assumes a single-threaded static library
|
|
38
|
+
is compiled. To correctly generate a `.pc` for the multi-threaded static library, set `MT=1` as an environment variable.
|
|
39
|
+
|
|
36
40
|
Multithreading capabilities are exposed
|
|
37
41
|
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
|
|
38
42
|
|
|
@@ -145,6 +149,13 @@ The file structure is designed to make this selection manually achievable for an
|
|
|
145
149
|
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
|
|
146
150
|
the shared library, which is now hidden by default.
|
|
147
151
|
|
|
152
|
+
- The build macro `STATIC_BMI2` can be set to 1 to force usage of `bmi2` instructions.
|
|
153
|
+
It is generally not necessary to set this build macro,
|
|
154
|
+
because `STATIC_BMI2` will be automatically set to 1
|
|
155
|
+
on detecting the presence of the corresponding instruction set in the compilation target.
|
|
156
|
+
It's nonetheless available as an optional manual toggle for better control,
|
|
157
|
+
and can also be used to forcefully disable `bmi2` instructions by setting it to 0.
|
|
158
|
+
|
|
148
159
|
- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
|
|
149
160
|
which can detect at runtime the presence of BMI2 instructions, and use them only if present.
|
|
150
161
|
These instructions contribute to better performance, notably on the decoder side.
|
|
@@ -28,27 +28,29 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
|
|
|
28
28
|
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
|
|
29
29
|
{
|
|
30
30
|
assert(val != 0);
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
#
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
31
|
+
#if defined(_MSC_VER)
|
|
32
|
+
# if STATIC_BMI2
|
|
33
|
+
return (unsigned)_tzcnt_u32(val);
|
|
34
|
+
# else
|
|
35
|
+
if (val != 0) {
|
|
36
|
+
unsigned long r;
|
|
37
|
+
_BitScanForward(&r, val);
|
|
38
|
+
return (unsigned)r;
|
|
39
|
+
} else {
|
|
40
|
+
__assume(0); /* Should not reach this code path */
|
|
41
|
+
}
|
|
42
|
+
# endif
|
|
43
|
+
#elif defined(__GNUC__) && (__GNUC__ >= 4)
|
|
44
|
+
return (unsigned)__builtin_ctz(val);
|
|
45
|
+
#elif defined(__ICCARM__)
|
|
46
|
+
return (unsigned)__builtin_ctz(val);
|
|
47
|
+
#else
|
|
48
|
+
return ZSTD_countTrailingZeros32_fallback(val);
|
|
49
|
+
#endif
|
|
49
50
|
}
|
|
50
51
|
|
|
51
|
-
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
|
|
52
|
+
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
|
|
53
|
+
{
|
|
52
54
|
assert(val != 0);
|
|
53
55
|
{
|
|
54
56
|
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
|
|
@@ -67,86 +69,89 @@ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
|
|
|
67
69
|
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
|
|
68
70
|
{
|
|
69
71
|
assert(val != 0);
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
#
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
72
|
+
#if defined(_MSC_VER)
|
|
73
|
+
# if STATIC_BMI2
|
|
74
|
+
return (unsigned)_lzcnt_u32(val);
|
|
75
|
+
# else
|
|
76
|
+
if (val != 0) {
|
|
77
|
+
unsigned long r;
|
|
78
|
+
_BitScanReverse(&r, val);
|
|
79
|
+
return (unsigned)(31 - r);
|
|
80
|
+
} else {
|
|
81
|
+
__assume(0); /* Should not reach this code path */
|
|
82
|
+
}
|
|
83
|
+
# endif
|
|
84
|
+
#elif defined(__GNUC__) && (__GNUC__ >= 4)
|
|
85
|
+
return (unsigned)__builtin_clz(val);
|
|
86
|
+
#elif defined(__ICCARM__)
|
|
87
|
+
return (unsigned)__builtin_clz(val);
|
|
88
|
+
#else
|
|
89
|
+
return ZSTD_countLeadingZeros32_fallback(val);
|
|
90
|
+
#endif
|
|
88
91
|
}
|
|
89
92
|
|
|
90
93
|
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
|
|
91
94
|
{
|
|
92
95
|
assert(val != 0);
|
|
93
|
-
#
|
|
94
|
-
#
|
|
95
|
-
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
#
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
96
|
+
#if defined(_MSC_VER) && defined(_WIN64)
|
|
97
|
+
# if STATIC_BMI2
|
|
98
|
+
return (unsigned)_tzcnt_u64(val);
|
|
99
|
+
# else
|
|
100
|
+
if (val != 0) {
|
|
101
|
+
unsigned long r;
|
|
102
|
+
_BitScanForward64(&r, val);
|
|
103
|
+
return (unsigned)r;
|
|
104
|
+
} else {
|
|
105
|
+
__assume(0); /* Should not reach this code path */
|
|
106
|
+
}
|
|
107
|
+
# endif
|
|
108
|
+
#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
|
|
109
|
+
return (unsigned)__builtin_ctzll(val);
|
|
110
|
+
#elif defined(__ICCARM__)
|
|
111
|
+
return (unsigned)__builtin_ctzll(val);
|
|
112
|
+
#else
|
|
113
|
+
{
|
|
114
|
+
U32 mostSignificantWord = (U32)(val >> 32);
|
|
115
|
+
U32 leastSignificantWord = (U32)val;
|
|
116
|
+
if (leastSignificantWord == 0) {
|
|
117
|
+
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
|
|
118
|
+
} else {
|
|
119
|
+
return ZSTD_countTrailingZeros32(leastSignificantWord);
|
|
117
120
|
}
|
|
118
|
-
|
|
121
|
+
}
|
|
122
|
+
#endif
|
|
119
123
|
}
|
|
120
124
|
|
|
121
125
|
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
|
|
122
126
|
{
|
|
123
127
|
assert(val != 0);
|
|
124
|
-
#
|
|
125
|
-
#
|
|
126
|
-
|
|
127
|
-
#
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
#
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
128
|
+
#if defined(_MSC_VER) && defined(_WIN64)
|
|
129
|
+
# if STATIC_BMI2
|
|
130
|
+
return (unsigned)_lzcnt_u64(val);
|
|
131
|
+
# else
|
|
132
|
+
if (val != 0) {
|
|
133
|
+
unsigned long r;
|
|
134
|
+
_BitScanReverse64(&r, val);
|
|
135
|
+
return (unsigned)(63 - r);
|
|
136
|
+
} else {
|
|
137
|
+
__assume(0); /* Should not reach this code path */
|
|
138
|
+
}
|
|
139
|
+
# endif
|
|
140
|
+
#elif defined(__GNUC__) && (__GNUC__ >= 4)
|
|
141
|
+
return (unsigned)(__builtin_clzll(val));
|
|
142
|
+
#elif defined(__ICCARM__)
|
|
143
|
+
return (unsigned)(__builtin_clzll(val));
|
|
144
|
+
#else
|
|
145
|
+
{
|
|
146
|
+
U32 mostSignificantWord = (U32)(val >> 32);
|
|
147
|
+
U32 leastSignificantWord = (U32)val;
|
|
148
|
+
if (mostSignificantWord == 0) {
|
|
149
|
+
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
|
|
150
|
+
} else {
|
|
151
|
+
return ZSTD_countLeadingZeros32(mostSignificantWord);
|
|
148
152
|
}
|
|
149
|
-
|
|
153
|
+
}
|
|
154
|
+
#endif
|
|
150
155
|
}
|
|
151
156
|
|
|
152
157
|
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
|
|
@@ -14,9 +14,6 @@
|
|
|
14
14
|
#ifndef BITSTREAM_H_MODULE
|
|
15
15
|
#define BITSTREAM_H_MODULE
|
|
16
16
|
|
|
17
|
-
#if defined (__cplusplus)
|
|
18
|
-
extern "C" {
|
|
19
|
-
#endif
|
|
20
17
|
/*
|
|
21
18
|
* This API consists of small unitary functions, which must be inlined for best performance.
|
|
22
19
|
* Since link-time-optimization is not available for all compilers,
|
|
@@ -32,7 +29,6 @@ extern "C" {
|
|
|
32
29
|
#include "error_private.h" /* error codes and messages */
|
|
33
30
|
#include "bits.h" /* ZSTD_highbit32 */
|
|
34
31
|
|
|
35
|
-
|
|
36
32
|
/*=========================================
|
|
37
33
|
* Target specific
|
|
38
34
|
=========================================*/
|
|
@@ -52,12 +48,13 @@ extern "C" {
|
|
|
52
48
|
/*-******************************************
|
|
53
49
|
* bitStream encoding API (write forward)
|
|
54
50
|
********************************************/
|
|
51
|
+
typedef size_t BitContainerType;
|
|
55
52
|
/* bitStream can mix input from multiple sources.
|
|
56
53
|
* A critical property of these streams is that they encode and decode in **reverse** direction.
|
|
57
54
|
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
|
|
58
55
|
*/
|
|
59
56
|
typedef struct {
|
|
60
|
-
|
|
57
|
+
BitContainerType bitContainer;
|
|
61
58
|
unsigned bitPos;
|
|
62
59
|
char* startPtr;
|
|
63
60
|
char* ptr;
|
|
@@ -65,7 +62,7 @@ typedef struct {
|
|
|
65
62
|
} BIT_CStream_t;
|
|
66
63
|
|
|
67
64
|
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
|
|
68
|
-
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
65
|
+
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
|
|
69
66
|
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
|
|
70
67
|
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
71
68
|
|
|
@@ -74,7 +71,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
|
74
71
|
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
|
|
75
72
|
*
|
|
76
73
|
* bits are first added to a local register.
|
|
77
|
-
* Local register is
|
|
74
|
+
* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
|
|
78
75
|
* Writing data into memory is an explicit operation, performed by the flushBits function.
|
|
79
76
|
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
|
|
80
77
|
* After a flushBits, a maximum of 7 bits might still be stored into local register.
|
|
@@ -90,7 +87,6 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
|
|
|
90
87
|
/*-********************************************
|
|
91
88
|
* bitStream decoding API (read backward)
|
|
92
89
|
**********************************************/
|
|
93
|
-
typedef size_t BitContainerType;
|
|
94
90
|
typedef struct {
|
|
95
91
|
BitContainerType bitContainer;
|
|
96
92
|
unsigned bitsConsumed;
|
|
@@ -106,7 +102,7 @@ typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
|
|
|
106
102
|
} BIT_DStream_status; /* result of BIT_reloadDStream() */
|
|
107
103
|
|
|
108
104
|
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
|
|
109
|
-
MEM_STATIC
|
|
105
|
+
MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
|
|
110
106
|
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
|
|
111
107
|
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
|
|
112
108
|
|
|
@@ -125,7 +121,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
|
|
|
125
121
|
/*-****************************************
|
|
126
122
|
* unsafe API
|
|
127
123
|
******************************************/
|
|
128
|
-
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
|
|
124
|
+
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
|
|
129
125
|
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
|
|
130
126
|
|
|
131
127
|
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
|
|
@@ -163,10 +159,15 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
|
|
|
163
159
|
return 0;
|
|
164
160
|
}
|
|
165
161
|
|
|
166
|
-
FORCE_INLINE_TEMPLATE
|
|
162
|
+
FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
|
|
167
163
|
{
|
|
168
|
-
#if
|
|
169
|
-
|
|
164
|
+
#if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
|
|
165
|
+
# if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
|
|
166
|
+
return _bzhi_u64(bitContainer, nbBits);
|
|
167
|
+
# else
|
|
168
|
+
DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
|
|
169
|
+
return _bzhi_u32(bitContainer, nbBits);
|
|
170
|
+
# endif
|
|
170
171
|
#else
|
|
171
172
|
assert(nbBits < BIT_MASK_SIZE);
|
|
172
173
|
return bitContainer & BIT_mask[nbBits];
|
|
@@ -177,7 +178,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbB
|
|
|
177
178
|
* can add up to 31 bits into `bitC`.
|
|
178
179
|
* Note : does not check for register overflow ! */
|
|
179
180
|
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
180
|
-
|
|
181
|
+
BitContainerType value, unsigned nbBits)
|
|
181
182
|
{
|
|
182
183
|
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
|
|
183
184
|
assert(nbBits < BIT_MASK_SIZE);
|
|
@@ -190,7 +191,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
|
|
|
190
191
|
* works only if `value` is _clean_,
|
|
191
192
|
* meaning all high bits above nbBits are 0 */
|
|
192
193
|
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
|
|
193
|
-
|
|
194
|
+
BitContainerType value, unsigned nbBits)
|
|
194
195
|
{
|
|
195
196
|
assert((value>>nbBits) == 0);
|
|
196
197
|
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
|
@@ -237,7 +238,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
|
|
|
237
238
|
BIT_addBitsFast(bitC, 1, 1); /* endMark */
|
|
238
239
|
BIT_flushBits(bitC);
|
|
239
240
|
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
|
240
|
-
return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
|
|
241
|
+
return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
|
|
241
242
|
}
|
|
242
243
|
|
|
243
244
|
|
|
@@ -298,12 +299,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|
|
298
299
|
return srcSize;
|
|
299
300
|
}
|
|
300
301
|
|
|
301
|
-
FORCE_INLINE_TEMPLATE
|
|
302
|
+
FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
|
|
302
303
|
{
|
|
303
304
|
return bitContainer >> start;
|
|
304
305
|
}
|
|
305
306
|
|
|
306
|
-
FORCE_INLINE_TEMPLATE
|
|
307
|
+
FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
|
|
307
308
|
{
|
|
308
309
|
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
|
309
310
|
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
|
@@ -313,7 +314,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
|
|
|
313
314
|
* such cpus old (pre-Haswell, 2013) and their performance is not of that
|
|
314
315
|
* importance.
|
|
315
316
|
*/
|
|
316
|
-
#if defined(__x86_64__) || defined(
|
|
317
|
+
#if defined(__x86_64__) || defined(_M_X64)
|
|
317
318
|
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
|
|
318
319
|
#else
|
|
319
320
|
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
|
@@ -326,7 +327,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
|
|
|
326
327
|
* On 32-bits, maxNbBits==24.
|
|
327
328
|
* On 64-bits, maxNbBits==56.
|
|
328
329
|
* @return : value extracted */
|
|
329
|
-
FORCE_INLINE_TEMPLATE
|
|
330
|
+
FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
|
330
331
|
{
|
|
331
332
|
/* arbitrate between double-shift and shift+mask */
|
|
332
333
|
#if 1
|
|
@@ -342,7 +343,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits
|
|
|
342
343
|
|
|
343
344
|
/*! BIT_lookBitsFast() :
|
|
344
345
|
* unsafe version; only works if nbBits >= 1 */
|
|
345
|
-
MEM_STATIC
|
|
346
|
+
MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|
346
347
|
{
|
|
347
348
|
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
|
|
348
349
|
assert(nbBits >= 1);
|
|
@@ -358,18 +359,18 @@ FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|
|
358
359
|
* Read (consume) next n bits from local register and update.
|
|
359
360
|
* Pay attention to not read more than nbBits contained into local register.
|
|
360
361
|
* @return : extracted value. */
|
|
361
|
-
FORCE_INLINE_TEMPLATE
|
|
362
|
+
FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
|
362
363
|
{
|
|
363
|
-
|
|
364
|
+
BitContainerType const value = BIT_lookBits(bitD, nbBits);
|
|
364
365
|
BIT_skipBits(bitD, nbBits);
|
|
365
366
|
return value;
|
|
366
367
|
}
|
|
367
368
|
|
|
368
369
|
/*! BIT_readBitsFast() :
|
|
369
370
|
* unsafe version; only works if nbBits >= 1 */
|
|
370
|
-
MEM_STATIC
|
|
371
|
+
MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
|
371
372
|
{
|
|
372
|
-
|
|
373
|
+
BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
|
|
373
374
|
assert(nbBits >= 1);
|
|
374
375
|
BIT_skipBits(bitD, nbBits);
|
|
375
376
|
return value;
|
|
@@ -450,8 +451,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
|
|
|
450
451
|
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
|
|
451
452
|
}
|
|
452
453
|
|
|
453
|
-
#if defined (__cplusplus)
|
|
454
|
-
}
|
|
455
|
-
#endif
|
|
456
|
-
|
|
457
454
|
#endif /* BITSTREAM_H_MODULE */
|