multi_compress 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/multi_compress/extconf.rb +6 -1
  4. data/ext/multi_compress/vendor/zstd/lib/Makefile +26 -6
  5. data/ext/multi_compress/vendor/zstd/lib/README.md +11 -0
  6. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +92 -87
  7. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +26 -29
  8. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +36 -22
  9. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +1 -1
  10. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +0 -9
  11. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +1 -0
  12. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +0 -10
  13. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +2 -17
  14. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +2 -0
  15. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +0 -9
  16. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +7 -11
  17. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +0 -9
  18. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +22 -9
  19. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +0 -8
  20. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +93 -19
  21. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +12 -0
  22. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +1 -69
  23. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +5 -12
  24. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +10 -0
  25. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +7 -0
  26. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +1057 -367
  27. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +227 -125
  28. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +1 -1
  29. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +7 -7
  30. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +7 -6
  31. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +17 -17
  32. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +41 -24
  33. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +58 -50
  34. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +4 -12
  35. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +91 -74
  36. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +4 -12
  37. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +64 -64
  38. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +30 -39
  39. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +48 -33
  40. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +6 -14
  41. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +55 -51
  42. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +8 -16
  43. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  44. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  45. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +134 -93
  46. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +4 -15
  47. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +10 -3
  48. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +14 -11
  49. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +6 -12
  50. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +5 -5
  51. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +60 -19
  52. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  53. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +2 -2
  54. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2 -2
  55. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +1 -1
  56. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +1 -1
  57. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +1 -1
  58. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +6 -2
  59. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +2 -2
  60. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +2 -4
  61. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +2 -2
  62. data/ext/multi_compress/vendor/zstd/lib/zdict.h +15 -8
  63. data/ext/multi_compress/vendor/zstd/lib/zstd.h +241 -132
  64. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +1 -8
  65. data/lib/multi_compress/version.rb +1 -1
  66. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b07dc7022c50225ee44cd8956e0182ac483f4f4e9bcc2f48cfc0c14b8ef74b31
4
- data.tar.gz: 42e399c199fb48cab2ad7eefc129149d8754d8df0b2255abb5b75f140fe9bf80
3
+ metadata.gz: ae5350e9b46caeca01531ebf9dad7f603bcf1a5ef6d5c3e2638ff248ad3a4bab
4
+ data.tar.gz: 71eb14838de738d6590bdb33f31eca25e89a236c35b3ba54a1e01d10cfc88fe3
5
5
  SHA512:
6
- metadata.gz: 8b123a2f7cee46ca4d61052c5e4b75403e327ab269ebee9d7b35db9f610affc51dfdfcc3118c1cd0949b134146ade9afdaa1ef79a1387af8843478e73fc0f74d
7
- data.tar.gz: '0574149cfcb0fba8c38690286149c0bbd0b057ab4b615a157ea2e0c63a687a963317988da3a25d1f9c686389f2b2db07064b4de51ac799b4b0b5ce5f75132d5e'
6
+ metadata.gz: 02c084372c04faf4b75ea6bddb0b95a63cc580ed897099e97dbff2b61ce33a2b2aa1ba50143e01badd7d109e87ce8d88b3429dc35aadb4448f41fb451530323f
7
+ data.tar.gz: 46579de5cf8264c15c24cc6330e01407dd59ad8501c17e35a43d3e762c29fcd06e88c3181656619d87744198247857041f2207b525dfa4a6aa33f705bd990e86
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.1] — 2026-04-15
4
+ - Bump vendored zstd from "1.5.6" to "1.5.7".
5
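+ - Micro-optimizations: the extension is now compiled with `-O3`, and `BROTLI_BUILD_LITTLE_ENDIAN` is defined on x86_64/amd64 and aarch64/arm64 platforms.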
6
+
3
7
  ## [0.2.0] — 2026-04-15
4
8
 
5
9
  ### Added
@@ -163,9 +163,14 @@ else
163
163
  vpath_dirs = configure_vendored_libraries(VENDOR_DIR)
164
164
  end
165
165
 
166
- $CFLAGS += " -O2"
166
+ $CFLAGS += " -O3"
167
167
  $CFLAGS += " -DXXH_NAMESPACE=MULTICOMPRESS_"
168
168
 
169
+ case RUBY_PLATFORM
170
+ when /x86_64|amd64|aarch64|arm64/
171
+ $CFLAGS += " -DBROTLI_BUILD_LITTLE_ENDIAN"
172
+ end
173
+
169
174
  have_header("ruby/fiber/scheduler.h")
170
175
 
171
176
  have_library("pthread") unless RUBY_PLATFORM.include?("darwin")
@@ -63,6 +63,8 @@ CPPFLAGS_DYNLIB += -DZSTD_MULTITHREAD # dynamic library build defaults to multi
63
63
  LDFLAGS_DYNLIB += -pthread
64
64
  CPPFLAGS_STATICLIB += # static library build defaults to single-threaded
65
65
 
66
+ # pkg-config Libs.private points to LDFLAGS_DYNLIB
67
+ PCLIB := $(LDFLAGS_DYNLIB)
66
68
 
67
69
  ifeq ($(findstring GCC,$(CCVER)),GCC)
68
70
  decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
@@ -71,13 +73,15 @@ endif
71
73
 
72
74
  # macOS linker doesn't support -soname, and uses a different extension
73
75
  # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
74
- ifeq ($(UNAME), Darwin)
76
+ UNAME_TARGET_SYSTEM ?= $(UNAME)
77
+
78
+ ifeq ($(UNAME_TARGET_SYSTEM), Darwin)
75
79
  SHARED_EXT = dylib
76
80
  SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
77
81
  SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
78
82
  SONAME_FLAGS = -install_name $(LIBDIR)/libzstd.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
79
83
  else
80
- ifeq ($(UNAME), AIX)
84
+ ifeq ($(UNAME_TARGET_SYSTEM), AIX)
81
85
  SONAME_FLAGS =
82
86
  else
83
87
  SONAME_FLAGS = -Wl,-soname=libzstd.$(SHARED_EXT).$(LIBVER_MAJOR)
@@ -186,12 +190,15 @@ lib : libzstd.a libzstd
186
190
  %-mt : CPPFLAGS_DYNLIB := -DZSTD_MULTITHREAD
187
191
  %-mt : CPPFLAGS_STATICLIB := -DZSTD_MULTITHREAD
188
192
  %-mt : LDFLAGS_DYNLIB := -pthread
193
+ %-mt : PCLIB :=
194
+ %-mt : PCMTLIB := $(LDFLAGS_DYNLIB)
189
195
  %-mt : %
190
196
  @echo multi-threaded build completed
191
197
 
192
198
  %-nomt : CPPFLAGS_DYNLIB :=
193
199
  %-nomt : LDFLAGS_DYNLIB :=
194
200
  %-nomt : CPPFLAGS_STATICLIB :=
201
+ %-nomt : PCLIB :=
195
202
  %-nomt : %
196
203
  @echo single-threaded build completed
197
204
 
@@ -261,7 +268,7 @@ clean:
261
268
  #-----------------------------------------------------------------------------
262
269
  # make install is validated only for below listed environments
263
270
  #-----------------------------------------------------------------------------
264
- ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT CYGWIN_NT))
271
+ ifneq (,$(filter Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku AIX MSYS_NT% CYGWIN_NT%,$(UNAME)))
265
272
 
266
273
  lib: libzstd.pc
267
274
 
@@ -292,13 +299,21 @@ PCLIBPREFIX := $(if $(findstring $(LIBDIR),$(PCLIBDIR)),,$${exec_prefix})
292
299
  # to PREFIX, rather than as a resolved value.
293
300
  PCEXEC_PREFIX := $(if $(HAS_EXPLICIT_EXEC_PREFIX),$(EXEC_PREFIX),$${prefix})
294
301
 
295
- ifneq (,$(filter $(UNAME),FreeBSD NetBSD DragonFly))
302
+
303
+ ifneq ($(MT),)
304
+ PCLIB :=
305
+ PCMTLIB := $(LDFLAGS_DYNLIB)
306
+ else
307
+ PCLIB := $(LDFLAGS_DYNLIB)
308
+ endif
309
+
310
+ ifneq (,$(filter FreeBSD NetBSD DragonFly,$(UNAME)))
296
311
  PKGCONFIGDIR ?= $(PREFIX)/libdata/pkgconfig
297
312
  else
298
313
  PKGCONFIGDIR ?= $(LIBDIR)/pkgconfig
299
314
  endif
300
315
 
301
- ifneq (,$(filter $(UNAME),SunOS))
316
+ ifneq (,$(filter SunOS,$(UNAME)))
302
317
  INSTALL ?= ginstall
303
318
  else
304
319
  INSTALL ?= install
@@ -308,6 +323,10 @@ INSTALL_PROGRAM ?= $(INSTALL)
308
323
  INSTALL_DATA ?= $(INSTALL) -m 644
309
324
 
310
325
 
326
+ # pkg-config library define.
327
+ # For static single-threaded library declare -pthread in Libs.private
328
+ # For static multi-threaded library declare -pthread in Libs and Cflags
329
+ .PHONY: libzstd.pc
311
330
  libzstd.pc: libzstd.pc.in
312
331
  @echo creating pkgconfig
313
332
  @sed \
@@ -316,7 +335,8 @@ libzstd.pc: libzstd.pc.in
316
335
  -e 's|@INCLUDEDIR@|$(PCINCPREFIX)$(PCINCDIR)|' \
317
336
  -e 's|@LIBDIR@|$(PCLIBPREFIX)$(PCLIBDIR)|' \
318
337
  -e 's|@VERSION@|$(VERSION)|' \
319
- -e 's|@LIBS_PRIVATE@|$(LDFLAGS_DYNLIB)|' \
338
+ -e 's|@LIBS_MT@|$(PCMTLIB)|' \
339
+ -e 's|@LIBS_PRIVATE@|$(PCLIB)|' \
320
340
  $< >$@
321
341
 
322
342
  .PHONY: install
@@ -27,12 +27,16 @@ Enabling multithreading requires 2 conditions :
27
27
 
28
28
  For convenience, we provide a build target to generate multi and single threaded libraries:
29
29
  - Force enable multithreading on both dynamic and static libraries by appending `-mt` to the target, e.g. `make lib-mt`.
30
+ Note that the `.pc` generated on calling `make lib-mt` will already include the required Libs and Cflags.
30
31
  - Force disable multithreading on both dynamic and static libraries by appending `-nomt` to the target, e.g. `make lib-nomt`.
31
32
  - By default, as mentioned before, dynamic library is multithreaded, and static library is single-threaded, e.g. `make lib`.
32
33
 
33
34
  When linking a POSIX program with a multithreaded version of `libzstd`,
34
35
  note that it's necessary to invoke the `-pthread` flag during link stage.
35
36
 
37
+ The `.pc` generated from `make install` or `make install-pc` always assumes a single-threaded static library
38
+ is compiled. To correctly generate a `.pc` for the multi-threaded static library, set `MT=1` as an environment variable.
39
+
36
40
  Multithreading capabilities are exposed
37
41
  via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
38
42
 
@@ -145,6 +149,13 @@ The file structure is designed to make this selection manually achievable for an
145
149
  will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
146
150
  the shared library, which is now hidden by default.
147
151
 
152
+ - The build macro `STATIC_BMI2` can be set to 1 to force usage of `bmi2` instructions.
153
+ It is generally not necessary to set this build macro,
154
+ because `STATIC_BMI2` will be automatically set to 1
155
+ on detecting the presence of the corresponding instruction set in the compilation target.
156
+ It's nonetheless available as an optional manual toggle for better control,
157
+ and can also be used to forcefully disable `bmi2` instructions by setting it to 0.
158
+
148
159
  - The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
149
160
  which can detect at runtime the presence of BMI2 instructions, and use them only if present.
150
161
  These instructions contribute to better performance, notably on the decoder side.
@@ -28,27 +28,29 @@ MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
28
28
  MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
29
29
  {
30
30
  assert(val != 0);
31
- # if defined(_MSC_VER)
32
- # if STATIC_BMI2 == 1
33
- return (unsigned)_tzcnt_u32(val);
34
- # else
35
- if (val != 0) {
36
- unsigned long r;
37
- _BitScanForward(&r, val);
38
- return (unsigned)r;
39
- } else {
40
- /* Should not reach this code path */
41
- __assume(0);
42
- }
43
- # endif
44
- # elif defined(__GNUC__) && (__GNUC__ >= 4)
45
- return (unsigned)__builtin_ctz(val);
46
- # else
47
- return ZSTD_countTrailingZeros32_fallback(val);
48
- # endif
31
+ #if defined(_MSC_VER)
32
+ # if STATIC_BMI2
33
+ return (unsigned)_tzcnt_u32(val);
34
+ # else
35
+ if (val != 0) {
36
+ unsigned long r;
37
+ _BitScanForward(&r, val);
38
+ return (unsigned)r;
39
+ } else {
40
+ __assume(0); /* Should not reach this code path */
41
+ }
42
+ # endif
43
+ #elif defined(__GNUC__) && (__GNUC__ >= 4)
44
+ return (unsigned)__builtin_ctz(val);
45
+ #elif defined(__ICCARM__)
46
+ return (unsigned)__builtin_ctz(val);
47
+ #else
48
+ return ZSTD_countTrailingZeros32_fallback(val);
49
+ #endif
49
50
  }
50
51
 
51
- MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
52
+ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
53
+ {
52
54
  assert(val != 0);
53
55
  {
54
56
  static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
@@ -67,86 +69,89 @@ MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
67
69
  MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
68
70
  {
69
71
  assert(val != 0);
70
- # if defined(_MSC_VER)
71
- # if STATIC_BMI2 == 1
72
- return (unsigned)_lzcnt_u32(val);
73
- # else
74
- if (val != 0) {
75
- unsigned long r;
76
- _BitScanReverse(&r, val);
77
- return (unsigned)(31 - r);
78
- } else {
79
- /* Should not reach this code path */
80
- __assume(0);
81
- }
82
- # endif
83
- # elif defined(__GNUC__) && (__GNUC__ >= 4)
84
- return (unsigned)__builtin_clz(val);
85
- # else
86
- return ZSTD_countLeadingZeros32_fallback(val);
87
- # endif
72
+ #if defined(_MSC_VER)
73
+ # if STATIC_BMI2
74
+ return (unsigned)_lzcnt_u32(val);
75
+ # else
76
+ if (val != 0) {
77
+ unsigned long r;
78
+ _BitScanReverse(&r, val);
79
+ return (unsigned)(31 - r);
80
+ } else {
81
+ __assume(0); /* Should not reach this code path */
82
+ }
83
+ # endif
84
+ #elif defined(__GNUC__) && (__GNUC__ >= 4)
85
+ return (unsigned)__builtin_clz(val);
86
+ #elif defined(__ICCARM__)
87
+ return (unsigned)__builtin_clz(val);
88
+ #else
89
+ return ZSTD_countLeadingZeros32_fallback(val);
90
+ #endif
88
91
  }
89
92
 
90
93
  MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
91
94
  {
92
95
  assert(val != 0);
93
- # if defined(_MSC_VER) && defined(_WIN64)
94
- # if STATIC_BMI2 == 1
95
- return (unsigned)_tzcnt_u64(val);
96
- # else
97
- if (val != 0) {
98
- unsigned long r;
99
- _BitScanForward64(&r, val);
100
- return (unsigned)r;
101
- } else {
102
- /* Should not reach this code path */
103
- __assume(0);
104
- }
105
- # endif
106
- # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
107
- return (unsigned)__builtin_ctzll(val);
108
- # else
109
- {
110
- U32 mostSignificantWord = (U32)(val >> 32);
111
- U32 leastSignificantWord = (U32)val;
112
- if (leastSignificantWord == 0) {
113
- return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
114
- } else {
115
- return ZSTD_countTrailingZeros32(leastSignificantWord);
116
- }
96
+ #if defined(_MSC_VER) && defined(_WIN64)
97
+ # if STATIC_BMI2
98
+ return (unsigned)_tzcnt_u64(val);
99
+ # else
100
+ if (val != 0) {
101
+ unsigned long r;
102
+ _BitScanForward64(&r, val);
103
+ return (unsigned)r;
104
+ } else {
105
+ __assume(0); /* Should not reach this code path */
106
+ }
107
+ # endif
108
+ #elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
109
+ return (unsigned)__builtin_ctzll(val);
110
+ #elif defined(__ICCARM__)
111
+ return (unsigned)__builtin_ctzll(val);
112
+ #else
113
+ {
114
+ U32 mostSignificantWord = (U32)(val >> 32);
115
+ U32 leastSignificantWord = (U32)val;
116
+ if (leastSignificantWord == 0) {
117
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
118
+ } else {
119
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
117
120
  }
118
- # endif
121
+ }
122
+ #endif
119
123
  }
120
124
 
121
125
  MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
122
126
  {
123
127
  assert(val != 0);
124
- # if defined(_MSC_VER) && defined(_WIN64)
125
- # if STATIC_BMI2 == 1
126
- return (unsigned)_lzcnt_u64(val);
127
- # else
128
- if (val != 0) {
129
- unsigned long r;
130
- _BitScanReverse64(&r, val);
131
- return (unsigned)(63 - r);
132
- } else {
133
- /* Should not reach this code path */
134
- __assume(0);
135
- }
136
- # endif
137
- # elif defined(__GNUC__) && (__GNUC__ >= 4)
138
- return (unsigned)(__builtin_clzll(val));
139
- # else
140
- {
141
- U32 mostSignificantWord = (U32)(val >> 32);
142
- U32 leastSignificantWord = (U32)val;
143
- if (mostSignificantWord == 0) {
144
- return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
145
- } else {
146
- return ZSTD_countLeadingZeros32(mostSignificantWord);
147
- }
128
+ #if defined(_MSC_VER) && defined(_WIN64)
129
+ # if STATIC_BMI2
130
+ return (unsigned)_lzcnt_u64(val);
131
+ # else
132
+ if (val != 0) {
133
+ unsigned long r;
134
+ _BitScanReverse64(&r, val);
135
+ return (unsigned)(63 - r);
136
+ } else {
137
+ __assume(0); /* Should not reach this code path */
138
+ }
139
+ # endif
140
+ #elif defined(__GNUC__) && (__GNUC__ >= 4)
141
+ return (unsigned)(__builtin_clzll(val));
142
+ #elif defined(__ICCARM__)
143
+ return (unsigned)(__builtin_clzll(val));
144
+ #else
145
+ {
146
+ U32 mostSignificantWord = (U32)(val >> 32);
147
+ U32 leastSignificantWord = (U32)val;
148
+ if (mostSignificantWord == 0) {
149
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
150
+ } else {
151
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
148
152
  }
149
- # endif
153
+ }
154
+ #endif
150
155
  }
151
156
 
152
157
  MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
@@ -14,9 +14,6 @@
14
14
  #ifndef BITSTREAM_H_MODULE
15
15
  #define BITSTREAM_H_MODULE
16
16
 
17
- #if defined (__cplusplus)
18
- extern "C" {
19
- #endif
20
17
  /*
21
18
  * This API consists of small unitary functions, which must be inlined for best performance.
22
19
  * Since link-time-optimization is not available for all compilers,
@@ -32,7 +29,6 @@ extern "C" {
32
29
  #include "error_private.h" /* error codes and messages */
33
30
  #include "bits.h" /* ZSTD_highbit32 */
34
31
 
35
-
36
32
  /*=========================================
37
33
  * Target specific
38
34
  =========================================*/
@@ -52,12 +48,13 @@ extern "C" {
52
48
  /*-******************************************
53
49
  * bitStream encoding API (write forward)
54
50
  ********************************************/
51
+ typedef size_t BitContainerType;
55
52
  /* bitStream can mix input from multiple sources.
56
53
  * A critical property of these streams is that they encode and decode in **reverse** direction.
57
54
  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
58
55
  */
59
56
  typedef struct {
60
- size_t bitContainer;
57
+ BitContainerType bitContainer;
61
58
  unsigned bitPos;
62
59
  char* startPtr;
63
60
  char* ptr;
@@ -65,7 +62,7 @@ typedef struct {
65
62
  } BIT_CStream_t;
66
63
 
67
64
  MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
68
- MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
65
+ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
69
66
  MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
70
67
  MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
71
68
 
@@ -74,7 +71,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
74
71
  * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
75
72
  *
76
73
  * bits are first added to a local register.
77
- * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
74
+ * Local register is BitContainerType: 64 bits on 64-bit systems, or 32 bits on 32-bit systems.
78
75
  * Writing data into memory is an explicit operation, performed by the flushBits function.
79
76
  * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
80
77
  * After a flushBits, a maximum of 7 bits might still be stored into local register.
@@ -90,7 +87,6 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
90
87
  /*-********************************************
91
88
  * bitStream decoding API (read backward)
92
89
  **********************************************/
93
- typedef size_t BitContainerType;
94
90
  typedef struct {
95
91
  BitContainerType bitContainer;
96
92
  unsigned bitsConsumed;
@@ -106,7 +102,7 @@ typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
106
102
  } BIT_DStream_status; /* result of BIT_reloadDStream() */
107
103
 
108
104
  MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
109
- MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
105
+ MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
110
106
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
111
107
  MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
112
108
 
@@ -125,7 +121,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
125
121
  /*-****************************************
126
122
  * unsafe API
127
123
  ******************************************/
128
- MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
124
+ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
129
125
  /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
130
126
 
131
127
  MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
@@ -163,10 +159,15 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
163
159
  return 0;
164
160
  }
165
161
 
166
- FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
162
+ FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
167
163
  {
168
- #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
169
- return _bzhi_u64(bitContainer, nbBits);
164
+ #if STATIC_BMI2 && !defined(ZSTD_NO_INTRINSICS)
165
+ # if (defined(__x86_64__) || defined(_M_X64)) && !defined(__ILP32__)
166
+ return _bzhi_u64(bitContainer, nbBits);
167
+ # else
168
+ DEBUG_STATIC_ASSERT(sizeof(bitContainer) == sizeof(U32));
169
+ return _bzhi_u32(bitContainer, nbBits);
170
+ # endif
170
171
  #else
171
172
  assert(nbBits < BIT_MASK_SIZE);
172
173
  return bitContainer & BIT_mask[nbBits];
@@ -177,7 +178,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbB
177
178
  * can add up to 31 bits into `bitC`.
178
179
  * Note : does not check for register overflow ! */
179
180
  MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
180
- size_t value, unsigned nbBits)
181
+ BitContainerType value, unsigned nbBits)
181
182
  {
182
183
  DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
183
184
  assert(nbBits < BIT_MASK_SIZE);
@@ -190,7 +191,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
190
191
  * works only if `value` is _clean_,
191
192
  * meaning all high bits above nbBits are 0 */
192
193
  MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
193
- size_t value, unsigned nbBits)
194
+ BitContainerType value, unsigned nbBits)
194
195
  {
195
196
  assert((value>>nbBits) == 0);
196
197
  assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
@@ -237,7 +238,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
237
238
  BIT_addBitsFast(bitC, 1, 1); /* endMark */
238
239
  BIT_flushBits(bitC);
239
240
  if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
240
- return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
241
+ return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
241
242
  }
242
243
 
243
244
 
@@ -298,12 +299,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
298
299
  return srcSize;
299
300
  }
300
301
 
301
- FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
302
+ FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
302
303
  {
303
304
  return bitContainer >> start;
304
305
  }
305
306
 
306
- FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
307
+ FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
307
308
  {
308
309
  U32 const regMask = sizeof(bitContainer)*8 - 1;
309
310
  /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -313,7 +314,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
313
314
  * such cpus old (pre-Haswell, 2013) and their performance is not of that
314
315
  * importance.
315
316
  */
316
- #if defined(__x86_64__) || defined(_M_X86)
317
+ #if defined(__x86_64__) || defined(_M_X64)
317
318
  return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
318
319
  #else
319
320
  return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
@@ -326,7 +327,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U3
326
327
  * On 32-bits, maxNbBits==24.
327
328
  * On 64-bits, maxNbBits==56.
328
329
  * @return : value extracted */
329
- FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
330
+ FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
330
331
  {
331
332
  /* arbitrate between double-shift and shift+mask */
332
333
  #if 1
@@ -342,7 +343,7 @@ FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits
342
343
 
343
344
  /*! BIT_lookBitsFast() :
344
345
  * unsafe version; only works if nbBits >= 1 */
345
- MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
346
+ MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
346
347
  {
347
348
  U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
348
349
  assert(nbBits >= 1);
@@ -358,18 +359,18 @@ FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
358
359
  * Read (consume) next n bits from local register and update.
359
360
  * Pay attention to not read more than nbBits contained into local register.
360
361
  * @return : extracted value. */
361
- FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
362
+ FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
362
363
  {
363
- size_t const value = BIT_lookBits(bitD, nbBits);
364
+ BitContainerType const value = BIT_lookBits(bitD, nbBits);
364
365
  BIT_skipBits(bitD, nbBits);
365
366
  return value;
366
367
  }
367
368
 
368
369
  /*! BIT_readBitsFast() :
369
370
  * unsafe version; only works if nbBits >= 1 */
370
- MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
371
+ MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
371
372
  {
372
- size_t const value = BIT_lookBitsFast(bitD, nbBits);
373
+ BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
373
374
  assert(nbBits >= 1);
374
375
  BIT_skipBits(bitD, nbBits);
375
376
  return value;
@@ -450,8 +451,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
450
451
  return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
451
452
  }
452
453
 
453
- #if defined (__cplusplus)
454
- }
455
- #endif
456
-
457
454
  #endif /* BITSTREAM_H_MODULE */