vibe_zstd 1.0.0

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +7 -0
  2. data/.standard.yml +3 -0
  3. data/CHANGELOG.md +22 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +978 -0
  6. data/Rakefile +20 -0
  7. data/benchmark/README.md +198 -0
  8. data/benchmark/compression_levels.rb +99 -0
  9. data/benchmark/context_reuse.rb +174 -0
  10. data/benchmark/decompression_speed_by_level.rb +65 -0
  11. data/benchmark/dictionary_training.rb +182 -0
  12. data/benchmark/dictionary_usage.rb +121 -0
  13. data/benchmark/for_readme.rb +157 -0
  14. data/benchmark/generate_fixture.rb +82 -0
  15. data/benchmark/helpers.rb +237 -0
  16. data/benchmark/multithreading.rb +105 -0
  17. data/benchmark/run_all.rb +150 -0
  18. data/benchmark/streaming.rb +154 -0
  19. data/ext/vibe_zstd/Makefile +270 -0
  20. data/ext/vibe_zstd/cctx.c +565 -0
  21. data/ext/vibe_zstd/dctx.c +493 -0
  22. data/ext/vibe_zstd/dict.c +587 -0
  23. data/ext/vibe_zstd/extconf.rb +52 -0
  24. data/ext/vibe_zstd/frames.c +132 -0
  25. data/ext/vibe_zstd/libzstd/LICENSE +30 -0
  26. data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
  27. data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
  28. data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
  29. data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
  30. data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
  31. data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
  32. data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
  33. data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
  34. data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
  35. data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
  36. data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
  37. data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
  38. data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
  39. data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
  40. data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
  41. data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
  42. data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
  43. data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
  44. data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
  45. data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
  46. data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
  47. data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
  48. data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
  49. data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
  50. data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
  51. data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
  52. data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
  53. data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
  54. data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
  55. data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
  56. data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
  57. data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
  58. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
  59. data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
  60. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
  61. data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
  62. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
  63. data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
  64. data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
  65. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
  66. data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
  67. data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
  68. data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
  69. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
  70. data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
  71. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
  72. data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
  73. data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
  74. data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
  75. data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
  76. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
  77. data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
  78. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
  79. data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
  80. data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
  81. data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
  82. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
  83. data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
  84. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
  85. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
  86. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
  87. data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
  88. data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
  89. data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
  90. data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
  91. data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
  92. data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
  93. data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
  94. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
  95. data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
  96. data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
  97. data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
  98. data/ext/vibe_zstd/libzstd/zdict.h +481 -0
  99. data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
  100. data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
  101. data/ext/vibe_zstd/streaming.c +410 -0
  102. data/ext/vibe_zstd/vibe_zstd.c +293 -0
  103. data/ext/vibe_zstd/vibe_zstd.h +56 -0
  104. data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
  105. data/lib/vibe_zstd/constants.rb +67 -0
  106. data/lib/vibe_zstd/version.rb +5 -0
  107. data/lib/vibe_zstd.rb +255 -0
  108. data/sig/vibe_zstd.rbs +76 -0
  109. metadata +179 -0
data/ext/vibe_zstd/libzstd/common/compiler.h
@@ -0,0 +1,464 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTD_COMPILER_H
+ #define ZSTD_COMPILER_H
+
+ #include <stddef.h>
+
+ #include "portability_macros.h"
+
+ /*-*******************************************************
+ * Compiler specifics
+ *********************************************************/
+ /* force inlining */
+
+ #if !defined(ZSTD_NO_INLINE)
+ #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+ # define INLINE_KEYWORD inline
+ #else
+ # define INLINE_KEYWORD
+ #endif
+
+ #if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
+ # define FORCE_INLINE_ATTR __attribute__((always_inline))
+ #elif defined(_MSC_VER)
+ # define FORCE_INLINE_ATTR __forceinline
+ #else
+ # define FORCE_INLINE_ATTR
+ #endif
+
+ #else
+
+ #define INLINE_KEYWORD
+ #define FORCE_INLINE_ATTR
+
+ #endif
+
+ /**
+ On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+ This explicitly marks such functions as __cdecl so that the code will still compile
+ if a CC other than __cdecl has been made the default.
+ */
+ #if defined(_MSC_VER)
+ # define WIN_CDECL __cdecl
+ #else
+ # define WIN_CDECL
+ #endif
+
+ /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+ #if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
+ # define UNUSED_ATTR __attribute__((unused))
+ #else
+ # define UNUSED_ATTR
+ #endif
+
+ /**
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+ #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
+ /**
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+ #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+ # define HINT_INLINE static INLINE_KEYWORD
+ #else
+ # define HINT_INLINE FORCE_INLINE_TEMPLATE
+ #endif
+
+ /* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function in included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+ #ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
+ #if defined(__GNUC__)
+ # define MEM_STATIC static __inline UNUSED_ATTR
+ #elif defined(__IAR_SYSTEMS_ICC__)
+ # define MEM_STATIC static inline UNUSED_ATTR
+ #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+ # define MEM_STATIC static inline
+ #elif defined(_MSC_VER)
+ # define MEM_STATIC static __inline
+ #else
+ # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+ #endif
+ #endif
+
+ /* force no inlining */
+ #ifdef _MSC_VER
+ # define FORCE_NOINLINE static __declspec(noinline)
+ #else
+ # if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
+ # define FORCE_NOINLINE static __attribute__((__noinline__))
+ # else
+ # define FORCE_NOINLINE static
+ # endif
+ #endif
+
+
+ /* target attribute */
+ #if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__)
+ # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+ #else
+ # define TARGET_ATTRIBUTE(target)
+ #endif
+
+ /* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
+ * We test for bmi1 & bmi2. lzcnt is included in bmi1.
+ */
+ #define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
+
+ /* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+ #if defined(NO_PREFETCH)
+ # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+ # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
+ #else
+ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
+ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+ # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+ # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+ # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+ # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+ # elif defined(__aarch64__)
+ # define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+ # define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
+ # else
+ # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+ # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
+ # endif
+ #endif /* NO_PREFETCH */
+
+ #define CACHELINE_SIZE 64
+
+ #define PREFETCH_AREA(p, s) \
+ do { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH_L2(_ptr + _pos); \
+ } \
+ } while (0)
+
+ /* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+ #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
+ # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+ # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+ # else
+ # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+ # endif
+ #else
+ # define DONT_VECTORIZE
+ #endif
+
+ /* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+ #if defined(__GNUC__)
+ #define LIKELY(x) (__builtin_expect((x), 1))
+ #define UNLIKELY(x) (__builtin_expect((x), 0))
+ #else
+ #define LIKELY(x) (x)
+ #define UNLIKELY(x) (x)
+ #endif
+
+ #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+ # define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
+ #else
+ # define ZSTD_UNREACHABLE do { assert(0); } while (0)
+ #endif
+
+ /* disable warnings */
+ #ifdef _MSC_VER /* Visual Studio */
+ # include <intrin.h> /* For Visual 2005 */
+ # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+ # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
+ # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+ # pragma warning(disable : 4324) /* disable: C4324: padded structure */
+ #endif
+
+ /* compile time determination of SIMD support */
+ #if !defined(ZSTD_NO_INTRINSICS)
+ # if defined(__AVX2__)
+ # define ZSTD_ARCH_X86_AVX2
+ # endif
+ # if defined(__SSE2__) || defined(_M_X64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
+ # define ZSTD_ARCH_X86_SSE2
+ # endif
+ # if defined(__ARM_NEON) || defined(_M_ARM64)
+ # define ZSTD_ARCH_ARM_NEON
+ # endif
+ #
+ # if defined(ZSTD_ARCH_X86_AVX2)
+ # include <immintrin.h>
+ # endif
+ # if defined(ZSTD_ARCH_X86_SSE2)
+ # include <emmintrin.h>
+ # elif defined(ZSTD_ARCH_ARM_NEON)
+ # include <arm_neon.h>
+ # endif
+ #endif
+
+ /* C-language Attributes are added in C23. */
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+ # define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+ #else
+ # define ZSTD_HAS_C_ATTRIBUTE(x) 0
+ #endif
+
+ /* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+ #if defined(__cplusplus) && defined(__has_cpp_attribute)
+ # define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+ #else
+ # define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+ #endif
+
+ /* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+ #ifndef ZSTD_FALLTHROUGH
+ # if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
+ # define ZSTD_FALLTHROUGH [[fallthrough]]
+ # elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
+ # define ZSTD_FALLTHROUGH [[fallthrough]]
+ # elif __has_attribute(__fallthrough__)
+ /* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
+ * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
+ */
+ # define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
+ # else
+ # define ZSTD_FALLTHROUGH
+ # endif
+ #endif
+
+ /*-**************************************************************
+ * Alignment
+ *****************************************************************/
+
+ /* @return 1 if @u is a 2^n value, 0 otherwise
+ * useful to check a value is valid for alignment restrictions */
+ MEM_STATIC int ZSTD_isPower2(size_t u) {
+ return (u & (u-1)) == 0;
+ }
+
+ /* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+ #ifndef ZSTD_ALIGNOF
+ # if defined(__GNUC__) || defined(_MSC_VER)
+ /* covers gcc, clang & MSVC */
+ /* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+ # define ZSTD_ALIGNOF(T) __alignof(T)
+
+ # elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+ /* C11 support */
+ # include <stdalign.h>
+ # define ZSTD_ALIGNOF(T) alignof(T)
+
+ # else
+ /* No known support for alignof() - imperfect backup */
+ # define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
+
+ # endif
+ #endif /* ZSTD_ALIGNOF */
+
+ #ifndef ZSTD_ALIGNED
+ /* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
+ # if defined(__GNUC__) || defined(__clang__)
+ # define ZSTD_ALIGNED(a) __attribute__((aligned(a)))
+ # elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
+ # define ZSTD_ALIGNED(a) _Alignas(a)
+ #elif defined(_MSC_VER)
+ # define ZSTD_ALIGNED(n) __declspec(align(n))
+ # else
+ /* this compiler will require its own alignment instruction */
+ # define ZSTD_ALIGNED(...)
+ # endif
+ #endif /* ZSTD_ALIGNED */
+
+
+ /*-**************************************************************
+ * Sanitizer
+ *****************************************************************/
+
+ /**
+ * Zstd relies on pointer overflow in its decompressor.
+ * We add this attribute to functions that rely on pointer overflow.
+ */
+ #ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ # if __has_attribute(no_sanitize)
+ # if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
+ /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
+ # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
+ # else
+ /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
+ # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
+ # endif
+ # else
+ # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ # endif
+ #endif
+
+ /**
+ * Helper function to perform a wrapped pointer difference without triggering
+ * UBSAN.
+ *
+ * @returns lhs - rhs with wrapping
+ */
+ MEM_STATIC
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
+ {
+ return lhs - rhs;
+ }
+
+ /**
+ * Helper function to perform a wrapped pointer add without triggering UBSAN.
+ *
+ * @return ptr + add with wrapping
+ */
+ MEM_STATIC
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
+ {
+ return ptr + add;
+ }
+
+ /**
+ * Helper function to perform a wrapped pointer subtraction without triggering
+ * UBSAN.
+ *
+ * @return ptr - sub with wrapping
+ */
+ MEM_STATIC
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
+ {
+ return ptr - sub;
+ }
+
+ /**
+ * Helper function to add to a pointer that works around C's undefined behavior
+ * of adding 0 to NULL.
+ *
+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
+ */
+ MEM_STATIC
+ unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
+ {
+ return add > 0 ? ptr + add : ptr;
+ }
+
+ /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
+ * abundance of caution, disable our custom poisoning on mingw. */
+ #ifdef __MINGW32__
+ #ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
+ #define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
+ #endif
+ #ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
+ #define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
+ #endif
+ #endif
+
+ #if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
+ /* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+ #include <stddef.h> /* size_t */
+ #define ZSTD_DEPS_NEED_STDINT
+ #include "zstd_deps.h" /* intptr_t */
+
+ /* Make memory region fully initialized (without changing its contents). */
+ void __msan_unpoison(const volatile void *a, size_t size);
+
+ /* Make memory region fully uninitialized (without changing its contents).
+ This is a legacy interface that does not update origin information. Use
+ __msan_allocated_memory() instead. */
+ void __msan_poison(const volatile void *a, size_t size);
+
+ /* Returns the offset of the first (at least partially) poisoned byte in the
+ memory range, or -1 if the whole range is good. */
+ intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+
+ /* Print shadow and origin for the memory range to stderr in a human-readable
+ format. */
+ void __msan_print_shadow(const volatile void *x, size_t size);
+ #endif
+
+ #if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
+ /* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+ #include <stddef.h> /* size_t */
+
+ /**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+ /**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+ #endif
+
+ #endif /* ZSTD_COMPILER_H */
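
The comment on FORCE_INLINE_TEMPLATE above describes libzstd's "C template" pattern: a helper takes a compile-time constant flag and is forced inline so each call site becomes a specialized copy with the constant branch folded away. The sketch below is illustrative only and is not part of this gem or of libzstd; it assumes the vendored compiler.h from the hunk above is on the include path, and the function names (copy_template, copy_plain, copy_prefetch) are hypothetical.

    #include <stddef.h>
    #include "compiler.h"   /* FORCE_INLINE_TEMPLATE, PREFETCH_L1, CACHELINE_SIZE */

    /* "Template": useAccel is always a literal 0/1 at the call sites, so after
     * forced inlining the compiler can delete the dead branch in each copy. */
    FORCE_INLINE_TEMPLATE size_t copy_template(char* dst, const char* src, size_t n, int useAccel)
    {
        size_t i;
        if (useAccel) {
            for (i = 0; i < n; i += CACHELINE_SIZE)
                PREFETCH_L1(src + i);   /* hint only; expands to a no-op where prefetch is unavailable */
        }
        for (i = 0; i < n; i++) dst[i] = src[i];
        return n;
    }

    /* Two "instantiations", each specialized at compile time. */
    static size_t copy_plain(char* dst, const char* src, size_t n)    { return copy_template(dst, src, n, 0); }
    static size_t copy_prefetch(char* dst, const char* src, size_t n) { return copy_template(dst, src, n, 1); }
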
data/ext/vibe_zstd/libzstd/common/cpu.h
@@ -0,0 +1,249 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTD_COMMON_CPU_H
+ #define ZSTD_COMMON_CPU_H
+
+ /**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+ #include "mem.h"
+
+ #ifdef _MSC_VER
+ #include <intrin.h>
+ #endif
+
+ typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+ } ZSTD_cpuid_t;
+
+ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+ #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+ #if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
+ int reg[4];
+ __cpuid((int*)reg, 0);
+ {
+ int const n = reg[0];
+ if (n >= 1) {
+ __cpuid((int*)reg, 1);
+ f1c = (U32)reg[2];
+ f1d = (U32)reg[3];
+ }
+ if (n >= 7) {
+ __cpuidex((int*)reg, 7, 0);
+ f7b = (U32)reg[1];
+ f7c = (U32)reg[2];
+ }
+ }
+ #else
+ /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
+ * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
+ * to due to being a reserved register. So in that case, do the `cpuid`
+ * ourselves. Clang supports inline assembly anyway.
+ */
+ U32 n;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "rcx", "rdx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "movq %%rbx, %%rax\n\t"
+ "popq %%rbx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "rdx");
+ }
+ #endif
+ #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block like the normal cpuid branch below, but gcc
+ * reserves ebx for use of its pic register so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1));
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\t"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+ #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+ #endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+ }
+
+ #define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+ /* cpuid(1): Processor Info and Feature Bits. */
+ #define C(name, bit) X(name, f1c, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+ #undef C
+ #define D(name, bit) X(name, f1d, bit)
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ D(pbe, 31)
+ #undef D
+
+ /* cpuid(7): Extended Features. */
+ #define B(name, bit) X(name, f7b, bit)
+ B(bmi1, 3)
+ B(hle, 4)
+ B(avx2, 5)
+ B(smep, 7)
+ B(bmi2, 8)
+ B(erms, 9)
+ B(invpcid, 10)
+ B(rtm, 11)
+ B(mpx, 14)
+ B(avx512f, 16)
+ B(avx512dq, 17)
+ B(rdseed, 18)
+ B(adx, 19)
+ B(smap, 20)
+ B(avx512ifma, 21)
+ B(pcommit, 22)
+ B(clflushopt, 23)
+ B(clwb, 24)
+ B(avx512pf, 26)
+ B(avx512er, 27)
+ B(avx512cd, 28)
+ B(sha, 29)
+ B(avx512bw, 30)
+ B(avx512vl, 31)
+ #undef B
+ #define C(name, bit) X(name, f7c, bit)
+ C(prefetchwt1, 0)
+ C(avx512vbmi, 1)
+ #undef C
+
+ #undef X
+
+ #endif /* ZSTD_COMMON_CPU_H */
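
cpu.h above builds ZSTD_cpuid() plus one ZSTD_cpuid_<feature>() predicate per feature bit (generated by the X/B/C/D macros), and the BMI2_TARGET_ATTRIBUTE comment in compiler.h notes that the library tests bmi1 & bmi2 before taking a BMI2-compiled path. The sketch below shows how such predicates are typically consumed for runtime dispatch; it is illustrative only and not part of this gem, assumes a GCC/Clang toolchain with the vendored compiler.h and cpu.h (which itself includes mem.h) on the include path, and uses hypothetical count_bits* function names.

    #include <stdio.h>
    #include "compiler.h"   /* BMI2_TARGET_ATTRIBUTE */
    #include "cpu.h"        /* ZSTD_cpuid, ZSTD_cpuid_bmi1, ZSTD_cpuid_bmi2 */

    /* Portable fallback: clear the lowest set bit until none remain. */
    static int count_bits_generic(unsigned x) { int n = 0; while (x) { x &= x - 1; n++; } return n; }

    /* This function alone is compiled with lzcnt/bmi/bmi2 enabled via the target attribute. */
    BMI2_TARGET_ATTRIBUTE
    static int count_bits_bmi2(unsigned x) { return __builtin_popcount(x); }

    static int count_bits(unsigned x)
    {
        ZSTD_cpuid_t const cpuid = ZSTD_cpuid();               /* one CPUID query */
        if (ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid))  /* the bmi1 & bmi2 test the comment describes */
            return count_bits_bmi2(x);
        return count_bits_generic(x);
    }

    int main(void) { printf("%d\n", count_bits(0xF0u)); return 0; }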