zstd-ruby 1.4.2.0 → 1.4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +0 -2
- data/ext/zstdruby/libzstd/README.md +13 -2
- data/ext/zstdruby/libzstd/common/bitstream.h +7 -2
- data/ext/zstdruby/libzstd/common/compiler.h +17 -5
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -0
- data/ext/zstdruby/libzstd/common/mem.h +74 -1
- data/ext/zstdruby/libzstd/common/pool.c +7 -3
- data/ext/zstdruby/libzstd/common/threading.c +46 -1
- data/ext/zstdruby/libzstd/common/threading.h +32 -1
- data/ext/zstdruby/libzstd/common/xxhash.c +8 -2
- data/ext/zstdruby/libzstd/common/zstd_internal.h +37 -58
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +644 -445
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +98 -26
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -5
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +535 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -12
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +38 -45
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +35 -31
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +6 -6
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -26
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +2 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +16 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +149 -148
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +6 -5
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +2 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +2 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
- data/ext/zstdruby/libzstd/zstd.h +170 -66
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53be8076094a9ed214cbc86a174eeb3b587dc3d4781f7fbcf8ee280ffd0ca169
|
4
|
+
data.tar.gz: ebf9bcf8d062447dab0589c1acfe63f2481f9445c78f51653daf1f60750dfdd5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f3ac7650f5ec553aea615d60737bc092bca9d06c998eecc0570b5d542344f4cfc703335154e845c53927878d424ca244fa494f4bddb3e59f01bbd6111de5172
|
7
|
+
data.tar.gz: b96f8df37df844f461fab2cd1b60fe9b6a729a1a411f62f1dcef34c9684672ebd7c42963d79abdf5fe1e8d5d443104509492fe9128a7dbf3773e9db180592cbd
|
data/README.md
CHANGED
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
|
|
27
27
|
Both conditions are automatically applied when invoking `make lib-mt` target.
|
28
28
|
|
29
29
|
When linking a POSIX program with a multithreaded version of `libzstd`,
|
30
|
-
note that it's necessary to
|
30
|
+
note that it's necessary to invoke the `-pthread` flag during link stage.
|
31
31
|
|
32
32
|
Multithreading capabilities are exposed
|
33
|
-
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3
|
33
|
+
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
|
34
34
|
|
35
35
|
|
36
36
|
#### API
|
@@ -112,6 +112,17 @@ The file structure is designed to make this selection manually achievable for an
|
|
112
112
|
will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
|
113
113
|
the shared library, which is now hidden by default.
|
114
114
|
|
115
|
+
- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
|
116
|
+
which can detect at runtime the presence of BMI2 instructions, and use them only if present.
|
117
|
+
These instructions contribute to better performance, notably on the decoder side.
|
118
|
+
By default, this feature is automatically enabled on detecting
|
119
|
+
the right instruction set (x64) and compiler (clang or gcc >= 5).
|
120
|
+
It's obviously disabled for different cpus,
|
121
|
+
or when BMI2 instruction set is _required_ by the compiler command line
|
122
|
+
(in this case, only the BMI2 code path is generated).
|
123
|
+
Setting this macro will either force to generate the BMI2 dispatcher (1)
|
124
|
+
or prevent it (0). It overrides automatic detection.
|
125
|
+
|
115
126
|
|
116
127
|
#### Windows : using MinGW+MSYS to create DLL
|
117
128
|
|
@@ -57,6 +57,8 @@ extern "C" {
|
|
57
57
|
=========================================*/
|
58
58
|
#if defined(__BMI__) && defined(__GNUC__)
|
59
59
|
# include <immintrin.h> /* support for bextr (experimental) */
|
60
|
+
#elif defined(__ICCARM__)
|
61
|
+
# include <intrinsics.h>
|
60
62
|
#endif
|
61
63
|
|
62
64
|
#define STREAM_ACCUMULATOR_MIN_32 25
|
@@ -162,7 +164,9 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
|
162
164
|
_BitScanReverse ( &r, val );
|
163
165
|
return (unsigned) r;
|
164
166
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
165
|
-
return
|
167
|
+
return __builtin_clz (val) ^ 31;
|
168
|
+
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
169
|
+
return 31 - __CLZ(val);
|
166
170
|
# else /* Software version */
|
167
171
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
|
168
172
|
11, 14, 16, 18, 22, 25, 3, 30,
|
@@ -240,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
|
|
240
244
|
{
|
241
245
|
size_t const nbBytes = bitC->bitPos >> 3;
|
242
246
|
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
247
|
+
assert(bitC->ptr <= bitC->endPtr);
|
243
248
|
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
|
244
249
|
bitC->ptr += nbBytes;
|
245
|
-
assert(bitC->ptr <= bitC->endPtr);
|
246
250
|
bitC->bitPos &= 7;
|
247
251
|
bitC->bitContainer >>= nbBytes*8;
|
248
252
|
}
|
@@ -256,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
|
|
256
260
|
{
|
257
261
|
size_t const nbBytes = bitC->bitPos >> 3;
|
258
262
|
assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
|
263
|
+
assert(bitC->ptr <= bitC->endPtr);
|
259
264
|
MEM_writeLEST(bitC->ptr, bitC->bitContainer);
|
260
265
|
bitC->ptr += nbBytes;
|
261
266
|
if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
@@ -23,7 +23,7 @@
|
|
23
23
|
# define INLINE_KEYWORD
|
24
24
|
#endif
|
25
25
|
|
26
|
-
#if defined(__GNUC__)
|
26
|
+
#if defined(__GNUC__) || defined(__ICCARM__)
|
27
27
|
# define FORCE_INLINE_ATTR __attribute__((always_inline))
|
28
28
|
#elif defined(_MSC_VER)
|
29
29
|
# define FORCE_INLINE_ATTR __forceinline
|
@@ -61,11 +61,18 @@
|
|
61
61
|
# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
|
62
62
|
#endif
|
63
63
|
|
64
|
+
/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
|
65
|
+
#if defined(__GNUC__)
|
66
|
+
# define UNUSED_ATTR __attribute__((unused))
|
67
|
+
#else
|
68
|
+
# define UNUSED_ATTR
|
69
|
+
#endif
|
70
|
+
|
64
71
|
/* force no inlining */
|
65
72
|
#ifdef _MSC_VER
|
66
73
|
# define FORCE_NOINLINE static __declspec(noinline)
|
67
74
|
#else
|
68
|
-
#
|
75
|
+
# if defined(__GNUC__) || defined(__ICCARM__)
|
69
76
|
# define FORCE_NOINLINE static __attribute__((__noinline__))
|
70
77
|
# else
|
71
78
|
# define FORCE_NOINLINE static
|
@@ -76,7 +83,7 @@
|
|
76
83
|
#ifndef __has_attribute
|
77
84
|
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
|
78
85
|
#endif
|
79
|
-
#if defined(__GNUC__)
|
86
|
+
#if defined(__GNUC__) || defined(__ICCARM__)
|
80
87
|
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
|
81
88
|
#else
|
82
89
|
# define TARGET_ATTRIBUTE(target)
|
@@ -127,9 +134,14 @@
|
|
127
134
|
} \
|
128
135
|
}
|
129
136
|
|
130
|
-
/* vectorization
|
137
|
+
/* vectorization
|
138
|
+
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
|
131
139
|
#if !defined(__clang__) && defined(__GNUC__)
|
132
|
-
#
|
140
|
+
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
|
141
|
+
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
142
|
+
# else
|
143
|
+
# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
|
144
|
+
# endif
|
133
145
|
#else
|
134
146
|
# define DONT_VECTORIZE
|
135
147
|
#endif
|
@@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
|
|
308
308
|
*******************************************/
|
309
309
|
/* FSE buffer bounds */
|
310
310
|
#define FSE_NCOUNTBOUND 512
|
311
|
-
#define FSE_BLOCKBOUND(size) (size + (size>>7))
|
311
|
+
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
312
312
|
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
313
313
|
|
314
314
|
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
|
@@ -52,7 +52,9 @@
|
|
52
52
|
#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
|
53
53
|
|
54
54
|
/* check and forward error code */
|
55
|
+
#ifndef CHECK_F
|
55
56
|
#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
|
57
|
+
#endif
|
56
58
|
|
57
59
|
|
58
60
|
/* **************************************************************
|
@@ -47,6 +47,79 @@ extern "C" {
|
|
47
47
|
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
|
48
48
|
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
|
49
49
|
|
50
|
+
/* detects whether we are being compiled under msan */
|
51
|
+
#if defined (__has_feature)
|
52
|
+
# if __has_feature(memory_sanitizer)
|
53
|
+
# define MEMORY_SANITIZER 1
|
54
|
+
# endif
|
55
|
+
#endif
|
56
|
+
|
57
|
+
#if defined (MEMORY_SANITIZER)
|
58
|
+
/* Not all platforms that support msan provide sanitizers/msan_interface.h.
|
59
|
+
* We therefore declare the functions we need ourselves, rather than trying to
|
60
|
+
* include the header file... */
|
61
|
+
|
62
|
+
#include <stdint.h> /* intptr_t */
|
63
|
+
|
64
|
+
/* Make memory region fully initialized (without changing its contents). */
|
65
|
+
void __msan_unpoison(const volatile void *a, size_t size);
|
66
|
+
|
67
|
+
/* Make memory region fully uninitialized (without changing its contents).
|
68
|
+
This is a legacy interface that does not update origin information. Use
|
69
|
+
__msan_allocated_memory() instead. */
|
70
|
+
void __msan_poison(const volatile void *a, size_t size);
|
71
|
+
|
72
|
+
/* Returns the offset of the first (at least partially) poisoned byte in the
|
73
|
+
memory range, or -1 if the whole range is good. */
|
74
|
+
intptr_t __msan_test_shadow(const volatile void *x, size_t size);
|
75
|
+
#endif
|
76
|
+
|
77
|
+
/* detects whether we are being compiled under asan */
|
78
|
+
#if defined (__has_feature)
|
79
|
+
# if __has_feature(address_sanitizer)
|
80
|
+
# define ADDRESS_SANITIZER 1
|
81
|
+
# endif
|
82
|
+
#elif defined(__SANITIZE_ADDRESS__)
|
83
|
+
# define ADDRESS_SANITIZER 1
|
84
|
+
#endif
|
85
|
+
|
86
|
+
#if defined (ADDRESS_SANITIZER)
|
87
|
+
/* Not all platforms that support asan provide sanitizers/asan_interface.h.
|
88
|
+
* We therefore declare the functions we need ourselves, rather than trying to
|
89
|
+
* include the header file... */
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
|
93
|
+
*
|
94
|
+
* This memory must be previously allocated by your program. Instrumented
|
95
|
+
* code is forbidden from accessing addresses in this region until it is
|
96
|
+
* unpoisoned. This function is not guaranteed to poison the entire region -
|
97
|
+
* it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
|
98
|
+
* alignment restrictions.
|
99
|
+
*
|
100
|
+
* \note This function is not thread-safe because no two threads can poison or
|
101
|
+
* unpoison memory in the same memory region simultaneously.
|
102
|
+
*
|
103
|
+
* \param addr Start of memory region.
|
104
|
+
* \param size Size of memory region. */
|
105
|
+
void __asan_poison_memory_region(void const volatile *addr, size_t size);
|
106
|
+
|
107
|
+
/**
|
108
|
+
* Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
|
109
|
+
*
|
110
|
+
* This memory must be previously allocated by your program. Accessing
|
111
|
+
* addresses in this region is allowed until this region is poisoned again.
|
112
|
+
* This function could unpoison a super-region of <c>[addr, addr+size)</c> due
|
113
|
+
* to ASan alignment restrictions.
|
114
|
+
*
|
115
|
+
* \note This function is not thread-safe because no two threads can
|
116
|
+
* poison or unpoison memory in the same memory region simultaneously.
|
117
|
+
*
|
118
|
+
* \param addr Start of memory region.
|
119
|
+
* \param size Size of memory region. */
|
120
|
+
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
|
121
|
+
#endif
|
122
|
+
|
50
123
|
|
51
124
|
/*-**************************************************************
|
52
125
|
* Basic Types
|
@@ -102,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
|
|
102
175
|
#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
103
176
|
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
|
104
177
|
# define MEM_FORCE_MEMORY_ACCESS 2
|
105
|
-
# elif defined(__INTEL_COMPILER) || defined(__GNUC__)
|
178
|
+
# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
|
106
179
|
# define MEM_FORCE_MEMORY_ACCESS 1
|
107
180
|
# endif
|
108
181
|
#endif
|
@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
|
|
127
127
|
ctx->queueTail = 0;
|
128
128
|
ctx->numThreadsBusy = 0;
|
129
129
|
ctx->queueEmpty = 1;
|
130
|
-
|
131
|
-
|
132
|
-
|
130
|
+
{
|
131
|
+
int error = 0;
|
132
|
+
error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
|
133
|
+
error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
|
134
|
+
error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
|
135
|
+
if (error) { POOL_free(ctx); return NULL; }
|
136
|
+
}
|
133
137
|
ctx->shutdown = 0;
|
134
138
|
/* Allocate space for the thread handles */
|
135
139
|
ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
|
@@ -14,6 +14,8 @@
|
|
14
14
|
* This file will hold wrapper for systems, which do not support pthreads
|
15
15
|
*/
|
16
16
|
|
17
|
+
#include "threading.h"
|
18
|
+
|
17
19
|
/* create fake symbol to avoid empty translation unit warning */
|
18
20
|
int g_ZSTD_threading_useless_symbol;
|
19
21
|
|
@@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
|
|
28
30
|
/* === Dependencies === */
|
29
31
|
#include <process.h>
|
30
32
|
#include <errno.h>
|
31
|
-
#include "threading.h"
|
32
33
|
|
33
34
|
|
34
35
|
/* === Implementation === */
|
@@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
|
|
73
74
|
}
|
74
75
|
|
75
76
|
#endif /* ZSTD_MULTITHREAD */
|
77
|
+
|
78
|
+
#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
|
79
|
+
|
80
|
+
#include <stdlib.h>
|
81
|
+
|
82
|
+
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
|
83
|
+
{
|
84
|
+
*mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
|
85
|
+
if (!*mutex)
|
86
|
+
return 1;
|
87
|
+
return pthread_mutex_init(*mutex, attr);
|
88
|
+
}
|
89
|
+
|
90
|
+
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
|
91
|
+
{
|
92
|
+
if (!*mutex)
|
93
|
+
return 0;
|
94
|
+
{
|
95
|
+
int const ret = pthread_mutex_destroy(*mutex);
|
96
|
+
free(*mutex);
|
97
|
+
return ret;
|
98
|
+
}
|
99
|
+
}
|
100
|
+
|
101
|
+
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
|
102
|
+
{
|
103
|
+
*cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
|
104
|
+
if (!*cond)
|
105
|
+
return 1;
|
106
|
+
return pthread_cond_init(*cond, attr);
|
107
|
+
}
|
108
|
+
|
109
|
+
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
|
110
|
+
{
|
111
|
+
if (!*cond)
|
112
|
+
return 0;
|
113
|
+
{
|
114
|
+
int const ret = pthread_cond_destroy(*cond);
|
115
|
+
free(*cond);
|
116
|
+
return ret;
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
#endif
|
@@ -13,6 +13,8 @@
|
|
13
13
|
#ifndef THREADING_H_938743
|
14
14
|
#define THREADING_H_938743
|
15
15
|
|
16
|
+
#include "debug.h"
|
17
|
+
|
16
18
|
#if defined (__cplusplus)
|
17
19
|
extern "C" {
|
18
20
|
#endif
|
@@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
|
75
77
|
*/
|
76
78
|
|
77
79
|
|
78
|
-
#elif defined(ZSTD_MULTITHREAD)
|
80
|
+
#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
|
79
81
|
/* === POSIX Systems === */
|
80
82
|
# include <pthread.h>
|
81
83
|
|
84
|
+
#if DEBUGLEVEL < 1
|
85
|
+
|
82
86
|
#define ZSTD_pthread_mutex_t pthread_mutex_t
|
83
87
|
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
|
84
88
|
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
|
@@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
|
|
96
100
|
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
97
101
|
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
98
102
|
|
103
|
+
#else /* DEBUGLEVEL >= 1 */
|
104
|
+
|
105
|
+
/* Debug implementation of threading.
|
106
|
+
* In this implementation we use pointers for mutexes and condition variables.
|
107
|
+
* This way, if we forget to init/destroy them the program will crash or ASAN
|
108
|
+
* will report leaks.
|
109
|
+
*/
|
110
|
+
|
111
|
+
#define ZSTD_pthread_mutex_t pthread_mutex_t*
|
112
|
+
int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
|
113
|
+
int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
|
114
|
+
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
|
115
|
+
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
|
116
|
+
|
117
|
+
#define ZSTD_pthread_cond_t pthread_cond_t*
|
118
|
+
int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
|
119
|
+
int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
|
120
|
+
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
|
121
|
+
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
|
122
|
+
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
|
123
|
+
|
124
|
+
#define ZSTD_pthread_t pthread_t
|
125
|
+
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
|
126
|
+
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
|
127
|
+
|
128
|
+
#endif
|
129
|
+
|
99
130
|
#else /* ZSTD_MULTITHREAD not defined */
|
100
131
|
/* No multithreading support */
|
101
132
|
|
@@ -53,7 +53,8 @@
|
|
53
53
|
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
|
54
54
|
# define XXH_FORCE_MEMORY_ACCESS 2
|
55
55
|
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
|
56
|
-
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
|
56
|
+
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
|
57
|
+
defined(__ICCARM__)
|
57
58
|
# define XXH_FORCE_MEMORY_ACCESS 1
|
58
59
|
# endif
|
59
60
|
#endif
|
@@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
|
|
120
121
|
# define INLINE_KEYWORD
|
121
122
|
#endif
|
122
123
|
|
123
|
-
#if defined(__GNUC__)
|
124
|
+
#if defined(__GNUC__) || defined(__ICCARM__)
|
124
125
|
# define FORCE_INLINE_ATTR __attribute__((always_inline))
|
125
126
|
#elif defined(_MSC_VER)
|
126
127
|
# define FORCE_INLINE_ATTR __forceinline
|
@@ -206,7 +207,12 @@ static U64 XXH_read64(const void* memPtr)
|
|
206
207
|
# define XXH_rotl32(x,r) _rotl(x,r)
|
207
208
|
# define XXH_rotl64(x,r) _rotl64(x,r)
|
208
209
|
#else
|
210
|
+
#if defined(__ICCARM__)
|
211
|
+
# include <intrinsics.h>
|
212
|
+
# define XXH_rotl32(x,r) __ROR(x,(32 - r))
|
213
|
+
#else
|
209
214
|
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
215
|
+
#endif
|
210
216
|
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
|
211
217
|
#endif
|
212
218
|
|
@@ -56,9 +56,9 @@ extern "C" {
|
|
56
56
|
/**
|
57
57
|
* Return the specified error if the condition evaluates to true.
|
58
58
|
*
|
59
|
-
* In debug modes, prints additional information.
|
60
|
-
* (particularly, printing the conditional that failed),
|
61
|
-
* RETURN_ERROR().
|
59
|
+
* In debug modes, prints additional information.
|
60
|
+
* In order to do that (particularly, printing the conditional that failed),
|
61
|
+
* this can't just wrap RETURN_ERROR().
|
62
62
|
*/
|
63
63
|
#define RETURN_ERROR_IF(cond, err, ...) \
|
64
64
|
if (cond) { \
|
@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
|
|
197
197
|
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
|
198
198
|
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
199
199
|
|
200
|
-
#define WILDCOPY_OVERLENGTH
|
201
|
-
#define
|
200
|
+
#define WILDCOPY_OVERLENGTH 32
|
201
|
+
#define WILDCOPY_VECLEN 16
|
202
202
|
|
203
203
|
typedef enum {
|
204
204
|
ZSTD_no_overlap,
|
205
|
-
ZSTD_overlap_src_before_dst
|
205
|
+
ZSTD_overlap_src_before_dst
|
206
206
|
/* ZSTD_overlap_dst_before_src, */
|
207
207
|
} ZSTD_overlap_e;
|
208
208
|
|
209
209
|
/*! ZSTD_wildcopy() :
|
210
|
-
*
|
210
|
+
* Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
|
211
|
+
* @param ovtype controls the overlap detection
|
212
|
+
* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
|
213
|
+
* - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
|
214
|
+
* The src buffer must be before the dst buffer.
|
215
|
+
*/
|
211
216
|
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
212
|
-
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
217
|
+
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
|
213
218
|
{
|
214
219
|
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
215
220
|
const BYTE* ip = (const BYTE*)src;
|
216
221
|
BYTE* op = (BYTE*)dst;
|
217
222
|
BYTE* const oend = op + length;
|
218
223
|
|
219
|
-
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
224
|
+
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
|
225
|
+
|
226
|
+
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
|
227
|
+
/* Handle short offset copies. */
|
228
|
+
do {
|
229
|
+
COPY8(op, ip)
|
230
|
+
} while (op < oend);
|
231
|
+
} else {
|
232
|
+
assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
|
233
|
+
/* Separate out the first two COPY16() calls because the copy length is
|
234
|
+
* almost certain to be short, so the branches have different
|
235
|
+
* probabilities.
|
236
|
+
* On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
|
237
|
+
* On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
|
238
|
+
*/
|
229
239
|
COPY16(op, ip);
|
230
|
-
}
|
231
|
-
while (op < oend);
|
232
|
-
}
|
233
|
-
}
|
234
|
-
|
235
|
-
/*! ZSTD_wildcopy_16min() :
|
236
|
-
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
|
237
|
-
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
|
238
|
-
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
|
239
|
-
{
|
240
|
-
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
|
241
|
-
const BYTE* ip = (const BYTE*)src;
|
242
|
-
BYTE* op = (BYTE*)dst;
|
243
|
-
BYTE* const oend = op + length;
|
244
|
-
|
245
|
-
assert(length >= 8);
|
246
|
-
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
|
247
|
-
|
248
|
-
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
|
249
|
-
do
|
250
|
-
COPY8(op, ip)
|
251
|
-
while (op < oend);
|
252
|
-
}
|
253
|
-
else {
|
254
|
-
if ((length & 8) == 0)
|
255
|
-
COPY8(op, ip);
|
256
|
-
do {
|
257
240
|
COPY16(op, ip);
|
258
|
-
|
259
|
-
|
241
|
+
if (op >= oend) return;
|
242
|
+
do {
|
243
|
+
COPY16(op, ip);
|
244
|
+
COPY16(op, ip);
|
245
|
+
}
|
246
|
+
while (op < oend);
|
260
247
|
}
|
261
248
|
}
|
262
249
|
|
263
|
-
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
|
264
|
-
{
|
265
|
-
const BYTE* ip = (const BYTE*)src;
|
266
|
-
BYTE* op = (BYTE*)dst;
|
267
|
-
BYTE* const oend = (BYTE*)dstEnd;
|
268
|
-
do
|
269
|
-
COPY8(op, ip)
|
270
|
-
while (op < oend);
|
271
|
-
}
|
272
|
-
|
273
250
|
|
274
251
|
/*-*******************************************
|
275
252
|
* Private declarations
|
@@ -323,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
|
323
300
|
_BitScanReverse(&r, val);
|
324
301
|
return (unsigned)r;
|
325
302
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
326
|
-
return
|
303
|
+
return __builtin_clz (val) ^ 31;
|
304
|
+
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
305
|
+
return 31 - __CLZ(val);
|
327
306
|
# else /* Software version */
|
328
307
|
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
329
308
|
U32 v = val;
|