zstd-ruby 1.4.2.0 → 1.4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +0 -2
  4. data/ext/zstdruby/libzstd/README.md +13 -2
  5. data/ext/zstdruby/libzstd/common/bitstream.h +7 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +17 -5
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -0
  9. data/ext/zstdruby/libzstd/common/mem.h +74 -1
  10. data/ext/zstdruby/libzstd/common/pool.c +7 -3
  11. data/ext/zstdruby/libzstd/common/threading.c +46 -1
  12. data/ext/zstdruby/libzstd/common/threading.h +32 -1
  13. data/ext/zstdruby/libzstd/common/xxhash.c +8 -2
  14. data/ext/zstdruby/libzstd/common/zstd_internal.h +37 -58
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +644 -445
  16. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +98 -26
  17. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -5
  18. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  19. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  21. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +535 -0
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -12
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +38 -45
  24. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +35 -31
  25. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  26. data/ext/zstdruby/libzstd/compress/zstd_opt.c +6 -6
  27. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -26
  28. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +2 -0
  29. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +16 -17
  30. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +149 -148
  31. data/ext/zstdruby/libzstd/deprecated/zbuff.h +6 -5
  32. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -8
  33. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
  34. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  35. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +2 -1
  36. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +2 -1
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +6 -2
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
  39. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
  40. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
  41. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  42. data/ext/zstdruby/libzstd/zstd.h +170 -66
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. data/zstd-ruby.gemspec +1 -1
  45. metadata +5 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 69e6ba233f94b8ed930f34217befa3268b0bd94fb8116130066e7976c0b31d23
-  data.tar.gz: 7d01847ddb0a3a5e1996eb4a45bb2c84ad9230f5b9c002dea32d202036be15d6
+  metadata.gz: 53be8076094a9ed214cbc86a174eeb3b587dc3d4781f7fbcf8ee280ffd0ca169
+  data.tar.gz: ebf9bcf8d062447dab0589c1acfe63f2481f9445c78f51653daf1f60750dfdd5
 SHA512:
-  metadata.gz: 7122bbd06a6ff9a52240c8d0b708f083c6567e4b586affd3c48bc17e412cb6beaaf097288627ded338787c4c7a940ec5dab32f704db54c29a7cba7fde848dca8
-  data.tar.gz: 1839b136a9b3c181cfc3f18cce8f07b96eaa2e537d0753d8f085243ea8f9b91019c2c59174a1a2a5ca2419ec39a23542faf175ba685c70b0543597f461c1d937
+  metadata.gz: 7f3ac7650f5ec553aea615d60737bc092bca9d06c998eecc0570b5d542344f4cfc703335154e845c53927878d424ca244fa494f4bddb3e59f01bbd6111de5172
+  data.tar.gz: b96f8df37df844f461fab2cd1b60fe9b6a729a1a411f62f1dcef34c9684672ebd7c42963d79abdf5fe1e8d5d443104509492fe9128a7dbf3773e9db180592cbd
data/README.md CHANGED
@@ -10,7 +10,7 @@ See https://github.com/facebook/zstd
 Fork from https://github.com/jarredholman/ruby-zstd.
 
 ## Zstd version
-v1.4.2 (https://github.com/facebook/zstd/tree/v1.4.2)
+v1.4.4 (https://github.com/facebook/zstd/tree/v1.4.4)
 
 ## Installation
 
data/ext/zstdruby/libzstd/Makefile CHANGED
@@ -244,8 +244,6 @@ libzstd.pc:
 libzstd.pc: libzstd.pc.in
 	@echo creating pkgconfig
 	@sed -e 's|@PREFIX@|$(PREFIX)|' \
-	    -e 's|@LIBDIR@|$(LIBDIR)|' \
-	    -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
 	    -e 's|@VERSION@|$(VERSION)|' \
 	    $< >$@
 
data/ext/zstdruby/libzstd/README.md CHANGED
@@ -27,10 +27,10 @@ Enabling multithreading requires 2 conditions :
 Both conditions are automatically applied when invoking `make lib-mt` target.
 
 When linking a POSIX program with a multithreaded version of `libzstd`,
-note that it's necessary to request the `-pthread` flag during link stage.
+note that it's necessary to invoke the `-pthread` flag during link stage.
 
 Multithreading capabilities are exposed
-via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
+via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.4.3/lib/zstd.h#L351).
 
 
 #### API
@@ -112,6 +112,17 @@ The file structure is designed to make this selection manually achievable for any compiler.
   will expose the deprecated `ZSTDMT` API exposed by `zstdmt_compress.h` in
   the shared library, which is now hidden by default.
 
+- The build macro `DYNAMIC_BMI2` can be set to 1 or 0 in order to generate binaries
+  which can detect at runtime the presence of BMI2 instructions, and use them only if present.
+  These instructions contribute to better performance, notably on the decoder side.
+  By default, this feature is automatically enabled on detecting
+  the right instruction set (x64) and compiler (clang or gcc >= 5).
+  It's obviously disabled for different cpus,
+  or when BMI2 instruction set is _required_ by the compiler command line
+  (in this case, only the BMI2 code path is generated).
+  Setting this macro will either force to generate the BMI2 dispatcher (1)
+  or prevent it (0). It overrides automatic detection.
+
 
 #### Windows : using MinGW+MSYS to create DLL
 
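Note: a minimal standalone sketch of the runtime-dispatch pattern that `DYNAMIC_BMI2` enables (assumptions: x86-64 target, GCC or clang with `<cpuid.h>`; this is illustrative, not zstd's actual dispatcher):

    /* Probe CPUID once, then route hot calls to the BMI2 or portable path. */
    #include <cpuid.h>   /* GCC/clang header providing __get_cpuid_count() */
    #include <stdio.h>

    static int cpu_has_bmi2(void)
    {
        unsigned eax, ebx, ecx, edx;
        /* Structured extended feature flags: leaf 7, subleaf 0; BMI2 = EBX bit 8. */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) return 0;
        return (ebx >> 8) & 1;
    }

    int main(void)
    {
        static int bmi2 = -1;                 /* probed once, then cached */
        if (bmi2 < 0) bmi2 = cpu_has_bmi2();
        printf("BMI2 code path: %s\n", bmi2 ? "selected" : "skipped");
        return 0;
    }
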
data/ext/zstdruby/libzstd/common/bitstream.h CHANGED
@@ -57,6 +57,8 @@ extern "C" {
 =========================================*/
 #if defined(__BMI__) && defined(__GNUC__)
 #  include <immintrin.h>   /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+#  include <intrinsics.h>
 #endif
 
 #define STREAM_ACCUMULATOR_MIN_32  25
@@ -162,7 +164,9 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
     _BitScanReverse ( &r, val );
     return (unsigned) r;
 #  elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
+#  elif defined(__ICCARM__)   /* IAR Intrinsic */
+    return 31 - __CLZ(val);
 #  else   /* Software version */
     static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                              11, 14, 16, 18, 22, 25,  3, 30,
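Note on the `31 - __builtin_clz(val)` → `__builtin_clz(val) ^ 31` rewrite (it also appears below in zstd_internal.h): for nonzero `val`, `__builtin_clz` returns a value in [0,31], and for any c in that range 31 - c equals 31 ^ c, because 31 is 0b11111 and subtracting from an all-ones mask is a bitwise complement; the XOR form folds directly into x86's BSR result. A throwaway check of the identity:

    #include <assert.h>

    int main(void)
    {
        unsigned c;
        for (c = 0; c <= 31; c++)
            assert((31 - c) == (31 ^ c));   /* holds over the whole clz range */
        return 0;
    }
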
@@ -240,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
-    assert(bitC->ptr <= bitC->endPtr);
     bitC->bitPos &= 7;
     bitC->bitContainer >>= nbBytes*8;
 }
@@ -256,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
     size_t const nbBytes = bitC->bitPos >> 3;
     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
     bitC->ptr += nbBytes;
     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
data/ext/zstdruby/libzstd/common/compiler.h CHANGED
@@ -23,7 +23,7 @@
 #  define INLINE_KEYWORD
 #endif
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #  define FORCE_INLINE_ATTR __forceinline
@@ -61,11 +61,18 @@
 #  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif
 
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
 /* force no inlining */
 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
 #else
-#  ifdef __GNUC__
+#  if defined(__GNUC__) || defined(__ICCARM__)
 #    define FORCE_NOINLINE static __attribute__((__noinline__))
 #  else
 #    define FORCE_NOINLINE static
@@ -76,7 +83,7 @@
 #ifndef __has_attribute
   #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
 #endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
 #else
 #  define TARGET_ATTRIBUTE(target)
@@ -127,9 +134,14 @@
   }                 \
 }
 
-/* vectorization */
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
 #if !defined(__clang__) && defined(__GNUC__)
-#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
 #else
 #  define DONT_VECTORIZE
 #endif
data/ext/zstdruby/libzstd/common/fse.h CHANGED
@@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be tested using FSE_isError().
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
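Note: the widened bound covers the final FSE state flushes (4 bytes) plus one full bitContainer (sizeof(size_t) bytes), per the macro's own comments. Worked out for size = 1000 on a 64-bit target: the old bound was 1000 + (1000>>7) = 1007 bytes, while the new one is 1000 + 7 + 4 + 8 = 1019 bytes.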
data/ext/zstdruby/libzstd/common/fse_decompress.c CHANGED
@@ -52,7 +52,9 @@
 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
 
 /* check and forward error code */
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+#endif
 
 
 /* **************************************************************
data/ext/zstdruby/libzstd/common/mem.h CHANGED
@@ -47,6 +47,79 @@ extern "C" {
 #define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 
+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    define MEMORY_SANITIZER 1
+#  endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+#  if __has_feature(address_sanitizer)
+#    define ADDRESS_SANITIZER 1
+#  endif
+#elif defined(__SANITIZE_ADDRESS__)
+#  define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
 
 /*-**************************************************************
 *  Basic Types
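Note: these hand-declared hooks let an allocator mark memory it owns but has not handed out as unaddressable; the new `zstd_cwksp.h` workspace (file 21 in the list above) is the in-tree consumer. The pattern in miniature, with a hypothetical bump allocator (the `arena_*` names are illustrative, not zstd API):

    #include <stddef.h>

    #if defined(__has_feature)
    #  if __has_feature(address_sanitizer)
    #    define HAS_ASAN 1
    #  endif
    #elif defined(__SANITIZE_ADDRESS__)
    #  define HAS_ASAN 1
    #endif

    #ifdef HAS_ASAN
    void __asan_poison_memory_region(void const volatile *addr, size_t size);
    void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
    #  define POISON(p, n)   __asan_poison_memory_region((p), (n))
    #  define UNPOISON(p, n) __asan_unpoison_memory_region((p), (n))
    #else   /* no-ops without ASan, so the sketch still builds anywhere */
    #  define POISON(p, n)   ((void)(p), (void)(n))
    #  define UNPOISON(p, n) ((void)(p), (void)(n))
    #endif

    static char arena[1024];
    static size_t used = 0;

    static void* arena_alloc(size_t n)
    {
        if (n > sizeof(arena) - used) return 0;
        {   void* p = arena + used;
            used += n;
            UNPOISON(p, n);               /* block becomes addressable */
            return p;
        }
    }

    static void arena_reset(void)
    {
        used = 0;
        POISON(arena, sizeof(arena));     /* everything unaddressable again */
    }

    int main(void)
    {
        char* p = (char*)arena_alloc(16);
        p[0] = 'x';                       /* fine: unpoisoned */
        arena_reset();                    /* touching p now trips ASan */
        return 0;
    }
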
@@ -102,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
data/ext/zstdruby/libzstd/common/pool.c CHANGED
@@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
     ctx->queueTail = 0;
     ctx->numThreadsBusy = 0;
     ctx->queueEmpty = 1;
-    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+    {
+        int error = 0;
+        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+        if (error) { POOL_free(ctx); return NULL; }
+    }
     ctx->shutdown = 0;
     /* Allocate space for the thread handles */
     ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
data/ext/zstdruby/libzstd/common/threading.c CHANGED
@@ -14,6 +14,8 @@
  * This file will hold wrapper for systems, which do not support pthreads
  */
 
+#include "threading.h"
+
 /* create fake symbol to avoid empty translation unit warning */
 int g_ZSTD_threading_useless_symbol;
 
@@ -28,7 +30,6 @@ int g_ZSTD_threading_useless_symbol;
 /* ===  Dependencies  === */
 #include <process.h>
 #include <errno.h>
-#include "threading.h"
 
 
 /* ===  Implementation  === */
@@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 }
 
 #endif   /* ZSTD_MULTITHREAD */
+
+#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
+
+#include <stdlib.h>
+
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
+{
+    *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+    if (!*mutex)
+        return 1;
+    return pthread_mutex_init(*mutex, attr);
+}
+
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
+{
+    if (!*mutex)
+        return 0;
+    {
+        int const ret = pthread_mutex_destroy(*mutex);
+        free(*mutex);
+        return ret;
+    }
+}
+
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
+{
+    *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+    if (!*cond)
+        return 1;
+    return pthread_cond_init(*cond, attr);
+}
+
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
+{
+    if (!*cond)
+        return 0;
+    {
+        int const ret = pthread_cond_destroy(*cond);
+        free(*cond);
+        return ret;
+    }
+}
+
+#endif
data/ext/zstdruby/libzstd/common/threading.h CHANGED
@@ -13,6 +13,8 @@
 #ifndef THREADING_H_938743
 #define THREADING_H_938743
 
+#include "debug.h"
+
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
  */
 
 
-#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
+#elif defined(ZSTD_MULTITHREAD)    /* posix assumed ; need a better detection method */
 /* ===   POSIX Systems   === */
 #  include <pthread.h>
 
+#if DEBUGLEVEL < 1
+
 #define ZSTD_pthread_mutex_t            pthread_mutex_t
 #define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
 #define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
@@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
 #define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
 
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
+
+#endif
+
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */
data/ext/zstdruby/libzstd/common/xxhash.c CHANGED
@@ -53,7 +53,8 @@
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
 #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
+  defined(__ICCARM__)
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 #  define INLINE_KEYWORD
 #endif
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #  define FORCE_INLINE_ATTR __forceinline
@@ -206,7 +207,12 @@
 #  define XXH_rotl32(x,r) _rotl(x,r)
 #  define XXH_rotl64(x,r) _rotl64(x,r)
 #else
+#if defined(__ICCARM__)
+#  include <intrinsics.h>
+#  define XXH_rotl32(x,r) __ROR(x,(32 - r))
+#else
 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
 #  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
 #endif
 
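Note: the IAR branch spells a left rotate as `__ROR(x, 32 - r)` because rotating a 32-bit word left by r is the same as rotating it right by 32 - r. A quick sanity check of that identity:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned r) { return (x << r) | (x >> (32 - r)); }
    static uint32_t rotr32(uint32_t x, unsigned r) { return (x >> r) | (x << (32 - r)); }

    int main(void)
    {
        unsigned r;
        for (r = 1; r < 32; r++)          /* avoid the shift-by-32 edge cases */
            assert(rotl32(0xDEADBEEFu, r) == rotr32(0xDEADBEEFu, 32 - r));
        return 0;
    }
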
data/ext/zstdruby/libzstd/common/zstd_internal.h CHANGED
@@ -56,9 +56,9 @@ extern "C" {
 /**
  * Return the specified error if the condition evaluates to true.
  *
- * In debug modes, prints additional information. In order to do that
- * (particularly, printing the conditional that failed), this can't just wrap
- * RETURN_ERROR().
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
  */
 #define RETURN_ERROR_IF(cond, err, ...) \
   if (cond) { \
@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
 
-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
 
 typedef enum {
     ZSTD_no_overlap,
-    ZSTD_overlap_src_before_dst,
+    ZSTD_overlap_src_before_dst
     /*  ZSTD_overlap_dst_before_src, */
 } ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
- *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
     ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
 
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first two COPY16() calls because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities.
+         * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+         * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+         */
         COPY16(op, ip);
-      }
-      while (op < oend);
-    }
-}
-
-/*! ZSTD_wildcopy_16min() :
- *  same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
-{
-    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + length;
-
-    assert(length >= 8);
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
-    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
-      do
-          COPY8(op, ip)
-      while (op < oend);
-    }
-    else {
-      if ((length & 8) == 0)
-          COPY8(op, ip);
-      do {
         COPY16(op, ip);
-      }
-      while (op < oend);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
     }
 }
 
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = (BYTE*)dstEnd;
-    do
-        COPY8(op, ip)
-    while (op < oend);
-}
-
 
 /*-*******************************************
 *  Private declarations
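Note: the rewritten ZSTD_wildcopy leans on deliberate over-copy: it moves whole 16-byte strides and may read/write up to WILDCOPY_OVERLENGTH (now 32) bytes past `length`, which is why callers must leave that much slack after the destination. The core trick in isolation (a sketch under those assumptions, not the zstd function itself):

    #include <stddef.h>
    #include <string.h>
    #include <stdio.h>

    #define OVERLENGTH 32                 /* mirrors WILDCOPY_OVERLENGTH */

    /* Copies length bytes but may overshoot the end by up to 15 bytes,
     * so the loop needs no scalar tail handling at all. */
    static void wildcopy16(void* dst, const void* src, ptrdiff_t length)
    {
        const char* ip = (const char*)src;
        char* op = (char*)dst;
        char* const oend = op + length;
        do {
            memcpy(op, ip, 16);           /* one vector load/store per step */
            op += 16; ip += 16;
        } while (op < oend);
    }

    int main(void)
    {
        char src[64] = "hello, wildcopy";
        char dst[24 + OVERLENGTH];        /* payload + mandatory slack */
        wildcopy16(dst, src, 24);
        printf("%s\n", dst);
        return 0;
    }
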
@@ -323,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
     _BitScanReverse(&r, val);
     return (unsigned)r;
 #  elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-    return 31 - __builtin_clz(val);
+    return __builtin_clz (val) ^ 31;
+#  elif defined(__ICCARM__)   /* IAR Intrinsic */
+    return 31 - __CLZ(val);
 #  else   /* Software version */
     static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
     U32 v = val;