digest-blake3 1.4.1.0 → 1.5.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +11 -8
- data/ext/digest/blake3/blake3.h +1 -1
- data/ext/digest/blake3/blake3_dispatch.c +34 -5
- data/ext/digest/blake3/blake3_impl.h +5 -1
- data/ext/digest/blake3/blake3_neon.c +2 -4
- data/lib/digest/blake3/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bf10e44aaa74a31f9a334b67ecfadfeda7f31a4d3055bd48c5f1a8609e53661
|
4
|
+
data.tar.gz: 53072abb4749ecdfd6748360fc33d39789078413078858953bffcd1ae1cfcdaf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b93a9bdf8b7f2fa4986090e466dfe6c0661d6e9e2de7864cb83846f314a07f198ce16bb6289edb50cf1a7333463136049d61c5ee6fa3db9c8c4855e229fd93a7
|
7
|
+
data.tar.gz: df226266cb38882b121c401d074ad84559265751c897fccda004a8f3d230c2e67215ee6086cfac13fc7043316955511179de4c024dd9d4b5f9048a948ea52080
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -341,21 +341,24 @@ INLINE void compress_subtree_to_parent_node(
|
|
341
341
|
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
|
342
342
|
chunk_counter, flags, cv_array);
|
343
343
|
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
|
344
|
-
|
345
|
-
// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
|
344
|
+
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
|
345
|
+
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
|
346
|
+
// (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
|
347
|
+
// set then it emits incorrect warnings here. We tried a few different
|
348
|
+
// hacks to silence these, but in the end our hacks just produced different
|
349
|
+
// warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
|
350
|
+
// desperation, we ifdef out this entire loop when we know it's not needed.
|
351
|
+
#if MAX_SIMD_DEGREE_OR_2 > 2
|
352
|
+
// If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
|
346
353
|
// compress_subtree_wide() returns more than 2 chaining values. Condense
|
347
354
|
// them into 2 by forming parent nodes repeatedly.
|
348
355
|
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
349
|
-
|
350
|
-
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
|
351
|
-
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
|
352
|
-
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
|
353
|
-
// this code, test it against that version.
|
354
|
-
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
|
356
|
+
while (num_cvs > 2) {
|
355
357
|
num_cvs =
|
356
358
|
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
|
357
359
|
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
358
360
|
}
|
361
|
+
#endif
|
359
362
|
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
|
360
363
|
}
|
361
364
|
|
data/ext/digest/blake3/blake3.h
CHANGED
data/ext/digest/blake3/blake3_dispatch.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
#if defined(IS_X86)
|
8
8
|
#if defined(_MSC_VER)
|
9
|
+
#include <Windows.h>
|
9
10
|
#include <intrin.h>
|
10
11
|
#elif defined(__GNUC__)
|
11
12
|
#include <immintrin.h>
|
@@ -14,6 +15,32 @@
|
|
14
15
|
#endif
|
15
16
|
#endif
|
16
17
|
|
18
|
+
#if !defined(BLAKE3_ATOMICS)
|
19
|
+
#if defined(__has_include)
|
20
|
+
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
|
21
|
+
#define BLAKE3_ATOMICS 1
|
22
|
+
#else
|
23
|
+
#define BLAKE3_ATOMICS 0
|
24
|
+
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
|
25
|
+
#else
|
26
|
+
#define BLAKE3_ATOMICS 0
|
27
|
+
#endif /* defined(__has_include) */
|
28
|
+
#endif /* BLAKE3_ATOMICS */
|
29
|
+
|
30
|
+
#if BLAKE3_ATOMICS
|
31
|
+
#define ATOMIC_INT _Atomic int
|
32
|
+
#define ATOMIC_LOAD(x) x
|
33
|
+
#define ATOMIC_STORE(x, y) x = y
|
34
|
+
#elif defined(_MSC_VER)
|
35
|
+
#define ATOMIC_INT LONG
|
36
|
+
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
|
37
|
+
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
|
38
|
+
#else
|
39
|
+
#define ATOMIC_INT int
|
40
|
+
#define ATOMIC_LOAD(x) x
|
41
|
+
#define ATOMIC_STORE(x, y) x = y
|
42
|
+
#endif
|
43
|
+
|
17
44
|
#define MAYBE_UNUSED(x) (void)((x))
|
18
45
|
|
19
46
|
#if defined(IS_X86)
|
@@ -76,7 +103,7 @@ enum cpu_feature {
|
|
76
103
|
#if !defined(BLAKE3_TESTING)
|
77
104
|
static /* Allow the variable to be controlled manually for testing */
|
78
105
|
#endif
|
79
|
-
|
106
|
+
ATOMIC_INT g_cpu_features = UNDEFINED;
|
80
107
|
|
81
108
|
#if !defined(BLAKE3_TESTING)
|
82
109
|
static
|
@@ -84,14 +111,16 @@ static
|
|
84
111
|
enum cpu_feature
|
85
112
|
get_cpu_features(void) {
|
86
113
|
|
87
|
-
|
88
|
-
|
114
|
+
/* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
|
115
|
+
enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
|
116
|
+
if (features != UNDEFINED) {
|
117
|
+
return features;
|
89
118
|
} else {
|
90
119
|
#if defined(IS_X86)
|
91
120
|
uint32_t regs[4] = {0};
|
92
121
|
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
|
93
122
|
(void)edx;
|
94
|
-
|
123
|
+
features = 0;
|
95
124
|
cpuid(regs, 0);
|
96
125
|
const int max_id = *eax;
|
97
126
|
cpuid(regs, 1);
|
@@ -124,7 +153,7 @@ static
|
|
124
153
|
}
|
125
154
|
}
|
126
155
|
}
|
127
|
-
g_cpu_features = features;
|
156
|
+
ATOMIC_STORE(g_cpu_features, features);
|
128
157
|
return features;
|
129
158
|
#else
|
130
159
|
/* How to detect NEON? */
|
data/ext/digest/blake3/blake3_impl.h
CHANGED
@@ -51,7 +51,11 @@ enum blake3_flags {
|
|
51
51
|
#if !defined(BLAKE3_USE_NEON)
|
52
52
|
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
53
53
|
#if defined(IS_AARCH64)
|
54
|
-
#define BLAKE3_USE_NEON 1
|
54
|
+
#if defined(__ARM_BIG_ENDIAN)
|
55
|
+
#define BLAKE3_USE_NEON 0
|
56
|
+
#else
|
57
|
+
#define BLAKE3_USE_NEON 1
|
58
|
+
#endif
|
55
59
|
#else
|
56
60
|
#define BLAKE3_USE_NEON 0
|
57
61
|
#endif
|
data/ext/digest/blake3/blake3_neon.c
CHANGED
@@ -10,14 +10,12 @@
|
|
10
10
|
|
11
11
|
INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
|
12
12
|
// vld1q_u32 has alignment requirements. Don't use it.
|
13
|
-
|
14
|
-
memcpy(&x, src, 16);
|
15
|
-
return x;
|
13
|
+
return vreinterpretq_u32_u8(vld1q_u8(src));
|
16
14
|
}
|
17
15
|
|
18
16
|
INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) {
|
19
17
|
// vst1q_u32 has alignment requirements. Don't use it.
|
20
|
-
|
18
|
+
vst1q_u8(dest, vreinterpretq_u8_u32(src));
|
21
19
|
}
|
22
20
|
|
23
21
|
INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1.0
|
4
|
+
version: 1.5.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|