digest-blake3 1.4.1.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +11 -8
- data/ext/digest/blake3/blake3.h +1 -1
- data/ext/digest/blake3/blake3_dispatch.c +34 -5
- data/ext/digest/blake3/blake3_impl.h +5 -1
- data/ext/digest/blake3/blake3_neon.c +2 -4
- data/lib/digest/blake3/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2bf10e44aaa74a31f9a334b67ecfadfeda7f31a4d3055bd48c5f1a8609e53661
|
4
|
+
data.tar.gz: 53072abb4749ecdfd6748360fc33d39789078413078858953bffcd1ae1cfcdaf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b93a9bdf8b7f2fa4986090e466dfe6c0661d6e9e2de7864cb83846f314a07f198ce16bb6289edb50cf1a7333463136049d61c5ee6fa3db9c8c4855e229fd93a7
|
7
|
+
data.tar.gz: df226266cb38882b121c401d074ad84559265751c897fccda004a8f3d230c2e67215ee6086cfac13fc7043316955511179de4c024dd9d4b5f9048a948ea52080
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -341,21 +341,24 @@ INLINE void compress_subtree_to_parent_node(
|
|
341
341
|
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
|
342
342
|
chunk_counter, flags, cv_array);
|
343
343
|
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
|
344
|
-
|
345
|
-
//
|
344
|
+
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
|
345
|
+
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
|
346
|
+
// (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
|
347
|
+
// set then it emits incorrect warnings here. We tried a few different
|
348
|
+
// hacks to silence these, but in the end our hacks just produced different
|
349
|
+
// warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
|
350
|
+
// desperation, we ifdef out this entire loop when we know it's not needed.
|
351
|
+
#if MAX_SIMD_DEGREE_OR_2 > 2
|
352
|
+
// If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
|
346
353
|
// compress_subtree_wide() returns more than 2 chaining values. Condense
|
347
354
|
// them into 2 by forming parent nodes repeatedly.
|
348
355
|
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
349
|
-
|
350
|
-
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
|
351
|
-
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
|
352
|
-
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
|
353
|
-
// this code, test it against that version.
|
354
|
-
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
|
356
|
+
while (num_cvs > 2) {
|
355
357
|
num_cvs =
|
356
358
|
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
|
357
359
|
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
358
360
|
}
|
361
|
+
#endif
|
359
362
|
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
|
360
363
|
}
|
361
364
|
|
data/ext/digest/blake3/blake3.h
CHANGED
data/ext/digest/blake3/blake3_dispatch.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
#if defined(IS_X86)
|
8
8
|
#if defined(_MSC_VER)
|
9
|
+
#include <Windows.h>
|
9
10
|
#include <intrin.h>
|
10
11
|
#elif defined(__GNUC__)
|
11
12
|
#include <immintrin.h>
|
@@ -14,6 +15,32 @@
|
|
14
15
|
#endif
|
15
16
|
#endif
|
16
17
|
|
18
|
+
#if !defined(BLAKE3_ATOMICS)
|
19
|
+
#if defined(__has_include)
|
20
|
+
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
|
21
|
+
#define BLAKE3_ATOMICS 1
|
22
|
+
#else
|
23
|
+
#define BLAKE3_ATOMICS 0
|
24
|
+
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
|
25
|
+
#else
|
26
|
+
#define BLAKE3_ATOMICS 0
|
27
|
+
#endif /* defined(__has_include) */
|
28
|
+
#endif /* BLAKE3_ATOMICS */
|
29
|
+
|
30
|
+
#if BLAKE3_ATOMICS
|
31
|
+
#define ATOMIC_INT _Atomic int
|
32
|
+
#define ATOMIC_LOAD(x) x
|
33
|
+
#define ATOMIC_STORE(x, y) x = y
|
34
|
+
#elif defined(_MSC_VER)
|
35
|
+
#define ATOMIC_INT LONG
|
36
|
+
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
|
37
|
+
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
|
38
|
+
#else
|
39
|
+
#define ATOMIC_INT int
|
40
|
+
#define ATOMIC_LOAD(x) x
|
41
|
+
#define ATOMIC_STORE(x, y) x = y
|
42
|
+
#endif
|
43
|
+
|
17
44
|
#define MAYBE_UNUSED(x) (void)((x))
|
18
45
|
|
19
46
|
#if defined(IS_X86)
|
@@ -76,7 +103,7 @@ enum cpu_feature {
|
|
76
103
|
#if !defined(BLAKE3_TESTING)
|
77
104
|
static /* Allow the variable to be controlled manually for testing */
|
78
105
|
#endif
|
79
|
-
|
106
|
+
ATOMIC_INT g_cpu_features = UNDEFINED;
|
80
107
|
|
81
108
|
#if !defined(BLAKE3_TESTING)
|
82
109
|
static
|
@@ -84,14 +111,16 @@ static
|
|
84
111
|
enum cpu_feature
|
85
112
|
get_cpu_features(void) {
|
86
113
|
|
87
|
-
|
88
|
-
|
114
|
+
/* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
|
115
|
+
enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
|
116
|
+
if (features != UNDEFINED) {
|
117
|
+
return features;
|
89
118
|
} else {
|
90
119
|
#if defined(IS_X86)
|
91
120
|
uint32_t regs[4] = {0};
|
92
121
|
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
|
93
122
|
(void)edx;
|
94
|
-
|
123
|
+
features = 0;
|
95
124
|
cpuid(regs, 0);
|
96
125
|
const int max_id = *eax;
|
97
126
|
cpuid(regs, 1);
|
@@ -124,7 +153,7 @@ static
|
|
124
153
|
}
|
125
154
|
}
|
126
155
|
}
|
127
|
-
g_cpu_features
|
156
|
+
ATOMIC_STORE(g_cpu_features, features);
|
128
157
|
return features;
|
129
158
|
#else
|
130
159
|
/* How to detect NEON? */
|
data/ext/digest/blake3/blake3_impl.h
CHANGED
@@ -51,7 +51,11 @@ enum blake3_flags {
|
|
51
51
|
#if !defined(BLAKE3_USE_NEON)
|
52
52
|
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
53
53
|
#if defined(IS_AARCH64)
|
54
|
-
#
|
54
|
+
#if defined(__ARM_BIG_ENDIAN)
|
55
|
+
#define BLAKE3_USE_NEON 0
|
56
|
+
#else
|
57
|
+
#define BLAKE3_USE_NEON 1
|
58
|
+
#endif
|
55
59
|
#else
|
56
60
|
#define BLAKE3_USE_NEON 0
|
57
61
|
#endif
|
data/ext/digest/blake3/blake3_neon.c
CHANGED
@@ -10,14 +10,12 @@
|
|
10
10
|
|
11
11
|
INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
|
12
12
|
// vld1q_u32 has alignment requirements. Don't use it.
|
13
|
-
|
14
|
-
memcpy(&x, src, 16);
|
15
|
-
return x;
|
13
|
+
return vreinterpretq_u32_u8(vld1q_u8(src));
|
16
14
|
}
|
17
15
|
|
18
16
|
INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) {
|
19
17
|
// vst1q_u32 has alignment requirements. Don't use it.
|
20
|
-
|
18
|
+
vst1q_u8(dest, vreinterpretq_u8_u32(src));
|
21
19
|
}
|
22
20
|
|
23
21
|
INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1.0
|
4
|
+
version: 1.5.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|