digest-blake3 0.37.0.1 → 1.2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +9 -1
- data/ext/digest/blake3/blake3.h +3 -2
- data/ext/digest/blake3/blake3_dispatch.c +8 -2
- data/ext/digest/blake3/blake3_impl.h +15 -2
- data/ext/digest/blake3/blake3_neon.c +6 -1
- data/ext/digest/blake3/blake3_sse2_x86-64_unix.S +2 -2
- data/ext/digest/blake3/blake3_sse2_x86-64_windows_gnu.S +8 -8
- data/ext/digest/blake3/blake3_sse2_x86-64_windows_msvc.asm +10 -10
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +4 -4
- data/lib/digest/blake3/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
|
4
|
+
data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
|
7
|
+
data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
#include "blake3.h"
|
6
6
|
#include "blake3_impl.h"
|
7
7
|
|
8
|
+
const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
|
9
|
+
|
8
10
|
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
|
9
11
|
uint8_t flags) {
|
10
12
|
memcpy(self->cv, key, BLAKE3_KEY_LEN);
|
@@ -338,12 +340,18 @@ INLINE void compress_subtree_to_parent_node(
|
|
338
340
|
uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
|
339
341
|
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
|
340
342
|
chunk_counter, flags, cv_array);
|
343
|
+
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
|
341
344
|
|
342
345
|
// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
|
343
346
|
// compress_subtree_wide() returns more than 2 chaining values. Condense
|
344
347
|
// them into 2 by forming parent nodes repeatedly.
|
345
348
|
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
346
|
-
|
349
|
+
// The second half of this loop condition is always true, and we just
|
350
|
+
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
|
351
|
+
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
|
352
|
+
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
|
353
|
+
// this code, test it against that version.
|
354
|
+
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
|
347
355
|
num_cvs =
|
348
356
|
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
|
349
357
|
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
data/ext/digest/blake3/blake3.h
CHANGED
@@ -8,12 +8,12 @@
|
|
8
8
|
extern "C" {
|
9
9
|
#endif
|
10
10
|
|
11
|
+
#define BLAKE3_VERSION_STRING "1.2.0"
|
11
12
|
#define BLAKE3_KEY_LEN 32
|
12
13
|
#define BLAKE3_OUT_LEN 32
|
13
14
|
#define BLAKE3_BLOCK_LEN 64
|
14
15
|
#define BLAKE3_CHUNK_LEN 1024
|
15
16
|
#define BLAKE3_MAX_DEPTH 54
|
16
|
-
#define BLAKE3_MAX_SIMD_DEGREE 16
|
17
17
|
|
18
18
|
// This struct is a private implementation detail. It has to be here because
|
19
19
|
// it's part of blake3_hasher below.
|
@@ -38,11 +38,12 @@ typedef struct {
|
|
38
38
|
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
39
39
|
} blake3_hasher;
|
40
40
|
|
41
|
+
const char *blake3_version(void);
|
41
42
|
void blake3_hasher_init(blake3_hasher *self);
|
42
43
|
void blake3_hasher_init_keyed(blake3_hasher *self,
|
43
44
|
const uint8_t key[BLAKE3_KEY_LEN]);
|
44
45
|
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
|
45
|
-
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
46
|
+
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
46
47
|
size_t context_len);
|
47
48
|
void blake3_hasher_update(blake3_hasher *self, const void *input,
|
48
49
|
size_t input_len);
|
data/ext/digest/blake3/blake3_dispatch.c
CHANGED
@@ -14,6 +14,8 @@
|
|
14
14
|
#endif
|
15
15
|
#endif
|
16
16
|
|
17
|
+
#define MAYBE_UNUSED(x) (void)((x))
|
18
|
+
|
17
19
|
#if defined(IS_X86)
|
18
20
|
static uint64_t xgetbv() {
|
19
21
|
#if defined(_MSC_VER)
|
@@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8],
|
|
137
139
|
uint8_t flags) {
|
138
140
|
#if defined(IS_X86)
|
139
141
|
const enum cpu_feature features = get_cpu_features();
|
142
|
+
MAYBE_UNUSED(features);
|
140
143
|
#if !defined(BLAKE3_NO_AVX512)
|
141
144
|
if (features & AVX512VL) {
|
142
145
|
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
@@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8],
|
|
165
168
|
uint8_t out[64]) {
|
166
169
|
#if defined(IS_X86)
|
167
170
|
const enum cpu_feature features = get_cpu_features();
|
171
|
+
MAYBE_UNUSED(features);
|
168
172
|
#if !defined(BLAKE3_NO_AVX512)
|
169
173
|
if (features & AVX512VL) {
|
170
174
|
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
@@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
193
197
|
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
194
198
|
#if defined(IS_X86)
|
195
199
|
const enum cpu_feature features = get_cpu_features();
|
200
|
+
MAYBE_UNUSED(features);
|
196
201
|
#if !defined(BLAKE3_NO_AVX512)
|
197
202
|
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
198
203
|
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
@@ -227,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
227
232
|
#endif
|
228
233
|
#endif
|
229
234
|
|
230
|
-
#if defined(BLAKE3_USE_NEON)
|
235
|
+
#if BLAKE3_USE_NEON == 1
|
231
236
|
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
232
237
|
increment_counter, flags, flags_start, flags_end, out);
|
233
238
|
return;
|
@@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
242
247
|
size_t blake3_simd_degree(void) {
|
243
248
|
#if defined(IS_X86)
|
244
249
|
const enum cpu_feature features = get_cpu_features();
|
250
|
+
MAYBE_UNUSED(features);
|
245
251
|
#if !defined(BLAKE3_NO_AVX512)
|
246
252
|
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
247
253
|
return 16;
|
@@ -263,7 +269,7 @@ size_t blake3_simd_degree(void) {
|
|
263
269
|
}
|
264
270
|
#endif
|
265
271
|
#endif
|
266
|
-
#if defined(BLAKE3_USE_NEON)
|
272
|
+
#if BLAKE3_USE_NEON == 1
|
267
273
|
return 4;
|
268
274
|
#endif
|
269
275
|
return 1;
|
data/ext/digest/blake3/blake3_impl.h
CHANGED
@@ -38,6 +38,10 @@ enum blake3_flags {
|
|
38
38
|
#define IS_X86_32
|
39
39
|
#endif
|
40
40
|
|
41
|
+
#if defined(__aarch64__) || defined(_M_ARM64)
|
42
|
+
#define IS_AARCH64
|
43
|
+
#endif
|
44
|
+
|
41
45
|
#if defined(IS_X86)
|
42
46
|
#if defined(_MSC_VER)
|
43
47
|
#include <intrin.h>
|
@@ -45,9 +49,18 @@ enum blake3_flags {
|
|
45
49
|
#include <immintrin.h>
|
46
50
|
#endif
|
47
51
|
|
52
|
+
#if !defined(BLAKE3_USE_NEON)
|
53
|
+
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
54
|
+
#if defined(IS_AARCH64)
|
55
|
+
#define BLAKE3_USE_NEON 1
|
56
|
+
#else
|
57
|
+
#define BLAKE3_USE_NEON 0
|
58
|
+
#endif
|
59
|
+
#endif
|
60
|
+
|
48
61
|
#if defined(IS_X86)
|
49
62
|
#define MAX_SIMD_DEGREE 16
|
50
|
-
#elif defined(BLAKE3_USE_NEON)
|
63
|
+
#elif BLAKE3_USE_NEON == 1
|
51
64
|
#define MAX_SIMD_DEGREE 4
|
52
65
|
#else
|
53
66
|
#define MAX_SIMD_DEGREE 1
|
@@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
|
257
270
|
#endif
|
258
271
|
#endif
|
259
272
|
|
260
|
-
#if defined(BLAKE3_USE_NEON)
|
273
|
+
#if BLAKE3_USE_NEON == 1
|
261
274
|
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
262
275
|
size_t blocks, const uint32_t key[8],
|
263
276
|
uint64_t counter, bool increment_counter,
|
data/ext/digest/blake3/blake3_neon.c
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
|
3
3
|
#include <arm_neon.h>
|
4
4
|
|
5
|
-
|
5
|
+
#ifdef __ARM_BIG_ENDIAN
|
6
|
+
#error "This implementation only supports little-endian ARM."
|
7
|
+
// It might be that all we need for big-endian support here is to get the loads
|
8
|
+
// and stores right, but step zero would be finding a way to test it in CI.
|
9
|
+
#endif
|
10
|
+
|
6
11
|
INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
|
7
12
|
// vld1q_u32 has alignment requirements. Don't use it.
|
8
13
|
uint32x4_t x;
|
data/ext/digest/blake3/blake3_sse2_x86-64_unix.S
CHANGED
@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
|
|
1704
1704
|
pshufd xmm15, xmm11, 0x93
|
1705
1705
|
shl rax, 0x20
|
1706
1706
|
or rax, 0x40
|
1707
|
-
|
1707
|
+
movq xmm3, rax
|
1708
1708
|
movdqa xmmword ptr [rsp+0x20], xmm3
|
1709
1709
|
movaps xmm3, xmmword ptr [rsp]
|
1710
1710
|
movaps xmm11, xmmword ptr [rsp+0x10]
|
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
|
|
1917
1917
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1918
1918
|
shl rax, 32
|
1919
1919
|
or rax, 64
|
1920
|
-
|
1920
|
+
movq xmm12, rax
|
1921
1921
|
movdqa xmm3, xmm13
|
1922
1922
|
punpcklqdq xmm3, xmm12
|
1923
1923
|
movups xmm4, xmmword ptr [r8+rdx-0x40]
|
data/ext/digest/blake3/blake3_sse2_x86-64_windows_gnu.S
CHANGED
@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
|
|
1715
1715
|
pshufd xmm15, xmm11, 0x93
|
1716
1716
|
shl rax, 0x20
|
1717
1717
|
or rax, 0x40
|
1718
|
-
|
1718
|
+
movq xmm3, rax
|
1719
1719
|
movdqa xmmword ptr [rsp+0x20], xmm3
|
1720
1720
|
movaps xmm3, xmmword ptr [rsp]
|
1721
1721
|
movaps xmm11, xmmword ptr [rsp+0x10]
|
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
|
|
1928
1928
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1929
1929
|
shl rax, 32
|
1930
1930
|
or rax, 64
|
1931
|
-
|
1931
|
+
movq xmm12, rax
|
1932
1932
|
movdqa xmm3, xmm13
|
1933
1933
|
punpcklqdq xmm3, xmm12
|
1934
1934
|
movups xmm4, xmmword ptr [r8+rdx-0x40]
|
@@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
|
|
2137
2137
|
por xmm9, xmm8
|
2138
2138
|
movdqa xmm8, xmm7
|
2139
2139
|
punpcklqdq xmm8, xmm5
|
2140
|
-
movdqa
|
2140
|
+
movdqa xmm14, xmm6
|
2141
2141
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
2142
|
-
pand
|
2143
|
-
por xmm8,
|
2142
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
2143
|
+
por xmm8, xmm14
|
2144
2144
|
pshufd xmm8, xmm8, 0x78
|
2145
2145
|
punpckhdq xmm5, xmm7
|
2146
2146
|
punpckldq xmm6, xmm5
|
@@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
|
|
2268
2268
|
por xmm9, xmm8
|
2269
2269
|
movdqa xmm8, xmm7
|
2270
2270
|
punpcklqdq xmm8, xmm5
|
2271
|
-
movdqa
|
2271
|
+
movdqa xmm14, xmm6
|
2272
2272
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
2273
|
-
pand
|
2274
|
-
por xmm8,
|
2273
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
2274
|
+
por xmm8, xmm14
|
2275
2275
|
pshufd xmm8, xmm8, 0x78
|
2276
2276
|
punpckhdq xmm5, xmm7
|
2277
2277
|
punpckldq xmm6, xmm5
|
data/ext/digest/blake3/blake3_sse2_x86-64_windows_msvc.asm
CHANGED
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
|
|
2054
2054
|
movzx r8d, r8b
|
2055
2055
|
shl rax, 32
|
2056
2056
|
add r8, rax
|
2057
|
-
|
2058
|
-
|
2057
|
+
movd xmm3, r9
|
2058
|
+
movd xmm4, r8
|
2059
2059
|
punpcklqdq xmm3, xmm4
|
2060
2060
|
movups xmm4, xmmword ptr [rdx]
|
2061
2061
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
|
|
2139
2139
|
por xmm9, xmm8
|
2140
2140
|
movdqa xmm8, xmm7
|
2141
2141
|
punpcklqdq xmm8, xmm5
|
2142
|
-
movdqa
|
2142
|
+
movdqa xmm14, xmm6
|
2143
2143
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
2144
|
-
pand
|
2145
|
-
por xmm8,
|
2144
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
2145
|
+
por xmm8, xmm14
|
2146
2146
|
pshufd xmm8, xmm8, 78H
|
2147
2147
|
punpckhdq xmm5, xmm7
|
2148
2148
|
punpckldq xmm6, xmm5
|
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
|
|
2186
2186
|
mov r10, qword ptr [rsp+0A8H]
|
2187
2187
|
shl rax, 32
|
2188
2188
|
add r8, rax
|
2189
|
-
|
2190
|
-
|
2189
|
+
movd xmm3, r9
|
2190
|
+
movd xmm4, r8
|
2191
2191
|
punpcklqdq xmm3, xmm4
|
2192
2192
|
movups xmm4, xmmword ptr [rdx]
|
2193
2193
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
|
|
2271
2271
|
por xmm9, xmm8
|
2272
2272
|
movdqa xmm8, xmm7
|
2273
2273
|
punpcklqdq xmm8, xmm5
|
2274
|
-
movdqa
|
2274
|
+
movdqa xmm14, xmm6
|
2275
2275
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
2276
|
-
pand
|
2277
|
-
por xmm8,
|
2276
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
2277
|
+
por xmm8, xmm14
|
2278
2278
|
pshufd xmm8, xmm8, 78H
|
2279
2279
|
punpckhdq xmm5, xmm7
|
2280
2280
|
punpckldq xmm6, xmm5
|
data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm
CHANGED
@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
|
|
1817
1817
|
movzx r8d, r8b
|
1818
1818
|
shl rax, 32
|
1819
1819
|
add r8, rax
|
1820
|
-
|
1821
|
-
|
1820
|
+
movd xmm3, r9
|
1821
|
+
movd xmm4, r8
|
1822
1822
|
punpcklqdq xmm3, xmm4
|
1823
1823
|
movups xmm4, xmmword ptr [rdx]
|
1824
1824
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
|
|
1938
1938
|
mov r10, qword ptr [rsp+0A8H]
|
1939
1939
|
shl rax, 32
|
1940
1940
|
add r8, rax
|
1941
|
-
|
1942
|
-
|
1941
|
+
movd xmm3, r9
|
1942
|
+
movd xmm4, r8
|
1943
1943
|
punpcklqdq xmm3, xmm4
|
1944
1944
|
movups xmm4, xmmword ptr [rdx]
|
1945
1945
|
movups xmm5, xmmword ptr [rdx+10H]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.37.0.1
|
4
|
+
version: 1.2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|