digest-blake3 0.37.0.1 → 1.2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +9 -1
- data/ext/digest/blake3/blake3.h +3 -2
- data/ext/digest/blake3/blake3_dispatch.c +8 -2
- data/ext/digest/blake3/blake3_impl.h +15 -2
- data/ext/digest/blake3/blake3_neon.c +6 -1
- data/ext/digest/blake3/blake3_sse2_x86-64_unix.S +2 -2
- data/ext/digest/blake3/blake3_sse2_x86-64_windows_gnu.S +8 -8
- data/ext/digest/blake3/blake3_sse2_x86-64_windows_msvc.asm +10 -10
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +4 -4
- data/lib/digest/blake3/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec7a77d6875b688e1cb1fbe8470cbf67278f9fe3f2f8e516bafe7abc0bf54bc4
|
4
|
+
data.tar.gz: 74e13b2480eccd5c2fe3fa913a0962217c1f07c95b5db80b8303086488ee5d9f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de0fb7b5ccce755c313da8e547a430950d181170c64561746890ce8855ce5e09d3232b16316f36d22320ae5d23cf7904e8221a26358e96d9566ba247ef613214
|
7
|
+
data.tar.gz: 33e15e9469128ba227dbe6b57d9c44fe55078b9031975bf9db783a469c93342c7ccbf38b763ddfed7f09c941a42a6df89302cfda0e38b0ad4967a12acac4b18a
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
#include "blake3.h"
|
6
6
|
#include "blake3_impl.h"
|
7
7
|
|
8
|
+
const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
|
9
|
+
|
8
10
|
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
|
9
11
|
uint8_t flags) {
|
10
12
|
memcpy(self->cv, key, BLAKE3_KEY_LEN);
|
@@ -338,12 +340,18 @@ INLINE void compress_subtree_to_parent_node(
|
|
338
340
|
uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
|
339
341
|
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
|
340
342
|
chunk_counter, flags, cv_array);
|
343
|
+
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
|
341
344
|
|
342
345
|
// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
|
343
346
|
// compress_subtree_wide() returns more than 2 chaining values. Condense
|
344
347
|
// them into 2 by forming parent nodes repeatedly.
|
345
348
|
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
346
|
-
|
349
|
+
// The second half of this loop condition is always true, and we just
|
350
|
+
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
|
351
|
+
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
|
352
|
+
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
|
353
|
+
// this code, test it against that version.
|
354
|
+
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
|
347
355
|
num_cvs =
|
348
356
|
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
|
349
357
|
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
data/ext/digest/blake3/blake3.h
CHANGED
@@ -8,12 +8,12 @@
|
|
8
8
|
extern "C" {
|
9
9
|
#endif
|
10
10
|
|
11
|
+
#define BLAKE3_VERSION_STRING "1.2.0"
|
11
12
|
#define BLAKE3_KEY_LEN 32
|
12
13
|
#define BLAKE3_OUT_LEN 32
|
13
14
|
#define BLAKE3_BLOCK_LEN 64
|
14
15
|
#define BLAKE3_CHUNK_LEN 1024
|
15
16
|
#define BLAKE3_MAX_DEPTH 54
|
16
|
-
#define BLAKE3_MAX_SIMD_DEGREE 16
|
17
17
|
|
18
18
|
// This struct is a private implementation detail. It has to be here because
|
19
19
|
// it's part of blake3_hasher below.
|
@@ -38,11 +38,12 @@ typedef struct {
|
|
38
38
|
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
39
39
|
} blake3_hasher;
|
40
40
|
|
41
|
+
const char *blake3_version(void);
|
41
42
|
void blake3_hasher_init(blake3_hasher *self);
|
42
43
|
void blake3_hasher_init_keyed(blake3_hasher *self,
|
43
44
|
const uint8_t key[BLAKE3_KEY_LEN]);
|
44
45
|
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
|
45
|
-
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
46
|
+
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
46
47
|
size_t context_len);
|
47
48
|
void blake3_hasher_update(blake3_hasher *self, const void *input,
|
48
49
|
size_t input_len);
|
data/ext/digest/blake3/blake3_dispatch.c
CHANGED
@@ -14,6 +14,8 @@
|
|
14
14
|
#endif
|
15
15
|
#endif
|
16
16
|
|
17
|
+
#define MAYBE_UNUSED(x) (void)((x))
|
18
|
+
|
17
19
|
#if defined(IS_X86)
|
18
20
|
static uint64_t xgetbv() {
|
19
21
|
#if defined(_MSC_VER)
|
@@ -137,6 +139,7 @@ void blake3_compress_in_place(uint32_t cv[8],
|
|
137
139
|
uint8_t flags) {
|
138
140
|
#if defined(IS_X86)
|
139
141
|
const enum cpu_feature features = get_cpu_features();
|
142
|
+
MAYBE_UNUSED(features);
|
140
143
|
#if !defined(BLAKE3_NO_AVX512)
|
141
144
|
if (features & AVX512VL) {
|
142
145
|
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
@@ -165,6 +168,7 @@ void blake3_compress_xof(const uint32_t cv[8],
|
|
165
168
|
uint8_t out[64]) {
|
166
169
|
#if defined(IS_X86)
|
167
170
|
const enum cpu_feature features = get_cpu_features();
|
171
|
+
MAYBE_UNUSED(features);
|
168
172
|
#if !defined(BLAKE3_NO_AVX512)
|
169
173
|
if (features & AVX512VL) {
|
170
174
|
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
@@ -193,6 +197,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
193
197
|
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
194
198
|
#if defined(IS_X86)
|
195
199
|
const enum cpu_feature features = get_cpu_features();
|
200
|
+
MAYBE_UNUSED(features);
|
196
201
|
#if !defined(BLAKE3_NO_AVX512)
|
197
202
|
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
198
203
|
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
@@ -227,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
227
232
|
#endif
|
228
233
|
#endif
|
229
234
|
|
230
|
-
#if
|
235
|
+
#if BLAKE3_USE_NEON == 1
|
231
236
|
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
232
237
|
increment_counter, flags, flags_start, flags_end, out);
|
233
238
|
return;
|
@@ -242,6 +247,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
242
247
|
size_t blake3_simd_degree(void) {
|
243
248
|
#if defined(IS_X86)
|
244
249
|
const enum cpu_feature features = get_cpu_features();
|
250
|
+
MAYBE_UNUSED(features);
|
245
251
|
#if !defined(BLAKE3_NO_AVX512)
|
246
252
|
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
247
253
|
return 16;
|
@@ -263,7 +269,7 @@ size_t blake3_simd_degree(void) {
|
|
263
269
|
}
|
264
270
|
#endif
|
265
271
|
#endif
|
266
|
-
#if
|
272
|
+
#if BLAKE3_USE_NEON == 1
|
267
273
|
return 4;
|
268
274
|
#endif
|
269
275
|
return 1;
|
data/ext/digest/blake3/blake3_impl.h
CHANGED
@@ -38,6 +38,10 @@ enum blake3_flags {
|
|
38
38
|
#define IS_X86_32
|
39
39
|
#endif
|
40
40
|
|
41
|
+
#if defined(__aarch64__) || defined(_M_ARM64)
|
42
|
+
#define IS_AARCH64
|
43
|
+
#endif
|
44
|
+
|
41
45
|
#if defined(IS_X86)
|
42
46
|
#if defined(_MSC_VER)
|
43
47
|
#include <intrin.h>
|
@@ -45,9 +49,18 @@ enum blake3_flags {
|
|
45
49
|
#include <immintrin.h>
|
46
50
|
#endif
|
47
51
|
|
52
|
+
#if !defined(BLAKE3_USE_NEON)
|
53
|
+
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
54
|
+
#if defined(IS_AARCH64)
|
55
|
+
#define BLAKE3_USE_NEON 1
|
56
|
+
#else
|
57
|
+
#define BLAKE3_USE_NEON 0
|
58
|
+
#endif
|
59
|
+
#endif
|
60
|
+
|
48
61
|
#if defined(IS_X86)
|
49
62
|
#define MAX_SIMD_DEGREE 16
|
50
|
-
#elif
|
63
|
+
#elif BLAKE3_USE_NEON == 1
|
51
64
|
#define MAX_SIMD_DEGREE 4
|
52
65
|
#else
|
53
66
|
#define MAX_SIMD_DEGREE 1
|
@@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
|
257
270
|
#endif
|
258
271
|
#endif
|
259
272
|
|
260
|
-
#if
|
273
|
+
#if BLAKE3_USE_NEON == 1
|
261
274
|
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
262
275
|
size_t blocks, const uint32_t key[8],
|
263
276
|
uint64_t counter, bool increment_counter,
|
data/ext/digest/blake3/blake3_neon.c
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
|
3
3
|
#include <arm_neon.h>
|
4
4
|
|
5
|
-
|
5
|
+
#ifdef __ARM_BIG_ENDIAN
|
6
|
+
#error "This implementation only supports little-endian ARM."
|
7
|
+
// It might be that all we need for big-endian support here is to get the loads
|
8
|
+
// and stores right, but step zero would be finding a way to test it in CI.
|
9
|
+
#endif
|
10
|
+
|
6
11
|
INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
|
7
12
|
// vld1q_u32 has alignment requirements. Don't use it.
|
8
13
|
uint32x4_t x;
|
data/ext/digest/blake3/blake3_sse2_x86-64_unix.S
CHANGED
@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
|
|
1704
1704
|
pshufd xmm15, xmm11, 0x93
|
1705
1705
|
shl rax, 0x20
|
1706
1706
|
or rax, 0x40
|
1707
|
-
|
1707
|
+
movq xmm3, rax
|
1708
1708
|
movdqa xmmword ptr [rsp+0x20], xmm3
|
1709
1709
|
movaps xmm3, xmmword ptr [rsp]
|
1710
1710
|
movaps xmm11, xmmword ptr [rsp+0x10]
|
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
|
|
1917
1917
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1918
1918
|
shl rax, 32
|
1919
1919
|
or rax, 64
|
1920
|
-
|
1920
|
+
movq xmm12, rax
|
1921
1921
|
movdqa xmm3, xmm13
|
1922
1922
|
punpcklqdq xmm3, xmm12
|
1923
1923
|
movups xmm4, xmmword ptr [r8+rdx-0x40]
|
data/ext/digest/blake3/blake3_sse2_x86-64_windows_gnu.S
CHANGED
@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
|
|
1715
1715
|
pshufd xmm15, xmm11, 0x93
|
1716
1716
|
shl rax, 0x20
|
1717
1717
|
or rax, 0x40
|
1718
|
-
|
1718
|
+
movq xmm3, rax
|
1719
1719
|
movdqa xmmword ptr [rsp+0x20], xmm3
|
1720
1720
|
movaps xmm3, xmmword ptr [rsp]
|
1721
1721
|
movaps xmm11, xmmword ptr [rsp+0x10]
|
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
|
|
1928
1928
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1929
1929
|
shl rax, 32
|
1930
1930
|
or rax, 64
|
1931
|
-
|
1931
|
+
movq xmm12, rax
|
1932
1932
|
movdqa xmm3, xmm13
|
1933
1933
|
punpcklqdq xmm3, xmm12
|
1934
1934
|
movups xmm4, xmmword ptr [r8+rdx-0x40]
|
@@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
|
|
2137
2137
|
por xmm9, xmm8
|
2138
2138
|
movdqa xmm8, xmm7
|
2139
2139
|
punpcklqdq xmm8, xmm5
|
2140
|
-
movdqa
|
2140
|
+
movdqa xmm14, xmm6
|
2141
2141
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
2142
|
-
pand
|
2143
|
-
por xmm8,
|
2142
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
2143
|
+
por xmm8, xmm14
|
2144
2144
|
pshufd xmm8, xmm8, 0x78
|
2145
2145
|
punpckhdq xmm5, xmm7
|
2146
2146
|
punpckldq xmm6, xmm5
|
@@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
|
|
2268
2268
|
por xmm9, xmm8
|
2269
2269
|
movdqa xmm8, xmm7
|
2270
2270
|
punpcklqdq xmm8, xmm5
|
2271
|
-
movdqa
|
2271
|
+
movdqa xmm14, xmm6
|
2272
2272
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
2273
|
-
pand
|
2274
|
-
por xmm8,
|
2273
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
2274
|
+
por xmm8, xmm14
|
2275
2275
|
pshufd xmm8, xmm8, 0x78
|
2276
2276
|
punpckhdq xmm5, xmm7
|
2277
2277
|
punpckldq xmm6, xmm5
|
data/ext/digest/blake3/blake3_sse2_x86-64_windows_msvc.asm
CHANGED
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
|
|
2054
2054
|
movzx r8d, r8b
|
2055
2055
|
shl rax, 32
|
2056
2056
|
add r8, rax
|
2057
|
-
|
2058
|
-
|
2057
|
+
movd xmm3, r9
|
2058
|
+
movd xmm4, r8
|
2059
2059
|
punpcklqdq xmm3, xmm4
|
2060
2060
|
movups xmm4, xmmword ptr [rdx]
|
2061
2061
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
|
|
2139
2139
|
por xmm9, xmm8
|
2140
2140
|
movdqa xmm8, xmm7
|
2141
2141
|
punpcklqdq xmm8, xmm5
|
2142
|
-
movdqa
|
2142
|
+
movdqa xmm14, xmm6
|
2143
2143
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
2144
|
-
pand
|
2145
|
-
por xmm8,
|
2144
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
2145
|
+
por xmm8, xmm14
|
2146
2146
|
pshufd xmm8, xmm8, 78H
|
2147
2147
|
punpckhdq xmm5, xmm7
|
2148
2148
|
punpckldq xmm6, xmm5
|
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
|
|
2186
2186
|
mov r10, qword ptr [rsp+0A8H]
|
2187
2187
|
shl rax, 32
|
2188
2188
|
add r8, rax
|
2189
|
-
|
2190
|
-
|
2189
|
+
movd xmm3, r9
|
2190
|
+
movd xmm4, r8
|
2191
2191
|
punpcklqdq xmm3, xmm4
|
2192
2192
|
movups xmm4, xmmword ptr [rdx]
|
2193
2193
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
|
|
2271
2271
|
por xmm9, xmm8
|
2272
2272
|
movdqa xmm8, xmm7
|
2273
2273
|
punpcklqdq xmm8, xmm5
|
2274
|
-
movdqa
|
2274
|
+
movdqa xmm14, xmm6
|
2275
2275
|
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
2276
|
-
pand
|
2277
|
-
por xmm8,
|
2276
|
+
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
2277
|
+
por xmm8, xmm14
|
2278
2278
|
pshufd xmm8, xmm8, 78H
|
2279
2279
|
punpckhdq xmm5, xmm7
|
2280
2280
|
punpckldq xmm6, xmm5
|
data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm
CHANGED
@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
|
|
1817
1817
|
movzx r8d, r8b
|
1818
1818
|
shl rax, 32
|
1819
1819
|
add r8, rax
|
1820
|
-
|
1821
|
-
|
1820
|
+
movd xmm3, r9
|
1821
|
+
movd xmm4, r8
|
1822
1822
|
punpcklqdq xmm3, xmm4
|
1823
1823
|
movups xmm4, xmmword ptr [rdx]
|
1824
1824
|
movups xmm5, xmmword ptr [rdx+10H]
|
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
|
|
1938
1938
|
mov r10, qword ptr [rsp+0A8H]
|
1939
1939
|
shl rax, 32
|
1940
1940
|
add r8, rax
|
1941
|
-
|
1942
|
-
|
1941
|
+
movd xmm3, r9
|
1942
|
+
movd xmm4, r8
|
1943
1943
|
punpcklqdq xmm3, xmm4
|
1944
1944
|
movups xmm4, xmmword ptr [rdx]
|
1945
1945
|
movups xmm5, xmmword ptr [rdx+10H]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|