digest-blake3 0.22.1 → 0.34.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +24 -15
- data/ext/digest/blake3/blake3.h +4 -2
- data/ext/digest/blake3/blake3_avx512_x86-64_unix.S +17 -17
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_gnu.S +17 -17
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_msvc.asm +17 -17
- data/ext/digest/blake3/blake3_dispatch.c +3 -70
- data/ext/digest/blake3/blake3_impl.h +69 -1
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_gnu.S +19 -7
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +19 -7
- data/lib/digest/blake3/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3db2fab1165a083a1a83d5c656c1c737d53f853de91babcb6c9c0e74ec7e23a
|
4
|
+
data.tar.gz: d4692ef2c6326a70ffa0cad5ed90219daa96c0940c0e9986d9ee7b4469d6b48d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ef86ba9e54408a68179d43678d7863d1af3d51e6002315d4607e377f2a142d374f4dc0e4d5f8ddde641063d3b5e2f93214fb10274aba849eee757d5f884d854
|
7
|
+
data.tar.gz: e8bf900ad7eece0df62964ca7695af5c8681cbe23c3b7e6cc2af4b0ac2c1d6b3f74de987d7998f4627a5bfcb164c2bafa7ffda8bf8f96be5075ca44761aa2c23
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -84,23 +84,26 @@ INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
|
|
84
84
|
memcpy(cv, cv_words, 32);
|
85
85
|
}
|
86
86
|
|
87
|
-
INLINE void output_root_bytes(const output_t *self, uint8_t *out,
|
87
|
+
INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
|
88
88
|
size_t out_len) {
|
89
|
-
uint64_t output_block_counter =
|
89
|
+
uint64_t output_block_counter = seek / 64;
|
90
|
+
size_t offset_within_block = seek % 64;
|
90
91
|
uint8_t wide_buf[64];
|
91
92
|
while (out_len > 0) {
|
92
93
|
blake3_compress_xof(self->input_cv, self->block, self->block_len,
|
93
94
|
output_block_counter, self->flags | ROOT, wide_buf);
|
95
|
+
size_t available_bytes = 64 - offset_within_block;
|
94
96
|
size_t memcpy_len;
|
95
|
-
if (out_len >
|
96
|
-
memcpy_len =
|
97
|
+
if (out_len > available_bytes) {
|
98
|
+
memcpy_len = available_bytes;
|
97
99
|
} else {
|
98
100
|
memcpy_len = out_len;
|
99
101
|
}
|
100
|
-
memcpy(out, wide_buf, memcpy_len);
|
102
|
+
memcpy(out, wide_buf + offset_within_block, memcpy_len);
|
101
103
|
out += memcpy_len;
|
102
104
|
out_len -= memcpy_len;
|
103
105
|
output_block_counter += 1;
|
106
|
+
offset_within_block = 0;
|
104
107
|
}
|
105
108
|
}
|
106
109
|
|
@@ -256,10 +259,11 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
|
|
256
259
|
// Why not just have the caller split the input on the first update(), instead
|
257
260
|
// of implementing this special rule? Because we don't want to limit SIMD or
|
258
261
|
// multi-threading parallelism for that update().
|
259
|
-
size_t blake3_compress_subtree_wide(const uint8_t *input,
|
260
|
-
|
261
|
-
|
262
|
-
|
262
|
+
static size_t blake3_compress_subtree_wide(const uint8_t *input,
|
263
|
+
size_t input_len,
|
264
|
+
const uint32_t key[8],
|
265
|
+
uint64_t chunk_counter,
|
266
|
+
uint8_t flags, uint8_t *out) {
|
263
267
|
// Note that the single chunk case does *not* bump the SIMD degree up to 2
|
264
268
|
// when it is 1. If this implementation adds multi-threading in the future,
|
265
269
|
// this gives us the option of multi-threading even the 2-chunk case, which
|
@@ -425,8 +429,8 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
|
|
425
429
|
// compress_subtree_to_parent_node(). That function always returns the top
|
426
430
|
// *two* chaining values of the subtree it's compressing. We then do lazy
|
427
431
|
// merging with each of them separately, so that the second CV will always
|
428
|
-
// remain unmerged. (
|
429
|
-
//
|
432
|
+
// remain unmerged. (That also helps us support extendable output when we're
|
433
|
+
// hashing an input all-at-once.)
|
430
434
|
INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
|
431
435
|
uint64_t chunk_counter) {
|
432
436
|
hasher_merge_cv_stack(self, chunk_counter);
|
@@ -472,8 +476,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
472
476
|
|
473
477
|
// Now the chunk_state is clear, and we have more input. If there's more than
|
474
478
|
// a single chunk (so, definitely not the root chunk), hash the largest whole
|
475
|
-
// subtree we can, with the full benefits of SIMD and
|
476
|
-
// parallelism. Two restrictions:
|
479
|
+
// subtree we can, with the full benefits of SIMD (and maybe in the future,
|
480
|
+
// multi-threading) parallelism. Two restrictions:
|
477
481
|
// - The subtree has to be a power-of-2 number of chunks. Only subtrees along
|
478
482
|
// the right edge can be incomplete, and we don't know where the right edge
|
479
483
|
// is going to be until we get to finalize().
|
@@ -546,6 +550,11 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
546
550
|
|
547
551
|
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
548
552
|
size_t out_len) {
|
553
|
+
blake3_hasher_finalize_seek(self, 0, out, out_len);
|
554
|
+
}
|
555
|
+
|
556
|
+
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
557
|
+
uint8_t *out, size_t out_len) {
|
549
558
|
// Explicitly checking for zero avoids causing UB by passing a null pointer
|
550
559
|
// to memcpy. This comes up in practice with things like:
|
551
560
|
// std::vector<uint8_t> v;
|
@@ -557,7 +566,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
|
557
566
|
// If the subtree stack is empty, then the current chunk is the root.
|
558
567
|
if (self->cv_stack_len == 0) {
|
559
568
|
output_t output = chunk_state_output(&self->chunk);
|
560
|
-
output_root_bytes(&output, out, out_len);
|
569
|
+
output_root_bytes(&output, seek, out, out_len);
|
561
570
|
return;
|
562
571
|
}
|
563
572
|
// If there are any bytes in the chunk state, finalize that chunk and do a
|
@@ -585,5 +594,5 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
|
585
594
|
output_chaining_value(&output, &parent_block[32]);
|
586
595
|
output = parent_output(parent_block, self->key, self->chunk.flags);
|
587
596
|
}
|
588
|
-
output_root_bytes(&output, out, out_len);
|
597
|
+
output_root_bytes(&output, seek, out, out_len);
|
589
598
|
}
|
data/ext/digest/blake3/blake3.h
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#include <stddef.h>
|
5
5
|
#include <stdint.h>
|
6
6
|
|
7
|
-
#ifdef
|
7
|
+
#ifdef __cplusplus
|
8
8
|
extern "C" {
|
9
9
|
#endif
|
10
10
|
|
@@ -46,8 +46,10 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
46
46
|
size_t input_len);
|
47
47
|
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
48
48
|
size_t out_len);
|
49
|
+
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
50
|
+
uint8_t *out, size_t out_len);
|
49
51
|
|
50
|
-
#ifdef
|
52
|
+
#ifdef __cplusplus
|
51
53
|
}
|
52
54
|
#endif
|
53
55
|
|
@@ -82,15 +82,15 @@ blake3_hash_many_avx512:
|
|
82
82
|
mov r14, qword ptr [rdi+0x50]
|
83
83
|
mov r15, qword ptr [rdi+0x58]
|
84
84
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
85
|
-
|
85
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
86
86
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
87
|
-
|
87
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
88
88
|
vpunpcklqdq zmm8, zmm16, zmm17
|
89
89
|
vpunpckhqdq zmm9, zmm16, zmm17
|
90
90
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
91
|
-
|
91
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
92
92
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
93
|
-
|
93
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
94
94
|
vpunpcklqdq zmm10, zmm18, zmm19
|
95
95
|
vpunpckhqdq zmm11, zmm18, zmm19
|
96
96
|
mov r8, qword ptr [rdi+0x20]
|
@@ -102,15 +102,15 @@ blake3_hash_many_avx512:
|
|
102
102
|
mov r14, qword ptr [rdi+0x70]
|
103
103
|
mov r15, qword ptr [rdi+0x78]
|
104
104
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
105
|
-
|
105
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
106
106
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
107
|
-
|
107
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
108
108
|
vpunpcklqdq zmm12, zmm16, zmm17
|
109
109
|
vpunpckhqdq zmm13, zmm16, zmm17
|
110
110
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
111
|
-
|
111
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
112
112
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
113
|
-
|
113
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
114
114
|
vpunpcklqdq zmm14, zmm18, zmm19
|
115
115
|
vpunpckhqdq zmm15, zmm18, zmm19
|
116
116
|
vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
|
@@ -144,15 +144,15 @@ blake3_hash_many_avx512:
|
|
144
144
|
mov r14, qword ptr [rdi+0x50]
|
145
145
|
mov r15, qword ptr [rdi+0x58]
|
146
146
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
147
|
-
|
147
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
148
148
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
149
|
-
|
149
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
150
150
|
vpunpcklqdq zmm8, zmm24, zmm25
|
151
151
|
vpunpckhqdq zmm9, zmm24, zmm25
|
152
152
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
153
|
-
|
153
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
154
154
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
155
|
-
|
155
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
156
156
|
vpunpcklqdq zmm10, zmm24, zmm25
|
157
157
|
vpunpckhqdq zmm11, zmm24, zmm25
|
158
158
|
prefetcht0 [r8+rdx+0x80]
|
@@ -172,15 +172,15 @@ blake3_hash_many_avx512:
|
|
172
172
|
mov r14, qword ptr [rdi+0x70]
|
173
173
|
mov r15, qword ptr [rdi+0x78]
|
174
174
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
175
|
-
|
175
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
176
176
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
177
|
-
|
177
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
178
178
|
vpunpcklqdq zmm12, zmm24, zmm25
|
179
179
|
vpunpckhqdq zmm13, zmm24, zmm25
|
180
180
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
181
|
-
|
181
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
182
182
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
183
|
-
|
183
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
184
184
|
vpunpcklqdq zmm14, zmm24, zmm25
|
185
185
|
vpunpckhqdq zmm15, zmm24, zmm25
|
186
186
|
prefetcht0 [r8+rdx+0x80]
|
@@ -2039,7 +2039,7 @@ blake3_hash_many_avx512:
|
|
2039
2039
|
vpermq ymm14, ymm14, 0xDC
|
2040
2040
|
vpermq ymm15, ymm15, 0xDC
|
2041
2041
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
2042
|
-
|
2042
|
+
vinserti64x4 zmm13, zmm14, ymm15, 0x01
|
2043
2043
|
mov eax, 17476
|
2044
2044
|
kmovw k2, eax
|
2045
2045
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -96,15 +96,15 @@ blake3_hash_many_avx512:
|
|
96
96
|
mov r14, qword ptr [rdi+0x50]
|
97
97
|
mov r15, qword ptr [rdi+0x58]
|
98
98
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
99
|
-
|
99
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
100
100
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
101
|
-
|
101
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
102
102
|
vpunpcklqdq zmm8, zmm16, zmm17
|
103
103
|
vpunpckhqdq zmm9, zmm16, zmm17
|
104
104
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
105
|
-
|
105
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
106
106
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
107
|
-
|
107
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
108
108
|
vpunpcklqdq zmm10, zmm18, zmm19
|
109
109
|
vpunpckhqdq zmm11, zmm18, zmm19
|
110
110
|
mov r8, qword ptr [rdi+0x20]
|
@@ -116,15 +116,15 @@ blake3_hash_many_avx512:
|
|
116
116
|
mov r14, qword ptr [rdi+0x70]
|
117
117
|
mov r15, qword ptr [rdi+0x78]
|
118
118
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
119
|
-
|
119
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
120
120
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
121
|
-
|
121
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
122
122
|
vpunpcklqdq zmm12, zmm16, zmm17
|
123
123
|
vpunpckhqdq zmm13, zmm16, zmm17
|
124
124
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
125
|
-
|
125
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
126
126
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
127
|
-
|
127
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
128
128
|
vpunpcklqdq zmm14, zmm18, zmm19
|
129
129
|
vpunpckhqdq zmm15, zmm18, zmm19
|
130
130
|
vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
|
@@ -158,15 +158,15 @@ blake3_hash_many_avx512:
|
|
158
158
|
mov r14, qword ptr [rdi+0x50]
|
159
159
|
mov r15, qword ptr [rdi+0x58]
|
160
160
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
161
|
-
|
161
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
162
162
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
163
|
-
|
163
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
164
164
|
vpunpcklqdq zmm8, zmm24, zmm25
|
165
165
|
vpunpckhqdq zmm9, zmm24, zmm25
|
166
166
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
167
|
-
|
167
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
168
168
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
169
|
-
|
169
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
170
170
|
vpunpcklqdq zmm10, zmm24, zmm25
|
171
171
|
vpunpckhqdq zmm11, zmm24, zmm25
|
172
172
|
prefetcht0 [r8+rdx+0x80]
|
@@ -186,15 +186,15 @@ blake3_hash_many_avx512:
|
|
186
186
|
mov r14, qword ptr [rdi+0x70]
|
187
187
|
mov r15, qword ptr [rdi+0x78]
|
188
188
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
189
|
-
|
189
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
190
190
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
191
|
-
|
191
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
192
192
|
vpunpcklqdq zmm12, zmm24, zmm25
|
193
193
|
vpunpckhqdq zmm13, zmm24, zmm25
|
194
194
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
195
|
-
|
195
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
196
196
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
197
|
-
|
197
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
198
198
|
vpunpcklqdq zmm14, zmm24, zmm25
|
199
199
|
vpunpckhqdq zmm15, zmm24, zmm25
|
200
200
|
prefetcht0 [r8+rdx+0x80]
|
@@ -2065,7 +2065,7 @@ blake3_hash_many_avx512:
|
|
2065
2065
|
vpermq ymm14, ymm14, 0xDC
|
2066
2066
|
vpermq ymm15, ymm15, 0xDC
|
2067
2067
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
2068
|
-
|
2068
|
+
vinserti64x4 zmm13, zmm14, ymm15, 0x01
|
2069
2069
|
mov eax, 17476
|
2070
2070
|
kmovw k2, eax
|
2071
2071
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -99,15 +99,15 @@ innerloop16:
|
|
99
99
|
mov r14, qword ptr [rdi+50H]
|
100
100
|
mov r15, qword ptr [rdi+58H]
|
101
101
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
|
102
|
-
|
102
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
|
103
103
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
|
104
|
-
|
104
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
|
105
105
|
vpunpcklqdq zmm8, zmm16, zmm17
|
106
106
|
vpunpckhqdq zmm9, zmm16, zmm17
|
107
107
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
|
108
|
-
|
108
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
|
109
109
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
|
110
|
-
|
110
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
|
111
111
|
vpunpcklqdq zmm10, zmm18, zmm19
|
112
112
|
vpunpckhqdq zmm11, zmm18, zmm19
|
113
113
|
mov r8, qword ptr [rdi+20H]
|
@@ -119,15 +119,15 @@ innerloop16:
|
|
119
119
|
mov r14, qword ptr [rdi+70H]
|
120
120
|
mov r15, qword ptr [rdi+78H]
|
121
121
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
|
122
|
-
|
122
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
|
123
123
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
|
124
|
-
|
124
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
|
125
125
|
vpunpcklqdq zmm12, zmm16, zmm17
|
126
126
|
vpunpckhqdq zmm13, zmm16, zmm17
|
127
127
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
|
128
|
-
|
128
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
|
129
129
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
|
130
|
-
|
130
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
|
131
131
|
vpunpcklqdq zmm14, zmm18, zmm19
|
132
132
|
vpunpckhqdq zmm15, zmm18, zmm19
|
133
133
|
vmovdqa32 zmm27, zmmword ptr [INDEX0]
|
@@ -161,15 +161,15 @@ innerloop16:
|
|
161
161
|
mov r14, qword ptr [rdi+50H]
|
162
162
|
mov r15, qword ptr [rdi+58H]
|
163
163
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
|
164
|
-
|
164
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
|
165
165
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
|
166
|
-
|
166
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
|
167
167
|
vpunpcklqdq zmm8, zmm24, zmm25
|
168
168
|
vpunpckhqdq zmm9, zmm24, zmm25
|
169
169
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
|
170
|
-
|
170
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
|
171
171
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
|
172
|
-
|
172
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
|
173
173
|
vpunpcklqdq zmm10, zmm24, zmm25
|
174
174
|
vpunpckhqdq zmm11, zmm24, zmm25
|
175
175
|
prefetcht0 byte ptr [r8+rdx+80H]
|
@@ -189,15 +189,15 @@ innerloop16:
|
|
189
189
|
mov r14, qword ptr [rdi+70H]
|
190
190
|
mov r15, qword ptr [rdi+78H]
|
191
191
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
|
192
|
-
|
192
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
|
193
193
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
|
194
|
-
|
194
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
|
195
195
|
vpunpcklqdq zmm12, zmm24, zmm25
|
196
196
|
vpunpckhqdq zmm13, zmm24, zmm25
|
197
197
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
|
198
|
-
|
198
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
|
199
199
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
|
200
|
-
|
200
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
|
201
201
|
vpunpcklqdq zmm14, zmm24, zmm25
|
202
202
|
vpunpckhqdq zmm15, zmm24, zmm25
|
203
203
|
prefetcht0 byte ptr [r8+rdx+80H]
|
@@ -2073,7 +2073,7 @@ final7blocks:
|
|
2073
2073
|
vpermq ymm14, ymm14, 0DCH
|
2074
2074
|
vpermq ymm15, ymm15, 0DCH
|
2075
2075
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
|
2076
|
-
|
2076
|
+
vinserti64x4 zmm13, zmm14, ymm15, 01H
|
2077
2077
|
mov eax, 17476
|
2078
2078
|
kmovw k2, eax
|
2079
2079
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -14,73 +14,6 @@
|
|
14
14
|
#endif
|
15
15
|
#endif
|
16
16
|
|
17
|
-
// Declarations for implementation-specific functions.
|
18
|
-
void blake3_compress_in_place_portable(uint32_t cv[8],
|
19
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
20
|
-
uint8_t block_len, uint64_t counter,
|
21
|
-
uint8_t flags);
|
22
|
-
|
23
|
-
void blake3_compress_xof_portable(const uint32_t cv[8],
|
24
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
25
|
-
uint8_t block_len, uint64_t counter,
|
26
|
-
uint8_t flags, uint8_t out[64]);
|
27
|
-
|
28
|
-
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
29
|
-
size_t blocks, const uint32_t key[8],
|
30
|
-
uint64_t counter, bool increment_counter,
|
31
|
-
uint8_t flags, uint8_t flags_start,
|
32
|
-
uint8_t flags_end, uint8_t *out);
|
33
|
-
|
34
|
-
#if defined(IS_X86)
|
35
|
-
#if !defined(BLAKE3_NO_SSE41)
|
36
|
-
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
37
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
38
|
-
uint8_t block_len, uint64_t counter,
|
39
|
-
uint8_t flags);
|
40
|
-
void blake3_compress_xof_sse41(const uint32_t cv[8],
|
41
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
42
|
-
uint8_t block_len, uint64_t counter,
|
43
|
-
uint8_t flags, uint8_t out[64]);
|
44
|
-
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
45
|
-
size_t blocks, const uint32_t key[8],
|
46
|
-
uint64_t counter, bool increment_counter,
|
47
|
-
uint8_t flags, uint8_t flags_start,
|
48
|
-
uint8_t flags_end, uint8_t *out);
|
49
|
-
#endif
|
50
|
-
#if !defined(BLAKE3_NO_AVX2)
|
51
|
-
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
52
|
-
size_t blocks, const uint32_t key[8],
|
53
|
-
uint64_t counter, bool increment_counter,
|
54
|
-
uint8_t flags, uint8_t flags_start,
|
55
|
-
uint8_t flags_end, uint8_t *out);
|
56
|
-
#endif
|
57
|
-
#if !defined(BLAKE3_NO_AVX512)
|
58
|
-
void blake3_compress_in_place_avx512(uint32_t cv[8],
|
59
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
60
|
-
uint8_t block_len, uint64_t counter,
|
61
|
-
uint8_t flags);
|
62
|
-
|
63
|
-
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
64
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
65
|
-
uint8_t block_len, uint64_t counter,
|
66
|
-
uint8_t flags, uint8_t out[64]);
|
67
|
-
|
68
|
-
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
69
|
-
size_t blocks, const uint32_t key[8],
|
70
|
-
uint64_t counter, bool increment_counter,
|
71
|
-
uint8_t flags, uint8_t flags_start,
|
72
|
-
uint8_t flags_end, uint8_t *out);
|
73
|
-
#endif
|
74
|
-
#endif
|
75
|
-
|
76
|
-
#if defined(BLAKE3_USE_NEON)
|
77
|
-
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
78
|
-
size_t blocks, const uint32_t key[8],
|
79
|
-
uint64_t counter, bool increment_counter,
|
80
|
-
uint8_t flags, uint8_t flags_start,
|
81
|
-
uint8_t flags_end, uint8_t *out);
|
82
|
-
#endif
|
83
|
-
|
84
17
|
#if defined(IS_X86)
|
85
18
|
static uint64_t xgetbv() {
|
86
19
|
#if defined(_MSC_VER)
|
@@ -249,7 +182,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
249
182
|
#if defined(IS_X86)
|
250
183
|
const enum cpu_feature features = get_cpu_features();
|
251
184
|
#if !defined(BLAKE3_NO_AVX512)
|
252
|
-
if (features & AVX512F) {
|
185
|
+
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
253
186
|
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
254
187
|
increment_counter, flags, flags_start, flags_end,
|
255
188
|
out);
|
@@ -286,11 +219,11 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
286
219
|
}
|
287
220
|
|
288
221
|
// The dynamically detected SIMD degree of the current platform.
|
289
|
-
size_t blake3_simd_degree() {
|
222
|
+
size_t blake3_simd_degree(void) {
|
290
223
|
#if defined(IS_X86)
|
291
224
|
const enum cpu_feature features = get_cpu_features();
|
292
225
|
#if !defined(BLAKE3_NO_AVX512)
|
293
|
-
if (features & AVX512F) {
|
226
|
+
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
294
227
|
return 16;
|
295
228
|
}
|
296
229
|
#endif
|
@@ -161,7 +161,75 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
161
161
|
bool increment_counter, uint8_t flags,
|
162
162
|
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
163
163
|
|
164
|
-
size_t blake3_simd_degree();
|
164
|
+
size_t blake3_simd_degree(void);
|
165
|
+
|
166
|
+
|
167
|
+
// Declarations for implementation-specific functions.
|
168
|
+
void blake3_compress_in_place_portable(uint32_t cv[8],
|
169
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
170
|
+
uint8_t block_len, uint64_t counter,
|
171
|
+
uint8_t flags);
|
172
|
+
|
173
|
+
void blake3_compress_xof_portable(const uint32_t cv[8],
|
174
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
175
|
+
uint8_t block_len, uint64_t counter,
|
176
|
+
uint8_t flags, uint8_t out[64]);
|
177
|
+
|
178
|
+
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
179
|
+
size_t blocks, const uint32_t key[8],
|
180
|
+
uint64_t counter, bool increment_counter,
|
181
|
+
uint8_t flags, uint8_t flags_start,
|
182
|
+
uint8_t flags_end, uint8_t *out);
|
183
|
+
|
184
|
+
#if defined(IS_X86)
|
185
|
+
#if !defined(BLAKE3_NO_SSE41)
|
186
|
+
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
187
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
188
|
+
uint8_t block_len, uint64_t counter,
|
189
|
+
uint8_t flags);
|
190
|
+
void blake3_compress_xof_sse41(const uint32_t cv[8],
|
191
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
192
|
+
uint8_t block_len, uint64_t counter,
|
193
|
+
uint8_t flags, uint8_t out[64]);
|
194
|
+
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
195
|
+
size_t blocks, const uint32_t key[8],
|
196
|
+
uint64_t counter, bool increment_counter,
|
197
|
+
uint8_t flags, uint8_t flags_start,
|
198
|
+
uint8_t flags_end, uint8_t *out);
|
199
|
+
#endif
|
200
|
+
#if !defined(BLAKE3_NO_AVX2)
|
201
|
+
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
202
|
+
size_t blocks, const uint32_t key[8],
|
203
|
+
uint64_t counter, bool increment_counter,
|
204
|
+
uint8_t flags, uint8_t flags_start,
|
205
|
+
uint8_t flags_end, uint8_t *out);
|
206
|
+
#endif
|
207
|
+
#if !defined(BLAKE3_NO_AVX512)
|
208
|
+
void blake3_compress_in_place_avx512(uint32_t cv[8],
|
209
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
210
|
+
uint8_t block_len, uint64_t counter,
|
211
|
+
uint8_t flags);
|
212
|
+
|
213
|
+
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
214
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
215
|
+
uint8_t block_len, uint64_t counter,
|
216
|
+
uint8_t flags, uint8_t out[64]);
|
217
|
+
|
218
|
+
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
219
|
+
size_t blocks, const uint32_t key[8],
|
220
|
+
uint64_t counter, bool increment_counter,
|
221
|
+
uint8_t flags, uint8_t flags_start,
|
222
|
+
uint8_t flags_end, uint8_t *out);
|
223
|
+
#endif
|
224
|
+
#endif
|
225
|
+
|
226
|
+
#if defined(BLAKE3_USE_NEON)
|
227
|
+
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
228
|
+
size_t blocks, const uint32_t key[8],
|
229
|
+
uint64_t counter, bool increment_counter,
|
230
|
+
uint8_t flags, uint8_t flags_start,
|
231
|
+
uint8_t flags_end, uint8_t *out);
|
232
|
+
#endif
|
165
233
|
|
166
234
|
|
167
235
|
#endif /* BLAKE3_IMPL_H */
|
@@ -1800,15 +1800,18 @@ blake3_hash_many_sse41:
|
|
1800
1800
|
.p2align 6
|
1801
1801
|
blake3_compress_in_place_sse41:
|
1802
1802
|
_blake3_compress_in_place_sse41:
|
1803
|
-
sub rsp,
|
1803
|
+
sub rsp, 120
|
1804
1804
|
movdqa xmmword ptr [rsp], xmm6
|
1805
1805
|
movdqa xmmword ptr [rsp+0x10], xmm7
|
1806
1806
|
movdqa xmmword ptr [rsp+0x20], xmm8
|
1807
1807
|
movdqa xmmword ptr [rsp+0x30], xmm9
|
1808
|
+
movdqa xmmword ptr [rsp+0x40], xmm11
|
1809
|
+
movdqa xmmword ptr [rsp+0x50], xmm14
|
1810
|
+
movdqa xmmword ptr [rsp+0x60], xmm15
|
1808
1811
|
movups xmm0, xmmword ptr [rcx]
|
1809
1812
|
movups xmm1, xmmword ptr [rcx+0x10]
|
1810
1813
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1811
|
-
movzx eax, byte ptr [rsp+
|
1814
|
+
movzx eax, byte ptr [rsp+0xA0]
|
1812
1815
|
movzx r8d, r8b
|
1813
1816
|
shl rax, 32
|
1814
1817
|
add r8, rax
|
@@ -1906,24 +1909,30 @@ _blake3_compress_in_place_sse41:
|
|
1906
1909
|
movdqa xmm7, xmmword ptr [rsp+0x10]
|
1907
1910
|
movdqa xmm8, xmmword ptr [rsp+0x20]
|
1908
1911
|
movdqa xmm9, xmmword ptr [rsp+0x30]
|
1909
|
-
|
1912
|
+
movdqa xmm11, xmmword ptr [rsp+0x40]
|
1913
|
+
movdqa xmm14, xmmword ptr [rsp+0x50]
|
1914
|
+
movdqa xmm15, xmmword ptr [rsp+0x60]
|
1915
|
+
add rsp, 120
|
1910
1916
|
ret
|
1911
1917
|
|
1912
1918
|
|
1913
1919
|
.p2align 6
|
1914
1920
|
_blake3_compress_xof_sse41:
|
1915
1921
|
blake3_compress_xof_sse41:
|
1916
|
-
sub rsp,
|
1922
|
+
sub rsp, 120
|
1917
1923
|
movdqa xmmword ptr [rsp], xmm6
|
1918
1924
|
movdqa xmmword ptr [rsp+0x10], xmm7
|
1919
1925
|
movdqa xmmword ptr [rsp+0x20], xmm8
|
1920
1926
|
movdqa xmmword ptr [rsp+0x30], xmm9
|
1927
|
+
movdqa xmmword ptr [rsp+0x40], xmm11
|
1928
|
+
movdqa xmmword ptr [rsp+0x50], xmm14
|
1929
|
+
movdqa xmmword ptr [rsp+0x60], xmm15
|
1921
1930
|
movups xmm0, xmmword ptr [rcx]
|
1922
1931
|
movups xmm1, xmmword ptr [rcx+0x10]
|
1923
1932
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1924
|
-
movzx eax, byte ptr [rsp+
|
1933
|
+
movzx eax, byte ptr [rsp+0xA0]
|
1925
1934
|
movzx r8d, r8b
|
1926
|
-
mov r10, qword ptr [rsp+
|
1935
|
+
mov r10, qword ptr [rsp+0xA8]
|
1927
1936
|
shl rax, 32
|
1928
1937
|
add r8, rax
|
1929
1938
|
movq xmm3, r9
|
@@ -2026,7 +2035,10 @@ blake3_compress_xof_sse41:
|
|
2026
2035
|
movdqa xmm7, xmmword ptr [rsp+0x10]
|
2027
2036
|
movdqa xmm8, xmmword ptr [rsp+0x20]
|
2028
2037
|
movdqa xmm9, xmmword ptr [rsp+0x30]
|
2029
|
-
|
2038
|
+
movdqa xmm11, xmmword ptr [rsp+0x40]
|
2039
|
+
movdqa xmm14, xmmword ptr [rsp+0x50]
|
2040
|
+
movdqa xmm15, xmmword ptr [rsp+0x60]
|
2041
|
+
add rsp, 120
|
2030
2042
|
ret
|
2031
2043
|
|
2032
2044
|
|
@@ -1802,15 +1802,18 @@ blake3_hash_many_sse41 ENDP
|
|
1802
1802
|
|
1803
1803
|
blake3_compress_in_place_sse41 PROC
|
1804
1804
|
_blake3_compress_in_place_sse41 PROC
|
1805
|
-
sub rsp,
|
1805
|
+
sub rsp, 120
|
1806
1806
|
movdqa xmmword ptr [rsp], xmm6
|
1807
1807
|
movdqa xmmword ptr [rsp+10H], xmm7
|
1808
1808
|
movdqa xmmword ptr [rsp+20H], xmm8
|
1809
1809
|
movdqa xmmword ptr [rsp+30H], xmm9
|
1810
|
+
movdqa xmmword ptr [rsp+40H], xmm11
|
1811
|
+
movdqa xmmword ptr [rsp+50H], xmm14
|
1812
|
+
movdqa xmmword ptr [rsp+60H], xmm15
|
1810
1813
|
movups xmm0, xmmword ptr [rcx]
|
1811
1814
|
movups xmm1, xmmword ptr [rcx+10H]
|
1812
1815
|
movaps xmm2, xmmword ptr [BLAKE3_IV]
|
1813
|
-
movzx eax, byte ptr [rsp+
|
1816
|
+
movzx eax, byte ptr [rsp+0A0H]
|
1814
1817
|
movzx r8d, r8b
|
1815
1818
|
shl rax, 32
|
1816
1819
|
add r8, rax
|
@@ -1908,7 +1911,10 @@ _blake3_compress_in_place_sse41 PROC
|
|
1908
1911
|
movdqa xmm7, xmmword ptr [rsp+10H]
|
1909
1912
|
movdqa xmm8, xmmword ptr [rsp+20H]
|
1910
1913
|
movdqa xmm9, xmmword ptr [rsp+30H]
|
1911
|
-
|
1914
|
+
movdqa xmm11, xmmword ptr [rsp+40H]
|
1915
|
+
movdqa xmm14, xmmword ptr [rsp+50H]
|
1916
|
+
movdqa xmm15, xmmword ptr [rsp+60H]
|
1917
|
+
add rsp, 120
|
1912
1918
|
ret
|
1913
1919
|
_blake3_compress_in_place_sse41 ENDP
|
1914
1920
|
blake3_compress_in_place_sse41 ENDP
|
@@ -1916,17 +1922,20 @@ blake3_compress_in_place_sse41 ENDP
|
|
1916
1922
|
ALIGN 16
|
1917
1923
|
blake3_compress_xof_sse41 PROC
|
1918
1924
|
_blake3_compress_xof_sse41 PROC
|
1919
|
-
sub rsp,
|
1925
|
+
sub rsp, 120
|
1920
1926
|
movdqa xmmword ptr [rsp], xmm6
|
1921
1927
|
movdqa xmmword ptr [rsp+10H], xmm7
|
1922
1928
|
movdqa xmmword ptr [rsp+20H], xmm8
|
1923
1929
|
movdqa xmmword ptr [rsp+30H], xmm9
|
1930
|
+
movdqa xmmword ptr [rsp+40H], xmm11
|
1931
|
+
movdqa xmmword ptr [rsp+50H], xmm14
|
1932
|
+
movdqa xmmword ptr [rsp+60H], xmm15
|
1924
1933
|
movups xmm0, xmmword ptr [rcx]
|
1925
1934
|
movups xmm1, xmmword ptr [rcx+10H]
|
1926
1935
|
movaps xmm2, xmmword ptr [BLAKE3_IV]
|
1927
|
-
movzx eax, byte ptr [rsp+
|
1936
|
+
movzx eax, byte ptr [rsp+0A0H]
|
1928
1937
|
movzx r8d, r8b
|
1929
|
-
mov r10, qword ptr [rsp+
|
1938
|
+
mov r10, qword ptr [rsp+0A8H]
|
1930
1939
|
shl rax, 32
|
1931
1940
|
add r8, rax
|
1932
1941
|
movq xmm3, r9
|
@@ -2029,7 +2038,10 @@ _blake3_compress_xof_sse41 PROC
|
|
2029
2038
|
movdqa xmm7, xmmword ptr [rsp+10H]
|
2030
2039
|
movdqa xmm8, xmmword ptr [rsp+20H]
|
2031
2040
|
movdqa xmm9, xmmword ptr [rsp+30H]
|
2032
|
-
|
2041
|
+
movdqa xmm11, xmmword ptr [rsp+40H]
|
2042
|
+
movdqa xmm14, xmmword ptr [rsp+50H]
|
2043
|
+
movdqa xmm15, xmmword ptr [rsp+60H]
|
2044
|
+
add rsp, 120
|
2033
2045
|
ret
|
2034
2046
|
_blake3_compress_xof_sse41 ENDP
|
2035
2047
|
blake3_compress_xof_sse41 ENDP
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.34.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
|
116
|
-
rubygems_version: 2.7.6
|
115
|
+
rubygems_version: 3.0.3
|
117
116
|
signing_key:
|
118
117
|
specification_version: 4
|
119
118
|
summary: BLAKE3 for Ruby
|