digest-blake3 0.22.1 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/ext/digest/blake3/blake3.c +24 -15
- data/ext/digest/blake3/blake3.h +4 -2
- data/ext/digest/blake3/blake3_avx512_x86-64_unix.S +17 -17
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_gnu.S +17 -17
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_msvc.asm +17 -17
- data/ext/digest/blake3/blake3_dispatch.c +3 -70
- data/ext/digest/blake3/blake3_impl.h +69 -1
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_gnu.S +19 -7
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +19 -7
- data/lib/digest/blake3/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3db2fab1165a083a1a83d5c656c1c737d53f853de91babcb6c9c0e74ec7e23a
|
4
|
+
data.tar.gz: d4692ef2c6326a70ffa0cad5ed90219daa96c0940c0e9986d9ee7b4469d6b48d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ef86ba9e54408a68179d43678d7863d1af3d51e6002315d4607e377f2a142d374f4dc0e4d5f8ddde641063d3b5e2f93214fb10274aba849eee757d5f884d854
|
7
|
+
data.tar.gz: e8bf900ad7eece0df62964ca7695af5c8681cbe23c3b7e6cc2af4b0ac2c1d6b3f74de987d7998f4627a5bfcb164c2bafa7ffda8bf8f96be5075ca44761aa2c23
|
data/Gemfile.lock
CHANGED
data/ext/digest/blake3/blake3.c
CHANGED
@@ -84,23 +84,26 @@ INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
|
|
84
84
|
memcpy(cv, cv_words, 32);
|
85
85
|
}
|
86
86
|
|
87
|
-
INLINE void output_root_bytes(const output_t *self, uint8_t *out,
|
87
|
+
INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
|
88
88
|
size_t out_len) {
|
89
|
-
uint64_t output_block_counter = 0;
|
89
|
+
uint64_t output_block_counter = seek / 64;
|
90
|
+
size_t offset_within_block = seek % 64;
|
90
91
|
uint8_t wide_buf[64];
|
91
92
|
while (out_len > 0) {
|
92
93
|
blake3_compress_xof(self->input_cv, self->block, self->block_len,
|
93
94
|
output_block_counter, self->flags | ROOT, wide_buf);
|
95
|
+
size_t available_bytes = 64 - offset_within_block;
|
94
96
|
size_t memcpy_len;
|
95
|
-
if (out_len > 64) {
|
96
|
-
memcpy_len = 64;
|
97
|
+
if (out_len > available_bytes) {
|
98
|
+
memcpy_len = available_bytes;
|
97
99
|
} else {
|
98
100
|
memcpy_len = out_len;
|
99
101
|
}
|
100
|
-
memcpy(out, wide_buf, memcpy_len);
|
102
|
+
memcpy(out, wide_buf + offset_within_block, memcpy_len);
|
101
103
|
out += memcpy_len;
|
102
104
|
out_len -= memcpy_len;
|
103
105
|
output_block_counter += 1;
|
106
|
+
offset_within_block = 0;
|
104
107
|
}
|
105
108
|
}
|
106
109
|
|
@@ -256,10 +259,11 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
|
|
256
259
|
// Why not just have the caller split the input on the first update(), instead
|
257
260
|
// of implementing this special rule? Because we don't want to limit SIMD or
|
258
261
|
// multi-threading parallelism for that update().
|
259
|
-
size_t blake3_compress_subtree_wide(const uint8_t *input,
|
260
|
-
|
261
|
-
|
262
|
-
|
262
|
+
static size_t blake3_compress_subtree_wide(const uint8_t *input,
|
263
|
+
size_t input_len,
|
264
|
+
const uint32_t key[8],
|
265
|
+
uint64_t chunk_counter,
|
266
|
+
uint8_t flags, uint8_t *out) {
|
263
267
|
// Note that the single chunk case does *not* bump the SIMD degree up to 2
|
264
268
|
// when it is 1. If this implementation adds multi-threading in the future,
|
265
269
|
// this gives us the option of multi-threading even the 2-chunk case, which
|
@@ -425,8 +429,8 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
|
|
425
429
|
// compress_subtree_to_parent_node(). That function always returns the top
|
426
430
|
// *two* chaining values of the subtree it's compressing. We then do lazy
|
427
431
|
// merging with each of them separately, so that the second CV will always
|
428
|
-
// remain unmerged. (
|
429
|
-
//
|
432
|
+
// remain unmerged. (That also helps us support extendable output when we're
|
433
|
+
// hashing an input all-at-once.)
|
430
434
|
INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
|
431
435
|
uint64_t chunk_counter) {
|
432
436
|
hasher_merge_cv_stack(self, chunk_counter);
|
@@ -472,8 +476,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
472
476
|
|
473
477
|
// Now the chunk_state is clear, and we have more input. If there's more than
|
474
478
|
// a single chunk (so, definitely not the root chunk), hash the largest whole
|
475
|
-
// subtree we can, with the full benefits of SIMD and
|
476
|
-
// parallelism. Two restrictions:
|
479
|
+
// subtree we can, with the full benefits of SIMD (and maybe in the future,
|
480
|
+
// multi-threading) parallelism. Two restrictions:
|
477
481
|
// - The subtree has to be a power-of-2 number of chunks. Only subtrees along
|
478
482
|
// the right edge can be incomplete, and we don't know where the right edge
|
479
483
|
// is going to be until we get to finalize().
|
@@ -546,6 +550,11 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
546
550
|
|
547
551
|
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
548
552
|
size_t out_len) {
|
553
|
+
blake3_hasher_finalize_seek(self, 0, out, out_len);
|
554
|
+
}
|
555
|
+
|
556
|
+
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
557
|
+
uint8_t *out, size_t out_len) {
|
549
558
|
// Explicitly checking for zero avoids causing UB by passing a null pointer
|
550
559
|
// to memcpy. This comes up in practice with things like:
|
551
560
|
// std::vector<uint8_t> v;
|
@@ -557,7 +566,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
|
557
566
|
// If the subtree stack is empty, then the current chunk is the root.
|
558
567
|
if (self->cv_stack_len == 0) {
|
559
568
|
output_t output = chunk_state_output(&self->chunk);
|
560
|
-
output_root_bytes(&output, out, out_len);
|
569
|
+
output_root_bytes(&output, seek, out, out_len);
|
561
570
|
return;
|
562
571
|
}
|
563
572
|
// If there are any bytes in the chunk state, finalize that chunk and do a
|
@@ -585,5 +594,5 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
|
585
594
|
output_chaining_value(&output, &parent_block[32]);
|
586
595
|
output = parent_output(parent_block, self->key, self->chunk.flags);
|
587
596
|
}
|
588
|
-
output_root_bytes(&output, out, out_len);
|
597
|
+
output_root_bytes(&output, seek, out, out_len);
|
589
598
|
}
|
data/ext/digest/blake3/blake3.h
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#include <stddef.h>
|
5
5
|
#include <stdint.h>
|
6
6
|
|
7
|
-
#ifdef
|
7
|
+
#ifdef __cplusplus
|
8
8
|
extern "C" {
|
9
9
|
#endif
|
10
10
|
|
@@ -46,8 +46,10 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|
46
46
|
size_t input_len);
|
47
47
|
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
48
48
|
size_t out_len);
|
49
|
+
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
50
|
+
uint8_t *out, size_t out_len);
|
49
51
|
|
50
|
-
#ifdef
|
52
|
+
#ifdef __cplusplus
|
51
53
|
}
|
52
54
|
#endif
|
53
55
|
|
@@ -82,15 +82,15 @@ blake3_hash_many_avx512:
|
|
82
82
|
mov r14, qword ptr [rdi+0x50]
|
83
83
|
mov r15, qword ptr [rdi+0x58]
|
84
84
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
85
|
-
|
85
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
86
86
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
87
|
-
|
87
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
88
88
|
vpunpcklqdq zmm8, zmm16, zmm17
|
89
89
|
vpunpckhqdq zmm9, zmm16, zmm17
|
90
90
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
91
|
-
|
91
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
92
92
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
93
|
-
|
93
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
94
94
|
vpunpcklqdq zmm10, zmm18, zmm19
|
95
95
|
vpunpckhqdq zmm11, zmm18, zmm19
|
96
96
|
mov r8, qword ptr [rdi+0x20]
|
@@ -102,15 +102,15 @@ blake3_hash_many_avx512:
|
|
102
102
|
mov r14, qword ptr [rdi+0x70]
|
103
103
|
mov r15, qword ptr [rdi+0x78]
|
104
104
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
105
|
-
|
105
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
106
106
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
107
|
-
|
107
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
108
108
|
vpunpcklqdq zmm12, zmm16, zmm17
|
109
109
|
vpunpckhqdq zmm13, zmm16, zmm17
|
110
110
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
111
|
-
|
111
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
112
112
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
113
|
-
|
113
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
114
114
|
vpunpcklqdq zmm14, zmm18, zmm19
|
115
115
|
vpunpckhqdq zmm15, zmm18, zmm19
|
116
116
|
vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
|
@@ -144,15 +144,15 @@ blake3_hash_many_avx512:
|
|
144
144
|
mov r14, qword ptr [rdi+0x50]
|
145
145
|
mov r15, qword ptr [rdi+0x58]
|
146
146
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
147
|
-
|
147
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
148
148
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
149
|
-
|
149
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
150
150
|
vpunpcklqdq zmm8, zmm24, zmm25
|
151
151
|
vpunpckhqdq zmm9, zmm24, zmm25
|
152
152
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
153
|
-
|
153
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
154
154
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
155
|
-
|
155
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
156
156
|
vpunpcklqdq zmm10, zmm24, zmm25
|
157
157
|
vpunpckhqdq zmm11, zmm24, zmm25
|
158
158
|
prefetcht0 [r8+rdx+0x80]
|
@@ -172,15 +172,15 @@ blake3_hash_many_avx512:
|
|
172
172
|
mov r14, qword ptr [rdi+0x70]
|
173
173
|
mov r15, qword ptr [rdi+0x78]
|
174
174
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
175
|
-
|
175
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
176
176
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
177
|
-
|
177
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
178
178
|
vpunpcklqdq zmm12, zmm24, zmm25
|
179
179
|
vpunpckhqdq zmm13, zmm24, zmm25
|
180
180
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
181
|
-
|
181
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
182
182
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
183
|
-
|
183
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
184
184
|
vpunpcklqdq zmm14, zmm24, zmm25
|
185
185
|
vpunpckhqdq zmm15, zmm24, zmm25
|
186
186
|
prefetcht0 [r8+rdx+0x80]
|
@@ -2039,7 +2039,7 @@ blake3_hash_many_avx512:
|
|
2039
2039
|
vpermq ymm14, ymm14, 0xDC
|
2040
2040
|
vpermq ymm15, ymm15, 0xDC
|
2041
2041
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
2042
|
-
|
2042
|
+
vinserti64x4 zmm13, zmm14, ymm15, 0x01
|
2043
2043
|
mov eax, 17476
|
2044
2044
|
kmovw k2, eax
|
2045
2045
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -96,15 +96,15 @@ blake3_hash_many_avx512:
|
|
96
96
|
mov r14, qword ptr [rdi+0x50]
|
97
97
|
mov r15, qword ptr [rdi+0x58]
|
98
98
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
99
|
-
|
99
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
100
100
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
101
|
-
|
101
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
102
102
|
vpunpcklqdq zmm8, zmm16, zmm17
|
103
103
|
vpunpckhqdq zmm9, zmm16, zmm17
|
104
104
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
105
|
-
|
105
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
106
106
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
107
|
-
|
107
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
108
108
|
vpunpcklqdq zmm10, zmm18, zmm19
|
109
109
|
vpunpckhqdq zmm11, zmm18, zmm19
|
110
110
|
mov r8, qword ptr [rdi+0x20]
|
@@ -116,15 +116,15 @@ blake3_hash_many_avx512:
|
|
116
116
|
mov r14, qword ptr [rdi+0x70]
|
117
117
|
mov r15, qword ptr [rdi+0x78]
|
118
118
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
119
|
-
|
119
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
120
120
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
121
|
-
|
121
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
122
122
|
vpunpcklqdq zmm12, zmm16, zmm17
|
123
123
|
vpunpckhqdq zmm13, zmm16, zmm17
|
124
124
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
125
|
-
|
125
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
126
126
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
127
|
-
|
127
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
128
128
|
vpunpcklqdq zmm14, zmm18, zmm19
|
129
129
|
vpunpckhqdq zmm15, zmm18, zmm19
|
130
130
|
vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
|
@@ -158,15 +158,15 @@ blake3_hash_many_avx512:
|
|
158
158
|
mov r14, qword ptr [rdi+0x50]
|
159
159
|
mov r15, qword ptr [rdi+0x58]
|
160
160
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
161
|
-
|
161
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
162
162
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
163
|
-
|
163
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
164
164
|
vpunpcklqdq zmm8, zmm24, zmm25
|
165
165
|
vpunpckhqdq zmm9, zmm24, zmm25
|
166
166
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
167
|
-
|
167
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
168
168
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
169
|
-
|
169
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
170
170
|
vpunpcklqdq zmm10, zmm24, zmm25
|
171
171
|
vpunpckhqdq zmm11, zmm24, zmm25
|
172
172
|
prefetcht0 [r8+rdx+0x80]
|
@@ -186,15 +186,15 @@ blake3_hash_many_avx512:
|
|
186
186
|
mov r14, qword ptr [rdi+0x70]
|
187
187
|
mov r15, qword ptr [rdi+0x78]
|
188
188
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
189
|
-
|
189
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
190
190
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
191
|
-
|
191
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
192
192
|
vpunpcklqdq zmm12, zmm24, zmm25
|
193
193
|
vpunpckhqdq zmm13, zmm24, zmm25
|
194
194
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
195
|
-
|
195
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
196
196
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
197
|
-
|
197
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
198
198
|
vpunpcklqdq zmm14, zmm24, zmm25
|
199
199
|
vpunpckhqdq zmm15, zmm24, zmm25
|
200
200
|
prefetcht0 [r8+rdx+0x80]
|
@@ -2065,7 +2065,7 @@ blake3_hash_many_avx512:
|
|
2065
2065
|
vpermq ymm14, ymm14, 0xDC
|
2066
2066
|
vpermq ymm15, ymm15, 0xDC
|
2067
2067
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
2068
|
-
|
2068
|
+
vinserti64x4 zmm13, zmm14, ymm15, 0x01
|
2069
2069
|
mov eax, 17476
|
2070
2070
|
kmovw k2, eax
|
2071
2071
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -99,15 +99,15 @@ innerloop16:
|
|
99
99
|
mov r14, qword ptr [rdi+50H]
|
100
100
|
mov r15, qword ptr [rdi+58H]
|
101
101
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
|
102
|
-
|
102
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
|
103
103
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
|
104
|
-
|
104
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
|
105
105
|
vpunpcklqdq zmm8, zmm16, zmm17
|
106
106
|
vpunpckhqdq zmm9, zmm16, zmm17
|
107
107
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
|
108
|
-
|
108
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
|
109
109
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
|
110
|
-
|
110
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
|
111
111
|
vpunpcklqdq zmm10, zmm18, zmm19
|
112
112
|
vpunpckhqdq zmm11, zmm18, zmm19
|
113
113
|
mov r8, qword ptr [rdi+20H]
|
@@ -119,15 +119,15 @@ innerloop16:
|
|
119
119
|
mov r14, qword ptr [rdi+70H]
|
120
120
|
mov r15, qword ptr [rdi+78H]
|
121
121
|
vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
|
122
|
-
|
122
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
|
123
123
|
vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
|
124
|
-
|
124
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
|
125
125
|
vpunpcklqdq zmm12, zmm16, zmm17
|
126
126
|
vpunpckhqdq zmm13, zmm16, zmm17
|
127
127
|
vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
|
128
|
-
|
128
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
|
129
129
|
vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
|
130
|
-
|
130
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
|
131
131
|
vpunpcklqdq zmm14, zmm18, zmm19
|
132
132
|
vpunpckhqdq zmm15, zmm18, zmm19
|
133
133
|
vmovdqa32 zmm27, zmmword ptr [INDEX0]
|
@@ -161,15 +161,15 @@ innerloop16:
|
|
161
161
|
mov r14, qword ptr [rdi+50H]
|
162
162
|
mov r15, qword ptr [rdi+58H]
|
163
163
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
|
164
|
-
|
164
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
|
165
165
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
|
166
|
-
|
166
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
|
167
167
|
vpunpcklqdq zmm8, zmm24, zmm25
|
168
168
|
vpunpckhqdq zmm9, zmm24, zmm25
|
169
169
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
|
170
|
-
|
170
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
|
171
171
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
|
172
|
-
|
172
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
|
173
173
|
vpunpcklqdq zmm10, zmm24, zmm25
|
174
174
|
vpunpckhqdq zmm11, zmm24, zmm25
|
175
175
|
prefetcht0 byte ptr [r8+rdx+80H]
|
@@ -189,15 +189,15 @@ innerloop16:
|
|
189
189
|
mov r14, qword ptr [rdi+70H]
|
190
190
|
mov r15, qword ptr [rdi+78H]
|
191
191
|
vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
|
192
|
-
|
192
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
|
193
193
|
vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
|
194
|
-
|
194
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
|
195
195
|
vpunpcklqdq zmm12, zmm24, zmm25
|
196
196
|
vpunpckhqdq zmm13, zmm24, zmm25
|
197
197
|
vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
|
198
|
-
|
198
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
|
199
199
|
vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
|
200
|
-
|
200
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
|
201
201
|
vpunpcklqdq zmm14, zmm24, zmm25
|
202
202
|
vpunpckhqdq zmm15, zmm24, zmm25
|
203
203
|
prefetcht0 byte ptr [r8+rdx+80H]
|
@@ -2073,7 +2073,7 @@ final7blocks:
|
|
2073
2073
|
vpermq ymm14, ymm14, 0DCH
|
2074
2074
|
vpermq ymm15, ymm15, 0DCH
|
2075
2075
|
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
|
2076
|
-
|
2076
|
+
vinserti64x4 zmm13, zmm14, ymm15, 01H
|
2077
2077
|
mov eax, 17476
|
2078
2078
|
kmovw k2, eax
|
2079
2079
|
vpblendmd zmm13 {k2}, zmm13, zmm12
|
@@ -14,73 +14,6 @@
|
|
14
14
|
#endif
|
15
15
|
#endif
|
16
16
|
|
17
|
-
// Declarations for implementation-specific functions.
|
18
|
-
void blake3_compress_in_place_portable(uint32_t cv[8],
|
19
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
20
|
-
uint8_t block_len, uint64_t counter,
|
21
|
-
uint8_t flags);
|
22
|
-
|
23
|
-
void blake3_compress_xof_portable(const uint32_t cv[8],
|
24
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
25
|
-
uint8_t block_len, uint64_t counter,
|
26
|
-
uint8_t flags, uint8_t out[64]);
|
27
|
-
|
28
|
-
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
29
|
-
size_t blocks, const uint32_t key[8],
|
30
|
-
uint64_t counter, bool increment_counter,
|
31
|
-
uint8_t flags, uint8_t flags_start,
|
32
|
-
uint8_t flags_end, uint8_t *out);
|
33
|
-
|
34
|
-
#if defined(IS_X86)
|
35
|
-
#if !defined(BLAKE3_NO_SSE41)
|
36
|
-
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
37
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
38
|
-
uint8_t block_len, uint64_t counter,
|
39
|
-
uint8_t flags);
|
40
|
-
void blake3_compress_xof_sse41(const uint32_t cv[8],
|
41
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
42
|
-
uint8_t block_len, uint64_t counter,
|
43
|
-
uint8_t flags, uint8_t out[64]);
|
44
|
-
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
45
|
-
size_t blocks, const uint32_t key[8],
|
46
|
-
uint64_t counter, bool increment_counter,
|
47
|
-
uint8_t flags, uint8_t flags_start,
|
48
|
-
uint8_t flags_end, uint8_t *out);
|
49
|
-
#endif
|
50
|
-
#if !defined(BLAKE3_NO_AVX2)
|
51
|
-
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
52
|
-
size_t blocks, const uint32_t key[8],
|
53
|
-
uint64_t counter, bool increment_counter,
|
54
|
-
uint8_t flags, uint8_t flags_start,
|
55
|
-
uint8_t flags_end, uint8_t *out);
|
56
|
-
#endif
|
57
|
-
#if !defined(BLAKE3_NO_AVX512)
|
58
|
-
void blake3_compress_in_place_avx512(uint32_t cv[8],
|
59
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
60
|
-
uint8_t block_len, uint64_t counter,
|
61
|
-
uint8_t flags);
|
62
|
-
|
63
|
-
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
64
|
-
const uint8_t block[BLAKE3_BLOCK_LEN],
|
65
|
-
uint8_t block_len, uint64_t counter,
|
66
|
-
uint8_t flags, uint8_t out[64]);
|
67
|
-
|
68
|
-
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
69
|
-
size_t blocks, const uint32_t key[8],
|
70
|
-
uint64_t counter, bool increment_counter,
|
71
|
-
uint8_t flags, uint8_t flags_start,
|
72
|
-
uint8_t flags_end, uint8_t *out);
|
73
|
-
#endif
|
74
|
-
#endif
|
75
|
-
|
76
|
-
#if defined(BLAKE3_USE_NEON)
|
77
|
-
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
78
|
-
size_t blocks, const uint32_t key[8],
|
79
|
-
uint64_t counter, bool increment_counter,
|
80
|
-
uint8_t flags, uint8_t flags_start,
|
81
|
-
uint8_t flags_end, uint8_t *out);
|
82
|
-
#endif
|
83
|
-
|
84
17
|
#if defined(IS_X86)
|
85
18
|
static uint64_t xgetbv() {
|
86
19
|
#if defined(_MSC_VER)
|
@@ -249,7 +182,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
249
182
|
#if defined(IS_X86)
|
250
183
|
const enum cpu_feature features = get_cpu_features();
|
251
184
|
#if !defined(BLAKE3_NO_AVX512)
|
252
|
-
if (features & AVX512F) {
|
185
|
+
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
253
186
|
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
254
187
|
increment_counter, flags, flags_start, flags_end,
|
255
188
|
out);
|
@@ -286,11 +219,11 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
286
219
|
}
|
287
220
|
|
288
221
|
// The dynamically detected SIMD degree of the current platform.
|
289
|
-
size_t blake3_simd_degree() {
|
222
|
+
size_t blake3_simd_degree(void) {
|
290
223
|
#if defined(IS_X86)
|
291
224
|
const enum cpu_feature features = get_cpu_features();
|
292
225
|
#if !defined(BLAKE3_NO_AVX512)
|
293
|
-
if (features & AVX512F) {
|
226
|
+
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
294
227
|
return 16;
|
295
228
|
}
|
296
229
|
#endif
|
@@ -161,7 +161,75 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|
161
161
|
bool increment_counter, uint8_t flags,
|
162
162
|
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
163
163
|
|
164
|
-
size_t blake3_simd_degree();
|
164
|
+
size_t blake3_simd_degree(void);
|
165
|
+
|
166
|
+
|
167
|
+
// Declarations for implementation-specific functions.
|
168
|
+
void blake3_compress_in_place_portable(uint32_t cv[8],
|
169
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
170
|
+
uint8_t block_len, uint64_t counter,
|
171
|
+
uint8_t flags);
|
172
|
+
|
173
|
+
void blake3_compress_xof_portable(const uint32_t cv[8],
|
174
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
175
|
+
uint8_t block_len, uint64_t counter,
|
176
|
+
uint8_t flags, uint8_t out[64]);
|
177
|
+
|
178
|
+
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
179
|
+
size_t blocks, const uint32_t key[8],
|
180
|
+
uint64_t counter, bool increment_counter,
|
181
|
+
uint8_t flags, uint8_t flags_start,
|
182
|
+
uint8_t flags_end, uint8_t *out);
|
183
|
+
|
184
|
+
#if defined(IS_X86)
|
185
|
+
#if !defined(BLAKE3_NO_SSE41)
|
186
|
+
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
187
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
188
|
+
uint8_t block_len, uint64_t counter,
|
189
|
+
uint8_t flags);
|
190
|
+
void blake3_compress_xof_sse41(const uint32_t cv[8],
|
191
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
192
|
+
uint8_t block_len, uint64_t counter,
|
193
|
+
uint8_t flags, uint8_t out[64]);
|
194
|
+
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
195
|
+
size_t blocks, const uint32_t key[8],
|
196
|
+
uint64_t counter, bool increment_counter,
|
197
|
+
uint8_t flags, uint8_t flags_start,
|
198
|
+
uint8_t flags_end, uint8_t *out);
|
199
|
+
#endif
|
200
|
+
#if !defined(BLAKE3_NO_AVX2)
|
201
|
+
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
202
|
+
size_t blocks, const uint32_t key[8],
|
203
|
+
uint64_t counter, bool increment_counter,
|
204
|
+
uint8_t flags, uint8_t flags_start,
|
205
|
+
uint8_t flags_end, uint8_t *out);
|
206
|
+
#endif
|
207
|
+
#if !defined(BLAKE3_NO_AVX512)
|
208
|
+
void blake3_compress_in_place_avx512(uint32_t cv[8],
|
209
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
210
|
+
uint8_t block_len, uint64_t counter,
|
211
|
+
uint8_t flags);
|
212
|
+
|
213
|
+
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
214
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
215
|
+
uint8_t block_len, uint64_t counter,
|
216
|
+
uint8_t flags, uint8_t out[64]);
|
217
|
+
|
218
|
+
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
219
|
+
size_t blocks, const uint32_t key[8],
|
220
|
+
uint64_t counter, bool increment_counter,
|
221
|
+
uint8_t flags, uint8_t flags_start,
|
222
|
+
uint8_t flags_end, uint8_t *out);
|
223
|
+
#endif
|
224
|
+
#endif
|
225
|
+
|
226
|
+
#if defined(BLAKE3_USE_NEON)
|
227
|
+
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
228
|
+
size_t blocks, const uint32_t key[8],
|
229
|
+
uint64_t counter, bool increment_counter,
|
230
|
+
uint8_t flags, uint8_t flags_start,
|
231
|
+
uint8_t flags_end, uint8_t *out);
|
232
|
+
#endif
|
165
233
|
|
166
234
|
|
167
235
|
#endif /* BLAKE3_IMPL_H */
|
@@ -1800,15 +1800,18 @@ blake3_hash_many_sse41:
|
|
1800
1800
|
.p2align 6
|
1801
1801
|
blake3_compress_in_place_sse41:
|
1802
1802
|
_blake3_compress_in_place_sse41:
|
1803
|
-
sub rsp,
|
1803
|
+
sub rsp, 120
|
1804
1804
|
movdqa xmmword ptr [rsp], xmm6
|
1805
1805
|
movdqa xmmword ptr [rsp+0x10], xmm7
|
1806
1806
|
movdqa xmmword ptr [rsp+0x20], xmm8
|
1807
1807
|
movdqa xmmword ptr [rsp+0x30], xmm9
|
1808
|
+
movdqa xmmword ptr [rsp+0x40], xmm11
|
1809
|
+
movdqa xmmword ptr [rsp+0x50], xmm14
|
1810
|
+
movdqa xmmword ptr [rsp+0x60], xmm15
|
1808
1811
|
movups xmm0, xmmword ptr [rcx]
|
1809
1812
|
movups xmm1, xmmword ptr [rcx+0x10]
|
1810
1813
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1811
|
-
movzx eax, byte ptr [rsp+
|
1814
|
+
movzx eax, byte ptr [rsp+0xA0]
|
1812
1815
|
movzx r8d, r8b
|
1813
1816
|
shl rax, 32
|
1814
1817
|
add r8, rax
|
@@ -1906,24 +1909,30 @@ _blake3_compress_in_place_sse41:
|
|
1906
1909
|
movdqa xmm7, xmmword ptr [rsp+0x10]
|
1907
1910
|
movdqa xmm8, xmmword ptr [rsp+0x20]
|
1908
1911
|
movdqa xmm9, xmmword ptr [rsp+0x30]
|
1909
|
-
|
1912
|
+
movdqa xmm11, xmmword ptr [rsp+0x40]
|
1913
|
+
movdqa xmm14, xmmword ptr [rsp+0x50]
|
1914
|
+
movdqa xmm15, xmmword ptr [rsp+0x60]
|
1915
|
+
add rsp, 120
|
1910
1916
|
ret
|
1911
1917
|
|
1912
1918
|
|
1913
1919
|
.p2align 6
|
1914
1920
|
_blake3_compress_xof_sse41:
|
1915
1921
|
blake3_compress_xof_sse41:
|
1916
|
-
sub rsp,
|
1922
|
+
sub rsp, 120
|
1917
1923
|
movdqa xmmword ptr [rsp], xmm6
|
1918
1924
|
movdqa xmmword ptr [rsp+0x10], xmm7
|
1919
1925
|
movdqa xmmword ptr [rsp+0x20], xmm8
|
1920
1926
|
movdqa xmmword ptr [rsp+0x30], xmm9
|
1927
|
+
movdqa xmmword ptr [rsp+0x40], xmm11
|
1928
|
+
movdqa xmmword ptr [rsp+0x50], xmm14
|
1929
|
+
movdqa xmmword ptr [rsp+0x60], xmm15
|
1921
1930
|
movups xmm0, xmmword ptr [rcx]
|
1922
1931
|
movups xmm1, xmmword ptr [rcx+0x10]
|
1923
1932
|
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
|
1924
|
-
movzx eax, byte ptr [rsp+
|
1933
|
+
movzx eax, byte ptr [rsp+0xA0]
|
1925
1934
|
movzx r8d, r8b
|
1926
|
-
mov r10, qword ptr [rsp+
|
1935
|
+
mov r10, qword ptr [rsp+0xA8]
|
1927
1936
|
shl rax, 32
|
1928
1937
|
add r8, rax
|
1929
1938
|
movq xmm3, r9
|
@@ -2026,7 +2035,10 @@ blake3_compress_xof_sse41:
|
|
2026
2035
|
movdqa xmm7, xmmword ptr [rsp+0x10]
|
2027
2036
|
movdqa xmm8, xmmword ptr [rsp+0x20]
|
2028
2037
|
movdqa xmm9, xmmword ptr [rsp+0x30]
|
2029
|
-
|
2038
|
+
movdqa xmm11, xmmword ptr [rsp+0x40]
|
2039
|
+
movdqa xmm14, xmmword ptr [rsp+0x50]
|
2040
|
+
movdqa xmm15, xmmword ptr [rsp+0x60]
|
2041
|
+
add rsp, 120
|
2030
2042
|
ret
|
2031
2043
|
|
2032
2044
|
|
@@ -1802,15 +1802,18 @@ blake3_hash_many_sse41 ENDP
|
|
1802
1802
|
|
1803
1803
|
blake3_compress_in_place_sse41 PROC
|
1804
1804
|
_blake3_compress_in_place_sse41 PROC
|
1805
|
-
sub rsp,
|
1805
|
+
sub rsp, 120
|
1806
1806
|
movdqa xmmword ptr [rsp], xmm6
|
1807
1807
|
movdqa xmmword ptr [rsp+10H], xmm7
|
1808
1808
|
movdqa xmmword ptr [rsp+20H], xmm8
|
1809
1809
|
movdqa xmmword ptr [rsp+30H], xmm9
|
1810
|
+
movdqa xmmword ptr [rsp+40H], xmm11
|
1811
|
+
movdqa xmmword ptr [rsp+50H], xmm14
|
1812
|
+
movdqa xmmword ptr [rsp+60H], xmm15
|
1810
1813
|
movups xmm0, xmmword ptr [rcx]
|
1811
1814
|
movups xmm1, xmmword ptr [rcx+10H]
|
1812
1815
|
movaps xmm2, xmmword ptr [BLAKE3_IV]
|
1813
|
-
movzx eax, byte ptr [rsp+
|
1816
|
+
movzx eax, byte ptr [rsp+0A0H]
|
1814
1817
|
movzx r8d, r8b
|
1815
1818
|
shl rax, 32
|
1816
1819
|
add r8, rax
|
@@ -1908,7 +1911,10 @@ _blake3_compress_in_place_sse41 PROC
|
|
1908
1911
|
movdqa xmm7, xmmword ptr [rsp+10H]
|
1909
1912
|
movdqa xmm8, xmmword ptr [rsp+20H]
|
1910
1913
|
movdqa xmm9, xmmword ptr [rsp+30H]
|
1911
|
-
|
1914
|
+
movdqa xmm11, xmmword ptr [rsp+40H]
|
1915
|
+
movdqa xmm14, xmmword ptr [rsp+50H]
|
1916
|
+
movdqa xmm15, xmmword ptr [rsp+60H]
|
1917
|
+
add rsp, 120
|
1912
1918
|
ret
|
1913
1919
|
_blake3_compress_in_place_sse41 ENDP
|
1914
1920
|
blake3_compress_in_place_sse41 ENDP
|
@@ -1916,17 +1922,20 @@ blake3_compress_in_place_sse41 ENDP
|
|
1916
1922
|
ALIGN 16
|
1917
1923
|
blake3_compress_xof_sse41 PROC
|
1918
1924
|
_blake3_compress_xof_sse41 PROC
|
1919
|
-
sub rsp,
|
1925
|
+
sub rsp, 120
|
1920
1926
|
movdqa xmmword ptr [rsp], xmm6
|
1921
1927
|
movdqa xmmword ptr [rsp+10H], xmm7
|
1922
1928
|
movdqa xmmword ptr [rsp+20H], xmm8
|
1923
1929
|
movdqa xmmword ptr [rsp+30H], xmm9
|
1930
|
+
movdqa xmmword ptr [rsp+40H], xmm11
|
1931
|
+
movdqa xmmword ptr [rsp+50H], xmm14
|
1932
|
+
movdqa xmmword ptr [rsp+60H], xmm15
|
1924
1933
|
movups xmm0, xmmword ptr [rcx]
|
1925
1934
|
movups xmm1, xmmword ptr [rcx+10H]
|
1926
1935
|
movaps xmm2, xmmword ptr [BLAKE3_IV]
|
1927
|
-
movzx eax, byte ptr [rsp+
|
1936
|
+
movzx eax, byte ptr [rsp+0A0H]
|
1928
1937
|
movzx r8d, r8b
|
1929
|
-
mov r10, qword ptr [rsp+
|
1938
|
+
mov r10, qword ptr [rsp+0A8H]
|
1930
1939
|
shl rax, 32
|
1931
1940
|
add r8, rax
|
1932
1941
|
movq xmm3, r9
|
@@ -2029,7 +2038,10 @@ _blake3_compress_xof_sse41 PROC
|
|
2029
2038
|
movdqa xmm7, xmmword ptr [rsp+10H]
|
2030
2039
|
movdqa xmm8, xmmword ptr [rsp+20H]
|
2031
2040
|
movdqa xmm9, xmmword ptr [rsp+30H]
|
2032
|
-
|
2041
|
+
movdqa xmm11, xmmword ptr [rsp+40H]
|
2042
|
+
movdqa xmm14, xmmword ptr [rsp+50H]
|
2043
|
+
movdqa xmm15, xmmword ptr [rsp+60H]
|
2044
|
+
add rsp, 120
|
2033
2045
|
ret
|
2034
2046
|
_blake3_compress_xof_sse41 ENDP
|
2035
2047
|
blake3_compress_xof_sse41 ENDP
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-blake3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.1
|
4
|
+
version: 0.34.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Will Bryant
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -112,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
- !ruby/object:Gem::Version
|
113
113
|
version: '0'
|
114
114
|
requirements: []
|
115
|
-
|
116
|
-
rubygems_version: 2.7.6
|
115
|
+
rubygems_version: 3.0.3
|
117
116
|
signing_key:
|
118
117
|
specification_version: 4
|
119
118
|
summary: BLAKE3 for Ruby
|