digest-blake3 0.22.1 → 0.34.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc9e904530fd556aa81345371cdcc84ba0f96af0f34ad88e7d5fca03ea334413
4
- data.tar.gz: e7eb9d2902ea6b314598a476e36ee3d2ec1253a063283cce93b6bc9dbc33ad15
3
+ metadata.gz: a3db2fab1165a083a1a83d5c656c1c737d53f853de91babcb6c9c0e74ec7e23a
4
+ data.tar.gz: d4692ef2c6326a70ffa0cad5ed90219daa96c0940c0e9986d9ee7b4469d6b48d
5
5
  SHA512:
6
- metadata.gz: 0d1a215201ad7aae6cebca040db27f4336861582be4870d6d57adcf9b345fa3c44fd5dd7443b0417fc9fe407bf52d98d03eb7065d92f699091f6adfb10fa67c2
7
- data.tar.gz: 262293d252c00c9aaa00bcf7cbeb066a3be416a4b2314473740627906fbbec466a2b1cf199d1fdfa0aecfa1568bf40fe661ca2c22162243c2cacd49f43aed9dc
6
+ metadata.gz: 7ef86ba9e54408a68179d43678d7863d1af3d51e6002315d4607e377f2a142d374f4dc0e4d5f8ddde641063d3b5e2f93214fb10274aba849eee757d5f884d854
7
+ data.tar.gz: e8bf900ad7eece0df62964ca7695af5c8681cbe23c3b7e6cc2af4b0ac2c1d6b3f74de987d7998f4627a5bfcb164c2bafa7ffda8bf8f96be5075ca44761aa2c23
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (0.22.1)
4
+ digest-blake3 (0.34.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -84,23 +84,26 @@ INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
84
84
  memcpy(cv, cv_words, 32);
85
85
  }
86
86
 
87
- INLINE void output_root_bytes(const output_t *self, uint8_t *out,
87
+ INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
88
88
  size_t out_len) {
89
- uint64_t output_block_counter = 0;
89
+ uint64_t output_block_counter = seek / 64;
90
+ size_t offset_within_block = seek % 64;
90
91
  uint8_t wide_buf[64];
91
92
  while (out_len > 0) {
92
93
  blake3_compress_xof(self->input_cv, self->block, self->block_len,
93
94
  output_block_counter, self->flags | ROOT, wide_buf);
95
+ size_t available_bytes = 64 - offset_within_block;
94
96
  size_t memcpy_len;
95
- if (out_len > 64) {
96
- memcpy_len = 64;
97
+ if (out_len > available_bytes) {
98
+ memcpy_len = available_bytes;
97
99
  } else {
98
100
  memcpy_len = out_len;
99
101
  }
100
- memcpy(out, wide_buf, memcpy_len);
102
+ memcpy(out, wide_buf + offset_within_block, memcpy_len);
101
103
  out += memcpy_len;
102
104
  out_len -= memcpy_len;
103
105
  output_block_counter += 1;
106
+ offset_within_block = 0;
104
107
  }
105
108
  }
106
109
 
@@ -256,10 +259,11 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
256
259
  // Why not just have the caller split the input on the first update(), instead
257
260
  // of implementing this special rule? Because we don't want to limit SIMD or
258
261
  // multi-threading parallelism for that update().
259
- size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
260
- const uint32_t key[8],
261
- uint64_t chunk_counter, uint8_t flags,
262
- uint8_t *out) {
262
+ static size_t blake3_compress_subtree_wide(const uint8_t *input,
263
+ size_t input_len,
264
+ const uint32_t key[8],
265
+ uint64_t chunk_counter,
266
+ uint8_t flags, uint8_t *out) {
263
267
  // Note that the single chunk case does *not* bump the SIMD degree up to 2
264
268
  // when it is 1. If this implementation adds multi-threading in the future,
265
269
  // this gives us the option of multi-threading even the 2-chunk case, which
@@ -425,8 +429,8 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
425
429
  // compress_subtree_to_parent_node(). That function always returns the top
426
430
  // *two* chaining values of the subtree it's compressing. We then do lazy
427
431
  // merging with each of them separately, so that the second CV will always
428
- // remain unmerged. (The compress_subtree_to_parent_node also helps us support
429
- // extendable output when we're hashing an input all-at-once.)
432
+ // remain unmerged. (That also helps us support extendable output when we're
433
+ // hashing an input all-at-once.)
430
434
  INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
431
435
  uint64_t chunk_counter) {
432
436
  hasher_merge_cv_stack(self, chunk_counter);
@@ -472,8 +476,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
472
476
 
473
477
  // Now the chunk_state is clear, and we have more input. If there's more than
474
478
  // a single chunk (so, definitely not the root chunk), hash the largest whole
475
- // subtree we can, with the full benefits of SIMD and multi-threading
476
- // parallelism. Two restrictions:
479
+ // subtree we can, with the full benefits of SIMD (and maybe in the future,
480
+ // multi-threading) parallelism. Two restrictions:
477
481
  // - The subtree has to be a power-of-2 number of chunks. Only subtrees along
478
482
  // the right edge can be incomplete, and we don't know where the right edge
479
483
  // is going to be until we get to finalize().
@@ -546,6 +550,11 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
546
550
 
547
551
  void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
548
552
  size_t out_len) {
553
+ blake3_hasher_finalize_seek(self, 0, out, out_len);
554
+ }
555
+
556
+ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
557
+ uint8_t *out, size_t out_len) {
549
558
  // Explicitly checking for zero avoids causing UB by passing a null pointer
550
559
  // to memcpy. This comes up in practice with things like:
551
560
  // std::vector<uint8_t> v;
@@ -557,7 +566,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
557
566
  // If the subtree stack is empty, then the current chunk is the root.
558
567
  if (self->cv_stack_len == 0) {
559
568
  output_t output = chunk_state_output(&self->chunk);
560
- output_root_bytes(&output, out, out_len);
569
+ output_root_bytes(&output, seek, out, out_len);
561
570
  return;
562
571
  }
563
572
  // If there are any bytes in the chunk state, finalize that chunk and do a
@@ -585,5 +594,5 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
585
594
  output_chaining_value(&output, &parent_block[32]);
586
595
  output = parent_output(parent_block, self->key, self->chunk.flags);
587
596
  }
588
- output_root_bytes(&output, out, out_len);
597
+ output_root_bytes(&output, seek, out, out_len);
589
598
  }
@@ -4,7 +4,7 @@
4
4
  #include <stddef.h>
5
5
  #include <stdint.h>
6
6
 
7
- #ifdef __cplusplus
7
+ #ifdef __cplusplus
8
8
  extern "C" {
9
9
  #endif
10
10
 
@@ -46,8 +46,10 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
46
46
  size_t input_len);
47
47
  void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
48
48
  size_t out_len);
49
+ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
50
+ uint8_t *out, size_t out_len);
49
51
 
50
- #ifdef __cplusplus
52
+ #ifdef __cplusplus
51
53
  }
52
54
  #endif
53
55
 
@@ -82,15 +82,15 @@ blake3_hash_many_avx512:
82
82
  mov r14, qword ptr [rdi+0x50]
83
83
  mov r15, qword ptr [rdi+0x58]
84
84
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
85
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
85
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
86
86
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
87
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
87
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
88
88
  vpunpcklqdq zmm8, zmm16, zmm17
89
89
  vpunpckhqdq zmm9, zmm16, zmm17
90
90
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
91
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
91
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
92
92
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
93
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
93
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
94
94
  vpunpcklqdq zmm10, zmm18, zmm19
95
95
  vpunpckhqdq zmm11, zmm18, zmm19
96
96
  mov r8, qword ptr [rdi+0x20]
@@ -102,15 +102,15 @@ blake3_hash_many_avx512:
102
102
  mov r14, qword ptr [rdi+0x70]
103
103
  mov r15, qword ptr [rdi+0x78]
104
104
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
105
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
105
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
106
106
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
107
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
107
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
108
108
  vpunpcklqdq zmm12, zmm16, zmm17
109
109
  vpunpckhqdq zmm13, zmm16, zmm17
110
110
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
111
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
111
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
112
112
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
113
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
113
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
114
114
  vpunpcklqdq zmm14, zmm18, zmm19
115
115
  vpunpckhqdq zmm15, zmm18, zmm19
116
116
  vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -144,15 +144,15 @@ blake3_hash_many_avx512:
144
144
  mov r14, qword ptr [rdi+0x50]
145
145
  mov r15, qword ptr [rdi+0x58]
146
146
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
147
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
147
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
148
148
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
149
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
149
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
150
150
  vpunpcklqdq zmm8, zmm24, zmm25
151
151
  vpunpckhqdq zmm9, zmm24, zmm25
152
152
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
153
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
153
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
154
154
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
155
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
155
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
156
156
  vpunpcklqdq zmm10, zmm24, zmm25
157
157
  vpunpckhqdq zmm11, zmm24, zmm25
158
158
  prefetcht0 [r8+rdx+0x80]
@@ -172,15 +172,15 @@ blake3_hash_many_avx512:
172
172
  mov r14, qword ptr [rdi+0x70]
173
173
  mov r15, qword ptr [rdi+0x78]
174
174
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
175
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
175
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
176
176
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
177
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
177
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
178
178
  vpunpcklqdq zmm12, zmm24, zmm25
179
179
  vpunpckhqdq zmm13, zmm24, zmm25
180
180
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
181
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
181
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
182
182
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
183
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
183
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
184
184
  vpunpcklqdq zmm14, zmm24, zmm25
185
185
  vpunpckhqdq zmm15, zmm24, zmm25
186
186
  prefetcht0 [r8+rdx+0x80]
@@ -2039,7 +2039,7 @@ blake3_hash_many_avx512:
2039
2039
  vpermq ymm14, ymm14, 0xDC
2040
2040
  vpermq ymm15, ymm15, 0xDC
2041
2041
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2042
- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2042
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
2043
2043
  mov eax, 17476
2044
2044
  kmovw k2, eax
2045
2045
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -96,15 +96,15 @@ blake3_hash_many_avx512:
96
96
  mov r14, qword ptr [rdi+0x50]
97
97
  mov r15, qword ptr [rdi+0x58]
98
98
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
99
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
99
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
100
100
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
101
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
101
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
102
102
  vpunpcklqdq zmm8, zmm16, zmm17
103
103
  vpunpckhqdq zmm9, zmm16, zmm17
104
104
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
105
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
105
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
106
106
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
107
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
107
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
108
108
  vpunpcklqdq zmm10, zmm18, zmm19
109
109
  vpunpckhqdq zmm11, zmm18, zmm19
110
110
  mov r8, qword ptr [rdi+0x20]
@@ -116,15 +116,15 @@ blake3_hash_many_avx512:
116
116
  mov r14, qword ptr [rdi+0x70]
117
117
  mov r15, qword ptr [rdi+0x78]
118
118
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
119
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
119
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
120
120
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
121
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
121
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
122
122
  vpunpcklqdq zmm12, zmm16, zmm17
123
123
  vpunpckhqdq zmm13, zmm16, zmm17
124
124
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
125
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
125
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
126
126
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
127
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
127
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
128
128
  vpunpcklqdq zmm14, zmm18, zmm19
129
129
  vpunpckhqdq zmm15, zmm18, zmm19
130
130
  vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -158,15 +158,15 @@ blake3_hash_many_avx512:
158
158
  mov r14, qword ptr [rdi+0x50]
159
159
  mov r15, qword ptr [rdi+0x58]
160
160
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
161
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
161
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
162
162
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
163
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
163
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
164
164
  vpunpcklqdq zmm8, zmm24, zmm25
165
165
  vpunpckhqdq zmm9, zmm24, zmm25
166
166
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
167
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
167
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
168
168
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
169
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
169
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
170
170
  vpunpcklqdq zmm10, zmm24, zmm25
171
171
  vpunpckhqdq zmm11, zmm24, zmm25
172
172
  prefetcht0 [r8+rdx+0x80]
@@ -186,15 +186,15 @@ blake3_hash_many_avx512:
186
186
  mov r14, qword ptr [rdi+0x70]
187
187
  mov r15, qword ptr [rdi+0x78]
188
188
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
189
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
189
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
190
190
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
191
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
191
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
192
192
  vpunpcklqdq zmm12, zmm24, zmm25
193
193
  vpunpckhqdq zmm13, zmm24, zmm25
194
194
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
195
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
195
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
196
196
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
197
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
197
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
198
198
  vpunpcklqdq zmm14, zmm24, zmm25
199
199
  vpunpckhqdq zmm15, zmm24, zmm25
200
200
  prefetcht0 [r8+rdx+0x80]
@@ -2065,7 +2065,7 @@ blake3_hash_many_avx512:
2065
2065
  vpermq ymm14, ymm14, 0xDC
2066
2066
  vpermq ymm15, ymm15, 0xDC
2067
2067
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2068
- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2068
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
2069
2069
  mov eax, 17476
2070
2070
  kmovw k2, eax
2071
2071
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -99,15 +99,15 @@ innerloop16:
99
99
  mov r14, qword ptr [rdi+50H]
100
100
  mov r15, qword ptr [rdi+58H]
101
101
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
102
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
102
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
103
103
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
104
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
104
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
105
105
  vpunpcklqdq zmm8, zmm16, zmm17
106
106
  vpunpckhqdq zmm9, zmm16, zmm17
107
107
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
108
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
108
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
109
109
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
110
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
110
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
111
111
  vpunpcklqdq zmm10, zmm18, zmm19
112
112
  vpunpckhqdq zmm11, zmm18, zmm19
113
113
  mov r8, qword ptr [rdi+20H]
@@ -119,15 +119,15 @@ innerloop16:
119
119
  mov r14, qword ptr [rdi+70H]
120
120
  mov r15, qword ptr [rdi+78H]
121
121
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
122
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
122
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
123
123
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
124
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
124
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
125
125
  vpunpcklqdq zmm12, zmm16, zmm17
126
126
  vpunpckhqdq zmm13, zmm16, zmm17
127
127
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
128
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
128
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
129
129
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
130
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
130
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
131
131
  vpunpcklqdq zmm14, zmm18, zmm19
132
132
  vpunpckhqdq zmm15, zmm18, zmm19
133
133
  vmovdqa32 zmm27, zmmword ptr [INDEX0]
@@ -161,15 +161,15 @@ innerloop16:
161
161
  mov r14, qword ptr [rdi+50H]
162
162
  mov r15, qword ptr [rdi+58H]
163
163
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
164
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
164
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
165
165
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
166
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
166
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
167
167
  vpunpcklqdq zmm8, zmm24, zmm25
168
168
  vpunpckhqdq zmm9, zmm24, zmm25
169
169
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
170
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
170
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
171
171
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
172
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
172
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
173
173
  vpunpcklqdq zmm10, zmm24, zmm25
174
174
  vpunpckhqdq zmm11, zmm24, zmm25
175
175
  prefetcht0 byte ptr [r8+rdx+80H]
@@ -189,15 +189,15 @@ innerloop16:
189
189
  mov r14, qword ptr [rdi+70H]
190
190
  mov r15, qword ptr [rdi+78H]
191
191
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
192
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
192
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
193
193
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
194
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
194
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
195
195
  vpunpcklqdq zmm12, zmm24, zmm25
196
196
  vpunpckhqdq zmm13, zmm24, zmm25
197
197
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
198
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
198
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
199
199
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
200
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
200
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
201
201
  vpunpcklqdq zmm14, zmm24, zmm25
202
202
  vpunpckhqdq zmm15, zmm24, zmm25
203
203
  prefetcht0 byte ptr [r8+rdx+80H]
@@ -2073,7 +2073,7 @@ final7blocks:
2073
2073
  vpermq ymm14, ymm14, 0DCH
2074
2074
  vpermq ymm15, ymm15, 0DCH
2075
2075
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
2076
- vinserti32x8 zmm13, zmm14, ymm15, 01H
2076
+ vinserti64x4 zmm13, zmm14, ymm15, 01H
2077
2077
  mov eax, 17476
2078
2078
  kmovw k2, eax
2079
2079
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -14,73 +14,6 @@
14
14
  #endif
15
15
  #endif
16
16
 
17
- // Declarations for implementation-specific functions.
18
- void blake3_compress_in_place_portable(uint32_t cv[8],
19
- const uint8_t block[BLAKE3_BLOCK_LEN],
20
- uint8_t block_len, uint64_t counter,
21
- uint8_t flags);
22
-
23
- void blake3_compress_xof_portable(const uint32_t cv[8],
24
- const uint8_t block[BLAKE3_BLOCK_LEN],
25
- uint8_t block_len, uint64_t counter,
26
- uint8_t flags, uint8_t out[64]);
27
-
28
- void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
29
- size_t blocks, const uint32_t key[8],
30
- uint64_t counter, bool increment_counter,
31
- uint8_t flags, uint8_t flags_start,
32
- uint8_t flags_end, uint8_t *out);
33
-
34
- #if defined(IS_X86)
35
- #if !defined(BLAKE3_NO_SSE41)
36
- void blake3_compress_in_place_sse41(uint32_t cv[8],
37
- const uint8_t block[BLAKE3_BLOCK_LEN],
38
- uint8_t block_len, uint64_t counter,
39
- uint8_t flags);
40
- void blake3_compress_xof_sse41(const uint32_t cv[8],
41
- const uint8_t block[BLAKE3_BLOCK_LEN],
42
- uint8_t block_len, uint64_t counter,
43
- uint8_t flags, uint8_t out[64]);
44
- void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
45
- size_t blocks, const uint32_t key[8],
46
- uint64_t counter, bool increment_counter,
47
- uint8_t flags, uint8_t flags_start,
48
- uint8_t flags_end, uint8_t *out);
49
- #endif
50
- #if !defined(BLAKE3_NO_AVX2)
51
- void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
52
- size_t blocks, const uint32_t key[8],
53
- uint64_t counter, bool increment_counter,
54
- uint8_t flags, uint8_t flags_start,
55
- uint8_t flags_end, uint8_t *out);
56
- #endif
57
- #if !defined(BLAKE3_NO_AVX512)
58
- void blake3_compress_in_place_avx512(uint32_t cv[8],
59
- const uint8_t block[BLAKE3_BLOCK_LEN],
60
- uint8_t block_len, uint64_t counter,
61
- uint8_t flags);
62
-
63
- void blake3_compress_xof_avx512(const uint32_t cv[8],
64
- const uint8_t block[BLAKE3_BLOCK_LEN],
65
- uint8_t block_len, uint64_t counter,
66
- uint8_t flags, uint8_t out[64]);
67
-
68
- void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
69
- size_t blocks, const uint32_t key[8],
70
- uint64_t counter, bool increment_counter,
71
- uint8_t flags, uint8_t flags_start,
72
- uint8_t flags_end, uint8_t *out);
73
- #endif
74
- #endif
75
-
76
- #if defined(BLAKE3_USE_NEON)
77
- void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
78
- size_t blocks, const uint32_t key[8],
79
- uint64_t counter, bool increment_counter,
80
- uint8_t flags, uint8_t flags_start,
81
- uint8_t flags_end, uint8_t *out);
82
- #endif
83
-
84
17
  #if defined(IS_X86)
85
18
  static uint64_t xgetbv() {
86
19
  #if defined(_MSC_VER)
@@ -249,7 +182,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
249
182
  #if defined(IS_X86)
250
183
  const enum cpu_feature features = get_cpu_features();
251
184
  #if !defined(BLAKE3_NO_AVX512)
252
- if (features & AVX512F) {
185
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
253
186
  blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
254
187
  increment_counter, flags, flags_start, flags_end,
255
188
  out);
@@ -286,11 +219,11 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
286
219
  }
287
220
 
288
221
  // The dynamically detected SIMD degree of the current platform.
289
- size_t blake3_simd_degree() {
222
+ size_t blake3_simd_degree(void) {
290
223
  #if defined(IS_X86)
291
224
  const enum cpu_feature features = get_cpu_features();
292
225
  #if !defined(BLAKE3_NO_AVX512)
293
- if (features & AVX512F) {
226
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
294
227
  return 16;
295
228
  }
296
229
  #endif
@@ -161,7 +161,75 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
161
161
  bool increment_counter, uint8_t flags,
162
162
  uint8_t flags_start, uint8_t flags_end, uint8_t *out);
163
163
 
164
- size_t blake3_simd_degree();
164
+ size_t blake3_simd_degree(void);
165
+
166
+
167
+ // Declarations for implementation-specific functions.
168
+ void blake3_compress_in_place_portable(uint32_t cv[8],
169
+ const uint8_t block[BLAKE3_BLOCK_LEN],
170
+ uint8_t block_len, uint64_t counter,
171
+ uint8_t flags);
172
+
173
+ void blake3_compress_xof_portable(const uint32_t cv[8],
174
+ const uint8_t block[BLAKE3_BLOCK_LEN],
175
+ uint8_t block_len, uint64_t counter,
176
+ uint8_t flags, uint8_t out[64]);
177
+
178
+ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
179
+ size_t blocks, const uint32_t key[8],
180
+ uint64_t counter, bool increment_counter,
181
+ uint8_t flags, uint8_t flags_start,
182
+ uint8_t flags_end, uint8_t *out);
183
+
184
+ #if defined(IS_X86)
185
+ #if !defined(BLAKE3_NO_SSE41)
186
+ void blake3_compress_in_place_sse41(uint32_t cv[8],
187
+ const uint8_t block[BLAKE3_BLOCK_LEN],
188
+ uint8_t block_len, uint64_t counter,
189
+ uint8_t flags);
190
+ void blake3_compress_xof_sse41(const uint32_t cv[8],
191
+ const uint8_t block[BLAKE3_BLOCK_LEN],
192
+ uint8_t block_len, uint64_t counter,
193
+ uint8_t flags, uint8_t out[64]);
194
+ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
195
+ size_t blocks, const uint32_t key[8],
196
+ uint64_t counter, bool increment_counter,
197
+ uint8_t flags, uint8_t flags_start,
198
+ uint8_t flags_end, uint8_t *out);
199
+ #endif
200
+ #if !defined(BLAKE3_NO_AVX2)
201
+ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
202
+ size_t blocks, const uint32_t key[8],
203
+ uint64_t counter, bool increment_counter,
204
+ uint8_t flags, uint8_t flags_start,
205
+ uint8_t flags_end, uint8_t *out);
206
+ #endif
207
+ #if !defined(BLAKE3_NO_AVX512)
208
+ void blake3_compress_in_place_avx512(uint32_t cv[8],
209
+ const uint8_t block[BLAKE3_BLOCK_LEN],
210
+ uint8_t block_len, uint64_t counter,
211
+ uint8_t flags);
212
+
213
+ void blake3_compress_xof_avx512(const uint32_t cv[8],
214
+ const uint8_t block[BLAKE3_BLOCK_LEN],
215
+ uint8_t block_len, uint64_t counter,
216
+ uint8_t flags, uint8_t out[64]);
217
+
218
+ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
219
+ size_t blocks, const uint32_t key[8],
220
+ uint64_t counter, bool increment_counter,
221
+ uint8_t flags, uint8_t flags_start,
222
+ uint8_t flags_end, uint8_t *out);
223
+ #endif
224
+ #endif
225
+
226
+ #if defined(BLAKE3_USE_NEON)
227
+ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
228
+ size_t blocks, const uint32_t key[8],
229
+ uint64_t counter, bool increment_counter,
230
+ uint8_t flags, uint8_t flags_start,
231
+ uint8_t flags_end, uint8_t *out);
232
+ #endif
165
233
 
166
234
 
167
235
  #endif /* BLAKE3_IMPL_H */
@@ -1800,15 +1800,18 @@ blake3_hash_many_sse41:
1800
1800
  .p2align 6
1801
1801
  blake3_compress_in_place_sse41:
1802
1802
  _blake3_compress_in_place_sse41:
1803
- sub rsp, 72
1803
+ sub rsp, 120
1804
1804
  movdqa xmmword ptr [rsp], xmm6
1805
1805
  movdqa xmmword ptr [rsp+0x10], xmm7
1806
1806
  movdqa xmmword ptr [rsp+0x20], xmm8
1807
1807
  movdqa xmmword ptr [rsp+0x30], xmm9
1808
+ movdqa xmmword ptr [rsp+0x40], xmm11
1809
+ movdqa xmmword ptr [rsp+0x50], xmm14
1810
+ movdqa xmmword ptr [rsp+0x60], xmm15
1808
1811
  movups xmm0, xmmword ptr [rcx]
1809
1812
  movups xmm1, xmmword ptr [rcx+0x10]
1810
1813
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1811
- movzx eax, byte ptr [rsp+0x70]
1814
+ movzx eax, byte ptr [rsp+0xA0]
1812
1815
  movzx r8d, r8b
1813
1816
  shl rax, 32
1814
1817
  add r8, rax
@@ -1906,24 +1909,30 @@ _blake3_compress_in_place_sse41:
1906
1909
  movdqa xmm7, xmmword ptr [rsp+0x10]
1907
1910
  movdqa xmm8, xmmword ptr [rsp+0x20]
1908
1911
  movdqa xmm9, xmmword ptr [rsp+0x30]
1909
- add rsp, 72
1912
+ movdqa xmm11, xmmword ptr [rsp+0x40]
1913
+ movdqa xmm14, xmmword ptr [rsp+0x50]
1914
+ movdqa xmm15, xmmword ptr [rsp+0x60]
1915
+ add rsp, 120
1910
1916
  ret
1911
1917
 
1912
1918
 
1913
1919
  .p2align 6
1914
1920
  _blake3_compress_xof_sse41:
1915
1921
  blake3_compress_xof_sse41:
1916
- sub rsp, 72
1922
+ sub rsp, 120
1917
1923
  movdqa xmmword ptr [rsp], xmm6
1918
1924
  movdqa xmmword ptr [rsp+0x10], xmm7
1919
1925
  movdqa xmmword ptr [rsp+0x20], xmm8
1920
1926
  movdqa xmmword ptr [rsp+0x30], xmm9
1927
+ movdqa xmmword ptr [rsp+0x40], xmm11
1928
+ movdqa xmmword ptr [rsp+0x50], xmm14
1929
+ movdqa xmmword ptr [rsp+0x60], xmm15
1921
1930
  movups xmm0, xmmword ptr [rcx]
1922
1931
  movups xmm1, xmmword ptr [rcx+0x10]
1923
1932
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1924
- movzx eax, byte ptr [rsp+0x70]
1933
+ movzx eax, byte ptr [rsp+0xA0]
1925
1934
  movzx r8d, r8b
1926
- mov r10, qword ptr [rsp+0x78]
1935
+ mov r10, qword ptr [rsp+0xA8]
1927
1936
  shl rax, 32
1928
1937
  add r8, rax
1929
1938
  movq xmm3, r9
@@ -2026,7 +2035,10 @@ blake3_compress_xof_sse41:
2026
2035
  movdqa xmm7, xmmword ptr [rsp+0x10]
2027
2036
  movdqa xmm8, xmmword ptr [rsp+0x20]
2028
2037
  movdqa xmm9, xmmword ptr [rsp+0x30]
2029
- add rsp, 72
2038
+ movdqa xmm11, xmmword ptr [rsp+0x40]
2039
+ movdqa xmm14, xmmword ptr [rsp+0x50]
2040
+ movdqa xmm15, xmmword ptr [rsp+0x60]
2041
+ add rsp, 120
2030
2042
  ret
2031
2043
 
2032
2044
 
@@ -1802,15 +1802,18 @@ blake3_hash_many_sse41 ENDP
1802
1802
 
1803
1803
  blake3_compress_in_place_sse41 PROC
1804
1804
  _blake3_compress_in_place_sse41 PROC
1805
- sub rsp, 72
1805
+ sub rsp, 120
1806
1806
  movdqa xmmword ptr [rsp], xmm6
1807
1807
  movdqa xmmword ptr [rsp+10H], xmm7
1808
1808
  movdqa xmmword ptr [rsp+20H], xmm8
1809
1809
  movdqa xmmword ptr [rsp+30H], xmm9
1810
+ movdqa xmmword ptr [rsp+40H], xmm11
1811
+ movdqa xmmword ptr [rsp+50H], xmm14
1812
+ movdqa xmmword ptr [rsp+60H], xmm15
1810
1813
  movups xmm0, xmmword ptr [rcx]
1811
1814
  movups xmm1, xmmword ptr [rcx+10H]
1812
1815
  movaps xmm2, xmmword ptr [BLAKE3_IV]
1813
- movzx eax, byte ptr [rsp+70H]
1816
+ movzx eax, byte ptr [rsp+0A0H]
1814
1817
  movzx r8d, r8b
1815
1818
  shl rax, 32
1816
1819
  add r8, rax
@@ -1908,7 +1911,10 @@ _blake3_compress_in_place_sse41 PROC
1908
1911
  movdqa xmm7, xmmword ptr [rsp+10H]
1909
1912
  movdqa xmm8, xmmword ptr [rsp+20H]
1910
1913
  movdqa xmm9, xmmword ptr [rsp+30H]
1911
- add rsp, 72
1914
+ movdqa xmm11, xmmword ptr [rsp+40H]
1915
+ movdqa xmm14, xmmword ptr [rsp+50H]
1916
+ movdqa xmm15, xmmword ptr [rsp+60H]
1917
+ add rsp, 120
1912
1918
  ret
1913
1919
  _blake3_compress_in_place_sse41 ENDP
1914
1920
  blake3_compress_in_place_sse41 ENDP
@@ -1916,17 +1922,20 @@ blake3_compress_in_place_sse41 ENDP
1916
1922
  ALIGN 16
1917
1923
  blake3_compress_xof_sse41 PROC
1918
1924
  _blake3_compress_xof_sse41 PROC
1919
- sub rsp, 72
1925
+ sub rsp, 120
1920
1926
  movdqa xmmword ptr [rsp], xmm6
1921
1927
  movdqa xmmword ptr [rsp+10H], xmm7
1922
1928
  movdqa xmmword ptr [rsp+20H], xmm8
1923
1929
  movdqa xmmword ptr [rsp+30H], xmm9
1930
+ movdqa xmmword ptr [rsp+40H], xmm11
1931
+ movdqa xmmword ptr [rsp+50H], xmm14
1932
+ movdqa xmmword ptr [rsp+60H], xmm15
1924
1933
  movups xmm0, xmmword ptr [rcx]
1925
1934
  movups xmm1, xmmword ptr [rcx+10H]
1926
1935
  movaps xmm2, xmmword ptr [BLAKE3_IV]
1927
- movzx eax, byte ptr [rsp+70H]
1936
+ movzx eax, byte ptr [rsp+0A0H]
1928
1937
  movzx r8d, r8b
1929
- mov r10, qword ptr [rsp+78H]
1938
+ mov r10, qword ptr [rsp+0A8H]
1930
1939
  shl rax, 32
1931
1940
  add r8, rax
1932
1941
  movq xmm3, r9
@@ -2029,7 +2038,10 @@ _blake3_compress_xof_sse41 PROC
2029
2038
  movdqa xmm7, xmmword ptr [rsp+10H]
2030
2039
  movdqa xmm8, xmmword ptr [rsp+20H]
2031
2040
  movdqa xmm9, xmmword ptr [rsp+30H]
2032
- add rsp, 72
2041
+ movdqa xmm11, xmmword ptr [rsp+40H]
2042
+ movdqa xmm14, xmmword ptr [rsp+50H]
2043
+ movdqa xmm15, xmmword ptr [rsp+60H]
2044
+ add rsp, 120
2033
2045
  ret
2034
2046
  _blake3_compress_xof_sse41 ENDP
2035
2047
  blake3_compress_xof_sse41 ENDP
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "0.22.1"
5
+ VERSION = "0.34.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.1
4
+ version: 0.34.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-15 00:00:00.000000000 Z
11
+ date: 2020-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  - !ruby/object:Gem::Version
113
113
  version: '0'
114
114
  requirements: []
115
- rubyforge_project:
116
- rubygems_version: 2.7.6
115
+ rubygems_version: 3.0.3
117
116
  signing_key:
118
117
  specification_version: 4
119
118
  summary: BLAKE3 for Ruby