digest-blake3 0.22.1 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc9e904530fd556aa81345371cdcc84ba0f96af0f34ad88e7d5fca03ea334413
4
- data.tar.gz: e7eb9d2902ea6b314598a476e36ee3d2ec1253a063283cce93b6bc9dbc33ad15
3
+ metadata.gz: a3db2fab1165a083a1a83d5c656c1c737d53f853de91babcb6c9c0e74ec7e23a
4
+ data.tar.gz: d4692ef2c6326a70ffa0cad5ed90219daa96c0940c0e9986d9ee7b4469d6b48d
5
5
  SHA512:
6
- metadata.gz: 0d1a215201ad7aae6cebca040db27f4336861582be4870d6d57adcf9b345fa3c44fd5dd7443b0417fc9fe407bf52d98d03eb7065d92f699091f6adfb10fa67c2
7
- data.tar.gz: 262293d252c00c9aaa00bcf7cbeb066a3be416a4b2314473740627906fbbec466a2b1cf199d1fdfa0aecfa1568bf40fe661ca2c22162243c2cacd49f43aed9dc
6
+ metadata.gz: 7ef86ba9e54408a68179d43678d7863d1af3d51e6002315d4607e377f2a142d374f4dc0e4d5f8ddde641063d3b5e2f93214fb10274aba849eee757d5f884d854
7
+ data.tar.gz: e8bf900ad7eece0df62964ca7695af5c8681cbe23c3b7e6cc2af4b0ac2c1d6b3f74de987d7998f4627a5bfcb164c2bafa7ffda8bf8f96be5075ca44761aa2c23
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- digest-blake3 (0.22.1)
4
+ digest-blake3 (0.34.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -84,23 +84,26 @@ INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
84
84
  memcpy(cv, cv_words, 32);
85
85
  }
86
86
 
87
- INLINE void output_root_bytes(const output_t *self, uint8_t *out,
87
+ INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
88
88
  size_t out_len) {
89
- uint64_t output_block_counter = 0;
89
+ uint64_t output_block_counter = seek / 64;
90
+ size_t offset_within_block = seek % 64;
90
91
  uint8_t wide_buf[64];
91
92
  while (out_len > 0) {
92
93
  blake3_compress_xof(self->input_cv, self->block, self->block_len,
93
94
  output_block_counter, self->flags | ROOT, wide_buf);
95
+ size_t available_bytes = 64 - offset_within_block;
94
96
  size_t memcpy_len;
95
- if (out_len > 64) {
96
- memcpy_len = 64;
97
+ if (out_len > available_bytes) {
98
+ memcpy_len = available_bytes;
97
99
  } else {
98
100
  memcpy_len = out_len;
99
101
  }
100
- memcpy(out, wide_buf, memcpy_len);
102
+ memcpy(out, wide_buf + offset_within_block, memcpy_len);
101
103
  out += memcpy_len;
102
104
  out_len -= memcpy_len;
103
105
  output_block_counter += 1;
106
+ offset_within_block = 0;
104
107
  }
105
108
  }
106
109
 
@@ -256,10 +259,11 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
256
259
  // Why not just have the caller split the input on the first update(), instead
257
260
  // of implementing this special rule? Because we don't want to limit SIMD or
258
261
  // multi-threading parallelism for that update().
259
- size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
260
- const uint32_t key[8],
261
- uint64_t chunk_counter, uint8_t flags,
262
- uint8_t *out) {
262
+ static size_t blake3_compress_subtree_wide(const uint8_t *input,
263
+ size_t input_len,
264
+ const uint32_t key[8],
265
+ uint64_t chunk_counter,
266
+ uint8_t flags, uint8_t *out) {
263
267
  // Note that the single chunk case does *not* bump the SIMD degree up to 2
264
268
  // when it is 1. If this implementation adds multi-threading in the future,
265
269
  // this gives us the option of multi-threading even the 2-chunk case, which
@@ -425,8 +429,8 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
425
429
  // compress_subtree_to_parent_node(). That function always returns the top
426
430
  // *two* chaining values of the subtree it's compressing. We then do lazy
427
431
  // merging with each of them separately, so that the second CV will always
428
- // remain unmerged. (The compress_subtree_to_parent_node also helps us support
429
- // extendable output when we're hashing an input all-at-once.)
432
+ // remain unmerged. (That also helps us support extendable output when we're
433
+ // hashing an input all-at-once.)
430
434
  INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
431
435
  uint64_t chunk_counter) {
432
436
  hasher_merge_cv_stack(self, chunk_counter);
@@ -472,8 +476,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
472
476
 
473
477
  // Now the chunk_state is clear, and we have more input. If there's more than
474
478
  // a single chunk (so, definitely not the root chunk), hash the largest whole
475
- // subtree we can, with the full benefits of SIMD and multi-threading
476
- // parallelism. Two restrictions:
479
+ // subtree we can, with the full benefits of SIMD (and maybe in the future,
480
+ // multi-threading) parallelism. Two restrictions:
477
481
  // - The subtree has to be a power-of-2 number of chunks. Only subtrees along
478
482
  // the right edge can be incomplete, and we don't know where the right edge
479
483
  // is going to be until we get to finalize().
@@ -546,6 +550,11 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
546
550
 
547
551
  void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
548
552
  size_t out_len) {
553
+ blake3_hasher_finalize_seek(self, 0, out, out_len);
554
+ }
555
+
556
+ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
557
+ uint8_t *out, size_t out_len) {
549
558
  // Explicitly checking for zero avoids causing UB by passing a null pointer
550
559
  // to memcpy. This comes up in practice with things like:
551
560
  // std::vector<uint8_t> v;
@@ -557,7 +566,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
557
566
  // If the subtree stack is empty, then the current chunk is the root.
558
567
  if (self->cv_stack_len == 0) {
559
568
  output_t output = chunk_state_output(&self->chunk);
560
- output_root_bytes(&output, out, out_len);
569
+ output_root_bytes(&output, seek, out, out_len);
561
570
  return;
562
571
  }
563
572
  // If there are any bytes in the chunk state, finalize that chunk and do a
@@ -585,5 +594,5 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
585
594
  output_chaining_value(&output, &parent_block[32]);
586
595
  output = parent_output(parent_block, self->key, self->chunk.flags);
587
596
  }
588
- output_root_bytes(&output, out, out_len);
597
+ output_root_bytes(&output, seek, out, out_len);
589
598
  }
@@ -4,7 +4,7 @@
4
4
  #include <stddef.h>
5
5
  #include <stdint.h>
6
6
 
7
- #ifdef __cplusplus
7
+ #ifdef __cplusplus
8
8
  extern "C" {
9
9
  #endif
10
10
 
@@ -46,8 +46,10 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
46
46
  size_t input_len);
47
47
  void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
48
48
  size_t out_len);
49
+ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
50
+ uint8_t *out, size_t out_len);
49
51
 
50
- #ifdef __cplusplus
52
+ #ifdef __cplusplus
51
53
  }
52
54
  #endif
53
55
 
@@ -82,15 +82,15 @@ blake3_hash_many_avx512:
82
82
  mov r14, qword ptr [rdi+0x50]
83
83
  mov r15, qword ptr [rdi+0x58]
84
84
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
85
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
85
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
86
86
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
87
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
87
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
88
88
  vpunpcklqdq zmm8, zmm16, zmm17
89
89
  vpunpckhqdq zmm9, zmm16, zmm17
90
90
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
91
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
91
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
92
92
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
93
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
93
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
94
94
  vpunpcklqdq zmm10, zmm18, zmm19
95
95
  vpunpckhqdq zmm11, zmm18, zmm19
96
96
  mov r8, qword ptr [rdi+0x20]
@@ -102,15 +102,15 @@ blake3_hash_many_avx512:
102
102
  mov r14, qword ptr [rdi+0x70]
103
103
  mov r15, qword ptr [rdi+0x78]
104
104
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
105
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
105
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
106
106
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
107
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
107
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
108
108
  vpunpcklqdq zmm12, zmm16, zmm17
109
109
  vpunpckhqdq zmm13, zmm16, zmm17
110
110
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
111
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
111
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
112
112
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
113
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
113
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
114
114
  vpunpcklqdq zmm14, zmm18, zmm19
115
115
  vpunpckhqdq zmm15, zmm18, zmm19
116
116
  vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -144,15 +144,15 @@ blake3_hash_many_avx512:
144
144
  mov r14, qword ptr [rdi+0x50]
145
145
  mov r15, qword ptr [rdi+0x58]
146
146
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
147
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
147
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
148
148
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
149
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
149
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
150
150
  vpunpcklqdq zmm8, zmm24, zmm25
151
151
  vpunpckhqdq zmm9, zmm24, zmm25
152
152
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
153
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
153
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
154
154
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
155
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
155
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
156
156
  vpunpcklqdq zmm10, zmm24, zmm25
157
157
  vpunpckhqdq zmm11, zmm24, zmm25
158
158
  prefetcht0 [r8+rdx+0x80]
@@ -172,15 +172,15 @@ blake3_hash_many_avx512:
172
172
  mov r14, qword ptr [rdi+0x70]
173
173
  mov r15, qword ptr [rdi+0x78]
174
174
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
175
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
175
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
176
176
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
177
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
177
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
178
178
  vpunpcklqdq zmm12, zmm24, zmm25
179
179
  vpunpckhqdq zmm13, zmm24, zmm25
180
180
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
181
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
181
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
182
182
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
183
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
183
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
184
184
  vpunpcklqdq zmm14, zmm24, zmm25
185
185
  vpunpckhqdq zmm15, zmm24, zmm25
186
186
  prefetcht0 [r8+rdx+0x80]
@@ -2039,7 +2039,7 @@ blake3_hash_many_avx512:
2039
2039
  vpermq ymm14, ymm14, 0xDC
2040
2040
  vpermq ymm15, ymm15, 0xDC
2041
2041
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2042
- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2042
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
2043
2043
  mov eax, 17476
2044
2044
  kmovw k2, eax
2045
2045
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -96,15 +96,15 @@ blake3_hash_many_avx512:
96
96
  mov r14, qword ptr [rdi+0x50]
97
97
  mov r15, qword ptr [rdi+0x58]
98
98
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
99
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
99
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
100
100
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
101
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
101
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
102
102
  vpunpcklqdq zmm8, zmm16, zmm17
103
103
  vpunpckhqdq zmm9, zmm16, zmm17
104
104
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
105
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
105
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
106
106
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
107
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
107
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
108
108
  vpunpcklqdq zmm10, zmm18, zmm19
109
109
  vpunpckhqdq zmm11, zmm18, zmm19
110
110
  mov r8, qword ptr [rdi+0x20]
@@ -116,15 +116,15 @@ blake3_hash_many_avx512:
116
116
  mov r14, qword ptr [rdi+0x70]
117
117
  mov r15, qword ptr [rdi+0x78]
118
118
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
119
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
119
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
120
120
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
121
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
121
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
122
122
  vpunpcklqdq zmm12, zmm16, zmm17
123
123
  vpunpckhqdq zmm13, zmm16, zmm17
124
124
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
125
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
125
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
126
126
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
127
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
127
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
128
128
  vpunpcklqdq zmm14, zmm18, zmm19
129
129
  vpunpckhqdq zmm15, zmm18, zmm19
130
130
  vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
@@ -158,15 +158,15 @@ blake3_hash_many_avx512:
158
158
  mov r14, qword ptr [rdi+0x50]
159
159
  mov r15, qword ptr [rdi+0x58]
160
160
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
161
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
161
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
162
162
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
163
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
163
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
164
164
  vpunpcklqdq zmm8, zmm24, zmm25
165
165
  vpunpckhqdq zmm9, zmm24, zmm25
166
166
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
167
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
167
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
168
168
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
169
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
169
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
170
170
  vpunpcklqdq zmm10, zmm24, zmm25
171
171
  vpunpckhqdq zmm11, zmm24, zmm25
172
172
  prefetcht0 [r8+rdx+0x80]
@@ -186,15 +186,15 @@ blake3_hash_many_avx512:
186
186
  mov r14, qword ptr [rdi+0x70]
187
187
  mov r15, qword ptr [rdi+0x78]
188
188
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
189
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
189
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
190
190
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
191
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
191
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
192
192
  vpunpcklqdq zmm12, zmm24, zmm25
193
193
  vpunpckhqdq zmm13, zmm24, zmm25
194
194
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
195
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
195
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
196
196
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
197
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
197
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
198
198
  vpunpcklqdq zmm14, zmm24, zmm25
199
199
  vpunpckhqdq zmm15, zmm24, zmm25
200
200
  prefetcht0 [r8+rdx+0x80]
@@ -2065,7 +2065,7 @@ blake3_hash_many_avx512:
2065
2065
  vpermq ymm14, ymm14, 0xDC
2066
2066
  vpermq ymm15, ymm15, 0xDC
2067
2067
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
2068
- vinserti32x8 zmm13, zmm14, ymm15, 0x01
2068
+ vinserti64x4 zmm13, zmm14, ymm15, 0x01
2069
2069
  mov eax, 17476
2070
2070
  kmovw k2, eax
2071
2071
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -99,15 +99,15 @@ innerloop16:
99
99
  mov r14, qword ptr [rdi+50H]
100
100
  mov r15, qword ptr [rdi+58H]
101
101
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
102
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
102
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
103
103
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
104
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
104
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
105
105
  vpunpcklqdq zmm8, zmm16, zmm17
106
106
  vpunpckhqdq zmm9, zmm16, zmm17
107
107
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
108
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
108
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
109
109
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
110
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
110
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
111
111
  vpunpcklqdq zmm10, zmm18, zmm19
112
112
  vpunpckhqdq zmm11, zmm18, zmm19
113
113
  mov r8, qword ptr [rdi+20H]
@@ -119,15 +119,15 @@ innerloop16:
119
119
  mov r14, qword ptr [rdi+70H]
120
120
  mov r15, qword ptr [rdi+78H]
121
121
  vmovdqu32 ymm16, ymmword ptr [rdx+r8-2H*20H]
122
- vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
122
+ vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-2H*20H], 01H
123
123
  vmovdqu32 ymm17, ymmword ptr [rdx+r9-2H*20H]
124
- vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
124
+ vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-2H*20H], 01H
125
125
  vpunpcklqdq zmm12, zmm16, zmm17
126
126
  vpunpckhqdq zmm13, zmm16, zmm17
127
127
  vmovdqu32 ymm18, ymmword ptr [rdx+r10-2H*20H]
128
- vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
128
+ vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-2H*20H], 01H
129
129
  vmovdqu32 ymm19, ymmword ptr [rdx+r11-2H*20H]
130
- vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
130
+ vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-2H*20H], 01H
131
131
  vpunpcklqdq zmm14, zmm18, zmm19
132
132
  vpunpckhqdq zmm15, zmm18, zmm19
133
133
  vmovdqa32 zmm27, zmmword ptr [INDEX0]
@@ -161,15 +161,15 @@ innerloop16:
161
161
  mov r14, qword ptr [rdi+50H]
162
162
  mov r15, qword ptr [rdi+58H]
163
163
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
164
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
164
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
165
165
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
166
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
166
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
167
167
  vpunpcklqdq zmm8, zmm24, zmm25
168
168
  vpunpckhqdq zmm9, zmm24, zmm25
169
169
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
170
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
170
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
171
171
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
172
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
172
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
173
173
  vpunpcklqdq zmm10, zmm24, zmm25
174
174
  vpunpckhqdq zmm11, zmm24, zmm25
175
175
  prefetcht0 byte ptr [r8+rdx+80H]
@@ -189,15 +189,15 @@ innerloop16:
189
189
  mov r14, qword ptr [rdi+70H]
190
190
  mov r15, qword ptr [rdi+78H]
191
191
  vmovdqu32 ymm24, ymmword ptr [r8+rdx-1H*20H]
192
- vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
192
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-1H*20H], 01H
193
193
  vmovdqu32 ymm25, ymmword ptr [r9+rdx-1H*20H]
194
- vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
194
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-1H*20H], 01H
195
195
  vpunpcklqdq zmm12, zmm24, zmm25
196
196
  vpunpckhqdq zmm13, zmm24, zmm25
197
197
  vmovdqu32 ymm24, ymmword ptr [r10+rdx-1H*20H]
198
- vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
198
+ vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-1H*20H], 01H
199
199
  vmovdqu32 ymm25, ymmword ptr [r11+rdx-1H*20H]
200
- vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
200
+ vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-1H*20H], 01H
201
201
  vpunpcklqdq zmm14, zmm24, zmm25
202
202
  vpunpckhqdq zmm15, zmm24, zmm25
203
203
  prefetcht0 byte ptr [r8+rdx+80H]
@@ -2073,7 +2073,7 @@ final7blocks:
2073
2073
  vpermq ymm14, ymm14, 0DCH
2074
2074
  vpermq ymm15, ymm15, 0DCH
2075
2075
  vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN]
2076
- vinserti32x8 zmm13, zmm14, ymm15, 01H
2076
+ vinserti64x4 zmm13, zmm14, ymm15, 01H
2077
2077
  mov eax, 17476
2078
2078
  kmovw k2, eax
2079
2079
  vpblendmd zmm13 {k2}, zmm13, zmm12
@@ -14,73 +14,6 @@
14
14
  #endif
15
15
  #endif
16
16
 
17
- // Declarations for implementation-specific functions.
18
- void blake3_compress_in_place_portable(uint32_t cv[8],
19
- const uint8_t block[BLAKE3_BLOCK_LEN],
20
- uint8_t block_len, uint64_t counter,
21
- uint8_t flags);
22
-
23
- void blake3_compress_xof_portable(const uint32_t cv[8],
24
- const uint8_t block[BLAKE3_BLOCK_LEN],
25
- uint8_t block_len, uint64_t counter,
26
- uint8_t flags, uint8_t out[64]);
27
-
28
- void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
29
- size_t blocks, const uint32_t key[8],
30
- uint64_t counter, bool increment_counter,
31
- uint8_t flags, uint8_t flags_start,
32
- uint8_t flags_end, uint8_t *out);
33
-
34
- #if defined(IS_X86)
35
- #if !defined(BLAKE3_NO_SSE41)
36
- void blake3_compress_in_place_sse41(uint32_t cv[8],
37
- const uint8_t block[BLAKE3_BLOCK_LEN],
38
- uint8_t block_len, uint64_t counter,
39
- uint8_t flags);
40
- void blake3_compress_xof_sse41(const uint32_t cv[8],
41
- const uint8_t block[BLAKE3_BLOCK_LEN],
42
- uint8_t block_len, uint64_t counter,
43
- uint8_t flags, uint8_t out[64]);
44
- void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
45
- size_t blocks, const uint32_t key[8],
46
- uint64_t counter, bool increment_counter,
47
- uint8_t flags, uint8_t flags_start,
48
- uint8_t flags_end, uint8_t *out);
49
- #endif
50
- #if !defined(BLAKE3_NO_AVX2)
51
- void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
52
- size_t blocks, const uint32_t key[8],
53
- uint64_t counter, bool increment_counter,
54
- uint8_t flags, uint8_t flags_start,
55
- uint8_t flags_end, uint8_t *out);
56
- #endif
57
- #if !defined(BLAKE3_NO_AVX512)
58
- void blake3_compress_in_place_avx512(uint32_t cv[8],
59
- const uint8_t block[BLAKE3_BLOCK_LEN],
60
- uint8_t block_len, uint64_t counter,
61
- uint8_t flags);
62
-
63
- void blake3_compress_xof_avx512(const uint32_t cv[8],
64
- const uint8_t block[BLAKE3_BLOCK_LEN],
65
- uint8_t block_len, uint64_t counter,
66
- uint8_t flags, uint8_t out[64]);
67
-
68
- void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
69
- size_t blocks, const uint32_t key[8],
70
- uint64_t counter, bool increment_counter,
71
- uint8_t flags, uint8_t flags_start,
72
- uint8_t flags_end, uint8_t *out);
73
- #endif
74
- #endif
75
-
76
- #if defined(BLAKE3_USE_NEON)
77
- void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
78
- size_t blocks, const uint32_t key[8],
79
- uint64_t counter, bool increment_counter,
80
- uint8_t flags, uint8_t flags_start,
81
- uint8_t flags_end, uint8_t *out);
82
- #endif
83
-
84
17
  #if defined(IS_X86)
85
18
  static uint64_t xgetbv() {
86
19
  #if defined(_MSC_VER)
@@ -249,7 +182,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
249
182
  #if defined(IS_X86)
250
183
  const enum cpu_feature features = get_cpu_features();
251
184
  #if !defined(BLAKE3_NO_AVX512)
252
- if (features & AVX512F) {
185
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
253
186
  blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
254
187
  increment_counter, flags, flags_start, flags_end,
255
188
  out);
@@ -286,11 +219,11 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
286
219
  }
287
220
 
288
221
  // The dynamically detected SIMD degree of the current platform.
289
- size_t blake3_simd_degree() {
222
+ size_t blake3_simd_degree(void) {
290
223
  #if defined(IS_X86)
291
224
  const enum cpu_feature features = get_cpu_features();
292
225
  #if !defined(BLAKE3_NO_AVX512)
293
- if (features & AVX512F) {
226
+ if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
294
227
  return 16;
295
228
  }
296
229
  #endif
@@ -161,7 +161,75 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
161
161
  bool increment_counter, uint8_t flags,
162
162
  uint8_t flags_start, uint8_t flags_end, uint8_t *out);
163
163
 
164
- size_t blake3_simd_degree();
164
+ size_t blake3_simd_degree(void);
165
+
166
+
167
+ // Declarations for implementation-specific functions.
168
+ void blake3_compress_in_place_portable(uint32_t cv[8],
169
+ const uint8_t block[BLAKE3_BLOCK_LEN],
170
+ uint8_t block_len, uint64_t counter,
171
+ uint8_t flags);
172
+
173
+ void blake3_compress_xof_portable(const uint32_t cv[8],
174
+ const uint8_t block[BLAKE3_BLOCK_LEN],
175
+ uint8_t block_len, uint64_t counter,
176
+ uint8_t flags, uint8_t out[64]);
177
+
178
+ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
179
+ size_t blocks, const uint32_t key[8],
180
+ uint64_t counter, bool increment_counter,
181
+ uint8_t flags, uint8_t flags_start,
182
+ uint8_t flags_end, uint8_t *out);
183
+
184
+ #if defined(IS_X86)
185
+ #if !defined(BLAKE3_NO_SSE41)
186
+ void blake3_compress_in_place_sse41(uint32_t cv[8],
187
+ const uint8_t block[BLAKE3_BLOCK_LEN],
188
+ uint8_t block_len, uint64_t counter,
189
+ uint8_t flags);
190
+ void blake3_compress_xof_sse41(const uint32_t cv[8],
191
+ const uint8_t block[BLAKE3_BLOCK_LEN],
192
+ uint8_t block_len, uint64_t counter,
193
+ uint8_t flags, uint8_t out[64]);
194
+ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
195
+ size_t blocks, const uint32_t key[8],
196
+ uint64_t counter, bool increment_counter,
197
+ uint8_t flags, uint8_t flags_start,
198
+ uint8_t flags_end, uint8_t *out);
199
+ #endif
200
+ #if !defined(BLAKE3_NO_AVX2)
201
+ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
202
+ size_t blocks, const uint32_t key[8],
203
+ uint64_t counter, bool increment_counter,
204
+ uint8_t flags, uint8_t flags_start,
205
+ uint8_t flags_end, uint8_t *out);
206
+ #endif
207
+ #if !defined(BLAKE3_NO_AVX512)
208
+ void blake3_compress_in_place_avx512(uint32_t cv[8],
209
+ const uint8_t block[BLAKE3_BLOCK_LEN],
210
+ uint8_t block_len, uint64_t counter,
211
+ uint8_t flags);
212
+
213
+ void blake3_compress_xof_avx512(const uint32_t cv[8],
214
+ const uint8_t block[BLAKE3_BLOCK_LEN],
215
+ uint8_t block_len, uint64_t counter,
216
+ uint8_t flags, uint8_t out[64]);
217
+
218
+ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
219
+ size_t blocks, const uint32_t key[8],
220
+ uint64_t counter, bool increment_counter,
221
+ uint8_t flags, uint8_t flags_start,
222
+ uint8_t flags_end, uint8_t *out);
223
+ #endif
224
+ #endif
225
+
226
+ #if defined(BLAKE3_USE_NEON)
227
+ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
228
+ size_t blocks, const uint32_t key[8],
229
+ uint64_t counter, bool increment_counter,
230
+ uint8_t flags, uint8_t flags_start,
231
+ uint8_t flags_end, uint8_t *out);
232
+ #endif
165
233
 
166
234
 
167
235
  #endif /* BLAKE3_IMPL_H */
@@ -1800,15 +1800,18 @@ blake3_hash_many_sse41:
1800
1800
  .p2align 6
1801
1801
  blake3_compress_in_place_sse41:
1802
1802
  _blake3_compress_in_place_sse41:
1803
- sub rsp, 72
1803
+ sub rsp, 120
1804
1804
  movdqa xmmword ptr [rsp], xmm6
1805
1805
  movdqa xmmword ptr [rsp+0x10], xmm7
1806
1806
  movdqa xmmword ptr [rsp+0x20], xmm8
1807
1807
  movdqa xmmword ptr [rsp+0x30], xmm9
1808
+ movdqa xmmword ptr [rsp+0x40], xmm11
1809
+ movdqa xmmword ptr [rsp+0x50], xmm14
1810
+ movdqa xmmword ptr [rsp+0x60], xmm15
1808
1811
  movups xmm0, xmmword ptr [rcx]
1809
1812
  movups xmm1, xmmword ptr [rcx+0x10]
1810
1813
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1811
- movzx eax, byte ptr [rsp+0x70]
1814
+ movzx eax, byte ptr [rsp+0xA0]
1812
1815
  movzx r8d, r8b
1813
1816
  shl rax, 32
1814
1817
  add r8, rax
@@ -1906,24 +1909,30 @@ _blake3_compress_in_place_sse41:
1906
1909
  movdqa xmm7, xmmword ptr [rsp+0x10]
1907
1910
  movdqa xmm8, xmmword ptr [rsp+0x20]
1908
1911
  movdqa xmm9, xmmword ptr [rsp+0x30]
1909
- add rsp, 72
1912
+ movdqa xmm11, xmmword ptr [rsp+0x40]
1913
+ movdqa xmm14, xmmword ptr [rsp+0x50]
1914
+ movdqa xmm15, xmmword ptr [rsp+0x60]
1915
+ add rsp, 120
1910
1916
  ret
1911
1917
 
1912
1918
 
1913
1919
  .p2align 6
1914
1920
  _blake3_compress_xof_sse41:
1915
1921
  blake3_compress_xof_sse41:
1916
- sub rsp, 72
1922
+ sub rsp, 120
1917
1923
  movdqa xmmword ptr [rsp], xmm6
1918
1924
  movdqa xmmword ptr [rsp+0x10], xmm7
1919
1925
  movdqa xmmword ptr [rsp+0x20], xmm8
1920
1926
  movdqa xmmword ptr [rsp+0x30], xmm9
1927
+ movdqa xmmword ptr [rsp+0x40], xmm11
1928
+ movdqa xmmword ptr [rsp+0x50], xmm14
1929
+ movdqa xmmword ptr [rsp+0x60], xmm15
1921
1930
  movups xmm0, xmmword ptr [rcx]
1922
1931
  movups xmm1, xmmword ptr [rcx+0x10]
1923
1932
  movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1924
- movzx eax, byte ptr [rsp+0x70]
1933
+ movzx eax, byte ptr [rsp+0xA0]
1925
1934
  movzx r8d, r8b
1926
- mov r10, qword ptr [rsp+0x78]
1935
+ mov r10, qword ptr [rsp+0xA8]
1927
1936
  shl rax, 32
1928
1937
  add r8, rax
1929
1938
  movq xmm3, r9
@@ -2026,7 +2035,10 @@ blake3_compress_xof_sse41:
2026
2035
  movdqa xmm7, xmmword ptr [rsp+0x10]
2027
2036
  movdqa xmm8, xmmword ptr [rsp+0x20]
2028
2037
  movdqa xmm9, xmmword ptr [rsp+0x30]
2029
- add rsp, 72
2038
+ movdqa xmm11, xmmword ptr [rsp+0x40]
2039
+ movdqa xmm14, xmmword ptr [rsp+0x50]
2040
+ movdqa xmm15, xmmword ptr [rsp+0x60]
2041
+ add rsp, 120
2030
2042
  ret
2031
2043
 
2032
2044
 
@@ -1802,15 +1802,18 @@ blake3_hash_many_sse41 ENDP
1802
1802
 
1803
1803
  blake3_compress_in_place_sse41 PROC
1804
1804
  _blake3_compress_in_place_sse41 PROC
1805
- sub rsp, 72
1805
+ sub rsp, 120
1806
1806
  movdqa xmmword ptr [rsp], xmm6
1807
1807
  movdqa xmmword ptr [rsp+10H], xmm7
1808
1808
  movdqa xmmword ptr [rsp+20H], xmm8
1809
1809
  movdqa xmmword ptr [rsp+30H], xmm9
1810
+ movdqa xmmword ptr [rsp+40H], xmm11
1811
+ movdqa xmmword ptr [rsp+50H], xmm14
1812
+ movdqa xmmword ptr [rsp+60H], xmm15
1810
1813
  movups xmm0, xmmword ptr [rcx]
1811
1814
  movups xmm1, xmmword ptr [rcx+10H]
1812
1815
  movaps xmm2, xmmword ptr [BLAKE3_IV]
1813
- movzx eax, byte ptr [rsp+70H]
1816
+ movzx eax, byte ptr [rsp+0A0H]
1814
1817
  movzx r8d, r8b
1815
1818
  shl rax, 32
1816
1819
  add r8, rax
@@ -1908,7 +1911,10 @@ _blake3_compress_in_place_sse41 PROC
1908
1911
  movdqa xmm7, xmmword ptr [rsp+10H]
1909
1912
  movdqa xmm8, xmmword ptr [rsp+20H]
1910
1913
  movdqa xmm9, xmmword ptr [rsp+30H]
1911
- add rsp, 72
1914
+ movdqa xmm11, xmmword ptr [rsp+40H]
1915
+ movdqa xmm14, xmmword ptr [rsp+50H]
1916
+ movdqa xmm15, xmmword ptr [rsp+60H]
1917
+ add rsp, 120
1912
1918
  ret
1913
1919
  _blake3_compress_in_place_sse41 ENDP
1914
1920
  blake3_compress_in_place_sse41 ENDP
@@ -1916,17 +1922,20 @@ blake3_compress_in_place_sse41 ENDP
1916
1922
  ALIGN 16
1917
1923
  blake3_compress_xof_sse41 PROC
1918
1924
  _blake3_compress_xof_sse41 PROC
1919
- sub rsp, 72
1925
+ sub rsp, 120
1920
1926
  movdqa xmmword ptr [rsp], xmm6
1921
1927
  movdqa xmmword ptr [rsp+10H], xmm7
1922
1928
  movdqa xmmword ptr [rsp+20H], xmm8
1923
1929
  movdqa xmmword ptr [rsp+30H], xmm9
1930
+ movdqa xmmword ptr [rsp+40H], xmm11
1931
+ movdqa xmmword ptr [rsp+50H], xmm14
1932
+ movdqa xmmword ptr [rsp+60H], xmm15
1924
1933
  movups xmm0, xmmword ptr [rcx]
1925
1934
  movups xmm1, xmmword ptr [rcx+10H]
1926
1935
  movaps xmm2, xmmword ptr [BLAKE3_IV]
1927
- movzx eax, byte ptr [rsp+70H]
1936
+ movzx eax, byte ptr [rsp+0A0H]
1928
1937
  movzx r8d, r8b
1929
- mov r10, qword ptr [rsp+78H]
1938
+ mov r10, qword ptr [rsp+0A8H]
1930
1939
  shl rax, 32
1931
1940
  add r8, rax
1932
1941
  movq xmm3, r9
@@ -2029,7 +2038,10 @@ _blake3_compress_xof_sse41 PROC
2029
2038
  movdqa xmm7, xmmword ptr [rsp+10H]
2030
2039
  movdqa xmm8, xmmword ptr [rsp+20H]
2031
2040
  movdqa xmm9, xmmword ptr [rsp+30H]
2032
- add rsp, 72
2041
+ movdqa xmm11, xmmword ptr [rsp+40H]
2042
+ movdqa xmm14, xmmword ptr [rsp+50H]
2043
+ movdqa xmm15, xmmword ptr [rsp+60H]
2044
+ add rsp, 120
2033
2045
  ret
2034
2046
  _blake3_compress_xof_sse41 ENDP
2035
2047
  blake3_compress_xof_sse41 ENDP
@@ -2,6 +2,6 @@ require 'digest'
2
2
 
3
3
  module Digest
4
4
  class BLAKE3 < Base
5
- VERSION = "0.22.1"
5
+ VERSION = "0.34.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-blake3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.1
4
+ version: 0.34.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Bryant
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-15 00:00:00.000000000 Z
11
+ date: 2020-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,8 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  - !ruby/object:Gem::Version
113
113
  version: '0'
114
114
  requirements: []
115
- rubyforge_project:
116
- rubygems_version: 2.7.6
115
+ rubygems_version: 3.0.3
117
116
  signing_key:
118
117
  specification_version: 4
119
118
  summary: BLAKE3 for Ruby