digest-blake3 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +8 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +65 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/digest-blake3.gemspec +29 -0
- data/ext/digest/blake3/blake3.c +589 -0
- data/ext/digest/blake3/blake3.h +54 -0
- data/ext/digest/blake3/blake3_avx2.c +325 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_unix.S +1800 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_windows_gnu.S +1817 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_windows_msvc.asm +1828 -0
- data/ext/digest/blake3/blake3_avx512.c +1204 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_unix.S +2569 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_gnu.S +2615 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_msvc.asm +2634 -0
- data/ext/digest/blake3/blake3_dispatch.c +312 -0
- data/ext/digest/blake3/blake3_impl.h +167 -0
- data/ext/digest/blake3/blake3_neon.c +346 -0
- data/ext/digest/blake3/blake3_portable.c +168 -0
- data/ext/digest/blake3/blake3_ruby.c +38 -0
- data/ext/digest/blake3/blake3_sse41.c +559 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_unix.S +2011 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_gnu.S +2057 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +2077 -0
- data/ext/digest/blake3/extconf.rb +54 -0
- data/lib/digest/blake3/version.rb +7 -0
- data/lib/digest/blake3.rb +2 -0
- metadata +120 -0
@@ -0,0 +1,312 @@
|
|
1
|
+
#include <stdbool.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
#include "blake3_impl.h"
|
6
|
+
|
7
|
+
#if defined(IS_X86)
|
8
|
+
#if defined(_MSC_VER)
|
9
|
+
#include <intrin.h>
|
10
|
+
#elif defined(__GNUC__)
|
11
|
+
#include <immintrin.h>
|
12
|
+
#else
|
13
|
+
#error "Unimplemented!"
|
14
|
+
#endif
|
15
|
+
#endif
|
16
|
+
|
17
|
+
// Declarations for implementation-specific functions.
|
18
|
+
void blake3_compress_in_place_portable(uint32_t cv[8],
|
19
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
20
|
+
uint8_t block_len, uint64_t counter,
|
21
|
+
uint8_t flags);
|
22
|
+
|
23
|
+
void blake3_compress_xof_portable(const uint32_t cv[8],
|
24
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
25
|
+
uint8_t block_len, uint64_t counter,
|
26
|
+
uint8_t flags, uint8_t out[64]);
|
27
|
+
|
28
|
+
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
29
|
+
size_t blocks, const uint32_t key[8],
|
30
|
+
uint64_t counter, bool increment_counter,
|
31
|
+
uint8_t flags, uint8_t flags_start,
|
32
|
+
uint8_t flags_end, uint8_t *out);
|
33
|
+
|
34
|
+
// x86 SIMD variants. Each instruction-set family can be compiled out with the
// corresponding BLAKE3_NO_* macro, in which case its symbols are not declared.
#if defined(IS_X86)
#if !defined(BLAKE3_NO_SSE41)
void blake3_compress_in_place_sse41(uint32_t cv[8],
                                    const uint8_t block[BLAKE3_BLOCK_LEN],
                                    uint8_t block_len, uint64_t counter,
                                    uint8_t flags);
void blake3_compress_xof_sse41(const uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags, uint8_t out[64]);
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
                            uint8_t flags, uint8_t flags_start,
                            uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX2)
// Only the multi-input hash has an AVX2 variant declared here.
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX512)
void blake3_compress_in_place_avx512(uint32_t cv[8],
                                     const uint8_t block[BLAKE3_BLOCK_LEN],
                                     uint8_t block_len, uint64_t counter,
                                     uint8_t flags);

void blake3_compress_xof_avx512(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint64_t counter,
                                uint8_t flags, uint8_t out[64]);

void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
                             size_t blocks, const uint32_t key[8],
                             uint64_t counter, bool increment_counter,
                             uint8_t flags, uint8_t flags_start,
                             uint8_t flags_end, uint8_t *out);
#endif
#endif
|
75
|
+
|
76
|
+
// NEON variant; only declared when the build opts in via BLAKE3_USE_NEON.
#if defined(BLAKE3_USE_NEON)
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
|
83
|
+
|
84
|
+
#if defined(IS_X86)
|
85
|
+
// Executes XGETBV with ECX = 0 and returns the 64-bit result (EDX:EAX).
// get_cpu_features() only calls this after CPUID has reported OSXSAVE.
static uint64_t xgetbv(void) {
#if defined(_MSC_VER)
  return _xgetbv(0);
#else
  uint32_t eax = 0, edx = 0;
  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
  return ((uint64_t)edx << 32) | eax;
#endif
}
|
94
|
+
|
95
|
+
// Executes CPUID for leaf `id` and stores EAX, EBX, ECX, EDX into out[0..3].
// The inline-asm paths pass only EAX as input, so ECX (the sub-leaf) is not
// set here; use cpuidex() for leaves that take a sub-leaf.
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
  // EBX is saved into a scratch register before CPUID and swapped back
  // afterwards, so the compiler never sees EBX as clobbered (presumably
  // because EBX may be reserved, e.g. for PIC, on 32-bit targets).
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#endif
}
|
110
|
+
|
111
|
+
// Executes CPUID for leaf `id` with sub-leaf `sid` (passed in ECX) and stores
// EAX, EBX, ECX, EDX into out[0..3].
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
  // Same EBX save/restore dance as in cpuid(): EBX is parked in a scratch
  // register across the instruction instead of being listed as an output.
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#endif
}
|
126
|
+
|
127
|
+
#endif
|
128
|
+
|
129
|
+
// Bitmask of CPU capabilities as detected by get_cpu_features(). Each
// feature occupies its own bit so values can be OR-ed together.
enum cpu_feature {
  SSE2 = 1 << 0,
  SSSE3 = 1 << 1,
  SSE41 = 1 << 2,
  AVX = 1 << 3,
  AVX2 = 1 << 4,
  AVX512F = 1 << 5,
  AVX512VL = 1 << 6,
  /* ... */
  // Sentinel meaning "detection has not run yet"; never combined with the
  // feature bits above.
  UNDEFINED = 1 << 30
};
|
140
|
+
|
141
|
+
#if !defined(BLAKE3_TESTING)
|
142
|
+
static /* Allow the variable to be controlled manually for testing */
|
143
|
+
#endif
|
144
|
+
enum cpu_feature g_cpu_features = UNDEFINED;
|
145
|
+
|
146
|
+
#if !defined(BLAKE3_TESTING)
|
147
|
+
static
|
148
|
+
#endif
|
149
|
+
enum cpu_feature
|
150
|
+
get_cpu_features() {
|
151
|
+
|
152
|
+
if (g_cpu_features != UNDEFINED) {
|
153
|
+
return g_cpu_features;
|
154
|
+
} else {
|
155
|
+
#if defined(IS_X86)
|
156
|
+
uint32_t regs[4] = {0};
|
157
|
+
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
|
158
|
+
(void)edx;
|
159
|
+
enum cpu_feature features = 0;
|
160
|
+
cpuid(regs, 0);
|
161
|
+
const int max_id = *eax;
|
162
|
+
cpuid(regs, 1);
|
163
|
+
#if defined(__amd64__) || defined(_M_X64)
|
164
|
+
features |= SSE2;
|
165
|
+
#else
|
166
|
+
if (*edx & (1UL << 26))
|
167
|
+
features |= SSE2;
|
168
|
+
#endif
|
169
|
+
if (*ecx & (1UL << 0))
|
170
|
+
features |= SSSE3;
|
171
|
+
if (*ecx & (1UL << 19))
|
172
|
+
features |= SSE41;
|
173
|
+
|
174
|
+
if (*ecx & (1UL << 27)) { // OSXSAVE
|
175
|
+
const uint64_t mask = xgetbv();
|
176
|
+
if ((mask & 6) == 6) { // SSE and AVX states
|
177
|
+
if (*ecx & (1UL << 28))
|
178
|
+
features |= AVX;
|
179
|
+
if (max_id >= 7) {
|
180
|
+
cpuidex(regs, 7, 0);
|
181
|
+
if (*ebx & (1UL << 5))
|
182
|
+
features |= AVX2;
|
183
|
+
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
|
184
|
+
if (*ebx & (1UL << 31))
|
185
|
+
features |= AVX512VL;
|
186
|
+
if (*ebx & (1UL << 16))
|
187
|
+
features |= AVX512F;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
}
|
192
|
+
g_cpu_features = features;
|
193
|
+
return features;
|
194
|
+
#else
|
195
|
+
/* How to detect NEON? */
|
196
|
+
return 0;
|
197
|
+
#endif
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
void blake3_compress_in_place(uint32_t cv[8],
|
202
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
203
|
+
uint8_t block_len, uint64_t counter,
|
204
|
+
uint8_t flags) {
|
205
|
+
#if defined(IS_X86)
|
206
|
+
const enum cpu_feature features = get_cpu_features();
|
207
|
+
#if !defined(BLAKE3_NO_AVX512)
|
208
|
+
if (features & AVX512VL) {
|
209
|
+
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
210
|
+
return;
|
211
|
+
}
|
212
|
+
#endif
|
213
|
+
#if !defined(BLAKE3_NO_SSE41)
|
214
|
+
if (features & SSE41) {
|
215
|
+
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
|
216
|
+
return;
|
217
|
+
}
|
218
|
+
#endif
|
219
|
+
#endif
|
220
|
+
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
|
221
|
+
}
|
222
|
+
|
223
|
+
void blake3_compress_xof(const uint32_t cv[8],
|
224
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
225
|
+
uint8_t block_len, uint64_t counter, uint8_t flags,
|
226
|
+
uint8_t out[64]) {
|
227
|
+
#if defined(IS_X86)
|
228
|
+
const enum cpu_feature features = get_cpu_features();
|
229
|
+
#if !defined(BLAKE3_NO_AVX512)
|
230
|
+
if (features & AVX512VL) {
|
231
|
+
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
232
|
+
return;
|
233
|
+
}
|
234
|
+
#endif
|
235
|
+
#if !defined(BLAKE3_NO_SSE41)
|
236
|
+
if (features & SSE41) {
|
237
|
+
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
|
238
|
+
return;
|
239
|
+
}
|
240
|
+
#endif
|
241
|
+
#endif
|
242
|
+
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
|
243
|
+
}
|
244
|
+
|
245
|
+
// Dispatches a multi-input hash to the widest implementation available at
// run time: AVX-512, AVX2, SSE4.1 on x86; NEON when BLAKE3_USE_NEON is
// defined; otherwise the portable fallback. Arguments are forwarded
// unchanged.
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
  const enum cpu_feature features = get_cpu_features();
  (void)features; // unused when every x86 SIMD path is compiled out
#if !defined(BLAKE3_NO_AVX512)
  // Require AVX512VL in addition to AVX512F: the single-block dispatchers in
  // this file gate their AVX-512 kernels on VL, and a CPU reporting F without
  // VL must not be routed into the AVX-512 code.
  if ((features & (AVX512F | AVX512VL)) == (AVX512F | AVX512VL)) {
    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if (features & AVX2) {
    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if (features & SSE41) {
    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
                           increment_counter, flags, flags_start, flags_end,
                           out);
    return;
  }
#endif
#endif

#if defined(BLAKE3_USE_NEON)
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
                        increment_counter, flags, flags_start, flags_end, out);
  return;
#endif

  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
}
|
287
|
+
|
288
|
+
// The dynamically detected SIMD degree of the current platform: 16 for
// AVX-512, 8 for AVX2, 4 for SSE4.1 or NEON, and 1 when only the portable
// implementation applies. The checks mirror the order used by
// blake3_hash_many().
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
  const enum cpu_feature features = get_cpu_features();
  (void)features; // unused when every x86 SIMD path is compiled out
#if !defined(BLAKE3_NO_AVX512)
  // Both AVX512F and AVX512VL are required before the AVX-512 degree is
  // reported, matching the single-block dispatchers' reliance on VL.
  if ((features & (AVX512F | AVX512VL)) == (AVX512F | AVX512VL)) {
    return 16;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if (features & AVX2) {
    return 8;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if (features & SSE41) {
    return 4;
  }
#endif
#endif
#if defined(BLAKE3_USE_NEON)
  return 4;
#endif
  return 1;
}
|
@@ -0,0 +1,167 @@
|
|
1
|
+
#ifndef BLAKE3_IMPL_H
|
2
|
+
#define BLAKE3_IMPL_H
|
3
|
+
|
4
|
+
#include <assert.h>
|
5
|
+
#include <stdbool.h>
|
6
|
+
#include <stddef.h>
|
7
|
+
#include <stdint.h>
|
8
|
+
#include <string.h>
|
9
|
+
|
10
|
+
#include "blake3.h"
|
11
|
+
|
12
|
+
// internal flags
//
// One bit per flag, so several can be OR-ed into the uint8_t `flags`
// arguments taken by the compression functions declared below.
enum blake3_flags {
  CHUNK_START = 1 << 0,
  CHUNK_END = 1 << 1,
  PARENT = 1 << 2,
  ROOT = 1 << 3,
  KEYED_HASH = 1 << 4,
  DERIVE_KEY_CONTEXT = 1 << 5,
  DERIVE_KEY_MATERIAL = 1 << 6,
};
|
22
|
+
|
23
|
+
// This C implementation tries to support recent versions of GCC, Clang, and
// MSVC.
//
// INLINE marks a helper defined in this header as file-local and requests
// forced inlining, using the spelling each compiler family understands.
#if defined(_MSC_VER)
#define INLINE static __forceinline
#else
#define INLINE static inline __attribute__((always_inline))
#endif
|
30
|
+
|
31
|
+
// Target detection: IS_X86 is defined for any x86 build, together with
// exactly one of IS_X86_64 / IS_X86_32 depending on the word size.
#if defined(__x86_64__) || defined(_M_X64)
#define IS_X86
#define IS_X86_64
#endif

#if defined(__i386__) || defined(_M_IX86)
#define IS_X86
#define IS_X86_32
#endif
|
40
|
+
|
41
|
+
#if defined(IS_X86)
|
42
|
+
#if defined(_MSC_VER)
|
43
|
+
#include <intrin.h>
|
44
|
+
#endif
|
45
|
+
#include <immintrin.h>
|
46
|
+
#endif
|
47
|
+
|
48
|
+
// Compile-time upper bound on what blake3_simd_degree() can return for this
// build: 16 on x86, 4 with NEON, 1 otherwise.
#if defined(IS_X86)
#define MAX_SIMD_DEGREE 16
#elif defined(BLAKE3_USE_NEON)
#define MAX_SIMD_DEGREE 4
#else
#define MAX_SIMD_DEGREE 1
#endif

// There are some places where we want a static size that's equal to the
// MAX_SIMD_DEGREE, but also at least 2.
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
|
59
|
+
|
60
|
+
// The eight 32-bit initialization vector words.
static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
                               0x1F83D9ABUL, 0x5BE0CD19UL};
|
63
|
+
|
64
|
+
// Seven rows of sixteen indices; every row is a permutation of 0..15, with
// row 0 being the identity order.
static const uint8_t MSG_SCHEDULE[7][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
|
73
|
+
|
74
|
+
/* Find index of the highest set bit */
/* x is assumed to be nonzero. */
/* Returns the zero-based bit position (0..63). The GCC/Clang path relies on
 * __builtin_clzll, whose result is undefined for 0, hence the precondition. */
static unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  /* clz is in 0..63 here, so 63 ^ clz equals 63 - clz. */
  return 63 ^ __builtin_clzll(x);
#elif defined(_MSC_VER) && defined(IS_X86_64)
  unsigned long index;
  _BitScanReverse64(&index, x);
  return index;
#elif defined(_MSC_VER) && defined(IS_X86_32)
  /* No 64-bit scan on 32-bit MSVC: look at the upper half first. */
  if (x >> 32) {
    unsigned long index;
    _BitScanReverse(&index, (unsigned long)(x >> 32));
    return 32 + index;
  } else {
    unsigned long index;
    _BitScanReverse(&index, (unsigned long)x);
    return index;
  }
#else
  /* Portable binary search: halve the window that still contains the top
   * set bit, accumulating the shift amount in c. */
  unsigned int c = 0;
  if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
  if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
  if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
  if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
  if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
  if (x & 0x0000000000000002ULL) { c += 1; }
  return c;
#endif
}
|
104
|
+
|
105
|
+
// Count the number of 1 bits.
|
106
|
+
INLINE unsigned int popcnt(uint64_t x) {
|
107
|
+
#if defined(__GNUC__) || defined(__clang__)
|
108
|
+
return __builtin_popcountll(x);
|
109
|
+
#else
|
110
|
+
unsigned int count = 0;
|
111
|
+
while (x != 0) {
|
112
|
+
count += 1;
|
113
|
+
x &= x - 1;
|
114
|
+
}
|
115
|
+
return count;
|
116
|
+
#endif
|
117
|
+
}
|
118
|
+
|
119
|
+
// Largest power of two less than or equal to x. As a special case, returns 1
|
120
|
+
// when x is 0.
|
121
|
+
INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
|
122
|
+
return 1ULL << highest_one(x | 1);
|
123
|
+
}
|
124
|
+
|
125
|
+
// Low 32 bits of the 64-bit counter.
INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
|
126
|
+
|
127
|
+
// High 32 bits of the 64-bit counter.
INLINE uint32_t counter_high(uint64_t counter) {
  return (uint32_t)(counter >> 32);
}
|
130
|
+
|
131
|
+
// Reads four bytes from `src` and assembles them into a 32-bit value with
// src[0] as the least significant byte (little-endian). Works byte by byte,
// so `src` needs no particular alignment and the result does not depend on
// host byte order.
INLINE uint32_t load32(const void *src) {
  const uint8_t *p = (const uint8_t *)src;
  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
         ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
}
|
136
|
+
|
137
|
+
// Converts the key bytes into eight 32-bit words: key_words[i] is built from
// key[4*i .. 4*i+3] via load32(). Reads the first 32 bytes of `key`.
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
                           uint32_t key_words[8]) {
  for (size_t i = 0; i < 8; i++) {
    key_words[i] = load32(&key[i * 4]);
  }
}
|
148
|
+
|
149
|
+
void blake3_compress_in_place(uint32_t cv[8],
|
150
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
151
|
+
uint8_t block_len, uint64_t counter,
|
152
|
+
uint8_t flags);
|
153
|
+
|
154
|
+
void blake3_compress_xof(const uint32_t cv[8],
|
155
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
156
|
+
uint8_t block_len, uint64_t counter, uint8_t flags,
|
157
|
+
uint8_t out[64]);
|
158
|
+
|
159
|
+
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
160
|
+
size_t blocks, const uint32_t key[8], uint64_t counter,
|
161
|
+
bool increment_counter, uint8_t flags,
|
162
|
+
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
163
|
+
|
164
|
+
size_t blake3_simd_degree();
|
165
|
+
|
166
|
+
|
167
|
+
#endif /* BLAKE3_IMPL_H */
|