digest-blake3 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.travis.yml +8 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +65 -0
- data/Rakefile +15 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/digest-blake3.gemspec +29 -0
- data/ext/digest/blake3/blake3.c +589 -0
- data/ext/digest/blake3/blake3.h +54 -0
- data/ext/digest/blake3/blake3_avx2.c +325 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_unix.S +1800 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_windows_gnu.S +1817 -0
- data/ext/digest/blake3/blake3_avx2_x86-64_windows_msvc.asm +1828 -0
- data/ext/digest/blake3/blake3_avx512.c +1204 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_unix.S +2569 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_gnu.S +2615 -0
- data/ext/digest/blake3/blake3_avx512_x86-64_windows_msvc.asm +2634 -0
- data/ext/digest/blake3/blake3_dispatch.c +312 -0
- data/ext/digest/blake3/blake3_impl.h +167 -0
- data/ext/digest/blake3/blake3_neon.c +346 -0
- data/ext/digest/blake3/blake3_portable.c +168 -0
- data/ext/digest/blake3/blake3_ruby.c +38 -0
- data/ext/digest/blake3/blake3_sse41.c +559 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_unix.S +2011 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_gnu.S +2057 -0
- data/ext/digest/blake3/blake3_sse41_x86-64_windows_msvc.asm +2077 -0
- data/ext/digest/blake3/extconf.rb +54 -0
- data/lib/digest/blake3/version.rb +7 -0
- data/lib/digest/blake3.rb +2 -0
- metadata +120 -0
@@ -0,0 +1,312 @@
|
|
1
|
+
#include <stdbool.h>
|
2
|
+
#include <stddef.h>
|
3
|
+
#include <stdint.h>
|
4
|
+
|
5
|
+
#include "blake3_impl.h"
|
6
|
+
|
7
|
+
#if defined(IS_X86)
|
8
|
+
#if defined(_MSC_VER)
|
9
|
+
#include <intrin.h>
|
10
|
+
#elif defined(__GNUC__)
|
11
|
+
#include <immintrin.h>
|
12
|
+
#else
|
13
|
+
#error "Unimplemented!"
|
14
|
+
#endif
|
15
|
+
#endif
|
16
|
+
|
17
|
+
// Declarations for implementation-specific functions.
|
18
|
+
void blake3_compress_in_place_portable(uint32_t cv[8],
|
19
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
20
|
+
uint8_t block_len, uint64_t counter,
|
21
|
+
uint8_t flags);
|
22
|
+
|
23
|
+
void blake3_compress_xof_portable(const uint32_t cv[8],
|
24
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
25
|
+
uint8_t block_len, uint64_t counter,
|
26
|
+
uint8_t flags, uint8_t out[64]);
|
27
|
+
|
28
|
+
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
29
|
+
size_t blocks, const uint32_t key[8],
|
30
|
+
uint64_t counter, bool increment_counter,
|
31
|
+
uint8_t flags, uint8_t flags_start,
|
32
|
+
uint8_t flags_end, uint8_t *out);
|
33
|
+
|
34
|
+
// x86 SIMD implementations. Each group can be compiled out with the matching
// BLAKE3_NO_* macro; the signatures mirror the portable functions.
#if defined(IS_X86)
#if !defined(BLAKE3_NO_SSE41)
void blake3_compress_in_place_sse41(uint32_t cv[8],
                                    const uint8_t block[BLAKE3_BLOCK_LEN],
                                    uint8_t block_len, uint64_t counter,
                                    uint8_t flags);
void blake3_compress_xof_sse41(const uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags, uint8_t out[64]);
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
                            uint8_t flags, uint8_t flags_start,
                            uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX2)
// Only hash_many has an AVX2 variant declared here.
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX512)
void blake3_compress_in_place_avx512(uint32_t cv[8],
                                     const uint8_t block[BLAKE3_BLOCK_LEN],
                                     uint8_t block_len, uint64_t counter,
                                     uint8_t flags);

void blake3_compress_xof_avx512(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint64_t counter,
                                uint8_t flags, uint8_t out[64]);

void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
                             size_t blocks, const uint32_t key[8],
                             uint64_t counter, bool increment_counter,
                             uint8_t flags, uint8_t flags_start,
                             uint8_t flags_end, uint8_t *out);
#endif
#endif
|
75
|
+
|
76
|
+
// NEON implementation; only declared when the build opts in with
// BLAKE3_USE_NEON.
#if defined(BLAKE3_USE_NEON)
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
|
83
|
+
|
84
|
+
#if defined(IS_X86)
|
85
|
+
// Execute XGETBV with ECX = 0 and return the 64-bit result (EDX:EAX).
// The caller checks the OSXSAVE CPUID bit before calling this.
static uint64_t xgetbv(void) {
#if defined(_MSC_VER)
  return _xgetbv(0);
#else
  uint32_t eax = 0, edx = 0;
  __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
  return ((uint64_t)edx << 32) | eax;
#endif
}
|
94
|
+
|
95
|
+
// Execute CPUID for leaf `id` and store EAX, EBX, ECX, EDX in out[0..3].
// ECX is not set on input, so use cpuidex() for leaves that take a sub-leaf.
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#elif defined(__i386__) || defined(_M_IX86)
  // On 32-bit x86 the original EBX is stashed in operand %1 before CPUID and
  // swapped back afterwards, so EBX is never listed as an output
  // (presumably because EBX may be reserved, e.g. for PIC -- TODO confirm).
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id));
#endif
}
|
110
|
+
|
111
|
+
// Execute CPUID for leaf `id` with sub-leaf `sid` (passed in ECX) and store
// EAX, EBX, ECX, EDX in out[0..3].
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#elif defined(__i386__) || defined(_M_IX86)
  // Same EBX save/restore dance as cpuid(): EBX is stashed in operand %1 and
  // swapped back after the instruction.
  __asm__ __volatile__("movl %%ebx, %1\n"
                       "cpuid\n"
                       "xchgl %1, %%ebx\n"
                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                       : "a"(id), "c"(sid));
#endif
}
|
126
|
+
|
127
|
+
#endif
|
128
|
+
|
129
|
+
// Bit flags describing detected CPU features. Values are OR-ed together into
// a feature set; UNDEFINED marks a feature set that has not been probed yet.
enum cpu_feature {
  SSE2 = 1 << 0,
  SSSE3 = 1 << 1,
  SSE41 = 1 << 2,
  AVX = 1 << 3,
  AVX2 = 1 << 4,
  AVX512F = 1 << 5,
  AVX512VL = 1 << 6,
  /* ... */
  UNDEFINED = 1 << 30
};
|
140
|
+
|
141
|
+
#if !defined(BLAKE3_TESTING)
|
142
|
+
static /* Allow the variable to be controlled manually for testing */
|
143
|
+
#endif
|
144
|
+
enum cpu_feature g_cpu_features = UNDEFINED;
|
145
|
+
|
146
|
+
#if !defined(BLAKE3_TESTING)
|
147
|
+
static
|
148
|
+
#endif
|
149
|
+
enum cpu_feature
|
150
|
+
get_cpu_features() {
|
151
|
+
|
152
|
+
if (g_cpu_features != UNDEFINED) {
|
153
|
+
return g_cpu_features;
|
154
|
+
} else {
|
155
|
+
#if defined(IS_X86)
|
156
|
+
uint32_t regs[4] = {0};
|
157
|
+
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
|
158
|
+
(void)edx;
|
159
|
+
enum cpu_feature features = 0;
|
160
|
+
cpuid(regs, 0);
|
161
|
+
const int max_id = *eax;
|
162
|
+
cpuid(regs, 1);
|
163
|
+
#if defined(__amd64__) || defined(_M_X64)
|
164
|
+
features |= SSE2;
|
165
|
+
#else
|
166
|
+
if (*edx & (1UL << 26))
|
167
|
+
features |= SSE2;
|
168
|
+
#endif
|
169
|
+
if (*ecx & (1UL << 0))
|
170
|
+
features |= SSSE3;
|
171
|
+
if (*ecx & (1UL << 19))
|
172
|
+
features |= SSE41;
|
173
|
+
|
174
|
+
if (*ecx & (1UL << 27)) { // OSXSAVE
|
175
|
+
const uint64_t mask = xgetbv();
|
176
|
+
if ((mask & 6) == 6) { // SSE and AVX states
|
177
|
+
if (*ecx & (1UL << 28))
|
178
|
+
features |= AVX;
|
179
|
+
if (max_id >= 7) {
|
180
|
+
cpuidex(regs, 7, 0);
|
181
|
+
if (*ebx & (1UL << 5))
|
182
|
+
features |= AVX2;
|
183
|
+
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
|
184
|
+
if (*ebx & (1UL << 31))
|
185
|
+
features |= AVX512VL;
|
186
|
+
if (*ebx & (1UL << 16))
|
187
|
+
features |= AVX512F;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
}
|
192
|
+
g_cpu_features = features;
|
193
|
+
return features;
|
194
|
+
#else
|
195
|
+
/* How to detect NEON? */
|
196
|
+
return 0;
|
197
|
+
#endif
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
void blake3_compress_in_place(uint32_t cv[8],
|
202
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
203
|
+
uint8_t block_len, uint64_t counter,
|
204
|
+
uint8_t flags) {
|
205
|
+
#if defined(IS_X86)
|
206
|
+
const enum cpu_feature features = get_cpu_features();
|
207
|
+
#if !defined(BLAKE3_NO_AVX512)
|
208
|
+
if (features & AVX512VL) {
|
209
|
+
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
210
|
+
return;
|
211
|
+
}
|
212
|
+
#endif
|
213
|
+
#if !defined(BLAKE3_NO_SSE41)
|
214
|
+
if (features & SSE41) {
|
215
|
+
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
|
216
|
+
return;
|
217
|
+
}
|
218
|
+
#endif
|
219
|
+
#endif
|
220
|
+
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
|
221
|
+
}
|
222
|
+
|
223
|
+
void blake3_compress_xof(const uint32_t cv[8],
|
224
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
225
|
+
uint8_t block_len, uint64_t counter, uint8_t flags,
|
226
|
+
uint8_t out[64]) {
|
227
|
+
#if defined(IS_X86)
|
228
|
+
const enum cpu_feature features = get_cpu_features();
|
229
|
+
#if !defined(BLAKE3_NO_AVX512)
|
230
|
+
if (features & AVX512VL) {
|
231
|
+
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
232
|
+
return;
|
233
|
+
}
|
234
|
+
#endif
|
235
|
+
#if !defined(BLAKE3_NO_SSE41)
|
236
|
+
if (features & SSE41) {
|
237
|
+
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
|
238
|
+
return;
|
239
|
+
}
|
240
|
+
#endif
|
241
|
+
#endif
|
242
|
+
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
|
243
|
+
}
|
244
|
+
|
245
|
+
// Hash many inputs, dispatching at run time to the widest available
// implementation: AVX-512, AVX2, SSE4.1, then NEON (compile-time opt-in) or
// the portable fallback. All arguments are forwarded unchanged.
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
  const enum cpu_feature features = get_cpu_features();
#if !defined(BLAKE3_NO_AVX512)
  // Require both AVX512F and AVX512VL: the compress paths in this file gate
  // the AVX-512 implementation on AVX512VL, so AVX512F alone is not treated
  // as sufficient here either.
  if ((features & (AVX512F | AVX512VL)) == (AVX512F | AVX512VL)) {
    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if (features & AVX2) {
    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if (features & SSE41) {
    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
                           increment_counter, flags, flags_start, flags_end,
                           out);
    return;
  }
#endif
#endif

#if defined(BLAKE3_USE_NEON)
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
                        increment_counter, flags, flags_start, flags_end, out);
  return;
#endif

  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
}
|
287
|
+
|
288
|
+
// The dynamically detected SIMD degree of the current platform.
// Returns 16 for AVX-512 (AVX512F and AVX512VL both detected), 8 for AVX2,
// 4 for SSE4.1 or NEON, and 1 when only the portable code is available.
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
  const enum cpu_feature features = get_cpu_features();
#if !defined(BLAKE3_NO_AVX512)
  // Mirror the gating used for the AVX-512 hash_many path: both feature bits
  // must be present.
  if ((features & (AVX512F | AVX512VL)) == (AVX512F | AVX512VL)) {
    return 16;
  }
#endif
#if !defined(BLAKE3_NO_AVX2)
  if (features & AVX2) {
    return 8;
  }
#endif
#if !defined(BLAKE3_NO_SSE41)
  if (features & SSE41) {
    return 4;
  }
#endif
#endif
#if defined(BLAKE3_USE_NEON)
  return 4;
#endif
  return 1;
}
|
@@ -0,0 +1,167 @@
|
|
1
|
+
#ifndef BLAKE3_IMPL_H
|
2
|
+
#define BLAKE3_IMPL_H
|
3
|
+
|
4
|
+
#include <assert.h>
|
5
|
+
#include <stdbool.h>
|
6
|
+
#include <stddef.h>
|
7
|
+
#include <stdint.h>
|
8
|
+
#include <string.h>
|
9
|
+
|
10
|
+
#include "blake3.h"
|
11
|
+
|
12
|
+
// Internal flags. Each value is a distinct single bit so flags can be OR-ed
// together into the uint8_t `flags` arguments used throughout this header.
enum blake3_flags {
  CHUNK_START = 1 << 0,
  CHUNK_END = 1 << 1,
  PARENT = 1 << 2,
  ROOT = 1 << 3,
  KEYED_HASH = 1 << 4,
  DERIVE_KEY_CONTEXT = 1 << 5,
  DERIVE_KEY_MATERIAL = 1 << 6,
};
|
22
|
+
|
23
|
+
// This C implementation tries to support recent versions of GCC, Clang, and
// MSVC. INLINE expands to the compiler-specific spelling of a static,
// force-inlined function.
#if defined(_MSC_VER)
#define INLINE static __forceinline
#else
#define INLINE static inline __attribute__((always_inline))
#endif
|
30
|
+
|
31
|
+
// Target detection: IS_X86 is defined for both 32- and 64-bit x86, together
// with exactly one of IS_X86_64 / IS_X86_32.
#if defined(__x86_64__) || defined(_M_X64)
#define IS_X86
#define IS_X86_64
#endif

#if defined(__i386__) || defined(_M_IX86)
#define IS_X86
#define IS_X86_32
#endif
|
40
|
+
|
41
|
+
#if defined(IS_X86)
|
42
|
+
#if defined(_MSC_VER)
|
43
|
+
#include <intrin.h>
|
44
|
+
#endif
|
45
|
+
#include <immintrin.h>
|
46
|
+
#endif
|
47
|
+
|
48
|
+
// Compile-time upper bound on the SIMD degree: 16 on x86, 4 with NEON,
// otherwise 1.
#if defined(IS_X86)
#define MAX_SIMD_DEGREE 16
#elif defined(BLAKE3_USE_NEON)
#define MAX_SIMD_DEGREE 4
#else
#define MAX_SIMD_DEGREE 1
#endif

// There are some places where we want a static size that's equal to the
// MAX_SIMD_DEGREE, but also at least 2.
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
|
59
|
+
|
60
|
+
// Eight 32-bit constant words (presumably the hash's initialization vector,
// going by the name -- the uses are outside this header).
static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
                               0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
                               0x1F83D9ABUL, 0x5BE0CD19UL};
|
63
|
+
|
64
|
+
// Seven rows of sixteen indices; every row is a permutation of 0..15 and the
// first row is the identity order.
static const uint8_t MSG_SCHEDULE[7][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
    {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
    {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
    {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
    {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
    {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
|
73
|
+
|
74
|
+
/* Find index of the highest set bit (0 = least significant bit). */
/* x is assumed to be nonzero; the compiler-intrinsic branches do not handle
 * x == 0. */
static unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  return 63 ^ __builtin_clzll(x);
#elif defined(_MSC_VER) && defined(IS_X86_64)
  unsigned long index;
  _BitScanReverse64(&index, x);
  return (unsigned int)index;
#elif defined(_MSC_VER) && defined(IS_X86_32)
  /* No 64-bit scan on 32-bit MSVC: scan the high half first, then the low. */
  if (x >> 32) {
    unsigned long index;
    _BitScanReverse(&index, (unsigned long)(x >> 32));
    return 32 + (unsigned int)index;
  } else {
    unsigned long index;
    _BitScanReverse(&index, (unsigned long)x);
    return (unsigned int)index;
  }
#else
  /* Generic fallback: binary search over halves of decreasing width. */
  unsigned int c = 0;
  if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
  if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
  if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
  if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
  if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
  if (x & 0x0000000000000002ULL) { c += 1; }
  return c;
#endif
}
|
104
|
+
|
105
|
+
// Count the number of 1 bits.
|
106
|
+
INLINE unsigned int popcnt(uint64_t x) {
|
107
|
+
#if defined(__GNUC__) || defined(__clang__)
|
108
|
+
return __builtin_popcountll(x);
|
109
|
+
#else
|
110
|
+
unsigned int count = 0;
|
111
|
+
while (x != 0) {
|
112
|
+
count += 1;
|
113
|
+
x &= x - 1;
|
114
|
+
}
|
115
|
+
return count;
|
116
|
+
#endif
|
117
|
+
}
|
118
|
+
|
119
|
+
// Largest power of two less than or equal to x. As a special case, returns 1
|
120
|
+
// when x is 0.
|
121
|
+
INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
|
122
|
+
return 1ULL << highest_one(x | 1);
|
123
|
+
}
|
124
|
+
|
125
|
+
// Low 32 bits of the 64-bit counter.
INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
|
126
|
+
|
127
|
+
// High 32 bits of the 64-bit counter.
INLINE uint32_t counter_high(uint64_t counter) {
  return (uint32_t)(counter >> 32);
}
|
130
|
+
|
131
|
+
// Read four bytes from `src` as a little-endian 32-bit word. Bytes are read
// one at a time, so `src` needs no particular alignment and the result does
// not depend on host byte order.
INLINE uint32_t load32(const void *src) {
  const uint8_t *p = (const uint8_t *)src;
  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
         ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
}
|
136
|
+
|
137
|
+
// Decode the key bytes into eight 32-bit words: key_words[i] is load32() of
// the four bytes starting at key[4 * i].
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
                           uint32_t key_words[8]) {
  for (size_t i = 0; i < 8; i++) {
    key_words[i] = load32(&key[i * 4]);
  }
}
|
148
|
+
|
149
|
+
void blake3_compress_in_place(uint32_t cv[8],
|
150
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
151
|
+
uint8_t block_len, uint64_t counter,
|
152
|
+
uint8_t flags);
|
153
|
+
|
154
|
+
void blake3_compress_xof(const uint32_t cv[8],
|
155
|
+
const uint8_t block[BLAKE3_BLOCK_LEN],
|
156
|
+
uint8_t block_len, uint64_t counter, uint8_t flags,
|
157
|
+
uint8_t out[64]);
|
158
|
+
|
159
|
+
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
160
|
+
size_t blocks, const uint32_t key[8], uint64_t counter,
|
161
|
+
bool increment_counter, uint8_t flags,
|
162
|
+
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
163
|
+
|
164
|
+
size_t blake3_simd_degree();
|
165
|
+
|
166
|
+
|
167
|
+
#endif /* BLAKE3_IMPL_H */
|