ed25519_blake2b 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +23 -0
- data/LICENSE +21 -0
- data/README.md +39 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ed25519_blake2b.gemspec +31 -0
- data/ext/ed25519_blake2b/blake2-config.h +72 -0
- data/ext/ed25519_blake2b/blake2-impl.h +160 -0
- data/ext/ed25519_blake2b/blake2.h +195 -0
- data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
- data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
- data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
- data/ext/ed25519_blake2b/blake2b-round.h +157 -0
- data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
- data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
- data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
- data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
- data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
- data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
- data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
- data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
- data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
- data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
- data/ext/ed25519_blake2b/ed25519.c +150 -0
- data/ext/ed25519_blake2b/ed25519.h +30 -0
- data/ext/ed25519_blake2b/extconf.rb +3 -0
- data/ext/ed25519_blake2b/fuzz/README.md +173 -0
- data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
- data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
- data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
- data/ext/ed25519_blake2b/rbext.c +25 -0
- data/ext/ed25519_blake2b/regression.h +1024 -0
- data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
- data/lib/ed25519_blake2b/version.rb +3 -0
- metadata +147 -0
@@ -0,0 +1,373 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#include <stdint.h>
|
17
|
+
#include <string.h>
|
18
|
+
#include <stdio.h>
|
19
|
+
|
20
|
+
#include "blake2.h"
|
21
|
+
#include "blake2-impl.h"
|
22
|
+
|
23
|
+
#include "blake2-config.h"
|
24
|
+
|
25
|
+
#ifdef _MSC_VER
|
26
|
+
#include <intrin.h> /* for _mm_set_epi64x */
|
27
|
+
#endif
|
28
|
+
#include <emmintrin.h>
|
29
|
+
#if defined(HAVE_SSSE3)
|
30
|
+
#include <tmmintrin.h>
|
31
|
+
#endif
|
32
|
+
#if defined(HAVE_SSE41)
|
33
|
+
#include <smmintrin.h>
|
34
|
+
#endif
|
35
|
+
#if defined(HAVE_AVX)
|
36
|
+
#include <immintrin.h>
|
37
|
+
#endif
|
38
|
+
#if defined(HAVE_XOP)
|
39
|
+
#include <x86intrin.h>
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#include "blake2b-round.h"
|
43
|
+
|
44
|
+
/* BLAKE2b initialization vector (eight 64-bit words).  It is XORed with the
   parameter block in blake2b_init_param() and loaded into rows 3 and 4 of
   the working state in blake2b_compress(). */
static const uint64_t blake2b_IV[8] = {
  UINT64_C( 0x6a09e667f3bcc908 ),
  UINT64_C( 0xbb67ae8584caa73b ),
  UINT64_C( 0x3c6ef372fe94f82b ),
  UINT64_C( 0xa54ff53a5f1d36f1 ),
  UINT64_C( 0x510e527fade682d1 ),
  UINT64_C( 0x9b05688c2b3e6c1f ),
  UINT64_C( 0x1f83d9abfb41bd6b ),
  UINT64_C( 0x5be0cd19137e2179 )
};
|
51
|
+
|
52
|
+
/* Some helper functions */
|
53
|
+
/* Sets every bit of finalization word f[1] (the "last node" flag word). */
static void blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~( uint64_t )0;
}
|
57
|
+
|
58
|
+
static int blake2b_is_lastblock( const blake2b_state *S )
|
59
|
+
{
|
60
|
+
return S->f[0] != 0;
|
61
|
+
}
|
62
|
+
|
63
|
+
/* Marks the state as finalized by setting every bit of f[0].  When the
   state's last_node field is non-zero, f[1] is set as well (via
   blake2b_set_lastnode). */
static void blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_set_lastnode( S );
  }

  S->f[0] = ~( uint64_t )0;
}
|
69
|
+
|
70
|
+
/* Adds `inc` to the 128-bit counter held in t[0] (low word) and t[1]
   (high word).  A wrap-around of the low word is detected by the sum being
   smaller than the addend, in which case the high word is bumped by one. */
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
  const uint64_t low = S->t[0] + inc;

  S->t[0] = low;
  if( low < inc )
  {
    S->t[1] += 1;
  }
}
|
75
|
+
|
76
|
+
/* init xors IV with input parameter block */
|
77
|
+
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
78
|
+
{
|
79
|
+
size_t i;
|
80
|
+
/*blake2b_init0( S ); */
|
81
|
+
const unsigned char * v = ( const unsigned char * )( blake2b_IV );
|
82
|
+
const unsigned char * p = ( const unsigned char * )( P );
|
83
|
+
unsigned char * h = ( unsigned char * )( S->h );
|
84
|
+
/* IV XOR ParamBlock */
|
85
|
+
memset( S, 0, sizeof( blake2b_state ) );
|
86
|
+
|
87
|
+
for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
|
88
|
+
|
89
|
+
S->outlen = P->digest_length;
|
90
|
+
return 0;
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
/* Some sort of default parameter block initialization, for sequential blake2b */
|
95
|
+
int blake2b_init( blake2b_state *S, size_t outlen )
|
96
|
+
{
|
97
|
+
blake2b_param P[1];
|
98
|
+
|
99
|
+
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
100
|
+
|
101
|
+
P->digest_length = (uint8_t)outlen;
|
102
|
+
P->key_length = 0;
|
103
|
+
P->fanout = 1;
|
104
|
+
P->depth = 1;
|
105
|
+
store32( &P->leaf_length, 0 );
|
106
|
+
store32( &P->node_offset, 0 );
|
107
|
+
store32( &P->xof_length, 0 );
|
108
|
+
P->node_depth = 0;
|
109
|
+
P->inner_length = 0;
|
110
|
+
memset( P->reserved, 0, sizeof( P->reserved ) );
|
111
|
+
memset( P->salt, 0, sizeof( P->salt ) );
|
112
|
+
memset( P->personal, 0, sizeof( P->personal ) );
|
113
|
+
|
114
|
+
return blake2b_init_param( S, P );
|
115
|
+
}
|
116
|
+
|
117
|
+
/* Initializes S for keyed BLAKE2b (MAC mode).
 *
 * S       state to initialize
 * outlen  digest length in bytes, 1..BLAKE2B_OUTBYTES
 * key     pointer to the key; must not be NULL
 * keylen  key length in bytes, 1..BLAKE2B_KEYBYTES
 *
 * The parameter block is the sequential default (fanout = depth = 1) with
 * key_length set.  After the state is initialized, the key is copied into a
 * zero-padded buffer of BLAKE2B_BLOCKBYTES bytes and absorbed as the first
 * block via blake2b_update(); the temporary buffer is wiped afterwards.
 *
 * Returns 0 on success, -1 on invalid arguments or if any step fails.
 */
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  blake2b_param P[1];
  uint8_t block[BLAKE2B_BLOCKBYTES];
  int rc;

  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  /* A NULL key would be dereferenced by the memcpy below. */
  if ( ( !key ) || ( !keylen ) || ( keylen > BLAKE2B_KEYBYTES ) ) return -1;

  P->digest_length = (uint8_t)outlen;
  P->key_length    = (uint8_t)keylen;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store32( &P->node_offset, 0 );
  store32( &P->xof_length, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  /* Report initialization failure as an error (this used to return 0). */
  if( blake2b_init_param( S, P ) < 0 )
    return -1;

  memset( block, 0, BLAKE2B_BLOCKBYTES );
  memcpy( block, key, keylen );
  rc = blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
  secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

  return ( rc < 0 ) ? -1 : 0;
}
|
150
|
+
|
151
|
+
/* Compresses one BLAKE2B_BLOCKBYTES-byte message block into the chaining
 * value S->h.
 *
 * Working state (eight 128-bit registers, two 64-bit words each):
 *   row1 = h[0..3]         row2 = h[4..7]
 *   row3 = IV[0..3]        row4 = IV[4..7] ^ (t[0], t[1], f[0], f[1])
 * Twelve ROUND()s are applied, then h is updated with
 *   h[0..3] ^= row1 ^ row3   and   h[4..7] ^= row2 ^ row4.
 *
 * The local names below must not be changed: ROUND() receives the row and
 * b0/b1 variables by name, the DIAGONALIZE/UNDIAGONALIZE macros use t0/t1,
 * and the SSSE3 _mm_roti_epi64 macro uses r16/r24.  The m0..m15 values are
 * presumably consumed by the LOAD_MSG_* macros from the included load
 * header (not visible in this file) -- confirm there.
 */
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks used for the 16- and 24-bit rotations. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE41)
  /* Message block as eight 128-bit vectors. */
  const __m128i m0 = LOADU( block +   0 );
  const __m128i m1 = LOADU( block +  16 );
  const __m128i m2 = LOADU( block +  32 );
  const __m128i m3 = LOADU( block +  48 );
  const __m128i m4 = LOADU( block +  64 );
  const __m128i m5 = LOADU( block +  80 );
  const __m128i m6 = LOADU( block +  96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  /* Message block as sixteen 64-bit words. */
  const uint64_t m0  = load64( block +  0 * sizeof( uint64_t ) );
  const uint64_t m1  = load64( block +  1 * sizeof( uint64_t ) );
  const uint64_t m2  = load64( block +  2 * sizeof( uint64_t ) );
  const uint64_t m3  = load64( block +  3 * sizeof( uint64_t ) );
  const uint64_t m4  = load64( block +  4 * sizeof( uint64_t ) );
  const uint64_t m5  = load64( block +  5 * sizeof( uint64_t ) );
  const uint64_t m6  = load64( block +  6 * sizeof( uint64_t ) );
  const uint64_t m7  = load64( block +  7 * sizeof( uint64_t ) );
  const uint64_t m8  = load64( block +  8 * sizeof( uint64_t ) );
  const uint64_t m9  = load64( block +  9 * sizeof( uint64_t ) );
  const uint64_t m10 = load64( block + 10 * sizeof( uint64_t ) );
  const uint64_t m11 = load64( block + 11 * sizeof( uint64_t ) );
  const uint64_t m12 = load64( block + 12 * sizeof( uint64_t ) );
  const uint64_t m13 = load64( block + 13 * sizeof( uint64_t ) );
  const uint64_t m14 = load64( block + 14 * sizeof( uint64_t ) );
  const uint64_t m15 = load64( block + 15 * sizeof( uint64_t ) );
#endif
  /* Rows 1-2: current chaining value. */
  __m128i row1l = LOADU( &S->h[0] );
  __m128i row1h = LOADU( &S->h[2] );
  __m128i row2l = LOADU( &S->h[4] );
  __m128i row2h = LOADU( &S->h[6] );
  /* Row 3: first half of the IV. */
  __m128i row3l = LOADU( &blake2b_IV[0] );
  __m128i row3h = LOADU( &blake2b_IV[2] );
  /* Row 4: second half of the IV mixed with the counter and the flags. */
  __m128i row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
  __m128i row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
  /* Scratch registers written by the round macros. */
  __m128i b0, b1;
  __m128i t0, t1;

  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  /* Fold both halves of the working state back into the chaining value. */
  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), _mm_xor_si128( row3l, row1l ) ) );
  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), _mm_xor_si128( row3h, row1h ) ) );
  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), _mm_xor_si128( row4l, row2l ) ) );
  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), _mm_xor_si128( row4h, row2h ) ) );
}
|
219
|
+
|
220
|
+
|
221
|
+
/* Absorbs `inlen` bytes starting at `pin` into the hash state.
 *
 * Input is staged in S->buf.  Only when the new data is strictly larger than
 * the free space in the buffer is the buffer topped up and compressed;
 * after that, whole blocks are compressed straight from the input for as
 * long as strictly more than one block remains.  Whatever is left (between
 * 1 and BLAKE2B_BLOCKBYTES bytes on that path) is copied into the buffer,
 * so blake2b_final() always has the tail of the message to compress with
 * the last-block flag set.
 *
 * Always returns 0.
 */
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
  const unsigned char *src = ( const unsigned char * )pin;
  size_t buffered;
  size_t room;

  if( inlen == 0 )
  {
    return 0;
  }

  buffered = S->buflen;
  room = BLAKE2B_BLOCKBYTES - buffered;

  if( inlen > room )
  {
    /* Complete the pending block and compress it. */
    memcpy( S->buf + buffered, src, room );
    S->buflen = 0;
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    src += room;
    inlen -= room;

    /* Compress full blocks directly from the caller's memory. */
    for( ; inlen > BLAKE2B_BLOCKBYTES; src += BLAKE2B_BLOCKBYTES, inlen -= BLAKE2B_BLOCKBYTES )
    {
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, src );
    }
  }

  /* Stash the remainder for the next call or for blake2b_final(). */
  memcpy( S->buf + S->buflen, src, inlen );
  S->buflen += inlen;
  return 0;
}
|
247
|
+
|
248
|
+
|
249
|
+
/* Finishes the hash and writes S->outlen digest bytes to `out`.
 *
 * Returns -1 when `out` is NULL, when `outlen` is smaller than the digest
 * length chosen at init time, or when the state has already been finalized.
 * Otherwise the counter is advanced by the number of buffered bytes, the
 * last-block flag is set, the buffer is zero-padded to a full block and
 * compressed, and the leading S->outlen bytes of h are copied out.
 * Returns 0 on success.
 */
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
  size_t used;

  if( out == NULL )
  {
    return -1;
  }
  if( outlen < S->outlen )
  {
    return -1;
  }
  if( blake2b_is_lastblock( S ) )
  {
    return -1;
  }

  used = S->buflen;
  blake2b_increment_counter( S, used );
  blake2b_set_lastblock( S );

  /* Padding */
  memset( S->buf + used, 0, BLAKE2B_BLOCKBYTES - used );
  blake2b_compress( S, S->buf );

  memcpy( out, S->h, S->outlen );
  return 0;
}
|
265
|
+
|
266
|
+
|
267
|
+
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
268
|
+
{
|
269
|
+
blake2b_state S[1];
|
270
|
+
|
271
|
+
/* Verify parameters */
|
272
|
+
if ( NULL == in && inlen > 0 ) return -1;
|
273
|
+
|
274
|
+
if ( NULL == out ) return -1;
|
275
|
+
|
276
|
+
if( NULL == key && keylen > 0 ) return -1;
|
277
|
+
|
278
|
+
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
|
279
|
+
|
280
|
+
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
281
|
+
|
282
|
+
if( keylen )
|
283
|
+
{
|
284
|
+
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
285
|
+
}
|
286
|
+
else
|
287
|
+
{
|
288
|
+
if( blake2b_init( S, outlen ) < 0 ) return -1;
|
289
|
+
}
|
290
|
+
|
291
|
+
blake2b_update( S, ( const uint8_t * )in, inlen );
|
292
|
+
blake2b_final( S, out, outlen );
|
293
|
+
return 0;
|
294
|
+
}
|
295
|
+
|
296
|
+
/* Alias for blake2b(): forwards every argument unchanged and returns its
   result. */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
  const int status = blake2b( out, outlen, in, inlen, key, keylen );

  return status;
}
|
299
|
+
|
300
|
+
#ifdef SUPERCOP
/* Entry point compiled only when SUPERCOP is defined: unkeyed BLAKE2b with a
   full BLAKE2B_OUTBYTES-byte digest.  Returns whatever blake2b() returns. */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  const int status = blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );

  return status;
}
#endif
|
306
|
+
|
307
|
+
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/* Self-test driver (built only with BLAKE2B_SELFTEST).
 *
 * The key is the byte sequence 0, 1, 2, ... and the message buffer is filled
 * the same way.  For every message length i below BLAKE2_KAT_LENGTH the
 * keyed digest must equal blake2b_keyed_kat[i]:
 *   1. via the one-shot blake2b() call, and
 *   2. via init_key / update / final, feeding the message in chunks of
 *      `step` bytes for every step from 1 to BLAKE2B_BLOCKBYTES - 1.
 *
 * Prints "ok" and returns 0 on success; prints "error" and returns -1 at the
 * first mismatch or failing call.
 */
int main( void )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[BLAKE2_KAT_LENGTH];
  size_t i, step;

  for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
  {
    key[i] = ( uint8_t )i;
  }

  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
  {
    buf[i] = ( uint8_t )i;
  }

  /* Test simple API */
  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];

    blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );

    if( memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) != 0 )
    {
      goto fail;
    }
  }

  /* Test streaming API */
  for( step = 1; step < BLAKE2B_BLOCKBYTES; ++step )
  {
    for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
    {
      uint8_t hash[BLAKE2B_OUTBYTES];
      blake2b_state S;
      uint8_t *cursor = buf;
      size_t remaining = i;

      if( blake2b_init_key( &S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES ) < 0 )
      {
        goto fail;
      }

      /* Whole chunks of `step` bytes first ... */
      for( ; remaining >= step; remaining -= step, cursor += step )
      {
        if( blake2b_update( &S, cursor, step ) < 0 )
        {
          goto fail;
        }
      }

      /* ... then the (possibly empty) tail. */
      if( blake2b_update( &S, cursor, remaining ) < 0 )
      {
        goto fail;
      }

      if( blake2b_final( &S, hash, BLAKE2B_OUTBYTES ) < 0 )
      {
        goto fail;
      }

      if( memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) != 0 )
      {
        goto fail;
      }
    }
  }

  puts( "ok" );
  return 0;
fail:
  puts("error");
  return -1;
}
#endif
|
@@ -0,0 +1,157 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
#ifndef BLAKE2B_ROUND_H
|
16
|
+
#define BLAKE2B_ROUND_H
|
17
|
+
|
18
|
+
/* Unaligned 128-bit load from / store to an arbitrary pointer. */
#define LOADU(p)     _mm_loadu_si128((const __m128i *)(p))
#define STOREU(p, r) _mm_storeu_si128((__m128i *)(p), (r))

/* Bit-preserving casts between the integer and single-precision vector types. */
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))

/* Branch-prediction hint: the expression is expected to evaluate to 1. */
#define LIKELY(x) __builtin_expect((x), 1)
|
25
|
+
|
26
|
+
|
27
|
+
/* Microarchitecture-specific macros */
|
28
|
+
#ifndef HAVE_XOP
/* _mm_roti_epi64(x, c): rotate each 64-bit lane of x.  Callers pass a
 * NEGATIVE constant c, and the lanes are rotated RIGHT by -c bits
 * (shift right by -c, shift left by 64 - (-c), combine).
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single expression wherever it is used.
 */
#ifdef HAVE_SSSE3
/* SSSE3 variant: rotations by 32, 24 and 16 bits are done with shuffles.
 * r24 and r16 are shuffle masks that must be in scope at the point of use
 * (blake2b_compress declares them).  A rotation by 63 uses x + x for the
 * left shift by one. */
#define _mm_roti_epi64(x, c) \
    ( (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1))  \
    : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
    : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
    : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x)))  \
    : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) )
#else
/* Plain SSE2 variant: two shifts and an XOR. */
#define _mm_roti_epi64(r, c) \
    ( _mm_xor_si128(_mm_srli_epi64( (r), -(c) ), _mm_slli_epi64( (r), 64-(-(c)) )) )
#endif
#else
/* HAVE_XOP: nothing is defined here.  NOTE(review): _mm_roti_epi64 is
   presumably provided as an intrinsic by <x86intrin.h> in this
   configuration -- confirm. */
#endif
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
/* First half of the mixing step, applied to both 128-bit halves of each row:
 *   row1 += row2 + b;   row4 = rotr64(row4 ^ row1, 32);
 *   row3 += row4;       row2 = rotr64(row2 ^ row3, 24);
 * b0/b1 are the message words for the low/high half.
 * Wrapped in do { } while(0) so the macro expands to exactly one statement;
 * invoke it with a trailing semicolon. */
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  do { \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
    \
    row4l = _mm_xor_si128(row4l, row1l); \
    row4h = _mm_xor_si128(row4h, row1h); \
    \
    row4l = _mm_roti_epi64(row4l, -32); \
    row4h = _mm_roti_epi64(row4h, -32); \
    \
    row3l = _mm_add_epi64(row3l, row4l); \
    row3h = _mm_add_epi64(row3h, row4h); \
    \
    row2l = _mm_xor_si128(row2l, row3l); \
    row2h = _mm_xor_si128(row2h, row3h); \
    \
    row2l = _mm_roti_epi64(row2l, -24); \
    row2h = _mm_roti_epi64(row2h, -24); \
  } while(0)
|
63
|
+
|
64
|
+
/* Second half of the mixing step, applied to both 128-bit halves of each row:
 *   row1 += row2 + b;   row4 = rotr64(row4 ^ row1, 16);
 *   row3 += row4;       row2 = rotr64(row2 ^ row3, 63);
 * b0/b1 are the message words for the low/high half.
 * Wrapped in do { } while(0) so the macro expands to exactly one statement;
 * invoke it with a trailing semicolon. */
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  do { \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
    \
    row4l = _mm_xor_si128(row4l, row1l); \
    row4h = _mm_xor_si128(row4h, row1h); \
    \
    row4l = _mm_roti_epi64(row4l, -16); \
    row4h = _mm_roti_epi64(row4h, -16); \
    \
    row3l = _mm_add_epi64(row3l, row4l); \
    row3h = _mm_add_epi64(row3h, row4h); \
    \
    row2l = _mm_xor_si128(row2l, row3l); \
    row2h = _mm_xor_si128(row2h, row3h); \
    \
    row2l = _mm_roti_epi64(row2l, -63); \
    row2h = _mm_roti_epi64(row2h, -63); \
  } while(0)
|
82
|
+
|
83
|
+
#if defined(HAVE_SSSE3)
|
84
|
+
/* SSSE3 variant.  Rotates the 64-bit words of rows 2, 3 and 4 so that the
 * following G1/G2 pair operates on the diagonals: row 2 is built from two
 * byte-aligned concatenations of its halves, row 3 has its halves swapped,
 * and row 4 is realigned the opposite way to row 2.  Row 1 is untouched.
 * Uses the scratch variables t0 and t1, which must be in scope.
 * Expands to a single statement; invoke with a trailing semicolon. */
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  do { \
    t0 = _mm_alignr_epi8(row2h, row2l, 8); \
    t1 = _mm_alignr_epi8(row2l, row2h, 8); \
    row2l = t0; \
    row2h = t1; \
    \
    t0 = row3l; \
    row3l = row3h; \
    row3h = t0;    \
    \
    t0 = _mm_alignr_epi8(row4h, row4l, 8); \
    t1 = _mm_alignr_epi8(row4l, row4h, 8); \
    row4l = t1; \
    row4h = t0; \
  } while(0)
|
98
|
+
|
99
|
+
/* SSSE3 variant.  Inverse of DIAGONALIZE: the alignr operands for rows 2
 * and 4 are exchanged relative to DIAGONALIZE and the halves of row 3 are
 * swapped back.  Row 1 is untouched.
 * Uses the scratch variables t0 and t1, which must be in scope.
 * Expands to a single statement; invoke with a trailing semicolon. */
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  do { \
    t0 = _mm_alignr_epi8(row2l, row2h, 8); \
    t1 = _mm_alignr_epi8(row2h, row2l, 8); \
    row2l = t0; \
    row2h = t1; \
    \
    t0 = row3l; \
    row3l = row3h; \
    row3h = t0; \
    \
    t0 = _mm_alignr_epi8(row4l, row4h, 8); \
    t1 = _mm_alignr_epi8(row4h, row4l, 8); \
    row4l = t1; \
    row4h = t0; \
  } while(0)
|
113
|
+
#else
|
114
|
+
|
115
|
+
/* SSE2 variant.  With each row viewed as four 64-bit words (low half
 * first), this rotates row 2 left by one word, row 3 by two words (its
 * halves are swapped) and row 4 by three words; row 1 is untouched.
 * Statement order matters: t0/t1 keep the old row4l/row2l, and row4l is
 * used as a temporary while swapping row 3 before being recomputed.
 * Uses the scratch variables t0 and t1, which must be in scope.
 * Expands to a single statement; invoke with a trailing semicolon. */
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  do { \
    t0 = row4l;\
    t1 = row2l;\
    row4l = row3l;\
    row3l = row3h;\
    row3h = row4l;\
    row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
    row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
    row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
    row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)); \
  } while(0)
|
125
|
+
|
126
|
+
/* SSE2 variant.  Inverse of DIAGONALIZE: with each row viewed as four
 * 64-bit words (low half first), this rotates row 2 right by one word,
 * row 3 by two words (its halves are swapped) and row 4 right by three
 * words; row 1 is untouched.
 * t0 is reused: first as the swap temporary for row 3, then to keep the old
 * row2l; t1 keeps the old row4l.  Both must be in scope.
 * Expands to a single statement; invoke with a trailing semicolon. */
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  do { \
    t0 = row3l;\
    row3l = row3h;\
    row3h = t0;\
    t0 = row2l;\
    t1 = row4l;\
    row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
    row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
    row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
    row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)); \
  } while(0)
|
136
|
+
|
137
|
+
#endif
|
138
|
+
|
139
|
+
#if defined(HAVE_SSE41)
|
140
|
+
#include "blake2b-load-sse41.h"
|
141
|
+
#else
|
142
|
+
#include "blake2b-load-sse2.h"
|
143
|
+
#endif
|
144
|
+
|
145
|
+
/* One full round `r`: a G1/G2 pair on the rows as they are, DIAGONALIZE,
 * a second G1/G2 pair, then UNDIAGONALIZE.  Before each G step the message
 * words for that step are loaded into b0/b1 by LOAD_MSG_<r>_<1..4>, which
 * come from the included load header.
 * Relies on the caller's row1l..row4h, b0, b1 (and the scratch variables
 * used by the sub-macros) being in scope under exactly these names.
 * Expands to a single statement; invoke with a trailing semicolon. */
#define ROUND(r) \
  do { \
    LOAD_MSG_ ##r ##_1(b0, b1); \
    G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
    LOAD_MSG_ ##r ##_2(b0, b1); \
    G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
    DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
    LOAD_MSG_ ##r ##_3(b0, b1); \
    G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
    LOAD_MSG_ ##r ##_4(b0, b1); \
    G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
    UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
  } while(0)
|
156
|
+
|
157
|
+
#endif
|