nanocurrency 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +7 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +40 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +16 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/ext/.DS_Store +0 -0
  14. data/ext/nanocurrency_ext/blake2-config.h +72 -0
  15. data/ext/nanocurrency_ext/blake2-impl.h +160 -0
  16. data/ext/nanocurrency_ext/blake2.h +195 -0
  17. data/ext/nanocurrency_ext/blake2b-load-sse2.h +68 -0
  18. data/ext/nanocurrency_ext/blake2b-load-sse41.h +402 -0
  19. data/ext/nanocurrency_ext/blake2b-ref.c +373 -0
  20. data/ext/nanocurrency_ext/blake2b-round.h +157 -0
  21. data/ext/nanocurrency_ext/curve25519-donna-32bit.h +579 -0
  22. data/ext/nanocurrency_ext/curve25519-donna-64bit.h +413 -0
  23. data/ext/nanocurrency_ext/curve25519-donna-helpers.h +67 -0
  24. data/ext/nanocurrency_ext/curve25519-donna-sse2.h +1112 -0
  25. data/ext/nanocurrency_ext/ed25519-donna-32bit-sse2.h +513 -0
  26. data/ext/nanocurrency_ext/ed25519-donna-32bit-tables.h +61 -0
  27. data/ext/nanocurrency_ext/ed25519-donna-64bit-sse2.h +436 -0
  28. data/ext/nanocurrency_ext/ed25519-donna-64bit-tables.h +53 -0
  29. data/ext/nanocurrency_ext/ed25519-donna-64bit-x86-32bit.h +435 -0
  30. data/ext/nanocurrency_ext/ed25519-donna-64bit-x86.h +351 -0
  31. data/ext/nanocurrency_ext/ed25519-donna-basepoint-table.h +259 -0
  32. data/ext/nanocurrency_ext/ed25519-donna-batchverify.h +275 -0
  33. data/ext/nanocurrency_ext/ed25519-donna-impl-base.h +364 -0
  34. data/ext/nanocurrency_ext/ed25519-donna-impl-sse2.h +390 -0
  35. data/ext/nanocurrency_ext/ed25519-donna-portable-identify.h +103 -0
  36. data/ext/nanocurrency_ext/ed25519-donna-portable.h +135 -0
  37. data/ext/nanocurrency_ext/ed25519-donna.h +115 -0
  38. data/ext/nanocurrency_ext/ed25519-hash-custom.c +28 -0
  39. data/ext/nanocurrency_ext/ed25519-hash-custom.h +30 -0
  40. data/ext/nanocurrency_ext/ed25519-hash.h +219 -0
  41. data/ext/nanocurrency_ext/ed25519-randombytes-custom.h +10 -0
  42. data/ext/nanocurrency_ext/ed25519-randombytes.h +91 -0
  43. data/ext/nanocurrency_ext/ed25519.c +150 -0
  44. data/ext/nanocurrency_ext/ed25519.h +30 -0
  45. data/ext/nanocurrency_ext/extconf.rb +3 -0
  46. data/ext/nanocurrency_ext/fuzz/README.md +173 -0
  47. data/ext/nanocurrency_ext/fuzz/build-nix.php +134 -0
  48. data/ext/nanocurrency_ext/fuzz/curve25519-ref10.c +1272 -0
  49. data/ext/nanocurrency_ext/fuzz/curve25519-ref10.h +8 -0
  50. data/ext/nanocurrency_ext/fuzz/ed25519-donna-sse2.c +3 -0
  51. data/ext/nanocurrency_ext/fuzz/ed25519-donna.c +1 -0
  52. data/ext/nanocurrency_ext/fuzz/ed25519-donna.h +34 -0
  53. data/ext/nanocurrency_ext/fuzz/ed25519-ref10.c +4647 -0
  54. data/ext/nanocurrency_ext/fuzz/ed25519-ref10.h +9 -0
  55. data/ext/nanocurrency_ext/fuzz/fuzz-curve25519.c +172 -0
  56. data/ext/nanocurrency_ext/fuzz/fuzz-ed25519.c +219 -0
  57. data/ext/nanocurrency_ext/modm-donna-32bit.h +469 -0
  58. data/ext/nanocurrency_ext/modm-donna-64bit.h +361 -0
  59. data/ext/nanocurrency_ext/rbext.c +164 -0
  60. data/ext/nanocurrency_ext/regression.h +1024 -0
  61. data/lib/nano/account.rb +59 -0
  62. data/lib/nano/base32.rb +87 -0
  63. data/lib/nano/block.rb +142 -0
  64. data/lib/nano/check.rb +65 -0
  65. data/lib/nano/conversion.rb +102 -0
  66. data/lib/nano/hash.rb +43 -0
  67. data/lib/nano/key.rb +69 -0
  68. data/lib/nano/utils.rb +45 -0
  69. data/lib/nano/work.rb +51 -0
  70. data/lib/nanocurrency.rb +7 -0
  71. data/lib/nanocurrency/version.rb +3 -0
  72. data/lib/nanocurrency_ext.bundle +0 -0
  73. data/nanocurrency.gemspec +44 -0
  74. metadata +192 -0
@@ -0,0 +1,373 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+
16
+ #include <stdint.h>
17
+ #include <string.h>
18
+ #include <stdio.h>
19
+
20
+ #include "blake2.h"
21
+ #include "blake2-impl.h"
22
+
23
+ #include "blake2-config.h"
24
+
25
+ #ifdef _MSC_VER
26
+ #include <intrin.h> /* for _mm_set_epi64x */
27
+ #endif
28
+ #include <emmintrin.h>
29
+ #if defined(HAVE_SSSE3)
30
+ #include <tmmintrin.h>
31
+ #endif
32
+ #if defined(HAVE_SSE41)
33
+ #include <smmintrin.h>
34
+ #endif
35
+ #if defined(HAVE_AVX)
36
+ #include <immintrin.h>
37
+ #endif
38
+ #if defined(HAVE_XOP)
39
+ #include <x86intrin.h>
40
+ #endif
41
+
42
+ #include "blake2b-round.h"
43
+
44
+ static const uint64_t blake2b_IV[8] = /* eight 64-bit initialization words: XORed with the parameter block in blake2b_init_param and loaded into rows 3 and 4 in blake2b_compress */
45
+ {
46
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
47
+ 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
48
+ 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
49
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
50
+ };
51
+
52
+ /* Some helper functions */
53
+ static void blake2b_set_lastnode( blake2b_state *S )
54
+ { /* Sets flag word S->f[1] to all ones; called from blake2b_set_lastblock when S->last_node is set. */
55
+ S->f[1] = (uint64_t)-1;
56
+ }
57
+
58
+ static int blake2b_is_lastblock( const blake2b_state *S )
59
+ { /* Returns 1 when flag word S->f[0] is nonzero (blake2b_set_lastblock has run on this state), otherwise 0. */
60
+ return S->f[0] != 0;
61
+ }
62
+
63
+ static void blake2b_set_lastblock( blake2b_state *S )
64
+ { /* Sets flag word S->f[0] to all ones; blake2b_final calls this right before the final compression. */
65
+ if( S->last_node ) blake2b_set_lastnode( S ); /* also raise f[1] when the state is flagged as a last node */
66
+
67
+ S->f[0] = (uint64_t)-1;
68
+ }
69
+
70
+ static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
71
+ { /* Adds inc to the two-word counter: t[0] is the low word, t[1] receives the carry. */
72
+ S->t[0] += inc;
73
+ S->t[1] += ( S->t[0] < inc ); /* t[0] wrapped around iff the sum is now smaller than inc */
74
+ }
75
+
76
+ /* init xors IV with input parameter block */
77
+ int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
78
+ { /* Initializes *S from parameter block *P: h = IV XOR parameter bytes, outlen = P->digest_length. Always returns 0. */
79
+ size_t i;
80
+ /*blake2b_init0( S ); */
81
+ const unsigned char * v = ( const unsigned char * )( blake2b_IV );
82
+ const unsigned char * p = ( const unsigned char * )( P );
83
+ unsigned char * h = ( unsigned char * )( S->h );
84
+ /* IV XOR ParamBlock */
85
+ memset( S, 0, sizeof( blake2b_state ) ); /* zero the whole state before h is filled in */
86
+
87
+ for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; /* byte-wise XOR; reads BLAKE2B_OUTBYTES bytes of *P - assumes the parameter block is at least that large, TODO confirm against blake2.h */
88
+
89
+ S->outlen = P->digest_length;
90
+ return 0;
91
+ }
92
+
93
+
94
+ /* Some sort of default parameter block initialization, for sequential blake2b */
95
+ int blake2b_init( blake2b_state *S, size_t outlen )
96
+ { /* Unkeyed init: fills a parameter block for the requested digest length and passes it to blake2b_init_param. Returns -1 if outlen is 0 or larger than BLAKE2B_OUTBYTES. */
97
+ blake2b_param P[1];
98
+
99
+ if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
100
+
101
+ P->digest_length = (uint8_t)outlen; /* cast is lossless: outlen was range-checked above */
102
+ P->key_length = 0;
103
+ P->fanout = 1;
104
+ P->depth = 1;
105
+ store32( &P->leaf_length, 0 );
106
+ store32( &P->node_offset, 0 );
107
+ store32( &P->xof_length, 0 );
108
+ P->node_depth = 0;
109
+ P->inner_length = 0;
110
+ memset( P->reserved, 0, sizeof( P->reserved ) );
111
+ memset( P->salt, 0, sizeof( P->salt ) );
112
+ memset( P->personal, 0, sizeof( P->personal ) );
113
+
114
+ return blake2b_init_param( S, P );
115
+ }
116
+
117
+ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
118
+ { /* Keyed init: fills a parameter block with the digest and key lengths, initializes *S, then absorbs the key as one zero-padded block. Returns 0 on success, -1 on invalid arguments or failed initialization. */
119
+ blake2b_param P[1];
120
+
121
+ if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
122
+
123
+ if ( ( !key ) || ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; /* a NULL key would otherwise be handed to memcpy below */
124
+
125
+ P->digest_length = (uint8_t)outlen;
126
+ P->key_length = (uint8_t)keylen;
127
+ P->fanout = 1;
128
+ P->depth = 1;
129
+ store32( &P->leaf_length, 0 );
130
+ store32( &P->node_offset, 0 );
131
+ store32( &P->xof_length, 0 );
132
+ P->node_depth = 0;
133
+ P->inner_length = 0;
134
+ memset( P->reserved, 0, sizeof( P->reserved ) );
135
+ memset( P->salt, 0, sizeof( P->salt ) );
136
+ memset( P->personal, 0, sizeof( P->personal ) );
137
+
138
+ if( blake2b_init_param( S, P ) < 0 )
139
+ return -1; /* report the failure instead of returning success */
140
+
141
+ {
142
+ uint8_t block[BLAKE2B_BLOCKBYTES];
143
+ memset( block, 0, BLAKE2B_BLOCKBYTES );
144
+ memcpy( block, key, keylen ); /* keylen <= BLAKE2B_KEYBYTES was checked above; the rest of the block stays zero */
145
+ blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
146
+ secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
147
+ }
148
+ return 0;
149
+ }
150
+
151
+ static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
152
+ { /* Mixes one message block into S->h: twelve ROUND expansions over a 4-row working state held in eight 128-bit registers. */
153
+ __m128i row1l, row1h;
154
+ __m128i row2l, row2h;
155
+ __m128i row3l, row3h;
156
+ __m128i row4l, row4h;
157
+ __m128i b0, b1;
158
+ __m128i t0, t1;
159
+ #if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
160
+ const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); /* byte-shuffle mask referenced by _mm_roti_epi64 for the 16-bit rotation */
161
+ const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); /* byte-shuffle mask referenced by _mm_roti_epi64 for the 24-bit rotation */
162
+ #endif
163
+ #if defined(HAVE_SSE41)
164
+ const __m128i m0 = LOADU( block + 00 ); /* SSE4.1 path: the block is held as eight 128-bit vectors */
165
+ const __m128i m1 = LOADU( block + 16 );
166
+ const __m128i m2 = LOADU( block + 32 );
167
+ const __m128i m3 = LOADU( block + 48 );
168
+ const __m128i m4 = LOADU( block + 64 );
169
+ const __m128i m5 = LOADU( block + 80 );
170
+ const __m128i m6 = LOADU( block + 96 );
171
+ const __m128i m7 = LOADU( block + 112 );
172
+ #else
173
+ const uint64_t m0 = load64(block + 0 * sizeof(uint64_t)); /* fallback path: sixteen 64-bit message words */
174
+ const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
175
+ const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
176
+ const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
177
+ const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
178
+ const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
179
+ const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
180
+ const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
181
+ const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
182
+ const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
183
+ const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
184
+ const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
185
+ const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
186
+ const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
187
+ const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
188
+ const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
189
+ #endif
190
+ row1l = LOADU( &S->h[0] ); /* rows 1 and 2 start as the chaining value h[0..7] */
191
+ row1h = LOADU( &S->h[2] );
192
+ row2l = LOADU( &S->h[4] );
193
+ row2h = LOADU( &S->h[6] );
194
+ row3l = LOADU( &blake2b_IV[0] ); /* row 3 starts as IV[0..3] */
195
+ row3h = LOADU( &blake2b_IV[2] );
196
+ row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) ); /* row 4 low = IV[4..5] XOR counter words t[0..1] */
197
+ row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) ); /* row 4 high = IV[6..7] XOR flag words f[0..1] */
198
+ ROUND( 0 ); /* ROUND(r) pastes r into LOAD_MSG_r_1..4; those macros come from the blake2b-load-*.h headers, not shown here */
199
+ ROUND( 1 );
200
+ ROUND( 2 );
201
+ ROUND( 3 );
202
+ ROUND( 4 );
203
+ ROUND( 5 );
204
+ ROUND( 6 );
205
+ ROUND( 7 );
206
+ ROUND( 8 );
207
+ ROUND( 9 );
208
+ ROUND( 10 );
209
+ ROUND( 11 );
210
+ row1l = _mm_xor_si128( row3l, row1l ); /* feed-forward: h[0..3] ^= row1 ^ row3 */
211
+ row1h = _mm_xor_si128( row3h, row1h );
212
+ STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
213
+ STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
214
+ row2l = _mm_xor_si128( row4l, row2l ); /* feed-forward: h[4..7] ^= row2 ^ row4 */
215
+ row2h = _mm_xor_si128( row4h, row2h );
216
+ STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
217
+ STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
218
+ }
219
+
220
+
221
+ int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
222
+ { /* Absorbs inlen bytes from pin into the state; full blocks are compressed, the tail stays in S->buf. Always returns 0. */
223
+ const unsigned char * in = (const unsigned char *)pin;
224
+ if( inlen > 0 )
225
+ {
226
+ size_t left = S->buflen; /* bytes already waiting in the buffer */
227
+ size_t fill = BLAKE2B_BLOCKBYTES - left; /* free space remaining in the buffer */
228
+ if( inlen > fill ) /* strictly greater: input that only just fills the buffer is kept buffered, not compressed */
229
+ {
230
+ S->buflen = 0;
231
+ memcpy( S->buf + left, in, fill ); /* Fill buffer */
232
+ blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
233
+ blake2b_compress( S, S->buf ); /* Compress */
234
+ in += fill; inlen -= fill;
235
+ while(inlen > BLAKE2B_BLOCKBYTES) { /* compress whole blocks straight from the input; the loop leaves between 1 and BLAKE2B_BLOCKBYTES bytes for the buffer */
236
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
237
+ blake2b_compress( S, in );
238
+ in += BLAKE2B_BLOCKBYTES;
239
+ inlen -= BLAKE2B_BLOCKBYTES;
240
+ }
241
+ }
242
+ memcpy( S->buf + S->buflen, in, inlen ); /* stash the remaining bytes for a later update or for blake2b_final */
243
+ S->buflen += inlen;
244
+ }
245
+ return 0;
246
+ }
247
+
248
+
249
+ int blake2b_final( blake2b_state *S, void *out, size_t outlen )
250
+ { /* Finishes the hash and copies S->outlen digest bytes to out. Returns -1 if out is NULL, outlen is smaller than S->outlen, or the state was already finalized; otherwise 0. */
251
+ if( out == NULL || outlen < S->outlen )
252
+ return -1;
253
+
254
+ if( blake2b_is_lastblock( S ) ) /* f[0] already set: a previous blake2b_final ran on this state */
255
+ return -1;
256
+
257
+ blake2b_increment_counter( S, S->buflen ); /* count only the bytes actually buffered, not the padding */
258
+ blake2b_set_lastblock( S );
259
+ memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
260
+ blake2b_compress( S, S->buf );
261
+
262
+ memcpy( out, &S->h[0], S->outlen ); /* copies the in-memory bytes of h directly, with no byte-order conversion */
263
+ return 0;
264
+ }
265
+
266
+
267
+ int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
268
+ { /* One-shot hash: validates the arguments, initializes a local state (keyed when keylen > 0), absorbs in[0..inlen) and writes outlen digest bytes to out. Returns 0 on success, -1 on any failure. */
269
+ blake2b_state S[1];
270
+
271
+ /* Verify parameters */
272
+ if ( NULL == in && inlen > 0 ) return -1;
273
+
274
+ if ( NULL == out ) return -1;
275
+
276
+ if( NULL == key && keylen > 0 ) return -1;
277
+
278
+ if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
279
+
280
+ if( keylen > BLAKE2B_KEYBYTES ) return -1;
281
+
282
+ if( keylen )
283
+ {
284
+ if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
285
+ }
286
+ else
287
+ {
288
+ if( blake2b_init( S, outlen ) < 0 ) return -1;
289
+ }
290
+
291
+ if( blake2b_update( S, ( const uint8_t * )in, inlen ) < 0 ) return -1; /* do not discard the status of the streaming calls */
292
+ if( blake2b_final( S, out, outlen ) < 0 ) return -1;
293
+ return 0;
294
+ }
295
+
296
+ int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { /* Alias for blake2b(): forwards all arguments unchanged and returns its result. */
297
+ return blake2b(out, outlen, in, inlen, key, keylen);
298
+ }
299
+
300
+ #if defined(SUPERCOP)
301
+ int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
302
+ { /* Compiled only when SUPERCOP is defined: unkeyed blake2b with a BLAKE2B_OUTBYTES-byte digest written to out. */
303
+ return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
304
+ }
305
+ #endif
306
+
307
+ #if defined(BLAKE2B_SELFTEST)
308
+ #include <string.h>
309
+ #include "blake2-kat.h"
310
+ int main( void )
311
+ { /* Self-test (built only with BLAKE2B_SELFTEST): checks keyed hashes of every prefix of buf against blake2b_keyed_kat via the one-shot and the streaming API. Prints "ok" and returns 0, or prints "error" and returns -1. */
312
+ uint8_t key[BLAKE2B_KEYBYTES];
313
+ uint8_t buf[BLAKE2_KAT_LENGTH];
314
+ size_t i, step;
315
+
316
+ for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
317
+ key[i] = ( uint8_t )i; /* key bytes are 0, 1, 2, ... */
318
+
319
+ for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
320
+ buf[i] = ( uint8_t )i; /* message bytes are 0, 1, 2, ... (truncated to 8 bits) */
321
+
322
+ /* Test simple API */
323
+ for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
324
+ {
325
+ uint8_t hash[BLAKE2B_OUTBYTES];
326
+ blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); /* hash the first i bytes of buf */
327
+
328
+ if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
329
+ {
330
+ goto fail;
331
+ }
332
+ }
333
+
334
+ /* Test streaming API */
335
+ for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { /* feed each message in chunks of every size below one block */
336
+ for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
337
+ uint8_t hash[BLAKE2B_OUTBYTES];
338
+ blake2b_state S;
339
+ uint8_t * p = buf;
340
+ size_t mlen = i;
341
+ int err = 0;
342
+
343
+ if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
344
+ goto fail;
345
+ }
346
+
347
+ while (mlen >= step) {
348
+ if ( (err = blake2b_update(&S, p, step)) < 0 ) {
349
+ goto fail;
350
+ }
351
+ mlen -= step;
352
+ p += step;
353
+ }
354
+ if ( (err = blake2b_update(&S, p, mlen)) < 0) { /* remaining tail shorter than step (possibly empty) */
355
+ goto fail;
356
+ }
357
+ if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
358
+ goto fail;
359
+ }
360
+
361
+ if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) {
362
+ goto fail;
363
+ }
364
+ }
365
+ }
366
+
367
+ puts( "ok" );
368
+ return 0;
369
+ fail:
370
+ puts("error");
371
+ return -1;
372
+ }
373
+ #endif
@@ -0,0 +1,157 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_ROUND_H
16
+ #define BLAKE2B_ROUND_H
17
+
18
+ #define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) /* unaligned 128-bit load from p */
19
+ #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) /* unaligned 128-bit store of r to p */
20
+
21
+ #define TOF(reg) _mm_castsi128_ps((reg)) /* reinterpret an integer vector as a float vector */
22
+ #define TOI(reg) _mm_castps_si128((reg)) /* reinterpret a float vector as an integer vector */
23
+
24
+ #define LIKELY(x) __builtin_expect((x),1) /* branch hint built on a compiler builtin; not referenced in the lines shown here */
25
+
26
+
27
+ /* Microarchitecture-specific macros */
28
+ #ifndef HAVE_XOP
29
+ #ifdef HAVE_SSSE3
30
+ #define _mm_roti_epi64(x, c) /* rotates each 64-bit lane of x; callers pass the right-rotation amount negated (-32, -24, -16, -63). r16 and r24 must be in scope at the expansion site. The whole expansion is parenthesized so it is safe inside larger expressions. */ \
31
+ ( (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
32
+ : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
33
+ : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
34
+ : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
35
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) )
36
+ #else
37
+ #define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) /* SSE2 fallback: right shift XOR left shift of the complementary amount */
38
+ #endif
39
+ #else
40
+ /* ... */
41
+ #endif
42
+
43
+
44
+
45
+ #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) /* first half of the mixing step, applied to both register halves; rotations by 32 and 24 */ \
46
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); /* row1 += message words + row2 */ \
47
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
48
+ \
49
+ row4l = _mm_xor_si128(row4l, row1l); /* row4 ^= row1 */ \
50
+ row4h = _mm_xor_si128(row4h, row1h); \
51
+ \
52
+ row4l = _mm_roti_epi64(row4l, -32); \
53
+ row4h = _mm_roti_epi64(row4h, -32); \
54
+ \
55
+ row3l = _mm_add_epi64(row3l, row4l); /* row3 += row4 */ \
56
+ row3h = _mm_add_epi64(row3h, row4h); \
57
+ \
58
+ row2l = _mm_xor_si128(row2l, row3l); /* row2 ^= row3 */ \
59
+ row2h = _mm_xor_si128(row2h, row3h); \
60
+ \
61
+ row2l = _mm_roti_epi64(row2l, -24); \
62
+ row2h = _mm_roti_epi64(row2h, -24); \
63
+
64
+ #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) /* second half of the mixing step; same sequence as G1 but with rotations by 16 and 63 */ \
65
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); /* row1 += message words + row2 */ \
66
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
67
+ \
68
+ row4l = _mm_xor_si128(row4l, row1l); /* row4 ^= row1 */ \
69
+ row4h = _mm_xor_si128(row4h, row1h); \
70
+ \
71
+ row4l = _mm_roti_epi64(row4l, -16); \
72
+ row4h = _mm_roti_epi64(row4h, -16); \
73
+ \
74
+ row3l = _mm_add_epi64(row3l, row4l); /* row3 += row4 */ \
75
+ row3h = _mm_add_epi64(row3h, row4h); \
76
+ \
77
+ row2l = _mm_xor_si128(row2l, row3l); /* row2 ^= row3 */ \
78
+ row2h = _mm_xor_si128(row2h, row3h); \
79
+ \
80
+ row2l = _mm_roti_epi64(row2l, -63); \
81
+ row2h = _mm_roti_epi64(row2h, -63); \
82
+
83
+ #if defined(HAVE_SSSE3)
84
+ #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) /* SSSE3 version: rotates row2 by one 64-bit word, row3 by two and row4 by three; row1 is untouched. Uses t0/t1 from the expansion site. */ \
85
+ t0 = _mm_alignr_epi8(row2h, row2l, 8); /* words 1,2 of row2 */ \
86
+ t1 = _mm_alignr_epi8(row2l, row2h, 8); /* words 3,0 of row2 */ \
87
+ row2l = t0; \
88
+ row2h = t1; \
89
+ \
90
+ t0 = row3l; /* swap the two halves of row3 */ \
91
+ row3l = row3h; \
92
+ row3h = t0; \
93
+ \
94
+ t0 = _mm_alignr_epi8(row4h, row4l, 8); /* words 1,2 of row4 */ \
95
+ t1 = _mm_alignr_epi8(row4l, row4h, 8); /* words 3,0 of row4 */ \
96
+ row4l = t1; \
97
+ row4h = t0;
98
+
99
+ #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) /* SSSE3 version: undoes DIAGONALIZE by rotating row2 by three 64-bit words, row3 by two and row4 by one. Uses t0/t1 from the expansion site. */ \
100
+ t0 = _mm_alignr_epi8(row2l, row2h, 8); /* words 3,0 of row2 */ \
101
+ t1 = _mm_alignr_epi8(row2h, row2l, 8); /* words 1,2 of row2 */ \
102
+ row2l = t0; \
103
+ row2h = t1; \
104
+ \
105
+ t0 = row3l; /* swap the two halves of row3 */ \
106
+ row3l = row3h; \
107
+ row3h = t0; \
108
+ \
109
+ t0 = _mm_alignr_epi8(row4l, row4h, 8); /* words 3,0 of row4 */ \
110
+ t1 = _mm_alignr_epi8(row4h, row4l, 8); /* words 1,2 of row4 */ \
111
+ row4l = t1; \
112
+ row4h = t0;
113
+ #else
114
+
115
+ #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) /* SSE2-only version: same row rotations as the SSSE3 macro, built from unpack instructions. No trailing semicolon; the expansion site supplies it. */ \
116
+ t0 = row4l;\
117
+ t1 = row2l;\
118
+ row4l = row3l; /* row4l doubles as scratch for swapping the halves of row3; its old value is saved in t0 */ \
119
+ row3l = row3h;\
120
+ row3h = row4l;\
121
+ row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); /* words 3,0 of row4 */ \
122
+ row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); /* words 1,2 of row4 */ \
123
+ row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); /* words 1,2 of row2 */ \
124
+ row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) /* words 3,0 of row2 */
125
+
126
+ #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) /* SSE2-only version: reverses the SSE2 DIAGONALIZE using unpack instructions. No trailing semicolon; the expansion site supplies it. */ \
127
+ t0 = row3l; /* swap the two halves of row3 */ \
128
+ row3l = row3h;\
129
+ row3h = t0;\
130
+ t0 = row2l;\
131
+ t1 = row4l;\
132
+ row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); /* words 3,0 of row2 */ \
133
+ row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); /* words 1,2 of row2 */ \
134
+ row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); /* words 1,2 of row4 */ \
135
+ row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) /* words 3,0 of row4 */
136
+
137
+ #endif
138
+
139
+ #if defined(HAVE_SSE41)
140
+ #include "blake2b-load-sse41.h"
141
+ #else
142
+ #include "blake2b-load-sse2.h"
143
+ #endif
144
+
145
+ #define ROUND(r) /* one full round: G1+G2 on the rows as loaded, then G1+G2 again between DIAGONALIZE and UNDIAGONALIZE; r is token-pasted to select the LOAD_MSG_r_1..4 macros */ \
146
+ LOAD_MSG_ ##r ##_1(b0, b1); \
147
+ G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
148
+ LOAD_MSG_ ##r ##_2(b0, b1); \
149
+ G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
150
+ DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
151
+ LOAD_MSG_ ##r ##_3(b0, b1); \
152
+ G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
153
+ LOAD_MSG_ ##r ##_4(b0, b1); \
154
+ G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
155
+ UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
156
+
157
+ #endif