argon2id 0.8.0.rc1-arm-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +142 -0
- data/Gemfile +9 -0
- data/LICENSE +11 -0
- data/README.md +371 -0
- data/Rakefile +70 -0
- data/argon2id.gemspec +59 -0
- data/ext/argon2id/argon2id.c +76 -0
- data/ext/argon2id/extconf.rb +17 -0
- data/ext/argon2id/libargon2/LICENSE +314 -0
- data/ext/argon2id/libargon2/argon2.c +452 -0
- data/ext/argon2id/libargon2/argon2.h +437 -0
- data/ext/argon2id/libargon2/blake2/blake2-impl.h +156 -0
- data/ext/argon2id/libargon2/blake2/blake2.h +89 -0
- data/ext/argon2id/libargon2/blake2/blake2b.c +390 -0
- data/ext/argon2id/libargon2/blake2/blamka-round-opt.h +471 -0
- data/ext/argon2id/libargon2/blake2/blamka-round-ref.h +56 -0
- data/ext/argon2id/libargon2/core.c +648 -0
- data/ext/argon2id/libargon2/core.h +228 -0
- data/ext/argon2id/libargon2/encoding.c +463 -0
- data/ext/argon2id/libargon2/encoding.h +57 -0
- data/ext/argon2id/libargon2/ref.c +194 -0
- data/ext/argon2id/libargon2/thread.c +57 -0
- data/ext/argon2id/libargon2/thread.h +67 -0
- data/lib/argon2id/3.1/argon2id.so +0 -0
- data/lib/argon2id/3.2/argon2id.so +0 -0
- data/lib/argon2id/3.3/argon2id.so +0 -0
- data/lib/argon2id/3.4/argon2id.so +0 -0
- data/lib/argon2id/extension.rb +71 -0
- data/lib/argon2id/password.rb +142 -0
- data/lib/argon2id/version.rb +5 -0
- data/lib/argon2id.rb +45 -0
- data/test/argon2id/test_password.rb +554 -0
- data/test/test_argon2id.rb +66 -0
- metadata +132 -0
--- /dev/null
+++ b/data/ext/argon2id/libargon2/blake2/blamka-round-opt.h
@@ -0,0 +1,471 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "blake2-impl.h"
+
+#include <emmintrin.h>
+#if defined(__SSSE3__)
+#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
+#endif
+
+#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
+#include <x86intrin.h>
+#endif
+
+#if !defined(__AVX512F__)
+#if !defined(__AVX2__)
+#if !defined(__XOP__)
+#if defined(__SSSE3__)
+#define r16 \
+    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define r24 \
+    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define _mm_roti_epi64(x, c) \
+    (-(c) == 32) \
+        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
+        : (-(c) == 24) \
+              ? _mm_shuffle_epi8((x), r24) \
+              : (-(c) == 16) \
+                    ? _mm_shuffle_epi8((x), r16) \
+                    : (-(c) == 63) \
+                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+                                          _mm_add_epi64((x), (x))) \
+                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+                                          _mm_slli_epi64((x), 64 - (-(c))))
+#else /* defined(__SSE2__) */
+#define _mm_roti_epi64(r, c) \
+    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
+#endif
+#else
+#endif
+
+static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
+    const __m128i z = _mm_mul_epu32(x, y);
+    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = fBlaMka(A0, B0); \
+        A1 = fBlaMka(A1, B1); \
+        \
+        D0 = _mm_xor_si128(D0, A0); \
+        D1 = _mm_xor_si128(D1, A1); \
+        \
+        D0 = _mm_roti_epi64(D0, -32); \
+        D1 = _mm_roti_epi64(D1, -32); \
+        \
+        C0 = fBlaMka(C0, D0); \
+        C1 = fBlaMka(C1, D1); \
+        \
+        B0 = _mm_xor_si128(B0, C0); \
+        B1 = _mm_xor_si128(B1, C1); \
+        \
+        B0 = _mm_roti_epi64(B0, -24); \
+        B1 = _mm_roti_epi64(B1, -24); \
+    } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = fBlaMka(A0, B0); \
+        A1 = fBlaMka(A1, B1); \
+        \
+        D0 = _mm_xor_si128(D0, A0); \
+        D1 = _mm_xor_si128(D1, A1); \
+        \
+        D0 = _mm_roti_epi64(D0, -16); \
+        D1 = _mm_roti_epi64(D1, -16); \
+        \
+        C0 = fBlaMka(C0, D0); \
+        C1 = fBlaMka(C1, D1); \
+        \
+        B0 = _mm_xor_si128(B0, C0); \
+        B1 = _mm_xor_si128(B1, C1); \
+        \
+        B0 = _mm_roti_epi64(B0, -63); \
+        B1 = _mm_roti_epi64(B1, -63); \
+    } while ((void)0, 0)
+
+#if defined(__SSSE3__)
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
+        __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
+        B0 = t0; \
+        B1 = t1; \
+        \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        \
+        t0 = _mm_alignr_epi8(D1, D0, 8); \
+        t1 = _mm_alignr_epi8(D0, D1, 8); \
+        D0 = t1; \
+        D1 = t0; \
+    } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
+        __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
+        B0 = t0; \
+        B1 = t1; \
+        \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        \
+        t0 = _mm_alignr_epi8(D0, D1, 8); \
+        t1 = _mm_alignr_epi8(D1, D0, 8); \
+        D0 = t1; \
+        D1 = t0; \
+    } while ((void)0, 0)
+#else /* SSE2 */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0 = D0; \
+        __m128i t1 = B0; \
+        D0 = C0; \
+        C0 = C1; \
+        C1 = D0; \
+        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
+        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
+        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
+        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
+    } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        __m128i t0, t1; \
+        t0 = C0; \
+        C0 = C1; \
+        C1 = t0; \
+        t0 = B0; \
+        t1 = D0; \
+        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
+        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
+        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
+        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
+    } while ((void)0, 0)
+#endif
+
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+    } while ((void)0, 0)
+#else /* __AVX2__ */
+
+#include <immintrin.h>
+
+#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
+#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
+
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr32(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr24(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr32(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr24(B1); \
+    } while((void)0, 0);
+
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr16(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr63(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr16(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr63(B1); \
+    } while((void)0, 0);
+
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while((void)0, 0);
+
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while(0);
+
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while((void)0, 0);
+
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0);
+
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    } while((void)0, 0);
+
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    } while((void)0, 0);
+
+#endif /* __AVX2__ */
+
+#else /* __AVX512F__ */
+
+#include <immintrin.h>
+
+#define ror64(x, n) _mm512_ror_epi64((x), (n))
+
+static __m512i muladd(__m512i x, __m512i y)
+{
+    __m512i z = _mm512_mul_epu32(x, y);
+    return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
+}
+
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+        \
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+        \
+        D0 = ror64(D0, 32); \
+        D1 = ror64(D1, 32); \
+        \
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+        \
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+        \
+        B0 = ror64(B0, 24); \
+        B1 = ror64(B1, 24); \
+    } while ((void)0, 0)
+
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        A0 = muladd(A0, B0); \
+        A1 = muladd(A1, B1); \
+        \
+        D0 = _mm512_xor_si512(D0, A0); \
+        D1 = _mm512_xor_si512(D1, A1); \
+        \
+        D0 = ror64(D0, 16); \
+        D1 = ror64(D1, 16); \
+        \
+        C0 = muladd(C0, D0); \
+        C1 = muladd(C1, D1); \
+        \
+        B0 = _mm512_xor_si512(B0, C0); \
+        B1 = _mm512_xor_si512(B1, C1); \
+        \
+        B0 = ror64(B0, 63); \
+        B1 = ror64(B1, 63); \
+    } while ((void)0, 0)
+
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+        \
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        \
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while ((void)0, 0)
+
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+        \
+        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        \
+        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while ((void)0, 0)
+
+#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
+        \
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
+    } while ((void)0, 0)
+
+#define SWAP_HALVES(A0, A1) \
+    do { \
+        __m512i t0, t1; \
+        t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
+        t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
+        A0 = t0; \
+        A1 = t1; \
+    } while((void)0, 0)
+
+#define SWAP_QUARTERS(A0, A1) \
+    do { \
+        SWAP_HALVES(A0, A1); \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+    } while((void)0, 0)
+
+#define UNSWAP_QUARTERS(A0, A1) \
+    do { \
+        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
+        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
+        SWAP_HALVES(A0, A1); \
+    } while((void)0, 0)
+
+#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
+    do { \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        SWAP_HALVES(A0, B0); \
+        SWAP_HALVES(C0, D0); \
+        SWAP_HALVES(A1, B1); \
+        SWAP_HALVES(C1, D1); \
+    } while ((void)0, 0)
+
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        SWAP_QUARTERS(A0, A1); \
+        SWAP_QUARTERS(B0, B1); \
+        SWAP_QUARTERS(C0, C1); \
+        SWAP_QUARTERS(D0, D1); \
+        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
+        UNSWAP_QUARTERS(A0, A1); \
+        UNSWAP_QUARTERS(B0, B1); \
+        UNSWAP_QUARTERS(C0, C1); \
+        UNSWAP_QUARTERS(D0, D1); \
+    } while ((void)0, 0)
+
+#endif /* __AVX512F__ */
+#endif /* BLAKE_ROUND_MKA_OPT_H */
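
Note: the _mm_roti_epi64 emulation above leans on the standard rotate-right identity. As a minimal scalar sketch of that identity (illustrative only, not part of the packaged sources; the helper name is hypothetical):

/* For 0 < c < 64, rotating x right by c bits is a pair of shifts OR'd
 * together. The SSSE3 path above special-cases the byte-aligned amounts
 * (16 and 24 via _mm_shuffle_epi8, 32 via _mm_shuffle_epi32) and c == 63
 * (x >> 63 XOR (x + x)), which are cheaper than the two-shift form. */
#include <stdint.h>

static uint64_t rotr64_sketch(uint64_t x, unsigned c) {
    return (x >> c) | (x << (64 - c));
}
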
--- /dev/null
+++ b/data/ext/argon2id/libargon2/blake2/blamka-round-ref.h
@@ -0,0 +1,56 @@
+/*
+ * Argon2 reference source code package - reference C implementations
+ *
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+ *
+ * You may use this work under the terms of a Creative Commons CC0 1.0
+ * License/Waiver or the Apache Public License 2.0, at your option. The terms of
+ * these licenses can be found at:
+ *
+ * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * You should have received a copy of both of these licenses along with this
+ * software. If not, they may be obtained at the above URLs.
+ */
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/* designed by the Lyra PHC team */
+static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
+    const uint64_t m = UINT64_C(0xFFFFFFFF);
+    const uint64_t xy = (x & m) * (y & m);
+    return x + y + 2 * xy;
+}
+
+#define G(a, b, c, d) \
+    do { \
+        a = fBlaMka(a, b); \
+        d = rotr64(d ^ a, 32); \
+        c = fBlaMka(c, d); \
+        b = rotr64(b ^ c, 24); \
+        a = fBlaMka(a, b); \
+        d = rotr64(d ^ a, 16); \
+        c = fBlaMka(c, d); \
+        b = rotr64(b ^ c, 63); \
+    } while ((void)0, 0)
+
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
+                           v12, v13, v14, v15) \
+    do { \
+        G(v0, v4, v8, v12); \
+        G(v1, v5, v9, v13); \
+        G(v2, v6, v10, v14); \
+        G(v3, v7, v11, v15); \
+        G(v0, v5, v10, v15); \
+        G(v1, v6, v11, v12); \
+        G(v2, v7, v8, v13); \
+        G(v3, v4, v9, v14); \
+    } while ((void)0, 0)
+
+#endif
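
Both headers implement the BlaMka multiply-add designed by the Lyra PHC team: f(x, y) = x + y + 2 * lo32(x) * lo32(y) (mod 2^64), where lo32 takes the low 32 bits. Below is a standalone cross-check that the SSE2 fBlaMka from blamka-round-opt.h agrees with the scalar fBlaMka from blamka-round-ref.h (an illustrative sketch, not part of the gem; the file and function names are hypothetical):

/* check_blamka.c - compare the scalar and SSE2 BlaMka multiply-adds.
 * Build on x86-64 with e.g. `cc -msse2 check_blamka.c`. */
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar version, as in blamka-round-ref.h. */
static uint64_t fblamka_scalar(uint64_t x, uint64_t y) {
    const uint64_t m = UINT64_C(0xFFFFFFFF);
    return x + y + 2 * ((x & m) * (y & m));
}

/* SSE2 version, as in blamka-round-opt.h: _mm_mul_epu32 multiplies the
 * low 32 bits of each 64-bit lane, yielding full 64-bit products. */
static __m128i fblamka_sse2(__m128i x, __m128i y) {
    const __m128i z = _mm_mul_epu32(x, y);
    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}

int main(void) {
    const uint64_t x = UINT64_C(0x0123456789abcdef);
    const uint64_t y = UINT64_C(0xfedcba9876543210);

    /* Put x and y in lane 0, run the SIMD version, read lane 0 back. */
    __m128i vx = _mm_set_epi64x(0, (long long)x);
    __m128i vy = _mm_set_epi64x(0, (long long)y);
    uint64_t simd = (uint64_t)_mm_cvtsi128_si64(fblamka_sse2(vx, vy));

    printf("scalar: %016llx\n", (unsigned long long)fblamka_scalar(x, y));
    printf("simd:   %016llx\n", (unsigned long long)simd);
    return fblamka_scalar(x, y) == simd ? 0 : 1;
}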