argon2 2.3.0 → 2.3.1
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/Steepfile +1 -1
- data/argon2.gemspec +1 -1
- data/lib/argon2/version.rb +1 -1
- metadata +5 -88
- data/ext/argon2_wrap/argon_wrap.o +0 -0
- data/ext/phc-winner-argon2/.git +0 -1
- data/ext/phc-winner-argon2/.gitattributes +0 -10
- data/ext/phc-winner-argon2/.gitignore +0 -22
- data/ext/phc-winner-argon2/.travis.yml +0 -25
- data/ext/phc-winner-argon2/Argon2.sln +0 -158
- data/ext/phc-winner-argon2/CHANGELOG.md +0 -32
- data/ext/phc-winner-argon2/LICENSE +0 -314
- data/ext/phc-winner-argon2/Makefile +0 -255
- data/ext/phc-winner-argon2/Package.swift +0 -46
- data/ext/phc-winner-argon2/README.md +0 -303
- data/ext/phc-winner-argon2/appveyor.yml +0 -25
- data/ext/phc-winner-argon2/argon2-specs.pdf +0 -0
- data/ext/phc-winner-argon2/export.sh +0 -7
- data/ext/phc-winner-argon2/include/argon2.h +0 -437
- data/ext/phc-winner-argon2/kats/argon2d +0 -12304
- data/ext/phc-winner-argon2/kats/argon2d.shasum +0 -1
- data/ext/phc-winner-argon2/kats/argon2d_v16 +0 -12304
- data/ext/phc-winner-argon2/kats/argon2d_v16.shasum +0 -1
- data/ext/phc-winner-argon2/kats/argon2i +0 -12304
- data/ext/phc-winner-argon2/kats/argon2i.shasum +0 -1
- data/ext/phc-winner-argon2/kats/argon2i_v16 +0 -12304
- data/ext/phc-winner-argon2/kats/argon2i_v16.shasum +0 -1
- data/ext/phc-winner-argon2/kats/argon2id +0 -12304
- data/ext/phc-winner-argon2/kats/argon2id.shasum +0 -1
- data/ext/phc-winner-argon2/kats/argon2id_v16 +0 -12304
- data/ext/phc-winner-argon2/kats/argon2id_v16.shasum +0 -1
- data/ext/phc-winner-argon2/kats/check-sums.ps1 +0 -42
- data/ext/phc-winner-argon2/kats/check-sums.sh +0 -13
- data/ext/phc-winner-argon2/kats/test.ps1 +0 -50
- data/ext/phc-winner-argon2/kats/test.sh +0 -49
- data/ext/phc-winner-argon2/latex/IEEEtran.cls +0 -6347
- data/ext/phc-winner-argon2/latex/Makefile +0 -18
- data/ext/phc-winner-argon2/latex/argon2-specs.tex +0 -920
- data/ext/phc-winner-argon2/latex/pics/argon2-par.pdf +0 -0
- data/ext/phc-winner-argon2/latex/pics/compression.pdf +0 -0
- data/ext/phc-winner-argon2/latex/pics/generic.pdf +0 -0
- data/ext/phc-winner-argon2/latex/pics/power-distribution.jpg +0 -0
- data/ext/phc-winner-argon2/latex/tradeoff.bib +0 -822
- data/ext/phc-winner-argon2/libargon2.pc.in +0 -18
- data/ext/phc-winner-argon2/man/argon2.1 +0 -57
- data/ext/phc-winner-argon2/src/argon2.c +0 -452
- data/ext/phc-winner-argon2/src/bench.c +0 -111
- data/ext/phc-winner-argon2/src/blake2/blake2-impl.h +0 -156
- data/ext/phc-winner-argon2/src/blake2/blake2.h +0 -89
- data/ext/phc-winner-argon2/src/blake2/blake2b.c +0 -390
- data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h +0 -471
- data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h +0 -56
- data/ext/phc-winner-argon2/src/core.c +0 -648
- data/ext/phc-winner-argon2/src/core.h +0 -228
- data/ext/phc-winner-argon2/src/encoding.c +0 -463
- data/ext/phc-winner-argon2/src/encoding.h +0 -57
- data/ext/phc-winner-argon2/src/genkat.c +0 -213
- data/ext/phc-winner-argon2/src/genkat.h +0 -51
- data/ext/phc-winner-argon2/src/opt.c +0 -283
- data/ext/phc-winner-argon2/src/ref.c +0 -194
- data/ext/phc-winner-argon2/src/run.c +0 -337
- data/ext/phc-winner-argon2/src/test.c +0 -289
- data/ext/phc-winner-argon2/src/thread.c +0 -57
- data/ext/phc-winner-argon2/src/thread.h +0 -67
- data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj +0 -231
- data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj.filters +0 -69
- data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj +0 -231
- data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj.filters +0 -69
- data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj +0 -230
- data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj.filters +0 -66
- data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj +0 -244
- data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj.filters +0 -72
- data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj +0 -235
- data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj.filters +0 -69
- data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj +0 -243
- data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj.filters +0 -69
- data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj +0 -231
- data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj.filters +0 -69
- data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj +0 -230
- data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj.filters +0 -66
- data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj +0 -232
- data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj.filters +0 -72
- data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj +0 -231
- data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj.filters +0 -69
data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h
@@ -1,471 +0,0 @@
-/*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0        : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
-#ifndef BLAKE_ROUND_MKA_OPT_H
-#define BLAKE_ROUND_MKA_OPT_H
-
-#include "blake2-impl.h"
-
-#include <emmintrin.h>
-#if defined(__SSSE3__)
-#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
-#endif
-
-#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
-#include <x86intrin.h>
-#endif
-
-#if !defined(__AVX512F__)
-#if !defined(__AVX2__)
-#if !defined(__XOP__)
-#if defined(__SSSE3__)
-#define r16 \
-    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
-#define r24 \
-    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
-#define _mm_roti_epi64(x, c) \
-    (-(c) == 32) \
-        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
-        : (-(c) == 24) \
-              ? _mm_shuffle_epi8((x), r24) \
-              : (-(c) == 16) \
-                    ? _mm_shuffle_epi8((x), r16) \
-                    : (-(c) == 63) \
-                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                          _mm_add_epi64((x), (x))) \
-                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                          _mm_slli_epi64((x), 64 - (-(c))))
-#else /* defined(__SSE2__) */
-#define _mm_roti_epi64(r, c) \
-    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
-#endif
-#else
-#endif
-
-static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
-    const __m128i z = _mm_mul_epu32(x, y);
-    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
-}
-
-#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        A0 = fBlaMka(A0, B0); \
-        A1 = fBlaMka(A1, B1); \
-        \
-        D0 = _mm_xor_si128(D0, A0); \
-        D1 = _mm_xor_si128(D1, A1); \
-        \
-        D0 = _mm_roti_epi64(D0, -32); \
-        D1 = _mm_roti_epi64(D1, -32); \
-        \
-        C0 = fBlaMka(C0, D0); \
-        C1 = fBlaMka(C1, D1); \
-        \
-        B0 = _mm_xor_si128(B0, C0); \
-        B1 = _mm_xor_si128(B1, C1); \
-        \
-        B0 = _mm_roti_epi64(B0, -24); \
-        B1 = _mm_roti_epi64(B1, -24); \
-    } while ((void)0, 0)
-
-#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        A0 = fBlaMka(A0, B0); \
-        A1 = fBlaMka(A1, B1); \
-        \
-        D0 = _mm_xor_si128(D0, A0); \
-        D1 = _mm_xor_si128(D1, A1); \
-        \
-        D0 = _mm_roti_epi64(D0, -16); \
-        D1 = _mm_roti_epi64(D1, -16); \
-        \
-        C0 = fBlaMka(C0, D0); \
-        C1 = fBlaMka(C1, D1); \
-        \
-        B0 = _mm_xor_si128(B0, C0); \
-        B1 = _mm_xor_si128(B1, C1); \
-        \
-        B0 = _mm_roti_epi64(B0, -63); \
-        B1 = _mm_roti_epi64(B1, -63); \
-    } while ((void)0, 0)
-
-#if defined(__SSSE3__)
-#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
-        __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
-        B0 = t0; \
-        B1 = t1; \
-        \
-        t0 = C0; \
-        C0 = C1; \
-        C1 = t0; \
-        \
-        t0 = _mm_alignr_epi8(D1, D0, 8); \
-        t1 = _mm_alignr_epi8(D0, D1, 8); \
-        D0 = t1; \
-        D1 = t0; \
-    } while ((void)0, 0)
-
-#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
-        __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
-        B0 = t0; \
-        B1 = t1; \
-        \
-        t0 = C0; \
-        C0 = C1; \
-        C1 = t0; \
-        \
-        t0 = _mm_alignr_epi8(D0, D1, 8); \
-        t1 = _mm_alignr_epi8(D1, D0, 8); \
-        D0 = t1; \
-        D1 = t0; \
-    } while ((void)0, 0)
-#else /* SSE2 */
-#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        __m128i t0 = D0; \
-        __m128i t1 = B0; \
-        D0 = C0; \
-        C0 = C1; \
-        C1 = D0; \
-        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
-        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
-        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
-        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
-    } while ((void)0, 0)
-
-#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        __m128i t0, t1; \
-        t0 = C0; \
-        C0 = C1; \
-        C1 = t0; \
-        t0 = B0; \
-        t1 = D0; \
-        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
-        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
-        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
-        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
-    } while ((void)0, 0)
-#endif
-
-#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-    } while ((void)0, 0)
-#else /* __AVX2__ */
-
-#include <immintrin.h>
-
-#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
-#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
-#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
-#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
-
-#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        __m256i ml = _mm256_mul_epu32(A0, B0); \
-        ml = _mm256_add_epi64(ml, ml); \
-        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-        D0 = _mm256_xor_si256(D0, A0); \
-        D0 = rotr32(D0); \
-        \
-        ml = _mm256_mul_epu32(C0, D0); \
-        ml = _mm256_add_epi64(ml, ml); \
-        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
-        \
-        B0 = _mm256_xor_si256(B0, C0); \
-        B0 = rotr24(B0); \
-        \
-        ml = _mm256_mul_epu32(A1, B1); \
-        ml = _mm256_add_epi64(ml, ml); \
-        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-        D1 = _mm256_xor_si256(D1, A1); \
-        D1 = rotr32(D1); \
-        \
-        ml = _mm256_mul_epu32(C1, D1); \
-        ml = _mm256_add_epi64(ml, ml); \
-        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
-        \
-        B1 = _mm256_xor_si256(B1, C1); \
-        B1 = rotr24(B1); \
-    } while((void)0, 0);
-
-#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        __m256i ml = _mm256_mul_epu32(A0, B0); \
-        ml = _mm256_add_epi64(ml, ml); \
-        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-        D0 = _mm256_xor_si256(D0, A0); \
-        D0 = rotr16(D0); \
-        \
-        ml = _mm256_mul_epu32(C0, D0); \
-        ml = _mm256_add_epi64(ml, ml); \
-        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
-        B0 = _mm256_xor_si256(B0, C0); \
-        B0 = rotr63(B0); \
-        \
-        ml = _mm256_mul_epu32(A1, B1); \
-        ml = _mm256_add_epi64(ml, ml); \
-        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-        D1 = _mm256_xor_si256(D1, A1); \
-        D1 = rotr16(D1); \
-        \
-        ml = _mm256_mul_epu32(C1, D1); \
-        ml = _mm256_add_epi64(ml, ml); \
-        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
-        B1 = _mm256_xor_si256(B1, C1); \
-        B1 = rotr63(B1); \
-    } while((void)0, 0);
-
-#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
-        \
-        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
-        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
-    } while((void)0, 0);
-
-#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-        \
-        tmp1 = C0; \
-        C0 = C1; \
-        C1 = tmp1; \
-        \
-        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
-        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
-        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-    } while(0);
-
-#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
-        \
-        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
-        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
-    } while((void)0, 0);
-
-#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-        \
-        tmp1 = C0; \
-        C0 = C1; \
-        C1 = tmp1; \
-        \
-        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
-        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
-        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-    } while((void)0, 0);
-
-#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do{ \
-        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        \
-        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-        \
-        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        \
-        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-    } while((void)0, 0);
-
-#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do{ \
-        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        \
-        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        \
-        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-        \
-        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    } while((void)0, 0);
-
-#endif /* __AVX2__ */
-
-#else /* __AVX512F__ */
-
-#include <immintrin.h>
-
-#define ror64(x, n) _mm512_ror_epi64((x), (n))
-
-static __m512i muladd(__m512i x, __m512i y)
-{
-    __m512i z = _mm512_mul_epu32(x, y);
-    return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
-}
-
-#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        A0 = muladd(A0, B0); \
-        A1 = muladd(A1, B1); \
-        \
-        D0 = _mm512_xor_si512(D0, A0); \
-        D1 = _mm512_xor_si512(D1, A1); \
-        \
-        D0 = ror64(D0, 32); \
-        D1 = ror64(D1, 32); \
-        \
-        C0 = muladd(C0, D0); \
-        C1 = muladd(C1, D1); \
-        \
-        B0 = _mm512_xor_si512(B0, C0); \
-        B1 = _mm512_xor_si512(B1, C1); \
-        \
-        B0 = ror64(B0, 24); \
-        B1 = ror64(B1, 24); \
-    } while ((void)0, 0)
-
-#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        A0 = muladd(A0, B0); \
-        A1 = muladd(A1, B1); \
-        \
-        D0 = _mm512_xor_si512(D0, A0); \
-        D1 = _mm512_xor_si512(D1, A1); \
-        \
-        D0 = ror64(D0, 16); \
-        D1 = ror64(D1, 16); \
-        \
-        C0 = muladd(C0, D0); \
-        C1 = muladd(C1, D1); \
-        \
-        B0 = _mm512_xor_si512(B0, C0); \
-        B1 = _mm512_xor_si512(B1, C1); \
-        \
-        B0 = ror64(B0, 63); \
-        B1 = ror64(B1, 63); \
-    } while ((void)0, 0)
-
-#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
-        \
-        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-        \
-        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
-        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
-    } while ((void)0, 0)
-
-#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
-        \
-        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-        \
-        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
-        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
-    } while ((void)0, 0)
-
-#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
-    do { \
-        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-        \
-        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-    } while ((void)0, 0)
-
-#define SWAP_HALVES(A0, A1) \
-    do { \
-        __m512i t0, t1; \
-        t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
-        t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
-        A0 = t0; \
-        A1 = t1; \
-    } while((void)0, 0)
-
-#define SWAP_QUARTERS(A0, A1) \
-    do { \
-        SWAP_HALVES(A0, A1); \
-        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
-    } while((void)0, 0)
-
-#define UNSWAP_QUARTERS(A0, A1) \
-    do { \
-        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
-        SWAP_HALVES(A0, A1); \
-    } while((void)0, 0)
-
-#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
-    do { \
-        SWAP_HALVES(A0, B0); \
-        SWAP_HALVES(C0, D0); \
-        SWAP_HALVES(A1, B1); \
-        SWAP_HALVES(C1, D1); \
-        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-        SWAP_HALVES(A0, B0); \
-        SWAP_HALVES(C0, D0); \
-        SWAP_HALVES(A1, B1); \
-        SWAP_HALVES(C1, D1); \
-    } while ((void)0, 0)
-
-#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-    do { \
-        SWAP_QUARTERS(A0, A1); \
-        SWAP_QUARTERS(B0, B1); \
-        SWAP_QUARTERS(C0, C1); \
-        SWAP_QUARTERS(D0, D1); \
-        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-        UNSWAP_QUARTERS(A0, A1); \
-        UNSWAP_QUARTERS(B0, B1); \
-        UNSWAP_QUARTERS(C0, C1); \
-        UNSWAP_QUARTERS(D0, D1); \
-    } while ((void)0, 0)
-
-#endif /* __AVX512F__ */
-#endif /* BLAKE_ROUND_MKA_OPT_H */
data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h
@@ -1,56 +0,0 @@
-/*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0        : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
-#ifndef BLAKE_ROUND_MKA_H
-#define BLAKE_ROUND_MKA_H
-
-#include "blake2.h"
-#include "blake2-impl.h"
-
-/* designed by the Lyra PHC team */
-static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
-    const uint64_t m = UINT64_C(0xFFFFFFFF);
-    const uint64_t xy = (x & m) * (y & m);
-    return x + y + 2 * xy;
-}
-
-#define G(a, b, c, d) \
-    do { \
-        a = fBlaMka(a, b); \
-        d = rotr64(d ^ a, 32); \
-        c = fBlaMka(c, d); \
-        b = rotr64(b ^ c, 24); \
-        a = fBlaMka(a, b); \
-        d = rotr64(d ^ a, 16); \
-        c = fBlaMka(c, d); \
-        b = rotr64(b ^ c, 63); \
-    } while ((void)0, 0)
-
-#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
-                           v12, v13, v14, v15) \
-    do { \
-        G(v0, v4, v8, v12); \
-        G(v1, v5, v9, v13); \
-        G(v2, v6, v10, v14); \
-        G(v3, v7, v11, v15); \
-        G(v0, v5, v10, v15); \
-        G(v1, v6, v11, v12); \
-        G(v2, v7, v8, v13); \
-        G(v3, v4, v9, v14); \
-    } while ((void)0, 0)
-
-#endif