argon2 2.3.0 → 2.3.1

Files changed (86)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/Steepfile +1 -1
  4. data/argon2.gemspec +1 -1
  5. data/lib/argon2/version.rb +1 -1
  6. metadata +5 -88
  7. data/ext/argon2_wrap/argon_wrap.o +0 -0
  8. data/ext/phc-winner-argon2/.git +0 -1
  9. data/ext/phc-winner-argon2/.gitattributes +0 -10
  10. data/ext/phc-winner-argon2/.gitignore +0 -22
  11. data/ext/phc-winner-argon2/.travis.yml +0 -25
  12. data/ext/phc-winner-argon2/Argon2.sln +0 -158
  13. data/ext/phc-winner-argon2/CHANGELOG.md +0 -32
  14. data/ext/phc-winner-argon2/LICENSE +0 -314
  15. data/ext/phc-winner-argon2/Makefile +0 -255
  16. data/ext/phc-winner-argon2/Package.swift +0 -46
  17. data/ext/phc-winner-argon2/README.md +0 -303
  18. data/ext/phc-winner-argon2/appveyor.yml +0 -25
  19. data/ext/phc-winner-argon2/argon2-specs.pdf +0 -0
  20. data/ext/phc-winner-argon2/export.sh +0 -7
  21. data/ext/phc-winner-argon2/include/argon2.h +0 -437
  22. data/ext/phc-winner-argon2/kats/argon2d +0 -12304
  23. data/ext/phc-winner-argon2/kats/argon2d.shasum +0 -1
  24. data/ext/phc-winner-argon2/kats/argon2d_v16 +0 -12304
  25. data/ext/phc-winner-argon2/kats/argon2d_v16.shasum +0 -1
  26. data/ext/phc-winner-argon2/kats/argon2i +0 -12304
  27. data/ext/phc-winner-argon2/kats/argon2i.shasum +0 -1
  28. data/ext/phc-winner-argon2/kats/argon2i_v16 +0 -12304
  29. data/ext/phc-winner-argon2/kats/argon2i_v16.shasum +0 -1
  30. data/ext/phc-winner-argon2/kats/argon2id +0 -12304
  31. data/ext/phc-winner-argon2/kats/argon2id.shasum +0 -1
  32. data/ext/phc-winner-argon2/kats/argon2id_v16 +0 -12304
  33. data/ext/phc-winner-argon2/kats/argon2id_v16.shasum +0 -1
  34. data/ext/phc-winner-argon2/kats/check-sums.ps1 +0 -42
  35. data/ext/phc-winner-argon2/kats/check-sums.sh +0 -13
  36. data/ext/phc-winner-argon2/kats/test.ps1 +0 -50
  37. data/ext/phc-winner-argon2/kats/test.sh +0 -49
  38. data/ext/phc-winner-argon2/latex/IEEEtran.cls +0 -6347
  39. data/ext/phc-winner-argon2/latex/Makefile +0 -18
  40. data/ext/phc-winner-argon2/latex/argon2-specs.tex +0 -920
  41. data/ext/phc-winner-argon2/latex/pics/argon2-par.pdf +0 -0
  42. data/ext/phc-winner-argon2/latex/pics/compression.pdf +0 -0
  43. data/ext/phc-winner-argon2/latex/pics/generic.pdf +0 -0
  44. data/ext/phc-winner-argon2/latex/pics/power-distribution.jpg +0 -0
  45. data/ext/phc-winner-argon2/latex/tradeoff.bib +0 -822
  46. data/ext/phc-winner-argon2/libargon2.pc.in +0 -18
  47. data/ext/phc-winner-argon2/man/argon2.1 +0 -57
  48. data/ext/phc-winner-argon2/src/argon2.c +0 -452
  49. data/ext/phc-winner-argon2/src/bench.c +0 -111
  50. data/ext/phc-winner-argon2/src/blake2/blake2-impl.h +0 -156
  51. data/ext/phc-winner-argon2/src/blake2/blake2.h +0 -89
  52. data/ext/phc-winner-argon2/src/blake2/blake2b.c +0 -390
  53. data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h +0 -471
  54. data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h +0 -56
  55. data/ext/phc-winner-argon2/src/core.c +0 -648
  56. data/ext/phc-winner-argon2/src/core.h +0 -228
  57. data/ext/phc-winner-argon2/src/encoding.c +0 -463
  58. data/ext/phc-winner-argon2/src/encoding.h +0 -57
  59. data/ext/phc-winner-argon2/src/genkat.c +0 -213
  60. data/ext/phc-winner-argon2/src/genkat.h +0 -51
  61. data/ext/phc-winner-argon2/src/opt.c +0 -283
  62. data/ext/phc-winner-argon2/src/ref.c +0 -194
  63. data/ext/phc-winner-argon2/src/run.c +0 -337
  64. data/ext/phc-winner-argon2/src/test.c +0 -289
  65. data/ext/phc-winner-argon2/src/thread.c +0 -57
  66. data/ext/phc-winner-argon2/src/thread.h +0 -67
  67. data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj +0 -231
  68. data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj.filters +0 -69
  69. data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj +0 -231
  70. data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj.filters +0 -69
  71. data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj +0 -230
  72. data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj.filters +0 -66
  73. data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj +0 -244
  74. data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj.filters +0 -72
  75. data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj +0 -235
  76. data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj.filters +0 -69
  77. data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj +0 -243
  78. data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj.filters +0 -69
  79. data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj +0 -231
  80. data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj.filters +0 -69
  81. data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj +0 -230
  82. data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj.filters +0 -66
  83. data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj +0 -232
  84. data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj.filters +0 -72
  85. data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj +0 -231
  86. data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj.filters +0 -69
data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h
@@ -1,471 +0,0 @@
- /*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
- #ifndef BLAKE_ROUND_MKA_OPT_H
- #define BLAKE_ROUND_MKA_OPT_H
-
- #include "blake2-impl.h"
-
- #include <emmintrin.h>
- #if defined(__SSSE3__)
- #include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
- #endif
-
- #if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
- #include <x86intrin.h>
- #endif
-
- #if !defined(__AVX512F__)
- #if !defined(__AVX2__)
- #if !defined(__XOP__)
- #if defined(__SSSE3__)
- #define r16 \
-     (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
- #define r24 \
-     (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
- #define _mm_roti_epi64(x, c) \
-     (-(c) == 32) \
-         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
-         : (-(c) == 24) \
-               ? _mm_shuffle_epi8((x), r24) \
-               : (-(c) == 16) \
-                     ? _mm_shuffle_epi8((x), r16) \
-                     : (-(c) == 63) \
-                           ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                           _mm_add_epi64((x), (x))) \
-                           : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                           _mm_slli_epi64((x), 64 - (-(c))))
- #else /* defined(__SSE2__) */
- #define _mm_roti_epi64(r, c) \
-     _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
- #endif
- #else
- #endif
-
- static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
-     const __m128i z = _mm_mul_epu32(x, y);
-     return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
- }
-
- #define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         A0 = fBlaMka(A0, B0); \
-         A1 = fBlaMka(A1, B1); \
- \
-         D0 = _mm_xor_si128(D0, A0); \
-         D1 = _mm_xor_si128(D1, A1); \
- \
-         D0 = _mm_roti_epi64(D0, -32); \
-         D1 = _mm_roti_epi64(D1, -32); \
- \
-         C0 = fBlaMka(C0, D0); \
-         C1 = fBlaMka(C1, D1); \
- \
-         B0 = _mm_xor_si128(B0, C0); \
-         B1 = _mm_xor_si128(B1, C1); \
- \
-         B0 = _mm_roti_epi64(B0, -24); \
-         B1 = _mm_roti_epi64(B1, -24); \
-     } while ((void)0, 0)
-
- #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         A0 = fBlaMka(A0, B0); \
-         A1 = fBlaMka(A1, B1); \
- \
-         D0 = _mm_xor_si128(D0, A0); \
-         D1 = _mm_xor_si128(D1, A1); \
- \
-         D0 = _mm_roti_epi64(D0, -16); \
-         D1 = _mm_roti_epi64(D1, -16); \
- \
-         C0 = fBlaMka(C0, D0); \
-         C1 = fBlaMka(C1, D1); \
- \
-         B0 = _mm_xor_si128(B0, C0); \
-         B1 = _mm_xor_si128(B1, C1); \
- \
-         B0 = _mm_roti_epi64(B0, -63); \
-         B1 = _mm_roti_epi64(B1, -63); \
-     } while ((void)0, 0)
-
- #if defined(__SSSE3__)
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
-         __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
-         B0 = t0; \
-         B1 = t1; \
- \
-         t0 = C0; \
-         C0 = C1; \
-         C1 = t0; \
- \
-         t0 = _mm_alignr_epi8(D1, D0, 8); \
-         t1 = _mm_alignr_epi8(D0, D1, 8); \
-         D0 = t1; \
-         D1 = t0; \
-     } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
-         __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
-         B0 = t0; \
-         B1 = t1; \
- \
-         t0 = C0; \
-         C0 = C1; \
-         C1 = t0; \
- \
-         t0 = _mm_alignr_epi8(D0, D1, 8); \
-         t1 = _mm_alignr_epi8(D1, D0, 8); \
-         D0 = t1; \
-         D1 = t0; \
-     } while ((void)0, 0)
- #else /* SSE2 */
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         __m128i t0 = D0; \
-         __m128i t1 = B0; \
-         D0 = C0; \
-         C0 = C1; \
-         C1 = D0; \
-         D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
-         D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
-         B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
-         B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
-     } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         __m128i t0, t1; \
-         t0 = C0; \
-         C0 = C1; \
-         C1 = t0; \
-         t0 = B0; \
-         t1 = D0; \
-         B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
-         B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
-         D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
-         D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
-     } while ((void)0, 0)
- #endif
-
- #define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-         G2(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-         G2(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-     } while ((void)0, 0)
- #else /* __AVX2__ */
-
- #include <immintrin.h>
-
- #define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
- #define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
- #define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
- #define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
-
- #define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         __m256i ml = _mm256_mul_epu32(A0, B0); \
-         ml = _mm256_add_epi64(ml, ml); \
-         A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-         D0 = _mm256_xor_si256(D0, A0); \
-         D0 = rotr32(D0); \
- \
-         ml = _mm256_mul_epu32(C0, D0); \
-         ml = _mm256_add_epi64(ml, ml); \
-         C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
- \
-         B0 = _mm256_xor_si256(B0, C0); \
-         B0 = rotr24(B0); \
- \
-         ml = _mm256_mul_epu32(A1, B1); \
-         ml = _mm256_add_epi64(ml, ml); \
-         A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-         D1 = _mm256_xor_si256(D1, A1); \
-         D1 = rotr32(D1); \
- \
-         ml = _mm256_mul_epu32(C1, D1); \
-         ml = _mm256_add_epi64(ml, ml); \
-         C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
- \
-         B1 = _mm256_xor_si256(B1, C1); \
-         B1 = rotr24(B1); \
-     } while((void)0, 0);
-
- #define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         __m256i ml = _mm256_mul_epu32(A0, B0); \
-         ml = _mm256_add_epi64(ml, ml); \
-         A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-         D0 = _mm256_xor_si256(D0, A0); \
-         D0 = rotr16(D0); \
- \
-         ml = _mm256_mul_epu32(C0, D0); \
-         ml = _mm256_add_epi64(ml, ml); \
-         C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
-         B0 = _mm256_xor_si256(B0, C0); \
-         B0 = rotr63(B0); \
- \
-         ml = _mm256_mul_epu32(A1, B1); \
-         ml = _mm256_add_epi64(ml, ml); \
-         A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-         D1 = _mm256_xor_si256(D1, A1); \
-         D1 = rotr16(D1); \
- \
-         ml = _mm256_mul_epu32(C1, D1); \
-         ml = _mm256_add_epi64(ml, ml); \
-         C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
-         B1 = _mm256_xor_si256(B1, C1); \
-         B1 = rotr63(B1); \
-     } while((void)0, 0);
-
- #define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-         C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-         D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
- \
-         B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
-         C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-         D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
-     } while((void)0, 0);
-
- #define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-         __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-         B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-         B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
- \
-         tmp1 = C0; \
-         C0 = C1; \
-         C1 = tmp1; \
- \
-         tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
-         tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
-         D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-         D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-     } while(0);
-
- #define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-         C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-         D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
- \
-         B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
-         C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-         D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
-     } while((void)0, 0);
-
- #define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-         __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-         B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-         B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
- \
-         tmp1 = C0; \
-         C0 = C1; \
-         C1 = tmp1; \
- \
-         tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
-         tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
-         D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-         D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-     } while((void)0, 0);
-
- #define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do{ \
-         G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-         G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- \
-         DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
- \
-         G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-         G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- \
-         UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     } while((void)0, 0);
-
- #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do{ \
-         G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-         G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- \
-         DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- \
-         G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-         G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- \
-         UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     } while((void)0, 0);
-
- #endif /* __AVX2__ */
-
- #else /* __AVX512F__ */
-
- #include <immintrin.h>
-
- #define ror64(x, n) _mm512_ror_epi64((x), (n))
-
- static __m512i muladd(__m512i x, __m512i y)
- {
-     __m512i z = _mm512_mul_epu32(x, y);
-     return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
- }
-
- #define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         A0 = muladd(A0, B0); \
-         A1 = muladd(A1, B1); \
- \
-         D0 = _mm512_xor_si512(D0, A0); \
-         D1 = _mm512_xor_si512(D1, A1); \
- \
-         D0 = ror64(D0, 32); \
-         D1 = ror64(D1, 32); \
- \
-         C0 = muladd(C0, D0); \
-         C1 = muladd(C1, D1); \
- \
-         B0 = _mm512_xor_si512(B0, C0); \
-         B1 = _mm512_xor_si512(B1, C1); \
- \
-         B0 = ror64(B0, 24); \
-         B1 = ror64(B1, 24); \
-     } while ((void)0, 0)
-
- #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         A0 = muladd(A0, B0); \
-         A1 = muladd(A1, B1); \
- \
-         D0 = _mm512_xor_si512(D0, A0); \
-         D1 = _mm512_xor_si512(D1, A1); \
- \
-         D0 = ror64(D0, 16); \
-         D1 = ror64(D1, 16); \
- \
-         C0 = muladd(C0, D0); \
-         C1 = muladd(C1, D1); \
- \
-         B0 = _mm512_xor_si512(B0, C0); \
-         B1 = _mm512_xor_si512(B1, C1); \
- \
-         B0 = ror64(B0, 63); \
-         B1 = ror64(B1, 63); \
-     } while ((void)0, 0)
-
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-         B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
- \
-         C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-         C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
- \
-         D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
-         D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
-     } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-         B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
- \
-         C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-         C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
- \
-         D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
-         D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
-     } while ((void)0, 0)
-
- #define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
-     do { \
-         G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-         G2(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-         G2(A0, B0, C0, D0, A1, B1, C1, D1); \
- \
-         UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-     } while ((void)0, 0)
-
- #define SWAP_HALVES(A0, A1) \
-     do { \
-         __m512i t0, t1; \
-         t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
-         t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
-         A0 = t0; \
-         A1 = t1; \
-     } while((void)0, 0)
-
- #define SWAP_QUARTERS(A0, A1) \
-     do { \
-         SWAP_HALVES(A0, A1); \
-         A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-         A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
-     } while((void)0, 0)
-
- #define UNSWAP_QUARTERS(A0, A1) \
-     do { \
-         A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-         A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
-         SWAP_HALVES(A0, A1); \
-     } while((void)0, 0)
-
- #define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
-     do { \
-         SWAP_HALVES(A0, B0); \
-         SWAP_HALVES(C0, D0); \
-         SWAP_HALVES(A1, B1); \
-         SWAP_HALVES(C1, D1); \
-         BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-         SWAP_HALVES(A0, B0); \
-         SWAP_HALVES(C0, D0); \
-         SWAP_HALVES(A1, B1); \
-         SWAP_HALVES(C1, D1); \
-     } while ((void)0, 0)
-
- #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     do { \
-         SWAP_QUARTERS(A0, A1); \
-         SWAP_QUARTERS(B0, B1); \
-         SWAP_QUARTERS(C0, C1); \
-         SWAP_QUARTERS(D0, D1); \
-         BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-         UNSWAP_QUARTERS(A0, A1); \
-         UNSWAP_QUARTERS(B0, B1); \
-         UNSWAP_QUARTERS(C0, C1); \
-         UNSWAP_QUARTERS(D0, D1); \
-     } while ((void)0, 0)
-
- #endif /* __AVX512F__ */
- #endif /* BLAKE_ROUND_MKA_OPT_H */
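
For reference, the SSE2 fallback in the removed header implements a per-lane 64-bit rotate right as a pair of shifts combined with XOR, since the shifted halves never overlap. A minimal standalone sketch checking that form against a scalar rotation (assumptions: x86-64 with SSE2 and a C99 compiler; roti_epi64 and rotr64 are hypothetical helper names, not part of the diff):

#include <emmintrin.h> /* SSE2 intrinsics */
#include <stdint.h>
#include <stdio.h>

/* Same shape as the removed header's SSE2 fallback
 * _mm_roti_epi64(r, c), written with a positive count n = -c. */
static __m128i roti_epi64(__m128i r, int n) {
    return _mm_xor_si128(_mm_srli_epi64(r, n), _mm_slli_epi64(r, 64 - n));
}

/* Scalar reference: rotate a 64-bit word right by n (0 < n < 64). */
static uint64_t rotr64(uint64_t w, unsigned n) {
    return (w >> n) | (w << (64 - n));
}

int main(void) {
    uint64_t in[2] = { UINT64_C(0x0123456789ABCDEF), UINT64_C(1) };
    uint64_t out[2];
    __m128i v = roti_epi64(_mm_loadu_si128((const __m128i *)in), 32);
    _mm_storeu_si128((__m128i *)out, v);
    /* Both lanes should match the scalar rotation: prints "1 1". */
    printf("%d %d\n", out[0] == rotr64(in[0], 32), out[1] == rotr64(in[1], 32));
    return 0;
}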
data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h
@@ -1,56 +0,0 @@
- /*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
- #ifndef BLAKE_ROUND_MKA_H
- #define BLAKE_ROUND_MKA_H
-
- #include "blake2.h"
- #include "blake2-impl.h"
-
- /* designed by the Lyra PHC team */
- static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
-     const uint64_t m = UINT64_C(0xFFFFFFFF);
-     const uint64_t xy = (x & m) * (y & m);
-     return x + y + 2 * xy;
- }
-
- #define G(a, b, c, d) \
-     do { \
-         a = fBlaMka(a, b); \
-         d = rotr64(d ^ a, 32); \
-         c = fBlaMka(c, d); \
-         b = rotr64(b ^ c, 24); \
-         a = fBlaMka(a, b); \
-         d = rotr64(d ^ a, 16); \
-         c = fBlaMka(c, d); \
-         b = rotr64(b ^ c, 63); \
-     } while ((void)0, 0)
-
- #define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
-                            v12, v13, v14, v15) \
-     do { \
-         G(v0, v4, v8, v12); \
-         G(v1, v5, v9, v13); \
-         G(v2, v6, v10, v14); \
-         G(v3, v7, v11, v15); \
-         G(v0, v5, v10, v15); \
-         G(v1, v6, v11, v12); \
-         G(v2, v7, v8, v13); \
-         G(v3, v4, v9, v14); \
-     } while ((void)0, 0)
-
- #endif
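
For reference, the core primitive in the removed blamka-round-ref.h is the BlaMka multiplication-hardened addition: fBlaMka(x, y) = x + y + 2 * lo32(x) * lo32(y), which the G quarter-round applies with rotations by 32, 24, 16, and 63. A minimal standalone sketch of the scalar definition and one G step (assumptions: a C99 compiler; the main driver, sample values, and the local rotr64 helper, which blake2-impl.h supplies in the real tree, are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Scalar rotate right by c (0 < c < 64). */
static uint64_t rotr64(uint64_t w, unsigned c) {
    return (w >> c) | (w << (64 - c));
}

/* BlaMka: x + y + 2 * (low 32 bits of x) * (low 32 bits of y). */
static uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t m = UINT64_C(0xFFFFFFFF);
    const uint64_t xy = (x & m) * (y & m);
    return x + y + 2 * xy;
}

int main(void) {
    /* 3 + 4 + 2*(3*4) = 31 */
    printf("%llu\n", (unsigned long long)fBlaMka(3, 4));

    /* One G quarter-round on arbitrary sample words. */
    uint64_t a = 1, b = 2, c = 3, d = 4;
    a = fBlaMka(a, b); d = rotr64(d ^ a, 32);
    c = fBlaMka(c, d); b = rotr64(b ^ c, 24);
    a = fBlaMka(a, b); d = rotr64(d ^ a, 16);
    c = fBlaMka(c, d); b = rotr64(b ^ c, 63);
    printf("%016llx %016llx %016llx %016llx\n",
           (unsigned long long)a, (unsigned long long)b,
           (unsigned long long)c, (unsigned long long)d);
    return 0;
}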