argon2 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/Steepfile +1 -1
  4. data/argon2.gemspec +1 -1
  5. data/lib/argon2/version.rb +1 -1
  6. metadata +5 -88
  7. data/ext/argon2_wrap/argon_wrap.o +0 -0
  8. data/ext/phc-winner-argon2/.git +0 -1
  9. data/ext/phc-winner-argon2/.gitattributes +0 -10
  10. data/ext/phc-winner-argon2/.gitignore +0 -22
  11. data/ext/phc-winner-argon2/.travis.yml +0 -25
  12. data/ext/phc-winner-argon2/Argon2.sln +0 -158
  13. data/ext/phc-winner-argon2/CHANGELOG.md +0 -32
  14. data/ext/phc-winner-argon2/LICENSE +0 -314
  15. data/ext/phc-winner-argon2/Makefile +0 -255
  16. data/ext/phc-winner-argon2/Package.swift +0 -46
  17. data/ext/phc-winner-argon2/README.md +0 -303
  18. data/ext/phc-winner-argon2/appveyor.yml +0 -25
  19. data/ext/phc-winner-argon2/argon2-specs.pdf +0 -0
  20. data/ext/phc-winner-argon2/export.sh +0 -7
  21. data/ext/phc-winner-argon2/include/argon2.h +0 -437
  22. data/ext/phc-winner-argon2/kats/argon2d +0 -12304
  23. data/ext/phc-winner-argon2/kats/argon2d.shasum +0 -1
  24. data/ext/phc-winner-argon2/kats/argon2d_v16 +0 -12304
  25. data/ext/phc-winner-argon2/kats/argon2d_v16.shasum +0 -1
  26. data/ext/phc-winner-argon2/kats/argon2i +0 -12304
  27. data/ext/phc-winner-argon2/kats/argon2i.shasum +0 -1
  28. data/ext/phc-winner-argon2/kats/argon2i_v16 +0 -12304
  29. data/ext/phc-winner-argon2/kats/argon2i_v16.shasum +0 -1
  30. data/ext/phc-winner-argon2/kats/argon2id +0 -12304
  31. data/ext/phc-winner-argon2/kats/argon2id.shasum +0 -1
  32. data/ext/phc-winner-argon2/kats/argon2id_v16 +0 -12304
  33. data/ext/phc-winner-argon2/kats/argon2id_v16.shasum +0 -1
  34. data/ext/phc-winner-argon2/kats/check-sums.ps1 +0 -42
  35. data/ext/phc-winner-argon2/kats/check-sums.sh +0 -13
  36. data/ext/phc-winner-argon2/kats/test.ps1 +0 -50
  37. data/ext/phc-winner-argon2/kats/test.sh +0 -49
  38. data/ext/phc-winner-argon2/latex/IEEEtran.cls +0 -6347
  39. data/ext/phc-winner-argon2/latex/Makefile +0 -18
  40. data/ext/phc-winner-argon2/latex/argon2-specs.tex +0 -920
  41. data/ext/phc-winner-argon2/latex/pics/argon2-par.pdf +0 -0
  42. data/ext/phc-winner-argon2/latex/pics/compression.pdf +0 -0
  43. data/ext/phc-winner-argon2/latex/pics/generic.pdf +0 -0
  44. data/ext/phc-winner-argon2/latex/pics/power-distribution.jpg +0 -0
  45. data/ext/phc-winner-argon2/latex/tradeoff.bib +0 -822
  46. data/ext/phc-winner-argon2/libargon2.pc.in +0 -18
  47. data/ext/phc-winner-argon2/man/argon2.1 +0 -57
  48. data/ext/phc-winner-argon2/src/argon2.c +0 -452
  49. data/ext/phc-winner-argon2/src/bench.c +0 -111
  50. data/ext/phc-winner-argon2/src/blake2/blake2-impl.h +0 -156
  51. data/ext/phc-winner-argon2/src/blake2/blake2.h +0 -89
  52. data/ext/phc-winner-argon2/src/blake2/blake2b.c +0 -390
  53. data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h +0 -471
  54. data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h +0 -56
  55. data/ext/phc-winner-argon2/src/core.c +0 -648
  56. data/ext/phc-winner-argon2/src/core.h +0 -228
  57. data/ext/phc-winner-argon2/src/encoding.c +0 -463
  58. data/ext/phc-winner-argon2/src/encoding.h +0 -57
  59. data/ext/phc-winner-argon2/src/genkat.c +0 -213
  60. data/ext/phc-winner-argon2/src/genkat.h +0 -51
  61. data/ext/phc-winner-argon2/src/opt.c +0 -283
  62. data/ext/phc-winner-argon2/src/ref.c +0 -194
  63. data/ext/phc-winner-argon2/src/run.c +0 -337
  64. data/ext/phc-winner-argon2/src/test.c +0 -289
  65. data/ext/phc-winner-argon2/src/thread.c +0 -57
  66. data/ext/phc-winner-argon2/src/thread.h +0 -67
  67. data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj +0 -231
  68. data/ext/phc-winner-argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj.filters +0 -69
  69. data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj +0 -231
  70. data/ext/phc-winner-argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj.filters +0 -69
  71. data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj +0 -230
  72. data/ext/phc-winner-argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj.filters +0 -66
  73. data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj +0 -244
  74. data/ext/phc-winner-argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj.filters +0 -72
  75. data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj +0 -235
  76. data/ext/phc-winner-argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj.filters +0 -69
  77. data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj +0 -243
  78. data/ext/phc-winner-argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj.filters +0 -69
  79. data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj +0 -231
  80. data/ext/phc-winner-argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj.filters +0 -69
  81. data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj +0 -230
  82. data/ext/phc-winner-argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj.filters +0 -66
  83. data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj +0 -232
  84. data/ext/phc-winner-argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj.filters +0 -72
  85. data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj +0 -231
  86. data/ext/phc-winner-argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj.filters +0 -69
data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h
@@ -1,471 +0,0 @@
- /*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
- #ifndef BLAKE_ROUND_MKA_OPT_H
- #define BLAKE_ROUND_MKA_OPT_H
-
- #include "blake2-impl.h"
-
- #include <emmintrin.h>
- #if defined(__SSSE3__)
- #include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
- #endif
-
- #if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
- #include <x86intrin.h>
- #endif
-
- #if !defined(__AVX512F__)
- #if !defined(__AVX2__)
- #if !defined(__XOP__)
- #if defined(__SSSE3__)
- #define r16 \
-     (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
- #define r24 \
-     (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
- #define _mm_roti_epi64(x, c) \
-     (-(c) == 32) \
-         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
-         : (-(c) == 24) \
-             ? _mm_shuffle_epi8((x), r24) \
-             : (-(c) == 16) \
-                 ? _mm_shuffle_epi8((x), r16) \
-                 : (-(c) == 63) \
-                     ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                     _mm_add_epi64((x), (x))) \
-                     : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
-                                     _mm_slli_epi64((x), 64 - (-(c))))
- #else /* defined(__SSE2__) */
- #define _mm_roti_epi64(r, c) \
-     _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
- #endif
- #else
- #endif
-
- static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
-     const __m128i z = _mm_mul_epu32(x, y);
-     return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
- }
-
- #define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     A0 = fBlaMka(A0, B0); \
-     A1 = fBlaMka(A1, B1); \
-     \
-     D0 = _mm_xor_si128(D0, A0); \
-     D1 = _mm_xor_si128(D1, A1); \
-     \
-     D0 = _mm_roti_epi64(D0, -32); \
-     D1 = _mm_roti_epi64(D1, -32); \
-     \
-     C0 = fBlaMka(C0, D0); \
-     C1 = fBlaMka(C1, D1); \
-     \
-     B0 = _mm_xor_si128(B0, C0); \
-     B1 = _mm_xor_si128(B1, C1); \
-     \
-     B0 = _mm_roti_epi64(B0, -24); \
-     B1 = _mm_roti_epi64(B1, -24); \
- } while ((void)0, 0)
-
- #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     A0 = fBlaMka(A0, B0); \
-     A1 = fBlaMka(A1, B1); \
-     \
-     D0 = _mm_xor_si128(D0, A0); \
-     D1 = _mm_xor_si128(D1, A1); \
-     \
-     D0 = _mm_roti_epi64(D0, -16); \
-     D1 = _mm_roti_epi64(D1, -16); \
-     \
-     C0 = fBlaMka(C0, D0); \
-     C1 = fBlaMka(C1, D1); \
-     \
-     B0 = _mm_xor_si128(B0, C0); \
-     B1 = _mm_xor_si128(B1, C1); \
-     \
-     B0 = _mm_roti_epi64(B0, -63); \
-     B1 = _mm_roti_epi64(B1, -63); \
- } while ((void)0, 0)
-
- #if defined(__SSSE3__)
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
-     __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
-     B0 = t0; \
-     B1 = t1; \
-     \
-     t0 = C0; \
-     C0 = C1; \
-     C1 = t0; \
-     \
-     t0 = _mm_alignr_epi8(D1, D0, 8); \
-     t1 = _mm_alignr_epi8(D0, D1, 8); \
-     D0 = t1; \
-     D1 = t0; \
- } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
-     __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
-     B0 = t0; \
-     B1 = t1; \
-     \
-     t0 = C0; \
-     C0 = C1; \
-     C1 = t0; \
-     \
-     t0 = _mm_alignr_epi8(D0, D1, 8); \
-     t1 = _mm_alignr_epi8(D1, D0, 8); \
-     D0 = t1; \
-     D1 = t0; \
- } while ((void)0, 0)
- #else /* SSE2 */
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     __m128i t0 = D0; \
-     __m128i t1 = B0; \
-     D0 = C0; \
-     C0 = C1; \
-     C1 = D0; \
-     D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
-     D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
-     B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
-     B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
- } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     __m128i t0, t1; \
-     t0 = C0; \
-     C0 = C1; \
-     C1 = t0; \
-     t0 = B0; \
-     t1 = D0; \
-     B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
-     B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
-     D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
-     D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
- } while ((void)0, 0)
- #endif
-
- #define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-     G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-     G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
- } while ((void)0, 0)
- #else /* __AVX2__ */
-
- #include <immintrin.h>
-
- #define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
- #define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
- #define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
- #define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
-
- #define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     __m256i ml = _mm256_mul_epu32(A0, B0); \
-     ml = _mm256_add_epi64(ml, ml); \
-     A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-     D0 = _mm256_xor_si256(D0, A0); \
-     D0 = rotr32(D0); \
-     \
-     ml = _mm256_mul_epu32(C0, D0); \
-     ml = _mm256_add_epi64(ml, ml); \
-     C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
-     \
-     B0 = _mm256_xor_si256(B0, C0); \
-     B0 = rotr24(B0); \
-     \
-     ml = _mm256_mul_epu32(A1, B1); \
-     ml = _mm256_add_epi64(ml, ml); \
-     A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-     D1 = _mm256_xor_si256(D1, A1); \
-     D1 = rotr32(D1); \
-     \
-     ml = _mm256_mul_epu32(C1, D1); \
-     ml = _mm256_add_epi64(ml, ml); \
-     C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
-     \
-     B1 = _mm256_xor_si256(B1, C1); \
-     B1 = rotr24(B1); \
- } while((void)0, 0);
-
- #define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     __m256i ml = _mm256_mul_epu32(A0, B0); \
-     ml = _mm256_add_epi64(ml, ml); \
-     A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
-     D0 = _mm256_xor_si256(D0, A0); \
-     D0 = rotr16(D0); \
-     \
-     ml = _mm256_mul_epu32(C0, D0); \
-     ml = _mm256_add_epi64(ml, ml); \
-     C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
-     B0 = _mm256_xor_si256(B0, C0); \
-     B0 = rotr63(B0); \
-     \
-     ml = _mm256_mul_epu32(A1, B1); \
-     ml = _mm256_add_epi64(ml, ml); \
-     A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
-     D1 = _mm256_xor_si256(D1, A1); \
-     D1 = rotr16(D1); \
-     \
-     ml = _mm256_mul_epu32(C1, D1); \
-     ml = _mm256_add_epi64(ml, ml); \
-     C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
-     B1 = _mm256_xor_si256(B1, C1); \
-     B1 = rotr63(B1); \
- } while((void)0, 0);
-
- #define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-     C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-     D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
-     \
-     B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
-     C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-     D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
- } while((void)0, 0);
-
- #define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-     __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-     B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-     B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-     \
-     tmp1 = C0; \
-     C0 = C1; \
-     C1 = tmp1; \
-     \
-     tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
-     tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
-     D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-     D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
- } while(0);
-
- #define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-     C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-     D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
-     \
-     B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
-     C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-     D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
- } while((void)0, 0);
-
- #define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
-     __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
-     B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-     B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
-     \
-     tmp1 = C0; \
-     C0 = C1; \
-     C1 = tmp1; \
-     \
-     tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
-     tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
-     D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
-     D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
- } while((void)0, 0);
-
- #define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     \
-     DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
-     \
-     G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     \
-     UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
- } while((void)0, 0);
-
- #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     \
-     DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     \
-     G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
-     \
-     UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- } while((void)0, 0);
-
- #endif /* __AVX2__ */
-
- #else /* __AVX512F__ */
-
- #include <immintrin.h>
-
- #define ror64(x, n) _mm512_ror_epi64((x), (n))
-
- static __m512i muladd(__m512i x, __m512i y)
- {
-     __m512i z = _mm512_mul_epu32(x, y);
-     return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
- }
-
- #define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     A0 = muladd(A0, B0); \
-     A1 = muladd(A1, B1); \
-     \
-     D0 = _mm512_xor_si512(D0, A0); \
-     D1 = _mm512_xor_si512(D1, A1); \
-     \
-     D0 = ror64(D0, 32); \
-     D1 = ror64(D1, 32); \
-     \
-     C0 = muladd(C0, D0); \
-     C1 = muladd(C1, D1); \
-     \
-     B0 = _mm512_xor_si512(B0, C0); \
-     B1 = _mm512_xor_si512(B1, C1); \
-     \
-     B0 = ror64(B0, 24); \
-     B1 = ror64(B1, 24); \
- } while ((void)0, 0)
-
- #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     A0 = muladd(A0, B0); \
-     A1 = muladd(A1, B1); \
-     \
-     D0 = _mm512_xor_si512(D0, A0); \
-     D1 = _mm512_xor_si512(D1, A1); \
-     \
-     D0 = ror64(D0, 16); \
-     D1 = ror64(D1, 16); \
-     \
-     C0 = muladd(C0, D0); \
-     C1 = muladd(C1, D1); \
-     \
-     B0 = _mm512_xor_si512(B0, C0); \
-     B1 = _mm512_xor_si512(B1, C1); \
-     \
-     B0 = ror64(B0, 63); \
-     B1 = ror64(B1, 63); \
- } while ((void)0, 0)
-
- #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
-     B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
-     \
-     C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-     C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-     \
-     D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
-     D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
- } while ((void)0, 0)
-
- #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
-     B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
-     \
-     C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
-     C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
-     \
-     D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
-     D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
- } while ((void)0, 0)
-
- #define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
- do { \
-     G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-     G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     G1(A0, B0, C0, D0, A1, B1, C1, D1); \
-     G2(A0, B0, C0, D0, A1, B1, C1, D1); \
-     \
-     UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
- } while ((void)0, 0)
-
- #define SWAP_HALVES(A0, A1) \
- do { \
-     __m512i t0, t1; \
-     t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
-     t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
-     A0 = t0; \
-     A1 = t1; \
- } while((void)0, 0)
-
- #define SWAP_QUARTERS(A0, A1) \
- do { \
-     SWAP_HALVES(A0, A1); \
-     A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-     A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
- } while((void)0, 0)
-
- #define UNSWAP_QUARTERS(A0, A1) \
- do { \
-     A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
-     A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
-     SWAP_HALVES(A0, A1); \
- } while((void)0, 0)
-
- #define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
- do { \
-     SWAP_HALVES(A0, B0); \
-     SWAP_HALVES(C0, D0); \
-     SWAP_HALVES(A1, B1); \
-     SWAP_HALVES(C1, D1); \
-     BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-     SWAP_HALVES(A0, B0); \
-     SWAP_HALVES(C0, D0); \
-     SWAP_HALVES(A1, B1); \
-     SWAP_HALVES(C1, D1); \
- } while ((void)0, 0)
-
- #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
- do { \
-     SWAP_QUARTERS(A0, A1); \
-     SWAP_QUARTERS(B0, B1); \
-     SWAP_QUARTERS(C0, C1); \
-     SWAP_QUARTERS(D0, D1); \
-     BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
-     UNSWAP_QUARTERS(A0, A1); \
-     UNSWAP_QUARTERS(B0, B1); \
-     UNSWAP_QUARTERS(C0, C1); \
-     UNSWAP_QUARTERS(D0, D1); \
- } while ((void)0, 0)
-
- #endif /* __AVX512F__ */
- #endif /* BLAKE_ROUND_MKA_OPT_H */
data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h
@@ -1,56 +0,0 @@
- /*
- * Argon2 reference source code package - reference C implementations
- *
- * Copyright 2015
- * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
- *
- * You may use this work under the terms of a Creative Commons CC0 1.0
- * License/Waiver or the Apache Public License 2.0, at your option. The terms of
- * these licenses can be found at:
- *
- * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
- * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
- *
- * You should have received a copy of both of these licenses along with this
- * software. If not, they may be obtained at the above URLs.
- */
-
- #ifndef BLAKE_ROUND_MKA_H
- #define BLAKE_ROUND_MKA_H
-
- #include "blake2.h"
- #include "blake2-impl.h"
-
- /* designed by the Lyra PHC team */
- static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
-     const uint64_t m = UINT64_C(0xFFFFFFFF);
-     const uint64_t xy = (x & m) * (y & m);
-     return x + y + 2 * xy;
- }
-
- #define G(a, b, c, d) \
- do { \
-     a = fBlaMka(a, b); \
-     d = rotr64(d ^ a, 32); \
-     c = fBlaMka(c, d); \
-     b = rotr64(b ^ c, 24); \
-     a = fBlaMka(a, b); \
-     d = rotr64(d ^ a, 16); \
-     c = fBlaMka(c, d); \
-     b = rotr64(b ^ c, 63); \
- } while ((void)0, 0)
-
- #define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
-                            v12, v13, v14, v15) \
- do { \
-     G(v0, v4, v8, v12); \
-     G(v1, v5, v9, v13); \
-     G(v2, v6, v10, v14); \
-     G(v3, v7, v11, v15); \
-     G(v0, v5, v10, v15); \
-     G(v1, v6, v11, v12); \
-     G(v2, v7, v8, v13); \
-     G(v3, v4, v9, v14); \
- } while ((void)0, 0)
-
- #endif