argon2id 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +9 -0
- data/LICENSE +11 -0
- data/README.md +248 -0
- data/Rakefile +57 -0
- data/argon2id.gemspec +58 -0
- data/ext/argon2id/argon2id.c +69 -0
- data/ext/argon2id/extconf.rb +17 -0
- data/ext/argon2id/libargon2/LICENSE +314 -0
- data/ext/argon2id/libargon2/argon2.c +452 -0
- data/ext/argon2id/libargon2/argon2.h +437 -0
- data/ext/argon2id/libargon2/blake2/blake2-impl.h +156 -0
- data/ext/argon2id/libargon2/blake2/blake2.h +89 -0
- data/ext/argon2id/libargon2/blake2/blake2b.c +390 -0
- data/ext/argon2id/libargon2/blake2/blamka-round-opt.h +471 -0
- data/ext/argon2id/libargon2/blake2/blamka-round-ref.h +56 -0
- data/ext/argon2id/libargon2/core.c +648 -0
- data/ext/argon2id/libargon2/core.h +228 -0
- data/ext/argon2id/libargon2/encoding.c +463 -0
- data/ext/argon2id/libargon2/encoding.h +57 -0
- data/ext/argon2id/libargon2/ref.c +194 -0
- data/ext/argon2id/libargon2/thread.c +57 -0
- data/ext/argon2id/libargon2/thread.h +67 -0
- data/lib/argon2id/password.rb +36 -0
- data/lib/argon2id/version.rb +5 -0
- data/lib/argon2id.rb +29 -0
- data/test/test_hash_encoded.rb +90 -0
- data/test/test_password.rb +86 -0
- data/test/test_verify.rb +24 -0
- metadata +121 -0
| @@ -0,0 +1,471 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
             * Argon2 reference source code package - reference C implementations
         | 
| 3 | 
            +
             *
         | 
| 4 | 
            +
             * Copyright 2015
         | 
| 5 | 
            +
             * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
         | 
| 6 | 
            +
             *
         | 
| 7 | 
            +
             * You may use this work under the terms of a Creative Commons CC0 1.0
         | 
| 8 | 
            +
             * License/Waiver or the Apache Public License 2.0, at your option. The terms of
         | 
| 9 | 
            +
             * these licenses can be found at:
         | 
| 10 | 
            +
             *
         | 
| 11 | 
            +
             * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
         | 
| 12 | 
            +
             * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
         | 
| 13 | 
            +
             *
         | 
| 14 | 
            +
             * You should have received a copy of both of these licenses along with this
         | 
| 15 | 
            +
             * software. If not, they may be obtained at the above URLs.
         | 
| 16 | 
            +
             */
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            #ifndef BLAKE_ROUND_MKA_OPT_H
         | 
| 19 | 
            +
            #define BLAKE_ROUND_MKA_OPT_H
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #include "blake2-impl.h"
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            #include <emmintrin.h>
         | 
| 24 | 
            +
            #if defined(__SSSE3__)
         | 
| 25 | 
            +
            #include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
         | 
| 26 | 
            +
            #endif
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            #if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
         | 
| 29 | 
            +
            #include <x86intrin.h>
         | 
| 30 | 
            +
            #endif
         | 
| 31 | 
            +
             | 
/*
 * 64-bit lane rotations for the SSE2/SSSE3 path (compiled only when none of
 * AVX-512F, AVX2 or XOP is available).  The rotation count is passed negated
 * (e.g. -32 rotates right by 32), matching the XOP _mm_roti_epi64 intrinsic
 * that these macros emulate.
 */
#if !defined(__AVX512F__)
#if !defined(__AVX2__)
#if !defined(__XOP__)
#if defined(__SSSE3__)
/* pshufb control masks: rotate each 64-bit lane right by 16 / 24 bits. */
#define r16                                                                    \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24                                                                    \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
/* Pick the cheapest sequence for each rotation count BLAKE2b uses
   (32, 24, 16, 63); any other count falls back to shift/shift/xor. */
#define _mm_roti_epi64(x, c)                                                   \
    (-(c) == 32)                                                               \
        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))                      \
        : (-(c) == 24)                                                         \
              ? _mm_shuffle_epi8((x), r24)                                     \
              : (-(c) == 16)                                                   \
                    ? _mm_shuffle_epi8((x), r16)                               \
                    : (-(c) == 63)                                             \
                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)),           \
                                          _mm_add_epi64((x), (x)))             \
                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)),           \
                                          _mm_slli_epi64((x), 64 - (-(c))))
#else /* defined(__SSE2__) */
/* Generic SSE2 rotate-right: (r >> n) ^ (r << (64 - n)). */
#define _mm_roti_epi64(r, c)                                                   \
    _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
#endif
#else
/* __XOP__: the compiler provides _mm_roti_epi64 as a native intrinsic. */
#endif
| 58 | 
            +
             | 
/*
 * BlaMka primitive (Argon2's multiply-hardened BLAKE2b addition):
 * for each 64-bit lane computes x + y + 2 * low32(x) * low32(y).
 * _mm_mul_epu32 yields the full 64-bit product of the low 32 bits
 * of each lane, so no precision is lost before the doubling.
 */
static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
    const __m128i prod  = _mm_mul_epu32(x, y);        /* low32(x) * low32(y) */
    const __m128i twice = _mm_add_epi64(prod, prod);  /* 2 * product */
    return _mm_add_epi64(_mm_add_epi64(x, y), twice);
}
| 63 | 
            +
             | 
/*
 * G1: first half of the doubled BLAKE2b G function, applied to two
 * half-row groups at once (group 0 in A0..D0, group 1 in A1..D1).
 * Additions are replaced by the BlaMka multiply-add (fBlaMka);
 * rotation counts are passed negated for _mm_roti_epi64.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
    do {                                                                       \
        A0 = fBlaMka(A0, B0);                                                  \
        A1 = fBlaMka(A1, B1);                                                  \
                                                                               \
        D0 = _mm_xor_si128(D0, A0);                                            \
        D1 = _mm_xor_si128(D1, A1);                                            \
                                                                               \
        D0 = _mm_roti_epi64(D0, -32);                                          \
        D1 = _mm_roti_epi64(D1, -32);                                          \
                                                                               \
        C0 = fBlaMka(C0, D0);                                                  \
        C1 = fBlaMka(C1, D1);                                                  \
                                                                               \
        B0 = _mm_xor_si128(B0, C0);                                            \
        B1 = _mm_xor_si128(B1, C1);                                            \
                                                                               \
        B0 = _mm_roti_epi64(B0, -24);                                          \
        B1 = _mm_roti_epi64(B1, -24);                                          \
    } while ((void)0, 0)
| 84 | 
            +
             | 
/*
 * G2: second half of the doubled BLAKE2b G function; same structure as
 * G1 but with the 16- and 63-bit rotations.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
    do {                                                                       \
        A0 = fBlaMka(A0, B0);                                                  \
        A1 = fBlaMka(A1, B1);                                                  \
                                                                               \
        D0 = _mm_xor_si128(D0, A0);                                            \
        D1 = _mm_xor_si128(D1, A1);                                            \
                                                                               \
        D0 = _mm_roti_epi64(D0, -16);                                          \
        D1 = _mm_roti_epi64(D1, -16);                                          \
                                                                               \
        C0 = fBlaMka(C0, D0);                                                  \
        C1 = fBlaMka(C1, D1);                                                  \
                                                                               \
        B0 = _mm_xor_si128(B0, C0);                                            \
        B1 = _mm_xor_si128(B1, C1);                                            \
                                                                               \
        B0 = _mm_roti_epi64(B0, -63);                                          \
        B1 = _mm_roti_epi64(B1, -63);                                          \
    } while ((void)0, 0)
| 105 | 
            +
             | 
#if defined(__SSSE3__)
/*
 * Permute the B/C/D rows so the second G pass operates on diagonals.
 * Each logical 256-bit row is held as two __m128i halves (x0/x1);
 * _mm_alignr_epi8(hi, lo, 8) shifts one 64-bit lane across the halves.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
    do {                                                                       \
        __m128i t0 = _mm_alignr_epi8(B1, B0, 8);                               \
        __m128i t1 = _mm_alignr_epi8(B0, B1, 8);                               \
        B0 = t0;                                                               \
        B1 = t1;                                                               \
                                                                               \
        t0 = C0;                                                               \
        C0 = C1;                                                               \
        C1 = t0;                                                               \
                                                                               \
        t0 = _mm_alignr_epi8(D1, D0, 8);                                       \
        t1 = _mm_alignr_epi8(D0, D1, 8);                                       \
        D0 = t1;                                                               \
        D1 = t0;                                                               \
    } while ((void)0, 0)

/* Inverse of DIAGONALIZE: restore column order after the diagonal pass
   (B and D alignr operands swapped relative to DIAGONALIZE). */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
    do {                                                                       \
        __m128i t0 = _mm_alignr_epi8(B0, B1, 8);                               \
        __m128i t1 = _mm_alignr_epi8(B1, B0, 8);                               \
        B0 = t0;                                                               \
        B1 = t1;                                                               \
                                                                               \
        t0 = C0;                                                               \
        C0 = C1;                                                               \
        C1 = t0;                                                               \
                                                                               \
        t0 = _mm_alignr_epi8(D0, D1, 8);                                       \
        t1 = _mm_alignr_epi8(D1, D0, 8);                                       \
        D0 = t1;                                                               \
        D1 = t0;                                                               \
    } while ((void)0, 0)
#else /* SSE2 */
/* SSE2 fallback: the same lane permutation built from unpack{hi,lo}
   pairs instead of the SSSE3 palignr instruction. */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
    do {                                                                       \
        __m128i t0 = D0;                                                       \
        __m128i t1 = B0;                                                       \
        D0 = C0;                                                               \
        C0 = C1;                                                               \
        C1 = D0;                                                               \
        D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0));               \
        D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1));               \
        B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1));               \
        B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1));               \
    } while ((void)0, 0)

/* Inverse of the SSE2 DIAGONALIZE above. */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
    do {                                                                       \
        __m128i t0, t1;                                                        \
        t0 = C0;                                                               \
        C0 = C1;                                                               \
        C1 = t0;                                                               \
        t0 = B0;                                                               \
        t1 = D0;                                                               \
        B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0));               \
        B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1));               \
        D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1));               \
        D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1));               \
    } while ((void)0, 0)
#endif
| 168 | 
            +
             | 
/*
 * One full doubled BLAKE2b round: column half (G1+G2), diagonalize,
 * diagonal half (G1+G2), then restore the original lane layout.
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
    do {                                                                       \
        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
                                                                               \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                           \
                                                                               \
        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
                                                                               \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
    } while ((void)0, 0)
| 181 | 
            +
            #else /* __AVX2__ */
         | 
| 182 | 
            +
             | 
| 183 | 
            +
            #include <immintrin.h>
         | 
| 184 | 
            +
             | 
/* Per-64-bit-lane right rotations for AVX2 (four lanes per register).
   rotr24/rotr16 use a vpshufb byte shuffle; rotr63 uses shift+add (x+x
   is x<<1), which is cheaper than two shifts. */
#define rotr32(x)   _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
#define rotr24(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define rotr16(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define rotr63(x)   _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
| 189 | 
            +
             | 
/*
 * AVX2 G1: first half of the BlaMka G function on two 256-bit register
 * groups (suffix 0 and 1).  The A = A + B + 2*lo32(A)*lo32(B) BlaMka
 * step is inlined via the temporary ml.
 * NOTE(review): the expansion ends with ';', so this macro must be used
 * as a complete statement (as BLAKE2_ROUND_1/2 below do, invoking it
 * without a trailing semicolon).
 */
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i ml = _mm256_mul_epu32(A0, B0); \
        ml = _mm256_add_epi64(ml, ml); \
        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
        D0 = _mm256_xor_si256(D0, A0); \
        D0 = rotr32(D0); \
        \
        ml = _mm256_mul_epu32(C0, D0); \
        ml = _mm256_add_epi64(ml, ml); \
        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
        \
        B0 = _mm256_xor_si256(B0, C0); \
        B0 = rotr24(B0); \
        \
        ml = _mm256_mul_epu32(A1, B1); \
        ml = _mm256_add_epi64(ml, ml); \
        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
        D1 = _mm256_xor_si256(D1, A1); \
        D1 = rotr32(D1); \
        \
        ml = _mm256_mul_epu32(C1, D1); \
        ml = _mm256_add_epi64(ml, ml); \
        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
        \
        B1 = _mm256_xor_si256(B1, C1); \
        B1 = rotr24(B1); \
    } while((void)0, 0);
| 218 | 
            +
             | 
/*
 * AVX2 G2: second half of the BlaMka G function; same structure as
 * G1_AVX2 but with the 16- and 63-bit rotations.
 * NOTE(review): like G1_AVX2, the expansion carries its own trailing ';'.
 */
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i ml = _mm256_mul_epu32(A0, B0); \
        ml = _mm256_add_epi64(ml, ml); \
        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
        D0 = _mm256_xor_si256(D0, A0); \
        D0 = rotr16(D0); \
        \
        ml = _mm256_mul_epu32(C0, D0); \
        ml = _mm256_add_epi64(ml, ml); \
        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
        B0 = _mm256_xor_si256(B0, C0); \
        B0 = rotr63(B0); \
        \
        ml = _mm256_mul_epu32(A1, B1); \
        ml = _mm256_add_epi64(ml, ml); \
        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
        D1 = _mm256_xor_si256(D1, A1); \
        D1 = rotr16(D1); \
        \
        ml = _mm256_mul_epu32(C1, D1); \
        ml = _mm256_add_epi64(ml, ml); \
        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
        B1 = _mm256_xor_si256(B1, C1); \
        B1 = rotr63(B1); \
    } while((void)0, 0);
| 245 | 
            +
             | 
/*
 * AVX2 diagonalization, variant 1: rotate the 64-bit lanes of B, C and D
 * within each 256-bit register (by 1, 2 and 3 positions respectively)
 * so the next G pass works on diagonals.
 */
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
        \
        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
    } while((void)0, 0);
| 256 | 
            +
             | 
/*
 * AVX2 diagonalization, variant 2: instead of rotating within registers,
 * exchange lane pairs between the 0- and 1-suffixed registers using
 * blend + permute (B and D) and a plain swap (C).
 * NOTE(review): ends with "while(0);" rather than the "(void)0, 0" form
 * used by the sibling macros — behaviorally the same, just inconsistent.
 */
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
        \
        tmp1 = C0; \
        C0 = C1; \
        C1 = tmp1; \
        \
        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
    } while(0);
| 273 | 
            +
             | 
/*
 * Inverse of DIAGONALIZE_1: rotate B, C and D lanes back (shuffle
 * constants for B and D are swapped relative to DIAGONALIZE_1).
 */
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
        \
        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
    } while((void)0, 0);
| 284 | 
            +
             | 
/*
 * Inverse of DIAGONALIZE_2: the same blend/permute exchange with the
 * B-destination assignments and D blend masks swapped back.
 */
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
        \
        tmp1 = C0; \
        C0 = C1; \
        C1 = tmp1; \
        \
        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
    } while((void)0, 0);
| 301 | 
            +
             | 
/*
 * Full AVX2 BLAKE2b round using the within-register diagonalization
 * (variant 1).  The G/DIAGONALIZE macros are invoked without trailing
 * semicolons because their expansions already end in ';'.
 */
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
    do{ \
        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        \
        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
        \
        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        \
        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
    } while((void)0, 0);
| 314 | 
            +
             | 
/*
 * One full BlaMka/BLAKE2b mixing round over two 256-bit register pairs
 * (layout variant 2).  Note that DIAGONALIZE_2 / UNDIAGONALIZE_2 take
 * their arguments in (A0, A1, B0, B1, ...) order, unlike the *_1
 * variants — the two layouts index the state differently.  All helper
 * macros end with their own ';', so none appear at the call sites.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do{ \
        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        \
        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
        \
        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
        \
        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
    } while((void)0, 0);
| 327 | 
            +
             | 
| 328 | 
            +
            #endif /* __AVX2__ */
         | 
| 329 | 
            +
             | 
| 330 | 
            +
            #else /* __AVX512F__ */
         | 
| 331 | 
            +
             | 
| 332 | 
            +
            #include <immintrin.h>
         | 
| 333 | 
            +
             | 
/* 64-bit rotate-right of each lane, using the AVX-512 single-instruction
 * rotate (no shift/or emulation needed on this path). */
#define ror64(x, n) _mm512_ror_epi64((x), (n))
| 335 | 
            +
             | 
/*
 * BlaMka multiply-add on eight 64-bit lanes at once:
 *   x + y + 2 * lo32(x) * lo32(y)   (mod 2^64 per lane)
 * matching the scalar fBlaMka in blamka-round-ref.h.
 * _mm512_mul_epu32 multiplies the low 32 bits of each 64-bit lane,
 * producing the full 64-bit product.
 */
static __m512i muladd(__m512i x, __m512i y)
{
    const __m512i prod = _mm512_mul_epu32(x, y);
    const __m512i sum = _mm512_add_epi64(x, y);
    return _mm512_add_epi64(sum, _mm512_add_epi64(prod, prod));
}
| 341 | 
            +
             | 
/*
 * First half of the BlaMka G function applied to two 8-lane states:
 *   a = muladd(a, b); d = ror64(d ^ a, 32);
 *   c = muladd(c, d); b = ror64(b ^ c, 24);
 * (muladd is the BlaMka a+b+2*lo32(a)*lo32(b) primitive above; the
 * rotation constants 32 and 24 are the first two BLAKE2b rotations.)
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = muladd(A0, B0); \
        A1 = muladd(A1, B1); \
\
        D0 = _mm512_xor_si512(D0, A0); \
        D1 = _mm512_xor_si512(D1, A1); \
\
        D0 = ror64(D0, 32); \
        D1 = ror64(D1, 32); \
\
        C0 = muladd(C0, D0); \
        C1 = muladd(C1, D1); \
\
        B0 = _mm512_xor_si512(B0, C0); \
        B1 = _mm512_xor_si512(B1, C1); \
\
        B0 = ror64(B0, 24); \
        B1 = ror64(B1, 24); \
    } while ((void)0, 0)
| 362 | 
            +
             | 
/*
 * Second half of the BlaMka G function: identical structure to G1 but
 * with the remaining BLAKE2b rotation constants 16 and 63.
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = muladd(A0, B0); \
        A1 = muladd(A1, B1); \
\
        D0 = _mm512_xor_si512(D0, A0); \
        D1 = _mm512_xor_si512(D1, A1); \
\
        D0 = ror64(D0, 16); \
        D1 = ror64(D1, 16); \
\
        C0 = muladd(C0, D0); \
        C1 = muladd(C1, D1); \
\
        B0 = _mm512_xor_si512(B0, C0); \
        B1 = _mm512_xor_si512(B1, C1); \
\
        B0 = ror64(B0, 63); \
        B1 = ror64(B1, 63); \
    } while ((void)0, 0)
| 383 | 
            +
             | 
/*
 * Rotate the B, C and D rows by one, two and three 64-bit lanes
 * respectively (within each 256-bit half, since _mm512_permutex_epi64
 * permutes 256-bit sub-vectors independently), so that the subsequent
 * G calls operate on the state's diagonals instead of its columns.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
    } while ((void)0, 0)
| 395 | 
            +
             | 
/*
 * Inverse of DIAGONALIZE: rotate B, C and D back by the opposite
 * amounts, restoring the column layout after the diagonal G step.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
        B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
        C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
        C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
        D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
        D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
    } while ((void)0, 0)
| 407 | 
            +
             | 
/*
 * One complete BlaMka round on the AVX-512 state: column step (G1+G2),
 * diagonalize, diagonal step (G1+G2), undiagonalize.  Unlike the AVX2
 * helpers, these macros end without a semicolon, so the call sites
 * below supply their own.
 */
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
    } while ((void)0, 0)
| 420 | 
            +
             | 
/*
 * Regroup the 256-bit halves of two registers: afterwards A0 holds the
 * low 256 bits of both inputs and A1 the high 256 bits.
 * (_mm512_shuffle_i64x2 with _MM_SHUFFLE(1,0,1,0) selects the two low
 * 128-bit lanes of each operand; _MM_SHUFFLE(3,2,3,2) the two high
 * ones.)  Applying the macro twice restores the original layout.
 */
#define SWAP_HALVES(A0, A1) \
    do { \
        __m512i t0, t1; \
        t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
        t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
        A0 = t0; \
        A1 = t1; \
    } while((void)0, 0)
| 429 | 
            +
             | 
/*
 * Regroup two registers at 128-bit granularity for BLAKE2_ROUND_2:
 * first swap 256-bit halves, then interleave the 128-bit quarters via
 * the 64-bit permutation (0, 1, 4, 5, 2, 3, 6, 7).
 * Undone by UNSWAP_QUARTERS below.
 */
#define SWAP_QUARTERS(A0, A1) \
    do { \
        SWAP_HALVES(A0, A1); \
        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
    } while((void)0, 0)
| 436 | 
            +
             | 
/*
 * Inverse of SWAP_QUARTERS: the (0, 1, 4, 5, 2, 3, 6, 7) permutation is
 * its own inverse, so apply it first, then undo the half swap.
 */
#define UNSWAP_QUARTERS(A0, A1) \
    do { \
        A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
        A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
        SWAP_HALVES(A0, A1); \
    } while((void)0, 0)
| 443 | 
            +
             | 
/*
 * AVX-512 round, layout variant 1: rearrange the four register pairs
 * with SWAP_HALVES so they form the row layout BLAKE2_ROUND expects,
 * run the round, then swap back.
 * NOTE(review): the parameter order (A0, C0, B0, D0, ...) intentionally
 * differs from BLAKE2_ROUND_2 — callers pass registers in memory order
 * and the half-swaps map them onto rows; keep the signature as is.
 */
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
    do { \
        SWAP_HALVES(A0, B0); \
        SWAP_HALVES(C0, D0); \
        SWAP_HALVES(A1, B1); \
        SWAP_HALVES(C1, D1); \
        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
        SWAP_HALVES(A0, B0); \
        SWAP_HALVES(C0, D0); \
        SWAP_HALVES(A1, B1); \
        SWAP_HALVES(C1, D1); \
    } while ((void)0, 0)
| 456 | 
            +
             | 
/*
 * AVX-512 round, layout variant 2: interleave the 128-bit quarters of
 * each register pair, run the round on the rearranged state, then
 * restore the original layout with UNSWAP_QUARTERS.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        SWAP_QUARTERS(A0, A1); \
        SWAP_QUARTERS(B0, B1); \
        SWAP_QUARTERS(C0, C1); \
        SWAP_QUARTERS(D0, D1); \
        BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
        UNSWAP_QUARTERS(A0, A1); \
        UNSWAP_QUARTERS(B0, B1); \
        UNSWAP_QUARTERS(C0, C1); \
        UNSWAP_QUARTERS(D0, D1); \
    } while ((void)0, 0)
| 469 | 
            +
             | 
| 470 | 
            +
            #endif /* __AVX512F__ */
         | 
| 471 | 
            +
            #endif /* BLAKE_ROUND_MKA_OPT_H */
         | 
| @@ -0,0 +1,56 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
             * Argon2 reference source code package - reference C implementations
         | 
| 3 | 
            +
             *
         | 
| 4 | 
            +
             * Copyright 2015
         | 
| 5 | 
            +
             * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
         | 
| 6 | 
            +
             *
         | 
| 7 | 
            +
             * You may use this work under the terms of a Creative Commons CC0 1.0
         | 
| 8 | 
            +
             * License/Waiver or the Apache Public License 2.0, at your option. The terms of
         | 
| 9 | 
            +
             * these licenses can be found at:
         | 
| 10 | 
            +
             *
         | 
| 11 | 
            +
             * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
         | 
| 12 | 
            +
             * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
         | 
| 13 | 
            +
             *
         | 
| 14 | 
            +
             * You should have received a copy of both of these licenses along with this
         | 
| 15 | 
            +
             * software. If not, they may be obtained at the above URLs.
         | 
| 16 | 
            +
             */
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            #ifndef BLAKE_ROUND_MKA_H
         | 
| 19 | 
            +
            #define BLAKE_ROUND_MKA_H
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #include "blake2.h"
         | 
| 22 | 
            +
            #include "blake2-impl.h"
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            /* designed by the Lyra PHC team */
         | 
| 25 | 
            +
            static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
         | 
| 26 | 
            +
                const uint64_t m = UINT64_C(0xFFFFFFFF);
         | 
| 27 | 
            +
                const uint64_t xy = (x & m) * (y & m);
         | 
| 28 | 
            +
                return x + y + 2 * xy;
         | 
| 29 | 
            +
            }
         | 
| 30 | 
            +
             | 
/*
 * BlaMka variant of the BLAKE2b G mixing function: the additions are
 * replaced by fBlaMka (a + b + 2*lo32(a)*lo32(b)); the rotation
 * constants 32, 24, 16, 63 are unchanged from BLAKE2b.
 * rotr64 is provided by blake2-impl.h.
 */
#define G(a, b, c, d)                                                          \
    do {                                                                       \
        a = fBlaMka(a, b);                                                     \
        d = rotr64(d ^ a, 32);                                                 \
        c = fBlaMka(c, d);                                                     \
        b = rotr64(b ^ c, 24);                                                 \
        a = fBlaMka(a, b);                                                     \
        d = rotr64(d ^ a, 16);                                                 \
        c = fBlaMka(c, d);                                                     \
        b = rotr64(b ^ c, 63);                                                 \
    } while ((void)0, 0)
| 42 | 
            +
             | 
/*
 * One BLAKE2b round over a 16-word state with no message injection
 * (Argon2's compression function uses no message schedule): four G
 * calls down the columns, then four G calls across the diagonals.
 */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,   \
                           v12, v13, v14, v15)                                 \
    do {                                                                       \
        G(v0, v4, v8, v12);                                                    \
        G(v1, v5, v9, v13);                                                    \
        G(v2, v6, v10, v14);                                                   \
        G(v3, v7, v11, v15);                                                   \
        G(v0, v5, v10, v15);                                                   \
        G(v1, v6, v11, v12);                                                   \
        G(v2, v7, v8, v13);                                                    \
        G(v3, v4, v9, v14);                                                    \
    } while ((void)0, 0)
| 55 | 
            +
             | 
| 56 | 
            +
            #endif
         |