rbnacl-libsodium 1.0.8 → 1.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +23 -0
- data/CHANGES.md +5 -0
- data/Gemfile +5 -2
- data/Rakefile +5 -0
- data/ext/rbnacl/extconf.rb +2 -1
- data/lib/rbnacl/libsodium.rb +8 -2
- data/lib/rbnacl/libsodium/version.rb +1 -1
- data/vendor/libsodium/AUTHORS +14 -0
- data/vendor/libsodium/ChangeLog +26 -0
- data/vendor/libsodium/LICENSE +1 -1
- data/vendor/libsodium/Makefile.am +1 -0
- data/vendor/libsodium/Makefile.in +9 -0
- data/vendor/libsodium/README.markdown +7 -0
- data/vendor/libsodium/aclocal.m4 +1 -0
- data/vendor/libsodium/appveyor.yml +25 -0
- data/vendor/libsodium/autom4te.cache/output.1 +640 -126
- data/vendor/libsodium/autom4te.cache/output.6 +19049 -0
- data/vendor/libsodium/autom4te.cache/requests +1151 -914
- data/vendor/libsodium/autom4te.cache/traces.1 +472 -426
- data/vendor/libsodium/autom4te.cache/traces.6 +3193 -0
- data/vendor/libsodium/builds/msvc/version.h +2 -2
- data/vendor/libsodium/builds/msvc/vs2010/libsodium.sln +50 -79
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +20 -8
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +208 -166
- data/vendor/libsodium/builds/msvc/vs2012/libsodium.sln +50 -79
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +20 -8
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +206 -164
- data/vendor/libsodium/builds/msvc/vs2013/libsodium.sln +52 -81
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +20 -8
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +206 -164
- data/vendor/libsodium/builds/msvc/vs2015/libsodium.sln +52 -81
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +20 -8
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +206 -164
- data/vendor/libsodium/configure +639 -125
- data/vendor/libsodium/configure.ac +94 -16
- data/vendor/libsodium/dist-build/Makefile.in +9 -0
- data/vendor/libsodium/dist-build/emscripten-symbols.def +370 -0
- data/vendor/libsodium/dist-build/emscripten.sh +9 -3
- data/vendor/libsodium/dist-build/generate-emscripten-symbols.sh +43 -0
- data/vendor/libsodium/libsodium-uninstalled.pc.in +1 -1
- data/vendor/libsodium/libsodium.pc.in +1 -1
- data/vendor/libsodium/libsodium.vcxproj +70 -66
- data/vendor/libsodium/libsodium.vcxproj.filters +204 -192
- data/vendor/libsodium/m4/ax_valgrind_check.m4 +190 -0
- data/vendor/libsodium/msvc-scripts/Makefile.in +9 -0
- data/vendor/libsodium/msvc-scripts/process.bat +2 -2
- data/vendor/libsodium/src/Makefile.in +9 -0
- data/vendor/libsodium/src/libsodium/Makefile.am +31 -6
- data/vendor/libsodium/src/libsodium/Makefile.in +238 -42
- data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +234 -38
- data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +208 -118
- data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_seal.c +2 -2
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +1 -4
- data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +1799 -1790
- data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.h +39 -39
- data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.c +86 -0
- data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.h +28 -0
- data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +38 -46
- data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +47 -55
- data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +47 -55
- data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +47 -55
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/generichash_blake2_api.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +0 -89
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +50 -141
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.c +45 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.h +123 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +3 -2
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +2 -2
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/{blake2b-round.h → blake2b-compress-sse41.h} +2 -28
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +2 -4
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.h +97 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-avx2.h +339 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +0 -2
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +0 -2
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +29 -18
- data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +4 -43
- data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +3 -32
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -20
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +22 -41
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +12 -39
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +2 -4
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +1 -20
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.c +570 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.h +198 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.c +444 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.h +32 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +229 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +222 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-impl.h +40 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.c +238 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.h +251 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.c +80 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ref.h +38 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ssse3.h +117 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2i.c +164 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/crypto_pwhash.c +106 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c +1 -1
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +4 -4
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c +186 -186
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +2 -2
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c +3 -2
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c +33 -33
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c +253 -254
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +16 -17
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.h +1 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +11 -11
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.h +1 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts_namespace.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h +3 -2
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h +5 -3
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_invert.c +41 -41
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S +10 -2
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_namespace.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_nsquare.S +4 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S +4 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe_frombytes_sandy2x.c +31 -32
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S +4 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S +4 -0
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base_namespace.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_namespace.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +2 -6
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +8 -28
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +75 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +6 -6
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common.h +1 -18
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c +20 -20
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/types.h +4 -4
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +6 -6
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +56 -77
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.h +1 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +1 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +2 -8
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +1 -5
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +1 -5
- data/vendor/libsodium/src/libsodium/include/Makefile.am +3 -0
- data/vendor/libsodium/src/libsodium/include/Makefile.in +19 -8
- data/vendor/libsodium/src/libsodium/include/sodium.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +50 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +94 -22
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +6 -6
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_core_hchacha20.h +35 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash.h +89 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2i.h +86 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xsalsa20poly1305.h +6 -6
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_edwards25519sha512batch.h +0 -11
- data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +3 -0
- data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +3 -0
- data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +5 -1
- data/vendor/libsodium/src/libsodium/sodium/common.h +150 -0
- data/vendor/libsodium/src/libsodium/sodium/core.c +3 -1
- data/vendor/libsodium/src/libsodium/sodium/runtime.c +37 -19
- data/vendor/libsodium/src/libsodium/sodium/utils.c +18 -9
- data/vendor/libsodium/test/Makefile.in +9 -0
- data/vendor/libsodium/test/default/Makefile.am +10 -0
- data/vendor/libsodium/test/default/Makefile.in +53 -20
- data/vendor/libsodium/test/default/aead_aes256gcm.c +43 -17
- data/vendor/libsodium/test/default/aead_chacha20poly1305.c +179 -86
- data/vendor/libsodium/test/default/auth7.c +5 -5
- data/vendor/libsodium/test/default/box.c +4 -4
- data/vendor/libsodium/test/default/box2.c +1 -1
- data/vendor/libsodium/test/default/core6.c +1 -1
- data/vendor/libsodium/test/default/generichash.c +12 -1
- data/vendor/libsodium/test/default/generichash2.c +2 -2
- data/vendor/libsodium/test/default/generichash3.c +21 -0
- data/vendor/libsodium/test/default/pwhash.c +186 -168
- data/vendor/libsodium/test/default/pwhash.exp +11 -30
- data/vendor/libsodium/test/default/pwhash_scrypt.c +349 -0
- data/vendor/libsodium/test/default/pwhash_scrypt.exp +31 -0
- data/vendor/libsodium/test/default/secretbox.c +1 -1
- data/vendor/libsodium/test/default/secretbox2.c +1 -1
- data/vendor/libsodium/test/default/sign.c +15 -0
- data/vendor/libsodium/test/default/sodium_utils2.c +8 -3
- data/vendor/libsodium/test/default/sodium_utils3.c +4 -2
- data/vendor/libsodium/test/default/verify1.c +0 -4
- data/vendor/libsodium/test/quirks/quirks.h +3 -0
- metadata +37 -22
- data/vendor/libsodium/builds/msvc/vs2010/test/test.props +0 -43
- data/vendor/libsodium/builds/msvc/vs2010/test/test.runner.bat +0 -78
- data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj +0 -244
- data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj.filters +0 -192
- data/vendor/libsodium/builds/msvc/vs2012/test/test.props +0 -43
- data/vendor/libsodium/builds/msvc/vs2012/test/test.runner.bat +0 -78
- data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj +0 -244
- data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj.filters +0 -192
- data/vendor/libsodium/builds/msvc/vs2013/test/test.props +0 -43
- data/vendor/libsodium/builds/msvc/vs2013/test/test.runner.bat +0 -78
- data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj +0 -244
- data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj.filters +0 -192
- data/vendor/libsodium/builds/msvc/vs2015/test/test.props +0 -43
- data/vendor/libsodium/builds/msvc/vs2015/test/test.runner.bat +0 -78
- data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj +0 -244
- data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj.filters +0 -192
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sysendian.h +0 -146
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c +0 -64
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
#define BLAKE2_USE_SSSE3
|
3
|
+
#define BLAKE2_USE_SSE41
|
4
|
+
#define BLAKE2_USE_AVX2
|
5
|
+
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <string.h>
|
8
|
+
|
9
|
+
#if (defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)) || \
|
10
|
+
(defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64)))
|
11
|
+
|
12
|
+
#pragma GCC target("sse2")
|
13
|
+
#pragma GCC target("ssse3")
|
14
|
+
#pragma GCC target("sse4.1")
|
15
|
+
#pragma GCC target("avx2")
|
16
|
+
|
17
|
+
#include <emmintrin.h>
|
18
|
+
#include <tmmintrin.h>
|
19
|
+
#include <smmintrin.h>
|
20
|
+
#include <immintrin.h>
|
21
|
+
|
22
|
+
#include "blake2.h"
|
23
|
+
#include "blake2-impl.h"
|
24
|
+
#include "blake2b-compress-avx2.h"
|
25
|
+
|
26
|
+
CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
|
27
|
+
{
|
28
|
+
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
29
|
+
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
30
|
+
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
31
|
+
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
32
|
+
};
|
33
|
+
|
34
|
+
int blake2b_compress_avx2( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
35
|
+
{
|
36
|
+
__m256i a = LOADU(&S->h[0]);
|
37
|
+
__m256i b = LOADU(&S->h[4]);
|
38
|
+
BLAKE2B_COMPRESS_V1(a, b, block, S->t[0], S->t[1], S->f[0], S->f[1]);
|
39
|
+
STOREU(&S->h[0], a);
|
40
|
+
STOREU(&S->h[4], b);
|
41
|
+
|
42
|
+
return 0;
|
43
|
+
}
|
44
|
+
|
45
|
+
#endif
|
@@ -0,0 +1,123 @@
|
|
1
|
+
|
2
|
+
#ifndef blake2b_compress_avx2_H
|
3
|
+
#define blake2b_compress_avx2_H
|
4
|
+
|
5
|
+
#define LOAD128(p) _mm_load_si128((__m128i *)(p))
|
6
|
+
#define STORE128(p, r) _mm_store_si128((__m128i *)(p), r)
|
7
|
+
|
8
|
+
#define LOADU128(p) _mm_loadu_si128((__m128i *)(p))
|
9
|
+
#define STOREU128(p, r) _mm_storeu_si128((__m128i *)(p), r)
|
10
|
+
|
11
|
+
#define LOAD(p) _mm256_load_si256((__m256i *)(p))
|
12
|
+
#define STORE(p, r) _mm256_store_si256((__m256i *)(p), r)
|
13
|
+
|
14
|
+
#define LOADU(p) _mm256_loadu_si256((__m256i *)(p))
|
15
|
+
#define STOREU(p, r) _mm256_storeu_si256((__m256i *)(p), r)
|
16
|
+
|
17
|
+
static inline uint64_t LOADU64(const void *p) {
|
18
|
+
uint64_t v;
|
19
|
+
memcpy(&v, p, sizeof v);
|
20
|
+
return v;
|
21
|
+
}
|
22
|
+
|
23
|
+
#define ROTATE16 _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, \
|
24
|
+
2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
|
25
|
+
|
26
|
+
#define ROTATE24 _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, \
|
27
|
+
3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
|
28
|
+
|
29
|
+
#define ADD(a, b) _mm256_add_epi64(a, b)
|
30
|
+
#define SUB(a, b) _mm256_sub_epi64(a, b)
|
31
|
+
|
32
|
+
#define XOR(a, b) _mm256_xor_si256(a, b)
|
33
|
+
#define AND(a, b) _mm256_and_si256(a, b)
|
34
|
+
#define OR(a, b) _mm256_or_si256(a, b)
|
35
|
+
|
36
|
+
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
|
37
|
+
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
|
38
|
+
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
|
39
|
+
#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
|
40
|
+
|
41
|
+
#define BLAKE2B_G1_V1(a, b, c, d, m) do { \
|
42
|
+
a = ADD(a, m); \
|
43
|
+
a = ADD(a, b); d = XOR(d, a); d = ROT32(d); \
|
44
|
+
c = ADD(c, d); b = XOR(b, c); b = ROT24(b); \
|
45
|
+
} while(0)
|
46
|
+
|
47
|
+
#define BLAKE2B_G2_V1(a, b, c, d, m) do { \
|
48
|
+
a = ADD(a, m); \
|
49
|
+
a = ADD(a, b); d = XOR(d, a); d = ROT16(d); \
|
50
|
+
c = ADD(c, d); b = XOR(b, c); b = ROT63(b); \
|
51
|
+
} while(0)
|
52
|
+
|
53
|
+
#define BLAKE2B_DIAG_V1(a, b, c, d) do { \
|
54
|
+
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2,1,0,3)); \
|
55
|
+
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1,0,3,2)); \
|
56
|
+
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0,3,2,1)); \
|
57
|
+
} while(0)
|
58
|
+
|
59
|
+
#define BLAKE2B_UNDIAG_V1(a, b, c, d) do { \
|
60
|
+
d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0,3,2,1)); \
|
61
|
+
c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1,0,3,2)); \
|
62
|
+
b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2,1,0,3)); \
|
63
|
+
} while(0)
|
64
|
+
|
65
|
+
#include "blake2b-load-avx2.h"
|
66
|
+
|
67
|
+
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) do { \
|
68
|
+
__m256i b0; \
|
69
|
+
BLAKE2B_LOAD_MSG_ ##r ##_1(b0); \
|
70
|
+
BLAKE2B_G1_V1(a, b, c, d, b0); \
|
71
|
+
BLAKE2B_LOAD_MSG_ ##r ##_2(b0); \
|
72
|
+
BLAKE2B_G2_V1(a, b, c, d, b0); \
|
73
|
+
BLAKE2B_DIAG_V1(a, b, c, d); \
|
74
|
+
BLAKE2B_LOAD_MSG_ ##r ##_3(b0); \
|
75
|
+
BLAKE2B_G1_V1(a, b, c, d, b0); \
|
76
|
+
BLAKE2B_LOAD_MSG_ ##r ##_4(b0); \
|
77
|
+
BLAKE2B_G2_V1(a, b, c, d, b0); \
|
78
|
+
BLAKE2B_UNDIAG_V1(a, b, c, d); \
|
79
|
+
} while(0)
|
80
|
+
|
81
|
+
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) do { \
|
82
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
|
83
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
|
84
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
|
85
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
|
86
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
|
87
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
|
88
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
|
89
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
|
90
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
|
91
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
|
92
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
|
93
|
+
BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
|
94
|
+
} while(0)
|
95
|
+
|
96
|
+
#define DECLARE_MESSAGE_WORDS(m) \
|
97
|
+
const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
|
98
|
+
const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
|
99
|
+
const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
|
100
|
+
const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
|
101
|
+
const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
|
102
|
+
const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
|
103
|
+
const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
|
104
|
+
const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
|
105
|
+
__m256i t0, t1;
|
106
|
+
|
107
|
+
#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) do { \
|
108
|
+
DECLARE_MESSAGE_WORDS(m) \
|
109
|
+
const __m256i iv0 = a; \
|
110
|
+
const __m256i iv1 = b; \
|
111
|
+
__m256i c = LOAD(&blake2b_IV[0]); \
|
112
|
+
__m256i d = XOR( \
|
113
|
+
LOAD(&blake2b_IV[4]), \
|
114
|
+
_mm256_set_epi64x(f1, f0, t1, t0) \
|
115
|
+
); \
|
116
|
+
BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
|
117
|
+
a = XOR(a, c); \
|
118
|
+
b = XOR(b, d); \
|
119
|
+
a = XOR(a, iv0); \
|
120
|
+
b = XOR(b, iv1); \
|
121
|
+
} while(0)
|
122
|
+
|
123
|
+
#endif
|
@@ -4,8 +4,9 @@
|
|
4
4
|
|
5
5
|
#include "blake2.h"
|
6
6
|
#include "blake2-impl.h"
|
7
|
+
#include "../../sodium/common.h"
|
7
8
|
|
8
|
-
static const uint64_t blake2b_IV[8] =
|
9
|
+
CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
|
9
10
|
{
|
10
11
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
11
12
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
@@ -36,7 +37,7 @@ int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYT
|
|
36
37
|
int i;
|
37
38
|
|
38
39
|
for( i = 0; i < 16; ++i )
|
39
|
-
m[i] =
|
40
|
+
m[i] = LOAD64_LE( block + i * sizeof( m[i] ) );
|
40
41
|
|
41
42
|
for( i = 0; i < 8; ++i )
|
42
43
|
v[i] = S->h[i];
|
@@ -18,9 +18,9 @@
|
|
18
18
|
|
19
19
|
#include "blake2.h"
|
20
20
|
#include "blake2-impl.h"
|
21
|
-
#include "blake2b-round.h"
|
21
|
+
#include "blake2b-compress-sse41.h"
|
22
22
|
|
23
|
-
static const uint64_t blake2b_IV[8] =
|
23
|
+
CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
|
24
24
|
{
|
25
25
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
26
26
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
@@ -1,31 +1,10 @@
|
|
1
|
-
/*
|
2
|
-
BLAKE2 reference source code package - optimized C implementations
|
3
1
|
|
4
|
-
|
5
|
-
|
6
|
-
To the extent possible under law, the author(s) have dedicated all copyright
|
7
|
-
and related and neighboring rights to this software to the public domain
|
8
|
-
worldwide. This software is distributed without any warranty.
|
9
|
-
|
10
|
-
You should have received a copy of the CC0 Public Domain Dedication along with
|
11
|
-
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
12
|
-
*/
|
13
|
-
|
14
|
-
#ifndef blake2b_round_H
|
15
|
-
#define blake2b_round_H
|
16
|
-
|
17
|
-
#ifndef BLAKE2_USE_SSSE3
|
18
|
-
# error BLAKE2_USE_SSSE3 must be defined in order to use this file
|
19
|
-
#endif
|
2
|
+
#ifndef blake2b_compress_sse41_H
|
3
|
+
#define blake2b_compress_sse41_H
|
20
4
|
|
21
5
|
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(const void *)(p) )
|
22
6
|
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(void *)(p), r)
|
23
7
|
|
24
|
-
#define TOF(reg) _mm_castsi128_ps((reg))
|
25
|
-
#define TOI(reg) _mm_castps_si128((reg))
|
26
|
-
|
27
|
-
|
28
|
-
/* Microarchitecture-specific macros */
|
29
8
|
#define _mm_roti_epi64(x, c) \
|
30
9
|
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
31
10
|
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
@@ -33,7 +12,6 @@
|
|
33
12
|
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
34
13
|
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
35
14
|
|
36
|
-
|
37
15
|
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
38
16
|
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
39
17
|
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
@@ -102,11 +80,7 @@
|
|
102
80
|
row4l = t1; \
|
103
81
|
row4h = t0;
|
104
82
|
|
105
|
-
#if defined(BLAKE2_USE_SSE41)
|
106
83
|
#include "blake2b-load-sse41.h"
|
107
|
-
#else
|
108
|
-
#include "blake2b-load-sse2.h"
|
109
|
-
#endif
|
110
84
|
|
111
85
|
#define ROUND(r) \
|
112
86
|
LOAD_MSG_ ##r ##_1(b0, b1); \
|
@@ -1,6 +1,4 @@
|
|
1
1
|
|
2
|
-
#define BLAKE2_USE_SSSE3
|
3
|
-
|
4
2
|
#include <stdint.h>
|
5
3
|
#include <string.h>
|
6
4
|
|
@@ -18,9 +16,9 @@
|
|
18
16
|
|
19
17
|
#include "blake2.h"
|
20
18
|
#include "blake2-impl.h"
|
21
|
-
#include "blake2b-round.h"
|
19
|
+
#include "blake2b-compress-ssse3.h"
|
22
20
|
|
23
|
-
static const uint64_t blake2b_IV[8] =
|
21
|
+
CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
|
24
22
|
{
|
25
23
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
26
24
|
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
@@ -0,0 +1,97 @@
|
|
1
|
+
|
2
|
+
#ifndef blake2b_compress_ssse3_H
|
3
|
+
#define blake2b_compress_ssse3_H
|
4
|
+
|
5
|
+
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(const void *)(p) )
|
6
|
+
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(void *)(p), r)
|
7
|
+
|
8
|
+
#define _mm_roti_epi64(x, c) \
|
9
|
+
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
10
|
+
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
11
|
+
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
12
|
+
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
13
|
+
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
14
|
+
|
15
|
+
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
16
|
+
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
17
|
+
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
18
|
+
\
|
19
|
+
row4l = _mm_xor_si128(row4l, row1l); \
|
20
|
+
row4h = _mm_xor_si128(row4h, row1h); \
|
21
|
+
\
|
22
|
+
row4l = _mm_roti_epi64(row4l, -32); \
|
23
|
+
row4h = _mm_roti_epi64(row4h, -32); \
|
24
|
+
\
|
25
|
+
row3l = _mm_add_epi64(row3l, row4l); \
|
26
|
+
row3h = _mm_add_epi64(row3h, row4h); \
|
27
|
+
\
|
28
|
+
row2l = _mm_xor_si128(row2l, row3l); \
|
29
|
+
row2h = _mm_xor_si128(row2h, row3h); \
|
30
|
+
\
|
31
|
+
row2l = _mm_roti_epi64(row2l, -24); \
|
32
|
+
row2h = _mm_roti_epi64(row2h, -24); \
|
33
|
+
|
34
|
+
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
35
|
+
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
36
|
+
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
37
|
+
\
|
38
|
+
row4l = _mm_xor_si128(row4l, row1l); \
|
39
|
+
row4h = _mm_xor_si128(row4h, row1h); \
|
40
|
+
\
|
41
|
+
row4l = _mm_roti_epi64(row4l, -16); \
|
42
|
+
row4h = _mm_roti_epi64(row4h, -16); \
|
43
|
+
\
|
44
|
+
row3l = _mm_add_epi64(row3l, row4l); \
|
45
|
+
row3h = _mm_add_epi64(row3h, row4h); \
|
46
|
+
\
|
47
|
+
row2l = _mm_xor_si128(row2l, row3l); \
|
48
|
+
row2h = _mm_xor_si128(row2h, row3h); \
|
49
|
+
\
|
50
|
+
row2l = _mm_roti_epi64(row2l, -63); \
|
51
|
+
row2h = _mm_roti_epi64(row2h, -63); \
|
52
|
+
|
53
|
+
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
54
|
+
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
55
|
+
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
56
|
+
row2l = t0; \
|
57
|
+
row2h = t1; \
|
58
|
+
\
|
59
|
+
t0 = row3l; \
|
60
|
+
row3l = row3h; \
|
61
|
+
row3h = t0; \
|
62
|
+
\
|
63
|
+
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
64
|
+
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
65
|
+
row4l = t1; \
|
66
|
+
row4h = t0;
|
67
|
+
|
68
|
+
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
69
|
+
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
70
|
+
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
71
|
+
row2l = t0; \
|
72
|
+
row2h = t1; \
|
73
|
+
\
|
74
|
+
t0 = row3l; \
|
75
|
+
row3l = row3h; \
|
76
|
+
row3h = t0; \
|
77
|
+
\
|
78
|
+
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
79
|
+
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
80
|
+
row4l = t1; \
|
81
|
+
row4h = t0;
|
82
|
+
|
83
|
+
#include "blake2b-load-sse2.h"
|
84
|
+
|
85
|
+
#define ROUND(r) \
|
86
|
+
LOAD_MSG_ ##r ##_1(b0, b1); \
|
87
|
+
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
88
|
+
LOAD_MSG_ ##r ##_2(b0, b1); \
|
89
|
+
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
90
|
+
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
91
|
+
LOAD_MSG_ ##r ##_3(b0, b1); \
|
92
|
+
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
93
|
+
LOAD_MSG_ ##r ##_4(b0, b1); \
|
94
|
+
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
95
|
+
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
96
|
+
|
97
|
+
#endif
|
@@ -0,0 +1,339 @@
|
|
1
|
+
#ifndef blake2b_load_avx2_H
|
2
|
+
#define blake2b_load_avx2_H
|
3
|
+
|
4
|
+
#define BLAKE2B_LOAD_MSG_0_1(b0) do { \
|
5
|
+
t0 = _mm256_unpacklo_epi64(m0, m1); \
|
6
|
+
t1 = _mm256_unpacklo_epi64(m2, m3); \
|
7
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
8
|
+
} while(0)
|
9
|
+
|
10
|
+
#define BLAKE2B_LOAD_MSG_0_2(b0) \
|
11
|
+
do { \
|
12
|
+
t0 = _mm256_unpackhi_epi64(m0, m1); \
|
13
|
+
t1 = _mm256_unpackhi_epi64(m2, m3); \
|
14
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
15
|
+
} while(0)
|
16
|
+
|
17
|
+
#define BLAKE2B_LOAD_MSG_0_3(b0) \
|
18
|
+
do { \
|
19
|
+
t0 = _mm256_unpacklo_epi64(m4, m5); \
|
20
|
+
t1 = _mm256_unpacklo_epi64(m6, m7); \
|
21
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
22
|
+
} while(0)
|
23
|
+
|
24
|
+
#define BLAKE2B_LOAD_MSG_0_4(b0) \
|
25
|
+
do { \
|
26
|
+
t0 = _mm256_unpackhi_epi64(m4, m5); \
|
27
|
+
t1 = _mm256_unpackhi_epi64(m6, m7); \
|
28
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
29
|
+
} while(0)
|
30
|
+
|
31
|
+
#define BLAKE2B_LOAD_MSG_1_1(b0) \
|
32
|
+
do { \
|
33
|
+
t0 = _mm256_unpacklo_epi64(m7, m2); \
|
34
|
+
t1 = _mm256_unpackhi_epi64(m4, m6); \
|
35
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
36
|
+
} while(0)
|
37
|
+
|
38
|
+
#define BLAKE2B_LOAD_MSG_1_2(b0) \
|
39
|
+
do { \
|
40
|
+
t0 = _mm256_unpacklo_epi64(m5, m4); \
|
41
|
+
t1 = _mm256_alignr_epi8(m3, m7, 8); \
|
42
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
43
|
+
} while(0)
|
44
|
+
|
45
|
+
#define BLAKE2B_LOAD_MSG_1_3(b0) \
|
46
|
+
do { \
|
47
|
+
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
48
|
+
t1 = _mm256_unpackhi_epi64(m5, m2); \
|
49
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
50
|
+
} while(0)
|
51
|
+
|
52
|
+
#define BLAKE2B_LOAD_MSG_1_4(b0) \
|
53
|
+
do { \
|
54
|
+
t0 = _mm256_unpacklo_epi64(m6, m1); \
|
55
|
+
t1 = _mm256_unpackhi_epi64(m3, m1); \
|
56
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
57
|
+
} while(0)
|
58
|
+
|
59
|
+
#define BLAKE2B_LOAD_MSG_2_1(b0) \
|
60
|
+
do { \
|
61
|
+
t0 = _mm256_alignr_epi8(m6, m5, 8); \
|
62
|
+
t1 = _mm256_unpackhi_epi64(m2, m7); \
|
63
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
64
|
+
} while(0)
|
65
|
+
|
66
|
+
#define BLAKE2B_LOAD_MSG_2_2(b0) \
|
67
|
+
do { \
|
68
|
+
t0 = _mm256_unpacklo_epi64(m4, m0); \
|
69
|
+
t1 = _mm256_blend_epi32(m6, m1, 0x33); \
|
70
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
71
|
+
} while(0)
|
72
|
+
|
73
|
+
#define BLAKE2B_LOAD_MSG_2_3(b0) \
|
74
|
+
do { \
|
75
|
+
t0 = _mm256_blend_epi32(m1, m5, 0x33); \
|
76
|
+
t1 = _mm256_unpackhi_epi64(m3, m4); \
|
77
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
78
|
+
} while(0)
|
79
|
+
|
80
|
+
#define BLAKE2B_LOAD_MSG_2_4(b0) \
|
81
|
+
do { \
|
82
|
+
t0 = _mm256_unpacklo_epi64(m7, m3); \
|
83
|
+
t1 = _mm256_alignr_epi8(m2, m0, 8); \
|
84
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
85
|
+
} while(0)
|
86
|
+
|
87
|
+
#define BLAKE2B_LOAD_MSG_3_1(b0) \
|
88
|
+
do { \
|
89
|
+
t0 = _mm256_unpackhi_epi64(m3, m1); \
|
90
|
+
t1 = _mm256_unpackhi_epi64(m6, m5); \
|
91
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
92
|
+
} while(0)
|
93
|
+
|
94
|
+
#define BLAKE2B_LOAD_MSG_3_2(b0) \
|
95
|
+
do { \
|
96
|
+
t0 = _mm256_unpackhi_epi64(m4, m0); \
|
97
|
+
t1 = _mm256_unpacklo_epi64(m6, m7); \
|
98
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
99
|
+
} while(0)
|
100
|
+
|
101
|
+
#define BLAKE2B_LOAD_MSG_3_3(b0) \
|
102
|
+
do { \
|
103
|
+
t0 = _mm256_blend_epi32(m2, m1, 0x33); \
|
104
|
+
t1 = _mm256_blend_epi32(m7, m2, 0x33); \
|
105
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
106
|
+
} while(0)
|
107
|
+
|
108
|
+
#define BLAKE2B_LOAD_MSG_3_4(b0) \
|
109
|
+
do { \
|
110
|
+
t0 = _mm256_unpacklo_epi64(m3, m5); \
|
111
|
+
t1 = _mm256_unpacklo_epi64(m0, m4); \
|
112
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
113
|
+
} while(0)
|
114
|
+
|
115
|
+
#define BLAKE2B_LOAD_MSG_4_1(b0) \
|
116
|
+
do { \
|
117
|
+
t0 = _mm256_unpackhi_epi64(m4, m2); \
|
118
|
+
t1 = _mm256_unpacklo_epi64(m1, m5); \
|
119
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
120
|
+
} while(0)
|
121
|
+
|
122
|
+
#define BLAKE2B_LOAD_MSG_4_2(b0) \
|
123
|
+
do { \
|
124
|
+
t0 = _mm256_blend_epi32(m3, m0, 0x33); \
|
125
|
+
t1 = _mm256_blend_epi32(m7, m2, 0x33); \
|
126
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
127
|
+
} while(0)
|
128
|
+
|
129
|
+
#define BLAKE2B_LOAD_MSG_4_3(b0) \
|
130
|
+
do { \
|
131
|
+
t0 = _mm256_blend_epi32(m5, m7, 0x33); \
|
132
|
+
t1 = _mm256_blend_epi32(m1, m3, 0x33); \
|
133
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
134
|
+
} while(0)
|
135
|
+
|
136
|
+
#define BLAKE2B_LOAD_MSG_4_4(b0) \
|
137
|
+
do { \
|
138
|
+
t0 = _mm256_alignr_epi8(m6, m0, 8); \
|
139
|
+
t1 = _mm256_blend_epi32(m6, m4, 0x33); \
|
140
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
141
|
+
} while(0)
|
142
|
+
|
143
|
+
#define BLAKE2B_LOAD_MSG_5_1(b0) \
|
144
|
+
do { \
|
145
|
+
t0 = _mm256_unpacklo_epi64(m1, m3); \
|
146
|
+
t1 = _mm256_unpacklo_epi64(m0, m4); \
|
147
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
148
|
+
} while(0)
|
149
|
+
|
150
|
+
#define BLAKE2B_LOAD_MSG_5_2(b0) \
|
151
|
+
do { \
|
152
|
+
t0 = _mm256_unpacklo_epi64(m6, m5); \
|
153
|
+
t1 = _mm256_unpackhi_epi64(m5, m1); \
|
154
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
155
|
+
} while(0)
|
156
|
+
|
157
|
+
#define BLAKE2B_LOAD_MSG_5_3(b0) \
|
158
|
+
do { \
|
159
|
+
t0 = _mm256_blend_epi32(m3, m2, 0x33); \
|
160
|
+
t1 = _mm256_unpackhi_epi64(m7, m0); \
|
161
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
162
|
+
} while(0)
|
163
|
+
|
164
|
+
#define BLAKE2B_LOAD_MSG_5_4(b0) \
|
165
|
+
do { \
|
166
|
+
t0 = _mm256_unpackhi_epi64(m6, m2); \
|
167
|
+
t1 = _mm256_blend_epi32(m4, m7, 0x33); \
|
168
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
169
|
+
} while(0)
|
170
|
+
|
171
|
+
#define BLAKE2B_LOAD_MSG_6_1(b0) \
|
172
|
+
do { \
|
173
|
+
t0 = _mm256_blend_epi32(m0, m6, 0x33); \
|
174
|
+
t1 = _mm256_unpacklo_epi64(m7, m2); \
|
175
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
176
|
+
} while(0)
|
177
|
+
|
178
|
+
#define BLAKE2B_LOAD_MSG_6_2(b0) \
|
179
|
+
do { \
|
180
|
+
t0 = _mm256_unpackhi_epi64(m2, m7); \
|
181
|
+
t1 = _mm256_alignr_epi8(m5, m6, 8); \
|
182
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
183
|
+
} while(0)
|
184
|
+
|
185
|
+
#define BLAKE2B_LOAD_MSG_6_3(b0) \
|
186
|
+
do { \
|
187
|
+
t0 = _mm256_unpacklo_epi64(m0, m3); \
|
188
|
+
t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
|
189
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
190
|
+
} while(0)
|
191
|
+
|
192
|
+
#define BLAKE2B_LOAD_MSG_6_4(b0) \
|
193
|
+
do { \
|
194
|
+
t0 = _mm256_unpackhi_epi64(m3, m1); \
|
195
|
+
t1 = _mm256_blend_epi32(m5, m1, 0x33); \
|
196
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
197
|
+
} while(0)
|
198
|
+
|
199
|
+
#define BLAKE2B_LOAD_MSG_7_1(b0) \
|
200
|
+
do { \
|
201
|
+
t0 = _mm256_unpackhi_epi64(m6, m3); \
|
202
|
+
t1 = _mm256_blend_epi32(m1, m6, 0x33); \
|
203
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
204
|
+
} while(0)
|
205
|
+
|
206
|
+
#define BLAKE2B_LOAD_MSG_7_2(b0) \
|
207
|
+
do { \
|
208
|
+
t0 = _mm256_alignr_epi8(m7, m5, 8); \
|
209
|
+
t1 = _mm256_unpackhi_epi64(m0, m4); \
|
210
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
211
|
+
} while(0)
|
212
|
+
|
213
|
+
#define BLAKE2B_LOAD_MSG_7_3(b0) \
|
214
|
+
do { \
|
215
|
+
t0 = _mm256_unpackhi_epi64(m2, m7); \
|
216
|
+
t1 = _mm256_unpacklo_epi64(m4, m1); \
|
217
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
218
|
+
} while(0)
|
219
|
+
|
220
|
+
#define BLAKE2B_LOAD_MSG_7_4(b0) \
|
221
|
+
do { \
|
222
|
+
t0 = _mm256_unpacklo_epi64(m0, m2); \
|
223
|
+
t1 = _mm256_unpacklo_epi64(m3, m5); \
|
224
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
225
|
+
} while(0)
|
226
|
+
|
227
|
+
#define BLAKE2B_LOAD_MSG_8_1(b0) \
|
228
|
+
do { \
|
229
|
+
t0 = _mm256_unpacklo_epi64(m3, m7); \
|
230
|
+
t1 = _mm256_alignr_epi8(m0, m5, 8); \
|
231
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
232
|
+
} while(0)
|
233
|
+
|
234
|
+
#define BLAKE2B_LOAD_MSG_8_2(b0) \
|
235
|
+
do { \
|
236
|
+
t0 = _mm256_unpackhi_epi64(m7, m4); \
|
237
|
+
t1 = _mm256_alignr_epi8(m4, m1, 8); \
|
238
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
239
|
+
} while(0)
|
240
|
+
|
241
|
+
#define BLAKE2B_LOAD_MSG_8_3(b0) \
|
242
|
+
do { \
|
243
|
+
t0 = m6; \
|
244
|
+
t1 = _mm256_alignr_epi8(m5, m0, 8); \
|
245
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
246
|
+
} while(0)
|
247
|
+
|
248
|
+
#define BLAKE2B_LOAD_MSG_8_4(b0) \
|
249
|
+
do { \
|
250
|
+
t0 = _mm256_blend_epi32(m3, m1, 0x33); \
|
251
|
+
t1 = m2; \
|
252
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
253
|
+
} while(0)
|
254
|
+
|
255
|
+
#define BLAKE2B_LOAD_MSG_9_1(b0) \
|
256
|
+
do { \
|
257
|
+
t0 = _mm256_unpacklo_epi64(m5, m4); \
|
258
|
+
t1 = _mm256_unpackhi_epi64(m3, m0); \
|
259
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
260
|
+
} while(0)
|
261
|
+
|
262
|
+
#define BLAKE2B_LOAD_MSG_9_2(b0) \
|
263
|
+
do { \
|
264
|
+
t0 = _mm256_unpacklo_epi64(m1, m2); \
|
265
|
+
t1 = _mm256_blend_epi32(m2, m3, 0x33); \
|
266
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
267
|
+
} while(0)
|
268
|
+
|
269
|
+
#define BLAKE2B_LOAD_MSG_9_3(b0) \
|
270
|
+
do { \
|
271
|
+
t0 = _mm256_unpackhi_epi64(m7, m4); \
|
272
|
+
t1 = _mm256_unpackhi_epi64(m1, m6); \
|
273
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
274
|
+
} while(0)
|
275
|
+
|
276
|
+
#define BLAKE2B_LOAD_MSG_9_4(b0) \
|
277
|
+
do { \
|
278
|
+
t0 = _mm256_alignr_epi8(m7, m5, 8); \
|
279
|
+
t1 = _mm256_unpacklo_epi64(m6, m0); \
|
280
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
281
|
+
} while(0)
|
282
|
+
|
283
|
+
#define BLAKE2B_LOAD_MSG_10_1(b0) \
|
284
|
+
do { \
|
285
|
+
t0 = _mm256_unpacklo_epi64(m0, m1); \
|
286
|
+
t1 = _mm256_unpacklo_epi64(m2, m3); \
|
287
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
288
|
+
} while(0)
|
289
|
+
|
290
|
+
#define BLAKE2B_LOAD_MSG_10_2(b0) \
|
291
|
+
do { \
|
292
|
+
t0 = _mm256_unpackhi_epi64(m0, m1); \
|
293
|
+
t1 = _mm256_unpackhi_epi64(m2, m3); \
|
294
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
295
|
+
} while(0)
|
296
|
+
|
297
|
+
#define BLAKE2B_LOAD_MSG_10_3(b0) \
|
298
|
+
do { \
|
299
|
+
t0 = _mm256_unpacklo_epi64(m4, m5); \
|
300
|
+
t1 = _mm256_unpacklo_epi64(m6, m7); \
|
301
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
302
|
+
} while(0)
|
303
|
+
|
304
|
+
#define BLAKE2B_LOAD_MSG_10_4(b0) \
|
305
|
+
do { \
|
306
|
+
t0 = _mm256_unpackhi_epi64(m4, m5); \
|
307
|
+
t1 = _mm256_unpackhi_epi64(m6, m7); \
|
308
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
309
|
+
} while(0)
|
310
|
+
|
311
|
+
#define BLAKE2B_LOAD_MSG_11_1(b0) \
|
312
|
+
do { \
|
313
|
+
t0 = _mm256_unpacklo_epi64(m7, m2); \
|
314
|
+
t1 = _mm256_unpackhi_epi64(m4, m6); \
|
315
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
316
|
+
} while(0)
|
317
|
+
|
318
|
+
#define BLAKE2B_LOAD_MSG_11_2(b0) \
|
319
|
+
do { \
|
320
|
+
t0 = _mm256_unpacklo_epi64(m5, m4); \
|
321
|
+
t1 = _mm256_alignr_epi8(m3, m7, 8); \
|
322
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
323
|
+
} while(0)
|
324
|
+
|
325
|
+
#define BLAKE2B_LOAD_MSG_11_3(b0) \
|
326
|
+
do { \
|
327
|
+
t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
328
|
+
t1 = _mm256_unpackhi_epi64(m5, m2); \
|
329
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
330
|
+
} while(0)
|
331
|
+
|
332
|
+
#define BLAKE2B_LOAD_MSG_11_4(b0) \
|
333
|
+
do { \
|
334
|
+
t0 = _mm256_unpacklo_epi64(m6, m1); \
|
335
|
+
t1 = _mm256_unpackhi_epi64(m3, m1); \
|
336
|
+
b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
|
337
|
+
} while(0)
|
338
|
+
|
339
|
+
#endif
|