rbnacl-libsodium 1.0.11 → 1.0.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +4 -0
- data/lib/rbnacl/libsodium/version.rb +1 -1
- data/vendor/libsodium/AUTHORS +45 -33
- data/vendor/libsodium/ChangeLog +63 -2
- data/vendor/libsodium/LICENSE +1 -1
- data/vendor/libsodium/Makefile.am +1 -0
- data/vendor/libsodium/Makefile.in +17 -14
- data/vendor/libsodium/README.markdown +1 -0
- data/vendor/libsodium/THANKS +38 -4
- data/vendor/libsodium/aclocal.m4 +25 -24
- data/vendor/libsodium/autogen.sh +12 -7
- data/vendor/libsodium/autom4te.cache/output.0 +5066 -2281
- data/vendor/libsodium/autom4te.cache/output.1 +1013 -600
- data/vendor/libsodium/autom4te.cache/output.2 +5066 -2281
- data/vendor/libsodium/autom4te.cache/requests +426 -1584
- data/vendor/libsodium/autom4te.cache/traces.0 +2044 -876
- data/vendor/libsodium/autom4te.cache/traces.1 +996 -523
- data/vendor/libsodium/autom4te.cache/traces.2 +1996 -828
- data/vendor/libsodium/build-aux/compile +5 -4
- data/vendor/libsodium/build-aux/config.guess +120 -68
- data/vendor/libsodium/build-aux/config.sub +51 -22
- data/vendor/libsodium/build-aux/depcomp +3 -3
- data/vendor/libsodium/build-aux/install-sh +2 -2
- data/vendor/libsodium/build-aux/missing +3 -3
- data/vendor/libsodium/build-aux/test-driver +3 -3
- data/vendor/libsodium/builds/msvc/properties/Win32.props +4 -1
- data/vendor/libsodium/builds/msvc/properties/x64.props +4 -1
- data/vendor/libsodium/builds/msvc/resource.h +14 -0
- data/vendor/libsodium/builds/msvc/resource.rc +63 -0
- data/vendor/libsodium/builds/msvc/version.h +7 -4
- data/vendor/libsodium/builds/msvc/vs2010/libsodium.import.props +1 -1
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.props +10 -5
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +182 -139
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +632 -470
- data/vendor/libsodium/builds/msvc/vs2012/libsodium.import.props +1 -1
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.props +10 -5
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +182 -129
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +632 -470
- data/vendor/libsodium/builds/msvc/vs2013/libsodium.import.props +1 -1
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.props +10 -5
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +182 -129
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +632 -470
- data/vendor/libsodium/builds/msvc/vs2015/libsodium.import.props +1 -1
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.props +10 -5
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +181 -118
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +632 -470
- data/vendor/libsodium/builds/msvc/vs2017/libsodium.import.props +52 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium.import.xml +17 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium.sln +52 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.props +48 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.vcxproj +320 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters +962 -0
- data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.xml +15 -0
- data/vendor/libsodium/configure +1002 -589
- data/vendor/libsodium/configure.ac +48 -21
- data/vendor/libsodium/contrib/Findsodium.cmake +267 -0
- data/vendor/libsodium/contrib/Makefile.am +3 -0
- data/vendor/libsodium/contrib/Makefile.in +483 -0
- data/vendor/libsodium/dist-build/Makefile.in +11 -7
- data/vendor/libsodium/dist-build/android-armv8-a.sh +1 -1
- data/vendor/libsodium/dist-build/android-build.sh +25 -17
- data/vendor/libsodium/dist-build/android-mips32.sh +1 -1
- data/vendor/libsodium/dist-build/android-mips64.sh +1 -1
- data/vendor/libsodium/dist-build/android-x86_64.sh +1 -1
- data/vendor/libsodium/dist-build/emscripten-symbols.def +150 -2
- data/vendor/libsodium/dist-build/emscripten-wasm.sh +132 -0
- data/vendor/libsodium/dist-build/emscripten.sh +8 -6
- data/vendor/libsodium/dist-build/ios.sh +29 -5
- data/vendor/libsodium/libsodium.vcxproj +139 -77
- data/vendor/libsodium/libsodium.vcxproj.filters +315 -144
- data/vendor/libsodium/m4/ax_check_catchable_segv.m4 +42 -0
- data/vendor/libsodium/m4/ax_check_compile_flag.m4 +6 -4
- data/vendor/libsodium/m4/ax_check_define.m4 +3 -3
- data/vendor/libsodium/m4/ax_check_gnu_make.m4 +31 -25
- data/vendor/libsodium/m4/ax_check_link_flag.m4 +8 -6
- data/vendor/libsodium/m4/ax_pthread.m4 +275 -275
- data/vendor/libsodium/m4/ax_valgrind_check.m4 +92 -41
- data/vendor/libsodium/m4/pkg.m4 +1 -1
- data/vendor/libsodium/msvc-scripts/Makefile.in +11 -7
- data/vendor/libsodium/msvc-scripts/process.bat +4 -3
- data/vendor/libsodium/packaging/dotnet-core/README.md +59 -0
- data/vendor/libsodium/packaging/dotnet-core/desktop.targets +16 -0
- data/vendor/libsodium/packaging/dotnet-core/libsodium.props +33 -0
- data/vendor/libsodium/packaging/dotnet-core/prepare.py +262 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/alpine-x64 +3 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/build +9 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/centos-x64 +3 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/debian-x64 +4 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/fedora-x64 +3 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/opensuse-x64 +3 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/pack +5 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/test +27 -0
- data/vendor/libsodium/packaging/dotnet-core/recipes/ubuntu-x64 +4 -0
- data/vendor/libsodium/packaging/nuget/package.config +1 -1
- data/vendor/libsodium/packaging/nuget/package.gsl +3 -3
- data/vendor/libsodium/src/Makefile.in +11 -7
- data/vendor/libsodium/src/libsodium/Makefile.am +113 -98
- data/vendor/libsodium/src/libsodium/Makefile.in +1034 -1236
- data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +31 -12
- data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +31 -10
- data/vendor/libsodium/src/libsodium/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c +153 -0
- data/vendor/libsodium/src/libsodium/crypto_auth/crypto_auth.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/{cp/hmac_hmacsha256.c → auth_hmacsha256.c} +43 -35
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/{cp/hmac_hmacsha512.c → auth_hmacsha512.c} +43 -35
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/{cp/hmac_hmacsha512256.c → auth_hmacsha512256.c} +48 -9
- data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_easy.c +4 -3
- data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_seal.c +2 -1
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xchacha20poly1305/box_curve25519xchacha20poly1305.c +197 -0
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xchacha20poly1305/box_seal_curve25519xchacha20poly1305.c +79 -0
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/box_curve25519xsalsa20poly1305.c +150 -0
- data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +1156 -662
- data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.c +12 -5
- data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/{core_hsalsa20_api.c → core_hsalsa20.c} +0 -0
- data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20_ref2.c +95 -0
- data/vendor/libsodium/src/libsodium/crypto_core/salsa/ref/core_salsa_ref.c +195 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/{blake2/generichash_blake2_api.c → blake2b/generichash_blake2.c} +7 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2.h +109 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c +49 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h +140 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ref.c +92 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c +87 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h +103 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c +90 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h +103 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-avx2.h +340 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-sse2.h +164 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-sse41.h +307 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-ref.c +494 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/{blake2 → blake2b}/ref/generichash_blake2b.c +22 -26
- data/vendor/libsodium/src/libsodium/crypto_generichash/crypto_generichash.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256_cp.c +254 -0
- data/vendor/libsodium/src/libsodium/crypto_hash/sha256/{hash_sha256_api.c → hash_sha256.c} +4 -2
- data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512_cp.c +280 -0
- data/vendor/libsodium/src/libsodium/crypto_hash/sha512/{hash_sha512_api.c → hash_sha512.c} +4 -2
- data/vendor/libsodium/src/libsodium/crypto_kdf/blake2b/kdf_blake2b.c +52 -0
- data/vendor/libsodium/src/libsodium/crypto_kdf/crypto_kdf.c +49 -0
- data/vendor/libsodium/src/libsodium/crypto_kx/crypto_kx.c +136 -0
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/crypto_onetimeauth.c +6 -0
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c +34 -27
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +203 -156
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +178 -134
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c +22 -4
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h +10 -12
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +564 -315
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +1 -1
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.c +131 -84
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.h +23 -18
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.c +163 -145
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.h +2 -1
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-avx2.c +247 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +42 -29
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +71 -47
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.c +100 -65
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.h +77 -23
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.c +30 -31
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-avx2.h +150 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ref.h +28 -26
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ssse3.h +102 -99
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2i.c +90 -41
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2id.c +213 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/crypto_pwhash.c +72 -4
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c +34 -37
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +27 -32
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c +120 -86
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +16 -13
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.h +4 -4
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c +98 -50
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c +23 -18
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c +105 -105
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +395 -330
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +225 -198
- data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +6 -5
- data/vendor/libsodium/src/libsodium/crypto_secretbox/xchacha20poly1305/secretbox_xchacha20poly1305.c +170 -0
- data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/secretbox_xsalsa20poly1305.c +83 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/crypto_shorthash.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24_ref.c +65 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash_ref.h +24 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphashx24_ref.c +71 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/{shorthash_siphash24_api.c → shorthash_siphash24.c} +0 -0
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/shorthash_siphashx24.c +11 -0
- data/vendor/libsodium/src/libsodium/crypto_sign/crypto_sign.c +33 -0
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ed25519_ref10.h +18 -0
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/keypair.c +18 -13
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/obsolete.c +29 -26
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +75 -36
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sign.c +39 -15
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/sign_ed25519.c +91 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/afternm_aes128ctr.c +174 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/beforenm_aes128ctr.c +66 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/common.h +766 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/consts.h +28 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/consts_aes128ctr.c +28 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/int128.h +50 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/int128_aes128ctr.c +149 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/stream_aes128ctr_nacl.c +31 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/xor_afternm_aes128ctr.c +195 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/{stream_aes128ctr_api.c → stream_aes128ctr.c} +6 -3
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.c +179 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.c +173 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u0.h +86 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u1.h +98 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u4.h +175 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u8.h +357 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/{stream_chacha20_ref.c → chacha20_ref.c} +93 -94
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/chacha20_ref.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/stream_chacha20.c +39 -7
- data/vendor/libsodium/src/libsodium/crypto_stream/crypto_stream.c +7 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/salsa20_ref.c +120 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/salsa20_ref.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20.c +93 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20.h +16 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/{amd64_xmm6/stream_salsa20_amd64_xmm6.S → xmm6/salsa20_xmm6-asm.S} +20 -12
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.c +31 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.c +131 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.c +122 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.h +8 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u0.h +195 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u1.h +207 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u4.h +547 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u8.h +476 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012_ref.c +106 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/stream_salsa2012.c +20 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208_ref.c +106 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/stream_salsa208.c +20 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/xchacha20/stream_xchacha20.c +63 -0
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20.c +60 -0
- data/vendor/libsodium/src/libsodium/crypto_verify/sodium/verify.c +61 -0
- data/vendor/libsodium/src/libsodium/include/Makefile.am +8 -6
- data/vendor/libsodium/src/libsodium/include/Makefile.in +29 -21
- data/vendor/libsodium/src/libsodium/include/sodium.h +15 -4
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +6 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_xchacha20poly1305.h +91 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha256.h +5 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512256.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xchacha20poly1305.h +153 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +3 -4
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash_sha256.h +4 -3
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash_sha512.h +4 -3
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_kdf.h +51 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_kdf_blake2b.h +42 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_kx.h +64 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h +11 -9
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash.h +37 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2i.h +40 -10
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2id.h +116 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_scryptsalsa208sha256.h +37 -4
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_scalarmult_curve25519.h +0 -4
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xchacha20poly1305.h +62 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xsalsa20poly1305.h +5 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_shorthash.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_shorthash_siphash24.h +18 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign.h +22 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_ed25519.h +28 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_aes128ctr.h +10 -5
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_chacha20.h +14 -3
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa20.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa2012.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa208.h +3 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_xchacha20.h +53 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_xsalsa20.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/private/common.h +84 -17
- data/vendor/libsodium/src/libsodium/include/sodium/private/implementations.h +11 -0
- data/vendor/libsodium/src/libsodium/include/sodium/private/sse2_64_32.h +50 -0
- data/vendor/libsodium/src/libsodium/include/sodium/randombytes.h +10 -2
- data/vendor/libsodium/src/libsodium/include/sodium/utils.h +4 -5
- data/vendor/libsodium/src/libsodium/include/sodium/version.h.in +4 -0
- data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +47 -19
- data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +30 -50
- data/vendor/libsodium/src/libsodium/randombytes/sysrandom/randombytes_sysrandom.c +25 -15
- data/vendor/libsodium/src/libsodium/sodium/core.c +25 -23
- data/vendor/libsodium/src/libsodium/sodium/runtime.c +66 -57
- data/vendor/libsodium/src/libsodium/sodium/utils.c +120 -106
- data/vendor/libsodium/src/libsodium/sodium/version.c +10 -0
- data/vendor/libsodium/test/Makefile.in +11 -7
- data/vendor/libsodium/test/default/Makefile.am +65 -5
- data/vendor/libsodium/test/default/Makefile.in +243 -78
- data/vendor/libsodium/test/default/aead_aes256gcm.c +2 -2
- data/vendor/libsodium/test/default/aead_xchacha20poly1305.c +188 -0
- data/vendor/libsodium/test/default/aead_xchacha20poly1305.exp +51 -0
- data/vendor/libsodium/test/default/auth.c +11 -7
- data/vendor/libsodium/test/default/auth2.c +15 -12
- data/vendor/libsodium/test/default/auth3.c +18 -15
- data/vendor/libsodium/test/default/auth5.c +3 -2
- data/vendor/libsodium/test/default/auth6.c +4 -3
- data/vendor/libsodium/test/default/auth7.c +3 -2
- data/vendor/libsodium/test/default/box.c +57 -52
- data/vendor/libsodium/test/default/box2.c +41 -36
- data/vendor/libsodium/test/default/box7.c +4 -3
- data/vendor/libsodium/test/default/box8.c +4 -3
- data/vendor/libsodium/test/default/box_easy.c +36 -32
- data/vendor/libsodium/test/default/box_easy2.c +41 -34
- data/vendor/libsodium/test/default/box_seal.c +7 -6
- data/vendor/libsodium/test/default/box_seed.c +10 -8
- data/vendor/libsodium/test/default/chacha20.c +18 -3
- data/vendor/libsodium/test/default/chacha20.exp +45 -0
- data/vendor/libsodium/test/default/cmptest.h +1 -0
- data/vendor/libsodium/test/default/core1.c +10 -9
- data/vendor/libsodium/test/default/core2.c +13 -12
- data/vendor/libsodium/test/default/core3.c +13 -12
- data/vendor/libsodium/test/default/core4.c +11 -12
- data/vendor/libsodium/test/default/core5.c +13 -12
- data/vendor/libsodium/test/default/core6.c +15 -13
- data/vendor/libsodium/test/default/ed25519_convert.c +12 -9
- data/vendor/libsodium/test/default/hash.c +10 -6
- data/vendor/libsodium/test/default/hash3.c +3 -2
- data/vendor/libsodium/test/default/index-wasm.html.tpl +118 -0
- data/vendor/libsodium/test/default/kdf.c +61 -0
- data/vendor/libsodium/test/default/kdf.exp +77 -0
- data/vendor/libsodium/test/default/keygen.c +64 -0
- data/vendor/libsodium/test/default/keygen.exp +1 -0
- data/vendor/libsodium/test/default/kx.c +119 -0
- data/vendor/libsodium/test/default/kx.exp +7 -0
- data/vendor/libsodium/test/default/nacl-test-wrapper.sh +9 -2
- data/vendor/libsodium/test/default/onetimeauth.c +26 -23
- data/vendor/libsodium/test/default/onetimeauth2.c +22 -20
- data/vendor/libsodium/test/default/onetimeauth7.c +3 -2
- data/vendor/libsodium/test/default/pwhash.c +209 -157
- data/vendor/libsodium/test/default/pwhash_argon2id.c +388 -0
- data/vendor/libsodium/test/default/pwhash_argon2id.exp +15 -0
- data/vendor/libsodium/test/default/pwhash_scrypt.c +232 -224
- data/vendor/libsodium/test/default/pwhash_scrypt.exp +2 -1
- data/vendor/libsodium/test/default/pwhash_scrypt_ll.c +39 -41
- data/vendor/libsodium/test/default/randombytes.c +34 -13
- data/vendor/libsodium/test/default/randombytes.exp +1 -0
- data/vendor/libsodium/test/default/scalarmult.c +21 -18
- data/vendor/libsodium/test/default/scalarmult2.c +8 -6
- data/vendor/libsodium/test/default/scalarmult5.c +13 -10
- data/vendor/libsodium/test/default/scalarmult6.c +17 -14
- data/vendor/libsodium/test/default/scalarmult7.c +9 -10
- data/vendor/libsodium/test/default/secretbox.c +39 -36
- data/vendor/libsodium/test/default/secretbox2.c +28 -25
- data/vendor/libsodium/test/default/secretbox7.c +3 -2
- data/vendor/libsodium/test/default/secretbox8.c +4 -3
- data/vendor/libsodium/test/default/secretbox_easy.c +40 -37
- data/vendor/libsodium/test/default/secretbox_easy2.c +19 -18
- data/vendor/libsodium/test/default/shorthash.c +4 -4
- data/vendor/libsodium/test/default/sign.c +70 -13
- data/vendor/libsodium/test/default/sign.exp +2 -0
- data/vendor/libsodium/test/default/siphashx24.c +33 -0
- data/vendor/libsodium/test/default/siphashx24.exp +64 -0
- data/vendor/libsodium/test/default/sodium_core.c +9 -8
- data/vendor/libsodium/test/default/sodium_utils.c +52 -46
- data/vendor/libsodium/test/default/sodium_utils2.c +17 -8
- data/vendor/libsodium/test/default/sodium_utils3.c +15 -6
- data/vendor/libsodium/test/default/sodium_version.c +7 -1
- data/vendor/libsodium/test/default/stream.c +31 -18
- data/vendor/libsodium/test/default/stream.exp +65 -0
- data/vendor/libsodium/test/default/stream2.c +13 -9
- data/vendor/libsodium/test/default/stream3.c +12 -10
- data/vendor/libsodium/test/default/stream4.c +30 -27
- data/vendor/libsodium/test/default/verify1.c +5 -4
- data/vendor/libsodium/test/default/xchacha20.c +376 -0
- data/vendor/libsodium/test/default/xchacha20.exp +5 -0
- data/vendor/libsodium/test/quirks/quirks.h +4 -3
- metadata +140 -111
- data/vendor/libsodium/autom4te.cache/output.3 +0 -17240
- data/vendor/libsodium/autom4te.cache/output.4 +0 -17517
- data/vendor/libsodium/autom4te.cache/output.5 +0 -18535
- data/vendor/libsodium/autom4te.cache/output.6 +0 -19077
- data/vendor/libsodium/autom4te.cache/output.7 +0 -19837
- data/vendor/libsodium/autom4te.cache/traces.3 +0 -2833
- data/vendor/libsodium/autom4te.cache/traces.4 +0 -2951
- data/vendor/libsodium/autom4te.cache/traces.5 +0 -3042
- data/vendor/libsodium/autom4te.cache/traces.6 +0 -3194
- data/vendor/libsodium/autom4te.cache/traces.7 +0 -3614
- data/vendor/libsodium/builds/msvc/properties/ARM.props +0 -20
- data/vendor/libsodium/compile +0 -347
- data/vendor/libsodium/config.guess +0 -1568
- data/vendor/libsodium/config.sub +0 -1793
- data/vendor/libsodium/depcomp +0 -791
- data/vendor/libsodium/install-sh +0 -527
- data/vendor/libsodium/ltmain.sh +0 -9655
- data/vendor/libsodium/missing +0 -215
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/auth_hmacsha256_api.c +0 -16
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/cp/verify_hmacsha256.c +0 -11
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/auth_hmacsha512_api.c +0 -16
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/cp/verify_hmacsha512.c +0 -12
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/auth_hmacsha512256_api.c +0 -16
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/cp/verify_hmacsha512256.c +0 -14
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/box_curve25519xsalsa20poly1305_api.c +0 -41
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/after_curve25519xsalsa20poly1305.c +0 -22
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +0 -18
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/box_curve25519xsalsa20poly1305.c +0 -42
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/keypair_curve25519xsalsa20poly1305.c +0 -29
- data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.h +0 -28
- data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +0 -100
- data/vendor/libsodium/src/libsodium/crypto_core/salsa20/core_salsa20_api.c +0 -21
- data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +0 -126
- data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/core_salsa2012_api.c +0 -21
- data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +0 -126
- data/vendor/libsodium/src/libsodium/crypto_core/salsa208/core_salsa208_api.c +0 -21
- data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +0 -126
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +0 -48
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +0 -97
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.c +0 -45
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.h +0 -123
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +0 -94
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +0 -80
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.h +0 -97
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +0 -87
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.h +0 -97
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-avx2.h +0 -339
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +0 -66
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +0 -400
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +0 -456
- data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +0 -269
- data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +0 -298
- data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-impl.h +0 -40
- data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/ref/box_xsalsa20poly1305.c +0 -35
- data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/secretbox_xsalsa20poly1305_api.c +0 -26
- data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +0 -72
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/description +0 -1
- data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/sign_ed25519_api.c +0 -39
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +0 -159
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c +0 -59
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common.h +0 -771
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/consts.h +0 -28
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c +0 -14
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128.h +0 -56
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c +0 -131
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c +0 -29
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/types.h +0 -10
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +0 -180
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.h +0 -28
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c +0 -336
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +0 -28
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +0 -55
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +0 -63
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c +0 -19
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +0 -51
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +0 -54
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c +0 -11
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +0 -51
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +0 -54
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c +0 -11
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +0 -24
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +0 -35
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c +0 -11
- data/vendor/libsodium/src/libsodium/crypto_verify/16/ref/verify_16.c +0 -17
- data/vendor/libsodium/src/libsodium/crypto_verify/16/verify_16_api.c +0 -6
- data/vendor/libsodium/src/libsodium/crypto_verify/32/ref/verify_32.c +0 -17
- data/vendor/libsodium/src/libsodium/crypto_verify/32/verify_32_api.c +0 -6
- data/vendor/libsodium/src/libsodium/crypto_verify/64/ref/verify_64.c +0 -17
- data/vendor/libsodium/src/libsodium/crypto_verify/64/verify_64_api.c +0 -6
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_int32.h +0 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_int64.h +0 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint16.h +0 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint32.h +0 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint64.h +0 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint8.h +0 -8
- data/vendor/libsodium/test-driver +0 -139
@@ -0,0 +1,98 @@
|
|
1
|
+
/*
 * One-block path: while at least 64 bytes remain, load the 16-word state x as
 * four 128-bit rows, run ROUNDS rounds (two per loop iteration), add the input
 * state back in, XOR the result with 64 bytes of m, store it to c, and advance
 * the counter held in x[12] (low word) / x[13] (carry word).
 *
 * x, m, c, bytes and ROUNDS come from the enclosing scope, which is not part of
 * this fragment.
 * NOTE(review): the rotation amounts 16/12/8/7 match the ChaCha quarter round -
 * confirm against the including file.
 */
while (bytes >= 64) {
    __m128i x_0, x_1, x_2, x_3;
    __m128i t_1;
    /* Byte-shuffle masks: rotate every 32-bit lane left by 16 / by 8 bits. */
    const __m128i rot16 =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    const __m128i rot8 =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);

    uint32_t in12;
    uint32_t in13;
    int i;

    /* Loads only read their source, so cast to pointer-to-const. */
    x_0 = _mm_loadu_si128((const __m128i *) (x + 0));
    x_1 = _mm_loadu_si128((const __m128i *) (x + 4));
    x_2 = _mm_loadu_si128((const __m128i *) (x + 8));
    x_3 = _mm_loadu_si128((const __m128i *) (x + 12));

    for (i = 0; i < ROUNDS; i += 2) {
        /* First round of the pair: rows are used as loaded. */
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);

        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);

        /* Rotate left by 12 as shift-left / shift-right / combine. */
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);

        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        /* Lane rotations (0x93, 0x4e, 0x39) realign the rows for the second
         * round of the pair; they are interleaved with the arithmetic. */
        x_0 = _mm_shuffle_epi32(x_0, 0x93);
        x_3 = _mm_shuffle_epi8(x_3, rot8);

        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x39);

        /* Rotate left by 7. */
        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);

        /* Second round of the pair, on the realigned rows. */
        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        x_3 = _mm_shuffle_epi8(x_3, rot16);

        x_2 = _mm_add_epi32(x_2, x_3);
        x_1 = _mm_xor_si128(x_1, x_2);

        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 12);
        t_1 = _mm_srli_epi32(t_1, 20);
        x_1 = _mm_xor_si128(x_1, t_1);

        x_0 = _mm_add_epi32(x_0, x_1);
        x_3 = _mm_xor_si128(x_3, x_0);
        /* Inverse lane rotations (0x39, 0x4e, 0x93) restore the row layout. */
        x_0 = _mm_shuffle_epi32(x_0, 0x39);
        x_3 = _mm_shuffle_epi8(x_3, rot8);

        x_2 = _mm_add_epi32(x_2, x_3);
        x_3 = _mm_shuffle_epi32(x_3, 0x4e);
        x_1 = _mm_xor_si128(x_1, x_2);
        x_2 = _mm_shuffle_epi32(x_2, 0x93);

        t_1 = x_1;
        x_1 = _mm_slli_epi32(x_1, 7);
        t_1 = _mm_srli_epi32(t_1, 25);
        x_1 = _mm_xor_si128(x_1, t_1);
    }
    /* Feed-forward: add the unmodified input state to the round output. */
    x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((const __m128i *) (x + 0)));
    x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((const __m128i *) (x + 4)));
    x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((const __m128i *) (x + 8)));
    x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((const __m128i *) (x + 12)));
    /* XOR the 64 bytes just produced with the input and write the output. */
    x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((const __m128i *) (m + 0)));
    x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((const __m128i *) (m + 16)));
    x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((const __m128i *) (m + 32)));
    x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((const __m128i *) (m + 48)));
    _mm_storeu_si128((__m128i *) (c + 0), x_0);
    _mm_storeu_si128((__m128i *) (c + 16), x_1);
    _mm_storeu_si128((__m128i *) (c + 32), x_2);
    _mm_storeu_si128((__m128i *) (c + 48), x_3);

    /* Increment x[12]; when it wraps to zero, carry into x[13]. */
    in12 = x[12];
    in13 = x[13];
    in12++;
    if (in12 == 0) {
        in13++;
    }
    x[12] = in12;
    x[13] = in13;

    bytes -= 64;
    c += 64;
    m += 64;
}
|
@@ -0,0 +1,175 @@
|
|
1
|
+
|
2
|
+
/*
 * Rotate every 32-bit lane of A left by IMM bits.
 * A is expanded twice, so pass a side-effect-free expression.
 */
#define VEC4_ROT(A, IMM) \
    _mm_or_si128(_mm_slli_epi32((A), (IMM)), _mm_srli_epi32((A), (32 - (IMM))))

/*
 * Quarter round over the vectors x_A, x_B, x_C, x_D, using t_A and t_C as
 * scratch: add / xor / rotate with rotation amounts 16, 12, 8 and 7.
 * The 16- and 8-bit rotations are done with byte shuffles (rot16 / rot8 must be
 * in scope at the point of use); the 12- and 7-bit ones use VEC4_ROT.
 * Wrapped in do { } while (0) so that the macro behaves as a single statement.
 */
#define VEC4_QUARTERROUND_SHUFFLE(A, B, C, D) \
    do { \
        x_##A = _mm_add_epi32(x_##A, x_##B); \
        t_##A = _mm_xor_si128(x_##D, x_##A); \
        x_##D = _mm_shuffle_epi8(t_##A, rot16); \
        x_##C = _mm_add_epi32(x_##C, x_##D); \
        t_##C = _mm_xor_si128(x_##B, x_##C); \
        x_##B = VEC4_ROT(t_##C, 12); \
        x_##A = _mm_add_epi32(x_##A, x_##B); \
        t_##A = _mm_xor_si128(x_##D, x_##A); \
        x_##D = _mm_shuffle_epi8(t_##A, rot8); \
        x_##C = _mm_add_epi32(x_##C, x_##D); \
        t_##C = _mm_xor_si128(x_##B, x_##C); \
        x_##B = VEC4_ROT(t_##C, 7); \
    } while (0)

/* Quarter-round implementation selected for this path. */
#define VEC4_QUARTERROUND(A, B, C, D) VEC4_QUARTERROUND_SHUFFLE(A, B, C, D)
|
22
|
+
|
23
|
+
/*
 * Four-block path: while at least 256 bytes remain, compute four 64-byte
 * blocks at once. Vector x_N holds state word N of the four blocks, one block
 * per 32-bit lane; the blocks differ only in the counter words 12 and 13.
 *
 * x, m, c, bytes and ROUNDS come from the enclosing scope, which is not part of
 * this fragment.
 */
if (bytes >= 256) {
    /* constant for shuffling bytes (replacing multiple-of-8 rotates) */
    __m128i rot16 =
        _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    __m128i rot8 =
        _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);

    /* Broadcast every state word except the counter words 12 and 13. */
    __m128i x_0 = _mm_set1_epi32(x[0]);
    __m128i x_1 = _mm_set1_epi32(x[1]);
    __m128i x_2 = _mm_set1_epi32(x[2]);
    __m128i x_3 = _mm_set1_epi32(x[3]);
    __m128i x_4 = _mm_set1_epi32(x[4]);
    __m128i x_5 = _mm_set1_epi32(x[5]);
    __m128i x_6 = _mm_set1_epi32(x[6]);
    __m128i x_7 = _mm_set1_epi32(x[7]);
    __m128i x_8 = _mm_set1_epi32(x[8]);
    __m128i x_9 = _mm_set1_epi32(x[9]);
    __m128i x_10 = _mm_set1_epi32(x[10]);
    __m128i x_11 = _mm_set1_epi32(x[11]);
    __m128i x_12;
    __m128i x_13;
    __m128i x_14 = _mm_set1_epi32(x[14]);
    __m128i x_15 = _mm_set1_epi32(x[15]);
    /* Copies of the input state, used to reset x_N and for the final add. */
    __m128i orig0 = x_0;
    __m128i orig1 = x_1;
    __m128i orig2 = x_2;
    __m128i orig3 = x_3;
    __m128i orig4 = x_4;
    __m128i orig5 = x_5;
    __m128i orig6 = x_6;
    __m128i orig7 = x_7;
    __m128i orig8 = x_8;
    __m128i orig9 = x_9;
    __m128i orig10 = x_10;
    __m128i orig11 = x_11;
    __m128i orig12;
    __m128i orig13;
    __m128i orig14 = x_14;
    __m128i orig15 = x_15;
    __m128i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12,
        t_13, t_14, t_15;

    uint32_t in12, in13;
    int i;

    while (bytes >= 256) {
        /* Per-block counter offsets: +0, +1 and +2, +3 as 64-bit lanes. */
        const __m128i addv12 = _mm_set_epi64x(1, 0);
        const __m128i addv13 = _mm_set_epi64x(3, 2);
        __m128i t12, t13;
        uint64_t in1213;

        x_0 = orig0;
        x_1 = orig1;
        x_2 = orig2;
        x_3 = orig3;
        x_4 = orig4;
        x_5 = orig5;
        x_6 = orig6;
        x_7 = orig7;
        x_8 = orig8;
        x_9 = orig9;
        x_10 = orig10;
        x_11 = orig11;
        x_14 = orig14;
        x_15 = orig15;

        /* Treat x[12] (low) and x[13] (high) as one 64-bit counter. */
        in12 = x[12];
        in13 = x[13];
        in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32);
        t12 = _mm_set1_epi64x(in1213);
        t13 = _mm_set1_epi64x(in1213);

        x_12 = _mm_add_epi64(addv12, t12);
        x_13 = _mm_add_epi64(addv13, t13);

        /* Two rounds of 32-bit unpacking separate the halves: x_12 ends up
         * with the four low words, x_13 with the four high words. */
        t12 = _mm_unpacklo_epi32(x_12, x_13);
        t13 = _mm_unpackhi_epi32(x_12, x_13);

        x_12 = _mm_unpacklo_epi32(t12, t13);
        x_13 = _mm_unpackhi_epi32(t12, t13);

        orig12 = x_12;
        orig13 = x_13;

        /* Four blocks are consumed per iteration; store the new counter. */
        in1213 += 4;

        x[12] = in1213 & 0xFFFFFFFF;
        x[13] = (in1213 >> 32) & 0xFFFFFFFF;

        for (i = 0; i < ROUNDS; i += 2) {
            VEC4_QUARTERROUND(0, 4, 8, 12);
            VEC4_QUARTERROUND(1, 5, 9, 13);
            VEC4_QUARTERROUND(2, 6, 10, 14);
            VEC4_QUARTERROUND(3, 7, 11, 15);
            VEC4_QUARTERROUND(0, 5, 10, 15);
            VEC4_QUARTERROUND(1, 6, 11, 12);
            VEC4_QUARTERROUND(2, 7, 8, 13);
            VEC4_QUARTERROUND(3, 4, 9, 14);
        }

/*
 * Add the input state to words A..D, transpose the 4x4 block of 32-bit values
 * so that each vector holds words A..D of a single block, then XOR with the
 * input and store. Block k lives at byte offset 64 * k from m / c.
 */
#define ONEQUAD_TRANSPOSE(A, B, C, D) \
    do { \
        __m128i t0, t1, t2, t3; \
        \
        x_##A = _mm_add_epi32(x_##A, orig##A); \
        x_##B = _mm_add_epi32(x_##B, orig##B); \
        x_##C = _mm_add_epi32(x_##C, orig##C); \
        x_##D = _mm_add_epi32(x_##D, orig##D); \
        t_##A = _mm_unpacklo_epi32(x_##A, x_##B); \
        t_##B = _mm_unpacklo_epi32(x_##C, x_##D); \
        t_##C = _mm_unpackhi_epi32(x_##A, x_##B); \
        t_##D = _mm_unpackhi_epi32(x_##C, x_##D); \
        x_##A = _mm_unpacklo_epi64(t_##A, t_##B); \
        x_##B = _mm_unpackhi_epi64(t_##A, t_##B); \
        x_##C = _mm_unpacklo_epi64(t_##C, t_##D); \
        x_##D = _mm_unpackhi_epi64(t_##C, t_##D); \
        \
        t0 = _mm_xor_si128(x_##A, \
                           _mm_loadu_si128((const __m128i *) (m + 0))); \
        _mm_storeu_si128((__m128i *) (c + 0), t0); \
        t1 = _mm_xor_si128(x_##B, \
                           _mm_loadu_si128((const __m128i *) (m + 64))); \
        _mm_storeu_si128((__m128i *) (c + 64), t1); \
        t2 = _mm_xor_si128(x_##C, \
                           _mm_loadu_si128((const __m128i *) (m + 128))); \
        _mm_storeu_si128((__m128i *) (c + 128), t2); \
        t3 = _mm_xor_si128(x_##D, \
                           _mm_loadu_si128((const __m128i *) (m + 192))); \
        _mm_storeu_si128((__m128i *) (c + 192), t3); \
    } while (0)

#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)

        /* Each call emits 16 bytes of every block; step m / c by 16 between
         * calls and rewind by 48 afterwards. */
        ONEQUAD(0, 1, 2, 3);
        m += 16;
        c += 16;
        ONEQUAD(4, 5, 6, 7);
        m += 16;
        c += 16;
        ONEQUAD(8, 9, 10, 11);
        m += 16;
        c += 16;
        ONEQUAD(12, 13, 14, 15);
        m -= 48;
        c -= 48;

#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE

        bytes -= 256;
        c += 256;
        m += 256;
    }
}
|
173
|
+
/* Remove the VEC4_* helper macros so they do not leak past this fragment. */
#undef VEC4_ROT
|
174
|
+
#undef VEC4_QUARTERROUND
|
175
|
+
#undef VEC4_QUARTERROUND_SHUFFLE
|
@@ -0,0 +1,357 @@
|
|
1
|
+
|
2
|
+
/*
 * Rotate every 32-bit lane of A left by IMM bits.
 * A is expanded twice, so pass a side-effect-free expression.
 */
#define VEC8_ROT(A, IMM) \
    _mm256_or_si256(_mm256_slli_epi32((A), (IMM)), \
                    _mm256_srli_epi32((A), (32 - (IMM))))

/*
 * The quarter-round macros below operate on the vectors x_A, x_B, x_C, x_D and
 * use t_A / t_C as scratch. All multi-statement macros are wrapped in
 * do { } while (0) so that each one behaves as a single statement.
 */

/* implements a vector quarter round by-the-book (naive!) */
#define VEC8_QUARTERROUND_NAIVE(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = VEC8_ROT(t_##A, 16); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 12); \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = VEC8_ROT(t_##A, 8); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 7); \
    } while (0)

/* same, but replace 2 of the shift/shift/or "rotation" by byte shuffles (8 &
 * 16) (better); rot16 / rot8 must be in scope at the point of use */
#define VEC8_QUARTERROUND_SHUFFLE(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = _mm256_shuffle_epi8(t_##A, rot16); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 12); \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = _mm256_shuffle_epi8(t_##A, rot8); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 7); \
    } while (0)

/* same, but replace 2 of the shift/shift/or "rotation" by byte & word shuffles
 * (8 & 16) (not as good as previous) */
#define VEC8_QUARTERROUND_SHUFFLE2(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(t_##A, 0xb1), \
                                       0xb1); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 12); \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        t_##A = _mm256_xor_si256(x_##D, x_##A); \
        x_##D = _mm256_shuffle_epi8(t_##A, rot8); \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        t_##C = _mm256_xor_si256(x_##B, x_##C); \
        x_##B = VEC8_ROT(t_##C, 7); \
    } while (0)

/* Quarter-round implementation selected for this path. */
#define VEC8_QUARTERROUND(A, B, C, D) VEC8_QUARTERROUND_SHUFFLE(A, B, C, D)

/*
 * The four add/xor/rotate steps of one quarter round, split out so that several
 * quarter rounds can be interleaved. C is unused by LINE1/LINE3 and A is unused
 * by LINE2/LINE4; the uniform parameter list keeps the call sites regular.
 */
#define VEC8_LINE1(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot16); \
    } while (0)
#define VEC8_LINE2(A, B, C, D) \
    do { \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 12); \
    } while (0)
#define VEC8_LINE3(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, x_##B); \
        x_##D = _mm256_shuffle_epi8(_mm256_xor_si256(x_##D, x_##A), rot8); \
    } while (0)
#define VEC8_LINE4(A, B, C, D) \
    do { \
        x_##C = _mm256_add_epi32(x_##C, x_##D); \
        x_##B = VEC8_ROT(_mm256_xor_si256(x_##B, x_##C), 7); \
    } while (0)

/* Four quarter rounds, step by step: every LINEn runs on all four before the
 * next step starts. */
#define VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, \
                       C4, D4) \
    do { \
        VEC8_LINE1(A1, B1, C1, D1); \
        VEC8_LINE1(A2, B2, C2, D2); \
        VEC8_LINE1(A3, B3, C3, D3); \
        VEC8_LINE1(A4, B4, C4, D4); \
        VEC8_LINE2(A1, B1, C1, D1); \
        VEC8_LINE2(A2, B2, C2, D2); \
        VEC8_LINE2(A3, B3, C3, D3); \
        VEC8_LINE2(A4, B4, C4, D4); \
        VEC8_LINE3(A1, B1, C1, D1); \
        VEC8_LINE3(A2, B2, C2, D2); \
        VEC8_LINE3(A3, B3, C3, D3); \
        VEC8_LINE3(A4, B4, C4, D4); \
        VEC8_LINE4(A1, B1, C1, D1); \
        VEC8_LINE4(A2, B2, C2, D2); \
        VEC8_LINE4(A3, B3, C3, D3); \
        VEC8_LINE4(A4, B4, C4, D4); \
    } while (0)

/* Four quarter rounds, two at a time: quarter rounds 1 and 2 run to completion
 * before 3 and 4 start. */
#define VEC8_ROUND_HALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, \
                        B4, C4, D4) \
    do { \
        VEC8_LINE1(A1, B1, C1, D1); \
        VEC8_LINE1(A2, B2, C2, D2); \
        VEC8_LINE2(A1, B1, C1, D1); \
        VEC8_LINE2(A2, B2, C2, D2); \
        VEC8_LINE3(A1, B1, C1, D1); \
        VEC8_LINE3(A2, B2, C2, D2); \
        VEC8_LINE4(A1, B1, C1, D1); \
        VEC8_LINE4(A2, B2, C2, D2); \
        VEC8_LINE1(A3, B3, C3, D3); \
        VEC8_LINE1(A4, B4, C4, D4); \
        VEC8_LINE2(A3, B3, C3, D3); \
        VEC8_LINE2(A4, B4, C4, D4); \
        VEC8_LINE3(A3, B3, C3, D3); \
        VEC8_LINE3(A4, B4, C4, D4); \
        VEC8_LINE4(A3, B3, C3, D3); \
        VEC8_LINE4(A4, B4, C4, D4); \
    } while (0)

/* Four quarter rounds, alternating pairs: steps 1-2 for pair (1,2), steps 1-2
 * for pair (3,4), then steps 3-4 for each pair. */
#define VEC8_ROUND_HALFANDHALF(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, \
                               A4, B4, C4, D4) \
    do { \
        VEC8_LINE1(A1, B1, C1, D1); \
        VEC8_LINE1(A2, B2, C2, D2); \
        VEC8_LINE2(A1, B1, C1, D1); \
        VEC8_LINE2(A2, B2, C2, D2); \
        VEC8_LINE1(A3, B3, C3, D3); \
        VEC8_LINE1(A4, B4, C4, D4); \
        VEC8_LINE2(A3, B3, C3, D3); \
        VEC8_LINE2(A4, B4, C4, D4); \
        VEC8_LINE3(A1, B1, C1, D1); \
        VEC8_LINE3(A2, B2, C2, D2); \
        VEC8_LINE4(A1, B1, C1, D1); \
        VEC8_LINE4(A2, B2, C2, D2); \
        VEC8_LINE3(A3, B3, C3, D3); \
        VEC8_LINE3(A4, B4, C4, D4); \
        VEC8_LINE4(A3, B3, C3, D3); \
        VEC8_LINE4(A4, B4, C4, D4); \
    } while (0)

/* Round scheduling selected for this path. */
#define VEC8_ROUND(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
                   D4) \
    VEC8_ROUND_SEQ(A1, B1, C1, D1, A2, B2, C2, D2, A3, B3, C3, D3, A4, B4, C4, \
                   D4)
|
128
|
+
|
129
|
+
/*
 * Eight-block path: while at least 512 bytes remain, compute eight 64-byte
 * blocks at once. Vector x_N holds state word N of the eight blocks, one block
 * per 32-bit lane; the blocks differ only in the counter words 12 and 13.
 *
 * x, m, c, bytes and ROUNDS come from the enclosing scope, which is not part of
 * this fragment.
 */
if (bytes >= 512) {
    /* constant for shuffling bytes (replacing multiple-of-8 rotates) */
    __m256i rot16 =
        _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
                        13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
    __m256i rot8 =
        _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
                        14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
    uint32_t in12, in13;

    /* the naive way seems as fast as (if not a bit faster than) the vector
     * way */
    __m256i x_0 = _mm256_set1_epi32(x[0]);
    __m256i x_1 = _mm256_set1_epi32(x[1]);
    __m256i x_2 = _mm256_set1_epi32(x[2]);
    __m256i x_3 = _mm256_set1_epi32(x[3]);
    __m256i x_4 = _mm256_set1_epi32(x[4]);
    __m256i x_5 = _mm256_set1_epi32(x[5]);
    __m256i x_6 = _mm256_set1_epi32(x[6]);
    __m256i x_7 = _mm256_set1_epi32(x[7]);
    __m256i x_8 = _mm256_set1_epi32(x[8]);
    __m256i x_9 = _mm256_set1_epi32(x[9]);
    __m256i x_10 = _mm256_set1_epi32(x[10]);
    __m256i x_11 = _mm256_set1_epi32(x[11]);
    __m256i x_12;
    __m256i x_13;
    __m256i x_14 = _mm256_set1_epi32(x[14]);
    __m256i x_15 = _mm256_set1_epi32(x[15]);

    /* Copies of the input state, used to reset x_N and for the final add. */
    __m256i orig0 = x_0;
    __m256i orig1 = x_1;
    __m256i orig2 = x_2;
    __m256i orig3 = x_3;
    __m256i orig4 = x_4;
    __m256i orig5 = x_5;
    __m256i orig6 = x_6;
    __m256i orig7 = x_7;
    __m256i orig8 = x_8;
    __m256i orig9 = x_9;
    __m256i orig10 = x_10;
    __m256i orig11 = x_11;
    __m256i orig12;
    __m256i orig13;
    __m256i orig14 = x_14;
    __m256i orig15 = x_15;
    __m256i t_0, t_1, t_2, t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12,
        t_13, t_14, t_15;

    while (bytes >= 512) {
        /* Per-block counter offsets +0..+3 and +4..+7 as 64-bit lanes. */
        const __m256i addv12 = _mm256_set_epi64x(3, 2, 1, 0);
        const __m256i addv13 = _mm256_set_epi64x(7, 6, 5, 4);
        const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
        __m256i t12, t13;

        uint64_t in1213;
        int i;

        x_0 = orig0;
        x_1 = orig1;
        x_2 = orig2;
        x_3 = orig3;
        x_4 = orig4;
        x_5 = orig5;
        x_6 = orig6;
        x_7 = orig7;
        x_8 = orig8;
        x_9 = orig9;
        x_10 = orig10;
        x_11 = orig11;
        x_14 = orig14;
        x_15 = orig15;

        /* Treat x[12] (low) and x[13] (high) as one 64-bit counter. */
        in12 = x[12];
        in13 = x[13];
        in1213 = ((uint64_t) in12) | (((uint64_t) in13) << 32);
        x_12 = x_13 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in1213));

        t12 = _mm256_add_epi64(addv12, x_12);
        t13 = _mm256_add_epi64(addv13, x_13);

        /* Two rounds of 32-bit unpacking separate the low words from the high
         * words of the eight counters. */
        x_12 = _mm256_unpacklo_epi32(t12, t13);
        x_13 = _mm256_unpackhi_epi32(t12, t13);

        t12 = _mm256_unpacklo_epi32(x_12, x_13);
        t13 = _mm256_unpackhi_epi32(x_12, x_13);

        /* required because unpack* are intra-lane */
        x_12 = _mm256_permutevar8x32_epi32(t12, permute);
        x_13 = _mm256_permutevar8x32_epi32(t13, permute);

        orig12 = x_12;
        orig13 = x_13;

        /* Eight blocks are consumed per iteration; store the new counter. */
        in1213 += 8;

        x[12] = in1213 & 0xFFFFFFFF;
        x[13] = (in1213 >> 32) & 0xFFFFFFFF;

        for (i = 0; i < ROUNDS; i += 2) {
            VEC8_ROUND(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
            VEC8_ROUND(0, 5, 10, 15, 1, 6, 11, 12, 2, 7, 8, 13, 3, 4, 9, 14);
        }

/*
 * 128-bit output variant (not invoked below): add the input state, transpose
 * words A..D within each 128-bit half, then XOR and store 16 bytes per block
 * for all eight blocks.
 */
#define ONEQUAD_TRANSPOSE(A, B, C, D) \
    do { \
        __m128i t0, t1, t2, t3; \
        x_##A = _mm256_add_epi32(x_##A, orig##A); \
        x_##B = _mm256_add_epi32(x_##B, orig##B); \
        x_##C = _mm256_add_epi32(x_##C, orig##C); \
        x_##D = _mm256_add_epi32(x_##D, orig##D); \
        t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \
        t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \
        t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \
        t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \
        x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \
        x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \
        x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \
        x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \
        t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 0), \
                           _mm_loadu_si128((const __m128i *) (m + 0))); \
        _mm_storeu_si128((__m128i *) (c + 0), t0); \
        t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 0), \
                           _mm_loadu_si128((const __m128i *) (m + 64))); \
        _mm_storeu_si128((__m128i *) (c + 64), t1); \
        t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 0), \
                           _mm_loadu_si128((const __m128i *) (m + 128))); \
        _mm_storeu_si128((__m128i *) (c + 128), t2); \
        t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 0), \
                           _mm_loadu_si128((const __m128i *) (m + 192))); \
        _mm_storeu_si128((__m128i *) (c + 192), t3); \
        t0 = _mm_xor_si128(_mm256_extracti128_si256(x_##A, 1), \
                           _mm_loadu_si128((const __m128i *) (m + 256))); \
        _mm_storeu_si128((__m128i *) (c + 256), t0); \
        t1 = _mm_xor_si128(_mm256_extracti128_si256(x_##B, 1), \
                           _mm_loadu_si128((const __m128i *) (m + 320))); \
        _mm_storeu_si128((__m128i *) (c + 320), t1); \
        t2 = _mm_xor_si128(_mm256_extracti128_si256(x_##C, 1), \
                           _mm_loadu_si128((const __m128i *) (m + 384))); \
        _mm_storeu_si128((__m128i *) (c + 384), t2); \
        t3 = _mm_xor_si128(_mm256_extracti128_si256(x_##D, 1), \
                           _mm_loadu_si128((const __m128i *) (m + 448))); \
        _mm_storeu_si128((__m128i *) (c + 448), t3); \
    } while (0)

#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)

/* Add the input state to words A..D and transpose them within each 128-bit
 * half; the result is left in x_A..x_D for ONEOCTO to combine. */
#define ONEQUAD_UNPCK(A, B, C, D) \
    do { \
        x_##A = _mm256_add_epi32(x_##A, orig##A); \
        x_##B = _mm256_add_epi32(x_##B, orig##B); \
        x_##C = _mm256_add_epi32(x_##C, orig##C); \
        x_##D = _mm256_add_epi32(x_##D, orig##D); \
        t_##A = _mm256_unpacklo_epi32(x_##A, x_##B); \
        t_##B = _mm256_unpacklo_epi32(x_##C, x_##D); \
        t_##C = _mm256_unpackhi_epi32(x_##A, x_##B); \
        t_##D = _mm256_unpackhi_epi32(x_##C, x_##D); \
        x_##A = _mm256_unpacklo_epi64(t_##A, t_##B); \
        x_##B = _mm256_unpackhi_epi64(t_##A, t_##B); \
        x_##C = _mm256_unpacklo_epi64(t_##C, t_##D); \
        x_##D = _mm256_unpackhi_epi64(t_##C, t_##D); \
    } while (0)

/*
 * Finish eight state words (A..D and A2..D2) for all eight blocks: pair up the
 * matching 128-bit halves so that each vector holds 32 contiguous bytes of one
 * block, XOR with the input and store. Block k lives at byte offset 64 * k.
 */
#define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \
    do { \
        ONEQUAD_UNPCK(A, B, C, D); \
        ONEQUAD_UNPCK(A2, B2, C2, D2); \
        t_##A = _mm256_permute2x128_si256(x_##A, x_##A2, 0x20); \
        t_##A2 = _mm256_permute2x128_si256(x_##A, x_##A2, 0x31); \
        t_##B = _mm256_permute2x128_si256(x_##B, x_##B2, 0x20); \
        t_##B2 = _mm256_permute2x128_si256(x_##B, x_##B2, 0x31); \
        t_##C = _mm256_permute2x128_si256(x_##C, x_##C2, 0x20); \
        t_##C2 = _mm256_permute2x128_si256(x_##C, x_##C2, 0x31); \
        t_##D = _mm256_permute2x128_si256(x_##D, x_##D2, 0x20); \
        t_##D2 = _mm256_permute2x128_si256(x_##D, x_##D2, 0x31); \
        t_##A = _mm256_xor_si256( \
            t_##A, _mm256_loadu_si256((const __m256i *) (m + 0))); \
        t_##B = _mm256_xor_si256( \
            t_##B, _mm256_loadu_si256((const __m256i *) (m + 64))); \
        t_##C = _mm256_xor_si256( \
            t_##C, _mm256_loadu_si256((const __m256i *) (m + 128))); \
        t_##D = _mm256_xor_si256( \
            t_##D, _mm256_loadu_si256((const __m256i *) (m + 192))); \
        t_##A2 = _mm256_xor_si256( \
            t_##A2, _mm256_loadu_si256((const __m256i *) (m + 256))); \
        t_##B2 = _mm256_xor_si256( \
            t_##B2, _mm256_loadu_si256((const __m256i *) (m + 320))); \
        t_##C2 = _mm256_xor_si256( \
            t_##C2, _mm256_loadu_si256((const __m256i *) (m + 384))); \
        t_##D2 = _mm256_xor_si256( \
            t_##D2, _mm256_loadu_si256((const __m256i *) (m + 448))); \
        _mm256_storeu_si256((__m256i *) (c + 0), t_##A); \
        _mm256_storeu_si256((__m256i *) (c + 64), t_##B); \
        _mm256_storeu_si256((__m256i *) (c + 128), t_##C); \
        _mm256_storeu_si256((__m256i *) (c + 192), t_##D); \
        _mm256_storeu_si256((__m256i *) (c + 256), t_##A2); \
        _mm256_storeu_si256((__m256i *) (c + 320), t_##B2); \
        _mm256_storeu_si256((__m256i *) (c + 384), t_##C2); \
        _mm256_storeu_si256((__m256i *) (c + 448), t_##D2); \
    } while (0)

        /* Words 0..7 fill bytes 0..31 of every block, words 8..15 fill bytes
         * 32..63; step m / c by 32 in between and rewind afterwards. */
        ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7);
        m += 32;
        c += 32;
        ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15);
        m -= 32;
        c -= 32;

#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE
#undef ONEQUAD_UNPCK
#undef ONEOCTO

        bytes -= 512;
        c += 512;
        m += 512;
    }
}
|
345
|
+
/* Remove the VEC8_* helper macros so they do not leak past this fragment. */
#undef VEC8_ROT
|
346
|
+
#undef VEC8_QUARTERROUND
|
347
|
+
#undef VEC8_QUARTERROUND_NAIVE
|
348
|
+
#undef VEC8_QUARTERROUND_SHUFFLE
|
349
|
+
#undef VEC8_QUARTERROUND_SHUFFLE2
|
350
|
+
#undef VEC8_LINE1
|
351
|
+
#undef VEC8_LINE2
|
352
|
+
#undef VEC8_LINE3
|
353
|
+
#undef VEC8_LINE4
|
354
|
+
#undef VEC8_ROUND
|
355
|
+
#undef VEC8_ROUND_SEQ
|
356
|
+
#undef VEC8_ROUND_HALF
|
357
|
+
#undef VEC8_ROUND_HALFANDHALF
|