react-native-quick-crypto 1.0.0-beta.2 → 1.0.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +143 -7
- package/README.md +12 -6
- package/android/CMakeLists.txt +82 -21
- package/android/build.gradle +47 -4
- package/android/src/main/cpp/cpp-adapter.cpp +3 -10
- package/android/src/main/java/com/margelo/nitro/quickcrypto/QuickCryptoPackage.java +13 -10
- package/app.plugin.js +3 -0
- package/cpp/blake3/HybridBlake3.cpp +118 -0
- package/cpp/blake3/HybridBlake3.hpp +35 -0
- package/cpp/cipher/CCMCipher.cpp +199 -0
- package/cpp/cipher/CCMCipher.hpp +26 -0
- package/cpp/cipher/ChaCha20Cipher.cpp +97 -0
- package/cpp/cipher/ChaCha20Cipher.hpp +25 -0
- package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +170 -0
- package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +30 -0
- package/cpp/cipher/HybridCipher.cpp +322 -0
- package/cpp/cipher/HybridCipher.hpp +68 -0
- package/cpp/cipher/HybridCipherFactory.hpp +97 -0
- package/cpp/cipher/OCBCipher.cpp +55 -0
- package/cpp/cipher/OCBCipher.hpp +19 -0
- package/cpp/cipher/XSalsa20Cipher.cpp +61 -0
- package/cpp/cipher/XSalsa20Cipher.hpp +33 -0
- package/cpp/ec/HybridEcKeyPair.cpp +428 -0
- package/cpp/ec/HybridEcKeyPair.hpp +48 -0
- package/cpp/ed25519/HybridEdKeyPair.cpp +300 -0
- package/cpp/ed25519/HybridEdKeyPair.hpp +63 -0
- package/cpp/hash/HybridHash.cpp +185 -0
- package/cpp/hash/HybridHash.hpp +43 -0
- package/cpp/hmac/HybridHmac.cpp +95 -0
- package/cpp/hmac/HybridHmac.hpp +31 -0
- package/cpp/keys/HybridKeyObjectHandle.cpp +243 -0
- package/cpp/keys/HybridKeyObjectHandle.hpp +42 -0
- package/cpp/keys/KeyObjectData.cpp +226 -0
- package/cpp/keys/KeyObjectData.hpp +71 -0
- package/cpp/keys/node.h +5 -0
- package/cpp/pbkdf2/HybridPbkdf2.cpp +51 -0
- package/cpp/pbkdf2/HybridPbkdf2.hpp +24 -0
- package/cpp/random/HybridRandom.cpp +32 -18
- package/cpp/random/HybridRandom.hpp +18 -30
- package/cpp/rsa/HybridRsaKeyPair.cpp +154 -0
- package/cpp/rsa/HybridRsaKeyPair.hpp +43 -0
- package/cpp/utils/Macros.hpp +68 -0
- package/cpp/utils/Utils.hpp +53 -1
- package/deps/blake3/.cargo/config.toml +2 -0
- package/deps/blake3/.git-blame-ignore-revs +2 -0
- package/deps/blake3/.github/workflows/build_b3sum.py +38 -0
- package/deps/blake3/.github/workflows/ci.yml +491 -0
- package/deps/blake3/.github/workflows/tag.yml +43 -0
- package/deps/blake3/.github/workflows/upload_github_release_asset.py +73 -0
- package/deps/blake3/CONTRIBUTING.md +31 -0
- package/deps/blake3/Cargo.toml +135 -0
- package/deps/blake3/LICENSE_A2 +202 -0
- package/deps/blake3/LICENSE_A2LLVM +219 -0
- package/deps/blake3/LICENSE_CC0 +121 -0
- package/deps/blake3/README.md +229 -0
- package/deps/blake3/b3sum/Cargo.lock +513 -0
- package/deps/blake3/b3sum/Cargo.toml +26 -0
- package/deps/blake3/b3sum/README.md +72 -0
- package/deps/blake3/b3sum/src/main.rs +564 -0
- package/deps/blake3/b3sum/src/unit_tests.rs +235 -0
- package/deps/blake3/b3sum/tests/cli_tests.rs +680 -0
- package/deps/blake3/b3sum/what_does_check_do.md +176 -0
- package/deps/blake3/benches/bench.rs +623 -0
- package/deps/blake3/build.rs +389 -0
- package/deps/blake3/c/CMakeLists.txt +383 -0
- package/deps/blake3/c/CMakePresets.json +73 -0
- package/deps/blake3/c/Makefile.testing +82 -0
- package/deps/blake3/c/README.md +403 -0
- package/deps/blake3/c/blake3-config.cmake.in +14 -0
- package/deps/blake3/c/blake3.c +650 -0
- package/deps/blake3/c/blake3.h +86 -0
- package/deps/blake3/c/blake3_avx2.c +326 -0
- package/deps/blake3/c/blake3_avx2_x86-64_unix.S +1815 -0
- package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +1817 -0
- package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +1828 -0
- package/deps/blake3/c/blake3_avx512.c +1388 -0
- package/deps/blake3/c/blake3_avx512_x86-64_unix.S +4824 -0
- package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +2615 -0
- package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +2634 -0
- package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +32 -0
- package/deps/blake3/c/blake3_c_rust_bindings/README.md +4 -0
- package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +477 -0
- package/deps/blake3/c/blake3_c_rust_bindings/build.rs +253 -0
- package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +31 -0
- package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +333 -0
- package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +696 -0
- package/deps/blake3/c/blake3_dispatch.c +332 -0
- package/deps/blake3/c/blake3_impl.h +333 -0
- package/deps/blake3/c/blake3_neon.c +366 -0
- package/deps/blake3/c/blake3_portable.c +160 -0
- package/deps/blake3/c/blake3_sse2.c +566 -0
- package/deps/blake3/c/blake3_sse2_x86-64_unix.S +2291 -0
- package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +2332 -0
- package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +2350 -0
- package/deps/blake3/c/blake3_sse41.c +560 -0
- package/deps/blake3/c/blake3_sse41_x86-64_unix.S +2028 -0
- package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +2069 -0
- package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +2089 -0
- package/deps/blake3/c/blake3_tbb.cpp +37 -0
- package/deps/blake3/c/dependencies/CMakeLists.txt +3 -0
- package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +28 -0
- package/deps/blake3/c/example.c +36 -0
- package/deps/blake3/c/example_tbb.c +57 -0
- package/deps/blake3/c/libblake3.pc.in +12 -0
- package/deps/blake3/c/main.c +166 -0
- package/deps/blake3/c/test.py +97 -0
- package/deps/blake3/media/B3.svg +70 -0
- package/deps/blake3/media/BLAKE3.svg +85 -0
- package/deps/blake3/media/speed.svg +1474 -0
- package/deps/blake3/reference_impl/Cargo.toml +8 -0
- package/deps/blake3/reference_impl/README.md +14 -0
- package/deps/blake3/reference_impl/reference_impl.rs +374 -0
- package/deps/blake3/src/ffi_avx2.rs +65 -0
- package/deps/blake3/src/ffi_avx512.rs +169 -0
- package/deps/blake3/src/ffi_neon.rs +82 -0
- package/deps/blake3/src/ffi_sse2.rs +126 -0
- package/deps/blake3/src/ffi_sse41.rs +126 -0
- package/deps/blake3/src/guts.rs +60 -0
- package/deps/blake3/src/hazmat.rs +704 -0
- package/deps/blake3/src/io.rs +64 -0
- package/deps/blake3/src/join.rs +92 -0
- package/deps/blake3/src/lib.rs +1835 -0
- package/deps/blake3/src/platform.rs +587 -0
- package/deps/blake3/src/portable.rs +198 -0
- package/deps/blake3/src/rust_avx2.rs +474 -0
- package/deps/blake3/src/rust_sse2.rs +775 -0
- package/deps/blake3/src/rust_sse41.rs +766 -0
- package/deps/blake3/src/test.rs +1049 -0
- package/deps/blake3/src/traits.rs +227 -0
- package/deps/blake3/src/wasm32_simd.rs +794 -0
- package/deps/blake3/test_vectors/Cargo.toml +19 -0
- package/deps/blake3/test_vectors/cross_test.sh +25 -0
- package/deps/blake3/test_vectors/src/bin/generate.rs +4 -0
- package/deps/blake3/test_vectors/src/lib.rs +350 -0
- package/deps/blake3/test_vectors/test_vectors.json +217 -0
- package/deps/blake3/tools/compiler_version/Cargo.toml +7 -0
- package/deps/blake3/tools/compiler_version/build.rs +6 -0
- package/deps/blake3/tools/compiler_version/src/main.rs +27 -0
- package/deps/blake3/tools/instruction_set_support/Cargo.toml +6 -0
- package/deps/blake3/tools/instruction_set_support/src/main.rs +10 -0
- package/deps/blake3/tools/release.md +16 -0
- package/deps/fastpbkdf2/fastpbkdf2.c +356 -0
- package/deps/fastpbkdf2/fastpbkdf2.h +68 -0
- package/deps/ncrypto/ncrypto.cc +4679 -0
- package/deps/ncrypto/ncrypto.h +1625 -0
- package/lib/commonjs/blake3.js +98 -0
- package/lib/commonjs/blake3.js.map +1 -0
- package/lib/commonjs/cipher.js +180 -0
- package/lib/commonjs/cipher.js.map +1 -0
- package/lib/commonjs/ec.js +344 -0
- package/lib/commonjs/ec.js.map +1 -0
- package/lib/commonjs/ed.js +185 -0
- package/lib/commonjs/ed.js.map +1 -0
- package/lib/commonjs/expo-plugin/@types.js +2 -0
- package/lib/commonjs/expo-plugin/@types.js.map +1 -0
- package/lib/commonjs/expo-plugin/withRNQC.js +25 -0
- package/lib/commonjs/expo-plugin/withRNQC.js.map +1 -0
- package/lib/commonjs/expo-plugin/withSodiumAndroid.js +25 -0
- package/lib/commonjs/expo-plugin/withSodiumAndroid.js.map +1 -0
- package/lib/commonjs/expo-plugin/withSodiumIos.js +26 -0
- package/lib/commonjs/expo-plugin/withSodiumIos.js.map +1 -0
- package/lib/commonjs/expo-plugin/withXCode.js +51 -0
- package/lib/commonjs/expo-plugin/withXCode.js.map +1 -0
- package/lib/commonjs/hash.js +215 -0
- package/lib/commonjs/hash.js.map +1 -0
- package/lib/commonjs/hmac.js +109 -0
- package/lib/commonjs/hmac.js.map +1 -0
- package/lib/commonjs/index.js +152 -32
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/keys/classes.js +250 -0
- package/lib/commonjs/keys/classes.js.map +1 -0
- package/lib/commonjs/keys/generateKeyPair.js +102 -0
- package/lib/commonjs/keys/generateKeyPair.js.map +1 -0
- package/lib/commonjs/keys/index.js +89 -0
- package/lib/commonjs/keys/index.js.map +1 -0
- package/lib/commonjs/keys/signVerify.js +41 -0
- package/lib/commonjs/keys/signVerify.js.map +1 -0
- package/lib/commonjs/keys/utils.js +123 -0
- package/lib/commonjs/keys/utils.js.map +1 -0
- package/lib/commonjs/pbkdf2.js +89 -0
- package/lib/commonjs/pbkdf2.js.map +1 -0
- package/lib/commonjs/random.js +9 -3
- package/lib/commonjs/random.js.map +1 -1
- package/lib/commonjs/rsa.js +129 -0
- package/lib/commonjs/rsa.js.map +1 -0
- package/lib/commonjs/specs/blake3.nitro.js +6 -0
- package/lib/commonjs/specs/blake3.nitro.js.map +1 -0
- package/lib/commonjs/specs/cipher.nitro.js +6 -0
- package/lib/commonjs/specs/cipher.nitro.js.map +1 -0
- package/lib/commonjs/specs/ecKeyPair.nitro.js +6 -0
- package/lib/commonjs/specs/ecKeyPair.nitro.js.map +1 -0
- package/lib/commonjs/specs/edKeyPair.nitro.js +6 -0
- package/lib/commonjs/specs/edKeyPair.nitro.js.map +1 -0
- package/lib/commonjs/specs/hash.nitro.js +6 -0
- package/lib/commonjs/specs/hash.nitro.js.map +1 -0
- package/lib/commonjs/specs/hmac.nitro.js +6 -0
- package/lib/commonjs/specs/hmac.nitro.js.map +1 -0
- package/lib/commonjs/specs/keyObjectHandle.nitro.js +6 -0
- package/lib/commonjs/specs/keyObjectHandle.nitro.js.map +1 -0
- package/lib/commonjs/specs/pbkdf2.nitro.js +6 -0
- package/lib/commonjs/specs/pbkdf2.nitro.js.map +1 -0
- package/lib/commonjs/specs/rsaKeyPair.nitro.js +6 -0
- package/lib/commonjs/specs/rsaKeyPair.nitro.js.map +1 -0
- package/lib/commonjs/subtle.js +365 -0
- package/lib/commonjs/subtle.js.map +1 -0
- package/lib/commonjs/utils/cipher.js +64 -0
- package/lib/commonjs/utils/cipher.js.map +1 -0
- package/lib/commonjs/utils/conversion.js +140 -6
- package/lib/commonjs/utils/conversion.js.map +1 -1
- package/lib/commonjs/utils/errors.js +14 -0
- package/lib/commonjs/utils/errors.js.map +1 -0
- package/lib/commonjs/utils/hashnames.js +91 -0
- package/lib/commonjs/utils/hashnames.js.map +1 -0
- package/lib/commonjs/utils/index.js +65 -5
- package/lib/commonjs/utils/index.js.map +1 -1
- package/lib/commonjs/utils/noble.js +82 -0
- package/lib/commonjs/utils/noble.js.map +1 -0
- package/lib/commonjs/utils/types.js +52 -0
- package/lib/commonjs/utils/types.js.map +1 -1
- package/lib/commonjs/utils/validation.js +98 -0
- package/lib/commonjs/utils/validation.js.map +1 -0
- package/lib/module/blake3.js +90 -0
- package/lib/module/blake3.js.map +1 -0
- package/lib/module/cipher.js +173 -0
- package/lib/module/cipher.js.map +1 -0
- package/lib/module/ec.js +336 -0
- package/lib/module/ec.js.map +1 -0
- package/lib/module/ed.js +178 -0
- package/lib/module/ed.js.map +1 -0
- package/lib/module/expo-plugin/@types.js +2 -0
- package/lib/module/expo-plugin/@types.js.map +1 -0
- package/lib/module/expo-plugin/withRNQC.js +21 -0
- package/lib/module/expo-plugin/withRNQC.js.map +1 -0
- package/lib/module/expo-plugin/withSodiumAndroid.js +20 -0
- package/lib/module/expo-plugin/withSodiumAndroid.js.map +1 -0
- package/lib/module/expo-plugin/withSodiumIos.js +20 -0
- package/lib/module/expo-plugin/withSodiumIos.js.map +1 -0
- package/lib/module/expo-plugin/withXCode.js +46 -0
- package/lib/module/expo-plugin/withXCode.js.map +1 -0
- package/lib/module/hash.js +207 -0
- package/lib/module/hash.js.map +1 -0
- package/lib/module/hmac.js +104 -0
- package/lib/module/hmac.js.map +1 -0
- package/lib/module/index.js +33 -29
- package/lib/module/index.js.map +1 -1
- package/lib/module/keys/classes.js +241 -0
- package/lib/module/keys/classes.js.map +1 -0
- package/lib/module/keys/generateKeyPair.js +96 -0
- package/lib/module/keys/generateKeyPair.js.map +1 -0
- package/lib/module/keys/index.js +32 -0
- package/lib/module/keys/index.js.map +1 -0
- package/lib/module/keys/signVerify.js +41 -0
- package/lib/module/keys/signVerify.js.map +1 -0
- package/lib/module/keys/utils.js +114 -0
- package/lib/module/keys/utils.js.map +1 -0
- package/lib/module/pbkdf2.js +83 -0
- package/lib/module/pbkdf2.js.map +1 -0
- package/lib/module/random.js +7 -1
- package/lib/module/random.js.map +1 -1
- package/lib/module/rsa.js +123 -0
- package/lib/module/rsa.js.map +1 -0
- package/lib/module/specs/blake3.nitro.js +4 -0
- package/lib/module/specs/blake3.nitro.js.map +1 -0
- package/lib/module/specs/cipher.nitro.js +4 -0
- package/lib/module/specs/cipher.nitro.js.map +1 -0
- package/lib/module/specs/ecKeyPair.nitro.js +4 -0
- package/lib/module/specs/ecKeyPair.nitro.js.map +1 -0
- package/lib/module/specs/edKeyPair.nitro.js +4 -0
- package/lib/module/specs/edKeyPair.nitro.js.map +1 -0
- package/lib/module/specs/hash.nitro.js +4 -0
- package/lib/module/specs/hash.nitro.js.map +1 -0
- package/lib/module/specs/hmac.nitro.js +4 -0
- package/lib/module/specs/hmac.nitro.js.map +1 -0
- package/lib/module/specs/keyObjectHandle.nitro.js +4 -0
- package/lib/module/specs/keyObjectHandle.nitro.js.map +1 -0
- package/lib/module/specs/pbkdf2.nitro.js +4 -0
- package/lib/module/specs/pbkdf2.nitro.js.map +1 -0
- package/lib/module/specs/rsaKeyPair.nitro.js +4 -0
- package/lib/module/specs/rsaKeyPair.nitro.js.map +1 -0
- package/lib/module/subtle.js +360 -0
- package/lib/module/subtle.js.map +1 -0
- package/lib/module/utils/cipher.js +56 -0
- package/lib/module/utils/cipher.js.map +1 -0
- package/lib/module/utils/conversion.js +120 -8
- package/lib/module/utils/conversion.js.map +1 -1
- package/lib/module/utils/errors.js +10 -0
- package/lib/module/utils/errors.js.map +1 -0
- package/lib/module/utils/hashnames.js +89 -0
- package/lib/module/utils/hashnames.js.map +1 -0
- package/lib/module/utils/index.js +6 -5
- package/lib/module/utils/index.js.map +1 -1
- package/lib/module/utils/noble.js +76 -0
- package/lib/module/utils/noble.js.map +1 -0
- package/lib/module/utils/types.js +53 -0
- package/lib/module/utils/types.js.map +1 -1
- package/lib/module/utils/validation.js +87 -0
- package/lib/module/utils/validation.js.map +1 -0
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/typescript/blake3.d.ts +33 -0
- package/lib/typescript/blake3.d.ts.map +1 -0
- package/lib/typescript/cipher.d.ts +60 -0
- package/lib/typescript/cipher.d.ts.map +1 -0
- package/lib/typescript/ec.d.ts +13 -0
- package/lib/typescript/ec.d.ts.map +1 -0
- package/lib/typescript/ed.d.ts +43 -0
- package/lib/typescript/ed.d.ts.map +1 -0
- package/lib/typescript/expo-plugin/@types.d.ts +8 -0
- package/lib/typescript/expo-plugin/@types.d.ts.map +1 -0
- package/lib/typescript/expo-plugin/withRNQC.d.ts +4 -0
- package/lib/typescript/expo-plugin/withRNQC.d.ts.map +1 -0
- package/lib/typescript/expo-plugin/withSodiumAndroid.d.ts +4 -0
- package/lib/typescript/expo-plugin/withSodiumAndroid.d.ts.map +1 -0
- package/lib/typescript/expo-plugin/withSodiumIos.d.ts +4 -0
- package/lib/typescript/expo-plugin/withSodiumIos.d.ts.map +1 -0
- package/lib/typescript/expo-plugin/withXCode.d.ts +9 -0
- package/lib/typescript/expo-plugin/withXCode.d.ts.map +1 -0
- package/lib/typescript/hash.d.ts +122 -0
- package/lib/typescript/hash.d.ts.map +1 -0
- package/lib/typescript/hmac.d.ts +66 -0
- package/lib/typescript/hmac.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +110 -9
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/keys/classes.d.ts +79 -0
- package/lib/typescript/keys/classes.d.ts.map +1 -0
- package/lib/typescript/keys/generateKeyPair.d.ts +6 -0
- package/lib/typescript/keys/generateKeyPair.d.ts.map +1 -0
- package/lib/typescript/keys/index.d.ts +7 -0
- package/lib/typescript/keys/index.d.ts.map +1 -0
- package/lib/typescript/keys/signVerify.d.ts +1 -0
- package/lib/typescript/keys/signVerify.d.ts.map +1 -0
- package/lib/typescript/keys/utils.d.ts +34 -0
- package/lib/typescript/keys/utils.d.ts.map +1 -0
- package/lib/typescript/pbkdf2.d.ts +12 -0
- package/lib/typescript/pbkdf2.d.ts.map +1 -0
- package/lib/typescript/random.d.ts +11 -5
- package/lib/typescript/random.d.ts.map +1 -1
- package/lib/typescript/rsa.d.ts +10 -0
- package/lib/typescript/rsa.d.ts.map +1 -0
- package/lib/typescript/specs/blake3.nitro.d.ts +15 -0
- package/lib/typescript/specs/blake3.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/cipher.nitro.d.ts +29 -0
- package/lib/typescript/specs/cipher.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/ecKeyPair.nitro.d.ts +20 -0
- package/lib/typescript/specs/ecKeyPair.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/edKeyPair.nitro.d.ts +17 -0
- package/lib/typescript/specs/edKeyPair.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/hash.nitro.d.ts +13 -0
- package/lib/typescript/specs/hash.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/hmac.nitro.d.ts +10 -0
- package/lib/typescript/specs/hmac.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/keyObjectHandle.nitro.d.ts +14 -0
- package/lib/typescript/specs/keyObjectHandle.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/pbkdf2.nitro.d.ts +9 -0
- package/lib/typescript/specs/pbkdf2.nitro.d.ts.map +1 -0
- package/lib/typescript/specs/rsaKeyPair.nitro.d.ts +20 -0
- package/lib/typescript/specs/rsaKeyPair.nitro.d.ts.map +1 -0
- package/lib/typescript/subtle.d.ts +17 -0
- package/lib/typescript/subtle.d.ts.map +1 -0
- package/lib/typescript/utils/cipher.d.ts +7 -0
- package/lib/typescript/utils/cipher.d.ts.map +1 -0
- package/lib/typescript/utils/conversion.d.ts +24 -2
- package/lib/typescript/utils/conversion.d.ts.map +1 -1
- package/lib/typescript/utils/errors.d.ts +7 -0
- package/lib/typescript/utils/errors.d.ts.map +1 -0
- package/lib/typescript/utils/hashnames.d.ts +13 -0
- package/lib/typescript/utils/hashnames.d.ts.map +1 -0
- package/lib/typescript/utils/index.d.ts +6 -5
- package/lib/typescript/utils/index.d.ts.map +1 -1
- package/lib/typescript/utils/noble.d.ts +19 -0
- package/lib/typescript/utils/noble.d.ts.map +1 -0
- package/lib/typescript/utils/types.d.ts +252 -2
- package/lib/typescript/utils/types.d.ts.map +1 -1
- package/lib/typescript/utils/validation.d.ts +13 -0
- package/lib/typescript/utils/validation.d.ts.map +1 -0
- package/nitrogen/generated/.gitattributes +1 -0
- package/nitrogen/generated/android/QuickCrypto+autolinking.cmake +47 -4
- package/nitrogen/generated/android/QuickCrypto+autolinking.gradle +4 -3
- package/nitrogen/generated/android/QuickCryptoOnLoad.cpp +144 -0
- package/nitrogen/generated/android/QuickCryptoOnLoad.hpp +25 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/crypto/QuickCryptoOnLoad.kt +35 -0
- package/nitrogen/generated/ios/QuickCrypto+autolinking.rb +11 -8
- package/nitrogen/generated/ios/QuickCrypto-Swift-Cxx-Bridge.cpp +11 -3
- package/nitrogen/generated/ios/QuickCrypto-Swift-Cxx-Bridge.hpp +5 -3
- package/nitrogen/generated/ios/QuickCrypto-Swift-Cxx-Umbrella.hpp +16 -7
- package/nitrogen/generated/ios/QuickCryptoAutolinking.mm +135 -0
- package/nitrogen/generated/ios/QuickCryptoAutolinking.swift +12 -0
- package/nitrogen/generated/shared/c++/CFRGKeyPairType.hpp +84 -0
- package/nitrogen/generated/shared/c++/CipherArgs.hpp +86 -0
- package/nitrogen/generated/shared/c++/HybridBlake3Spec.cpp +28 -0
- package/nitrogen/generated/shared/c++/HybridBlake3Spec.hpp +76 -0
- package/nitrogen/generated/shared/c++/HybridCipherFactorySpec.cpp +21 -0
- package/nitrogen/generated/shared/c++/HybridCipherFactorySpec.hpp +67 -0
- package/nitrogen/generated/shared/c++/HybridCipherSpec.cpp +28 -0
- package/nitrogen/generated/shared/c++/HybridCipherSpec.hpp +76 -0
- package/nitrogen/generated/shared/c++/HybridEcKeyPairSpec.cpp +29 -0
- package/nitrogen/generated/shared/c++/HybridEcKeyPairSpec.hpp +77 -0
- package/nitrogen/generated/shared/c++/HybridEdKeyPairSpec.cpp +30 -0
- package/nitrogen/generated/shared/c++/HybridEdKeyPairSpec.hpp +75 -0
- package/nitrogen/generated/shared/c++/HybridHashSpec.cpp +26 -0
- package/nitrogen/generated/shared/c++/HybridHashSpec.hpp +75 -0
- package/nitrogen/generated/shared/c++/HybridHmacSpec.cpp +23 -0
- package/nitrogen/generated/shared/c++/HybridHmacSpec.hpp +66 -0
- package/nitrogen/generated/shared/c++/HybridKeyObjectHandleSpec.cpp +26 -0
- package/nitrogen/generated/shared/c++/HybridKeyObjectHandleSpec.hpp +92 -0
- package/nitrogen/generated/shared/c++/HybridPbkdf2Spec.cpp +22 -0
- package/nitrogen/generated/shared/c++/HybridPbkdf2Spec.hpp +66 -0
- package/nitrogen/generated/shared/c++/HybridRandomSpec.cpp +2 -3
- package/nitrogen/generated/shared/c++/HybridRandomSpec.hpp +9 -6
- package/nitrogen/generated/shared/c++/HybridRsaKeyPairSpec.cpp +29 -0
- package/nitrogen/generated/shared/c++/HybridRsaKeyPairSpec.hpp +77 -0
- package/nitrogen/generated/shared/c++/JWK.hpp +161 -0
- package/nitrogen/generated/shared/c++/JWKkty.hpp +84 -0
- package/nitrogen/generated/shared/c++/JWKuse.hpp +76 -0
- package/nitrogen/generated/shared/c++/KFormatType.hpp +63 -0
- package/nitrogen/generated/shared/c++/KeyDetail.hpp +92 -0
- package/nitrogen/generated/shared/c++/KeyEncoding.hpp +64 -0
- package/nitrogen/generated/shared/c++/KeyObject.hpp +67 -0
- package/nitrogen/generated/shared/c++/KeyType.hpp +63 -0
- package/nitrogen/generated/shared/c++/KeyUsage.hpp +116 -0
- package/nitrogen/generated/shared/c++/NamedCurve.hpp +80 -0
- package/package.json +66 -39
- package/src/blake3.ts +123 -0
- package/src/cipher.ts +335 -0
- package/src/ec.ts +432 -0
- package/src/ed.ts +256 -0
- package/src/expo-plugin/@types.ts +7 -0
- package/src/expo-plugin/withRNQC.ts +23 -0
- package/src/expo-plugin/withSodiumAndroid.ts +24 -0
- package/src/expo-plugin/withSodiumIos.ts +30 -0
- package/src/expo-plugin/withXCode.ts +55 -0
- package/src/hash.ts +274 -0
- package/src/hmac.ts +135 -0
- package/src/index.ts +32 -29
- package/src/keys/classes.ts +317 -0
- package/src/keys/generateKeyPair.ts +145 -0
- package/src/keys/index.ts +52 -0
- package/src/keys/signVerify.ts +39 -0
- package/src/keys/utils.ts +190 -0
- package/src/pbkdf2.ts +154 -0
- package/src/random.ts +26 -23
- package/src/rsa.ts +176 -0
- package/src/specs/blake3.nitro.ts +12 -0
- package/src/specs/cipher.nitro.ts +25 -0
- package/src/specs/ecKeyPair.nitro.ts +38 -0
- package/src/specs/edKeyPair.nitro.ts +43 -0
- package/src/specs/hash.nitro.ts +10 -0
- package/src/specs/hmac.nitro.ts +7 -0
- package/src/specs/keyObjectHandle.nitro.ts +31 -0
- package/src/specs/pbkdf2.nitro.ts +18 -0
- package/src/specs/random.nitro.ts +2 -2
- package/src/specs/rsaKeyPair.nitro.ts +33 -0
- package/src/subtle.ts +614 -0
- package/src/utils/cipher.ts +60 -0
- package/src/utils/conversion.ts +143 -9
- package/src/utils/errors.ts +15 -0
- package/src/utils/hashnames.ts +98 -0
- package/src/utils/index.ts +6 -6
- package/src/utils/noble.ts +85 -0
- package/src/utils/types.ts +423 -3
- package/src/utils/validation.ts +130 -0
- package/ios/QuickCryptoOnLoad.mm +0 -19
- package/lib/module/package.json +0 -1
|
@@ -0,0 +1,4824 @@
|
|
|
1
|
+
#if defined(__ELF__) && defined(__linux__)
|
|
2
|
+
.section .note.GNU-stack,"",%progbits
|
|
3
|
+
#endif
|
|
4
|
+
|
|
5
|
+
#if defined(__ELF__) && defined(__CET__) && defined(__has_include)
|
|
6
|
+
#if __has_include(<cet.h>)
|
|
7
|
+
#include <cet.h>
|
|
8
|
+
#endif
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#if !defined(_CET_ENDBR)
|
|
12
|
+
#define _CET_ENDBR
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
.intel_syntax noprefix
|
|
16
|
+
.global _blake3_hash_many_avx512
|
|
17
|
+
.global blake3_hash_many_avx512
|
|
18
|
+
.global blake3_compress_in_place_avx512
|
|
19
|
+
.global _blake3_compress_in_place_avx512
|
|
20
|
+
.global blake3_compress_xof_avx512
|
|
21
|
+
.global _blake3_compress_xof_avx512
|
|
22
|
+
.global blake3_xof_many_avx512
|
|
23
|
+
.global _blake3_xof_many_avx512
|
|
24
|
+
|
|
25
|
+
#ifdef __APPLE__
|
|
26
|
+
.text
|
|
27
|
+
#else
|
|
28
|
+
.section .text
|
|
29
|
+
#endif
|
|
30
|
+
.p2align 6
|
|
31
|
+
_blake3_hash_many_avx512:
|
|
32
|
+
blake3_hash_many_avx512:
|
|
33
|
+
_CET_ENDBR
|
|
34
|
+
push r15
|
|
35
|
+
push r14
|
|
36
|
+
push r13
|
|
37
|
+
push r12
|
|
38
|
+
push rbx
|
|
39
|
+
push rbp
|
|
40
|
+
mov rbp, rsp
|
|
41
|
+
sub rsp, 144
|
|
42
|
+
and rsp, 0xFFFFFFFFFFFFFFC0
|
|
43
|
+
neg r9
|
|
44
|
+
kmovw k1, r9d
|
|
45
|
+
vmovd xmm0, r8d
|
|
46
|
+
vpbroadcastd ymm0, xmm0
|
|
47
|
+
shr r8, 32
|
|
48
|
+
vmovd xmm1, r8d
|
|
49
|
+
vpbroadcastd ymm1, xmm1
|
|
50
|
+
vmovdqa ymm4, ymm1
|
|
51
|
+
vmovdqa ymm5, ymm1
|
|
52
|
+
vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip]
|
|
53
|
+
vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip]
|
|
54
|
+
vpcmpltud k2, ymm2, ymm0
|
|
55
|
+
vpcmpltud k3, ymm3, ymm0
|
|
56
|
+
vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8}
|
|
57
|
+
vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8}
|
|
58
|
+
knotw k2, k1
|
|
59
|
+
vmovdqa32 ymm2 {k2}, ymm0
|
|
60
|
+
vmovdqa32 ymm3 {k2}, ymm0
|
|
61
|
+
vmovdqa32 ymm4 {k2}, ymm1
|
|
62
|
+
vmovdqa32 ymm5 {k2}, ymm1
|
|
63
|
+
vmovdqa ymmword ptr [rsp], ymm2
|
|
64
|
+
vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3
|
|
65
|
+
vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4
|
|
66
|
+
vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5
|
|
67
|
+
shl rdx, 6
|
|
68
|
+
mov qword ptr [rsp+0x80], rdx
|
|
69
|
+
cmp rsi, 16
|
|
70
|
+
jc 3f
|
|
71
|
+
2:
|
|
72
|
+
vpbroadcastd zmm0, dword ptr [rcx]
|
|
73
|
+
vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4]
|
|
74
|
+
vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4]
|
|
75
|
+
vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4]
|
|
76
|
+
vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4]
|
|
77
|
+
vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4]
|
|
78
|
+
vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4]
|
|
79
|
+
vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4]
|
|
80
|
+
movzx eax, byte ptr [rbp+0x38]
|
|
81
|
+
movzx ebx, byte ptr [rbp+0x40]
|
|
82
|
+
or eax, ebx
|
|
83
|
+
xor edx, edx
|
|
84
|
+
.p2align 5
|
|
85
|
+
9:
|
|
86
|
+
movzx ebx, byte ptr [rbp+0x48]
|
|
87
|
+
or ebx, eax
|
|
88
|
+
add rdx, 64
|
|
89
|
+
cmp rdx, qword ptr [rsp+0x80]
|
|
90
|
+
cmove eax, ebx
|
|
91
|
+
mov dword ptr [rsp+0x88], eax
|
|
92
|
+
mov r8, qword ptr [rdi]
|
|
93
|
+
mov r9, qword ptr [rdi+0x8]
|
|
94
|
+
mov r10, qword ptr [rdi+0x10]
|
|
95
|
+
mov r11, qword ptr [rdi+0x18]
|
|
96
|
+
mov r12, qword ptr [rdi+0x40]
|
|
97
|
+
mov r13, qword ptr [rdi+0x48]
|
|
98
|
+
mov r14, qword ptr [rdi+0x50]
|
|
99
|
+
mov r15, qword ptr [rdi+0x58]
|
|
100
|
+
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
|
101
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
|
102
|
+
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
|
103
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
|
104
|
+
vpunpcklqdq zmm8, zmm16, zmm17
|
|
105
|
+
vpunpckhqdq zmm9, zmm16, zmm17
|
|
106
|
+
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
|
107
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
|
108
|
+
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
|
109
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
|
110
|
+
vpunpcklqdq zmm10, zmm18, zmm19
|
|
111
|
+
vpunpckhqdq zmm11, zmm18, zmm19
|
|
112
|
+
mov r8, qword ptr [rdi+0x20]
|
|
113
|
+
mov r9, qword ptr [rdi+0x28]
|
|
114
|
+
mov r10, qword ptr [rdi+0x30]
|
|
115
|
+
mov r11, qword ptr [rdi+0x38]
|
|
116
|
+
mov r12, qword ptr [rdi+0x60]
|
|
117
|
+
mov r13, qword ptr [rdi+0x68]
|
|
118
|
+
mov r14, qword ptr [rdi+0x70]
|
|
119
|
+
mov r15, qword ptr [rdi+0x78]
|
|
120
|
+
vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
|
|
121
|
+
vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
|
|
122
|
+
vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
|
|
123
|
+
vinserti64x4 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01
|
|
124
|
+
vpunpcklqdq zmm12, zmm16, zmm17
|
|
125
|
+
vpunpckhqdq zmm13, zmm16, zmm17
|
|
126
|
+
vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20]
|
|
127
|
+
vinserti64x4 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01
|
|
128
|
+
vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20]
|
|
129
|
+
vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
|
|
130
|
+
vpunpcklqdq zmm14, zmm18, zmm19
|
|
131
|
+
vpunpckhqdq zmm15, zmm18, zmm19
|
|
132
|
+
vmovdqa32 zmm27, zmmword ptr [INDEX0+rip]
|
|
133
|
+
vmovdqa32 zmm31, zmmword ptr [INDEX1+rip]
|
|
134
|
+
vshufps zmm16, zmm8, zmm10, 136
|
|
135
|
+
vshufps zmm17, zmm12, zmm14, 136
|
|
136
|
+
vmovdqa32 zmm20, zmm16
|
|
137
|
+
vpermt2d zmm16, zmm27, zmm17
|
|
138
|
+
vpermt2d zmm20, zmm31, zmm17
|
|
139
|
+
vshufps zmm17, zmm8, zmm10, 221
|
|
140
|
+
vshufps zmm30, zmm12, zmm14, 221
|
|
141
|
+
vmovdqa32 zmm21, zmm17
|
|
142
|
+
vpermt2d zmm17, zmm27, zmm30
|
|
143
|
+
vpermt2d zmm21, zmm31, zmm30
|
|
144
|
+
vshufps zmm18, zmm9, zmm11, 136
|
|
145
|
+
vshufps zmm8, zmm13, zmm15, 136
|
|
146
|
+
vmovdqa32 zmm22, zmm18
|
|
147
|
+
vpermt2d zmm18, zmm27, zmm8
|
|
148
|
+
vpermt2d zmm22, zmm31, zmm8
|
|
149
|
+
vshufps zmm19, zmm9, zmm11, 221
|
|
150
|
+
vshufps zmm8, zmm13, zmm15, 221
|
|
151
|
+
vmovdqa32 zmm23, zmm19
|
|
152
|
+
vpermt2d zmm19, zmm27, zmm8
|
|
153
|
+
vpermt2d zmm23, zmm31, zmm8
|
|
154
|
+
mov r8, qword ptr [rdi]
|
|
155
|
+
mov r9, qword ptr [rdi+0x8]
|
|
156
|
+
mov r10, qword ptr [rdi+0x10]
|
|
157
|
+
mov r11, qword ptr [rdi+0x18]
|
|
158
|
+
mov r12, qword ptr [rdi+0x40]
|
|
159
|
+
mov r13, qword ptr [rdi+0x48]
|
|
160
|
+
mov r14, qword ptr [rdi+0x50]
|
|
161
|
+
mov r15, qword ptr [rdi+0x58]
|
|
162
|
+
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
|
163
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
|
164
|
+
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
|
165
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
|
166
|
+
vpunpcklqdq zmm8, zmm24, zmm25
|
|
167
|
+
vpunpckhqdq zmm9, zmm24, zmm25
|
|
168
|
+
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
|
169
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
|
170
|
+
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
|
171
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
|
172
|
+
vpunpcklqdq zmm10, zmm24, zmm25
|
|
173
|
+
vpunpckhqdq zmm11, zmm24, zmm25
|
|
174
|
+
prefetcht0 [r8+rdx+0x80]
|
|
175
|
+
prefetcht0 [r12+rdx+0x80]
|
|
176
|
+
prefetcht0 [r9+rdx+0x80]
|
|
177
|
+
prefetcht0 [r13+rdx+0x80]
|
|
178
|
+
prefetcht0 [r10+rdx+0x80]
|
|
179
|
+
prefetcht0 [r14+rdx+0x80]
|
|
180
|
+
prefetcht0 [r11+rdx+0x80]
|
|
181
|
+
prefetcht0 [r15+rdx+0x80]
|
|
182
|
+
mov r8, qword ptr [rdi+0x20]
|
|
183
|
+
mov r9, qword ptr [rdi+0x28]
|
|
184
|
+
mov r10, qword ptr [rdi+0x30]
|
|
185
|
+
mov r11, qword ptr [rdi+0x38]
|
|
186
|
+
mov r12, qword ptr [rdi+0x60]
|
|
187
|
+
mov r13, qword ptr [rdi+0x68]
|
|
188
|
+
mov r14, qword ptr [rdi+0x70]
|
|
189
|
+
mov r15, qword ptr [rdi+0x78]
|
|
190
|
+
vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
|
|
191
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
|
|
192
|
+
vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
|
|
193
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01
|
|
194
|
+
vpunpcklqdq zmm12, zmm24, zmm25
|
|
195
|
+
vpunpckhqdq zmm13, zmm24, zmm25
|
|
196
|
+
vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20]
|
|
197
|
+
vinserti64x4 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01
|
|
198
|
+
vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20]
|
|
199
|
+
vinserti64x4 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01
|
|
200
|
+
vpunpcklqdq zmm14, zmm24, zmm25
|
|
201
|
+
vpunpckhqdq zmm15, zmm24, zmm25
|
|
202
|
+
prefetcht0 [r8+rdx+0x80]
|
|
203
|
+
prefetcht0 [r12+rdx+0x80]
|
|
204
|
+
prefetcht0 [r9+rdx+0x80]
|
|
205
|
+
prefetcht0 [r13+rdx+0x80]
|
|
206
|
+
prefetcht0 [r10+rdx+0x80]
|
|
207
|
+
prefetcht0 [r14+rdx+0x80]
|
|
208
|
+
prefetcht0 [r11+rdx+0x80]
|
|
209
|
+
prefetcht0 [r15+rdx+0x80]
|
|
210
|
+
vshufps zmm24, zmm8, zmm10, 136
|
|
211
|
+
vshufps zmm30, zmm12, zmm14, 136
|
|
212
|
+
vmovdqa32 zmm28, zmm24
|
|
213
|
+
vpermt2d zmm24, zmm27, zmm30
|
|
214
|
+
vpermt2d zmm28, zmm31, zmm30
|
|
215
|
+
vshufps zmm25, zmm8, zmm10, 221
|
|
216
|
+
vshufps zmm30, zmm12, zmm14, 221
|
|
217
|
+
vmovdqa32 zmm29, zmm25
|
|
218
|
+
vpermt2d zmm25, zmm27, zmm30
|
|
219
|
+
vpermt2d zmm29, zmm31, zmm30
|
|
220
|
+
vshufps zmm26, zmm9, zmm11, 136
|
|
221
|
+
vshufps zmm8, zmm13, zmm15, 136
|
|
222
|
+
vmovdqa32 zmm30, zmm26
|
|
223
|
+
vpermt2d zmm26, zmm27, zmm8
|
|
224
|
+
vpermt2d zmm30, zmm31, zmm8
|
|
225
|
+
vshufps zmm8, zmm9, zmm11, 221
|
|
226
|
+
vshufps zmm10, zmm13, zmm15, 221
|
|
227
|
+
vpermi2d zmm27, zmm8, zmm10
|
|
228
|
+
vpermi2d zmm31, zmm8, zmm10
|
|
229
|
+
vpbroadcastd zmm8, dword ptr [BLAKE3_IV_0+rip]
|
|
230
|
+
vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip]
|
|
231
|
+
vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip]
|
|
232
|
+
vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip]
|
|
233
|
+
vmovdqa32 zmm12, zmmword ptr [rsp]
|
|
234
|
+
vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40]
|
|
235
|
+
vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
|
236
|
+
vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4]
|
|
237
|
+
vpaddd zmm0, zmm0, zmm16
|
|
238
|
+
vpaddd zmm1, zmm1, zmm18
|
|
239
|
+
vpaddd zmm2, zmm2, zmm20
|
|
240
|
+
vpaddd zmm3, zmm3, zmm22
|
|
241
|
+
vpaddd zmm0, zmm0, zmm4
|
|
242
|
+
vpaddd zmm1, zmm1, zmm5
|
|
243
|
+
vpaddd zmm2, zmm2, zmm6
|
|
244
|
+
vpaddd zmm3, zmm3, zmm7
|
|
245
|
+
vpxord zmm12, zmm12, zmm0
|
|
246
|
+
vpxord zmm13, zmm13, zmm1
|
|
247
|
+
vpxord zmm14, zmm14, zmm2
|
|
248
|
+
vpxord zmm15, zmm15, zmm3
|
|
249
|
+
vprord zmm12, zmm12, 16
|
|
250
|
+
vprord zmm13, zmm13, 16
|
|
251
|
+
vprord zmm14, zmm14, 16
|
|
252
|
+
vprord zmm15, zmm15, 16
|
|
253
|
+
vpaddd zmm8, zmm8, zmm12
|
|
254
|
+
vpaddd zmm9, zmm9, zmm13
|
|
255
|
+
vpaddd zmm10, zmm10, zmm14
|
|
256
|
+
vpaddd zmm11, zmm11, zmm15
|
|
257
|
+
vpxord zmm4, zmm4, zmm8
|
|
258
|
+
vpxord zmm5, zmm5, zmm9
|
|
259
|
+
vpxord zmm6, zmm6, zmm10
|
|
260
|
+
vpxord zmm7, zmm7, zmm11
|
|
261
|
+
vprord zmm4, zmm4, 12
|
|
262
|
+
vprord zmm5, zmm5, 12
|
|
263
|
+
vprord zmm6, zmm6, 12
|
|
264
|
+
vprord zmm7, zmm7, 12
|
|
265
|
+
vpaddd zmm0, zmm0, zmm17
|
|
266
|
+
vpaddd zmm1, zmm1, zmm19
|
|
267
|
+
vpaddd zmm2, zmm2, zmm21
|
|
268
|
+
vpaddd zmm3, zmm3, zmm23
|
|
269
|
+
vpaddd zmm0, zmm0, zmm4
|
|
270
|
+
vpaddd zmm1, zmm1, zmm5
|
|
271
|
+
vpaddd zmm2, zmm2, zmm6
|
|
272
|
+
vpaddd zmm3, zmm3, zmm7
|
|
273
|
+
vpxord zmm12, zmm12, zmm0
|
|
274
|
+
vpxord zmm13, zmm13, zmm1
|
|
275
|
+
vpxord zmm14, zmm14, zmm2
|
|
276
|
+
vpxord zmm15, zmm15, zmm3
|
|
277
|
+
vprord zmm12, zmm12, 8
|
|
278
|
+
vprord zmm13, zmm13, 8
|
|
279
|
+
vprord zmm14, zmm14, 8
|
|
280
|
+
vprord zmm15, zmm15, 8
|
|
281
|
+
vpaddd zmm8, zmm8, zmm12
|
|
282
|
+
vpaddd zmm9, zmm9, zmm13
|
|
283
|
+
vpaddd zmm10, zmm10, zmm14
|
|
284
|
+
vpaddd zmm11, zmm11, zmm15
|
|
285
|
+
vpxord zmm4, zmm4, zmm8
|
|
286
|
+
vpxord zmm5, zmm5, zmm9
|
|
287
|
+
vpxord zmm6, zmm6, zmm10
|
|
288
|
+
vpxord zmm7, zmm7, zmm11
|
|
289
|
+
vprord zmm4, zmm4, 7
|
|
290
|
+
vprord zmm5, zmm5, 7
|
|
291
|
+
vprord zmm6, zmm6, 7
|
|
292
|
+
vprord zmm7, zmm7, 7
|
|
293
|
+
vpaddd zmm0, zmm0, zmm24
|
|
294
|
+
vpaddd zmm1, zmm1, zmm26
|
|
295
|
+
vpaddd zmm2, zmm2, zmm28
|
|
296
|
+
vpaddd zmm3, zmm3, zmm30
|
|
297
|
+
vpaddd zmm0, zmm0, zmm5
|
|
298
|
+
vpaddd zmm1, zmm1, zmm6
|
|
299
|
+
vpaddd zmm2, zmm2, zmm7
|
|
300
|
+
vpaddd zmm3, zmm3, zmm4
|
|
301
|
+
vpxord zmm15, zmm15, zmm0
|
|
302
|
+
vpxord zmm12, zmm12, zmm1
|
|
303
|
+
vpxord zmm13, zmm13, zmm2
|
|
304
|
+
vpxord zmm14, zmm14, zmm3
|
|
305
|
+
vprord zmm15, zmm15, 16
|
|
306
|
+
vprord zmm12, zmm12, 16
|
|
307
|
+
vprord zmm13, zmm13, 16
|
|
308
|
+
vprord zmm14, zmm14, 16
|
|
309
|
+
vpaddd zmm10, zmm10, zmm15
|
|
310
|
+
vpaddd zmm11, zmm11, zmm12
|
|
311
|
+
vpaddd zmm8, zmm8, zmm13
|
|
312
|
+
vpaddd zmm9, zmm9, zmm14
|
|
313
|
+
vpxord zmm5, zmm5, zmm10
|
|
314
|
+
vpxord zmm6, zmm6, zmm11
|
|
315
|
+
vpxord zmm7, zmm7, zmm8
|
|
316
|
+
vpxord zmm4, zmm4, zmm9
|
|
317
|
+
vprord zmm5, zmm5, 12
|
|
318
|
+
vprord zmm6, zmm6, 12
|
|
319
|
+
vprord zmm7, zmm7, 12
|
|
320
|
+
vprord zmm4, zmm4, 12
|
|
321
|
+
vpaddd zmm0, zmm0, zmm25
|
|
322
|
+
vpaddd zmm1, zmm1, zmm27
|
|
323
|
+
vpaddd zmm2, zmm2, zmm29
|
|
324
|
+
vpaddd zmm3, zmm3, zmm31
|
|
325
|
+
vpaddd zmm0, zmm0, zmm5
|
|
326
|
+
vpaddd zmm1, zmm1, zmm6
|
|
327
|
+
vpaddd zmm2, zmm2, zmm7
|
|
328
|
+
vpaddd zmm3, zmm3, zmm4
|
|
329
|
+
vpxord zmm15, zmm15, zmm0
|
|
330
|
+
vpxord zmm12, zmm12, zmm1
|
|
331
|
+
vpxord zmm13, zmm13, zmm2
|
|
332
|
+
vpxord zmm14, zmm14, zmm3
|
|
333
|
+
vprord zmm15, zmm15, 8
|
|
334
|
+
vprord zmm12, zmm12, 8
|
|
335
|
+
vprord zmm13, zmm13, 8
|
|
336
|
+
vprord zmm14, zmm14, 8
|
|
337
|
+
vpaddd zmm10, zmm10, zmm15
|
|
338
|
+
vpaddd zmm11, zmm11, zmm12
|
|
339
|
+
vpaddd zmm8, zmm8, zmm13
|
|
340
|
+
vpaddd zmm9, zmm9, zmm14
|
|
341
|
+
vpxord zmm5, zmm5, zmm10
|
|
342
|
+
vpxord zmm6, zmm6, zmm11
|
|
343
|
+
vpxord zmm7, zmm7, zmm8
|
|
344
|
+
vpxord zmm4, zmm4, zmm9
|
|
345
|
+
vprord zmm5, zmm5, 7
|
|
346
|
+
vprord zmm6, zmm6, 7
|
|
347
|
+
vprord zmm7, zmm7, 7
|
|
348
|
+
vprord zmm4, zmm4, 7
|
|
349
|
+
vpaddd zmm0, zmm0, zmm18
|
|
350
|
+
vpaddd zmm1, zmm1, zmm19
|
|
351
|
+
vpaddd zmm2, zmm2, zmm23
|
|
352
|
+
vpaddd zmm3, zmm3, zmm20
|
|
353
|
+
vpaddd zmm0, zmm0, zmm4
|
|
354
|
+
vpaddd zmm1, zmm1, zmm5
|
|
355
|
+
vpaddd zmm2, zmm2, zmm6
|
|
356
|
+
vpaddd zmm3, zmm3, zmm7
|
|
357
|
+
vpxord zmm12, zmm12, zmm0
|
|
358
|
+
vpxord zmm13, zmm13, zmm1
|
|
359
|
+
vpxord zmm14, zmm14, zmm2
|
|
360
|
+
vpxord zmm15, zmm15, zmm3
|
|
361
|
+
vprord zmm12, zmm12, 16
|
|
362
|
+
vprord zmm13, zmm13, 16
|
|
363
|
+
vprord zmm14, zmm14, 16
|
|
364
|
+
vprord zmm15, zmm15, 16
|
|
365
|
+
vpaddd zmm8, zmm8, zmm12
|
|
366
|
+
vpaddd zmm9, zmm9, zmm13
|
|
367
|
+
vpaddd zmm10, zmm10, zmm14
|
|
368
|
+
vpaddd zmm11, zmm11, zmm15
|
|
369
|
+
vpxord zmm4, zmm4, zmm8
|
|
370
|
+
vpxord zmm5, zmm5, zmm9
|
|
371
|
+
vpxord zmm6, zmm6, zmm10
|
|
372
|
+
vpxord zmm7, zmm7, zmm11
|
|
373
|
+
vprord zmm4, zmm4, 12
|
|
374
|
+
vprord zmm5, zmm5, 12
|
|
375
|
+
vprord zmm6, zmm6, 12
|
|
376
|
+
vprord zmm7, zmm7, 12
|
|
377
|
+
vpaddd zmm0, zmm0, zmm22
|
|
378
|
+
vpaddd zmm1, zmm1, zmm26
|
|
379
|
+
vpaddd zmm2, zmm2, zmm16
|
|
380
|
+
vpaddd zmm3, zmm3, zmm29
|
|
381
|
+
vpaddd zmm0, zmm0, zmm4
|
|
382
|
+
vpaddd zmm1, zmm1, zmm5
|
|
383
|
+
vpaddd zmm2, zmm2, zmm6
|
|
384
|
+
vpaddd zmm3, zmm3, zmm7
|
|
385
|
+
vpxord zmm12, zmm12, zmm0
|
|
386
|
+
vpxord zmm13, zmm13, zmm1
|
|
387
|
+
vpxord zmm14, zmm14, zmm2
|
|
388
|
+
vpxord zmm15, zmm15, zmm3
|
|
389
|
+
vprord zmm12, zmm12, 8
|
|
390
|
+
vprord zmm13, zmm13, 8
|
|
391
|
+
vprord zmm14, zmm14, 8
|
|
392
|
+
vprord zmm15, zmm15, 8
|
|
393
|
+
vpaddd zmm8, zmm8, zmm12
|
|
394
|
+
vpaddd zmm9, zmm9, zmm13
|
|
395
|
+
vpaddd zmm10, zmm10, zmm14
|
|
396
|
+
vpaddd zmm11, zmm11, zmm15
|
|
397
|
+
vpxord zmm4, zmm4, zmm8
|
|
398
|
+
vpxord zmm5, zmm5, zmm9
|
|
399
|
+
vpxord zmm6, zmm6, zmm10
|
|
400
|
+
vpxord zmm7, zmm7, zmm11
|
|
401
|
+
vprord zmm4, zmm4, 7
|
|
402
|
+
vprord zmm5, zmm5, 7
|
|
403
|
+
vprord zmm6, zmm6, 7
|
|
404
|
+
vprord zmm7, zmm7, 7
|
|
405
|
+
vpaddd zmm0, zmm0, zmm17
|
|
406
|
+
vpaddd zmm1, zmm1, zmm28
|
|
407
|
+
vpaddd zmm2, zmm2, zmm25
|
|
408
|
+
vpaddd zmm3, zmm3, zmm31
|
|
409
|
+
vpaddd zmm0, zmm0, zmm5
|
|
410
|
+
vpaddd zmm1, zmm1, zmm6
|
|
411
|
+
vpaddd zmm2, zmm2, zmm7
|
|
412
|
+
vpaddd zmm3, zmm3, zmm4
|
|
413
|
+
vpxord zmm15, zmm15, zmm0
|
|
414
|
+
vpxord zmm12, zmm12, zmm1
|
|
415
|
+
vpxord zmm13, zmm13, zmm2
|
|
416
|
+
vpxord zmm14, zmm14, zmm3
|
|
417
|
+
vprord zmm15, zmm15, 16
|
|
418
|
+
vprord zmm12, zmm12, 16
|
|
419
|
+
vprord zmm13, zmm13, 16
|
|
420
|
+
vprord zmm14, zmm14, 16
|
|
421
|
+
vpaddd zmm10, zmm10, zmm15
|
|
422
|
+
vpaddd zmm11, zmm11, zmm12
|
|
423
|
+
vpaddd zmm8, zmm8, zmm13
|
|
424
|
+
vpaddd zmm9, zmm9, zmm14
|
|
425
|
+
vpxord zmm5, zmm5, zmm10
|
|
426
|
+
vpxord zmm6, zmm6, zmm11
|
|
427
|
+
vpxord zmm7, zmm7, zmm8
|
|
428
|
+
vpxord zmm4, zmm4, zmm9
|
|
429
|
+
vprord zmm5, zmm5, 12
|
|
430
|
+
vprord zmm6, zmm6, 12
|
|
431
|
+
vprord zmm7, zmm7, 12
|
|
432
|
+
vprord zmm4, zmm4, 12
|
|
433
|
+
vpaddd zmm0, zmm0, zmm27
|
|
434
|
+
vpaddd zmm1, zmm1, zmm21
|
|
435
|
+
vpaddd zmm2, zmm2, zmm30
|
|
436
|
+
vpaddd zmm3, zmm3, zmm24
|
|
437
|
+
vpaddd zmm0, zmm0, zmm5
|
|
438
|
+
vpaddd zmm1, zmm1, zmm6
|
|
439
|
+
vpaddd zmm2, zmm2, zmm7
|
|
440
|
+
vpaddd zmm3, zmm3, zmm4
|
|
441
|
+
vpxord zmm15, zmm15, zmm0
|
|
442
|
+
vpxord zmm12, zmm12, zmm1
|
|
443
|
+
vpxord zmm13, zmm13, zmm2
|
|
444
|
+
vpxord zmm14, zmm14, zmm3
|
|
445
|
+
vprord zmm15, zmm15, 8
|
|
446
|
+
vprord zmm12, zmm12, 8
|
|
447
|
+
vprord zmm13, zmm13, 8
|
|
448
|
+
vprord zmm14, zmm14, 8
|
|
449
|
+
vpaddd zmm10, zmm10, zmm15
|
|
450
|
+
vpaddd zmm11, zmm11, zmm12
|
|
451
|
+
vpaddd zmm8, zmm8, zmm13
|
|
452
|
+
vpaddd zmm9, zmm9, zmm14
|
|
453
|
+
vpxord zmm5, zmm5, zmm10
|
|
454
|
+
vpxord zmm6, zmm6, zmm11
|
|
455
|
+
vpxord zmm7, zmm7, zmm8
|
|
456
|
+
vpxord zmm4, zmm4, zmm9
|
|
457
|
+
vprord zmm5, zmm5, 7
|
|
458
|
+
vprord zmm6, zmm6, 7
|
|
459
|
+
vprord zmm7, zmm7, 7
|
|
460
|
+
vprord zmm4, zmm4, 7
|
|
461
|
+
vpaddd zmm0, zmm0, zmm19
|
|
462
|
+
vpaddd zmm1, zmm1, zmm26
|
|
463
|
+
vpaddd zmm2, zmm2, zmm29
|
|
464
|
+
vpaddd zmm3, zmm3, zmm23
|
|
465
|
+
vpaddd zmm0, zmm0, zmm4
|
|
466
|
+
vpaddd zmm1, zmm1, zmm5
|
|
467
|
+
vpaddd zmm2, zmm2, zmm6
|
|
468
|
+
vpaddd zmm3, zmm3, zmm7
|
|
469
|
+
vpxord zmm12, zmm12, zmm0
|
|
470
|
+
vpxord zmm13, zmm13, zmm1
|
|
471
|
+
vpxord zmm14, zmm14, zmm2
|
|
472
|
+
vpxord zmm15, zmm15, zmm3
|
|
473
|
+
vprord zmm12, zmm12, 16
|
|
474
|
+
vprord zmm13, zmm13, 16
|
|
475
|
+
vprord zmm14, zmm14, 16
|
|
476
|
+
vprord zmm15, zmm15, 16
|
|
477
|
+
vpaddd zmm8, zmm8, zmm12
|
|
478
|
+
vpaddd zmm9, zmm9, zmm13
|
|
479
|
+
vpaddd zmm10, zmm10, zmm14
|
|
480
|
+
vpaddd zmm11, zmm11, zmm15
|
|
481
|
+
vpxord zmm4, zmm4, zmm8
|
|
482
|
+
vpxord zmm5, zmm5, zmm9
|
|
483
|
+
vpxord zmm6, zmm6, zmm10
|
|
484
|
+
vpxord zmm7, zmm7, zmm11
|
|
485
|
+
vprord zmm4, zmm4, 12
|
|
486
|
+
vprord zmm5, zmm5, 12
|
|
487
|
+
vprord zmm6, zmm6, 12
|
|
488
|
+
vprord zmm7, zmm7, 12
|
|
489
|
+
vpaddd zmm0, zmm0, zmm20
|
|
490
|
+
vpaddd zmm1, zmm1, zmm28
|
|
491
|
+
vpaddd zmm2, zmm2, zmm18
|
|
492
|
+
vpaddd zmm3, zmm3, zmm30
|
|
493
|
+
vpaddd zmm0, zmm0, zmm4
|
|
494
|
+
vpaddd zmm1, zmm1, zmm5
|
|
495
|
+
vpaddd zmm2, zmm2, zmm6
|
|
496
|
+
vpaddd zmm3, zmm3, zmm7
|
|
497
|
+
vpxord zmm12, zmm12, zmm0
|
|
498
|
+
vpxord zmm13, zmm13, zmm1
|
|
499
|
+
vpxord zmm14, zmm14, zmm2
|
|
500
|
+
vpxord zmm15, zmm15, zmm3
|
|
501
|
+
vprord zmm12, zmm12, 8
|
|
502
|
+
vprord zmm13, zmm13, 8
|
|
503
|
+
vprord zmm14, zmm14, 8
|
|
504
|
+
vprord zmm15, zmm15, 8
|
|
505
|
+
vpaddd zmm8, zmm8, zmm12
|
|
506
|
+
vpaddd zmm9, zmm9, zmm13
|
|
507
|
+
vpaddd zmm10, zmm10, zmm14
|
|
508
|
+
vpaddd zmm11, zmm11, zmm15
|
|
509
|
+
vpxord zmm4, zmm4, zmm8
|
|
510
|
+
vpxord zmm5, zmm5, zmm9
|
|
511
|
+
vpxord zmm6, zmm6, zmm10
|
|
512
|
+
vpxord zmm7, zmm7, zmm11
|
|
513
|
+
vprord zmm4, zmm4, 7
|
|
514
|
+
vprord zmm5, zmm5, 7
|
|
515
|
+
vprord zmm6, zmm6, 7
|
|
516
|
+
vprord zmm7, zmm7, 7
|
|
517
|
+
vpaddd zmm0, zmm0, zmm22
|
|
518
|
+
vpaddd zmm1, zmm1, zmm25
|
|
519
|
+
vpaddd zmm2, zmm2, zmm27
|
|
520
|
+
vpaddd zmm3, zmm3, zmm24
|
|
521
|
+
vpaddd zmm0, zmm0, zmm5
|
|
522
|
+
vpaddd zmm1, zmm1, zmm6
|
|
523
|
+
vpaddd zmm2, zmm2, zmm7
|
|
524
|
+
vpaddd zmm3, zmm3, zmm4
|
|
525
|
+
vpxord zmm15, zmm15, zmm0
|
|
526
|
+
vpxord zmm12, zmm12, zmm1
|
|
527
|
+
vpxord zmm13, zmm13, zmm2
|
|
528
|
+
vpxord zmm14, zmm14, zmm3
|
|
529
|
+
vprord zmm15, zmm15, 16
|
|
530
|
+
vprord zmm12, zmm12, 16
|
|
531
|
+
vprord zmm13, zmm13, 16
|
|
532
|
+
vprord zmm14, zmm14, 16
|
|
533
|
+
vpaddd zmm10, zmm10, zmm15
|
|
534
|
+
vpaddd zmm11, zmm11, zmm12
|
|
535
|
+
vpaddd zmm8, zmm8, zmm13
|
|
536
|
+
vpaddd zmm9, zmm9, zmm14
|
|
537
|
+
vpxord zmm5, zmm5, zmm10
|
|
538
|
+
vpxord zmm6, zmm6, zmm11
|
|
539
|
+
vpxord zmm7, zmm7, zmm8
|
|
540
|
+
vpxord zmm4, zmm4, zmm9
|
|
541
|
+
vprord zmm5, zmm5, 12
|
|
542
|
+
vprord zmm6, zmm6, 12
|
|
543
|
+
vprord zmm7, zmm7, 12
|
|
544
|
+
vprord zmm4, zmm4, 12
|
|
545
|
+
vpaddd zmm0, zmm0, zmm21
|
|
546
|
+
vpaddd zmm1, zmm1, zmm16
|
|
547
|
+
vpaddd zmm2, zmm2, zmm31
|
|
548
|
+
vpaddd zmm3, zmm3, zmm17
|
|
549
|
+
vpaddd zmm0, zmm0, zmm5
|
|
550
|
+
vpaddd zmm1, zmm1, zmm6
|
|
551
|
+
vpaddd zmm2, zmm2, zmm7
|
|
552
|
+
vpaddd zmm3, zmm3, zmm4
|
|
553
|
+
vpxord zmm15, zmm15, zmm0
|
|
554
|
+
vpxord zmm12, zmm12, zmm1
|
|
555
|
+
vpxord zmm13, zmm13, zmm2
|
|
556
|
+
vpxord zmm14, zmm14, zmm3
|
|
557
|
+
vprord zmm15, zmm15, 8
|
|
558
|
+
vprord zmm12, zmm12, 8
|
|
559
|
+
vprord zmm13, zmm13, 8
|
|
560
|
+
vprord zmm14, zmm14, 8
|
|
561
|
+
vpaddd zmm10, zmm10, zmm15
|
|
562
|
+
vpaddd zmm11, zmm11, zmm12
|
|
563
|
+
vpaddd zmm8, zmm8, zmm13
|
|
564
|
+
vpaddd zmm9, zmm9, zmm14
|
|
565
|
+
vpxord zmm5, zmm5, zmm10
|
|
566
|
+
vpxord zmm6, zmm6, zmm11
|
|
567
|
+
vpxord zmm7, zmm7, zmm8
|
|
568
|
+
vpxord zmm4, zmm4, zmm9
|
|
569
|
+
vprord zmm5, zmm5, 7
|
|
570
|
+
vprord zmm6, zmm6, 7
|
|
571
|
+
vprord zmm7, zmm7, 7
|
|
572
|
+
vprord zmm4, zmm4, 7
|
|
573
|
+
vpaddd zmm0, zmm0, zmm26
|
|
574
|
+
vpaddd zmm1, zmm1, zmm28
|
|
575
|
+
vpaddd zmm2, zmm2, zmm30
|
|
576
|
+
vpaddd zmm3, zmm3, zmm29
|
|
577
|
+
vpaddd zmm0, zmm0, zmm4
|
|
578
|
+
vpaddd zmm1, zmm1, zmm5
|
|
579
|
+
vpaddd zmm2, zmm2, zmm6
|
|
580
|
+
vpaddd zmm3, zmm3, zmm7
|
|
581
|
+
vpxord zmm12, zmm12, zmm0
|
|
582
|
+
vpxord zmm13, zmm13, zmm1
|
|
583
|
+
vpxord zmm14, zmm14, zmm2
|
|
584
|
+
vpxord zmm15, zmm15, zmm3
|
|
585
|
+
vprord zmm12, zmm12, 16
|
|
586
|
+
vprord zmm13, zmm13, 16
|
|
587
|
+
vprord zmm14, zmm14, 16
|
|
588
|
+
vprord zmm15, zmm15, 16
|
|
589
|
+
vpaddd zmm8, zmm8, zmm12
|
|
590
|
+
vpaddd zmm9, zmm9, zmm13
|
|
591
|
+
vpaddd zmm10, zmm10, zmm14
|
|
592
|
+
vpaddd zmm11, zmm11, zmm15
|
|
593
|
+
vpxord zmm4, zmm4, zmm8
|
|
594
|
+
vpxord zmm5, zmm5, zmm9
|
|
595
|
+
vpxord zmm6, zmm6, zmm10
|
|
596
|
+
vpxord zmm7, zmm7, zmm11
|
|
597
|
+
vprord zmm4, zmm4, 12
|
|
598
|
+
vprord zmm5, zmm5, 12
|
|
599
|
+
vprord zmm6, zmm6, 12
|
|
600
|
+
vprord zmm7, zmm7, 12
|
|
601
|
+
vpaddd zmm0, zmm0, zmm23
|
|
602
|
+
vpaddd zmm1, zmm1, zmm25
|
|
603
|
+
vpaddd zmm2, zmm2, zmm19
|
|
604
|
+
vpaddd zmm3, zmm3, zmm31
|
|
605
|
+
vpaddd zmm0, zmm0, zmm4
|
|
606
|
+
vpaddd zmm1, zmm1, zmm5
|
|
607
|
+
vpaddd zmm2, zmm2, zmm6
|
|
608
|
+
vpaddd zmm3, zmm3, zmm7
|
|
609
|
+
vpxord zmm12, zmm12, zmm0
|
|
610
|
+
vpxord zmm13, zmm13, zmm1
|
|
611
|
+
vpxord zmm14, zmm14, zmm2
|
|
612
|
+
vpxord zmm15, zmm15, zmm3
|
|
613
|
+
vprord zmm12, zmm12, 8
|
|
614
|
+
vprord zmm13, zmm13, 8
|
|
615
|
+
vprord zmm14, zmm14, 8
|
|
616
|
+
vprord zmm15, zmm15, 8
|
|
617
|
+
vpaddd zmm8, zmm8, zmm12
|
|
618
|
+
vpaddd zmm9, zmm9, zmm13
|
|
619
|
+
vpaddd zmm10, zmm10, zmm14
|
|
620
|
+
vpaddd zmm11, zmm11, zmm15
|
|
621
|
+
vpxord zmm4, zmm4, zmm8
|
|
622
|
+
vpxord zmm5, zmm5, zmm9
|
|
623
|
+
vpxord zmm6, zmm6, zmm10
|
|
624
|
+
vpxord zmm7, zmm7, zmm11
|
|
625
|
+
vprord zmm4, zmm4, 7
|
|
626
|
+
vprord zmm5, zmm5, 7
|
|
627
|
+
vprord zmm6, zmm6, 7
|
|
628
|
+
vprord zmm7, zmm7, 7
|
|
629
|
+
vpaddd zmm0, zmm0, zmm20
|
|
630
|
+
vpaddd zmm1, zmm1, zmm27
|
|
631
|
+
vpaddd zmm2, zmm2, zmm21
|
|
632
|
+
vpaddd zmm3, zmm3, zmm17
|
|
633
|
+
vpaddd zmm0, zmm0, zmm5
|
|
634
|
+
vpaddd zmm1, zmm1, zmm6
|
|
635
|
+
vpaddd zmm2, zmm2, zmm7
|
|
636
|
+
vpaddd zmm3, zmm3, zmm4
|
|
637
|
+
vpxord zmm15, zmm15, zmm0
|
|
638
|
+
vpxord zmm12, zmm12, zmm1
|
|
639
|
+
vpxord zmm13, zmm13, zmm2
|
|
640
|
+
vpxord zmm14, zmm14, zmm3
|
|
641
|
+
vprord zmm15, zmm15, 16
|
|
642
|
+
vprord zmm12, zmm12, 16
|
|
643
|
+
vprord zmm13, zmm13, 16
|
|
644
|
+
vprord zmm14, zmm14, 16
|
|
645
|
+
vpaddd zmm10, zmm10, zmm15
|
|
646
|
+
vpaddd zmm11, zmm11, zmm12
|
|
647
|
+
vpaddd zmm8, zmm8, zmm13
|
|
648
|
+
vpaddd zmm9, zmm9, zmm14
|
|
649
|
+
vpxord zmm5, zmm5, zmm10
|
|
650
|
+
vpxord zmm6, zmm6, zmm11
|
|
651
|
+
vpxord zmm7, zmm7, zmm8
|
|
652
|
+
vpxord zmm4, zmm4, zmm9
|
|
653
|
+
vprord zmm5, zmm5, 12
|
|
654
|
+
vprord zmm6, zmm6, 12
|
|
655
|
+
vprord zmm7, zmm7, 12
|
|
656
|
+
vprord zmm4, zmm4, 12
|
|
657
|
+
vpaddd zmm0, zmm0, zmm16
|
|
658
|
+
vpaddd zmm1, zmm1, zmm18
|
|
659
|
+
vpaddd zmm2, zmm2, zmm24
|
|
660
|
+
vpaddd zmm3, zmm3, zmm22
|
|
661
|
+
vpaddd zmm0, zmm0, zmm5
|
|
662
|
+
vpaddd zmm1, zmm1, zmm6
|
|
663
|
+
vpaddd zmm2, zmm2, zmm7
|
|
664
|
+
vpaddd zmm3, zmm3, zmm4
|
|
665
|
+
vpxord zmm15, zmm15, zmm0
|
|
666
|
+
vpxord zmm12, zmm12, zmm1
|
|
667
|
+
vpxord zmm13, zmm13, zmm2
|
|
668
|
+
vpxord zmm14, zmm14, zmm3
|
|
669
|
+
vprord zmm15, zmm15, 8
|
|
670
|
+
vprord zmm12, zmm12, 8
|
|
671
|
+
vprord zmm13, zmm13, 8
|
|
672
|
+
vprord zmm14, zmm14, 8
|
|
673
|
+
vpaddd zmm10, zmm10, zmm15
|
|
674
|
+
vpaddd zmm11, zmm11, zmm12
|
|
675
|
+
vpaddd zmm8, zmm8, zmm13
|
|
676
|
+
vpaddd zmm9, zmm9, zmm14
|
|
677
|
+
vpxord zmm5, zmm5, zmm10
|
|
678
|
+
vpxord zmm6, zmm6, zmm11
|
|
679
|
+
vpxord zmm7, zmm7, zmm8
|
|
680
|
+
vpxord zmm4, zmm4, zmm9
|
|
681
|
+
vprord zmm5, zmm5, 7
|
|
682
|
+
vprord zmm6, zmm6, 7
|
|
683
|
+
vprord zmm7, zmm7, 7
|
|
684
|
+
vprord zmm4, zmm4, 7
|
|
685
|
+
vpaddd zmm0, zmm0, zmm28
|
|
686
|
+
vpaddd zmm1, zmm1, zmm25
|
|
687
|
+
vpaddd zmm2, zmm2, zmm31
|
|
688
|
+
vpaddd zmm3, zmm3, zmm30
|
|
689
|
+
vpaddd zmm0, zmm0, zmm4
|
|
690
|
+
vpaddd zmm1, zmm1, zmm5
|
|
691
|
+
vpaddd zmm2, zmm2, zmm6
|
|
692
|
+
vpaddd zmm3, zmm3, zmm7
|
|
693
|
+
vpxord zmm12, zmm12, zmm0
|
|
694
|
+
vpxord zmm13, zmm13, zmm1
|
|
695
|
+
vpxord zmm14, zmm14, zmm2
|
|
696
|
+
vpxord zmm15, zmm15, zmm3
|
|
697
|
+
vprord zmm12, zmm12, 16
|
|
698
|
+
vprord zmm13, zmm13, 16
|
|
699
|
+
vprord zmm14, zmm14, 16
|
|
700
|
+
vprord zmm15, zmm15, 16
|
|
701
|
+
vpaddd zmm8, zmm8, zmm12
|
|
702
|
+
vpaddd zmm9, zmm9, zmm13
|
|
703
|
+
vpaddd zmm10, zmm10, zmm14
|
|
704
|
+
vpaddd zmm11, zmm11, zmm15
|
|
705
|
+
vpxord zmm4, zmm4, zmm8
|
|
706
|
+
vpxord zmm5, zmm5, zmm9
|
|
707
|
+
vpxord zmm6, zmm6, zmm10
|
|
708
|
+
vpxord zmm7, zmm7, zmm11
|
|
709
|
+
vprord zmm4, zmm4, 12
|
|
710
|
+
vprord zmm5, zmm5, 12
|
|
711
|
+
vprord zmm6, zmm6, 12
|
|
712
|
+
vprord zmm7, zmm7, 12
|
|
713
|
+
vpaddd zmm0, zmm0, zmm29
|
|
714
|
+
vpaddd zmm1, zmm1, zmm27
|
|
715
|
+
vpaddd zmm2, zmm2, zmm26
|
|
716
|
+
vpaddd zmm3, zmm3, zmm24
|
|
717
|
+
vpaddd zmm0, zmm0, zmm4
|
|
718
|
+
vpaddd zmm1, zmm1, zmm5
|
|
719
|
+
vpaddd zmm2, zmm2, zmm6
|
|
720
|
+
vpaddd zmm3, zmm3, zmm7
|
|
721
|
+
vpxord zmm12, zmm12, zmm0
|
|
722
|
+
vpxord zmm13, zmm13, zmm1
|
|
723
|
+
vpxord zmm14, zmm14, zmm2
|
|
724
|
+
vpxord zmm15, zmm15, zmm3
|
|
725
|
+
vprord zmm12, zmm12, 8
|
|
726
|
+
vprord zmm13, zmm13, 8
|
|
727
|
+
vprord zmm14, zmm14, 8
|
|
728
|
+
vprord zmm15, zmm15, 8
|
|
729
|
+
vpaddd zmm8, zmm8, zmm12
|
|
730
|
+
vpaddd zmm9, zmm9, zmm13
|
|
731
|
+
vpaddd zmm10, zmm10, zmm14
|
|
732
|
+
vpaddd zmm11, zmm11, zmm15
|
|
733
|
+
vpxord zmm4, zmm4, zmm8
|
|
734
|
+
vpxord zmm5, zmm5, zmm9
|
|
735
|
+
vpxord zmm6, zmm6, zmm10
|
|
736
|
+
vpxord zmm7, zmm7, zmm11
|
|
737
|
+
vprord zmm4, zmm4, 7
|
|
738
|
+
vprord zmm5, zmm5, 7
|
|
739
|
+
vprord zmm6, zmm6, 7
|
|
740
|
+
vprord zmm7, zmm7, 7
|
|
741
|
+
vpaddd zmm0, zmm0, zmm23
|
|
742
|
+
vpaddd zmm1, zmm1, zmm21
|
|
743
|
+
vpaddd zmm2, zmm2, zmm16
|
|
744
|
+
vpaddd zmm3, zmm3, zmm22
|
|
745
|
+
vpaddd zmm0, zmm0, zmm5
|
|
746
|
+
vpaddd zmm1, zmm1, zmm6
|
|
747
|
+
vpaddd zmm2, zmm2, zmm7
|
|
748
|
+
vpaddd zmm3, zmm3, zmm4
|
|
749
|
+
vpxord zmm15, zmm15, zmm0
|
|
750
|
+
vpxord zmm12, zmm12, zmm1
|
|
751
|
+
vpxord zmm13, zmm13, zmm2
|
|
752
|
+
vpxord zmm14, zmm14, zmm3
|
|
753
|
+
vprord zmm15, zmm15, 16
|
|
754
|
+
vprord zmm12, zmm12, 16
|
|
755
|
+
vprord zmm13, zmm13, 16
|
|
756
|
+
vprord zmm14, zmm14, 16
|
|
757
|
+
vpaddd zmm10, zmm10, zmm15
|
|
758
|
+
vpaddd zmm11, zmm11, zmm12
|
|
759
|
+
vpaddd zmm8, zmm8, zmm13
|
|
760
|
+
vpaddd zmm9, zmm9, zmm14
|
|
761
|
+
vpxord zmm5, zmm5, zmm10
|
|
762
|
+
vpxord zmm6, zmm6, zmm11
|
|
763
|
+
vpxord zmm7, zmm7, zmm8
|
|
764
|
+
vpxord zmm4, zmm4, zmm9
|
|
765
|
+
vprord zmm5, zmm5, 12
|
|
766
|
+
vprord zmm6, zmm6, 12
|
|
767
|
+
vprord zmm7, zmm7, 12
|
|
768
|
+
vprord zmm4, zmm4, 12
|
|
769
|
+
vpaddd zmm0, zmm0, zmm18
|
|
770
|
+
vpaddd zmm1, zmm1, zmm19
|
|
771
|
+
vpaddd zmm2, zmm2, zmm17
|
|
772
|
+
vpaddd zmm3, zmm3, zmm20
|
|
773
|
+
vpaddd zmm0, zmm0, zmm5
|
|
774
|
+
vpaddd zmm1, zmm1, zmm6
|
|
775
|
+
vpaddd zmm2, zmm2, zmm7
|
|
776
|
+
vpaddd zmm3, zmm3, zmm4
|
|
777
|
+
vpxord zmm15, zmm15, zmm0
|
|
778
|
+
vpxord zmm12, zmm12, zmm1
|
|
779
|
+
vpxord zmm13, zmm13, zmm2
|
|
780
|
+
vpxord zmm14, zmm14, zmm3
|
|
781
|
+
vprord zmm15, zmm15, 8
|
|
782
|
+
vprord zmm12, zmm12, 8
|
|
783
|
+
vprord zmm13, zmm13, 8
|
|
784
|
+
vprord zmm14, zmm14, 8
|
|
785
|
+
vpaddd zmm10, zmm10, zmm15
|
|
786
|
+
vpaddd zmm11, zmm11, zmm12
|
|
787
|
+
vpaddd zmm8, zmm8, zmm13
|
|
788
|
+
vpaddd zmm9, zmm9, zmm14
|
|
789
|
+
vpxord zmm5, zmm5, zmm10
|
|
790
|
+
vpxord zmm6, zmm6, zmm11
|
|
791
|
+
vpxord zmm7, zmm7, zmm8
|
|
792
|
+
vpxord zmm4, zmm4, zmm9
|
|
793
|
+
vprord zmm5, zmm5, 7
|
|
794
|
+
vprord zmm6, zmm6, 7
|
|
795
|
+
vprord zmm7, zmm7, 7
|
|
796
|
+
vprord zmm4, zmm4, 7
|
|
797
|
+
vpaddd zmm0, zmm0, zmm25
|
|
798
|
+
vpaddd zmm1, zmm1, zmm27
|
|
799
|
+
vpaddd zmm2, zmm2, zmm24
|
|
800
|
+
vpaddd zmm3, zmm3, zmm31
|
|
801
|
+
vpaddd zmm0, zmm0, zmm4
|
|
802
|
+
vpaddd zmm1, zmm1, zmm5
|
|
803
|
+
vpaddd zmm2, zmm2, zmm6
|
|
804
|
+
vpaddd zmm3, zmm3, zmm7
|
|
805
|
+
vpxord zmm12, zmm12, zmm0
|
|
806
|
+
vpxord zmm13, zmm13, zmm1
|
|
807
|
+
vpxord zmm14, zmm14, zmm2
|
|
808
|
+
vpxord zmm15, zmm15, zmm3
|
|
809
|
+
vprord zmm12, zmm12, 16
|
|
810
|
+
vprord zmm13, zmm13, 16
|
|
811
|
+
vprord zmm14, zmm14, 16
|
|
812
|
+
vprord zmm15, zmm15, 16
|
|
813
|
+
vpaddd zmm8, zmm8, zmm12
|
|
814
|
+
vpaddd zmm9, zmm9, zmm13
|
|
815
|
+
vpaddd zmm10, zmm10, zmm14
|
|
816
|
+
vpaddd zmm11, zmm11, zmm15
|
|
817
|
+
vpxord zmm4, zmm4, zmm8
|
|
818
|
+
vpxord zmm5, zmm5, zmm9
|
|
819
|
+
vpxord zmm6, zmm6, zmm10
|
|
820
|
+
vpxord zmm7, zmm7, zmm11
|
|
821
|
+
vprord zmm4, zmm4, 12
|
|
822
|
+
vprord zmm5, zmm5, 12
|
|
823
|
+
vprord zmm6, zmm6, 12
|
|
824
|
+
vprord zmm7, zmm7, 12
|
|
825
|
+
vpaddd zmm0, zmm0, zmm30
|
|
826
|
+
vpaddd zmm1, zmm1, zmm21
|
|
827
|
+
vpaddd zmm2, zmm2, zmm28
|
|
828
|
+
vpaddd zmm3, zmm3, zmm17
|
|
829
|
+
vpaddd zmm0, zmm0, zmm4
|
|
830
|
+
vpaddd zmm1, zmm1, zmm5
|
|
831
|
+
vpaddd zmm2, zmm2, zmm6
|
|
832
|
+
vpaddd zmm3, zmm3, zmm7
|
|
833
|
+
vpxord zmm12, zmm12, zmm0
|
|
834
|
+
vpxord zmm13, zmm13, zmm1
|
|
835
|
+
vpxord zmm14, zmm14, zmm2
|
|
836
|
+
vpxord zmm15, zmm15, zmm3
|
|
837
|
+
vprord zmm12, zmm12, 8
|
|
838
|
+
vprord zmm13, zmm13, 8
|
|
839
|
+
vprord zmm14, zmm14, 8
|
|
840
|
+
vprord zmm15, zmm15, 8
|
|
841
|
+
vpaddd zmm8, zmm8, zmm12
|
|
842
|
+
vpaddd zmm9, zmm9, zmm13
|
|
843
|
+
vpaddd zmm10, zmm10, zmm14
|
|
844
|
+
vpaddd zmm11, zmm11, zmm15
|
|
845
|
+
vpxord zmm4, zmm4, zmm8
|
|
846
|
+
vpxord zmm5, zmm5, zmm9
|
|
847
|
+
vpxord zmm6, zmm6, zmm10
|
|
848
|
+
vpxord zmm7, zmm7, zmm11
|
|
849
|
+
vprord zmm4, zmm4, 7
|
|
850
|
+
vprord zmm5, zmm5, 7
|
|
851
|
+
vprord zmm6, zmm6, 7
|
|
852
|
+
vprord zmm7, zmm7, 7
|
|
853
|
+
vpaddd zmm0, zmm0, zmm29
|
|
854
|
+
vpaddd zmm1, zmm1, zmm16
|
|
855
|
+
vpaddd zmm2, zmm2, zmm18
|
|
856
|
+
vpaddd zmm3, zmm3, zmm20
|
|
857
|
+
vpaddd zmm0, zmm0, zmm5
|
|
858
|
+
vpaddd zmm1, zmm1, zmm6
|
|
859
|
+
vpaddd zmm2, zmm2, zmm7
|
|
860
|
+
vpaddd zmm3, zmm3, zmm4
|
|
861
|
+
vpxord zmm15, zmm15, zmm0
|
|
862
|
+
vpxord zmm12, zmm12, zmm1
|
|
863
|
+
vpxord zmm13, zmm13, zmm2
|
|
864
|
+
vpxord zmm14, zmm14, zmm3
|
|
865
|
+
vprord zmm15, zmm15, 16
|
|
866
|
+
vprord zmm12, zmm12, 16
|
|
867
|
+
vprord zmm13, zmm13, 16
|
|
868
|
+
vprord zmm14, zmm14, 16
|
|
869
|
+
vpaddd zmm10, zmm10, zmm15
|
|
870
|
+
vpaddd zmm11, zmm11, zmm12
|
|
871
|
+
vpaddd zmm8, zmm8, zmm13
|
|
872
|
+
vpaddd zmm9, zmm9, zmm14
|
|
873
|
+
vpxord zmm5, zmm5, zmm10
|
|
874
|
+
vpxord zmm6, zmm6, zmm11
|
|
875
|
+
vpxord zmm7, zmm7, zmm8
|
|
876
|
+
vpxord zmm4, zmm4, zmm9
|
|
877
|
+
vprord zmm5, zmm5, 12
|
|
878
|
+
vprord zmm6, zmm6, 12
|
|
879
|
+
vprord zmm7, zmm7, 12
|
|
880
|
+
vprord zmm4, zmm4, 12
|
|
881
|
+
vpaddd zmm0, zmm0, zmm19
|
|
882
|
+
vpaddd zmm1, zmm1, zmm26
|
|
883
|
+
vpaddd zmm2, zmm2, zmm22
|
|
884
|
+
vpaddd zmm3, zmm3, zmm23
|
|
885
|
+
vpaddd zmm0, zmm0, zmm5
|
|
886
|
+
vpaddd zmm1, zmm1, zmm6
|
|
887
|
+
vpaddd zmm2, zmm2, zmm7
|
|
888
|
+
vpaddd zmm3, zmm3, zmm4
|
|
889
|
+
vpxord zmm15, zmm15, zmm0
|
|
890
|
+
vpxord zmm12, zmm12, zmm1
|
|
891
|
+
vpxord zmm13, zmm13, zmm2
|
|
892
|
+
vpxord zmm14, zmm14, zmm3
|
|
893
|
+
vprord zmm15, zmm15, 8
|
|
894
|
+
vprord zmm12, zmm12, 8
|
|
895
|
+
vprord zmm13, zmm13, 8
|
|
896
|
+
vprord zmm14, zmm14, 8
|
|
897
|
+
vpaddd zmm10, zmm10, zmm15
|
|
898
|
+
vpaddd zmm11, zmm11, zmm12
|
|
899
|
+
vpaddd zmm8, zmm8, zmm13
|
|
900
|
+
vpaddd zmm9, zmm9, zmm14
|
|
901
|
+
vpxord zmm5, zmm5, zmm10
|
|
902
|
+
vpxord zmm6, zmm6, zmm11
|
|
903
|
+
vpxord zmm7, zmm7, zmm8
|
|
904
|
+
vpxord zmm4, zmm4, zmm9
|
|
905
|
+
vprord zmm5, zmm5, 7
|
|
906
|
+
vprord zmm6, zmm6, 7
|
|
907
|
+
vprord zmm7, zmm7, 7
|
|
908
|
+
vprord zmm4, zmm4, 7
|
|
909
|
+
vpaddd zmm0, zmm0, zmm27
|
|
910
|
+
vpaddd zmm1, zmm1, zmm21
|
|
911
|
+
vpaddd zmm2, zmm2, zmm17
|
|
912
|
+
vpaddd zmm3, zmm3, zmm24
|
|
913
|
+
vpaddd zmm0, zmm0, zmm4
|
|
914
|
+
vpaddd zmm1, zmm1, zmm5
|
|
915
|
+
vpaddd zmm2, zmm2, zmm6
|
|
916
|
+
vpaddd zmm3, zmm3, zmm7
|
|
917
|
+
vpxord zmm12, zmm12, zmm0
|
|
918
|
+
vpxord zmm13, zmm13, zmm1
|
|
919
|
+
vpxord zmm14, zmm14, zmm2
|
|
920
|
+
vpxord zmm15, zmm15, zmm3
|
|
921
|
+
vprord zmm12, zmm12, 16
|
|
922
|
+
vprord zmm13, zmm13, 16
|
|
923
|
+
vprord zmm14, zmm14, 16
|
|
924
|
+
vprord zmm15, zmm15, 16
|
|
925
|
+
vpaddd zmm8, zmm8, zmm12
|
|
926
|
+
vpaddd zmm9, zmm9, zmm13
|
|
927
|
+
vpaddd zmm10, zmm10, zmm14
|
|
928
|
+
vpaddd zmm11, zmm11, zmm15
|
|
929
|
+
vpxord zmm4, zmm4, zmm8
|
|
930
|
+
vpxord zmm5, zmm5, zmm9
|
|
931
|
+
vpxord zmm6, zmm6, zmm10
|
|
932
|
+
vpxord zmm7, zmm7, zmm11
|
|
933
|
+
vprord zmm4, zmm4, 12
|
|
934
|
+
vprord zmm5, zmm5, 12
|
|
935
|
+
vprord zmm6, zmm6, 12
|
|
936
|
+
vprord zmm7, zmm7, 12
|
|
937
|
+
vpaddd zmm0, zmm0, zmm31
|
|
938
|
+
vpaddd zmm1, zmm1, zmm16
|
|
939
|
+
vpaddd zmm2, zmm2, zmm25
|
|
940
|
+
vpaddd zmm3, zmm3, zmm22
|
|
941
|
+
vpaddd zmm0, zmm0, zmm4
|
|
942
|
+
vpaddd zmm1, zmm1, zmm5
|
|
943
|
+
vpaddd zmm2, zmm2, zmm6
|
|
944
|
+
vpaddd zmm3, zmm3, zmm7
|
|
945
|
+
vpxord zmm12, zmm12, zmm0
|
|
946
|
+
vpxord zmm13, zmm13, zmm1
|
|
947
|
+
vpxord zmm14, zmm14, zmm2
|
|
948
|
+
vpxord zmm15, zmm15, zmm3
|
|
949
|
+
vprord zmm12, zmm12, 8
|
|
950
|
+
vprord zmm13, zmm13, 8
|
|
951
|
+
vprord zmm14, zmm14, 8
|
|
952
|
+
vprord zmm15, zmm15, 8
|
|
953
|
+
vpaddd zmm8, zmm8, zmm12
|
|
954
|
+
vpaddd zmm9, zmm9, zmm13
|
|
955
|
+
vpaddd zmm10, zmm10, zmm14
|
|
956
|
+
vpaddd zmm11, zmm11, zmm15
|
|
957
|
+
vpxord zmm4, zmm4, zmm8
|
|
958
|
+
vpxord zmm5, zmm5, zmm9
|
|
959
|
+
vpxord zmm6, zmm6, zmm10
|
|
960
|
+
vpxord zmm7, zmm7, zmm11
|
|
961
|
+
vprord zmm4, zmm4, 7
|
|
962
|
+
vprord zmm5, zmm5, 7
|
|
963
|
+
vprord zmm6, zmm6, 7
|
|
964
|
+
vprord zmm7, zmm7, 7
|
|
965
|
+
vpaddd zmm0, zmm0, zmm30
|
|
966
|
+
vpaddd zmm1, zmm1, zmm18
|
|
967
|
+
vpaddd zmm2, zmm2, zmm19
|
|
968
|
+
vpaddd zmm3, zmm3, zmm23
|
|
969
|
+
vpaddd zmm0, zmm0, zmm5
|
|
970
|
+
vpaddd zmm1, zmm1, zmm6
|
|
971
|
+
vpaddd zmm2, zmm2, zmm7
|
|
972
|
+
vpaddd zmm3, zmm3, zmm4
|
|
973
|
+
vpxord zmm15, zmm15, zmm0
|
|
974
|
+
vpxord zmm12, zmm12, zmm1
|
|
975
|
+
vpxord zmm13, zmm13, zmm2
|
|
976
|
+
vpxord zmm14, zmm14, zmm3
|
|
977
|
+
vprord zmm15, zmm15, 16
|
|
978
|
+
vprord zmm12, zmm12, 16
|
|
979
|
+
vprord zmm13, zmm13, 16
|
|
980
|
+
vprord zmm14, zmm14, 16
|
|
981
|
+
vpaddd zmm10, zmm10, zmm15
|
|
982
|
+
vpaddd zmm11, zmm11, zmm12
|
|
983
|
+
vpaddd zmm8, zmm8, zmm13
|
|
984
|
+
vpaddd zmm9, zmm9, zmm14
|
|
985
|
+
vpxord zmm5, zmm5, zmm10
|
|
986
|
+
vpxord zmm6, zmm6, zmm11
|
|
987
|
+
vpxord zmm7, zmm7, zmm8
|
|
988
|
+
vpxord zmm4, zmm4, zmm9
|
|
989
|
+
vprord zmm5, zmm5, 12
|
|
990
|
+
vprord zmm6, zmm6, 12
|
|
991
|
+
vprord zmm7, zmm7, 12
|
|
992
|
+
vprord zmm4, zmm4, 12
|
|
993
|
+
vpaddd zmm0, zmm0, zmm26
|
|
994
|
+
vpaddd zmm1, zmm1, zmm28
|
|
995
|
+
vpaddd zmm2, zmm2, zmm20
|
|
996
|
+
vpaddd zmm3, zmm3, zmm29
|
|
997
|
+
vpaddd zmm0, zmm0, zmm5
|
|
998
|
+
vpaddd zmm1, zmm1, zmm6
|
|
999
|
+
vpaddd zmm2, zmm2, zmm7
|
|
1000
|
+
vpaddd zmm3, zmm3, zmm4
|
|
1001
|
+
vpxord zmm15, zmm15, zmm0
|
|
1002
|
+
vpxord zmm12, zmm12, zmm1
|
|
1003
|
+
vpxord zmm13, zmm13, zmm2
|
|
1004
|
+
vpxord zmm14, zmm14, zmm3
|
|
1005
|
+
vprord zmm15, zmm15, 8
|
|
1006
|
+
vprord zmm12, zmm12, 8
|
|
1007
|
+
vprord zmm13, zmm13, 8
|
|
1008
|
+
vprord zmm14, zmm14, 8
|
|
1009
|
+
vpaddd zmm10, zmm10, zmm15
|
|
1010
|
+
vpaddd zmm11, zmm11, zmm12
|
|
1011
|
+
vpaddd zmm8, zmm8, zmm13
|
|
1012
|
+
vpaddd zmm9, zmm9, zmm14
|
|
1013
|
+
vpxord zmm5, zmm5, zmm10
|
|
1014
|
+
vpxord zmm6, zmm6, zmm11
|
|
1015
|
+
vpxord zmm7, zmm7, zmm8
|
|
1016
|
+
vpxord zmm4, zmm4, zmm9
|
|
1017
|
+
vprord zmm5, zmm5, 7
|
|
1018
|
+
vprord zmm6, zmm6, 7
|
|
1019
|
+
vprord zmm7, zmm7, 7
|
|
1020
|
+
vprord zmm4, zmm4, 7
|
|
1021
|
+
vpxord zmm0, zmm0, zmm8
|
|
1022
|
+
vpxord zmm1, zmm1, zmm9
|
|
1023
|
+
vpxord zmm2, zmm2, zmm10
|
|
1024
|
+
vpxord zmm3, zmm3, zmm11
|
|
1025
|
+
vpxord zmm4, zmm4, zmm12
|
|
1026
|
+
vpxord zmm5, zmm5, zmm13
|
|
1027
|
+
vpxord zmm6, zmm6, zmm14
|
|
1028
|
+
vpxord zmm7, zmm7, zmm15
|
|
1029
|
+
movzx eax, byte ptr [rbp+0x38]
|
|
1030
|
+
jne 9b
|
|
1031
|
+
mov rbx, qword ptr [rbp+0x50]
|
|
1032
|
+
vpunpckldq zmm16, zmm0, zmm1
|
|
1033
|
+
vpunpckhdq zmm17, zmm0, zmm1
|
|
1034
|
+
vpunpckldq zmm18, zmm2, zmm3
|
|
1035
|
+
vpunpckhdq zmm19, zmm2, zmm3
|
|
1036
|
+
vpunpckldq zmm20, zmm4, zmm5
|
|
1037
|
+
vpunpckhdq zmm21, zmm4, zmm5
|
|
1038
|
+
vpunpckldq zmm22, zmm6, zmm7
|
|
1039
|
+
vpunpckhdq zmm23, zmm6, zmm7
|
|
1040
|
+
vpunpcklqdq zmm0, zmm16, zmm18
|
|
1041
|
+
vpunpckhqdq zmm1, zmm16, zmm18
|
|
1042
|
+
vpunpcklqdq zmm2, zmm17, zmm19
|
|
1043
|
+
vpunpckhqdq zmm3, zmm17, zmm19
|
|
1044
|
+
vpunpcklqdq zmm4, zmm20, zmm22
|
|
1045
|
+
vpunpckhqdq zmm5, zmm20, zmm22
|
|
1046
|
+
vpunpcklqdq zmm6, zmm21, zmm23
|
|
1047
|
+
vpunpckhqdq zmm7, zmm21, zmm23
|
|
1048
|
+
vshufi32x4 zmm16, zmm0, zmm4, 0x88
|
|
1049
|
+
vshufi32x4 zmm17, zmm1, zmm5, 0x88
|
|
1050
|
+
vshufi32x4 zmm18, zmm2, zmm6, 0x88
|
|
1051
|
+
vshufi32x4 zmm19, zmm3, zmm7, 0x88
|
|
1052
|
+
vshufi32x4 zmm20, zmm0, zmm4, 0xDD
|
|
1053
|
+
vshufi32x4 zmm21, zmm1, zmm5, 0xDD
|
|
1054
|
+
vshufi32x4 zmm22, zmm2, zmm6, 0xDD
|
|
1055
|
+
vshufi32x4 zmm23, zmm3, zmm7, 0xDD
|
|
1056
|
+
vshufi32x4 zmm0, zmm16, zmm17, 0x88
|
|
1057
|
+
vshufi32x4 zmm1, zmm18, zmm19, 0x88
|
|
1058
|
+
vshufi32x4 zmm2, zmm20, zmm21, 0x88
|
|
1059
|
+
vshufi32x4 zmm3, zmm22, zmm23, 0x88
|
|
1060
|
+
vshufi32x4 zmm4, zmm16, zmm17, 0xDD
|
|
1061
|
+
vshufi32x4 zmm5, zmm18, zmm19, 0xDD
|
|
1062
|
+
vshufi32x4 zmm6, zmm20, zmm21, 0xDD
|
|
1063
|
+
vshufi32x4 zmm7, zmm22, zmm23, 0xDD
|
|
1064
|
+
vmovdqu32 zmmword ptr [rbx], zmm0
|
|
1065
|
+
vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1
|
|
1066
|
+
vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2
|
|
1067
|
+
vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3
|
|
1068
|
+
vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4
|
|
1069
|
+
vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5
|
|
1070
|
+
vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6
|
|
1071
|
+
vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7
|
|
1072
|
+
vmovdqa32 zmm0, zmmword ptr [rsp]
|
|
1073
|
+
vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40]
|
|
1074
|
+
vmovdqa32 zmm2, zmm0
|
|
1075
|
+
vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16}
|
|
1076
|
+
vpcmpltud k2, zmm2, zmm0
|
|
1077
|
+
vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
|
|
1078
|
+
vmovdqa32 zmmword ptr [rsp], zmm2
|
|
1079
|
+
vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
|
|
1080
|
+
add rdi, 128
|
|
1081
|
+
add rbx, 512
|
|
1082
|
+
mov qword ptr [rbp+0x50], rbx
|
|
1083
|
+
sub rsi, 16
|
|
1084
|
+
cmp rsi, 16
|
|
1085
|
+
jnc 2b
|
|
1086
|
+
test rsi, rsi
|
|
1087
|
+
jnz 3f
|
|
1088
|
+
4:
|
|
1089
|
+
vzeroupper
|
|
1090
|
+
mov rsp, rbp
|
|
1091
|
+
pop rbp
|
|
1092
|
+
pop rbx
|
|
1093
|
+
pop r12
|
|
1094
|
+
pop r13
|
|
1095
|
+
pop r14
|
|
1096
|
+
pop r15
|
|
1097
|
+
ret
|
|
1098
|
+
.p2align 6
|
|
1099
|
+
3:
|
|
1100
|
+
test esi, 0x8
|
|
1101
|
+
je 3f
|
|
1102
|
+
vpbroadcastd ymm0, dword ptr [rcx]
|
|
1103
|
+
vpbroadcastd ymm1, dword ptr [rcx+0x4]
|
|
1104
|
+
vpbroadcastd ymm2, dword ptr [rcx+0x8]
|
|
1105
|
+
vpbroadcastd ymm3, dword ptr [rcx+0xC]
|
|
1106
|
+
vpbroadcastd ymm4, dword ptr [rcx+0x10]
|
|
1107
|
+
vpbroadcastd ymm5, dword ptr [rcx+0x14]
|
|
1108
|
+
vpbroadcastd ymm6, dword ptr [rcx+0x18]
|
|
1109
|
+
vpbroadcastd ymm7, dword ptr [rcx+0x1C]
|
|
1110
|
+
mov r8, qword ptr [rdi]
|
|
1111
|
+
mov r9, qword ptr [rdi+0x8]
|
|
1112
|
+
mov r10, qword ptr [rdi+0x10]
|
|
1113
|
+
mov r11, qword ptr [rdi+0x18]
|
|
1114
|
+
mov r12, qword ptr [rdi+0x20]
|
|
1115
|
+
mov r13, qword ptr [rdi+0x28]
|
|
1116
|
+
mov r14, qword ptr [rdi+0x30]
|
|
1117
|
+
mov r15, qword ptr [rdi+0x38]
|
|
1118
|
+
movzx eax, byte ptr [rbp+0x38]
|
|
1119
|
+
movzx ebx, byte ptr [rbp+0x40]
|
|
1120
|
+
or eax, ebx
|
|
1121
|
+
xor edx, edx
|
|
1122
|
+
2:
|
|
1123
|
+
movzx ebx, byte ptr [rbp+0x48]
|
|
1124
|
+
or ebx, eax
|
|
1125
|
+
add rdx, 64
|
|
1126
|
+
cmp rdx, qword ptr [rsp+0x80]
|
|
1127
|
+
cmove eax, ebx
|
|
1128
|
+
mov dword ptr [rsp+0x88], eax
|
|
1129
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x40]
|
|
1130
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01
|
|
1131
|
+
vmovups xmm9, xmmword ptr [r9+rdx-0x40]
|
|
1132
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01
|
|
1133
|
+
vunpcklpd ymm12, ymm8, ymm9
|
|
1134
|
+
vunpckhpd ymm13, ymm8, ymm9
|
|
1135
|
+
vmovups xmm10, xmmword ptr [r10+rdx-0x40]
|
|
1136
|
+
vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01
|
|
1137
|
+
vmovups xmm11, xmmword ptr [r11+rdx-0x40]
|
|
1138
|
+
vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01
|
|
1139
|
+
vunpcklpd ymm14, ymm10, ymm11
|
|
1140
|
+
vunpckhpd ymm15, ymm10, ymm11
|
|
1141
|
+
vshufps ymm16, ymm12, ymm14, 136
|
|
1142
|
+
vshufps ymm17, ymm12, ymm14, 221
|
|
1143
|
+
vshufps ymm18, ymm13, ymm15, 136
|
|
1144
|
+
vshufps ymm19, ymm13, ymm15, 221
|
|
1145
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x30]
|
|
1146
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01
|
|
1147
|
+
vmovups xmm9, xmmword ptr [r9+rdx-0x30]
|
|
1148
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01
|
|
1149
|
+
vunpcklpd ymm12, ymm8, ymm9
|
|
1150
|
+
vunpckhpd ymm13, ymm8, ymm9
|
|
1151
|
+
vmovups xmm10, xmmword ptr [r10+rdx-0x30]
|
|
1152
|
+
vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01
|
|
1153
|
+
vmovups xmm11, xmmword ptr [r11+rdx-0x30]
|
|
1154
|
+
vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01
|
|
1155
|
+
vunpcklpd ymm14, ymm10, ymm11
|
|
1156
|
+
vunpckhpd ymm15, ymm10, ymm11
|
|
1157
|
+
vshufps ymm20, ymm12, ymm14, 136
|
|
1158
|
+
vshufps ymm21, ymm12, ymm14, 221
|
|
1159
|
+
vshufps ymm22, ymm13, ymm15, 136
|
|
1160
|
+
vshufps ymm23, ymm13, ymm15, 221
|
|
1161
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x20]
|
|
1162
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01
|
|
1163
|
+
vmovups xmm9, xmmword ptr [r9+rdx-0x20]
|
|
1164
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01
|
|
1165
|
+
vunpcklpd ymm12, ymm8, ymm9
|
|
1166
|
+
vunpckhpd ymm13, ymm8, ymm9
|
|
1167
|
+
vmovups xmm10, xmmword ptr [r10+rdx-0x20]
|
|
1168
|
+
vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01
|
|
1169
|
+
vmovups xmm11, xmmword ptr [r11+rdx-0x20]
|
|
1170
|
+
vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01
|
|
1171
|
+
vunpcklpd ymm14, ymm10, ymm11
|
|
1172
|
+
vunpckhpd ymm15, ymm10, ymm11
|
|
1173
|
+
vshufps ymm24, ymm12, ymm14, 136
|
|
1174
|
+
vshufps ymm25, ymm12, ymm14, 221
|
|
1175
|
+
vshufps ymm26, ymm13, ymm15, 136
|
|
1176
|
+
vshufps ymm27, ymm13, ymm15, 221
|
|
1177
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x10]
|
|
1178
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01
|
|
1179
|
+
vmovups xmm9, xmmword ptr [r9+rdx-0x10]
|
|
1180
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01
|
|
1181
|
+
vunpcklpd ymm12, ymm8, ymm9
|
|
1182
|
+
vunpckhpd ymm13, ymm8, ymm9
|
|
1183
|
+
vmovups xmm10, xmmword ptr [r10+rdx-0x10]
|
|
1184
|
+
vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01
|
|
1185
|
+
vmovups xmm11, xmmword ptr [r11+rdx-0x10]
|
|
1186
|
+
vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01
|
|
1187
|
+
vunpcklpd ymm14, ymm10, ymm11
|
|
1188
|
+
vunpckhpd ymm15, ymm10, ymm11
|
|
1189
|
+
vshufps ymm28, ymm12, ymm14, 136
|
|
1190
|
+
vshufps ymm29, ymm12, ymm14, 221
|
|
1191
|
+
vshufps ymm30, ymm13, ymm15, 136
|
|
1192
|
+
vshufps ymm31, ymm13, ymm15, 221
|
|
1193
|
+
vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip]
|
|
1194
|
+
vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip]
|
|
1195
|
+
vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip]
|
|
1196
|
+
vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip]
|
|
1197
|
+
vmovdqa ymm12, ymmword ptr [rsp]
|
|
1198
|
+
vmovdqa ymm13, ymmword ptr [rsp+0x40]
|
|
1199
|
+
vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
|
1200
|
+
vpbroadcastd ymm15, dword ptr [rsp+0x88]
|
|
1201
|
+
vpaddd ymm0, ymm0, ymm16
|
|
1202
|
+
vpaddd ymm1, ymm1, ymm18
|
|
1203
|
+
vpaddd ymm2, ymm2, ymm20
|
|
1204
|
+
vpaddd ymm3, ymm3, ymm22
|
|
1205
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1206
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1207
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1208
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1209
|
+
vpxord ymm12, ymm12, ymm0
|
|
1210
|
+
vpxord ymm13, ymm13, ymm1
|
|
1211
|
+
vpxord ymm14, ymm14, ymm2
|
|
1212
|
+
vpxord ymm15, ymm15, ymm3
|
|
1213
|
+
vprord ymm12, ymm12, 16
|
|
1214
|
+
vprord ymm13, ymm13, 16
|
|
1215
|
+
vprord ymm14, ymm14, 16
|
|
1216
|
+
vprord ymm15, ymm15, 16
|
|
1217
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1218
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1219
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1220
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1221
|
+
vpxord ymm4, ymm4, ymm8
|
|
1222
|
+
vpxord ymm5, ymm5, ymm9
|
|
1223
|
+
vpxord ymm6, ymm6, ymm10
|
|
1224
|
+
vpxord ymm7, ymm7, ymm11
|
|
1225
|
+
vprord ymm4, ymm4, 12
|
|
1226
|
+
vprord ymm5, ymm5, 12
|
|
1227
|
+
vprord ymm6, ymm6, 12
|
|
1228
|
+
vprord ymm7, ymm7, 12
|
|
1229
|
+
vpaddd ymm0, ymm0, ymm17
|
|
1230
|
+
vpaddd ymm1, ymm1, ymm19
|
|
1231
|
+
vpaddd ymm2, ymm2, ymm21
|
|
1232
|
+
vpaddd ymm3, ymm3, ymm23
|
|
1233
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1234
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1235
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1236
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1237
|
+
vpxord ymm12, ymm12, ymm0
|
|
1238
|
+
vpxord ymm13, ymm13, ymm1
|
|
1239
|
+
vpxord ymm14, ymm14, ymm2
|
|
1240
|
+
vpxord ymm15, ymm15, ymm3
|
|
1241
|
+
vprord ymm12, ymm12, 8
|
|
1242
|
+
vprord ymm13, ymm13, 8
|
|
1243
|
+
vprord ymm14, ymm14, 8
|
|
1244
|
+
vprord ymm15, ymm15, 8
|
|
1245
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1246
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1247
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1248
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1249
|
+
vpxord ymm4, ymm4, ymm8
|
|
1250
|
+
vpxord ymm5, ymm5, ymm9
|
|
1251
|
+
vpxord ymm6, ymm6, ymm10
|
|
1252
|
+
vpxord ymm7, ymm7, ymm11
|
|
1253
|
+
vprord ymm4, ymm4, 7
|
|
1254
|
+
vprord ymm5, ymm5, 7
|
|
1255
|
+
vprord ymm6, ymm6, 7
|
|
1256
|
+
vprord ymm7, ymm7, 7
|
|
1257
|
+
vpaddd ymm0, ymm0, ymm24
|
|
1258
|
+
vpaddd ymm1, ymm1, ymm26
|
|
1259
|
+
vpaddd ymm2, ymm2, ymm28
|
|
1260
|
+
vpaddd ymm3, ymm3, ymm30
|
|
1261
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1262
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1263
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1264
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1265
|
+
vpxord ymm15, ymm15, ymm0
|
|
1266
|
+
vpxord ymm12, ymm12, ymm1
|
|
1267
|
+
vpxord ymm13, ymm13, ymm2
|
|
1268
|
+
vpxord ymm14, ymm14, ymm3
|
|
1269
|
+
vprord ymm15, ymm15, 16
|
|
1270
|
+
vprord ymm12, ymm12, 16
|
|
1271
|
+
vprord ymm13, ymm13, 16
|
|
1272
|
+
vprord ymm14, ymm14, 16
|
|
1273
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1274
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1275
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1276
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1277
|
+
vpxord ymm5, ymm5, ymm10
|
|
1278
|
+
vpxord ymm6, ymm6, ymm11
|
|
1279
|
+
vpxord ymm7, ymm7, ymm8
|
|
1280
|
+
vpxord ymm4, ymm4, ymm9
|
|
1281
|
+
vprord ymm5, ymm5, 12
|
|
1282
|
+
vprord ymm6, ymm6, 12
|
|
1283
|
+
vprord ymm7, ymm7, 12
|
|
1284
|
+
vprord ymm4, ymm4, 12
|
|
1285
|
+
vpaddd ymm0, ymm0, ymm25
|
|
1286
|
+
vpaddd ymm1, ymm1, ymm27
|
|
1287
|
+
vpaddd ymm2, ymm2, ymm29
|
|
1288
|
+
vpaddd ymm3, ymm3, ymm31
|
|
1289
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1290
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1291
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1292
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1293
|
+
vpxord ymm15, ymm15, ymm0
|
|
1294
|
+
vpxord ymm12, ymm12, ymm1
|
|
1295
|
+
vpxord ymm13, ymm13, ymm2
|
|
1296
|
+
vpxord ymm14, ymm14, ymm3
|
|
1297
|
+
vprord ymm15, ymm15, 8
|
|
1298
|
+
vprord ymm12, ymm12, 8
|
|
1299
|
+
vprord ymm13, ymm13, 8
|
|
1300
|
+
vprord ymm14, ymm14, 8
|
|
1301
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1302
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1303
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1304
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1305
|
+
vpxord ymm5, ymm5, ymm10
|
|
1306
|
+
vpxord ymm6, ymm6, ymm11
|
|
1307
|
+
vpxord ymm7, ymm7, ymm8
|
|
1308
|
+
vpxord ymm4, ymm4, ymm9
|
|
1309
|
+
vprord ymm5, ymm5, 7
|
|
1310
|
+
vprord ymm6, ymm6, 7
|
|
1311
|
+
vprord ymm7, ymm7, 7
|
|
1312
|
+
vprord ymm4, ymm4, 7
|
|
1313
|
+
vpaddd ymm0, ymm0, ymm18
|
|
1314
|
+
vpaddd ymm1, ymm1, ymm19
|
|
1315
|
+
vpaddd ymm2, ymm2, ymm23
|
|
1316
|
+
vpaddd ymm3, ymm3, ymm20
|
|
1317
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1318
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1319
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1320
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1321
|
+
vpxord ymm12, ymm12, ymm0
|
|
1322
|
+
vpxord ymm13, ymm13, ymm1
|
|
1323
|
+
vpxord ymm14, ymm14, ymm2
|
|
1324
|
+
vpxord ymm15, ymm15, ymm3
|
|
1325
|
+
vprord ymm12, ymm12, 16
|
|
1326
|
+
vprord ymm13, ymm13, 16
|
|
1327
|
+
vprord ymm14, ymm14, 16
|
|
1328
|
+
vprord ymm15, ymm15, 16
|
|
1329
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1330
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1331
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1332
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1333
|
+
vpxord ymm4, ymm4, ymm8
|
|
1334
|
+
vpxord ymm5, ymm5, ymm9
|
|
1335
|
+
vpxord ymm6, ymm6, ymm10
|
|
1336
|
+
vpxord ymm7, ymm7, ymm11
|
|
1337
|
+
vprord ymm4, ymm4, 12
|
|
1338
|
+
vprord ymm5, ymm5, 12
|
|
1339
|
+
vprord ymm6, ymm6, 12
|
|
1340
|
+
vprord ymm7, ymm7, 12
|
|
1341
|
+
vpaddd ymm0, ymm0, ymm22
|
|
1342
|
+
vpaddd ymm1, ymm1, ymm26
|
|
1343
|
+
vpaddd ymm2, ymm2, ymm16
|
|
1344
|
+
vpaddd ymm3, ymm3, ymm29
|
|
1345
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1346
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1347
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1348
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1349
|
+
vpxord ymm12, ymm12, ymm0
|
|
1350
|
+
vpxord ymm13, ymm13, ymm1
|
|
1351
|
+
vpxord ymm14, ymm14, ymm2
|
|
1352
|
+
vpxord ymm15, ymm15, ymm3
|
|
1353
|
+
vprord ymm12, ymm12, 8
|
|
1354
|
+
vprord ymm13, ymm13, 8
|
|
1355
|
+
vprord ymm14, ymm14, 8
|
|
1356
|
+
vprord ymm15, ymm15, 8
|
|
1357
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1358
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1359
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1360
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1361
|
+
vpxord ymm4, ymm4, ymm8
|
|
1362
|
+
vpxord ymm5, ymm5, ymm9
|
|
1363
|
+
vpxord ymm6, ymm6, ymm10
|
|
1364
|
+
vpxord ymm7, ymm7, ymm11
|
|
1365
|
+
vprord ymm4, ymm4, 7
|
|
1366
|
+
vprord ymm5, ymm5, 7
|
|
1367
|
+
vprord ymm6, ymm6, 7
|
|
1368
|
+
vprord ymm7, ymm7, 7
|
|
1369
|
+
vpaddd ymm0, ymm0, ymm17
|
|
1370
|
+
vpaddd ymm1, ymm1, ymm28
|
|
1371
|
+
vpaddd ymm2, ymm2, ymm25
|
|
1372
|
+
vpaddd ymm3, ymm3, ymm31
|
|
1373
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1374
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1375
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1376
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1377
|
+
vpxord ymm15, ymm15, ymm0
|
|
1378
|
+
vpxord ymm12, ymm12, ymm1
|
|
1379
|
+
vpxord ymm13, ymm13, ymm2
|
|
1380
|
+
vpxord ymm14, ymm14, ymm3
|
|
1381
|
+
vprord ymm15, ymm15, 16
|
|
1382
|
+
vprord ymm12, ymm12, 16
|
|
1383
|
+
vprord ymm13, ymm13, 16
|
|
1384
|
+
vprord ymm14, ymm14, 16
|
|
1385
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1386
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1387
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1388
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1389
|
+
vpxord ymm5, ymm5, ymm10
|
|
1390
|
+
vpxord ymm6, ymm6, ymm11
|
|
1391
|
+
vpxord ymm7, ymm7, ymm8
|
|
1392
|
+
vpxord ymm4, ymm4, ymm9
|
|
1393
|
+
vprord ymm5, ymm5, 12
|
|
1394
|
+
vprord ymm6, ymm6, 12
|
|
1395
|
+
vprord ymm7, ymm7, 12
|
|
1396
|
+
vprord ymm4, ymm4, 12
|
|
1397
|
+
vpaddd ymm0, ymm0, ymm27
|
|
1398
|
+
vpaddd ymm1, ymm1, ymm21
|
|
1399
|
+
vpaddd ymm2, ymm2, ymm30
|
|
1400
|
+
vpaddd ymm3, ymm3, ymm24
|
|
1401
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1402
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1403
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1404
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1405
|
+
vpxord ymm15, ymm15, ymm0
|
|
1406
|
+
vpxord ymm12, ymm12, ymm1
|
|
1407
|
+
vpxord ymm13, ymm13, ymm2
|
|
1408
|
+
vpxord ymm14, ymm14, ymm3
|
|
1409
|
+
vprord ymm15, ymm15, 8
|
|
1410
|
+
vprord ymm12, ymm12, 8
|
|
1411
|
+
vprord ymm13, ymm13, 8
|
|
1412
|
+
vprord ymm14, ymm14, 8
|
|
1413
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1414
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1415
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1416
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1417
|
+
vpxord ymm5, ymm5, ymm10
|
|
1418
|
+
vpxord ymm6, ymm6, ymm11
|
|
1419
|
+
vpxord ymm7, ymm7, ymm8
|
|
1420
|
+
vpxord ymm4, ymm4, ymm9
|
|
1421
|
+
vprord ymm5, ymm5, 7
|
|
1422
|
+
vprord ymm6, ymm6, 7
|
|
1423
|
+
vprord ymm7, ymm7, 7
|
|
1424
|
+
vprord ymm4, ymm4, 7
|
|
1425
|
+
vpaddd ymm0, ymm0, ymm19
|
|
1426
|
+
vpaddd ymm1, ymm1, ymm26
|
|
1427
|
+
vpaddd ymm2, ymm2, ymm29
|
|
1428
|
+
vpaddd ymm3, ymm3, ymm23
|
|
1429
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1430
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1431
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1432
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1433
|
+
vpxord ymm12, ymm12, ymm0
|
|
1434
|
+
vpxord ymm13, ymm13, ymm1
|
|
1435
|
+
vpxord ymm14, ymm14, ymm2
|
|
1436
|
+
vpxord ymm15, ymm15, ymm3
|
|
1437
|
+
vprord ymm12, ymm12, 16
|
|
1438
|
+
vprord ymm13, ymm13, 16
|
|
1439
|
+
vprord ymm14, ymm14, 16
|
|
1440
|
+
vprord ymm15, ymm15, 16
|
|
1441
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1442
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1443
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1444
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1445
|
+
vpxord ymm4, ymm4, ymm8
|
|
1446
|
+
vpxord ymm5, ymm5, ymm9
|
|
1447
|
+
vpxord ymm6, ymm6, ymm10
|
|
1448
|
+
vpxord ymm7, ymm7, ymm11
|
|
1449
|
+
vprord ymm4, ymm4, 12
|
|
1450
|
+
vprord ymm5, ymm5, 12
|
|
1451
|
+
vprord ymm6, ymm6, 12
|
|
1452
|
+
vprord ymm7, ymm7, 12
|
|
1453
|
+
vpaddd ymm0, ymm0, ymm20
|
|
1454
|
+
vpaddd ymm1, ymm1, ymm28
|
|
1455
|
+
vpaddd ymm2, ymm2, ymm18
|
|
1456
|
+
vpaddd ymm3, ymm3, ymm30
|
|
1457
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1458
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1459
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1460
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1461
|
+
vpxord ymm12, ymm12, ymm0
|
|
1462
|
+
vpxord ymm13, ymm13, ymm1
|
|
1463
|
+
vpxord ymm14, ymm14, ymm2
|
|
1464
|
+
vpxord ymm15, ymm15, ymm3
|
|
1465
|
+
vprord ymm12, ymm12, 8
|
|
1466
|
+
vprord ymm13, ymm13, 8
|
|
1467
|
+
vprord ymm14, ymm14, 8
|
|
1468
|
+
vprord ymm15, ymm15, 8
|
|
1469
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1470
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1471
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1472
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1473
|
+
vpxord ymm4, ymm4, ymm8
|
|
1474
|
+
vpxord ymm5, ymm5, ymm9
|
|
1475
|
+
vpxord ymm6, ymm6, ymm10
|
|
1476
|
+
vpxord ymm7, ymm7, ymm11
|
|
1477
|
+
vprord ymm4, ymm4, 7
|
|
1478
|
+
vprord ymm5, ymm5, 7
|
|
1479
|
+
vprord ymm6, ymm6, 7
|
|
1480
|
+
vprord ymm7, ymm7, 7
|
|
1481
|
+
vpaddd ymm0, ymm0, ymm22
|
|
1482
|
+
vpaddd ymm1, ymm1, ymm25
|
|
1483
|
+
vpaddd ymm2, ymm2, ymm27
|
|
1484
|
+
vpaddd ymm3, ymm3, ymm24
|
|
1485
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1486
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1487
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1488
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1489
|
+
vpxord ymm15, ymm15, ymm0
|
|
1490
|
+
vpxord ymm12, ymm12, ymm1
|
|
1491
|
+
vpxord ymm13, ymm13, ymm2
|
|
1492
|
+
vpxord ymm14, ymm14, ymm3
|
|
1493
|
+
vprord ymm15, ymm15, 16
|
|
1494
|
+
vprord ymm12, ymm12, 16
|
|
1495
|
+
vprord ymm13, ymm13, 16
|
|
1496
|
+
vprord ymm14, ymm14, 16
|
|
1497
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1498
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1499
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1500
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1501
|
+
vpxord ymm5, ymm5, ymm10
|
|
1502
|
+
vpxord ymm6, ymm6, ymm11
|
|
1503
|
+
vpxord ymm7, ymm7, ymm8
|
|
1504
|
+
vpxord ymm4, ymm4, ymm9
|
|
1505
|
+
vprord ymm5, ymm5, 12
|
|
1506
|
+
vprord ymm6, ymm6, 12
|
|
1507
|
+
vprord ymm7, ymm7, 12
|
|
1508
|
+
vprord ymm4, ymm4, 12
|
|
1509
|
+
vpaddd ymm0, ymm0, ymm21
|
|
1510
|
+
vpaddd ymm1, ymm1, ymm16
|
|
1511
|
+
vpaddd ymm2, ymm2, ymm31
|
|
1512
|
+
vpaddd ymm3, ymm3, ymm17
|
|
1513
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1514
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1515
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1516
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1517
|
+
vpxord ymm15, ymm15, ymm0
|
|
1518
|
+
vpxord ymm12, ymm12, ymm1
|
|
1519
|
+
vpxord ymm13, ymm13, ymm2
|
|
1520
|
+
vpxord ymm14, ymm14, ymm3
|
|
1521
|
+
vprord ymm15, ymm15, 8
|
|
1522
|
+
vprord ymm12, ymm12, 8
|
|
1523
|
+
vprord ymm13, ymm13, 8
|
|
1524
|
+
vprord ymm14, ymm14, 8
|
|
1525
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1526
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1527
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1528
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1529
|
+
vpxord ymm5, ymm5, ymm10
|
|
1530
|
+
vpxord ymm6, ymm6, ymm11
|
|
1531
|
+
vpxord ymm7, ymm7, ymm8
|
|
1532
|
+
vpxord ymm4, ymm4, ymm9
|
|
1533
|
+
vprord ymm5, ymm5, 7
|
|
1534
|
+
vprord ymm6, ymm6, 7
|
|
1535
|
+
vprord ymm7, ymm7, 7
|
|
1536
|
+
vprord ymm4, ymm4, 7
|
|
1537
|
+
vpaddd ymm0, ymm0, ymm26
|
|
1538
|
+
vpaddd ymm1, ymm1, ymm28
|
|
1539
|
+
vpaddd ymm2, ymm2, ymm30
|
|
1540
|
+
vpaddd ymm3, ymm3, ymm29
|
|
1541
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1542
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1543
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1544
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1545
|
+
vpxord ymm12, ymm12, ymm0
|
|
1546
|
+
vpxord ymm13, ymm13, ymm1
|
|
1547
|
+
vpxord ymm14, ymm14, ymm2
|
|
1548
|
+
vpxord ymm15, ymm15, ymm3
|
|
1549
|
+
vprord ymm12, ymm12, 16
|
|
1550
|
+
vprord ymm13, ymm13, 16
|
|
1551
|
+
vprord ymm14, ymm14, 16
|
|
1552
|
+
vprord ymm15, ymm15, 16
|
|
1553
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1554
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1555
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1556
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1557
|
+
vpxord ymm4, ymm4, ymm8
|
|
1558
|
+
vpxord ymm5, ymm5, ymm9
|
|
1559
|
+
vpxord ymm6, ymm6, ymm10
|
|
1560
|
+
vpxord ymm7, ymm7, ymm11
|
|
1561
|
+
vprord ymm4, ymm4, 12
|
|
1562
|
+
vprord ymm5, ymm5, 12
|
|
1563
|
+
vprord ymm6, ymm6, 12
|
|
1564
|
+
vprord ymm7, ymm7, 12
|
|
1565
|
+
vpaddd ymm0, ymm0, ymm23
|
|
1566
|
+
vpaddd ymm1, ymm1, ymm25
|
|
1567
|
+
vpaddd ymm2, ymm2, ymm19
|
|
1568
|
+
vpaddd ymm3, ymm3, ymm31
|
|
1569
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1570
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1571
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1572
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1573
|
+
vpxord ymm12, ymm12, ymm0
|
|
1574
|
+
vpxord ymm13, ymm13, ymm1
|
|
1575
|
+
vpxord ymm14, ymm14, ymm2
|
|
1576
|
+
vpxord ymm15, ymm15, ymm3
|
|
1577
|
+
vprord ymm12, ymm12, 8
|
|
1578
|
+
vprord ymm13, ymm13, 8
|
|
1579
|
+
vprord ymm14, ymm14, 8
|
|
1580
|
+
vprord ymm15, ymm15, 8
|
|
1581
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1582
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1583
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1584
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1585
|
+
vpxord ymm4, ymm4, ymm8
|
|
1586
|
+
vpxord ymm5, ymm5, ymm9
|
|
1587
|
+
vpxord ymm6, ymm6, ymm10
|
|
1588
|
+
vpxord ymm7, ymm7, ymm11
|
|
1589
|
+
vprord ymm4, ymm4, 7
|
|
1590
|
+
vprord ymm5, ymm5, 7
|
|
1591
|
+
vprord ymm6, ymm6, 7
|
|
1592
|
+
vprord ymm7, ymm7, 7
|
|
1593
|
+
vpaddd ymm0, ymm0, ymm20
|
|
1594
|
+
vpaddd ymm1, ymm1, ymm27
|
|
1595
|
+
vpaddd ymm2, ymm2, ymm21
|
|
1596
|
+
vpaddd ymm3, ymm3, ymm17
|
|
1597
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1598
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1599
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1600
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1601
|
+
vpxord ymm15, ymm15, ymm0
|
|
1602
|
+
vpxord ymm12, ymm12, ymm1
|
|
1603
|
+
vpxord ymm13, ymm13, ymm2
|
|
1604
|
+
vpxord ymm14, ymm14, ymm3
|
|
1605
|
+
vprord ymm15, ymm15, 16
|
|
1606
|
+
vprord ymm12, ymm12, 16
|
|
1607
|
+
vprord ymm13, ymm13, 16
|
|
1608
|
+
vprord ymm14, ymm14, 16
|
|
1609
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1610
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1611
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1612
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1613
|
+
vpxord ymm5, ymm5, ymm10
|
|
1614
|
+
vpxord ymm6, ymm6, ymm11
|
|
1615
|
+
vpxord ymm7, ymm7, ymm8
|
|
1616
|
+
vpxord ymm4, ymm4, ymm9
|
|
1617
|
+
vprord ymm5, ymm5, 12
|
|
1618
|
+
vprord ymm6, ymm6, 12
|
|
1619
|
+
vprord ymm7, ymm7, 12
|
|
1620
|
+
vprord ymm4, ymm4, 12
|
|
1621
|
+
vpaddd ymm0, ymm0, ymm16
|
|
1622
|
+
vpaddd ymm1, ymm1, ymm18
|
|
1623
|
+
vpaddd ymm2, ymm2, ymm24
|
|
1624
|
+
vpaddd ymm3, ymm3, ymm22
|
|
1625
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1626
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1627
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1628
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1629
|
+
vpxord ymm15, ymm15, ymm0
|
|
1630
|
+
vpxord ymm12, ymm12, ymm1
|
|
1631
|
+
vpxord ymm13, ymm13, ymm2
|
|
1632
|
+
vpxord ymm14, ymm14, ymm3
|
|
1633
|
+
vprord ymm15, ymm15, 8
|
|
1634
|
+
vprord ymm12, ymm12, 8
|
|
1635
|
+
vprord ymm13, ymm13, 8
|
|
1636
|
+
vprord ymm14, ymm14, 8
|
|
1637
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1638
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1639
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1640
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1641
|
+
vpxord ymm5, ymm5, ymm10
|
|
1642
|
+
vpxord ymm6, ymm6, ymm11
|
|
1643
|
+
vpxord ymm7, ymm7, ymm8
|
|
1644
|
+
vpxord ymm4, ymm4, ymm9
|
|
1645
|
+
vprord ymm5, ymm5, 7
|
|
1646
|
+
vprord ymm6, ymm6, 7
|
|
1647
|
+
vprord ymm7, ymm7, 7
|
|
1648
|
+
vprord ymm4, ymm4, 7
|
|
1649
|
+
vpaddd ymm0, ymm0, ymm28
|
|
1650
|
+
vpaddd ymm1, ymm1, ymm25
|
|
1651
|
+
vpaddd ymm2, ymm2, ymm31
|
|
1652
|
+
vpaddd ymm3, ymm3, ymm30
|
|
1653
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1654
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1655
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1656
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1657
|
+
vpxord ymm12, ymm12, ymm0
|
|
1658
|
+
vpxord ymm13, ymm13, ymm1
|
|
1659
|
+
vpxord ymm14, ymm14, ymm2
|
|
1660
|
+
vpxord ymm15, ymm15, ymm3
|
|
1661
|
+
vprord ymm12, ymm12, 16
|
|
1662
|
+
vprord ymm13, ymm13, 16
|
|
1663
|
+
vprord ymm14, ymm14, 16
|
|
1664
|
+
vprord ymm15, ymm15, 16
|
|
1665
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1666
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1667
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1668
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1669
|
+
vpxord ymm4, ymm4, ymm8
|
|
1670
|
+
vpxord ymm5, ymm5, ymm9
|
|
1671
|
+
vpxord ymm6, ymm6, ymm10
|
|
1672
|
+
vpxord ymm7, ymm7, ymm11
|
|
1673
|
+
vprord ymm4, ymm4, 12
|
|
1674
|
+
vprord ymm5, ymm5, 12
|
|
1675
|
+
vprord ymm6, ymm6, 12
|
|
1676
|
+
vprord ymm7, ymm7, 12
|
|
1677
|
+
vpaddd ymm0, ymm0, ymm29
|
|
1678
|
+
vpaddd ymm1, ymm1, ymm27
|
|
1679
|
+
vpaddd ymm2, ymm2, ymm26
|
|
1680
|
+
vpaddd ymm3, ymm3, ymm24
|
|
1681
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1682
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1683
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1684
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1685
|
+
vpxord ymm12, ymm12, ymm0
|
|
1686
|
+
vpxord ymm13, ymm13, ymm1
|
|
1687
|
+
vpxord ymm14, ymm14, ymm2
|
|
1688
|
+
vpxord ymm15, ymm15, ymm3
|
|
1689
|
+
vprord ymm12, ymm12, 8
|
|
1690
|
+
vprord ymm13, ymm13, 8
|
|
1691
|
+
vprord ymm14, ymm14, 8
|
|
1692
|
+
vprord ymm15, ymm15, 8
|
|
1693
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1694
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1695
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1696
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1697
|
+
vpxord ymm4, ymm4, ymm8
|
|
1698
|
+
vpxord ymm5, ymm5, ymm9
|
|
1699
|
+
vpxord ymm6, ymm6, ymm10
|
|
1700
|
+
vpxord ymm7, ymm7, ymm11
|
|
1701
|
+
vprord ymm4, ymm4, 7
|
|
1702
|
+
vprord ymm5, ymm5, 7
|
|
1703
|
+
vprord ymm6, ymm6, 7
|
|
1704
|
+
vprord ymm7, ymm7, 7
|
|
1705
|
+
vpaddd ymm0, ymm0, ymm23
|
|
1706
|
+
vpaddd ymm1, ymm1, ymm21
|
|
1707
|
+
vpaddd ymm2, ymm2, ymm16
|
|
1708
|
+
vpaddd ymm3, ymm3, ymm22
|
|
1709
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1710
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1711
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1712
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1713
|
+
vpxord ymm15, ymm15, ymm0
|
|
1714
|
+
vpxord ymm12, ymm12, ymm1
|
|
1715
|
+
vpxord ymm13, ymm13, ymm2
|
|
1716
|
+
vpxord ymm14, ymm14, ymm3
|
|
1717
|
+
vprord ymm15, ymm15, 16
|
|
1718
|
+
vprord ymm12, ymm12, 16
|
|
1719
|
+
vprord ymm13, ymm13, 16
|
|
1720
|
+
vprord ymm14, ymm14, 16
|
|
1721
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1722
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1723
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1724
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1725
|
+
vpxord ymm5, ymm5, ymm10
|
|
1726
|
+
vpxord ymm6, ymm6, ymm11
|
|
1727
|
+
vpxord ymm7, ymm7, ymm8
|
|
1728
|
+
vpxord ymm4, ymm4, ymm9
|
|
1729
|
+
vprord ymm5, ymm5, 12
|
|
1730
|
+
vprord ymm6, ymm6, 12
|
|
1731
|
+
vprord ymm7, ymm7, 12
|
|
1732
|
+
vprord ymm4, ymm4, 12
|
|
1733
|
+
vpaddd ymm0, ymm0, ymm18
|
|
1734
|
+
vpaddd ymm1, ymm1, ymm19
|
|
1735
|
+
vpaddd ymm2, ymm2, ymm17
|
|
1736
|
+
vpaddd ymm3, ymm3, ymm20
|
|
1737
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1738
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1739
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1740
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1741
|
+
vpxord ymm15, ymm15, ymm0
|
|
1742
|
+
vpxord ymm12, ymm12, ymm1
|
|
1743
|
+
vpxord ymm13, ymm13, ymm2
|
|
1744
|
+
vpxord ymm14, ymm14, ymm3
|
|
1745
|
+
vprord ymm15, ymm15, 8
|
|
1746
|
+
vprord ymm12, ymm12, 8
|
|
1747
|
+
vprord ymm13, ymm13, 8
|
|
1748
|
+
vprord ymm14, ymm14, 8
|
|
1749
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1750
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1751
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1752
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1753
|
+
vpxord ymm5, ymm5, ymm10
|
|
1754
|
+
vpxord ymm6, ymm6, ymm11
|
|
1755
|
+
vpxord ymm7, ymm7, ymm8
|
|
1756
|
+
vpxord ymm4, ymm4, ymm9
|
|
1757
|
+
vprord ymm5, ymm5, 7
|
|
1758
|
+
vprord ymm6, ymm6, 7
|
|
1759
|
+
vprord ymm7, ymm7, 7
|
|
1760
|
+
vprord ymm4, ymm4, 7
|
|
1761
|
+
vpaddd ymm0, ymm0, ymm25
|
|
1762
|
+
vpaddd ymm1, ymm1, ymm27
|
|
1763
|
+
vpaddd ymm2, ymm2, ymm24
|
|
1764
|
+
vpaddd ymm3, ymm3, ymm31
|
|
1765
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1766
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1767
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1768
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1769
|
+
vpxord ymm12, ymm12, ymm0
|
|
1770
|
+
vpxord ymm13, ymm13, ymm1
|
|
1771
|
+
vpxord ymm14, ymm14, ymm2
|
|
1772
|
+
vpxord ymm15, ymm15, ymm3
|
|
1773
|
+
vprord ymm12, ymm12, 16
|
|
1774
|
+
vprord ymm13, ymm13, 16
|
|
1775
|
+
vprord ymm14, ymm14, 16
|
|
1776
|
+
vprord ymm15, ymm15, 16
|
|
1777
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1778
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1779
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1780
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1781
|
+
vpxord ymm4, ymm4, ymm8
|
|
1782
|
+
vpxord ymm5, ymm5, ymm9
|
|
1783
|
+
vpxord ymm6, ymm6, ymm10
|
|
1784
|
+
vpxord ymm7, ymm7, ymm11
|
|
1785
|
+
vprord ymm4, ymm4, 12
|
|
1786
|
+
vprord ymm5, ymm5, 12
|
|
1787
|
+
vprord ymm6, ymm6, 12
|
|
1788
|
+
vprord ymm7, ymm7, 12
|
|
1789
|
+
vpaddd ymm0, ymm0, ymm30
|
|
1790
|
+
vpaddd ymm1, ymm1, ymm21
|
|
1791
|
+
vpaddd ymm2, ymm2, ymm28
|
|
1792
|
+
vpaddd ymm3, ymm3, ymm17
|
|
1793
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1794
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1795
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1796
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1797
|
+
vpxord ymm12, ymm12, ymm0
|
|
1798
|
+
vpxord ymm13, ymm13, ymm1
|
|
1799
|
+
vpxord ymm14, ymm14, ymm2
|
|
1800
|
+
vpxord ymm15, ymm15, ymm3
|
|
1801
|
+
vprord ymm12, ymm12, 8
|
|
1802
|
+
vprord ymm13, ymm13, 8
|
|
1803
|
+
vprord ymm14, ymm14, 8
|
|
1804
|
+
vprord ymm15, ymm15, 8
|
|
1805
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1806
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1807
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1808
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1809
|
+
vpxord ymm4, ymm4, ymm8
|
|
1810
|
+
vpxord ymm5, ymm5, ymm9
|
|
1811
|
+
vpxord ymm6, ymm6, ymm10
|
|
1812
|
+
vpxord ymm7, ymm7, ymm11
|
|
1813
|
+
vprord ymm4, ymm4, 7
|
|
1814
|
+
vprord ymm5, ymm5, 7
|
|
1815
|
+
vprord ymm6, ymm6, 7
|
|
1816
|
+
vprord ymm7, ymm7, 7
|
|
1817
|
+
vpaddd ymm0, ymm0, ymm29
|
|
1818
|
+
vpaddd ymm1, ymm1, ymm16
|
|
1819
|
+
vpaddd ymm2, ymm2, ymm18
|
|
1820
|
+
vpaddd ymm3, ymm3, ymm20
|
|
1821
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1822
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1823
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1824
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1825
|
+
vpxord ymm15, ymm15, ymm0
|
|
1826
|
+
vpxord ymm12, ymm12, ymm1
|
|
1827
|
+
vpxord ymm13, ymm13, ymm2
|
|
1828
|
+
vpxord ymm14, ymm14, ymm3
|
|
1829
|
+
vprord ymm15, ymm15, 16
|
|
1830
|
+
vprord ymm12, ymm12, 16
|
|
1831
|
+
vprord ymm13, ymm13, 16
|
|
1832
|
+
vprord ymm14, ymm14, 16
|
|
1833
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1834
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1835
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1836
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1837
|
+
vpxord ymm5, ymm5, ymm10
|
|
1838
|
+
vpxord ymm6, ymm6, ymm11
|
|
1839
|
+
vpxord ymm7, ymm7, ymm8
|
|
1840
|
+
vpxord ymm4, ymm4, ymm9
|
|
1841
|
+
vprord ymm5, ymm5, 12
|
|
1842
|
+
vprord ymm6, ymm6, 12
|
|
1843
|
+
vprord ymm7, ymm7, 12
|
|
1844
|
+
vprord ymm4, ymm4, 12
|
|
1845
|
+
vpaddd ymm0, ymm0, ymm19
|
|
1846
|
+
vpaddd ymm1, ymm1, ymm26
|
|
1847
|
+
vpaddd ymm2, ymm2, ymm22
|
|
1848
|
+
vpaddd ymm3, ymm3, ymm23
|
|
1849
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1850
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1851
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1852
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1853
|
+
vpxord ymm15, ymm15, ymm0
|
|
1854
|
+
vpxord ymm12, ymm12, ymm1
|
|
1855
|
+
vpxord ymm13, ymm13, ymm2
|
|
1856
|
+
vpxord ymm14, ymm14, ymm3
|
|
1857
|
+
vprord ymm15, ymm15, 8
|
|
1858
|
+
vprord ymm12, ymm12, 8
|
|
1859
|
+
vprord ymm13, ymm13, 8
|
|
1860
|
+
vprord ymm14, ymm14, 8
|
|
1861
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1862
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1863
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1864
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1865
|
+
vpxord ymm5, ymm5, ymm10
|
|
1866
|
+
vpxord ymm6, ymm6, ymm11
|
|
1867
|
+
vpxord ymm7, ymm7, ymm8
|
|
1868
|
+
vpxord ymm4, ymm4, ymm9
|
|
1869
|
+
vprord ymm5, ymm5, 7
|
|
1870
|
+
vprord ymm6, ymm6, 7
|
|
1871
|
+
vprord ymm7, ymm7, 7
|
|
1872
|
+
vprord ymm4, ymm4, 7
|
|
1873
|
+
vpaddd ymm0, ymm0, ymm27
|
|
1874
|
+
vpaddd ymm1, ymm1, ymm21
|
|
1875
|
+
vpaddd ymm2, ymm2, ymm17
|
|
1876
|
+
vpaddd ymm3, ymm3, ymm24
|
|
1877
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1878
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1879
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1880
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1881
|
+
vpxord ymm12, ymm12, ymm0
|
|
1882
|
+
vpxord ymm13, ymm13, ymm1
|
|
1883
|
+
vpxord ymm14, ymm14, ymm2
|
|
1884
|
+
vpxord ymm15, ymm15, ymm3
|
|
1885
|
+
vprord ymm12, ymm12, 16
|
|
1886
|
+
vprord ymm13, ymm13, 16
|
|
1887
|
+
vprord ymm14, ymm14, 16
|
|
1888
|
+
vprord ymm15, ymm15, 16
|
|
1889
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1890
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1891
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1892
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1893
|
+
vpxord ymm4, ymm4, ymm8
|
|
1894
|
+
vpxord ymm5, ymm5, ymm9
|
|
1895
|
+
vpxord ymm6, ymm6, ymm10
|
|
1896
|
+
vpxord ymm7, ymm7, ymm11
|
|
1897
|
+
vprord ymm4, ymm4, 12
|
|
1898
|
+
vprord ymm5, ymm5, 12
|
|
1899
|
+
vprord ymm6, ymm6, 12
|
|
1900
|
+
vprord ymm7, ymm7, 12
|
|
1901
|
+
vpaddd ymm0, ymm0, ymm31
|
|
1902
|
+
vpaddd ymm1, ymm1, ymm16
|
|
1903
|
+
vpaddd ymm2, ymm2, ymm25
|
|
1904
|
+
vpaddd ymm3, ymm3, ymm22
|
|
1905
|
+
vpaddd ymm0, ymm0, ymm4
|
|
1906
|
+
vpaddd ymm1, ymm1, ymm5
|
|
1907
|
+
vpaddd ymm2, ymm2, ymm6
|
|
1908
|
+
vpaddd ymm3, ymm3, ymm7
|
|
1909
|
+
vpxord ymm12, ymm12, ymm0
|
|
1910
|
+
vpxord ymm13, ymm13, ymm1
|
|
1911
|
+
vpxord ymm14, ymm14, ymm2
|
|
1912
|
+
vpxord ymm15, ymm15, ymm3
|
|
1913
|
+
vprord ymm12, ymm12, 8
|
|
1914
|
+
vprord ymm13, ymm13, 8
|
|
1915
|
+
vprord ymm14, ymm14, 8
|
|
1916
|
+
vprord ymm15, ymm15, 8
|
|
1917
|
+
vpaddd ymm8, ymm8, ymm12
|
|
1918
|
+
vpaddd ymm9, ymm9, ymm13
|
|
1919
|
+
vpaddd ymm10, ymm10, ymm14
|
|
1920
|
+
vpaddd ymm11, ymm11, ymm15
|
|
1921
|
+
vpxord ymm4, ymm4, ymm8
|
|
1922
|
+
vpxord ymm5, ymm5, ymm9
|
|
1923
|
+
vpxord ymm6, ymm6, ymm10
|
|
1924
|
+
vpxord ymm7, ymm7, ymm11
|
|
1925
|
+
vprord ymm4, ymm4, 7
|
|
1926
|
+
vprord ymm5, ymm5, 7
|
|
1927
|
+
vprord ymm6, ymm6, 7
|
|
1928
|
+
vprord ymm7, ymm7, 7
|
|
1929
|
+
vpaddd ymm0, ymm0, ymm30
|
|
1930
|
+
vpaddd ymm1, ymm1, ymm18
|
|
1931
|
+
vpaddd ymm2, ymm2, ymm19
|
|
1932
|
+
vpaddd ymm3, ymm3, ymm23
|
|
1933
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1934
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1935
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1936
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1937
|
+
vpxord ymm15, ymm15, ymm0
|
|
1938
|
+
vpxord ymm12, ymm12, ymm1
|
|
1939
|
+
vpxord ymm13, ymm13, ymm2
|
|
1940
|
+
vpxord ymm14, ymm14, ymm3
|
|
1941
|
+
vprord ymm15, ymm15, 16
|
|
1942
|
+
vprord ymm12, ymm12, 16
|
|
1943
|
+
vprord ymm13, ymm13, 16
|
|
1944
|
+
vprord ymm14, ymm14, 16
|
|
1945
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1946
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1947
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1948
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1949
|
+
vpxord ymm5, ymm5, ymm10
|
|
1950
|
+
vpxord ymm6, ymm6, ymm11
|
|
1951
|
+
vpxord ymm7, ymm7, ymm8
|
|
1952
|
+
vpxord ymm4, ymm4, ymm9
|
|
1953
|
+
vprord ymm5, ymm5, 12
|
|
1954
|
+
vprord ymm6, ymm6, 12
|
|
1955
|
+
vprord ymm7, ymm7, 12
|
|
1956
|
+
vprord ymm4, ymm4, 12
|
|
1957
|
+
vpaddd ymm0, ymm0, ymm26
|
|
1958
|
+
vpaddd ymm1, ymm1, ymm28
|
|
1959
|
+
vpaddd ymm2, ymm2, ymm20
|
|
1960
|
+
vpaddd ymm3, ymm3, ymm29
|
|
1961
|
+
vpaddd ymm0, ymm0, ymm5
|
|
1962
|
+
vpaddd ymm1, ymm1, ymm6
|
|
1963
|
+
vpaddd ymm2, ymm2, ymm7
|
|
1964
|
+
vpaddd ymm3, ymm3, ymm4
|
|
1965
|
+
vpxord ymm15, ymm15, ymm0
|
|
1966
|
+
vpxord ymm12, ymm12, ymm1
|
|
1967
|
+
vpxord ymm13, ymm13, ymm2
|
|
1968
|
+
vpxord ymm14, ymm14, ymm3
|
|
1969
|
+
vprord ymm15, ymm15, 8
|
|
1970
|
+
vprord ymm12, ymm12, 8
|
|
1971
|
+
vprord ymm13, ymm13, 8
|
|
1972
|
+
vprord ymm14, ymm14, 8
|
|
1973
|
+
vpaddd ymm10, ymm10, ymm15
|
|
1974
|
+
vpaddd ymm11, ymm11, ymm12
|
|
1975
|
+
vpaddd ymm8, ymm8, ymm13
|
|
1976
|
+
vpaddd ymm9, ymm9, ymm14
|
|
1977
|
+
vpxord ymm5, ymm5, ymm10
|
|
1978
|
+
vpxord ymm6, ymm6, ymm11
|
|
1979
|
+
vpxord ymm7, ymm7, ymm8
|
|
1980
|
+
vpxord ymm4, ymm4, ymm9
|
|
1981
|
+
vprord ymm5, ymm5, 7
|
|
1982
|
+
vprord ymm6, ymm6, 7
|
|
1983
|
+
vprord ymm7, ymm7, 7
|
|
1984
|
+
vprord ymm4, ymm4, 7
|
|
1985
|
+
vpxor ymm0, ymm0, ymm8
|
|
1986
|
+
vpxor ymm1, ymm1, ymm9
|
|
1987
|
+
vpxor ymm2, ymm2, ymm10
|
|
1988
|
+
vpxor ymm3, ymm3, ymm11
|
|
1989
|
+
vpxor ymm4, ymm4, ymm12
|
|
1990
|
+
vpxor ymm5, ymm5, ymm13
|
|
1991
|
+
vpxor ymm6, ymm6, ymm14
|
|
1992
|
+
vpxor ymm7, ymm7, ymm15
|
|
1993
|
+
movzx eax, byte ptr [rbp+0x38]
|
|
1994
|
+
jne 2b
|
|
1995
|
+
mov rbx, qword ptr [rbp+0x50]
|
|
1996
|
+
vunpcklps ymm8, ymm0, ymm1
|
|
1997
|
+
vunpcklps ymm9, ymm2, ymm3
|
|
1998
|
+
vunpckhps ymm10, ymm0, ymm1
|
|
1999
|
+
vunpcklps ymm11, ymm4, ymm5
|
|
2000
|
+
vunpcklps ymm0, ymm6, ymm7
|
|
2001
|
+
vshufps ymm12, ymm8, ymm9, 78
|
|
2002
|
+
vblendps ymm1, ymm8, ymm12, 0xCC
|
|
2003
|
+
vshufps ymm8, ymm11, ymm0, 78
|
|
2004
|
+
vunpckhps ymm13, ymm2, ymm3
|
|
2005
|
+
vblendps ymm2, ymm11, ymm8, 0xCC
|
|
2006
|
+
vblendps ymm3, ymm12, ymm9, 0xCC
|
|
2007
|
+
vperm2f128 ymm12, ymm1, ymm2, 0x20
|
|
2008
|
+
vmovups ymmword ptr [rbx], ymm12
|
|
2009
|
+
vunpckhps ymm14, ymm4, ymm5
|
|
2010
|
+
vblendps ymm4, ymm8, ymm0, 0xCC
|
|
2011
|
+
vunpckhps ymm15, ymm6, ymm7
|
|
2012
|
+
vperm2f128 ymm7, ymm3, ymm4, 0x20
|
|
2013
|
+
vmovups ymmword ptr [rbx+0x20], ymm7
|
|
2014
|
+
vshufps ymm5, ymm10, ymm13, 78
|
|
2015
|
+
vblendps ymm6, ymm5, ymm13, 0xCC
|
|
2016
|
+
vshufps ymm13, ymm14, ymm15, 78
|
|
2017
|
+
vblendps ymm10, ymm10, ymm5, 0xCC
|
|
2018
|
+
vblendps ymm14, ymm14, ymm13, 0xCC
|
|
2019
|
+
vperm2f128 ymm8, ymm10, ymm14, 0x20
|
|
2020
|
+
vmovups ymmword ptr [rbx+0x40], ymm8
|
|
2021
|
+
vblendps ymm15, ymm13, ymm15, 0xCC
|
|
2022
|
+
vperm2f128 ymm13, ymm6, ymm15, 0x20
|
|
2023
|
+
vmovups ymmword ptr [rbx+0x60], ymm13
|
|
2024
|
+
vperm2f128 ymm9, ymm1, ymm2, 0x31
|
|
2025
|
+
vperm2f128 ymm11, ymm3, ymm4, 0x31
|
|
2026
|
+
vmovups ymmword ptr [rbx+0x80], ymm9
|
|
2027
|
+
vperm2f128 ymm14, ymm10, ymm14, 0x31
|
|
2028
|
+
vperm2f128 ymm15, ymm6, ymm15, 0x31
|
|
2029
|
+
vmovups ymmword ptr [rbx+0xA0], ymm11
|
|
2030
|
+
vmovups ymmword ptr [rbx+0xC0], ymm14
|
|
2031
|
+
vmovups ymmword ptr [rbx+0xE0], ymm15
|
|
2032
|
+
vmovdqa ymm0, ymmword ptr [rsp]
|
|
2033
|
+
vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20]
|
|
2034
|
+
vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20]
|
|
2035
|
+
vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20]
|
|
2036
|
+
vmovdqa ymmword ptr [rsp], ymm0
|
|
2037
|
+
vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
|
|
2038
|
+
add rbx, 256
|
|
2039
|
+
mov qword ptr [rbp+0x50], rbx
|
|
2040
|
+
add rdi, 64
|
|
2041
|
+
sub rsi, 8
|
|
2042
|
+
3:
|
|
2043
|
+
mov rbx, qword ptr [rbp+0x50]
|
|
2044
|
+
mov r15, qword ptr [rsp+0x80]
|
|
2045
|
+
movzx r13, byte ptr [rbp+0x38]
|
|
2046
|
+
movzx r12, byte ptr [rbp+0x48]
|
|
2047
|
+
test esi, 0x4
|
|
2048
|
+
je 3f
|
|
2049
|
+
vbroadcasti32x4 zmm0, xmmword ptr [rcx]
|
|
2050
|
+
vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10]
|
|
2051
|
+
vmovdqa xmm12, xmmword ptr [rsp]
|
|
2052
|
+
vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10]
|
|
2053
|
+
vpunpckldq xmm14, xmm12, xmm13
|
|
2054
|
+
vpunpckhdq xmm15, xmm12, xmm13
|
|
2055
|
+
vpermq ymm14, ymm14, 0xDC
|
|
2056
|
+
vpermq ymm15, ymm15, 0xDC
|
|
2057
|
+
vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip]
|
|
2058
|
+
vinserti64x4 zmm13, zmm14, ymm15, 0x01
|
|
2059
|
+
mov eax, 17476
|
|
2060
|
+
kmovw k2, eax
|
|
2061
|
+
vpblendmd zmm13 {k2}, zmm13, zmm12
|
|
2062
|
+
vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
|
|
2063
|
+
mov r8, qword ptr [rdi]
|
|
2064
|
+
mov r9, qword ptr [rdi+0x8]
|
|
2065
|
+
mov r10, qword ptr [rdi+0x10]
|
|
2066
|
+
mov r11, qword ptr [rdi+0x18]
|
|
2067
|
+
mov eax, 43690
|
|
2068
|
+
kmovw k3, eax
|
|
2069
|
+
mov eax, 34952
|
|
2070
|
+
kmovw k4, eax
|
|
2071
|
+
movzx eax, byte ptr [rbp+0x40]
|
|
2072
|
+
or eax, r13d
|
|
2073
|
+
xor edx, edx
|
|
2074
|
+
.p2align 5
|
|
2075
|
+
2:
|
|
2076
|
+
mov r14d, eax
|
|
2077
|
+
or eax, r12d
|
|
2078
|
+
add rdx, 64
|
|
2079
|
+
cmp rdx, r15
|
|
2080
|
+
cmovne eax, r14d
|
|
2081
|
+
mov dword ptr [rsp+0x88], eax
|
|
2082
|
+
vmovdqa32 zmm2, zmm15
|
|
2083
|
+
vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4]
|
|
2084
|
+
vpblendmd zmm3 {k4}, zmm13, zmm8
|
|
2085
|
+
vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40]
|
|
2086
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01
|
|
2087
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02
|
|
2088
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03
|
|
2089
|
+
vmovups zmm9, zmmword ptr [r8+rdx-0x30]
|
|
2090
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01
|
|
2091
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02
|
|
2092
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03
|
|
2093
|
+
vshufps zmm4, zmm8, zmm9, 136
|
|
2094
|
+
vshufps zmm5, zmm8, zmm9, 221
|
|
2095
|
+
vmovups zmm8, zmmword ptr [r8+rdx-0x20]
|
|
2096
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01
|
|
2097
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02
|
|
2098
|
+
vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03
|
|
2099
|
+
vmovups zmm9, zmmword ptr [r8+rdx-0x10]
|
|
2100
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01
|
|
2101
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02
|
|
2102
|
+
vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03
|
|
2103
|
+
vshufps zmm6, zmm8, zmm9, 136
|
|
2104
|
+
vshufps zmm7, zmm8, zmm9, 221
|
|
2105
|
+
vpshufd zmm6, zmm6, 0x93
|
|
2106
|
+
vpshufd zmm7, zmm7, 0x93
|
|
2107
|
+
mov al, 7
|
|
2108
|
+
9:
|
|
2109
|
+
vpaddd zmm0, zmm0, zmm4
|
|
2110
|
+
vpaddd zmm0, zmm0, zmm1
|
|
2111
|
+
vpxord zmm3, zmm3, zmm0
|
|
2112
|
+
vprord zmm3, zmm3, 16
|
|
2113
|
+
vpaddd zmm2, zmm2, zmm3
|
|
2114
|
+
vpxord zmm1, zmm1, zmm2
|
|
2115
|
+
vprord zmm1, zmm1, 12
|
|
2116
|
+
vpaddd zmm0, zmm0, zmm5
|
|
2117
|
+
vpaddd zmm0, zmm0, zmm1
|
|
2118
|
+
vpxord zmm3, zmm3, zmm0
|
|
2119
|
+
vprord zmm3, zmm3, 8
|
|
2120
|
+
vpaddd zmm2, zmm2, zmm3
|
|
2121
|
+
vpxord zmm1, zmm1, zmm2
|
|
2122
|
+
vprord zmm1, zmm1, 7
|
|
2123
|
+
vpshufd zmm0, zmm0, 0x93
|
|
2124
|
+
vpshufd zmm3, zmm3, 0x4E
|
|
2125
|
+
vpshufd zmm2, zmm2, 0x39
|
|
2126
|
+
vpaddd zmm0, zmm0, zmm6
|
|
2127
|
+
vpaddd zmm0, zmm0, zmm1
|
|
2128
|
+
vpxord zmm3, zmm3, zmm0
|
|
2129
|
+
vprord zmm3, zmm3, 16
|
|
2130
|
+
vpaddd zmm2, zmm2, zmm3
|
|
2131
|
+
vpxord zmm1, zmm1, zmm2
|
|
2132
|
+
vprord zmm1, zmm1, 12
|
|
2133
|
+
vpaddd zmm0, zmm0, zmm7
|
|
2134
|
+
vpaddd zmm0, zmm0, zmm1
|
|
2135
|
+
vpxord zmm3, zmm3, zmm0
|
|
2136
|
+
vprord zmm3, zmm3, 8
|
|
2137
|
+
vpaddd zmm2, zmm2, zmm3
|
|
2138
|
+
vpxord zmm1, zmm1, zmm2
|
|
2139
|
+
vprord zmm1, zmm1, 7
|
|
2140
|
+
vpshufd zmm0, zmm0, 0x39
|
|
2141
|
+
vpshufd zmm3, zmm3, 0x4E
|
|
2142
|
+
vpshufd zmm2, zmm2, 0x93
|
|
2143
|
+
dec al
|
|
2144
|
+
jz 9f
|
|
2145
|
+
vshufps zmm8, zmm4, zmm5, 214
|
|
2146
|
+
vpshufd zmm9, zmm4, 0x0F
|
|
2147
|
+
vpshufd zmm4, zmm8, 0x39
|
|
2148
|
+
vshufps zmm8, zmm6, zmm7, 250
|
|
2149
|
+
vpblendmd zmm9 {k3}, zmm9, zmm8
|
|
2150
|
+
vpunpcklqdq zmm8, zmm7, zmm5
|
|
2151
|
+
vpblendmd zmm8 {k4}, zmm8, zmm6
|
|
2152
|
+
vpshufd zmm8, zmm8, 0x78
|
|
2153
|
+
vpunpckhdq zmm5, zmm5, zmm7
|
|
2154
|
+
vpunpckldq zmm6, zmm6, zmm5
|
|
2155
|
+
vpshufd zmm7, zmm6, 0x1E
|
|
2156
|
+
vmovdqa32 zmm5, zmm9
|
|
2157
|
+
vmovdqa32 zmm6, zmm8
|
|
2158
|
+
jmp 9b
|
|
2159
|
+
9:
|
|
2160
|
+
vpxord zmm0, zmm0, zmm2
|
|
2161
|
+
vpxord zmm1, zmm1, zmm3
|
|
2162
|
+
mov eax, r13d
|
|
2163
|
+
cmp rdx, r15
|
|
2164
|
+
jne 2b
|
|
2165
|
+
vmovdqu xmmword ptr [rbx], xmm0
|
|
2166
|
+
vmovdqu xmmword ptr [rbx+0x10], xmm1
|
|
2167
|
+
vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
|
|
2168
|
+
vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
|
|
2169
|
+
vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02
|
|
2170
|
+
vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02
|
|
2171
|
+
vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03
|
|
2172
|
+
vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03
|
|
2173
|
+
vmovdqa xmm0, xmmword ptr [rsp]
|
|
2174
|
+
vmovdqa xmm2, xmmword ptr [rsp+0x40]
|
|
2175
|
+
vmovdqa32 xmm0 {k1}, xmmword ptr [rsp+0x1*0x10]
|
|
2176
|
+
vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10]
|
|
2177
|
+
vmovdqa xmmword ptr [rsp], xmm0
|
|
2178
|
+
vmovdqa xmmword ptr [rsp+0x40], xmm2
|
|
2179
|
+
add rbx, 128
|
|
2180
|
+
add rdi, 32
|
|
2181
|
+
sub rsi, 4
|
|
2182
|
+
3:
|
|
2183
|
+
test esi, 0x2
|
|
2184
|
+
je 3f
|
|
2185
|
+
vbroadcasti128 ymm0, xmmword ptr [rcx]
|
|
2186
|
+
vbroadcasti128 ymm1, xmmword ptr [rcx+0x10]
|
|
2187
|
+
vmovd xmm13, dword ptr [rsp]
|
|
2188
|
+
vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1
|
|
2189
|
+
vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
|
|
2190
|
+
vmovd xmm14, dword ptr [rsp+0x4]
|
|
2191
|
+
vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
|
|
2192
|
+
vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
|
|
2193
|
+
vinserti128 ymm13, ymm13, xmm14, 0x01
|
|
2194
|
+
mov r8, qword ptr [rdi]
|
|
2195
|
+
mov r9, qword ptr [rdi+0x8]
|
|
2196
|
+
movzx eax, byte ptr [rbp+0x40]
|
|
2197
|
+
or eax, r13d
|
|
2198
|
+
xor edx, edx
|
|
2199
|
+
.p2align 5
|
|
2200
|
+
2:
|
|
2201
|
+
mov r14d, eax
|
|
2202
|
+
or eax, r12d
|
|
2203
|
+
add rdx, 64
|
|
2204
|
+
cmp rdx, r15
|
|
2205
|
+
cmovne eax, r14d
|
|
2206
|
+
mov dword ptr [rsp+0x88], eax
|
|
2207
|
+
vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
|
|
2208
|
+
vpbroadcastd ymm8, dword ptr [rsp+0x88]
|
|
2209
|
+
vpblendd ymm3, ymm13, ymm8, 0x88
|
|
2210
|
+
vmovups ymm8, ymmword ptr [r8+rdx-0x40]
|
|
2211
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01
|
|
2212
|
+
vmovups ymm9, ymmword ptr [r8+rdx-0x30]
|
|
2213
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01
|
|
2214
|
+
vshufps ymm4, ymm8, ymm9, 136
|
|
2215
|
+
vshufps ymm5, ymm8, ymm9, 221
|
|
2216
|
+
vmovups ymm8, ymmword ptr [r8+rdx-0x20]
|
|
2217
|
+
vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01
|
|
2218
|
+
vmovups ymm9, ymmword ptr [r8+rdx-0x10]
|
|
2219
|
+
vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01
|
|
2220
|
+
vshufps ymm6, ymm8, ymm9, 136
|
|
2221
|
+
vshufps ymm7, ymm8, ymm9, 221
|
|
2222
|
+
vpshufd ymm6, ymm6, 0x93
|
|
2223
|
+
vpshufd ymm7, ymm7, 0x93
|
|
2224
|
+
mov al, 7
|
|
2225
|
+
9:
|
|
2226
|
+
vpaddd ymm0, ymm0, ymm4
|
|
2227
|
+
vpaddd ymm0, ymm0, ymm1
|
|
2228
|
+
vpxord ymm3, ymm3, ymm0
|
|
2229
|
+
vprord ymm3, ymm3, 16
|
|
2230
|
+
vpaddd ymm2, ymm2, ymm3
|
|
2231
|
+
vpxord ymm1, ymm1, ymm2
|
|
2232
|
+
vprord ymm1, ymm1, 12
|
|
2233
|
+
vpaddd ymm0, ymm0, ymm5
|
|
2234
|
+
vpaddd ymm0, ymm0, ymm1
|
|
2235
|
+
vpxord ymm3, ymm3, ymm0
|
|
2236
|
+
vprord ymm3, ymm3, 8
|
|
2237
|
+
vpaddd ymm2, ymm2, ymm3
|
|
2238
|
+
vpxord ymm1, ymm1, ymm2
|
|
2239
|
+
vprord ymm1, ymm1, 7
|
|
2240
|
+
vpshufd ymm0, ymm0, 0x93
|
|
2241
|
+
vpshufd ymm3, ymm3, 0x4E
|
|
2242
|
+
vpshufd ymm2, ymm2, 0x39
|
|
2243
|
+
vpaddd ymm0, ymm0, ymm6
|
|
2244
|
+
vpaddd ymm0, ymm0, ymm1
|
|
2245
|
+
vpxord ymm3, ymm3, ymm0
|
|
2246
|
+
vprord ymm3, ymm3, 16
|
|
2247
|
+
vpaddd ymm2, ymm2, ymm3
|
|
2248
|
+
vpxord ymm1, ymm1, ymm2
|
|
2249
|
+
vprord ymm1, ymm1, 12
|
|
2250
|
+
vpaddd ymm0, ymm0, ymm7
|
|
2251
|
+
vpaddd ymm0, ymm0, ymm1
|
|
2252
|
+
vpxord ymm3, ymm3, ymm0
|
|
2253
|
+
vprord ymm3, ymm3, 8
|
|
2254
|
+
vpaddd ymm2, ymm2, ymm3
|
|
2255
|
+
vpxord ymm1, ymm1, ymm2
|
|
2256
|
+
vprord ymm1, ymm1, 7
|
|
2257
|
+
vpshufd ymm0, ymm0, 0x39
|
|
2258
|
+
vpshufd ymm3, ymm3, 0x4E
|
|
2259
|
+
vpshufd ymm2, ymm2, 0x93
|
|
2260
|
+
dec al
|
|
2261
|
+
jz 9f
|
|
2262
|
+
vshufps ymm8, ymm4, ymm5, 214
|
|
2263
|
+
vpshufd ymm9, ymm4, 0x0F
|
|
2264
|
+
vpshufd ymm4, ymm8, 0x39
|
|
2265
|
+
vshufps ymm8, ymm6, ymm7, 250
|
|
2266
|
+
vpblendd ymm9, ymm9, ymm8, 0xAA
|
|
2267
|
+
vpunpcklqdq ymm8, ymm7, ymm5
|
|
2268
|
+
vpblendd ymm8, ymm8, ymm6, 0x88
|
|
2269
|
+
vpshufd ymm8, ymm8, 0x78
|
|
2270
|
+
vpunpckhdq ymm5, ymm5, ymm7
|
|
2271
|
+
vpunpckldq ymm6, ymm6, ymm5
|
|
2272
|
+
vpshufd ymm7, ymm6, 0x1E
|
|
2273
|
+
vmovdqa ymm5, ymm9
|
|
2274
|
+
vmovdqa ymm6, ymm8
|
|
2275
|
+
jmp 9b
|
|
2276
|
+
9:
|
|
2277
|
+
vpxor ymm0, ymm0, ymm2
|
|
2278
|
+
vpxor ymm1, ymm1, ymm3
|
|
2279
|
+
mov eax, r13d
|
|
2280
|
+
cmp rdx, r15
|
|
2281
|
+
jne 2b
|
|
2282
|
+
vmovdqu xmmword ptr [rbx], xmm0
|
|
2283
|
+
vmovdqu xmmword ptr [rbx+0x10], xmm1
|
|
2284
|
+
vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01
|
|
2285
|
+
vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01
|
|
2286
|
+
vmovdqa xmm0, xmmword ptr [rsp]
|
|
2287
|
+
vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10]
|
|
2288
|
+
vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8]
|
|
2289
|
+
vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48]
|
|
2290
|
+
vmovdqa xmmword ptr [rsp], xmm0
|
|
2291
|
+
vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
|
|
2292
|
+
add rbx, 64
|
|
2293
|
+
add rdi, 16
|
|
2294
|
+
sub rsi, 2
|
|
2295
|
+
3:
|
|
2296
|
+
test esi, 0x1
|
|
2297
|
+
je 4b
|
|
2298
|
+
vmovdqu xmm0, xmmword ptr [rcx]
|
|
2299
|
+
vmovdqu xmm1, xmmword ptr [rcx+0x10]
|
|
2300
|
+
vmovd xmm14, dword ptr [rsp]
|
|
2301
|
+
vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
|
|
2302
|
+
vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
|
|
2303
|
+
vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
|
|
2304
|
+
mov r8, qword ptr [rdi]
|
|
2305
|
+
movzx eax, byte ptr [rbp+0x40]
|
|
2306
|
+
or eax, r13d
|
|
2307
|
+
xor edx, edx
|
|
2308
|
+
.p2align 5
|
|
2309
|
+
2:
|
|
2310
|
+
mov r14d, eax
|
|
2311
|
+
or eax, r12d
|
|
2312
|
+
add rdx, 64
|
|
2313
|
+
cmp rdx, r15
|
|
2314
|
+
cmovne eax, r14d
|
|
2315
|
+
vpinsrd xmm3, xmm14, eax, 3
|
|
2316
|
+
vmovdqa xmm2, xmm15
|
|
2317
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x40]
|
|
2318
|
+
vmovups xmm9, xmmword ptr [r8+rdx-0x30]
|
|
2319
|
+
vshufps xmm4, xmm8, xmm9, 136
|
|
2320
|
+
vshufps xmm5, xmm8, xmm9, 221
|
|
2321
|
+
vmovups xmm8, xmmword ptr [r8+rdx-0x20]
|
|
2322
|
+
vmovups xmm9, xmmword ptr [r8+rdx-0x10]
|
|
2323
|
+
vshufps xmm6, xmm8, xmm9, 136
|
|
2324
|
+
vshufps xmm7, xmm8, xmm9, 221
|
|
2325
|
+
vpshufd xmm6, xmm6, 0x93
|
|
2326
|
+
vpshufd xmm7, xmm7, 0x93
|
|
2327
|
+
mov al, 7
|
|
2328
|
+
9:
|
|
2329
|
+
vpaddd xmm0, xmm0, xmm4
|
|
2330
|
+
vpaddd xmm0, xmm0, xmm1
|
|
2331
|
+
vpxord xmm3, xmm3, xmm0
|
|
2332
|
+
vprord xmm3, xmm3, 16
|
|
2333
|
+
vpaddd xmm2, xmm2, xmm3
|
|
2334
|
+
vpxord xmm1, xmm1, xmm2
|
|
2335
|
+
vprord xmm1, xmm1, 12
|
|
2336
|
+
vpaddd xmm0, xmm0, xmm5
|
|
2337
|
+
vpaddd xmm0, xmm0, xmm1
|
|
2338
|
+
vpxord xmm3, xmm3, xmm0
|
|
2339
|
+
vprord xmm3, xmm3, 8
|
|
2340
|
+
vpaddd xmm2, xmm2, xmm3
|
|
2341
|
+
vpxord xmm1, xmm1, xmm2
|
|
2342
|
+
vprord xmm1, xmm1, 7
|
|
2343
|
+
vpshufd xmm0, xmm0, 0x93
|
|
2344
|
+
vpshufd xmm3, xmm3, 0x4E
|
|
2345
|
+
vpshufd xmm2, xmm2, 0x39
|
|
2346
|
+
vpaddd xmm0, xmm0, xmm6
|
|
2347
|
+
vpaddd xmm0, xmm0, xmm1
|
|
2348
|
+
vpxord xmm3, xmm3, xmm0
|
|
2349
|
+
vprord xmm3, xmm3, 16
|
|
2350
|
+
vpaddd xmm2, xmm2, xmm3
|
|
2351
|
+
vpxord xmm1, xmm1, xmm2
|
|
2352
|
+
vprord xmm1, xmm1, 12
|
|
2353
|
+
vpaddd xmm0, xmm0, xmm7
|
|
2354
|
+
vpaddd xmm0, xmm0, xmm1
|
|
2355
|
+
vpxord xmm3, xmm3, xmm0
|
|
2356
|
+
vprord xmm3, xmm3, 8
|
|
2357
|
+
vpaddd xmm2, xmm2, xmm3
|
|
2358
|
+
vpxord xmm1, xmm1, xmm2
|
|
2359
|
+
vprord xmm1, xmm1, 7
|
|
2360
|
+
vpshufd xmm0, xmm0, 0x39
|
|
2361
|
+
vpshufd xmm3, xmm3, 0x4E
|
|
2362
|
+
vpshufd xmm2, xmm2, 0x93
|
|
2363
|
+
dec al
|
|
2364
|
+
jz 9f
|
|
2365
|
+
vshufps xmm8, xmm4, xmm5, 214
|
|
2366
|
+
vpshufd xmm9, xmm4, 0x0F
|
|
2367
|
+
vpshufd xmm4, xmm8, 0x39
|
|
2368
|
+
vshufps xmm8, xmm6, xmm7, 250
|
|
2369
|
+
vpblendd xmm9, xmm9, xmm8, 0xAA
|
|
2370
|
+
vpunpcklqdq xmm8, xmm7, xmm5
|
|
2371
|
+
vpblendd xmm8, xmm8, xmm6, 0x88
|
|
2372
|
+
vpshufd xmm8, xmm8, 0x78
|
|
2373
|
+
vpunpckhdq xmm5, xmm5, xmm7
|
|
2374
|
+
vpunpckldq xmm6, xmm6, xmm5
|
|
2375
|
+
vpshufd xmm7, xmm6, 0x1E
|
|
2376
|
+
vmovdqa xmm5, xmm9
|
|
2377
|
+
vmovdqa xmm6, xmm8
|
|
2378
|
+
jmp 9b
|
|
2379
|
+
9:
|
|
2380
|
+
vpxor xmm0, xmm0, xmm2
|
|
2381
|
+
vpxor xmm1, xmm1, xmm3
|
|
2382
|
+
mov eax, r13d
|
|
2383
|
+
cmp rdx, r15
|
|
2384
|
+
jne 2b
|
|
2385
|
+
vmovdqu xmmword ptr [rbx], xmm0
|
|
2386
|
+
vmovdqu xmmword ptr [rbx+0x10], xmm1
|
|
2387
|
+
jmp 4b
|
|
2388
|
+
/*
 * blake3_compress_in_place_avx512
 *
 * Runs seven mixing rounds over a 4x4 matrix of 32-bit words held one row
 * per XMM register, then overwrites the 32 bytes at [rdi] with
 * (row0 ^ row2, row1 ^ row3).
 *
 * Inputs, exactly as the code reads them (register order matches the
 * System V AMD64 integer-argument sequence; the parameter names are
 * presumably those of the BLAKE3 C prototype -- confirm against the
 * C header that declares this symbol):
 *   rdi  -> 32 bytes, read as rows 0-1 and rewritten with the result
 *   rsi  -> 64 bytes of message, read with unaligned loads
 *   dl   =  8-bit value placed in dword 2 of row 3
 *   rcx  =  64-bit value placed in dwords 0-1 of row 3
 *   r8b  =  8-bit value placed in dword 3 of row 3
 *
 * Clobbers: rax, rdx, xmm0-xmm9, flags.  No stack is used.
 * Needs:    EVEX-encoded 128-bit vprord/vpxord (AVX-512F + AVX-512VL) and
 *           vpblendd (AVX2).  BLAKE3_IV is defined elsewhere in this file
 *           and must be 16-byte aligned because it is fetched with vmovaps.
 */
.p2align 6
_blake3_compress_in_place_avx512:
blake3_compress_in_place_avx512:
        _CET_ENDBR                      /* macro defined elsewhere; presumably the CET landing pad */

        /* Row 3 = { rcx (low qword), dl | (r8b << 32) (high qword) }. */
        movzx   edx, dl
        movzx   eax, r8b
        shl     rax, 32
        add     rdx, rax
        vmovq   xmm3, rcx
        vmovq   xmm4, rdx               /* xmm4 is only a temporary here */
        vpunpcklqdq xmm3, xmm3, xmm4

        /* Rows 0-1 come from the caller's state, row 2 from the IV table. */
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+16]
        vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]

        /*
         * Split the 16 message dwords into even-indexed (0x88) and
         * odd-indexed (0xDD) groups; the upper two groups are additionally
         * rotated by one lane (0x93) before the first round.
         */
        vmovups xmm8, xmmword ptr [rsi]
        vmovups xmm9, xmmword ptr [rsi+16]
        vshufps xmm4, xmm8, xmm9, 0x88
        vshufps xmm5, xmm8, xmm9, 0xDD
        vmovups xmm8, xmmword ptr [rsi+32]
        vmovups xmm9, xmmword ptr [rsi+48]
        vshufps xmm6, xmm8, xmm9, 0x88
        vshufps xmm7, xmm8, xmm9, 0xDD
        vpshufd xmm6, xmm6, 0x93
        vpshufd xmm7, xmm7, 0x93

        mov     al, 7                   /* round counter */
9:
        /* First half: add / xor / rotate-right by 16, 12, 8, 7 on all four lanes. */
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm4
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 16
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 12
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm5
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 8
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 7

        /*
         * Rotate the lanes of rows 0, 2 and 3 (row 1 stays put) so the
         * second half mixes a different grouping of words.
         */
        vpshufd xmm2, xmm2, 0x39
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm0, xmm0, 0x93

        /* Second half: same mixing pattern with message groups xmm6/xmm7. */
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm6
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 16
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 12
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm7
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 8
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 7

        /* Undo the lane rotation (0x39 and 0x93 are inverses, 0x4E is its own). */
        vpshufd xmm2, xmm2, 0x93
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm0, xmm0, 0x39

        dec     al
        jz      9f                      /* after the 7th round, skip the permutation */

        /*
         * Re-arrange the message groups for the next round.  xmm8/xmm9 are
         * scratch; the new groups end up back in xmm4..xmm7.
         */
        vpshufd xmm9, xmm4, 0x0F
        vshufps xmm8, xmm4, xmm5, 0xD6
        vpshufd xmm4, xmm8, 0x39
        vshufps xmm8, xmm6, xmm7, 0xFA
        vpblendd xmm9, xmm9, xmm8, 0xAA
        vpunpcklqdq xmm8, xmm7, xmm5
        vpblendd xmm8, xmm8, xmm6, 0x88
        vpshufd xmm8, xmm8, 0x78
        vpunpckhdq xmm5, xmm5, xmm7
        vpunpckldq xmm6, xmm6, xmm5
        vpshufd xmm7, xmm6, 0x1E
        vmovdqa xmm6, xmm8
        vmovdqa xmm5, xmm9
        jmp     9b
9:
        /* Fold the lower rows into the upper rows and store back in place. */
        vpxor   xmm0, xmm0, xmm2
        vmovdqu xmmword ptr [rdi], xmm0
        vpxor   xmm1, xmm1, xmm3
        vmovdqu xmmword ptr [rdi+16], xmm1
        ret
|
|
2470
|
+
|
|
2471
|
+
/*
 * blake3_compress_xof_avx512
 *
 * Same seven-round mixing as the in-place variant, but the caller's
 * 32-byte state at [rdi] is only read, and a full 64-byte result is
 * written to [r9]:
 *   [r9+0 .. r9+31]  = (row0 ^ row2, row1 ^ row3)
 *   [r9+32 .. r9+63] = (row2 ^ [rdi], row3 ^ [rdi+16])
 *
 * Inputs, exactly as the code reads them (register order matches the
 * System V AMD64 integer-argument sequence; the parameter names are
 * presumably those of the BLAKE3 C prototype -- confirm against the
 * C header that declares this symbol):
 *   rdi  -> 32 bytes of input state (read twice, never written here)
 *   rsi  -> 64 bytes of message, read with unaligned loads
 *   dl   =  8-bit value placed in dword 2 of row 3
 *   rcx  =  64-bit value placed in dwords 0-1 of row 3
 *   r8b  =  8-bit value placed in dword 3 of row 3
 *   r9   -> 64-byte output buffer, written with unaligned stores
 *
 * Clobbers: rax, rdx, xmm0-xmm9, flags.  No stack is used.
 * Needs:    EVEX-encoded 128-bit vprord/vpxord (AVX-512F + AVX-512VL) and
 *           vpblendd (AVX2).  BLAKE3_IV is defined elsewhere in this file
 *           and must be 16-byte aligned because it is fetched with vmovaps.
 */
.p2align 6
_blake3_compress_xof_avx512:
blake3_compress_xof_avx512:
        _CET_ENDBR                      /* macro defined elsewhere; presumably the CET landing pad */

        /* Build row 3: low qword = rcx, high qword = dl | (r8b << 32). */
        movzx   edx, dl
        movzx   eax, r8b
        shl     rax, 32
        add     rdx, rax
        vmovq   xmm3, rcx
        vmovq   xmm4, rdx               /* temporary; xmm4 is reloaded with message words below */
        vpunpcklqdq xmm3, xmm3, xmm4

        /* Rows 0-1 = input state, row 2 = IV constants. */
        vmovdqu xmm0, xmmword ptr [rdi]
        vmovdqu xmm1, xmmword ptr [rdi+16]
        vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]

        /*
         * Message words: 0x88 picks the even-indexed dwords of each 32-byte
         * half, 0xDD the odd-indexed ones; the two groups from the second
         * half get a one-lane rotation (0x93).
         */
        vmovups xmm8, xmmword ptr [rsi]
        vmovups xmm9, xmmword ptr [rsi+16]
        vshufps xmm4, xmm8, xmm9, 0x88
        vshufps xmm5, xmm8, xmm9, 0xDD
        vmovups xmm8, xmmword ptr [rsi+32]
        vmovups xmm9, xmmword ptr [rsi+48]
        vshufps xmm6, xmm8, xmm9, 0x88
        vshufps xmm7, xmm8, xmm9, 0xDD
        vpshufd xmm6, xmm6, 0x93
        vpshufd xmm7, xmm7, 0x93

        mov     al, 7                   /* seven rounds in total */
9:
        /* Mixing step A: message groups xmm4 then xmm5, rotations 16/12/8/7. */
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm4
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 16
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 12
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm5
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 8
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 7

        /* Shift rows 0, 2, 3 by one, two and three lanes relative to row 1. */
        vpshufd xmm2, xmm2, 0x39
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm0, xmm0, 0x93

        /* Mixing step B: message groups xmm6 then xmm7, same rotations. */
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm6
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 16
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 12
        vpaddd  xmm0, xmm0, xmm1
        vpaddd  xmm0, xmm0, xmm7
        vpxord  xmm3, xmm3, xmm0
        vprord  xmm3, xmm3, 8
        vpaddd  xmm2, xmm2, xmm3
        vpxord  xmm1, xmm1, xmm2
        vprord  xmm1, xmm1, 7

        /* Shift the rows back to their original lane positions. */
        vpshufd xmm2, xmm2, 0x93
        vpshufd xmm3, xmm3, 0x4E
        vpshufd xmm0, xmm0, 0x39

        dec     al
        jz      9f                      /* no permutation needed after the last round */

        /*
         * Permute the message groups for the following round, using
         * xmm8/xmm9 as scratch and leaving the result in xmm4..xmm7.
         */
        vpshufd xmm9, xmm4, 0x0F
        vshufps xmm8, xmm4, xmm5, 0xD6
        vpshufd xmm4, xmm8, 0x39
        vshufps xmm8, xmm6, xmm7, 0xFA
        vpblendd xmm9, xmm9, xmm8, 0xAA
        vpunpcklqdq xmm8, xmm7, xmm5
        vpblendd xmm8, xmm8, xmm6, 0x88
        vpshufd xmm8, xmm8, 0x78
        vpunpckhdq xmm5, xmm5, xmm7
        vpunpckldq xmm6, xmm6, xmm5
        vpshufd xmm7, xmm6, 0x1E
        vmovdqa xmm6, xmm8
        vmovdqa xmm5, xmm9
        jmp     9b
9:
        /*
         * Finalise.  Each upper row absorbs its lower row before that lower
         * row is combined with the input state.  Both reads of [rdi] are
         * issued before any store to [r9], so the result is unchanged even
         * if the two buffers overlap.
         */
        vpxor   xmm0, xmm0, xmm2
        vpxor   xmm2, xmm2, xmmword ptr [rdi]
        vpxor   xmm1, xmm1, xmm3
        vpxor   xmm3, xmm3, xmmword ptr [rdi+16]
        vmovdqu xmmword ptr [r9], xmm0
        vmovdqu xmmword ptr [r9+16], xmm1
        vmovdqu xmmword ptr [r9+32], xmm2
        vmovdqu xmmword ptr [r9+48], xmm3
        ret
|
|
2557
|
+
|
|
2558
|
+
.p2align 6
|
|
2559
|
+
blake3_xof_many_avx512:
|
|
2560
|
+
_blake3_xof_many_avx512:
|
|
2561
|
+
_CET_ENDBR
|
|
2562
|
+
mov r10,QWORD PTR [rsp+0x8]
|
|
2563
|
+
cmp r10,0x1
|
|
2564
|
+
ja 2f
|
|
2565
|
+
vmovdqu xmm0,XMMWORD PTR [rdi]
|
|
2566
|
+
vmovdqu xmm1,XMMWORD PTR [rdi+0x10]
|
|
2567
|
+
movzx eax,r8b
|
|
2568
|
+
movzx edx,dl
|
|
2569
|
+
shl rax,0x20
|
|
2570
|
+
add rdx,rax
|
|
2571
|
+
vmovq xmm3,rcx
|
|
2572
|
+
vmovq xmm4,rdx
|
|
2573
|
+
vpunpcklqdq xmm3,xmm3,xmm4
|
|
2574
|
+
vmovaps xmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
|
2575
|
+
vmovups xmm8,XMMWORD PTR [rsi]
|
|
2576
|
+
vmovups xmm9,XMMWORD PTR [rsi+0x10]
|
|
2577
|
+
vshufps xmm4,xmm8,xmm9,0x88
|
|
2578
|
+
vshufps xmm5,xmm8,xmm9,0xdd
|
|
2579
|
+
vmovups xmm8,XMMWORD PTR [rsi+0x20]
|
|
2580
|
+
vmovups xmm9,XMMWORD PTR [rsi+0x30]
|
|
2581
|
+
vshufps xmm6,xmm8,xmm9,0x88
|
|
2582
|
+
vshufps xmm7,xmm8,xmm9,0xdd
|
|
2583
|
+
vpshufd xmm6,xmm6,0x93
|
|
2584
|
+
vpshufd xmm7,xmm7,0x93
|
|
2585
|
+
mov al,0x7
|
|
2586
|
+
3:
|
|
2587
|
+
vpaddd xmm0,xmm0,xmm4
|
|
2588
|
+
vpaddd xmm0,xmm0,xmm1
|
|
2589
|
+
vpxord xmm3,xmm3,xmm0
|
|
2590
|
+
vprord xmm3,xmm3,0x10
|
|
2591
|
+
vpaddd xmm2,xmm2,xmm3
|
|
2592
|
+
vpxord xmm1,xmm1,xmm2
|
|
2593
|
+
vprord xmm1,xmm1,0xc
|
|
2594
|
+
vpaddd xmm0,xmm0,xmm5
|
|
2595
|
+
vpaddd xmm0,xmm0,xmm1
|
|
2596
|
+
vpxord xmm3,xmm3,xmm0
|
|
2597
|
+
vprord xmm3,xmm3,0x8
|
|
2598
|
+
vpaddd xmm2,xmm2,xmm3
|
|
2599
|
+
vpxord xmm1,xmm1,xmm2
|
|
2600
|
+
vprord xmm1,xmm1,0x7
|
|
2601
|
+
vpshufd xmm0,xmm0,0x93
|
|
2602
|
+
vpshufd xmm3,xmm3,0x4e
|
|
2603
|
+
vpshufd xmm2,xmm2,0x39
|
|
2604
|
+
vpaddd xmm0,xmm0,xmm6
|
|
2605
|
+
vpaddd xmm0,xmm0,xmm1
|
|
2606
|
+
vpxord xmm3,xmm3,xmm0
|
|
2607
|
+
vprord xmm3,xmm3,0x10
|
|
2608
|
+
vpaddd xmm2,xmm2,xmm3
|
|
2609
|
+
vpxord xmm1,xmm1,xmm2
|
|
2610
|
+
vprord xmm1,xmm1,0xc
|
|
2611
|
+
vpaddd xmm0,xmm0,xmm7
|
|
2612
|
+
vpaddd xmm0,xmm0,xmm1
|
|
2613
|
+
vpxord xmm3,xmm3,xmm0
|
|
2614
|
+
vprord xmm3,xmm3,0x8
|
|
2615
|
+
vpaddd xmm2,xmm2,xmm3
|
|
2616
|
+
vpxord xmm1,xmm1,xmm2
|
|
2617
|
+
vprord xmm1,xmm1,0x7
|
|
2618
|
+
vpshufd xmm0,xmm0,0x39
|
|
2619
|
+
vpshufd xmm3,xmm3,0x4e
|
|
2620
|
+
vpshufd xmm2,xmm2,0x93
|
|
2621
|
+
dec al
|
|
2622
|
+
je 3f
|
|
2623
|
+
vshufps xmm8,xmm4,xmm5,0xd6
|
|
2624
|
+
vpshufd xmm9,xmm4,0xf
|
|
2625
|
+
vpshufd xmm4,xmm8,0x39
|
|
2626
|
+
vshufps xmm8,xmm6,xmm7,0xfa
|
|
2627
|
+
vpblendd xmm9,xmm9,xmm8,0xaa
|
|
2628
|
+
vpunpcklqdq xmm8,xmm7,xmm5
|
|
2629
|
+
vpblendd xmm8,xmm8,xmm6,0x88
|
|
2630
|
+
vpshufd xmm8,xmm8,0x78
|
|
2631
|
+
vpunpckhdq xmm5,xmm5,xmm7
|
|
2632
|
+
vpunpckldq xmm6,xmm6,xmm5
|
|
2633
|
+
vpshufd xmm7,xmm6,0x1e
|
|
2634
|
+
vmovdqa xmm5,xmm9
|
|
2635
|
+
vmovdqa xmm6,xmm8
|
|
2636
|
+
jmp 3b
|
|
2637
|
+
3:
|
|
2638
|
+
vpxor xmm0,xmm0,xmm2
|
|
2639
|
+
vpxor xmm1,xmm1,xmm3
|
|
2640
|
+
vpxor xmm2,xmm2,XMMWORD PTR [rdi]
|
|
2641
|
+
vpxor xmm3,xmm3,XMMWORD PTR [rdi+0x10]
|
|
2642
|
+
vmovdqu XMMWORD PTR [r9],xmm0
|
|
2643
|
+
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
|
2644
|
+
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
|
2645
|
+
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
|
2646
|
+
ret
|
|
2647
|
+
.p2align 6
|
|
2648
|
+
2:
|
|
2649
|
+
push rbp
|
|
2650
|
+
mov rbp,rsp
|
|
2651
|
+
sub rsp,0x90
|
|
2652
|
+
and rsp,0xffffffffffffffc0
|
|
2653
|
+
vpbroadcastd zmm0,ecx
|
|
2654
|
+
shr rcx,0x20
|
|
2655
|
+
vpbroadcastd zmm1,ecx
|
|
2656
|
+
vpaddd zmm2,zmm0,ZMMWORD PTR [ADD0+rip]
|
|
2657
|
+
vpcmpltud k1,zmm2,zmm0
|
|
2658
|
+
vpaddd zmm1{k1},zmm1,DWORD PTR [ADD1+rip]{1to16}
|
|
2659
|
+
vmovdqa32 ZMMWORD PTR [rsp],zmm2
|
|
2660
|
+
vmovdqa32 ZMMWORD PTR [rsp+0x40],zmm1
|
|
2661
|
+
cmp r10,0x10
|
|
2662
|
+
jb 2f
|
|
2663
|
+
3:
|
|
2664
|
+
vpbroadcastd zmm16,DWORD PTR [rsi]
|
|
2665
|
+
vpbroadcastd zmm17,DWORD PTR [rsi+0x4]
|
|
2666
|
+
vpbroadcastd zmm18,DWORD PTR [rsi+0x8]
|
|
2667
|
+
vpbroadcastd zmm19,DWORD PTR [rsi+0xc]
|
|
2668
|
+
vpbroadcastd zmm20,DWORD PTR [rsi+0x10]
|
|
2669
|
+
vpbroadcastd zmm21,DWORD PTR [rsi+0x14]
|
|
2670
|
+
vpbroadcastd zmm22,DWORD PTR [rsi+0x18]
|
|
2671
|
+
vpbroadcastd zmm23,DWORD PTR [rsi+0x1c]
|
|
2672
|
+
vpbroadcastd zmm24,DWORD PTR [rsi+0x20]
|
|
2673
|
+
vpbroadcastd zmm25,DWORD PTR [rsi+0x24]
|
|
2674
|
+
vpbroadcastd zmm26,DWORD PTR [rsi+0x28]
|
|
2675
|
+
vpbroadcastd zmm27,DWORD PTR [rsi+0x2c]
|
|
2676
|
+
vpbroadcastd zmm28,DWORD PTR [rsi+0x30]
|
|
2677
|
+
vpbroadcastd zmm29,DWORD PTR [rsi+0x34]
|
|
2678
|
+
vpbroadcastd zmm30,DWORD PTR [rsi+0x38]
|
|
2679
|
+
vpbroadcastd zmm31,DWORD PTR [rsi+0x3c]
|
|
2680
|
+
vpbroadcastd zmm0,DWORD PTR [rdi]
|
|
2681
|
+
vpbroadcastd zmm1,DWORD PTR [rdi+0x4]
|
|
2682
|
+
vpbroadcastd zmm2,DWORD PTR [rdi+0x8]
|
|
2683
|
+
vpbroadcastd zmm3,DWORD PTR [rdi+0xc]
|
|
2684
|
+
vpbroadcastd zmm4,DWORD PTR [rdi+0x10]
|
|
2685
|
+
vpbroadcastd zmm5,DWORD PTR [rdi+0x14]
|
|
2686
|
+
vpbroadcastd zmm6,DWORD PTR [rdi+0x18]
|
|
2687
|
+
vpbroadcastd zmm7,DWORD PTR [rdi+0x1c]
|
|
2688
|
+
vpbroadcastd zmm8,DWORD PTR [BLAKE3_IV_0+rip]
|
|
2689
|
+
vpbroadcastd zmm9,DWORD PTR [BLAKE3_IV_1+rip]
|
|
2690
|
+
vpbroadcastd zmm10,DWORD PTR [BLAKE3_IV_2+rip]
|
|
2691
|
+
vpbroadcastd zmm11,DWORD PTR [BLAKE3_IV_3+rip]
|
|
2692
|
+
vmovdqa32 zmm12,ZMMWORD PTR [rsp]
|
|
2693
|
+
vmovdqa32 zmm13,ZMMWORD PTR [rsp+0x40]
|
|
2694
|
+
vpbroadcastd zmm14,edx
|
|
2695
|
+
vpbroadcastd zmm15,r8d
|
|
2696
|
+
vpaddd zmm0,zmm0,zmm16
|
|
2697
|
+
vpaddd zmm1,zmm1,zmm18
|
|
2698
|
+
vpaddd zmm2,zmm2,zmm20
|
|
2699
|
+
vpaddd zmm3,zmm3,zmm22
|
|
2700
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2701
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2702
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2703
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2704
|
+
vpxord zmm12,zmm12,zmm0
|
|
2705
|
+
vpxord zmm13,zmm13,zmm1
|
|
2706
|
+
vpxord zmm14,zmm14,zmm2
|
|
2707
|
+
vpxord zmm15,zmm15,zmm3
|
|
2708
|
+
vprord zmm12,zmm12,0x10
|
|
2709
|
+
vprord zmm13,zmm13,0x10
|
|
2710
|
+
vprord zmm14,zmm14,0x10
|
|
2711
|
+
vprord zmm15,zmm15,0x10
|
|
2712
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2713
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2714
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2715
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2716
|
+
vpxord zmm4,zmm4,zmm8
|
|
2717
|
+
vpxord zmm5,zmm5,zmm9
|
|
2718
|
+
vpxord zmm6,zmm6,zmm10
|
|
2719
|
+
vpxord zmm7,zmm7,zmm11
|
|
2720
|
+
vprord zmm4,zmm4,0xc
|
|
2721
|
+
vprord zmm5,zmm5,0xc
|
|
2722
|
+
vprord zmm6,zmm6,0xc
|
|
2723
|
+
vprord zmm7,zmm7,0xc
|
|
2724
|
+
vpaddd zmm0,zmm0,zmm17
|
|
2725
|
+
vpaddd zmm1,zmm1,zmm19
|
|
2726
|
+
vpaddd zmm2,zmm2,zmm21
|
|
2727
|
+
vpaddd zmm3,zmm3,zmm23
|
|
2728
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2729
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2730
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2731
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2732
|
+
vpxord zmm12,zmm12,zmm0
|
|
2733
|
+
vpxord zmm13,zmm13,zmm1
|
|
2734
|
+
vpxord zmm14,zmm14,zmm2
|
|
2735
|
+
vpxord zmm15,zmm15,zmm3
|
|
2736
|
+
vprord zmm12,zmm12,0x8
|
|
2737
|
+
vprord zmm13,zmm13,0x8
|
|
2738
|
+
vprord zmm14,zmm14,0x8
|
|
2739
|
+
vprord zmm15,zmm15,0x8
|
|
2740
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2741
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2742
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2743
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2744
|
+
vpxord zmm4,zmm4,zmm8
|
|
2745
|
+
vpxord zmm5,zmm5,zmm9
|
|
2746
|
+
vpxord zmm6,zmm6,zmm10
|
|
2747
|
+
vpxord zmm7,zmm7,zmm11
|
|
2748
|
+
vprord zmm4,zmm4,0x7
|
|
2749
|
+
vprord zmm5,zmm5,0x7
|
|
2750
|
+
vprord zmm6,zmm6,0x7
|
|
2751
|
+
vprord zmm7,zmm7,0x7
|
|
2752
|
+
vpaddd zmm0,zmm0,zmm24
|
|
2753
|
+
vpaddd zmm1,zmm1,zmm26
|
|
2754
|
+
vpaddd zmm2,zmm2,zmm28
|
|
2755
|
+
vpaddd zmm3,zmm3,zmm30
|
|
2756
|
+
vpaddd zmm0,zmm0,zmm5
|
|
2757
|
+
vpaddd zmm1,zmm1,zmm6
|
|
2758
|
+
vpaddd zmm2,zmm2,zmm7
|
|
2759
|
+
vpaddd zmm3,zmm3,zmm4
|
|
2760
|
+
vpxord zmm15,zmm15,zmm0
|
|
2761
|
+
vpxord zmm12,zmm12,zmm1
|
|
2762
|
+
vpxord zmm13,zmm13,zmm2
|
|
2763
|
+
vpxord zmm14,zmm14,zmm3
|
|
2764
|
+
vprord zmm15,zmm15,0x10
|
|
2765
|
+
vprord zmm12,zmm12,0x10
|
|
2766
|
+
vprord zmm13,zmm13,0x10
|
|
2767
|
+
vprord zmm14,zmm14,0x10
|
|
2768
|
+
vpaddd zmm10,zmm10,zmm15
|
|
2769
|
+
vpaddd zmm11,zmm11,zmm12
|
|
2770
|
+
vpaddd zmm8,zmm8,zmm13
|
|
2771
|
+
vpaddd zmm9,zmm9,zmm14
|
|
2772
|
+
vpxord zmm5,zmm5,zmm10
|
|
2773
|
+
vpxord zmm6,zmm6,zmm11
|
|
2774
|
+
vpxord zmm7,zmm7,zmm8
|
|
2775
|
+
vpxord zmm4,zmm4,zmm9
|
|
2776
|
+
vprord zmm5,zmm5,0xc
|
|
2777
|
+
vprord zmm6,zmm6,0xc
|
|
2778
|
+
vprord zmm7,zmm7,0xc
|
|
2779
|
+
vprord zmm4,zmm4,0xc
|
|
2780
|
+
vpaddd zmm0,zmm0,zmm25
|
|
2781
|
+
vpaddd zmm1,zmm1,zmm27
|
|
2782
|
+
vpaddd zmm2,zmm2,zmm29
|
|
2783
|
+
vpaddd zmm3,zmm3,zmm31
|
|
2784
|
+
vpaddd zmm0,zmm0,zmm5
|
|
2785
|
+
vpaddd zmm1,zmm1,zmm6
|
|
2786
|
+
vpaddd zmm2,zmm2,zmm7
|
|
2787
|
+
vpaddd zmm3,zmm3,zmm4
|
|
2788
|
+
vpxord zmm15,zmm15,zmm0
|
|
2789
|
+
vpxord zmm12,zmm12,zmm1
|
|
2790
|
+
vpxord zmm13,zmm13,zmm2
|
|
2791
|
+
vpxord zmm14,zmm14,zmm3
|
|
2792
|
+
vprord zmm15,zmm15,0x8
|
|
2793
|
+
vprord zmm12,zmm12,0x8
|
|
2794
|
+
vprord zmm13,zmm13,0x8
|
|
2795
|
+
vprord zmm14,zmm14,0x8
|
|
2796
|
+
vpaddd zmm10,zmm10,zmm15
|
|
2797
|
+
vpaddd zmm11,zmm11,zmm12
|
|
2798
|
+
vpaddd zmm8,zmm8,zmm13
|
|
2799
|
+
vpaddd zmm9,zmm9,zmm14
|
|
2800
|
+
vpxord zmm5,zmm5,zmm10
|
|
2801
|
+
vpxord zmm6,zmm6,zmm11
|
|
2802
|
+
vpxord zmm7,zmm7,zmm8
|
|
2803
|
+
vpxord zmm4,zmm4,zmm9
|
|
2804
|
+
vprord zmm5,zmm5,0x7
|
|
2805
|
+
vprord zmm6,zmm6,0x7
|
|
2806
|
+
vprord zmm7,zmm7,0x7
|
|
2807
|
+
vprord zmm4,zmm4,0x7
|
|
2808
|
+
vpaddd zmm0,zmm0,zmm18
|
|
2809
|
+
vpaddd zmm1,zmm1,zmm19
|
|
2810
|
+
vpaddd zmm2,zmm2,zmm23
|
|
2811
|
+
vpaddd zmm3,zmm3,zmm20
|
|
2812
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2813
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2814
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2815
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2816
|
+
vpxord zmm12,zmm12,zmm0
|
|
2817
|
+
vpxord zmm13,zmm13,zmm1
|
|
2818
|
+
vpxord zmm14,zmm14,zmm2
|
|
2819
|
+
vpxord zmm15,zmm15,zmm3
|
|
2820
|
+
vprord zmm12,zmm12,0x10
|
|
2821
|
+
vprord zmm13,zmm13,0x10
|
|
2822
|
+
vprord zmm14,zmm14,0x10
|
|
2823
|
+
vprord zmm15,zmm15,0x10
|
|
2824
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2825
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2826
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2827
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2828
|
+
vpxord zmm4,zmm4,zmm8
|
|
2829
|
+
vpxord zmm5,zmm5,zmm9
|
|
2830
|
+
vpxord zmm6,zmm6,zmm10
|
|
2831
|
+
vpxord zmm7,zmm7,zmm11
|
|
2832
|
+
vprord zmm4,zmm4,0xc
|
|
2833
|
+
vprord zmm5,zmm5,0xc
|
|
2834
|
+
vprord zmm6,zmm6,0xc
|
|
2835
|
+
vprord zmm7,zmm7,0xc
|
|
2836
|
+
vpaddd zmm0,zmm0,zmm22
|
|
2837
|
+
vpaddd zmm1,zmm1,zmm26
|
|
2838
|
+
vpaddd zmm2,zmm2,zmm16
|
|
2839
|
+
vpaddd zmm3,zmm3,zmm29
|
|
2840
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2841
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2842
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2843
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2844
|
+
vpxord zmm12,zmm12,zmm0
|
|
2845
|
+
vpxord zmm13,zmm13,zmm1
|
|
2846
|
+
vpxord zmm14,zmm14,zmm2
|
|
2847
|
+
vpxord zmm15,zmm15,zmm3
|
|
2848
|
+
vprord zmm12,zmm12,0x8
|
|
2849
|
+
vprord zmm13,zmm13,0x8
|
|
2850
|
+
vprord zmm14,zmm14,0x8
|
|
2851
|
+
vprord zmm15,zmm15,0x8
|
|
2852
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2853
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2854
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2855
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2856
|
+
vpxord zmm4,zmm4,zmm8
|
|
2857
|
+
vpxord zmm5,zmm5,zmm9
|
|
2858
|
+
vpxord zmm6,zmm6,zmm10
|
|
2859
|
+
vpxord zmm7,zmm7,zmm11
|
|
2860
|
+
vprord zmm4,zmm4,0x7
|
|
2861
|
+
vprord zmm5,zmm5,0x7
|
|
2862
|
+
vprord zmm6,zmm6,0x7
|
|
2863
|
+
vprord zmm7,zmm7,0x7
|
|
2864
|
+
vpaddd zmm0,zmm0,zmm17
|
|
2865
|
+
vpaddd zmm1,zmm1,zmm28
|
|
2866
|
+
vpaddd zmm2,zmm2,zmm25
|
|
2867
|
+
vpaddd zmm3,zmm3,zmm31
|
|
2868
|
+
vpaddd zmm0,zmm0,zmm5
|
|
2869
|
+
vpaddd zmm1,zmm1,zmm6
|
|
2870
|
+
vpaddd zmm2,zmm2,zmm7
|
|
2871
|
+
vpaddd zmm3,zmm3,zmm4
|
|
2872
|
+
vpxord zmm15,zmm15,zmm0
|
|
2873
|
+
vpxord zmm12,zmm12,zmm1
|
|
2874
|
+
vpxord zmm13,zmm13,zmm2
|
|
2875
|
+
vpxord zmm14,zmm14,zmm3
|
|
2876
|
+
vprord zmm15,zmm15,0x10
|
|
2877
|
+
vprord zmm12,zmm12,0x10
|
|
2878
|
+
vprord zmm13,zmm13,0x10
|
|
2879
|
+
vprord zmm14,zmm14,0x10
|
|
2880
|
+
vpaddd zmm10,zmm10,zmm15
|
|
2881
|
+
vpaddd zmm11,zmm11,zmm12
|
|
2882
|
+
vpaddd zmm8,zmm8,zmm13
|
|
2883
|
+
vpaddd zmm9,zmm9,zmm14
|
|
2884
|
+
vpxord zmm5,zmm5,zmm10
|
|
2885
|
+
vpxord zmm6,zmm6,zmm11
|
|
2886
|
+
vpxord zmm7,zmm7,zmm8
|
|
2887
|
+
vpxord zmm4,zmm4,zmm9
|
|
2888
|
+
vprord zmm5,zmm5,0xc
|
|
2889
|
+
vprord zmm6,zmm6,0xc
|
|
2890
|
+
vprord zmm7,zmm7,0xc
|
|
2891
|
+
vprord zmm4,zmm4,0xc
|
|
2892
|
+
vpaddd zmm0,zmm0,zmm27
|
|
2893
|
+
vpaddd zmm1,zmm1,zmm21
|
|
2894
|
+
vpaddd zmm2,zmm2,zmm30
|
|
2895
|
+
vpaddd zmm3,zmm3,zmm24
|
|
2896
|
+
vpaddd zmm0,zmm0,zmm5
|
|
2897
|
+
vpaddd zmm1,zmm1,zmm6
|
|
2898
|
+
vpaddd zmm2,zmm2,zmm7
|
|
2899
|
+
vpaddd zmm3,zmm3,zmm4
|
|
2900
|
+
vpxord zmm15,zmm15,zmm0
|
|
2901
|
+
vpxord zmm12,zmm12,zmm1
|
|
2902
|
+
vpxord zmm13,zmm13,zmm2
|
|
2903
|
+
vpxord zmm14,zmm14,zmm3
|
|
2904
|
+
vprord zmm15,zmm15,0x8
|
|
2905
|
+
vprord zmm12,zmm12,0x8
|
|
2906
|
+
vprord zmm13,zmm13,0x8
|
|
2907
|
+
vprord zmm14,zmm14,0x8
|
|
2908
|
+
vpaddd zmm10,zmm10,zmm15
|
|
2909
|
+
vpaddd zmm11,zmm11,zmm12
|
|
2910
|
+
vpaddd zmm8,zmm8,zmm13
|
|
2911
|
+
vpaddd zmm9,zmm9,zmm14
|
|
2912
|
+
vpxord zmm5,zmm5,zmm10
|
|
2913
|
+
vpxord zmm6,zmm6,zmm11
|
|
2914
|
+
vpxord zmm7,zmm7,zmm8
|
|
2915
|
+
vpxord zmm4,zmm4,zmm9
|
|
2916
|
+
vprord zmm5,zmm5,0x7
|
|
2917
|
+
vprord zmm6,zmm6,0x7
|
|
2918
|
+
vprord zmm7,zmm7,0x7
|
|
2919
|
+
vprord zmm4,zmm4,0x7
|
|
2920
|
+
vpaddd zmm0,zmm0,zmm19
|
|
2921
|
+
vpaddd zmm1,zmm1,zmm26
|
|
2922
|
+
vpaddd zmm2,zmm2,zmm29
|
|
2923
|
+
vpaddd zmm3,zmm3,zmm23
|
|
2924
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2925
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2926
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2927
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2928
|
+
vpxord zmm12,zmm12,zmm0
|
|
2929
|
+
vpxord zmm13,zmm13,zmm1
|
|
2930
|
+
vpxord zmm14,zmm14,zmm2
|
|
2931
|
+
vpxord zmm15,zmm15,zmm3
|
|
2932
|
+
vprord zmm12,zmm12,0x10
|
|
2933
|
+
vprord zmm13,zmm13,0x10
|
|
2934
|
+
vprord zmm14,zmm14,0x10
|
|
2935
|
+
vprord zmm15,zmm15,0x10
|
|
2936
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2937
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2938
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2939
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2940
|
+
vpxord zmm4,zmm4,zmm8
|
|
2941
|
+
vpxord zmm5,zmm5,zmm9
|
|
2942
|
+
vpxord zmm6,zmm6,zmm10
|
|
2943
|
+
vpxord zmm7,zmm7,zmm11
|
|
2944
|
+
vprord zmm4,zmm4,0xc
|
|
2945
|
+
vprord zmm5,zmm5,0xc
|
|
2946
|
+
vprord zmm6,zmm6,0xc
|
|
2947
|
+
vprord zmm7,zmm7,0xc
|
|
2948
|
+
vpaddd zmm0,zmm0,zmm20
|
|
2949
|
+
vpaddd zmm1,zmm1,zmm28
|
|
2950
|
+
vpaddd zmm2,zmm2,zmm18
|
|
2951
|
+
vpaddd zmm3,zmm3,zmm30
|
|
2952
|
+
vpaddd zmm0,zmm0,zmm4
|
|
2953
|
+
vpaddd zmm1,zmm1,zmm5
|
|
2954
|
+
vpaddd zmm2,zmm2,zmm6
|
|
2955
|
+
vpaddd zmm3,zmm3,zmm7
|
|
2956
|
+
vpxord zmm12,zmm12,zmm0
|
|
2957
|
+
vpxord zmm13,zmm13,zmm1
|
|
2958
|
+
vpxord zmm14,zmm14,zmm2
|
|
2959
|
+
vpxord zmm15,zmm15,zmm3
|
|
2960
|
+
vprord zmm12,zmm12,0x8
|
|
2961
|
+
vprord zmm13,zmm13,0x8
|
|
2962
|
+
vprord zmm14,zmm14,0x8
|
|
2963
|
+
vprord zmm15,zmm15,0x8
|
|
2964
|
+
vpaddd zmm8,zmm8,zmm12
|
|
2965
|
+
vpaddd zmm9,zmm9,zmm13
|
|
2966
|
+
vpaddd zmm10,zmm10,zmm14
|
|
2967
|
+
vpaddd zmm11,zmm11,zmm15
|
|
2968
|
+
vpxord zmm4,zmm4,zmm8
|
|
2969
|
+
vpxord zmm5,zmm5,zmm9
|
|
2970
|
+
vpxord zmm6,zmm6,zmm10
|
|
2971
|
+
vpxord zmm7,zmm7,zmm11
|
|
2972
|
+
vprord zmm4,zmm4,0x7
|
|
2973
|
+
vprord zmm5,zmm5,0x7
|
|
2974
|
+
vprord zmm6,zmm6,0x7
|
|
2975
|
+
vprord zmm7,zmm7,0x7
|
|
2976
|
+
vpaddd zmm0,zmm0,zmm22
|
|
2977
|
+
vpaddd zmm1,zmm1,zmm25
|
|
2978
|
+
vpaddd zmm2,zmm2,zmm27
|
|
2979
|
+
vpaddd zmm3,zmm3,zmm24
|
|
2980
|
+
vpaddd zmm0,zmm0,zmm5
|
|
2981
|
+
vpaddd zmm1,zmm1,zmm6
|
|
2982
|
+
vpaddd zmm2,zmm2,zmm7
|
|
2983
|
+
vpaddd zmm3,zmm3,zmm4
|
|
2984
|
+
vpxord zmm15,zmm15,zmm0
|
|
2985
|
+
vpxord zmm12,zmm12,zmm1
|
|
2986
|
+
vpxord zmm13,zmm13,zmm2
|
|
2987
|
+
vpxord zmm14,zmm14,zmm3
|
|
2988
|
+
vprord zmm15,zmm15,0x10
|
|
2989
|
+
vprord zmm12,zmm12,0x10
|
|
2990
|
+
vprord zmm13,zmm13,0x10
|
|
2991
|
+
vprord zmm14,zmm14,0x10
|
|
2992
|
+
vpaddd zmm10,zmm10,zmm15
|
|
2993
|
+
vpaddd zmm11,zmm11,zmm12
|
|
2994
|
+
vpaddd zmm8,zmm8,zmm13
|
|
2995
|
+
vpaddd zmm9,zmm9,zmm14
|
|
2996
|
+
vpxord zmm5,zmm5,zmm10
|
|
2997
|
+
vpxord zmm6,zmm6,zmm11
|
|
2998
|
+
vpxord zmm7,zmm7,zmm8
|
|
2999
|
+
vpxord zmm4,zmm4,zmm9
|
|
3000
|
+
vprord zmm5,zmm5,0xc
|
|
3001
|
+
vprord zmm6,zmm6,0xc
|
|
3002
|
+
vprord zmm7,zmm7,0xc
|
|
3003
|
+
vprord zmm4,zmm4,0xc
|
|
3004
|
+
vpaddd zmm0,zmm0,zmm21
|
|
3005
|
+
vpaddd zmm1,zmm1,zmm16
|
|
3006
|
+
vpaddd zmm2,zmm2,zmm31
|
|
3007
|
+
vpaddd zmm3,zmm3,zmm17
|
|
3008
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3009
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3010
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3011
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3012
|
+
vpxord zmm15,zmm15,zmm0
|
|
3013
|
+
vpxord zmm12,zmm12,zmm1
|
|
3014
|
+
vpxord zmm13,zmm13,zmm2
|
|
3015
|
+
vpxord zmm14,zmm14,zmm3
|
|
3016
|
+
vprord zmm15,zmm15,0x8
|
|
3017
|
+
vprord zmm12,zmm12,0x8
|
|
3018
|
+
vprord zmm13,zmm13,0x8
|
|
3019
|
+
vprord zmm14,zmm14,0x8
|
|
3020
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3021
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3022
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3023
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3024
|
+
vpxord zmm5,zmm5,zmm10
|
|
3025
|
+
vpxord zmm6,zmm6,zmm11
|
|
3026
|
+
vpxord zmm7,zmm7,zmm8
|
|
3027
|
+
vpxord zmm4,zmm4,zmm9
|
|
3028
|
+
vprord zmm5,zmm5,0x7
|
|
3029
|
+
vprord zmm6,zmm6,0x7
|
|
3030
|
+
vprord zmm7,zmm7,0x7
|
|
3031
|
+
vprord zmm4,zmm4,0x7
|
|
3032
|
+
vpaddd zmm0,zmm0,zmm26
|
|
3033
|
+
vpaddd zmm1,zmm1,zmm28
|
|
3034
|
+
vpaddd zmm2,zmm2,zmm30
|
|
3035
|
+
vpaddd zmm3,zmm3,zmm29
|
|
3036
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3037
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3038
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3039
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3040
|
+
vpxord zmm12,zmm12,zmm0
|
|
3041
|
+
vpxord zmm13,zmm13,zmm1
|
|
3042
|
+
vpxord zmm14,zmm14,zmm2
|
|
3043
|
+
vpxord zmm15,zmm15,zmm3
|
|
3044
|
+
vprord zmm12,zmm12,0x10
|
|
3045
|
+
vprord zmm13,zmm13,0x10
|
|
3046
|
+
vprord zmm14,zmm14,0x10
|
|
3047
|
+
vprord zmm15,zmm15,0x10
|
|
3048
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3049
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3050
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3051
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3052
|
+
vpxord zmm4,zmm4,zmm8
|
|
3053
|
+
vpxord zmm5,zmm5,zmm9
|
|
3054
|
+
vpxord zmm6,zmm6,zmm10
|
|
3055
|
+
vpxord zmm7,zmm7,zmm11
|
|
3056
|
+
vprord zmm4,zmm4,0xc
|
|
3057
|
+
vprord zmm5,zmm5,0xc
|
|
3058
|
+
vprord zmm6,zmm6,0xc
|
|
3059
|
+
vprord zmm7,zmm7,0xc
|
|
3060
|
+
vpaddd zmm0,zmm0,zmm23
|
|
3061
|
+
vpaddd zmm1,zmm1,zmm25
|
|
3062
|
+
vpaddd zmm2,zmm2,zmm19
|
|
3063
|
+
vpaddd zmm3,zmm3,zmm31
|
|
3064
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3065
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3066
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3067
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3068
|
+
vpxord zmm12,zmm12,zmm0
|
|
3069
|
+
vpxord zmm13,zmm13,zmm1
|
|
3070
|
+
vpxord zmm14,zmm14,zmm2
|
|
3071
|
+
vpxord zmm15,zmm15,zmm3
|
|
3072
|
+
vprord zmm12,zmm12,0x8
|
|
3073
|
+
vprord zmm13,zmm13,0x8
|
|
3074
|
+
vprord zmm14,zmm14,0x8
|
|
3075
|
+
vprord zmm15,zmm15,0x8
|
|
3076
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3077
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3078
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3079
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3080
|
+
vpxord zmm4,zmm4,zmm8
|
|
3081
|
+
vpxord zmm5,zmm5,zmm9
|
|
3082
|
+
vpxord zmm6,zmm6,zmm10
|
|
3083
|
+
vpxord zmm7,zmm7,zmm11
|
|
3084
|
+
vprord zmm4,zmm4,0x7
|
|
3085
|
+
vprord zmm5,zmm5,0x7
|
|
3086
|
+
vprord zmm6,zmm6,0x7
|
|
3087
|
+
vprord zmm7,zmm7,0x7
|
|
3088
|
+
vpaddd zmm0,zmm0,zmm20
|
|
3089
|
+
vpaddd zmm1,zmm1,zmm27
|
|
3090
|
+
vpaddd zmm2,zmm2,zmm21
|
|
3091
|
+
vpaddd zmm3,zmm3,zmm17
|
|
3092
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3093
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3094
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3095
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3096
|
+
vpxord zmm15,zmm15,zmm0
|
|
3097
|
+
vpxord zmm12,zmm12,zmm1
|
|
3098
|
+
vpxord zmm13,zmm13,zmm2
|
|
3099
|
+
vpxord zmm14,zmm14,zmm3
|
|
3100
|
+
vprord zmm15,zmm15,0x10
|
|
3101
|
+
vprord zmm12,zmm12,0x10
|
|
3102
|
+
vprord zmm13,zmm13,0x10
|
|
3103
|
+
vprord zmm14,zmm14,0x10
|
|
3104
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3105
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3106
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3107
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3108
|
+
vpxord zmm5,zmm5,zmm10
|
|
3109
|
+
vpxord zmm6,zmm6,zmm11
|
|
3110
|
+
vpxord zmm7,zmm7,zmm8
|
|
3111
|
+
vpxord zmm4,zmm4,zmm9
|
|
3112
|
+
vprord zmm5,zmm5,0xc
|
|
3113
|
+
vprord zmm6,zmm6,0xc
|
|
3114
|
+
vprord zmm7,zmm7,0xc
|
|
3115
|
+
vprord zmm4,zmm4,0xc
|
|
3116
|
+
vpaddd zmm0,zmm0,zmm16
|
|
3117
|
+
vpaddd zmm1,zmm1,zmm18
|
|
3118
|
+
vpaddd zmm2,zmm2,zmm24
|
|
3119
|
+
vpaddd zmm3,zmm3,zmm22
|
|
3120
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3121
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3122
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3123
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3124
|
+
vpxord zmm15,zmm15,zmm0
|
|
3125
|
+
vpxord zmm12,zmm12,zmm1
|
|
3126
|
+
vpxord zmm13,zmm13,zmm2
|
|
3127
|
+
vpxord zmm14,zmm14,zmm3
|
|
3128
|
+
vprord zmm15,zmm15,0x8
|
|
3129
|
+
vprord zmm12,zmm12,0x8
|
|
3130
|
+
vprord zmm13,zmm13,0x8
|
|
3131
|
+
vprord zmm14,zmm14,0x8
|
|
3132
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3133
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3134
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3135
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3136
|
+
vpxord zmm5,zmm5,zmm10
|
|
3137
|
+
vpxord zmm6,zmm6,zmm11
|
|
3138
|
+
vpxord zmm7,zmm7,zmm8
|
|
3139
|
+
vpxord zmm4,zmm4,zmm9
|
|
3140
|
+
vprord zmm5,zmm5,0x7
|
|
3141
|
+
vprord zmm6,zmm6,0x7
|
|
3142
|
+
vprord zmm7,zmm7,0x7
|
|
3143
|
+
vprord zmm4,zmm4,0x7
|
|
3144
|
+
vpaddd zmm0,zmm0,zmm28
|
|
3145
|
+
vpaddd zmm1,zmm1,zmm25
|
|
3146
|
+
vpaddd zmm2,zmm2,zmm31
|
|
3147
|
+
vpaddd zmm3,zmm3,zmm30
|
|
3148
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3149
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3150
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3151
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3152
|
+
vpxord zmm12,zmm12,zmm0
|
|
3153
|
+
vpxord zmm13,zmm13,zmm1
|
|
3154
|
+
vpxord zmm14,zmm14,zmm2
|
|
3155
|
+
vpxord zmm15,zmm15,zmm3
|
|
3156
|
+
vprord zmm12,zmm12,0x10
|
|
3157
|
+
vprord zmm13,zmm13,0x10
|
|
3158
|
+
vprord zmm14,zmm14,0x10
|
|
3159
|
+
vprord zmm15,zmm15,0x10
|
|
3160
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3161
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3162
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3163
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3164
|
+
vpxord zmm4,zmm4,zmm8
|
|
3165
|
+
vpxord zmm5,zmm5,zmm9
|
|
3166
|
+
vpxord zmm6,zmm6,zmm10
|
|
3167
|
+
vpxord zmm7,zmm7,zmm11
|
|
3168
|
+
vprord zmm4,zmm4,0xc
|
|
3169
|
+
vprord zmm5,zmm5,0xc
|
|
3170
|
+
vprord zmm6,zmm6,0xc
|
|
3171
|
+
vprord zmm7,zmm7,0xc
|
|
3172
|
+
vpaddd zmm0,zmm0,zmm29
|
|
3173
|
+
vpaddd zmm1,zmm1,zmm27
|
|
3174
|
+
vpaddd zmm2,zmm2,zmm26
|
|
3175
|
+
vpaddd zmm3,zmm3,zmm24
|
|
3176
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3177
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3178
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3179
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3180
|
+
vpxord zmm12,zmm12,zmm0
|
|
3181
|
+
vpxord zmm13,zmm13,zmm1
|
|
3182
|
+
vpxord zmm14,zmm14,zmm2
|
|
3183
|
+
vpxord zmm15,zmm15,zmm3
|
|
3184
|
+
vprord zmm12,zmm12,0x8
|
|
3185
|
+
vprord zmm13,zmm13,0x8
|
|
3186
|
+
vprord zmm14,zmm14,0x8
|
|
3187
|
+
vprord zmm15,zmm15,0x8
|
|
3188
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3189
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3190
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3191
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3192
|
+
vpxord zmm4,zmm4,zmm8
|
|
3193
|
+
vpxord zmm5,zmm5,zmm9
|
|
3194
|
+
vpxord zmm6,zmm6,zmm10
|
|
3195
|
+
vpxord zmm7,zmm7,zmm11
|
|
3196
|
+
vprord zmm4,zmm4,0x7
|
|
3197
|
+
vprord zmm5,zmm5,0x7
|
|
3198
|
+
vprord zmm6,zmm6,0x7
|
|
3199
|
+
vprord zmm7,zmm7,0x7
|
|
3200
|
+
vpaddd zmm0,zmm0,zmm23
|
|
3201
|
+
vpaddd zmm1,zmm1,zmm21
|
|
3202
|
+
vpaddd zmm2,zmm2,zmm16
|
|
3203
|
+
vpaddd zmm3,zmm3,zmm22
|
|
3204
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3205
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3206
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3207
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3208
|
+
vpxord zmm15,zmm15,zmm0
|
|
3209
|
+
vpxord zmm12,zmm12,zmm1
|
|
3210
|
+
vpxord zmm13,zmm13,zmm2
|
|
3211
|
+
vpxord zmm14,zmm14,zmm3
|
|
3212
|
+
vprord zmm15,zmm15,0x10
|
|
3213
|
+
vprord zmm12,zmm12,0x10
|
|
3214
|
+
vprord zmm13,zmm13,0x10
|
|
3215
|
+
vprord zmm14,zmm14,0x10
|
|
3216
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3217
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3218
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3219
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3220
|
+
vpxord zmm5,zmm5,zmm10
|
|
3221
|
+
vpxord zmm6,zmm6,zmm11
|
|
3222
|
+
vpxord zmm7,zmm7,zmm8
|
|
3223
|
+
vpxord zmm4,zmm4,zmm9
|
|
3224
|
+
vprord zmm5,zmm5,0xc
|
|
3225
|
+
vprord zmm6,zmm6,0xc
|
|
3226
|
+
vprord zmm7,zmm7,0xc
|
|
3227
|
+
vprord zmm4,zmm4,0xc
|
|
3228
|
+
vpaddd zmm0,zmm0,zmm18
|
|
3229
|
+
vpaddd zmm1,zmm1,zmm19
|
|
3230
|
+
vpaddd zmm2,zmm2,zmm17
|
|
3231
|
+
vpaddd zmm3,zmm3,zmm20
|
|
3232
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3233
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3234
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3235
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3236
|
+
vpxord zmm15,zmm15,zmm0
|
|
3237
|
+
vpxord zmm12,zmm12,zmm1
|
|
3238
|
+
vpxord zmm13,zmm13,zmm2
|
|
3239
|
+
vpxord zmm14,zmm14,zmm3
|
|
3240
|
+
vprord zmm15,zmm15,0x8
|
|
3241
|
+
vprord zmm12,zmm12,0x8
|
|
3242
|
+
vprord zmm13,zmm13,0x8
|
|
3243
|
+
vprord zmm14,zmm14,0x8
|
|
3244
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3245
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3246
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3247
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3248
|
+
vpxord zmm5,zmm5,zmm10
|
|
3249
|
+
vpxord zmm6,zmm6,zmm11
|
|
3250
|
+
vpxord zmm7,zmm7,zmm8
|
|
3251
|
+
vpxord zmm4,zmm4,zmm9
|
|
3252
|
+
vprord zmm5,zmm5,0x7
|
|
3253
|
+
vprord zmm6,zmm6,0x7
|
|
3254
|
+
vprord zmm7,zmm7,0x7
|
|
3255
|
+
vprord zmm4,zmm4,0x7
|
|
3256
|
+
vpaddd zmm0,zmm0,zmm25
|
|
3257
|
+
vpaddd zmm1,zmm1,zmm27
|
|
3258
|
+
vpaddd zmm2,zmm2,zmm24
|
|
3259
|
+
vpaddd zmm3,zmm3,zmm31
|
|
3260
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3261
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3262
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3263
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3264
|
+
vpxord zmm12,zmm12,zmm0
|
|
3265
|
+
vpxord zmm13,zmm13,zmm1
|
|
3266
|
+
vpxord zmm14,zmm14,zmm2
|
|
3267
|
+
vpxord zmm15,zmm15,zmm3
|
|
3268
|
+
vprord zmm12,zmm12,0x10
|
|
3269
|
+
vprord zmm13,zmm13,0x10
|
|
3270
|
+
vprord zmm14,zmm14,0x10
|
|
3271
|
+
vprord zmm15,zmm15,0x10
|
|
3272
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3273
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3274
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3275
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3276
|
+
vpxord zmm4,zmm4,zmm8
|
|
3277
|
+
vpxord zmm5,zmm5,zmm9
|
|
3278
|
+
vpxord zmm6,zmm6,zmm10
|
|
3279
|
+
vpxord zmm7,zmm7,zmm11
|
|
3280
|
+
vprord zmm4,zmm4,0xc
|
|
3281
|
+
vprord zmm5,zmm5,0xc
|
|
3282
|
+
vprord zmm6,zmm6,0xc
|
|
3283
|
+
vprord zmm7,zmm7,0xc
|
|
3284
|
+
vpaddd zmm0,zmm0,zmm30
|
|
3285
|
+
vpaddd zmm1,zmm1,zmm21
|
|
3286
|
+
vpaddd zmm2,zmm2,zmm28
|
|
3287
|
+
vpaddd zmm3,zmm3,zmm17
|
|
3288
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3289
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3290
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3291
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3292
|
+
vpxord zmm12,zmm12,zmm0
|
|
3293
|
+
vpxord zmm13,zmm13,zmm1
|
|
3294
|
+
vpxord zmm14,zmm14,zmm2
|
|
3295
|
+
vpxord zmm15,zmm15,zmm3
|
|
3296
|
+
vprord zmm12,zmm12,0x8
|
|
3297
|
+
vprord zmm13,zmm13,0x8
|
|
3298
|
+
vprord zmm14,zmm14,0x8
|
|
3299
|
+
vprord zmm15,zmm15,0x8
|
|
3300
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3301
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3302
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3303
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3304
|
+
vpxord zmm4,zmm4,zmm8
|
|
3305
|
+
vpxord zmm5,zmm5,zmm9
|
|
3306
|
+
vpxord zmm6,zmm6,zmm10
|
|
3307
|
+
vpxord zmm7,zmm7,zmm11
|
|
3308
|
+
vprord zmm4,zmm4,0x7
|
|
3309
|
+
vprord zmm5,zmm5,0x7
|
|
3310
|
+
vprord zmm6,zmm6,0x7
|
|
3311
|
+
vprord zmm7,zmm7,0x7
|
|
3312
|
+
vpaddd zmm0,zmm0,zmm29
|
|
3313
|
+
vpaddd zmm1,zmm1,zmm16
|
|
3314
|
+
vpaddd zmm2,zmm2,zmm18
|
|
3315
|
+
vpaddd zmm3,zmm3,zmm20
|
|
3316
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3317
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3318
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3319
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3320
|
+
vpxord zmm15,zmm15,zmm0
|
|
3321
|
+
vpxord zmm12,zmm12,zmm1
|
|
3322
|
+
vpxord zmm13,zmm13,zmm2
|
|
3323
|
+
vpxord zmm14,zmm14,zmm3
|
|
3324
|
+
vprord zmm15,zmm15,0x10
|
|
3325
|
+
vprord zmm12,zmm12,0x10
|
|
3326
|
+
vprord zmm13,zmm13,0x10
|
|
3327
|
+
vprord zmm14,zmm14,0x10
|
|
3328
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3329
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3330
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3331
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3332
|
+
vpxord zmm5,zmm5,zmm10
|
|
3333
|
+
vpxord zmm6,zmm6,zmm11
|
|
3334
|
+
vpxord zmm7,zmm7,zmm8
|
|
3335
|
+
vpxord zmm4,zmm4,zmm9
|
|
3336
|
+
vprord zmm5,zmm5,0xc
|
|
3337
|
+
vprord zmm6,zmm6,0xc
|
|
3338
|
+
vprord zmm7,zmm7,0xc
|
|
3339
|
+
vprord zmm4,zmm4,0xc
|
|
3340
|
+
vpaddd zmm0,zmm0,zmm19
|
|
3341
|
+
vpaddd zmm1,zmm1,zmm26
|
|
3342
|
+
vpaddd zmm2,zmm2,zmm22
|
|
3343
|
+
vpaddd zmm3,zmm3,zmm23
|
|
3344
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3345
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3346
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3347
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3348
|
+
vpxord zmm15,zmm15,zmm0
|
|
3349
|
+
vpxord zmm12,zmm12,zmm1
|
|
3350
|
+
vpxord zmm13,zmm13,zmm2
|
|
3351
|
+
vpxord zmm14,zmm14,zmm3
|
|
3352
|
+
vprord zmm15,zmm15,0x8
|
|
3353
|
+
vprord zmm12,zmm12,0x8
|
|
3354
|
+
vprord zmm13,zmm13,0x8
|
|
3355
|
+
vprord zmm14,zmm14,0x8
|
|
3356
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3357
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3358
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3359
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3360
|
+
vpxord zmm5,zmm5,zmm10
|
|
3361
|
+
vpxord zmm6,zmm6,zmm11
|
|
3362
|
+
vpxord zmm7,zmm7,zmm8
|
|
3363
|
+
vpxord zmm4,zmm4,zmm9
|
|
3364
|
+
vprord zmm5,zmm5,0x7
|
|
3365
|
+
vprord zmm6,zmm6,0x7
|
|
3366
|
+
vprord zmm7,zmm7,0x7
|
|
3367
|
+
vprord zmm4,zmm4,0x7
|
|
3368
|
+
vpaddd zmm0,zmm0,zmm27
|
|
3369
|
+
vpaddd zmm1,zmm1,zmm21
|
|
3370
|
+
vpaddd zmm2,zmm2,zmm17
|
|
3371
|
+
vpaddd zmm3,zmm3,zmm24
|
|
3372
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3373
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3374
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3375
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3376
|
+
vpxord zmm12,zmm12,zmm0
|
|
3377
|
+
vpxord zmm13,zmm13,zmm1
|
|
3378
|
+
vpxord zmm14,zmm14,zmm2
|
|
3379
|
+
vpxord zmm15,zmm15,zmm3
|
|
3380
|
+
vprord zmm12,zmm12,0x10
|
|
3381
|
+
vprord zmm13,zmm13,0x10
|
|
3382
|
+
vprord zmm14,zmm14,0x10
|
|
3383
|
+
vprord zmm15,zmm15,0x10
|
|
3384
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3385
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3386
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3387
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3388
|
+
vpxord zmm4,zmm4,zmm8
|
|
3389
|
+
vpxord zmm5,zmm5,zmm9
|
|
3390
|
+
vpxord zmm6,zmm6,zmm10
|
|
3391
|
+
vpxord zmm7,zmm7,zmm11
|
|
3392
|
+
vprord zmm4,zmm4,0xc
|
|
3393
|
+
vprord zmm5,zmm5,0xc
|
|
3394
|
+
vprord zmm6,zmm6,0xc
|
|
3395
|
+
vprord zmm7,zmm7,0xc
|
|
3396
|
+
vpaddd zmm0,zmm0,zmm31
|
|
3397
|
+
vpaddd zmm1,zmm1,zmm16
|
|
3398
|
+
vpaddd zmm2,zmm2,zmm25
|
|
3399
|
+
vpaddd zmm3,zmm3,zmm22
|
|
3400
|
+
vpaddd zmm0,zmm0,zmm4
|
|
3401
|
+
vpaddd zmm1,zmm1,zmm5
|
|
3402
|
+
vpaddd zmm2,zmm2,zmm6
|
|
3403
|
+
vpaddd zmm3,zmm3,zmm7
|
|
3404
|
+
vpxord zmm12,zmm12,zmm0
|
|
3405
|
+
vpxord zmm13,zmm13,zmm1
|
|
3406
|
+
vpxord zmm14,zmm14,zmm2
|
|
3407
|
+
vpxord zmm15,zmm15,zmm3
|
|
3408
|
+
vprord zmm12,zmm12,0x8
|
|
3409
|
+
vprord zmm13,zmm13,0x8
|
|
3410
|
+
vprord zmm14,zmm14,0x8
|
|
3411
|
+
vprord zmm15,zmm15,0x8
|
|
3412
|
+
vpaddd zmm8,zmm8,zmm12
|
|
3413
|
+
vpaddd zmm9,zmm9,zmm13
|
|
3414
|
+
vpaddd zmm10,zmm10,zmm14
|
|
3415
|
+
vpaddd zmm11,zmm11,zmm15
|
|
3416
|
+
vpxord zmm4,zmm4,zmm8
|
|
3417
|
+
vpxord zmm5,zmm5,zmm9
|
|
3418
|
+
vpxord zmm6,zmm6,zmm10
|
|
3419
|
+
vpxord zmm7,zmm7,zmm11
|
|
3420
|
+
vprord zmm4,zmm4,0x7
|
|
3421
|
+
vprord zmm5,zmm5,0x7
|
|
3422
|
+
vprord zmm6,zmm6,0x7
|
|
3423
|
+
vprord zmm7,zmm7,0x7
|
|
3424
|
+
vpaddd zmm0,zmm0,zmm30
|
|
3425
|
+
vpaddd zmm1,zmm1,zmm18
|
|
3426
|
+
vpaddd zmm2,zmm2,zmm19
|
|
3427
|
+
vpaddd zmm3,zmm3,zmm23
|
|
3428
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3429
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3430
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3431
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3432
|
+
vpxord zmm15,zmm15,zmm0
|
|
3433
|
+
vpxord zmm12,zmm12,zmm1
|
|
3434
|
+
vpxord zmm13,zmm13,zmm2
|
|
3435
|
+
vpxord zmm14,zmm14,zmm3
|
|
3436
|
+
vprord zmm15,zmm15,0x10
|
|
3437
|
+
vprord zmm12,zmm12,0x10
|
|
3438
|
+
vprord zmm13,zmm13,0x10
|
|
3439
|
+
vprord zmm14,zmm14,0x10
|
|
3440
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3441
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3442
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3443
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3444
|
+
vpxord zmm5,zmm5,zmm10
|
|
3445
|
+
vpxord zmm6,zmm6,zmm11
|
|
3446
|
+
vpxord zmm7,zmm7,zmm8
|
|
3447
|
+
vpxord zmm4,zmm4,zmm9
|
|
3448
|
+
vprord zmm5,zmm5,0xc
|
|
3449
|
+
vprord zmm6,zmm6,0xc
|
|
3450
|
+
vprord zmm7,zmm7,0xc
|
|
3451
|
+
vprord zmm4,zmm4,0xc
|
|
3452
|
+
vpaddd zmm0,zmm0,zmm26
|
|
3453
|
+
vpaddd zmm1,zmm1,zmm28
|
|
3454
|
+
vpaddd zmm2,zmm2,zmm20
|
|
3455
|
+
vpaddd zmm3,zmm3,zmm29
|
|
3456
|
+
vpaddd zmm0,zmm0,zmm5
|
|
3457
|
+
vpaddd zmm1,zmm1,zmm6
|
|
3458
|
+
vpaddd zmm2,zmm2,zmm7
|
|
3459
|
+
vpaddd zmm3,zmm3,zmm4
|
|
3460
|
+
vpxord zmm15,zmm15,zmm0
|
|
3461
|
+
vpxord zmm12,zmm12,zmm1
|
|
3462
|
+
vpxord zmm13,zmm13,zmm2
|
|
3463
|
+
vpxord zmm14,zmm14,zmm3
|
|
3464
|
+
vprord zmm15,zmm15,0x8
|
|
3465
|
+
vprord zmm12,zmm12,0x8
|
|
3466
|
+
vprord zmm13,zmm13,0x8
|
|
3467
|
+
vprord zmm14,zmm14,0x8
|
|
3468
|
+
vpaddd zmm10,zmm10,zmm15
|
|
3469
|
+
vpaddd zmm11,zmm11,zmm12
|
|
3470
|
+
vpaddd zmm8,zmm8,zmm13
|
|
3471
|
+
vpaddd zmm9,zmm9,zmm14
|
|
3472
|
+
vpxord zmm5,zmm5,zmm10
|
|
3473
|
+
vpxord zmm6,zmm6,zmm11
|
|
3474
|
+
vpxord zmm7,zmm7,zmm8
|
|
3475
|
+
vpxord zmm4,zmm4,zmm9
|
|
3476
|
+
vprord zmm5,zmm5,0x7
|
|
3477
|
+
vprord zmm6,zmm6,0x7
|
|
3478
|
+
vprord zmm7,zmm7,0x7
|
|
3479
|
+
vprord zmm4,zmm4,0x7
|
|
3480
|
+
vpxord zmm0,zmm0,zmm8
|
|
3481
|
+
vpxord zmm1,zmm1,zmm9
|
|
3482
|
+
vpxord zmm2,zmm2,zmm10
|
|
3483
|
+
vpxord zmm3,zmm3,zmm11
|
|
3484
|
+
vpxord zmm4,zmm4,zmm12
|
|
3485
|
+
vpxord zmm5,zmm5,zmm13
|
|
3486
|
+
vpxord zmm6,zmm6,zmm14
|
|
3487
|
+
vpxord zmm7,zmm7,zmm15
|
|
3488
|
+
vpxord zmm8,zmm8,DWORD PTR [rdi]{1to16}
|
|
3489
|
+
vpxord zmm9,zmm9,DWORD PTR [rdi+0x4]{1to16}
|
|
3490
|
+
vpxord zmm10,zmm10,DWORD PTR [rdi+0x8]{1to16}
|
|
3491
|
+
vpxord zmm11,zmm11,DWORD PTR [rdi+0xc]{1to16}
|
|
3492
|
+
vpxord zmm12,zmm12,DWORD PTR [rdi+0x10]{1to16}
|
|
3493
|
+
vpxord zmm13,zmm13,DWORD PTR [rdi+0x14]{1to16}
|
|
3494
|
+
vpxord zmm14,zmm14,DWORD PTR [rdi+0x18]{1to16}
|
|
3495
|
+
vpxord zmm15,zmm15,DWORD PTR [rdi+0x1c]{1to16}
|
|
3496
|
+
vpunpckldq zmm16,zmm0,zmm1
|
|
3497
|
+
vpunpckhdq zmm17,zmm0,zmm1
|
|
3498
|
+
vpunpckldq zmm18,zmm2,zmm3
|
|
3499
|
+
vpunpckhdq zmm19,zmm2,zmm3
|
|
3500
|
+
vpunpckldq zmm20,zmm4,zmm5
|
|
3501
|
+
vpunpckhdq zmm21,zmm4,zmm5
|
|
3502
|
+
vpunpckldq zmm22,zmm6,zmm7
|
|
3503
|
+
vpunpckhdq zmm23,zmm6,zmm7
|
|
3504
|
+
vpunpckldq zmm24,zmm8,zmm9
|
|
3505
|
+
vpunpckhdq zmm25,zmm8,zmm9
|
|
3506
|
+
vpunpckldq zmm26,zmm10,zmm11
|
|
3507
|
+
vpunpckhdq zmm27,zmm10,zmm11
|
|
3508
|
+
vpunpckldq zmm28,zmm12,zmm13
|
|
3509
|
+
vpunpckhdq zmm29,zmm12,zmm13
|
|
3510
|
+
vpunpckldq zmm30,zmm14,zmm15
|
|
3511
|
+
vpunpckhdq zmm31,zmm14,zmm15
|
|
3512
|
+
vpunpcklqdq zmm0,zmm16,zmm18
|
|
3513
|
+
vpunpckhqdq zmm1,zmm16,zmm18
|
|
3514
|
+
vpunpcklqdq zmm2,zmm17,zmm19
|
|
3515
|
+
vpunpckhqdq zmm3,zmm17,zmm19
|
|
3516
|
+
vpunpcklqdq zmm4,zmm20,zmm22
|
|
3517
|
+
vpunpckhqdq zmm5,zmm20,zmm22
|
|
3518
|
+
vpunpcklqdq zmm6,zmm21,zmm23
|
|
3519
|
+
vpunpckhqdq zmm7,zmm21,zmm23
|
|
3520
|
+
vpunpcklqdq zmm8,zmm24,zmm26
|
|
3521
|
+
vpunpckhqdq zmm9,zmm24,zmm26
|
|
3522
|
+
vpunpcklqdq zmm10,zmm25,zmm27
|
|
3523
|
+
vpunpckhqdq zmm11,zmm25,zmm27
|
|
3524
|
+
vpunpcklqdq zmm12,zmm28,zmm30
|
|
3525
|
+
vpunpckhqdq zmm13,zmm28,zmm30
|
|
3526
|
+
vpunpcklqdq zmm14,zmm29,zmm31
|
|
3527
|
+
vpunpckhqdq zmm15,zmm29,zmm31
|
|
3528
|
+
vshufi32x4 zmm16,zmm0,zmm4,0x88
|
|
3529
|
+
vshufi32x4 zmm17,zmm1,zmm5,0x88
|
|
3530
|
+
vshufi32x4 zmm18,zmm2,zmm6,0x88
|
|
3531
|
+
vshufi32x4 zmm19,zmm3,zmm7,0x88
|
|
3532
|
+
vshufi32x4 zmm20,zmm0,zmm4,0xdd
|
|
3533
|
+
vshufi32x4 zmm21,zmm1,zmm5,0xdd
|
|
3534
|
+
vshufi32x4 zmm22,zmm2,zmm6,0xdd
|
|
3535
|
+
vshufi32x4 zmm23,zmm3,zmm7,0xdd
|
|
3536
|
+
vshufi32x4 zmm24,zmm8,zmm12,0x88
|
|
3537
|
+
vshufi32x4 zmm25,zmm9,zmm13,0x88
|
|
3538
|
+
vshufi32x4 zmm26,zmm10,zmm14,0x88
|
|
3539
|
+
vshufi32x4 zmm27,zmm11,zmm15,0x88
|
|
3540
|
+
vshufi32x4 zmm28,zmm8,zmm12,0xdd
|
|
3541
|
+
vshufi32x4 zmm29,zmm9,zmm13,0xdd
|
|
3542
|
+
vshufi32x4 zmm30,zmm10,zmm14,0xdd
|
|
3543
|
+
vshufi32x4 zmm31,zmm11,zmm15,0xdd
|
|
3544
|
+
vshufi32x4 zmm0,zmm16,zmm24,0x88
|
|
3545
|
+
vshufi32x4 zmm1,zmm17,zmm25,0x88
|
|
3546
|
+
vshufi32x4 zmm2,zmm18,zmm26,0x88
|
|
3547
|
+
vshufi32x4 zmm3,zmm19,zmm27,0x88
|
|
3548
|
+
vshufi32x4 zmm4,zmm20,zmm28,0x88
|
|
3549
|
+
vshufi32x4 zmm5,zmm21,zmm29,0x88
|
|
3550
|
+
vshufi32x4 zmm6,zmm22,zmm30,0x88
|
|
3551
|
+
vshufi32x4 zmm7,zmm23,zmm31,0x88
|
|
3552
|
+
vshufi32x4 zmm8,zmm16,zmm24,0xdd
|
|
3553
|
+
vshufi32x4 zmm9,zmm17,zmm25,0xdd
|
|
3554
|
+
vshufi32x4 zmm10,zmm18,zmm26,0xdd
|
|
3555
|
+
vshufi32x4 zmm11,zmm19,zmm27,0xdd
|
|
3556
|
+
vshufi32x4 zmm12,zmm20,zmm28,0xdd
|
|
3557
|
+
vshufi32x4 zmm13,zmm21,zmm29,0xdd
|
|
3558
|
+
vshufi32x4 zmm14,zmm22,zmm30,0xdd
|
|
3559
|
+
vshufi32x4 zmm15,zmm23,zmm31,0xdd
|
|
3560
|
+
vmovdqu32 ZMMWORD PTR [r9],zmm0
|
|
3561
|
+
vmovdqu32 ZMMWORD PTR [r9+0x40],zmm1
|
|
3562
|
+
vmovdqu32 ZMMWORD PTR [r9+0x80],zmm2
|
|
3563
|
+
vmovdqu32 ZMMWORD PTR [r9+0xc0],zmm3
|
|
3564
|
+
vmovdqu32 ZMMWORD PTR [r9+0x100],zmm4
|
|
3565
|
+
vmovdqu32 ZMMWORD PTR [r9+0x140],zmm5
|
|
3566
|
+
vmovdqu32 ZMMWORD PTR [r9+0x180],zmm6
|
|
3567
|
+
vmovdqu32 ZMMWORD PTR [r9+0x1c0],zmm7
|
|
3568
|
+
vmovdqu32 ZMMWORD PTR [r9+0x200],zmm8
|
|
3569
|
+
vmovdqu32 ZMMWORD PTR [r9+0x240],zmm9
|
|
3570
|
+
vmovdqu32 ZMMWORD PTR [r9+0x280],zmm10
|
|
3571
|
+
vmovdqu32 ZMMWORD PTR [r9+0x2c0],zmm11
|
|
3572
|
+
vmovdqu32 ZMMWORD PTR [r9+0x300],zmm12
|
|
3573
|
+
vmovdqu32 ZMMWORD PTR [r9+0x340],zmm13
|
|
3574
|
+
vmovdqu32 ZMMWORD PTR [r9+0x380],zmm14
|
|
3575
|
+
vmovdqu32 ZMMWORD PTR [r9+0x3c0],zmm15
|
|
3576
|
+
vmovdqa32 zmm0,ZMMWORD PTR [rsp]
|
|
3577
|
+
vmovdqa32 zmm1,ZMMWORD PTR [rsp+0x40]
|
|
3578
|
+
vpaddd zmm2,zmm0,DWORD PTR [ADD16+rip]{1to16}
|
|
3579
|
+
vpcmpltud k1,zmm2,zmm0
|
|
3580
|
+
vpaddd zmm1{k1},zmm1,DWORD PTR [ADD1+rip]{1to16}
|
|
3581
|
+
vmovdqa32 ZMMWORD PTR [rsp],zmm2
|
|
3582
|
+
vmovdqa32 ZMMWORD PTR [rsp+0x40],zmm1
|
|
3583
|
+
add r9,0x400
|
|
3584
|
+
sub r10,0x10
|
|
3585
|
+
cmp r10,0x10
|
|
3586
|
+
jae 3b
|
|
3587
|
+
test r10,r10
|
|
3588
|
+
jne 2f
|
|
3589
|
+
# Exit path of the enclosing routine.
# Reached by fall-through when the preceding `test r10,r10` finds r10 == 0
# (a non-zero r10 branches to the following `2:` label instead).
# NOTE(review): r10 looks like the count of remaining 64-byte output blocks
# (the wide loop subtracts 0x10 from it per 0x400 bytes stored) - confirm
# against the routine header, which is outside this view.
9:
|
|
3590
|
+
# Clear the upper halves of the vector registers before returning, so the
# caller does not pay AVX-to-SSE transition penalties.
vzeroupper
|
|
3591
|
+
# Drop the whole local frame at once by restoring rsp from rbp.
# NOTE(review): presumably pairs with a `push rbp` / `mov rbp,rsp` prologue
# followed by stack realignment; the prologue is not visible here - confirm.
mov rsp,rbp
|
|
3592
|
+
pop rbp
|
|
3593
|
+
ret
|
|
3594
|
+
2:
|
|
3595
|
+
test r10,0x8
|
|
3596
|
+
je 2f
|
|
3597
|
+
vpbroadcastd ymm16,DWORD PTR [rsi]
|
|
3598
|
+
vpbroadcastd ymm17,DWORD PTR [rsi+0x4]
|
|
3599
|
+
vpbroadcastd ymm18,DWORD PTR [rsi+0x8]
|
|
3600
|
+
vpbroadcastd ymm19,DWORD PTR [rsi+0xc]
|
|
3601
|
+
vpbroadcastd ymm20,DWORD PTR [rsi+0x10]
|
|
3602
|
+
vpbroadcastd ymm21,DWORD PTR [rsi+0x14]
|
|
3603
|
+
vpbroadcastd ymm22,DWORD PTR [rsi+0x18]
|
|
3604
|
+
vpbroadcastd ymm23,DWORD PTR [rsi+0x1c]
|
|
3605
|
+
vpbroadcastd ymm24,DWORD PTR [rsi+0x20]
|
|
3606
|
+
vpbroadcastd ymm25,DWORD PTR [rsi+0x24]
|
|
3607
|
+
vpbroadcastd ymm26,DWORD PTR [rsi+0x28]
|
|
3608
|
+
vpbroadcastd ymm27,DWORD PTR [rsi+0x2c]
|
|
3609
|
+
vpbroadcastd ymm28,DWORD PTR [rsi+0x30]
|
|
3610
|
+
vpbroadcastd ymm29,DWORD PTR [rsi+0x34]
|
|
3611
|
+
vpbroadcastd ymm30,DWORD PTR [rsi+0x38]
|
|
3612
|
+
vpbroadcastd ymm31,DWORD PTR [rsi+0x3c]
|
|
3613
|
+
vpbroadcastd ymm0,DWORD PTR [rdi]
|
|
3614
|
+
vpbroadcastd ymm1,DWORD PTR [rdi+0x4]
|
|
3615
|
+
vpbroadcastd ymm2,DWORD PTR [rdi+0x8]
|
|
3616
|
+
vpbroadcastd ymm3,DWORD PTR [rdi+0xc]
|
|
3617
|
+
vpbroadcastd ymm4,DWORD PTR [rdi+0x10]
|
|
3618
|
+
vpbroadcastd ymm5,DWORD PTR [rdi+0x14]
|
|
3619
|
+
vpbroadcastd ymm6,DWORD PTR [rdi+0x18]
|
|
3620
|
+
vpbroadcastd ymm7,DWORD PTR [rdi+0x1c]
|
|
3621
|
+
vpbroadcastd ymm8,DWORD PTR [BLAKE3_IV_0+rip]
|
|
3622
|
+
vpbroadcastd ymm9,DWORD PTR [BLAKE3_IV_1+rip]
|
|
3623
|
+
vpbroadcastd ymm10,DWORD PTR [BLAKE3_IV_2+rip]
|
|
3624
|
+
vpbroadcastd ymm11,DWORD PTR [BLAKE3_IV_3+rip]
|
|
3625
|
+
vmovdqa ymm12,YMMWORD PTR [rsp]
|
|
3626
|
+
vmovdqa ymm13,YMMWORD PTR [rsp+0x40]
|
|
3627
|
+
vpbroadcastd ymm14,edx
|
|
3628
|
+
vpbroadcastd ymm15,r8d
|
|
3629
|
+
vpaddd ymm0,ymm0,ymm16
|
|
3630
|
+
vpaddd ymm1,ymm1,ymm18
|
|
3631
|
+
vpaddd ymm2,ymm2,ymm20
|
|
3632
|
+
vpaddd ymm3,ymm3,ymm22
|
|
3633
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3634
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3635
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3636
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3637
|
+
vpxord ymm12,ymm12,ymm0
|
|
3638
|
+
vpxord ymm13,ymm13,ymm1
|
|
3639
|
+
vpxord ymm14,ymm14,ymm2
|
|
3640
|
+
vpxord ymm15,ymm15,ymm3
|
|
3641
|
+
vprord ymm12,ymm12,0x10
|
|
3642
|
+
vprord ymm13,ymm13,0x10
|
|
3643
|
+
vprord ymm14,ymm14,0x10
|
|
3644
|
+
vprord ymm15,ymm15,0x10
|
|
3645
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3646
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3647
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3648
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3649
|
+
vpxord ymm4,ymm4,ymm8
|
|
3650
|
+
vpxord ymm5,ymm5,ymm9
|
|
3651
|
+
vpxord ymm6,ymm6,ymm10
|
|
3652
|
+
vpxord ymm7,ymm7,ymm11
|
|
3653
|
+
vprord ymm4,ymm4,0xc
|
|
3654
|
+
vprord ymm5,ymm5,0xc
|
|
3655
|
+
vprord ymm6,ymm6,0xc
|
|
3656
|
+
vprord ymm7,ymm7,0xc
|
|
3657
|
+
vpaddd ymm0,ymm0,ymm17
|
|
3658
|
+
vpaddd ymm1,ymm1,ymm19
|
|
3659
|
+
vpaddd ymm2,ymm2,ymm21
|
|
3660
|
+
vpaddd ymm3,ymm3,ymm23
|
|
3661
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3662
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3663
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3664
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3665
|
+
vpxord ymm12,ymm12,ymm0
|
|
3666
|
+
vpxord ymm13,ymm13,ymm1
|
|
3667
|
+
vpxord ymm14,ymm14,ymm2
|
|
3668
|
+
vpxord ymm15,ymm15,ymm3
|
|
3669
|
+
vprord ymm12,ymm12,0x8
|
|
3670
|
+
vprord ymm13,ymm13,0x8
|
|
3671
|
+
vprord ymm14,ymm14,0x8
|
|
3672
|
+
vprord ymm15,ymm15,0x8
|
|
3673
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3674
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3675
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3676
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3677
|
+
vpxord ymm4,ymm4,ymm8
|
|
3678
|
+
vpxord ymm5,ymm5,ymm9
|
|
3679
|
+
vpxord ymm6,ymm6,ymm10
|
|
3680
|
+
vpxord ymm7,ymm7,ymm11
|
|
3681
|
+
vprord ymm4,ymm4,0x7
|
|
3682
|
+
vprord ymm5,ymm5,0x7
|
|
3683
|
+
vprord ymm6,ymm6,0x7
|
|
3684
|
+
vprord ymm7,ymm7,0x7
|
|
3685
|
+
vpaddd ymm0,ymm0,ymm24
|
|
3686
|
+
vpaddd ymm1,ymm1,ymm26
|
|
3687
|
+
vpaddd ymm2,ymm2,ymm28
|
|
3688
|
+
vpaddd ymm3,ymm3,ymm30
|
|
3689
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3690
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3691
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3692
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3693
|
+
vpxord ymm15,ymm15,ymm0
|
|
3694
|
+
vpxord ymm12,ymm12,ymm1
|
|
3695
|
+
vpxord ymm13,ymm13,ymm2
|
|
3696
|
+
vpxord ymm14,ymm14,ymm3
|
|
3697
|
+
vprord ymm15,ymm15,0x10
|
|
3698
|
+
vprord ymm12,ymm12,0x10
|
|
3699
|
+
vprord ymm13,ymm13,0x10
|
|
3700
|
+
vprord ymm14,ymm14,0x10
|
|
3701
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3702
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3703
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3704
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3705
|
+
vpxord ymm5,ymm5,ymm10
|
|
3706
|
+
vpxord ymm6,ymm6,ymm11
|
|
3707
|
+
vpxord ymm7,ymm7,ymm8
|
|
3708
|
+
vpxord ymm4,ymm4,ymm9
|
|
3709
|
+
vprord ymm5,ymm5,0xc
|
|
3710
|
+
vprord ymm6,ymm6,0xc
|
|
3711
|
+
vprord ymm7,ymm7,0xc
|
|
3712
|
+
vprord ymm4,ymm4,0xc
|
|
3713
|
+
vpaddd ymm0,ymm0,ymm25
|
|
3714
|
+
vpaddd ymm1,ymm1,ymm27
|
|
3715
|
+
vpaddd ymm2,ymm2,ymm29
|
|
3716
|
+
vpaddd ymm3,ymm3,ymm31
|
|
3717
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3718
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3719
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3720
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3721
|
+
vpxord ymm15,ymm15,ymm0
|
|
3722
|
+
vpxord ymm12,ymm12,ymm1
|
|
3723
|
+
vpxord ymm13,ymm13,ymm2
|
|
3724
|
+
vpxord ymm14,ymm14,ymm3
|
|
3725
|
+
vprord ymm15,ymm15,0x8
|
|
3726
|
+
vprord ymm12,ymm12,0x8
|
|
3727
|
+
vprord ymm13,ymm13,0x8
|
|
3728
|
+
vprord ymm14,ymm14,0x8
|
|
3729
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3730
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3731
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3732
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3733
|
+
vpxord ymm5,ymm5,ymm10
|
|
3734
|
+
vpxord ymm6,ymm6,ymm11
|
|
3735
|
+
vpxord ymm7,ymm7,ymm8
|
|
3736
|
+
vpxord ymm4,ymm4,ymm9
|
|
3737
|
+
vprord ymm5,ymm5,0x7
|
|
3738
|
+
vprord ymm6,ymm6,0x7
|
|
3739
|
+
vprord ymm7,ymm7,0x7
|
|
3740
|
+
vprord ymm4,ymm4,0x7
|
|
3741
|
+
vpaddd ymm0,ymm0,ymm18
|
|
3742
|
+
vpaddd ymm1,ymm1,ymm19
|
|
3743
|
+
vpaddd ymm2,ymm2,ymm23
|
|
3744
|
+
vpaddd ymm3,ymm3,ymm20
|
|
3745
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3746
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3747
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3748
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3749
|
+
vpxord ymm12,ymm12,ymm0
|
|
3750
|
+
vpxord ymm13,ymm13,ymm1
|
|
3751
|
+
vpxord ymm14,ymm14,ymm2
|
|
3752
|
+
vpxord ymm15,ymm15,ymm3
|
|
3753
|
+
vprord ymm12,ymm12,0x10
|
|
3754
|
+
vprord ymm13,ymm13,0x10
|
|
3755
|
+
vprord ymm14,ymm14,0x10
|
|
3756
|
+
vprord ymm15,ymm15,0x10
|
|
3757
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3758
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3759
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3760
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3761
|
+
vpxord ymm4,ymm4,ymm8
|
|
3762
|
+
vpxord ymm5,ymm5,ymm9
|
|
3763
|
+
vpxord ymm6,ymm6,ymm10
|
|
3764
|
+
vpxord ymm7,ymm7,ymm11
|
|
3765
|
+
vprord ymm4,ymm4,0xc
|
|
3766
|
+
vprord ymm5,ymm5,0xc
|
|
3767
|
+
vprord ymm6,ymm6,0xc
|
|
3768
|
+
vprord ymm7,ymm7,0xc
|
|
3769
|
+
vpaddd ymm0,ymm0,ymm22
|
|
3770
|
+
vpaddd ymm1,ymm1,ymm26
|
|
3771
|
+
vpaddd ymm2,ymm2,ymm16
|
|
3772
|
+
vpaddd ymm3,ymm3,ymm29
|
|
3773
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3774
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3775
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3776
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3777
|
+
vpxord ymm12,ymm12,ymm0
|
|
3778
|
+
vpxord ymm13,ymm13,ymm1
|
|
3779
|
+
vpxord ymm14,ymm14,ymm2
|
|
3780
|
+
vpxord ymm15,ymm15,ymm3
|
|
3781
|
+
vprord ymm12,ymm12,0x8
|
|
3782
|
+
vprord ymm13,ymm13,0x8
|
|
3783
|
+
vprord ymm14,ymm14,0x8
|
|
3784
|
+
vprord ymm15,ymm15,0x8
|
|
3785
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3786
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3787
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3788
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3789
|
+
vpxord ymm4,ymm4,ymm8
|
|
3790
|
+
vpxord ymm5,ymm5,ymm9
|
|
3791
|
+
vpxord ymm6,ymm6,ymm10
|
|
3792
|
+
vpxord ymm7,ymm7,ymm11
|
|
3793
|
+
vprord ymm4,ymm4,0x7
|
|
3794
|
+
vprord ymm5,ymm5,0x7
|
|
3795
|
+
vprord ymm6,ymm6,0x7
|
|
3796
|
+
vprord ymm7,ymm7,0x7
|
|
3797
|
+
vpaddd ymm0,ymm0,ymm17
|
|
3798
|
+
vpaddd ymm1,ymm1,ymm28
|
|
3799
|
+
vpaddd ymm2,ymm2,ymm25
|
|
3800
|
+
vpaddd ymm3,ymm3,ymm31
|
|
3801
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3802
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3803
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3804
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3805
|
+
vpxord ymm15,ymm15,ymm0
|
|
3806
|
+
vpxord ymm12,ymm12,ymm1
|
|
3807
|
+
vpxord ymm13,ymm13,ymm2
|
|
3808
|
+
vpxord ymm14,ymm14,ymm3
|
|
3809
|
+
vprord ymm15,ymm15,0x10
|
|
3810
|
+
vprord ymm12,ymm12,0x10
|
|
3811
|
+
vprord ymm13,ymm13,0x10
|
|
3812
|
+
vprord ymm14,ymm14,0x10
|
|
3813
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3814
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3815
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3816
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3817
|
+
vpxord ymm5,ymm5,ymm10
|
|
3818
|
+
vpxord ymm6,ymm6,ymm11
|
|
3819
|
+
vpxord ymm7,ymm7,ymm8
|
|
3820
|
+
vpxord ymm4,ymm4,ymm9
|
|
3821
|
+
vprord ymm5,ymm5,0xc
|
|
3822
|
+
vprord ymm6,ymm6,0xc
|
|
3823
|
+
vprord ymm7,ymm7,0xc
|
|
3824
|
+
vprord ymm4,ymm4,0xc
|
|
3825
|
+
vpaddd ymm0,ymm0,ymm27
|
|
3826
|
+
vpaddd ymm1,ymm1,ymm21
|
|
3827
|
+
vpaddd ymm2,ymm2,ymm30
|
|
3828
|
+
vpaddd ymm3,ymm3,ymm24
|
|
3829
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3830
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3831
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3832
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3833
|
+
vpxord ymm15,ymm15,ymm0
|
|
3834
|
+
vpxord ymm12,ymm12,ymm1
|
|
3835
|
+
vpxord ymm13,ymm13,ymm2
|
|
3836
|
+
vpxord ymm14,ymm14,ymm3
|
|
3837
|
+
vprord ymm15,ymm15,0x8
|
|
3838
|
+
vprord ymm12,ymm12,0x8
|
|
3839
|
+
vprord ymm13,ymm13,0x8
|
|
3840
|
+
vprord ymm14,ymm14,0x8
|
|
3841
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3842
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3843
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3844
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3845
|
+
vpxord ymm5,ymm5,ymm10
|
|
3846
|
+
vpxord ymm6,ymm6,ymm11
|
|
3847
|
+
vpxord ymm7,ymm7,ymm8
|
|
3848
|
+
vpxord ymm4,ymm4,ymm9
|
|
3849
|
+
vprord ymm5,ymm5,0x7
|
|
3850
|
+
vprord ymm6,ymm6,0x7
|
|
3851
|
+
vprord ymm7,ymm7,0x7
|
|
3852
|
+
vprord ymm4,ymm4,0x7
|
|
3853
|
+
vpaddd ymm0,ymm0,ymm19
|
|
3854
|
+
vpaddd ymm1,ymm1,ymm26
|
|
3855
|
+
vpaddd ymm2,ymm2,ymm29
|
|
3856
|
+
vpaddd ymm3,ymm3,ymm23
|
|
3857
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3858
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3859
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3860
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3861
|
+
vpxord ymm12,ymm12,ymm0
|
|
3862
|
+
vpxord ymm13,ymm13,ymm1
|
|
3863
|
+
vpxord ymm14,ymm14,ymm2
|
|
3864
|
+
vpxord ymm15,ymm15,ymm3
|
|
3865
|
+
vprord ymm12,ymm12,0x10
|
|
3866
|
+
vprord ymm13,ymm13,0x10
|
|
3867
|
+
vprord ymm14,ymm14,0x10
|
|
3868
|
+
vprord ymm15,ymm15,0x10
|
|
3869
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3870
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3871
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3872
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3873
|
+
vpxord ymm4,ymm4,ymm8
|
|
3874
|
+
vpxord ymm5,ymm5,ymm9
|
|
3875
|
+
vpxord ymm6,ymm6,ymm10
|
|
3876
|
+
vpxord ymm7,ymm7,ymm11
|
|
3877
|
+
vprord ymm4,ymm4,0xc
|
|
3878
|
+
vprord ymm5,ymm5,0xc
|
|
3879
|
+
vprord ymm6,ymm6,0xc
|
|
3880
|
+
vprord ymm7,ymm7,0xc
|
|
3881
|
+
vpaddd ymm0,ymm0,ymm20
|
|
3882
|
+
vpaddd ymm1,ymm1,ymm28
|
|
3883
|
+
vpaddd ymm2,ymm2,ymm18
|
|
3884
|
+
vpaddd ymm3,ymm3,ymm30
|
|
3885
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3886
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3887
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3888
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3889
|
+
vpxord ymm12,ymm12,ymm0
|
|
3890
|
+
vpxord ymm13,ymm13,ymm1
|
|
3891
|
+
vpxord ymm14,ymm14,ymm2
|
|
3892
|
+
vpxord ymm15,ymm15,ymm3
|
|
3893
|
+
vprord ymm12,ymm12,0x8
|
|
3894
|
+
vprord ymm13,ymm13,0x8
|
|
3895
|
+
vprord ymm14,ymm14,0x8
|
|
3896
|
+
vprord ymm15,ymm15,0x8
|
|
3897
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3898
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3899
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3900
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3901
|
+
vpxord ymm4,ymm4,ymm8
|
|
3902
|
+
vpxord ymm5,ymm5,ymm9
|
|
3903
|
+
vpxord ymm6,ymm6,ymm10
|
|
3904
|
+
vpxord ymm7,ymm7,ymm11
|
|
3905
|
+
vprord ymm4,ymm4,0x7
|
|
3906
|
+
vprord ymm5,ymm5,0x7
|
|
3907
|
+
vprord ymm6,ymm6,0x7
|
|
3908
|
+
vprord ymm7,ymm7,0x7
|
|
3909
|
+
vpaddd ymm0,ymm0,ymm22
|
|
3910
|
+
vpaddd ymm1,ymm1,ymm25
|
|
3911
|
+
vpaddd ymm2,ymm2,ymm27
|
|
3912
|
+
vpaddd ymm3,ymm3,ymm24
|
|
3913
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3914
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3915
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3916
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3917
|
+
vpxord ymm15,ymm15,ymm0
|
|
3918
|
+
vpxord ymm12,ymm12,ymm1
|
|
3919
|
+
vpxord ymm13,ymm13,ymm2
|
|
3920
|
+
vpxord ymm14,ymm14,ymm3
|
|
3921
|
+
vprord ymm15,ymm15,0x10
|
|
3922
|
+
vprord ymm12,ymm12,0x10
|
|
3923
|
+
vprord ymm13,ymm13,0x10
|
|
3924
|
+
vprord ymm14,ymm14,0x10
|
|
3925
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3926
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3927
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3928
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3929
|
+
vpxord ymm5,ymm5,ymm10
|
|
3930
|
+
vpxord ymm6,ymm6,ymm11
|
|
3931
|
+
vpxord ymm7,ymm7,ymm8
|
|
3932
|
+
vpxord ymm4,ymm4,ymm9
|
|
3933
|
+
vprord ymm5,ymm5,0xc
|
|
3934
|
+
vprord ymm6,ymm6,0xc
|
|
3935
|
+
vprord ymm7,ymm7,0xc
|
|
3936
|
+
vprord ymm4,ymm4,0xc
|
|
3937
|
+
vpaddd ymm0,ymm0,ymm21
|
|
3938
|
+
vpaddd ymm1,ymm1,ymm16
|
|
3939
|
+
vpaddd ymm2,ymm2,ymm31
|
|
3940
|
+
vpaddd ymm3,ymm3,ymm17
|
|
3941
|
+
vpaddd ymm0,ymm0,ymm5
|
|
3942
|
+
vpaddd ymm1,ymm1,ymm6
|
|
3943
|
+
vpaddd ymm2,ymm2,ymm7
|
|
3944
|
+
vpaddd ymm3,ymm3,ymm4
|
|
3945
|
+
vpxord ymm15,ymm15,ymm0
|
|
3946
|
+
vpxord ymm12,ymm12,ymm1
|
|
3947
|
+
vpxord ymm13,ymm13,ymm2
|
|
3948
|
+
vpxord ymm14,ymm14,ymm3
|
|
3949
|
+
vprord ymm15,ymm15,0x8
|
|
3950
|
+
vprord ymm12,ymm12,0x8
|
|
3951
|
+
vprord ymm13,ymm13,0x8
|
|
3952
|
+
vprord ymm14,ymm14,0x8
|
|
3953
|
+
vpaddd ymm10,ymm10,ymm15
|
|
3954
|
+
vpaddd ymm11,ymm11,ymm12
|
|
3955
|
+
vpaddd ymm8,ymm8,ymm13
|
|
3956
|
+
vpaddd ymm9,ymm9,ymm14
|
|
3957
|
+
vpxord ymm5,ymm5,ymm10
|
|
3958
|
+
vpxord ymm6,ymm6,ymm11
|
|
3959
|
+
vpxord ymm7,ymm7,ymm8
|
|
3960
|
+
vpxord ymm4,ymm4,ymm9
|
|
3961
|
+
vprord ymm5,ymm5,0x7
|
|
3962
|
+
vprord ymm6,ymm6,0x7
|
|
3963
|
+
vprord ymm7,ymm7,0x7
|
|
3964
|
+
vprord ymm4,ymm4,0x7
|
|
3965
|
+
vpaddd ymm0,ymm0,ymm26
|
|
3966
|
+
vpaddd ymm1,ymm1,ymm28
|
|
3967
|
+
vpaddd ymm2,ymm2,ymm30
|
|
3968
|
+
vpaddd ymm3,ymm3,ymm29
|
|
3969
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3970
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3971
|
+
vpaddd ymm2,ymm2,ymm6
|
|
3972
|
+
vpaddd ymm3,ymm3,ymm7
|
|
3973
|
+
vpxord ymm12,ymm12,ymm0
|
|
3974
|
+
vpxord ymm13,ymm13,ymm1
|
|
3975
|
+
vpxord ymm14,ymm14,ymm2
|
|
3976
|
+
vpxord ymm15,ymm15,ymm3
|
|
3977
|
+
vprord ymm12,ymm12,0x10
|
|
3978
|
+
vprord ymm13,ymm13,0x10
|
|
3979
|
+
vprord ymm14,ymm14,0x10
|
|
3980
|
+
vprord ymm15,ymm15,0x10
|
|
3981
|
+
vpaddd ymm8,ymm8,ymm12
|
|
3982
|
+
vpaddd ymm9,ymm9,ymm13
|
|
3983
|
+
vpaddd ymm10,ymm10,ymm14
|
|
3984
|
+
vpaddd ymm11,ymm11,ymm15
|
|
3985
|
+
vpxord ymm4,ymm4,ymm8
|
|
3986
|
+
vpxord ymm5,ymm5,ymm9
|
|
3987
|
+
vpxord ymm6,ymm6,ymm10
|
|
3988
|
+
vpxord ymm7,ymm7,ymm11
|
|
3989
|
+
vprord ymm4,ymm4,0xc
|
|
3990
|
+
vprord ymm5,ymm5,0xc
|
|
3991
|
+
vprord ymm6,ymm6,0xc
|
|
3992
|
+
vprord ymm7,ymm7,0xc
|
|
3993
|
+
vpaddd ymm0,ymm0,ymm23
|
|
3994
|
+
vpaddd ymm1,ymm1,ymm25
|
|
3995
|
+
vpaddd ymm2,ymm2,ymm19
|
|
3996
|
+
vpaddd ymm3,ymm3,ymm31
|
|
3997
|
+
vpaddd ymm0,ymm0,ymm4
|
|
3998
|
+
vpaddd ymm1,ymm1,ymm5
|
|
3999
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4000
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4001
|
+
vpxord ymm12,ymm12,ymm0
|
|
4002
|
+
vpxord ymm13,ymm13,ymm1
|
|
4003
|
+
vpxord ymm14,ymm14,ymm2
|
|
4004
|
+
vpxord ymm15,ymm15,ymm3
|
|
4005
|
+
vprord ymm12,ymm12,0x8
|
|
4006
|
+
vprord ymm13,ymm13,0x8
|
|
4007
|
+
vprord ymm14,ymm14,0x8
|
|
4008
|
+
vprord ymm15,ymm15,0x8
|
|
4009
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4010
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4011
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4012
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4013
|
+
vpxord ymm4,ymm4,ymm8
|
|
4014
|
+
vpxord ymm5,ymm5,ymm9
|
|
4015
|
+
vpxord ymm6,ymm6,ymm10
|
|
4016
|
+
vpxord ymm7,ymm7,ymm11
|
|
4017
|
+
vprord ymm4,ymm4,0x7
|
|
4018
|
+
vprord ymm5,ymm5,0x7
|
|
4019
|
+
vprord ymm6,ymm6,0x7
|
|
4020
|
+
vprord ymm7,ymm7,0x7
|
|
4021
|
+
vpaddd ymm0,ymm0,ymm20
|
|
4022
|
+
vpaddd ymm1,ymm1,ymm27
|
|
4023
|
+
vpaddd ymm2,ymm2,ymm21
|
|
4024
|
+
vpaddd ymm3,ymm3,ymm17
|
|
4025
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4026
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4027
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4028
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4029
|
+
vpxord ymm15,ymm15,ymm0
|
|
4030
|
+
vpxord ymm12,ymm12,ymm1
|
|
4031
|
+
vpxord ymm13,ymm13,ymm2
|
|
4032
|
+
vpxord ymm14,ymm14,ymm3
|
|
4033
|
+
vprord ymm15,ymm15,0x10
|
|
4034
|
+
vprord ymm12,ymm12,0x10
|
|
4035
|
+
vprord ymm13,ymm13,0x10
|
|
4036
|
+
vprord ymm14,ymm14,0x10
|
|
4037
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4038
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4039
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4040
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4041
|
+
vpxord ymm5,ymm5,ymm10
|
|
4042
|
+
vpxord ymm6,ymm6,ymm11
|
|
4043
|
+
vpxord ymm7,ymm7,ymm8
|
|
4044
|
+
vpxord ymm4,ymm4,ymm9
|
|
4045
|
+
vprord ymm5,ymm5,0xc
|
|
4046
|
+
vprord ymm6,ymm6,0xc
|
|
4047
|
+
vprord ymm7,ymm7,0xc
|
|
4048
|
+
vprord ymm4,ymm4,0xc
|
|
4049
|
+
vpaddd ymm0,ymm0,ymm16
|
|
4050
|
+
vpaddd ymm1,ymm1,ymm18
|
|
4051
|
+
vpaddd ymm2,ymm2,ymm24
|
|
4052
|
+
vpaddd ymm3,ymm3,ymm22
|
|
4053
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4054
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4055
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4056
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4057
|
+
vpxord ymm15,ymm15,ymm0
|
|
4058
|
+
vpxord ymm12,ymm12,ymm1
|
|
4059
|
+
vpxord ymm13,ymm13,ymm2
|
|
4060
|
+
vpxord ymm14,ymm14,ymm3
|
|
4061
|
+
vprord ymm15,ymm15,0x8
|
|
4062
|
+
vprord ymm12,ymm12,0x8
|
|
4063
|
+
vprord ymm13,ymm13,0x8
|
|
4064
|
+
vprord ymm14,ymm14,0x8
|
|
4065
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4066
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4067
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4068
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4069
|
+
vpxord ymm5,ymm5,ymm10
|
|
4070
|
+
vpxord ymm6,ymm6,ymm11
|
|
4071
|
+
vpxord ymm7,ymm7,ymm8
|
|
4072
|
+
vpxord ymm4,ymm4,ymm9
|
|
4073
|
+
vprord ymm5,ymm5,0x7
|
|
4074
|
+
vprord ymm6,ymm6,0x7
|
|
4075
|
+
vprord ymm7,ymm7,0x7
|
|
4076
|
+
vprord ymm4,ymm4,0x7
|
|
4077
|
+
vpaddd ymm0,ymm0,ymm28
|
|
4078
|
+
vpaddd ymm1,ymm1,ymm25
|
|
4079
|
+
vpaddd ymm2,ymm2,ymm31
|
|
4080
|
+
vpaddd ymm3,ymm3,ymm30
|
|
4081
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4082
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4083
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4084
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4085
|
+
vpxord ymm12,ymm12,ymm0
|
|
4086
|
+
vpxord ymm13,ymm13,ymm1
|
|
4087
|
+
vpxord ymm14,ymm14,ymm2
|
|
4088
|
+
vpxord ymm15,ymm15,ymm3
|
|
4089
|
+
vprord ymm12,ymm12,0x10
|
|
4090
|
+
vprord ymm13,ymm13,0x10
|
|
4091
|
+
vprord ymm14,ymm14,0x10
|
|
4092
|
+
vprord ymm15,ymm15,0x10
|
|
4093
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4094
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4095
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4096
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4097
|
+
vpxord ymm4,ymm4,ymm8
|
|
4098
|
+
vpxord ymm5,ymm5,ymm9
|
|
4099
|
+
vpxord ymm6,ymm6,ymm10
|
|
4100
|
+
vpxord ymm7,ymm7,ymm11
|
|
4101
|
+
vprord ymm4,ymm4,0xc
|
|
4102
|
+
vprord ymm5,ymm5,0xc
|
|
4103
|
+
vprord ymm6,ymm6,0xc
|
|
4104
|
+
vprord ymm7,ymm7,0xc
|
|
4105
|
+
vpaddd ymm0,ymm0,ymm29
|
|
4106
|
+
vpaddd ymm1,ymm1,ymm27
|
|
4107
|
+
vpaddd ymm2,ymm2,ymm26
|
|
4108
|
+
vpaddd ymm3,ymm3,ymm24
|
|
4109
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4110
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4111
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4112
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4113
|
+
vpxord ymm12,ymm12,ymm0
|
|
4114
|
+
vpxord ymm13,ymm13,ymm1
|
|
4115
|
+
vpxord ymm14,ymm14,ymm2
|
|
4116
|
+
vpxord ymm15,ymm15,ymm3
|
|
4117
|
+
vprord ymm12,ymm12,0x8
|
|
4118
|
+
vprord ymm13,ymm13,0x8
|
|
4119
|
+
vprord ymm14,ymm14,0x8
|
|
4120
|
+
vprord ymm15,ymm15,0x8
|
|
4121
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4122
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4123
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4124
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4125
|
+
vpxord ymm4,ymm4,ymm8
|
|
4126
|
+
vpxord ymm5,ymm5,ymm9
|
|
4127
|
+
vpxord ymm6,ymm6,ymm10
|
|
4128
|
+
vpxord ymm7,ymm7,ymm11
|
|
4129
|
+
vprord ymm4,ymm4,0x7
|
|
4130
|
+
vprord ymm5,ymm5,0x7
|
|
4131
|
+
vprord ymm6,ymm6,0x7
|
|
4132
|
+
vprord ymm7,ymm7,0x7
|
|
4133
|
+
vpaddd ymm0,ymm0,ymm23
|
|
4134
|
+
vpaddd ymm1,ymm1,ymm21
|
|
4135
|
+
vpaddd ymm2,ymm2,ymm16
|
|
4136
|
+
vpaddd ymm3,ymm3,ymm22
|
|
4137
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4138
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4139
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4140
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4141
|
+
vpxord ymm15,ymm15,ymm0
|
|
4142
|
+
vpxord ymm12,ymm12,ymm1
|
|
4143
|
+
vpxord ymm13,ymm13,ymm2
|
|
4144
|
+
vpxord ymm14,ymm14,ymm3
|
|
4145
|
+
vprord ymm15,ymm15,0x10
|
|
4146
|
+
vprord ymm12,ymm12,0x10
|
|
4147
|
+
vprord ymm13,ymm13,0x10
|
|
4148
|
+
vprord ymm14,ymm14,0x10
|
|
4149
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4150
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4151
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4152
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4153
|
+
vpxord ymm5,ymm5,ymm10
|
|
4154
|
+
vpxord ymm6,ymm6,ymm11
|
|
4155
|
+
vpxord ymm7,ymm7,ymm8
|
|
4156
|
+
vpxord ymm4,ymm4,ymm9
|
|
4157
|
+
vprord ymm5,ymm5,0xc
|
|
4158
|
+
vprord ymm6,ymm6,0xc
|
|
4159
|
+
vprord ymm7,ymm7,0xc
|
|
4160
|
+
vprord ymm4,ymm4,0xc
|
|
4161
|
+
vpaddd ymm0,ymm0,ymm18
|
|
4162
|
+
vpaddd ymm1,ymm1,ymm19
|
|
4163
|
+
vpaddd ymm2,ymm2,ymm17
|
|
4164
|
+
vpaddd ymm3,ymm3,ymm20
|
|
4165
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4166
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4167
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4168
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4169
|
+
vpxord ymm15,ymm15,ymm0
|
|
4170
|
+
vpxord ymm12,ymm12,ymm1
|
|
4171
|
+
vpxord ymm13,ymm13,ymm2
|
|
4172
|
+
vpxord ymm14,ymm14,ymm3
|
|
4173
|
+
vprord ymm15,ymm15,0x8
|
|
4174
|
+
vprord ymm12,ymm12,0x8
|
|
4175
|
+
vprord ymm13,ymm13,0x8
|
|
4176
|
+
vprord ymm14,ymm14,0x8
|
|
4177
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4178
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4179
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4180
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4181
|
+
vpxord ymm5,ymm5,ymm10
|
|
4182
|
+
vpxord ymm6,ymm6,ymm11
|
|
4183
|
+
vpxord ymm7,ymm7,ymm8
|
|
4184
|
+
vpxord ymm4,ymm4,ymm9
|
|
4185
|
+
vprord ymm5,ymm5,0x7
|
|
4186
|
+
vprord ymm6,ymm6,0x7
|
|
4187
|
+
vprord ymm7,ymm7,0x7
|
|
4188
|
+
vprord ymm4,ymm4,0x7
|
|
4189
|
+
vpaddd ymm0,ymm0,ymm25
|
|
4190
|
+
vpaddd ymm1,ymm1,ymm27
|
|
4191
|
+
vpaddd ymm2,ymm2,ymm24
|
|
4192
|
+
vpaddd ymm3,ymm3,ymm31
|
|
4193
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4194
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4195
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4196
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4197
|
+
vpxord ymm12,ymm12,ymm0
|
|
4198
|
+
vpxord ymm13,ymm13,ymm1
|
|
4199
|
+
vpxord ymm14,ymm14,ymm2
|
|
4200
|
+
vpxord ymm15,ymm15,ymm3
|
|
4201
|
+
vprord ymm12,ymm12,0x10
|
|
4202
|
+
vprord ymm13,ymm13,0x10
|
|
4203
|
+
vprord ymm14,ymm14,0x10
|
|
4204
|
+
vprord ymm15,ymm15,0x10
|
|
4205
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4206
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4207
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4208
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4209
|
+
vpxord ymm4,ymm4,ymm8
|
|
4210
|
+
vpxord ymm5,ymm5,ymm9
|
|
4211
|
+
vpxord ymm6,ymm6,ymm10
|
|
4212
|
+
vpxord ymm7,ymm7,ymm11
|
|
4213
|
+
vprord ymm4,ymm4,0xc
|
|
4214
|
+
vprord ymm5,ymm5,0xc
|
|
4215
|
+
vprord ymm6,ymm6,0xc
|
|
4216
|
+
vprord ymm7,ymm7,0xc
|
|
4217
|
+
vpaddd ymm0,ymm0,ymm30
|
|
4218
|
+
vpaddd ymm1,ymm1,ymm21
|
|
4219
|
+
vpaddd ymm2,ymm2,ymm28
|
|
4220
|
+
vpaddd ymm3,ymm3,ymm17
|
|
4221
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4222
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4223
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4224
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4225
|
+
vpxord ymm12,ymm12,ymm0
|
|
4226
|
+
vpxord ymm13,ymm13,ymm1
|
|
4227
|
+
vpxord ymm14,ymm14,ymm2
|
|
4228
|
+
vpxord ymm15,ymm15,ymm3
|
|
4229
|
+
vprord ymm12,ymm12,0x8
|
|
4230
|
+
vprord ymm13,ymm13,0x8
|
|
4231
|
+
vprord ymm14,ymm14,0x8
|
|
4232
|
+
vprord ymm15,ymm15,0x8
|
|
4233
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4234
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4235
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4236
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4237
|
+
vpxord ymm4,ymm4,ymm8
|
|
4238
|
+
vpxord ymm5,ymm5,ymm9
|
|
4239
|
+
vpxord ymm6,ymm6,ymm10
|
|
4240
|
+
vpxord ymm7,ymm7,ymm11
|
|
4241
|
+
vprord ymm4,ymm4,0x7
|
|
4242
|
+
vprord ymm5,ymm5,0x7
|
|
4243
|
+
vprord ymm6,ymm6,0x7
|
|
4244
|
+
vprord ymm7,ymm7,0x7
|
|
4245
|
+
vpaddd ymm0,ymm0,ymm29
|
|
4246
|
+
vpaddd ymm1,ymm1,ymm16
|
|
4247
|
+
vpaddd ymm2,ymm2,ymm18
|
|
4248
|
+
vpaddd ymm3,ymm3,ymm20
|
|
4249
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4250
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4251
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4252
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4253
|
+
vpxord ymm15,ymm15,ymm0
|
|
4254
|
+
vpxord ymm12,ymm12,ymm1
|
|
4255
|
+
vpxord ymm13,ymm13,ymm2
|
|
4256
|
+
vpxord ymm14,ymm14,ymm3
|
|
4257
|
+
vprord ymm15,ymm15,0x10
|
|
4258
|
+
vprord ymm12,ymm12,0x10
|
|
4259
|
+
vprord ymm13,ymm13,0x10
|
|
4260
|
+
vprord ymm14,ymm14,0x10
|
|
4261
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4262
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4263
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4264
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4265
|
+
vpxord ymm5,ymm5,ymm10
|
|
4266
|
+
vpxord ymm6,ymm6,ymm11
|
|
4267
|
+
vpxord ymm7,ymm7,ymm8
|
|
4268
|
+
vpxord ymm4,ymm4,ymm9
|
|
4269
|
+
vprord ymm5,ymm5,0xc
|
|
4270
|
+
vprord ymm6,ymm6,0xc
|
|
4271
|
+
vprord ymm7,ymm7,0xc
|
|
4272
|
+
vprord ymm4,ymm4,0xc
|
|
4273
|
+
vpaddd ymm0,ymm0,ymm19
|
|
4274
|
+
vpaddd ymm1,ymm1,ymm26
|
|
4275
|
+
vpaddd ymm2,ymm2,ymm22
|
|
4276
|
+
vpaddd ymm3,ymm3,ymm23
|
|
4277
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4278
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4279
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4280
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4281
|
+
vpxord ymm15,ymm15,ymm0
|
|
4282
|
+
vpxord ymm12,ymm12,ymm1
|
|
4283
|
+
vpxord ymm13,ymm13,ymm2
|
|
4284
|
+
vpxord ymm14,ymm14,ymm3
|
|
4285
|
+
vprord ymm15,ymm15,0x8
|
|
4286
|
+
vprord ymm12,ymm12,0x8
|
|
4287
|
+
vprord ymm13,ymm13,0x8
|
|
4288
|
+
vprord ymm14,ymm14,0x8
|
|
4289
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4290
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4291
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4292
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4293
|
+
vpxord ymm5,ymm5,ymm10
|
|
4294
|
+
vpxord ymm6,ymm6,ymm11
|
|
4295
|
+
vpxord ymm7,ymm7,ymm8
|
|
4296
|
+
vpxord ymm4,ymm4,ymm9
|
|
4297
|
+
vprord ymm5,ymm5,0x7
|
|
4298
|
+
vprord ymm6,ymm6,0x7
|
|
4299
|
+
vprord ymm7,ymm7,0x7
|
|
4300
|
+
vprord ymm4,ymm4,0x7
|
|
4301
|
+
vpaddd ymm0,ymm0,ymm27
|
|
4302
|
+
vpaddd ymm1,ymm1,ymm21
|
|
4303
|
+
vpaddd ymm2,ymm2,ymm17
|
|
4304
|
+
vpaddd ymm3,ymm3,ymm24
|
|
4305
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4306
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4307
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4308
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4309
|
+
vpxord ymm12,ymm12,ymm0
|
|
4310
|
+
vpxord ymm13,ymm13,ymm1
|
|
4311
|
+
vpxord ymm14,ymm14,ymm2
|
|
4312
|
+
vpxord ymm15,ymm15,ymm3
|
|
4313
|
+
vprord ymm12,ymm12,0x10
|
|
4314
|
+
vprord ymm13,ymm13,0x10
|
|
4315
|
+
vprord ymm14,ymm14,0x10
|
|
4316
|
+
vprord ymm15,ymm15,0x10
|
|
4317
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4318
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4319
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4320
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4321
|
+
vpxord ymm4,ymm4,ymm8
|
|
4322
|
+
vpxord ymm5,ymm5,ymm9
|
|
4323
|
+
vpxord ymm6,ymm6,ymm10
|
|
4324
|
+
vpxord ymm7,ymm7,ymm11
|
|
4325
|
+
vprord ymm4,ymm4,0xc
|
|
4326
|
+
vprord ymm5,ymm5,0xc
|
|
4327
|
+
vprord ymm6,ymm6,0xc
|
|
4328
|
+
vprord ymm7,ymm7,0xc
|
|
4329
|
+
vpaddd ymm0,ymm0,ymm31
|
|
4330
|
+
vpaddd ymm1,ymm1,ymm16
|
|
4331
|
+
vpaddd ymm2,ymm2,ymm25
|
|
4332
|
+
vpaddd ymm3,ymm3,ymm22
|
|
4333
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4334
|
+
vpaddd ymm1,ymm1,ymm5
|
|
4335
|
+
vpaddd ymm2,ymm2,ymm6
|
|
4336
|
+
vpaddd ymm3,ymm3,ymm7
|
|
4337
|
+
vpxord ymm12,ymm12,ymm0
|
|
4338
|
+
vpxord ymm13,ymm13,ymm1
|
|
4339
|
+
vpxord ymm14,ymm14,ymm2
|
|
4340
|
+
vpxord ymm15,ymm15,ymm3
|
|
4341
|
+
vprord ymm12,ymm12,0x8
|
|
4342
|
+
vprord ymm13,ymm13,0x8
|
|
4343
|
+
vprord ymm14,ymm14,0x8
|
|
4344
|
+
vprord ymm15,ymm15,0x8
|
|
4345
|
+
vpaddd ymm8,ymm8,ymm12
|
|
4346
|
+
vpaddd ymm9,ymm9,ymm13
|
|
4347
|
+
vpaddd ymm10,ymm10,ymm14
|
|
4348
|
+
vpaddd ymm11,ymm11,ymm15
|
|
4349
|
+
vpxord ymm4,ymm4,ymm8
|
|
4350
|
+
vpxord ymm5,ymm5,ymm9
|
|
4351
|
+
vpxord ymm6,ymm6,ymm10
|
|
4352
|
+
vpxord ymm7,ymm7,ymm11
|
|
4353
|
+
vprord ymm4,ymm4,0x7
|
|
4354
|
+
vprord ymm5,ymm5,0x7
|
|
4355
|
+
vprord ymm6,ymm6,0x7
|
|
4356
|
+
vprord ymm7,ymm7,0x7
|
|
4357
|
+
vpaddd ymm0,ymm0,ymm30
|
|
4358
|
+
vpaddd ymm1,ymm1,ymm18
|
|
4359
|
+
vpaddd ymm2,ymm2,ymm19
|
|
4360
|
+
vpaddd ymm3,ymm3,ymm23
|
|
4361
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4362
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4363
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4364
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4365
|
+
vpxord ymm15,ymm15,ymm0
|
|
4366
|
+
vpxord ymm12,ymm12,ymm1
|
|
4367
|
+
vpxord ymm13,ymm13,ymm2
|
|
4368
|
+
vpxord ymm14,ymm14,ymm3
|
|
4369
|
+
vprord ymm15,ymm15,0x10
|
|
4370
|
+
vprord ymm12,ymm12,0x10
|
|
4371
|
+
vprord ymm13,ymm13,0x10
|
|
4372
|
+
vprord ymm14,ymm14,0x10
|
|
4373
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4374
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4375
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4376
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4377
|
+
vpxord ymm5,ymm5,ymm10
|
|
4378
|
+
vpxord ymm6,ymm6,ymm11
|
|
4379
|
+
vpxord ymm7,ymm7,ymm8
|
|
4380
|
+
vpxord ymm4,ymm4,ymm9
|
|
4381
|
+
vprord ymm5,ymm5,0xc
|
|
4382
|
+
vprord ymm6,ymm6,0xc
|
|
4383
|
+
vprord ymm7,ymm7,0xc
|
|
4384
|
+
vprord ymm4,ymm4,0xc
|
|
4385
|
+
vpaddd ymm0,ymm0,ymm26
|
|
4386
|
+
vpaddd ymm1,ymm1,ymm28
|
|
4387
|
+
vpaddd ymm2,ymm2,ymm20
|
|
4388
|
+
vpaddd ymm3,ymm3,ymm29
|
|
4389
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4390
|
+
vpaddd ymm1,ymm1,ymm6
|
|
4391
|
+
vpaddd ymm2,ymm2,ymm7
|
|
4392
|
+
vpaddd ymm3,ymm3,ymm4
|
|
4393
|
+
vpxord ymm15,ymm15,ymm0
|
|
4394
|
+
vpxord ymm12,ymm12,ymm1
|
|
4395
|
+
vpxord ymm13,ymm13,ymm2
|
|
4396
|
+
vpxord ymm14,ymm14,ymm3
|
|
4397
|
+
vprord ymm15,ymm15,0x8
|
|
4398
|
+
vprord ymm12,ymm12,0x8
|
|
4399
|
+
vprord ymm13,ymm13,0x8
|
|
4400
|
+
vprord ymm14,ymm14,0x8
|
|
4401
|
+
vpaddd ymm10,ymm10,ymm15
|
|
4402
|
+
vpaddd ymm11,ymm11,ymm12
|
|
4403
|
+
vpaddd ymm8,ymm8,ymm13
|
|
4404
|
+
vpaddd ymm9,ymm9,ymm14
|
|
4405
|
+
vpxord ymm5,ymm5,ymm10
|
|
4406
|
+
vpxord ymm6,ymm6,ymm11
|
|
4407
|
+
vpxord ymm7,ymm7,ymm8
|
|
4408
|
+
vpxord ymm4,ymm4,ymm9
|
|
4409
|
+
vprord ymm5,ymm5,0x7
|
|
4410
|
+
vprord ymm6,ymm6,0x7
|
|
4411
|
+
vprord ymm7,ymm7,0x7
|
|
4412
|
+
vprord ymm4,ymm4,0x7
|
|
4413
|
+
vpxor ymm0,ymm0,ymm8
|
|
4414
|
+
vpxor ymm1,ymm1,ymm9
|
|
4415
|
+
vpxor ymm2,ymm2,ymm10
|
|
4416
|
+
vpxor ymm3,ymm3,ymm11
|
|
4417
|
+
vpxor ymm4,ymm4,ymm12
|
|
4418
|
+
vpxor ymm5,ymm5,ymm13
|
|
4419
|
+
vpxor ymm6,ymm6,ymm14
|
|
4420
|
+
vpxor ymm7,ymm7,ymm15
|
|
4421
|
+
vpxord ymm8,ymm8,DWORD PTR [rdi]{1to8}
|
|
4422
|
+
vpxord ymm9,ymm9,DWORD PTR [rdi+0x4]{1to8}
|
|
4423
|
+
vpxord ymm10,ymm10,DWORD PTR [rdi+0x8]{1to8}
|
|
4424
|
+
vpxord ymm11,ymm11,DWORD PTR [rdi+0xc]{1to8}
|
|
4425
|
+
vpxord ymm12,ymm12,DWORD PTR [rdi+0x10]{1to8}
|
|
4426
|
+
vpxord ymm13,ymm13,DWORD PTR [rdi+0x14]{1to8}
|
|
4427
|
+
vpxord ymm14,ymm14,DWORD PTR [rdi+0x18]{1to8}
|
|
4428
|
+
vpxord ymm15,ymm15,DWORD PTR [rdi+0x1c]{1to8}
|
|
4429
|
+
vpunpckldq ymm16,ymm0,ymm1
|
|
4430
|
+
vpunpckhdq ymm17,ymm0,ymm1
|
|
4431
|
+
vpunpckldq ymm18,ymm2,ymm3
|
|
4432
|
+
vpunpckhdq ymm19,ymm2,ymm3
|
|
4433
|
+
vpunpckldq ymm20,ymm4,ymm5
|
|
4434
|
+
vpunpckhdq ymm21,ymm4,ymm5
|
|
4435
|
+
vpunpckldq ymm22,ymm6,ymm7
|
|
4436
|
+
vpunpckhdq ymm23,ymm6,ymm7
|
|
4437
|
+
vpunpckldq ymm24,ymm8,ymm9
|
|
4438
|
+
vpunpckhdq ymm25,ymm8,ymm9
|
|
4439
|
+
vpunpckldq ymm26,ymm10,ymm11
|
|
4440
|
+
vpunpckhdq ymm27,ymm10,ymm11
|
|
4441
|
+
vpunpckldq ymm28,ymm12,ymm13
|
|
4442
|
+
vpunpckhdq ymm29,ymm12,ymm13
|
|
4443
|
+
vpunpckldq ymm30,ymm14,ymm15
|
|
4444
|
+
vpunpckhdq ymm31,ymm14,ymm15
|
|
4445
|
+
vpunpcklqdq ymm0,ymm16,ymm18
|
|
4446
|
+
vpunpckhqdq ymm1,ymm16,ymm18
|
|
4447
|
+
vpunpcklqdq ymm2,ymm17,ymm19
|
|
4448
|
+
vpunpckhqdq ymm3,ymm17,ymm19
|
|
4449
|
+
vpunpcklqdq ymm4,ymm20,ymm22
|
|
4450
|
+
vpunpckhqdq ymm5,ymm20,ymm22
|
|
4451
|
+
vpunpcklqdq ymm6,ymm21,ymm23
|
|
4452
|
+
vpunpckhqdq ymm7,ymm21,ymm23
|
|
4453
|
+
vpunpcklqdq ymm8,ymm24,ymm26
|
|
4454
|
+
vpunpckhqdq ymm9,ymm24,ymm26
|
|
4455
|
+
vpunpcklqdq ymm10,ymm25,ymm27
|
|
4456
|
+
vpunpckhqdq ymm11,ymm25,ymm27
|
|
4457
|
+
vpunpcklqdq ymm12,ymm28,ymm30
|
|
4458
|
+
vpunpckhqdq ymm13,ymm28,ymm30
|
|
4459
|
+
vpunpcklqdq ymm14,ymm29,ymm31
|
|
4460
|
+
vpunpckhqdq ymm15,ymm29,ymm31
|
|
4461
|
+
vshufi32x4 ymm16,ymm0,ymm4,0x0
|
|
4462
|
+
vshufi32x4 ymm17,ymm8,ymm12,0x0
|
|
4463
|
+
vshufi32x4 ymm18,ymm1,ymm5,0x0
|
|
4464
|
+
vshufi32x4 ymm19,ymm9,ymm13,0x0
|
|
4465
|
+
vshufi32x4 ymm20,ymm2,ymm6,0x0
|
|
4466
|
+
vshufi32x4 ymm21,ymm10,ymm14,0x0
|
|
4467
|
+
vshufi32x4 ymm22,ymm3,ymm7,0x0
|
|
4468
|
+
vshufi32x4 ymm23,ymm11,ymm15,0x0
|
|
4469
|
+
vshufi32x4 ymm24,ymm0,ymm4,0x3
|
|
4470
|
+
vshufi32x4 ymm25,ymm8,ymm12,0x3
|
|
4471
|
+
vshufi32x4 ymm26,ymm1,ymm5,0x3
|
|
4472
|
+
vshufi32x4 ymm27,ymm9,ymm13,0x3
|
|
4473
|
+
vshufi32x4 ymm28,ymm2,ymm6,0x3
|
|
4474
|
+
vshufi32x4 ymm29,ymm10,ymm14,0x3
|
|
4475
|
+
vshufi32x4 ymm30,ymm3,ymm7,0x3
|
|
4476
|
+
vshufi32x4 ymm31,ymm11,ymm15,0x3
|
|
4477
|
+
vmovdqu32 YMMWORD PTR [r9],ymm16
|
|
4478
|
+
vmovdqu32 YMMWORD PTR [r9+0x20],ymm17
|
|
4479
|
+
vmovdqu32 YMMWORD PTR [r9+0x40],ymm18
|
|
4480
|
+
vmovdqu32 YMMWORD PTR [r9+0x60],ymm19
|
|
4481
|
+
vmovdqu32 YMMWORD PTR [r9+0x80],ymm20
|
|
4482
|
+
vmovdqu32 YMMWORD PTR [r9+0xa0],ymm21
|
|
4483
|
+
vmovdqu32 YMMWORD PTR [r9+0xc0],ymm22
|
|
4484
|
+
vmovdqu32 YMMWORD PTR [r9+0xe0],ymm23
|
|
4485
|
+
vmovdqu32 YMMWORD PTR [r9+0x100],ymm24
|
|
4486
|
+
vmovdqu32 YMMWORD PTR [r9+0x120],ymm25
|
|
4487
|
+
vmovdqu32 YMMWORD PTR [r9+0x140],ymm26
|
|
4488
|
+
vmovdqu32 YMMWORD PTR [r9+0x160],ymm27
|
|
4489
|
+
vmovdqu32 YMMWORD PTR [r9+0x180],ymm28
|
|
4490
|
+
vmovdqu32 YMMWORD PTR [r9+0x1a0],ymm29
|
|
4491
|
+
vmovdqu32 YMMWORD PTR [r9+0x1c0],ymm30
|
|
4492
|
+
vmovdqu32 YMMWORD PTR [r9+0x1e0],ymm31
|
|
4493
|
+
vmovdqa ymm0,YMMWORD PTR [rsp+0x20]
|
|
4494
|
+
vmovdqa ymm1,YMMWORD PTR [rsp+0x60]
|
|
4495
|
+
vmovdqa YMMWORD PTR [rsp],ymm0
|
|
4496
|
+
vmovdqa YMMWORD PTR [rsp+0x40],ymm1
|
|
4497
|
+
add r9,0x200
|
|
4498
|
+
sub r10,0x8
|
|
4499
|
+
2:
|
|
4500
|
+
test r10,0x4
|
|
4501
|
+
je 2f
|
|
4502
|
+
vbroadcasti32x4 zmm0,XMMWORD PTR [rdi]
|
|
4503
|
+
vbroadcasti32x4 zmm1,XMMWORD PTR [rdi+0x10]
|
|
4504
|
+
vbroadcasti32x4 zmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
|
4505
|
+
vmovdqa xmm12,XMMWORD PTR [rsp]
|
|
4506
|
+
vmovdqa xmm13,XMMWORD PTR [rsp+0x40]
|
|
4507
|
+
vpunpckldq xmm14,xmm12,xmm13
|
|
4508
|
+
vpunpckhdq xmm15,xmm12,xmm13
|
|
4509
|
+
vpermq ymm14,ymm14,0xdc
|
|
4510
|
+
vpermq ymm15,ymm15,0xdc
|
|
4511
|
+
vpbroadcastd zmm12,edx
|
|
4512
|
+
vinserti64x4 zmm13,zmm14,ymm15,0x1
|
|
4513
|
+
mov eax,0x4444
|
|
4514
|
+
kmovw k2,eax
|
|
4515
|
+
vpblendmd zmm13{k2},zmm13,zmm12
|
|
4516
|
+
vpbroadcastd zmm15,r8d
|
|
4517
|
+
mov eax,0x8888
|
|
4518
|
+
kmovw k4,eax
|
|
4519
|
+
vpblendmd zmm3{k4},zmm13,zmm15
|
|
4520
|
+
mov eax,0xaaaa
|
|
4521
|
+
kmovw k3,eax
|
|
4522
|
+
vbroadcasti32x4 zmm8,XMMWORD PTR [rsi]
|
|
4523
|
+
vbroadcasti32x4 zmm9,XMMWORD PTR [rsi+0x10]
|
|
4524
|
+
vshufps zmm4,zmm8,zmm9,0x88
|
|
4525
|
+
vshufps zmm5,zmm8,zmm9,0xdd
|
|
4526
|
+
vbroadcasti32x4 zmm8,XMMWORD PTR [rsi+0x20]
|
|
4527
|
+
vbroadcasti32x4 zmm9,XMMWORD PTR [rsi+0x30]
|
|
4528
|
+
vshufps zmm6,zmm8,zmm9,0x88
|
|
4529
|
+
vshufps zmm7,zmm8,zmm9,0xdd
|
|
4530
|
+
vpshufd zmm6,zmm6,0x93
|
|
4531
|
+
vpshufd zmm7,zmm7,0x93
|
|
4532
|
+
mov al,0x7
|
|
4533
|
+
3:
|
|
4534
|
+
vpaddd zmm0,zmm0,zmm4
|
|
4535
|
+
vpaddd zmm0,zmm0,zmm1
|
|
4536
|
+
vpxord zmm3,zmm3,zmm0
|
|
4537
|
+
vprord zmm3,zmm3,0x10
|
|
4538
|
+
vpaddd zmm2,zmm2,zmm3
|
|
4539
|
+
vpxord zmm1,zmm1,zmm2
|
|
4540
|
+
vprord zmm1,zmm1,0xc
|
|
4541
|
+
vpaddd zmm0,zmm0,zmm5
|
|
4542
|
+
vpaddd zmm0,zmm0,zmm1
|
|
4543
|
+
vpxord zmm3,zmm3,zmm0
|
|
4544
|
+
vprord zmm3,zmm3,0x8
|
|
4545
|
+
vpaddd zmm2,zmm2,zmm3
|
|
4546
|
+
vpxord zmm1,zmm1,zmm2
|
|
4547
|
+
vprord zmm1,zmm1,0x7
|
|
4548
|
+
vpshufd zmm0,zmm0,0x93
|
|
4549
|
+
vpshufd zmm3,zmm3,0x4e
|
|
4550
|
+
vpshufd zmm2,zmm2,0x39
|
|
4551
|
+
vpaddd zmm0,zmm0,zmm6
|
|
4552
|
+
vpaddd zmm0,zmm0,zmm1
|
|
4553
|
+
vpxord zmm3,zmm3,zmm0
|
|
4554
|
+
vprord zmm3,zmm3,0x10
|
|
4555
|
+
vpaddd zmm2,zmm2,zmm3
|
|
4556
|
+
vpxord zmm1,zmm1,zmm2
|
|
4557
|
+
vprord zmm1,zmm1,0xc
|
|
4558
|
+
vpaddd zmm0,zmm0,zmm7
|
|
4559
|
+
vpaddd zmm0,zmm0,zmm1
|
|
4560
|
+
vpxord zmm3,zmm3,zmm0
|
|
4561
|
+
vprord zmm3,zmm3,0x8
|
|
4562
|
+
vpaddd zmm2,zmm2,zmm3
|
|
4563
|
+
vpxord zmm1,zmm1,zmm2
|
|
4564
|
+
vprord zmm1,zmm1,0x7
|
|
4565
|
+
vpshufd zmm0,zmm0,0x39
|
|
4566
|
+
vpshufd zmm3,zmm3,0x4e
|
|
4567
|
+
vpshufd zmm2,zmm2,0x93
|
|
4568
|
+
dec al
|
|
4569
|
+
je 3f
|
|
4570
|
+
vshufps zmm8,zmm4,zmm5,0xd6
|
|
4571
|
+
vpshufd zmm9,zmm4,0xf
|
|
4572
|
+
vpshufd zmm4,zmm8,0x39
|
|
4573
|
+
vshufps zmm8,zmm6,zmm7,0xfa
|
|
4574
|
+
vpblendmd zmm9{k3},zmm9,zmm8
|
|
4575
|
+
vpunpcklqdq zmm8,zmm7,zmm5
|
|
4576
|
+
vpblendmd zmm8{k4},zmm8,zmm6
|
|
4577
|
+
vpshufd zmm8,zmm8,0x78
|
|
4578
|
+
vpunpckhdq zmm5,zmm5,zmm7
|
|
4579
|
+
vpunpckldq zmm6,zmm6,zmm5
|
|
4580
|
+
vpshufd zmm7,zmm6,0x1e
|
|
4581
|
+
vmovdqa32 zmm5,zmm9
|
|
4582
|
+
vmovdqa32 zmm6,zmm8
|
|
4583
|
+
jmp 3b
|
|
4584
|
+
3:
|
|
4585
|
+
vpxord zmm0,zmm0,zmm2
|
|
4586
|
+
vpxord zmm1,zmm1,zmm3
|
|
4587
|
+
vbroadcasti32x4 zmm8,XMMWORD PTR [rdi]
|
|
4588
|
+
vbroadcasti32x4 zmm9,XMMWORD PTR [rdi+0x10]
|
|
4589
|
+
vpxord zmm2,zmm2,zmm8
|
|
4590
|
+
vpxord zmm3,zmm3,zmm9
|
|
4591
|
+
vmovdqu XMMWORD PTR [r9],xmm0
|
|
4592
|
+
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
|
4593
|
+
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
|
4594
|
+
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
|
4595
|
+
vextracti128 XMMWORD PTR [r9+0x40],ymm0,0x1
|
|
4596
|
+
vextracti128 XMMWORD PTR [r9+0x50],ymm1,0x1
|
|
4597
|
+
vextracti128 XMMWORD PTR [r9+0x60],ymm2,0x1
|
|
4598
|
+
vextracti128 XMMWORD PTR [r9+0x70],ymm3,0x1
|
|
4599
|
+
vextracti32x4 XMMWORD PTR [r9+0x80],zmm0,0x2
|
|
4600
|
+
vextracti32x4 XMMWORD PTR [r9+0x90],zmm1,0x2
|
|
4601
|
+
vextracti32x4 XMMWORD PTR [r9+0xa0],zmm2,0x2
|
|
4602
|
+
vextracti32x4 XMMWORD PTR [r9+0xb0],zmm3,0x2
|
|
4603
|
+
vextracti32x4 XMMWORD PTR [r9+0xc0],zmm0,0x3
|
|
4604
|
+
vextracti32x4 XMMWORD PTR [r9+0xd0],zmm1,0x3
|
|
4605
|
+
vextracti32x4 XMMWORD PTR [r9+0xe0],zmm2,0x3
|
|
4606
|
+
vextracti32x4 XMMWORD PTR [r9+0xf0],zmm3,0x3
|
|
4607
|
+
vmovdqa xmm0,XMMWORD PTR [rsp+0x10]
|
|
4608
|
+
vmovdqa xmm1,XMMWORD PTR [rsp+0x50]
|
|
4609
|
+
vmovdqa XMMWORD PTR [rsp],xmm0
|
|
4610
|
+
vmovdqa XMMWORD PTR [rsp+0x40],xmm1
|
|
4611
|
+
add r9,0x100
|
|
4612
|
+
sub r10,0x4
|
|
4613
|
+
2:
|
|
4614
|
+
test r10,0x2
|
|
4615
|
+
je 2f
|
|
4616
|
+
vbroadcasti128 ymm0,XMMWORD PTR [rdi]
|
|
4617
|
+
vbroadcasti128 ymm1,XMMWORD PTR [rdi+0x10]
|
|
4618
|
+
vmovd xmm13,DWORD PTR [rsp]
|
|
4619
|
+
vpinsrd xmm13,xmm13,DWORD PTR [rsp+0x40],0x1
|
|
4620
|
+
vpinsrd xmm13,xmm13,edx,0x2
|
|
4621
|
+
vmovd xmm14,DWORD PTR [rsp+0x4]
|
|
4622
|
+
vpinsrd xmm14,xmm14,DWORD PTR [rsp+0x44],0x1
|
|
4623
|
+
vpinsrd xmm14,xmm14,edx,0x2
|
|
4624
|
+
vinserti128 ymm13,ymm13,xmm14,0x1
|
|
4625
|
+
vbroadcasti128 ymm2,XMMWORD PTR [BLAKE3_IV+rip]
|
|
4626
|
+
vpbroadcastd ymm8,r8d
|
|
4627
|
+
vpblendd ymm3,ymm13,ymm8,0x88
|
|
4628
|
+
vbroadcasti128 ymm8,XMMWORD PTR [rsi]
|
|
4629
|
+
vbroadcasti128 ymm9,XMMWORD PTR [rsi+0x10]
|
|
4630
|
+
vshufps ymm4,ymm8,ymm9,0x88
|
|
4631
|
+
vshufps ymm5,ymm8,ymm9,0xdd
|
|
4632
|
+
vbroadcasti128 ymm8,XMMWORD PTR [rsi+0x20]
|
|
4633
|
+
vbroadcasti128 ymm9,XMMWORD PTR [rsi+0x30]
|
|
4634
|
+
vshufps ymm6,ymm8,ymm9,0x88
|
|
4635
|
+
vshufps ymm7,ymm8,ymm9,0xdd
|
|
4636
|
+
vpshufd ymm6,ymm6,0x93
|
|
4637
|
+
vpshufd ymm7,ymm7,0x93
|
|
4638
|
+
mov al,0x7
|
|
4639
|
+
3:
|
|
4640
|
+
vpaddd ymm0,ymm0,ymm4
|
|
4641
|
+
vpaddd ymm0,ymm0,ymm1
|
|
4642
|
+
vpxord ymm3,ymm3,ymm0
|
|
4643
|
+
vprord ymm3,ymm3,0x10
|
|
4644
|
+
vpaddd ymm2,ymm2,ymm3
|
|
4645
|
+
vpxord ymm1,ymm1,ymm2
|
|
4646
|
+
vprord ymm1,ymm1,0xc
|
|
4647
|
+
vpaddd ymm0,ymm0,ymm5
|
|
4648
|
+
vpaddd ymm0,ymm0,ymm1
|
|
4649
|
+
vpxord ymm3,ymm3,ymm0
|
|
4650
|
+
vprord ymm3,ymm3,0x8
|
|
4651
|
+
vpaddd ymm2,ymm2,ymm3
|
|
4652
|
+
vpxord ymm1,ymm1,ymm2
|
|
4653
|
+
vprord ymm1,ymm1,0x7
|
|
4654
|
+
vpshufd ymm0,ymm0,0x93
|
|
4655
|
+
vpshufd ymm3,ymm3,0x4e
|
|
4656
|
+
vpshufd ymm2,ymm2,0x39
|
|
4657
|
+
vpaddd ymm0,ymm0,ymm6
|
|
4658
|
+
vpaddd ymm0,ymm0,ymm1
|
|
4659
|
+
vpxord ymm3,ymm3,ymm0
|
|
4660
|
+
vprord ymm3,ymm3,0x10
|
|
4661
|
+
vpaddd ymm2,ymm2,ymm3
|
|
4662
|
+
vpxord ymm1,ymm1,ymm2
|
|
4663
|
+
vprord ymm1,ymm1,0xc
|
|
4664
|
+
vpaddd ymm0,ymm0,ymm7
|
|
4665
|
+
vpaddd ymm0,ymm0,ymm1
|
|
4666
|
+
vpxord ymm3,ymm3,ymm0
|
|
4667
|
+
vprord ymm3,ymm3,0x8
|
|
4668
|
+
vpaddd ymm2,ymm2,ymm3
|
|
4669
|
+
vpxord ymm1,ymm1,ymm2
|
|
4670
|
+
vprord ymm1,ymm1,0x7
|
|
4671
|
+
vpshufd ymm0,ymm0,0x39
|
|
4672
|
+
vpshufd ymm3,ymm3,0x4e
|
|
4673
|
+
vpshufd ymm2,ymm2,0x93
|
|
4674
|
+
dec al
|
|
4675
|
+
je 3f
|
|
4676
|
+
vshufps ymm8,ymm4,ymm5,0xd6
|
|
4677
|
+
vpshufd ymm9,ymm4,0xf
|
|
4678
|
+
vpshufd ymm4,ymm8,0x39
|
|
4679
|
+
vshufps ymm8,ymm6,ymm7,0xfa
|
|
4680
|
+
vpblendd ymm9,ymm9,ymm8,0xaa
|
|
4681
|
+
vpunpcklqdq ymm8,ymm7,ymm5
|
|
4682
|
+
vpblendd ymm8,ymm8,ymm6,0x88
|
|
4683
|
+
vpshufd ymm8,ymm8,0x78
|
|
4684
|
+
vpunpckhdq ymm5,ymm5,ymm7
|
|
4685
|
+
vpunpckldq ymm6,ymm6,ymm5
|
|
4686
|
+
vpshufd ymm7,ymm6,0x1e
|
|
4687
|
+
vmovdqa ymm5,ymm9
|
|
4688
|
+
vmovdqa ymm6,ymm8
|
|
4689
|
+
jmp 3b
|
|
4690
|
+
3:
|
|
4691
|
+
vpxor ymm0,ymm0,ymm2
|
|
4692
|
+
vpxor ymm1,ymm1,ymm3
|
|
4693
|
+
vbroadcasti128 ymm8,XMMWORD PTR [rdi]
|
|
4694
|
+
vbroadcasti128 ymm9,XMMWORD PTR [rdi+0x10]
|
|
4695
|
+
vpxor ymm2,ymm2,ymm8
|
|
4696
|
+
vpxor ymm3,ymm3,ymm9
|
|
4697
|
+
vmovdqu XMMWORD PTR [r9],xmm0
|
|
4698
|
+
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
|
4699
|
+
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
|
4700
|
+
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
|
4701
|
+
vextracti128 XMMWORD PTR [r9+0x40],ymm0,0x1
|
|
4702
|
+
vextracti128 XMMWORD PTR [r9+0x50],ymm1,0x1
|
|
4703
|
+
vextracti128 XMMWORD PTR [r9+0x60],ymm2,0x1
|
|
4704
|
+
vextracti128 XMMWORD PTR [r9+0x70],ymm3,0x1
|
|
4705
|
+
vmovdqu xmm0,XMMWORD PTR [rsp+0x8]
|
|
4706
|
+
vmovdqu xmm1,XMMWORD PTR [rsp+0x48]
|
|
4707
|
+
vmovdqa XMMWORD PTR [rsp],xmm0
|
|
4708
|
+
vmovdqa XMMWORD PTR [rsp+0x40],xmm1
|
|
4709
|
+
add r9,0x80
|
|
4710
|
+
sub r10,0x2
|
|
4711
|
+
2:
|
|
4712
|
+
test r10,0x1
|
|
4713
|
+
je 9b
|
|
4714
|
+
vmovdqu xmm0,XMMWORD PTR [rdi]
|
|
4715
|
+
vmovdqu xmm1,XMMWORD PTR [rdi+0x10]
|
|
4716
|
+
vmovd xmm14,DWORD PTR [rsp]
|
|
4717
|
+
vpinsrd xmm14,xmm14,DWORD PTR [rsp+0x40],0x1
|
|
4718
|
+
vpinsrd xmm14,xmm14,edx,0x2
|
|
4719
|
+
vmovdqa xmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
|
4720
|
+
vpinsrd xmm3,xmm14,r8d,0x3
|
|
4721
|
+
vmovups xmm8,XMMWORD PTR [rsi]
|
|
4722
|
+
vmovups xmm9,XMMWORD PTR [rsi+0x10]
|
|
4723
|
+
vshufps xmm4,xmm8,xmm9,0x88
|
|
4724
|
+
vshufps xmm5,xmm8,xmm9,0xdd
|
|
4725
|
+
vmovups xmm8,XMMWORD PTR [rsi+0x20]
|
|
4726
|
+
vmovups xmm9,XMMWORD PTR [rsi+0x30]
|
|
4727
|
+
vshufps xmm6,xmm8,xmm9,0x88
|
|
4728
|
+
vshufps xmm7,xmm8,xmm9,0xdd
|
|
4729
|
+
vpshufd xmm6,xmm6,0x93
|
|
4730
|
+
vpshufd xmm7,xmm7,0x93
|
|
4731
|
+
mov al,0x7
|
|
4732
|
+
3:
|
|
4733
|
+
vpaddd xmm0,xmm0,xmm4
|
|
4734
|
+
vpaddd xmm0,xmm0,xmm1
|
|
4735
|
+
vpxord xmm3,xmm3,xmm0
|
|
4736
|
+
vprord xmm3,xmm3,0x10
|
|
4737
|
+
vpaddd xmm2,xmm2,xmm3
|
|
4738
|
+
vpxord xmm1,xmm1,xmm2
|
|
4739
|
+
vprord xmm1,xmm1,0xc
|
|
4740
|
+
vpaddd xmm0,xmm0,xmm5
|
|
4741
|
+
vpaddd xmm0,xmm0,xmm1
|
|
4742
|
+
vpxord xmm3,xmm3,xmm0
|
|
4743
|
+
vprord xmm3,xmm3,0x8
|
|
4744
|
+
vpaddd xmm2,xmm2,xmm3
|
|
4745
|
+
vpxord xmm1,xmm1,xmm2
|
|
4746
|
+
vprord xmm1,xmm1,0x7
|
|
4747
|
+
vpshufd xmm0,xmm0,0x93
|
|
4748
|
+
vpshufd xmm3,xmm3,0x4e
|
|
4749
|
+
vpshufd xmm2,xmm2,0x39
|
|
4750
|
+
vpaddd xmm0,xmm0,xmm6
|
|
4751
|
+
vpaddd xmm0,xmm0,xmm1
|
|
4752
|
+
vpxord xmm3,xmm3,xmm0
|
|
4753
|
+
vprord xmm3,xmm3,0x10
|
|
4754
|
+
vpaddd xmm2,xmm2,xmm3
|
|
4755
|
+
vpxord xmm1,xmm1,xmm2
|
|
4756
|
+
vprord xmm1,xmm1,0xc
|
|
4757
|
+
vpaddd xmm0,xmm0,xmm7
|
|
4758
|
+
vpaddd xmm0,xmm0,xmm1
|
|
4759
|
+
vpxord xmm3,xmm3,xmm0
|
|
4760
|
+
vprord xmm3,xmm3,0x8
|
|
4761
|
+
vpaddd xmm2,xmm2,xmm3
|
|
4762
|
+
vpxord xmm1,xmm1,xmm2
|
|
4763
|
+
vprord xmm1,xmm1,0x7
|
|
4764
|
+
vpshufd xmm0,xmm0,0x39
|
|
4765
|
+
vpshufd xmm3,xmm3,0x4e
|
|
4766
|
+
vpshufd xmm2,xmm2,0x93
|
|
4767
|
+
dec al
|
|
4768
|
+
je 3f
|
|
4769
|
+
vshufps xmm8,xmm4,xmm5,0xd6
|
|
4770
|
+
vpshufd xmm9,xmm4,0xf
|
|
4771
|
+
vpshufd xmm4,xmm8,0x39
|
|
4772
|
+
vshufps xmm8,xmm6,xmm7,0xfa
|
|
4773
|
+
vpblendd xmm9,xmm9,xmm8,0xaa
|
|
4774
|
+
vpunpcklqdq xmm8,xmm7,xmm5
|
|
4775
|
+
vpblendd xmm8,xmm8,xmm6,0x88
|
|
4776
|
+
vpshufd xmm8,xmm8,0x78
|
|
4777
|
+
vpunpckhdq xmm5,xmm5,xmm7
|
|
4778
|
+
vpunpckldq xmm6,xmm6,xmm5
|
|
4779
|
+
vpshufd xmm7,xmm6,0x1e
|
|
4780
|
+
vmovdqa xmm5,xmm9
|
|
4781
|
+
vmovdqa xmm6,xmm8
|
|
4782
|
+
jmp 3b
|
|
4783
|
+
3:
|
|
4784
|
+
vpxor xmm0,xmm0,xmm2
|
|
4785
|
+
vpxor xmm1,xmm1,xmm3
|
|
4786
|
+
vpxor xmm2,xmm2,XMMWORD PTR [rdi]
|
|
4787
|
+
vpxor xmm3,xmm3,XMMWORD PTR [rdi+0x10]
|
|
4788
|
+
vmovdqu XMMWORD PTR [r9],xmm0
|
|
4789
|
+
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
|
4790
|
+
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
|
4791
|
+
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
|
4792
|
+
jmp 9b
|
|
4793
|
+
|
|
4794
|
+
|
|
4795
|
+
/* Constant tables referenced by the vector code in this file. */
/* Section selection: the .static_data directive when __APPLE__ is defined, */
/* an explicit .section .rodata for every other target. */
#ifdef __APPLE__
|
|
4796
|
+
.static_data
|
|
4797
|
+
#else
|
|
4798
|
+
.section .rodata
|
|
4799
|
+
#endif
|
|
4800
|
+
/* Start the tables on a 64-byte (2^6) boundary. */
.p2align 6
|
|
4801
|
+
/* INDEX0: 16 dwords. NOTE(review): looks like dword indices for a two-source */
/* permute (values >= 16 would pick from the second source); the consumer is */
/* not visible in this chunk - confirm against the code that loads it. */
INDEX0:
|
|
4802
|
+
.long 0, 1, 2, 3, 16, 17, 18, 19
|
|
4803
|
+
.long 8, 9, 10, 11, 24, 25, 26, 27
|
|
4804
|
+
/* INDEX1: 16 dwords, the INDEX0 values shifted up by 4 (4..7, 20..23, ...). */
/* NOTE(review): presumably the companion permute selecting the other dwords - confirm. */
INDEX1:
|
|
4805
|
+
.long 4, 5, 6, 7, 20, 21, 22, 23
|
|
4806
|
+
.long 12, 13, 14, 15, 28, 29, 30, 31
|
|
4807
|
+
/* ADD0: the 16 dwords 0..15 in order. NOTE(review): presumably per-lane */
/* offsets added to a broadcast base value - usage not visible here, confirm. */
ADD0:
|
|
4808
|
+
.long 0, 1, 2, 3, 4, 5, 6, 7
|
|
4809
|
+
.long 8, 9, 10, 11, 12, 13, 14, 15
|
|
4810
|
+
/* ADD1: a single dword holding 1. */
ADD1: .long 1
|
|
4811
|
+
|
|
4812
|
+
/* ADD16: a single dword holding 16. */
ADD16: .long 16
|
|
4813
|
+
/* BLAKE3_BLOCK_LEN: a single dword holding 64. */
BLAKE3_BLOCK_LEN:
|
|
4814
|
+
.long 64
|
|
4815
|
+
/* Realign to 64 bytes: code in this file reads BLAKE3_IV as one 128-bit */
/* vector, including an aligned vmovdqa load, which needs 16-byte alignment. */
.p2align 6
|
|
4816
|
+
/* BLAKE3_IV: four consecutive dwords. BLAKE3_IV_0..BLAKE3_IV_3 label the */
/* individual words so each can also be addressed on its own. */
BLAKE3_IV:
|
|
4817
|
+
BLAKE3_IV_0:
|
|
4818
|
+
.long 0x6A09E667
|
|
4819
|
+
BLAKE3_IV_1:
|
|
4820
|
+
.long 0xBB67AE85
|
|
4821
|
+
BLAKE3_IV_2:
|
|
4822
|
+
.long 0x3C6EF372
|
|
4823
|
+
BLAKE3_IV_3:
|
|
4824
|
+
.long 0xA54FF53A
|