@leocuvee/wrkzcoin-multi-hashing 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.travis.yml +5 -0
- package/LICENSE +674 -0
- package/README.md +87 -0
- package/appveyor.yml +12 -0
- package/argon2/.gitattributes +10 -0
- package/argon2/.travis.yml +25 -0
- package/argon2/Argon2.sln +160 -0
- package/argon2/CHANGELOG.md +32 -0
- package/argon2/CMakeLists.txt +87 -0
- package/argon2/LICENSE +314 -0
- package/argon2/Makefile +196 -0
- package/argon2/README.md +297 -0
- package/argon2/appveyor.yml +40 -0
- package/argon2/argon2-specs.pdf +0 -0
- package/argon2/export.sh +7 -0
- package/argon2/include/argon2.h +427 -0
- package/argon2/latex/CMakeLists.txt +34 -0
- package/argon2/latex/IEEEtran.cls +6347 -0
- package/argon2/latex/Makefile +18 -0
- package/argon2/latex/argon2-specs.tex +920 -0
- package/argon2/latex/pics/argon2-par.pdf +0 -0
- package/argon2/latex/pics/compression.pdf +0 -0
- package/argon2/latex/pics/generic.pdf +0 -0
- package/argon2/latex/pics/power-distribution.jpg +0 -0
- package/argon2/latex/tradeoff.bib +822 -0
- package/argon2/libargon2.pc +16 -0
- package/argon2/man/CMakeLists.txt +8 -0
- package/argon2/man/argon2.1 +57 -0
- package/argon2/meson.build +16 -0
- package/argon2/meson_options.txt +1 -0
- package/argon2/src/CMakeLists.txt +147 -0
- package/argon2/src/argon2.c +452 -0
- package/argon2/src/argon2.pc.in +11 -0
- package/argon2/src/blake2/blake2-impl.h +156 -0
- package/argon2/src/blake2/blake2.h +89 -0
- package/argon2/src/blake2/blake2b.c +390 -0
- package/argon2/src/blake2/blamka-round-opt.h +471 -0
- package/argon2/src/blake2/blamka-round-ref.h +56 -0
- package/argon2/src/core.c +634 -0
- package/argon2/src/core.h +228 -0
- package/argon2/src/encoding.c +467 -0
- package/argon2/src/encoding.h +57 -0
- package/argon2/src/genkat.h +51 -0
- package/argon2/src/meson.build +68 -0
- package/argon2/src/opt.c +283 -0
- package/argon2/src/optimization/CMakeLists.txt +10 -0
- package/argon2/src/ref.c +194 -0
- package/argon2/src/thread.c +57 -0
- package/argon2/src/thread.h +67 -0
- package/argon2/tests/CMakeLists.txt +43 -0
- package/argon2/tests/bench.c +111 -0
- package/argon2/tests/genkat.c +207 -0
- package/argon2/tests/kats/argon2d +12304 -0
- package/argon2/tests/kats/argon2d.shasum +1 -0
- package/argon2/tests/kats/argon2d_v16 +12304 -0
- package/argon2/tests/kats/argon2d_v16.shasum +1 -0
- package/argon2/tests/kats/argon2i +12304 -0
- package/argon2/tests/kats/argon2i.shasum +1 -0
- package/argon2/tests/kats/argon2i_v16 +12304 -0
- package/argon2/tests/kats/argon2i_v16.shasum +1 -0
- package/argon2/tests/kats/argon2id +12304 -0
- package/argon2/tests/kats/argon2id.shasum +1 -0
- package/argon2/tests/kats/argon2id_v16 +12304 -0
- package/argon2/tests/kats/argon2id_v16.shasum +1 -0
- package/argon2/tests/kats/check-sums.ps1 +48 -0
- package/argon2/tests/kats/check-sums.sh +16 -0
- package/argon2/tests/kats/test.ps1 +132 -0
- package/argon2/tests/kats/test.sh +117 -0
- package/argon2/tests/meson.build +34 -0
- package/argon2/tests/test.c +289 -0
- package/argon2/tool/CMakeLists.txt +7 -0
- package/argon2/tool/main.c +339 -0
- package/argon2/tool/meson.build +8 -0
- package/argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj +226 -0
- package/argon2/vs2015/Argon2Opt/Argon2Opt.vcxproj.filters +69 -0
- package/argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj +226 -0
- package/argon2/vs2015/Argon2OptBench/Argon2OptBench.vcxproj.filters +69 -0
- package/argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj +225 -0
- package/argon2/vs2015/Argon2OptDll/Argon2OptDll.vcxproj.filters +66 -0
- package/argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj +239 -0
- package/argon2/vs2015/Argon2OptGenKAT/Argon2OptGenKAT.vcxproj.filters +72 -0
- package/argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj +227 -0
- package/argon2/vs2015/Argon2OptTestCI/Argon2OptTestCI.vcxproj.filters +69 -0
- package/argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj +226 -0
- package/argon2/vs2015/Argon2Ref/Argon2Ref.vcxproj.filters +69 -0
- package/argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj +226 -0
- package/argon2/vs2015/Argon2RefBench/Argon2RefBench.vcxproj.filters +69 -0
- package/argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj +225 -0
- package/argon2/vs2015/Argon2RefDll/Argon2RefDll.vcxproj.filters +66 -0
- package/argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj +227 -0
- package/argon2/vs2015/Argon2RefGenKAT/Argon2RefGenKAT.vcxproj.filters +72 -0
- package/argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj +226 -0
- package/argon2/vs2015/Argon2RefTestCI/Argon2RefTestCI.vcxproj.filters +69 -0
- package/bcrypt.c +566 -0
- package/bcrypt.h +14 -0
- package/binding.gyp +93 -0
- package/blake.c +17 -0
- package/blake.h +16 -0
- package/boolberry.cc +11 -0
- package/boolberry.h +6 -0
- package/build/Makefile +354 -0
- package/build/Release/.deps/Release/multihashing.node.d +1 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/argon2.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/blake2/blake2b.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/core.o.d +10 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/encoding.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/ref.o.d +14 -0
- package/build/Release/.deps/Release/obj.target/multihashing/argon2/src/thread.o.d +5 -0
- package/build/Release/.deps/Release/obj.target/multihashing/bcrypt.o.d +4 -0
- package/build/Release/.deps/Release/obj.target/multihashing/blake.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/boolberry.o.d +12 -0
- package/build/Release/.deps/Release/obj.target/multihashing/c11.o.d +20 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/aesb.o.d +3 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/c_blake256.o.d +5 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/c_groestl.o.d +10 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/c_jh.o.d +9 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/c_keccak.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/c_skein.o.d +10 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/hash.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/oaes_lib.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/crypto/wild_keccak.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_dark.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_dark_lite.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_fast.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_lite.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_soft_shell.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_turtle.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/cryptonight_turtle_lite.o.d +18 -0
- package/build/Release/.deps/Release/obj.target/multihashing/fresh.o.d +10 -0
- package/build/Release/.deps/Release/obj.target/multihashing/fugue.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/groestl.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/hefty1.o.d +12 -0
- package/build/Release/.deps/Release/obj.target/multihashing/keccak.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/multihashing.o.d +155 -0
- package/build/Release/.deps/Release/obj.target/multihashing/nist5.o.d +12 -0
- package/build/Release/.deps/Release/obj.target/multihashing/quark.o.d +14 -0
- package/build/Release/.deps/Release/obj.target/multihashing/qubit.o.d +12 -0
- package/build/Release/.deps/Release/obj.target/multihashing/scryptjane.o.d +30 -0
- package/build/Release/.deps/Release/obj.target/multihashing/scryptn.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha1.o.d +24 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/aes_helper.o.d +5 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/hamsi.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_blake.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_bmw.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_cubehash.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_echo.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_fugue.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_groestl.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_hefty1.o.d +5 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_jh.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_keccak.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_luffa.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_shabal.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_shavite.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_simd.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_skein.o.d +6 -0
- package/build/Release/.deps/Release/obj.target/multihashing/sha3/sph_whirlpool.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/shavite3.o.d +7 -0
- package/build/Release/.deps/Release/obj.target/multihashing/skein.o.d +8 -0
- package/build/Release/.deps/Release/obj.target/multihashing/x11.o.d +20 -0
- package/build/Release/.deps/Release/obj.target/multihashing/x13.o.d +23 -0
- package/build/Release/.deps/Release/obj.target/multihashing/x15.o.d +26 -0
- package/build/Release/.deps/Release/obj.target/multihashing.node.d +1 -0
- package/build/Release/multihashing.node +0 -0
- package/build/binding.Makefile +6 -0
- package/build/multihashing.target.mk +255 -0
- package/c11.c +85 -0
- package/c11.h +17 -0
- package/crypto/aesb.c +177 -0
- package/crypto/c_blake256.c +326 -0
- package/crypto/c_blake256.h +43 -0
- package/crypto/c_groestl.c +360 -0
- package/crypto/c_groestl.h +56 -0
- package/crypto/c_jh.c +367 -0
- package/crypto/c_jh.h +20 -0
- package/crypto/c_keccak.c +112 -0
- package/crypto/c_keccak.h +26 -0
- package/crypto/c_skein.c +2036 -0
- package/crypto/c_skein.h +45 -0
- package/crypto/crypto.h +186 -0
- package/crypto/cryptonote_core/account.cpp +50 -0
- package/crypto/cryptonote_core/account.h +61 -0
- package/crypto/cryptonote_core/cryptonote_basic_impl.cpp +186 -0
- package/crypto/cryptonote_core/cryptonote_basic_impl.h +65 -0
- package/crypto/cryptonote_core/cryptonote_format_utils.cpp +766 -0
- package/crypto/cryptonote_core/cryptonote_format_utils.h +30 -0
- package/crypto/cryptonote_protocol/cryptonote_protocol_defs.h +152 -0
- package/crypto/groestl_tables.h +38 -0
- package/crypto/hash-ops.h +57 -0
- package/crypto/hash.c +24 -0
- package/crypto/hash.h +22 -0
- package/crypto/int-util.h +230 -0
- package/crypto/oaes_config.h +50 -0
- package/crypto/oaes_lib.c +1468 -0
- package/crypto/oaes_lib.h +215 -0
- package/crypto/skein_port.h +190 -0
- package/crypto/variant2_int_sqrt.h +168 -0
- package/crypto/wild_keccak.cpp +119 -0
- package/crypto/wild_keccak.h +168 -0
- package/cryptonight.c +300 -0
- package/cryptonight.h +17 -0
- package/cryptonight_dark.c +300 -0
- package/cryptonight_dark.h +17 -0
- package/cryptonight_dark_lite.c +300 -0
- package/cryptonight_dark_lite.h +17 -0
- package/cryptonight_fast.c +300 -0
- package/cryptonight_fast.h +17 -0
- package/cryptonight_lite.c +300 -0
- package/cryptonight_lite.h +17 -0
- package/cryptonight_soft_shell.c +298 -0
- package/cryptonight_soft_shell.h +17 -0
- package/cryptonight_turtle.c +300 -0
- package/cryptonight_turtle.h +17 -0
- package/cryptonight_turtle_lite.c +300 -0
- package/cryptonight_turtle_lite.h +17 -0
- package/fresh.c +42 -0
- package/fresh.h +16 -0
- package/fugue.c +12 -0
- package/fugue.h +16 -0
- package/groestl.c +40 -0
- package/groestl.h +17 -0
- package/hefty1.c +63 -0
- package/hefty1.h +16 -0
- package/index.js +1 -0
- package/keccak.c +14 -0
- package/keccak.h +16 -0
- package/leocuvee-wrkzcoin-multi-hashing-0.0.20.tgz +0 -0
- package/multihashing.cc +699 -0
- package/nist5.c +46 -0
- package/nist5.h +16 -0
- package/package.json +56 -0
- package/quark.c +210 -0
- package/quark.h +16 -0
- package/qubit.c +45 -0
- package/qubit.h +16 -0
- package/scryptjane/scrypt-jane-chacha.h +132 -0
- package/scryptjane/scrypt-jane-hash.h +48 -0
- package/scryptjane/scrypt-jane-hash_keccak.h +168 -0
- package/scryptjane/scrypt-jane-hash_sha256.h +135 -0
- package/scryptjane/scrypt-jane-mix_chacha-avx.h +340 -0
- package/scryptjane/scrypt-jane-mix_chacha-sse2.h +371 -0
- package/scryptjane/scrypt-jane-mix_chacha-ssse3.h +348 -0
- package/scryptjane/scrypt-jane-mix_chacha.h +69 -0
- package/scryptjane/scrypt-jane-mix_salsa-avx.h +381 -0
- package/scryptjane/scrypt-jane-mix_salsa-sse2.h +443 -0
- package/scryptjane/scrypt-jane-mix_salsa.h +70 -0
- package/scryptjane/scrypt-jane-pbkdf2.h +112 -0
- package/scryptjane/scrypt-jane-portable-x86.h +364 -0
- package/scryptjane/scrypt-jane-portable.h +281 -0
- package/scryptjane/scrypt-jane-romix-basic.h +67 -0
- package/scryptjane/scrypt-jane-romix-template.h +118 -0
- package/scryptjane/scrypt-jane-romix.h +27 -0
- package/scryptjane/scrypt-jane-salsa.h +106 -0
- package/scryptjane/scrypt-jane-test-vectors.h +261 -0
- package/scryptjane.c +223 -0
- package/scryptjane.h +36 -0
- package/scryptn.c +258 -0
- package/scryptn.h +16 -0
- package/sha1.c +65 -0
- package/sha1.h +16 -0
- package/sha256.h +440 -0
- package/sha3/aes_helper.c +392 -0
- package/sha3/hamsi.c +867 -0
- package/sha3/hamsi_helper.c +39648 -0
- package/sha3/md_helper.c +347 -0
- package/sha3/sph_blake.c +1114 -0
- package/sha3/sph_blake.h +327 -0
- package/sha3/sph_bmw.c +965 -0
- package/sha3/sph_bmw.h +328 -0
- package/sha3/sph_cubehash.c +723 -0
- package/sha3/sph_cubehash.h +292 -0
- package/sha3/sph_echo.c +1031 -0
- package/sha3/sph_echo.h +320 -0
- package/sha3/sph_fugue.c +1208 -0
- package/sha3/sph_fugue.h +81 -0
- package/sha3/sph_groestl.c +3119 -0
- package/sha3/sph_groestl.h +329 -0
- package/sha3/sph_hamsi.h +321 -0
- package/sha3/sph_hefty1.c +378 -0
- package/sha3/sph_hefty1.h +66 -0
- package/sha3/sph_jh.c +1116 -0
- package/sha3/sph_jh.h +298 -0
- package/sha3/sph_keccak.c +1824 -0
- package/sha3/sph_keccak.h +293 -0
- package/sha3/sph_luffa.c +1426 -0
- package/sha3/sph_luffa.h +296 -0
- package/sha3/sph_shabal.c +806 -0
- package/sha3/sph_shabal.h +344 -0
- package/sha3/sph_shavite.c +1764 -0
- package/sha3/sph_shavite.h +314 -0
- package/sha3/sph_simd.c +1799 -0
- package/sha3/sph_simd.h +309 -0
- package/sha3/sph_skein.c +1254 -0
- package/sha3/sph_skein.h +298 -0
- package/sha3/sph_types.h +1976 -0
- package/sha3/sph_whirlpool.c +3480 -0
- package/sha3/sph_whirlpool.h +209 -0
- package/shavite3.c +24 -0
- package/shavite3.h +16 -0
- package/skein.c +26 -0
- package/skein.h +16 -0
- package/stdint.h +259 -0
- package/tests/argon2-tests.js +16 -0
- package/tests/benchmark.js +36 -0
- package/tests/cryptonight-tests.js +189 -0
- package/tests/cryptonight_monero.js +53 -0
- package/tests/test.js +16 -0
- package/x11.c +85 -0
- package/x11.h +16 -0
- package/x13.c +97 -0
- package/x13.h +5 -0
- package/x15.c +106 -0
- package/x15.h +16 -0
|
@@ -0,0 +1,1824 @@
|
|
|
1
|
+
/* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */
|
|
2
|
+
/*
|
|
3
|
+
* Keccak implementation.
|
|
4
|
+
*
|
|
5
|
+
* ==========================(LICENSE BEGIN)============================
|
|
6
|
+
*
|
|
7
|
+
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
|
8
|
+
*
|
|
9
|
+
* Permission is hereby granted, free of charge, to any person obtaining
|
|
10
|
+
* a copy of this software and associated documentation files (the
|
|
11
|
+
* "Software"), to deal in the Software without restriction, including
|
|
12
|
+
* without limitation the rights to use, copy, modify, merge, publish,
|
|
13
|
+
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
14
|
+
* permit persons to whom the Software is furnished to do so, subject to
|
|
15
|
+
* the following conditions:
|
|
16
|
+
*
|
|
17
|
+
* The above copyright notice and this permission notice shall be
|
|
18
|
+
* included in all copies or substantial portions of the Software.
|
|
19
|
+
*
|
|
20
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
21
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
22
|
+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
23
|
+
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
24
|
+
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
25
|
+
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
26
|
+
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
27
|
+
*
|
|
28
|
+
* ===========================(LICENSE END)=============================
|
|
29
|
+
*
|
|
30
|
+
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
#include <stddef.h>
|
|
34
|
+
#include <string.h>
|
|
35
|
+
|
|
36
|
+
#include "sph_keccak.h"
|
|
37
|
+
|
|
38
|
+
#ifdef __cplusplus
|
|
39
|
+
extern "C"{
|
|
40
|
+
#endif
|
|
41
|
+
|
|
42
|
+
/*
|
|
43
|
+
* Parameters:
|
|
44
|
+
*
|
|
45
|
+
* SPH_KECCAK_64 use a 64-bit type
|
|
46
|
+
* SPH_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll)
|
|
47
|
+
* SPH_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only)
|
|
48
|
+
* SPH_KECCAK_NOCOPY do not copy the state into local variables
|
|
49
|
+
*
|
|
50
|
+
* If there is no usable 64-bit type, the code automatically switches
|
|
51
|
+
* back to the 32-bit implementation.
|
|
52
|
+
*
|
|
53
|
+
* Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
|
|
54
|
+
* code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
|
|
55
|
+
* (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
|
|
56
|
+
* 8 kB L1 code cache), seem to show that the following are optimal:
|
|
57
|
+
*
|
|
58
|
+
* -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
|
|
59
|
+
* do not copy the state; unrolling 2, 6 or all rounds also provides
|
|
60
|
+
* near-optimal performance.
|
|
61
|
+
* -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
|
|
62
|
+
* interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
|
|
63
|
+
* also provides near-optimal performance.
|
|
64
|
+
* -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
|
|
65
|
+
* copy the state. Unrolling 4 or 6 rounds is near-optimal.
|
|
66
|
+
* -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
|
|
67
|
+
* copy the state.
|
|
68
|
+
* -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
|
|
69
|
+
* the state. Unrolling only 1 round is also near-optimal.
|
|
70
|
+
*
|
|
71
|
+
* Also, interleaving does not always yield actual improvements when
|
|
72
|
+
* using a 32-bit implementation; in particular when the architecture
|
|
73
|
+
* does not offer a native rotation opcode (interleaving replaces one
|
|
74
|
+
* 64-bit rotation with two 32-bit rotations, which is a gain only if
|
|
75
|
+
* there is a native 32-bit rotation opcode and not a native 64-bit
|
|
76
|
+
* rotation opcode; also, interleaving implies a small overhead when
|
|
77
|
+
* processing input words).
|
|
78
|
+
*
|
|
79
|
+
* To sum up:
|
|
80
|
+
* -- when possible, use the 64-bit code
|
|
81
|
+
* -- exception: on 32-bit x86, use 32-bit code
|
|
82
|
+
* -- when using 32-bit code, use interleaving
|
|
83
|
+
* -- copy the state, except on x86
|
|
84
|
+
* -- unroll 8 rounds on "big" machines, 2 rounds on "small" machines
|
|
85
|
+
*/
|
|
86
|
+
|
|
87
|
+
/*
 * Compile-time tuning defaults. Each knob below only receives a default
 * when the build has not already defined it, so any of them can be
 * overridden from the compiler command line.
 */

/* A global small-footprint request also applies to Keccak. */
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK
#define SPH_SMALL_FOOTPRINT_KECCAK   1
#endif

/*
 * By default, we select the 64-bit implementation if a 64-bit type
 * is available, unless a 32-bit x86 is detected.
 */
#if !defined SPH_KECCAK_64 && SPH_64 \
	&& !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC)
#define SPH_KECCAK_64   1
#endif

/*
 * If using a 32-bit implementation, we prefer to interleave.
 */
#if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE
#define SPH_KECCAK_INTERLEAVE   1
#endif

/*
 * Unroll 8 rounds on big systems, 2 rounds on small systems.
 */
#ifndef SPH_KECCAK_UNROLL
#if SPH_SMALL_FOOTPRINT_KECCAK
#define SPH_KECCAK_UNROLL   2
#else
#define SPH_KECCAK_UNROLL   8
#endif
#endif

/*
 * We do not want to copy the state to local variables on x86 (32-bit
 * and 64-bit alike). __x86_64 / __x86_64__ cover GCC-compatible
 * compilers; _M_X64 covers 64-bit MSVC, which defines neither of the
 * former and would otherwise silently fall back to the copying code.
 */
#ifndef SPH_KECCAK_NOCOPY
#if defined __i386__ || defined __x86_64 || defined __x86_64__ \
	|| defined _M_X64 || SPH_I386_MSVC || SPH_I386_GCC
#define SPH_KECCAK_NOCOPY   1
#else
#define SPH_KECCAK_NOCOPY   0
#endif
#endif

/*
 * MSVC warning C4146 (unary minus applied to an unsigned type) is
 * silenced for this translation unit.
 */
#ifdef _MSC_VER
#pragma warning (disable: 4146)
#endif
|
|
133
|
+
|
|
134
|
+
#if SPH_KECCAK_64
|
|
135
|
+
|
|
136
|
+
static const sph_u64 RC[] = {
|
|
137
|
+
SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
|
|
138
|
+
SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
|
|
139
|
+
SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
|
|
140
|
+
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
|
|
141
|
+
SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
|
|
142
|
+
SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
|
|
143
|
+
SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
|
|
144
|
+
SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
|
|
145
|
+
SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
|
|
146
|
+
SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
|
|
147
|
+
SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
|
|
148
|
+
SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
#if SPH_KECCAK_NOCOPY
|
|
152
|
+
|
|
153
|
+
/*
 * No-copy mode: the state words are not held in local variables.
 * Instead each name aXY is an lvalue alias for kc->u.wide[X + 5 * Y],
 * so a pointer named "kc" must be in scope wherever these are used.
 */
#define a00   (kc->u.wide[ 0])
#define a10   (kc->u.wide[ 1])
#define a20   (kc->u.wide[ 2])
#define a30   (kc->u.wide[ 3])
#define a40   (kc->u.wide[ 4])
#define a01   (kc->u.wide[ 5])
#define a11   (kc->u.wide[ 6])
#define a21   (kc->u.wide[ 7])
#define a31   (kc->u.wide[ 8])
#define a41   (kc->u.wide[ 9])
#define a02   (kc->u.wide[10])
#define a12   (kc->u.wide[11])
#define a22   (kc->u.wide[12])
#define a32   (kc->u.wide[13])
#define a42   (kc->u.wide[14])
#define a03   (kc->u.wide[15])
#define a13   (kc->u.wide[16])
#define a23   (kc->u.wide[17])
#define a33   (kc->u.wide[18])
#define a43   (kc->u.wide[19])
#define a04   (kc->u.wide[20])
#define a14   (kc->u.wide[21])
#define a24   (kc->u.wide[22])
#define a34   (kc->u.wide[23])
#define a44   (kc->u.wide[24])
|
|
178
|
+
|
|
179
|
+
/*
 * No-copy mode: the state is accessed in place through the aXY aliases,
 * so there is nothing to declare, load or store. All three macros expand
 * to nothing and their argument is discarded without being evaluated.
 */
#define DECL_STATE
#define READ_STATE(sc)
#define WRITE_STATE(sc)
|
|
182
|
+
|
|
183
|
+
/*
 * No-copy mode: XOR the first "size" bytes of "buf" into the state, one
 * 64-bit word at a time (word i receives the bytes at buf + 8 * i).
 * "kc" and "buf" must be in scope at the expansion site; each word is
 * read with sph_dec64le_aligned(), which is declared elsewhere.
 * "size" is re-evaluated on every iteration and is expected to be a
 * multiple of 8.
 *
 * The loop counter carries a macro-specific name so that a caller
 * passing an expression that mentions its own variable "j" is not
 * captured by the macro's local.
 */
#define INPUT_BUF(size)   do { \
		size_t input_buf_off_; \
		for (input_buf_off_ = 0; input_buf_off_ < (size); \
			input_buf_off_ += 8) { \
			kc->u.wide[input_buf_off_ >> 3] ^= \
				sph_dec64le_aligned(buf + input_buf_off_); \
		} \
	} while (0)
|
|
189
|
+
|
|
190
|
+
/*
 * No-copy mode: fixed-size absorb helpers. Each one simply forwards its
 * byte count (144, 136, 104 or 72) to the generic INPUT_BUF() loop.
 */
#define INPUT_BUF144   INPUT_BUF(144)
#define INPUT_BUF136   INPUT_BUF(136)
#define INPUT_BUF104   INPUT_BUF(104)
#define INPUT_BUF72    INPUT_BUF(72)
|
|
194
|
+
|
|
195
|
+
#else
|
|
196
|
+
|
|
197
|
+
/*
 * Copy mode: declare the 25 state words a00..a44 as local sph_u64
 * variables. The expansion already ends with a semicolon, so the macro
 * can be written without one at the point of use.
 */
#define DECL_STATE \
	sph_u64 a00, a01, a02, a03, a04; \
	sph_u64 a10, a11, a12, a13, a14; \
	sph_u64 a20, a21, a22, a23, a24; \
	sph_u64 a30, a31, a32, a33, a34; \
	sph_u64 a40, a41, a42, a43, a44;
|
|
203
|
+
|
|
204
|
+
/*
 * Copy mode: load the 25 state words from (state)->u.wide[] into the
 * locals declared by DECL_STATE. Local aXY receives word X + 5 * Y.
 * "state" is evaluated once per word.
 */
#define READ_STATE(state)   do { \
		a00 = (state)->u.wide[ 0]; \
		a10 = (state)->u.wide[ 1]; \
		a20 = (state)->u.wide[ 2]; \
		a30 = (state)->u.wide[ 3]; \
		a40 = (state)->u.wide[ 4]; \
		a01 = (state)->u.wide[ 5]; \
		a11 = (state)->u.wide[ 6]; \
		a21 = (state)->u.wide[ 7]; \
		a31 = (state)->u.wide[ 8]; \
		a41 = (state)->u.wide[ 9]; \
		a02 = (state)->u.wide[10]; \
		a12 = (state)->u.wide[11]; \
		a22 = (state)->u.wide[12]; \
		a32 = (state)->u.wide[13]; \
		a42 = (state)->u.wide[14]; \
		a03 = (state)->u.wide[15]; \
		a13 = (state)->u.wide[16]; \
		a23 = (state)->u.wide[17]; \
		a33 = (state)->u.wide[18]; \
		a43 = (state)->u.wide[19]; \
		a04 = (state)->u.wide[20]; \
		a14 = (state)->u.wide[21]; \
		a24 = (state)->u.wide[22]; \
		a34 = (state)->u.wide[23]; \
		a44 = (state)->u.wide[24]; \
	} while (0)
|
|
231
|
+
|
|
232
|
+
/*
 * Copy mode: store the 25 local state words back into
 * (state)->u.wide[]. Word X + 5 * Y receives local aXY, mirroring
 * READ_STATE. "state" is evaluated once per word.
 */
#define WRITE_STATE(state)   do { \
		(state)->u.wide[ 0] = a00; \
		(state)->u.wide[ 1] = a10; \
		(state)->u.wide[ 2] = a20; \
		(state)->u.wide[ 3] = a30; \
		(state)->u.wide[ 4] = a40; \
		(state)->u.wide[ 5] = a01; \
		(state)->u.wide[ 6] = a11; \
		(state)->u.wide[ 7] = a21; \
		(state)->u.wide[ 8] = a31; \
		(state)->u.wide[ 9] = a41; \
		(state)->u.wide[10] = a02; \
		(state)->u.wide[11] = a12; \
		(state)->u.wide[12] = a22; \
		(state)->u.wide[13] = a32; \
		(state)->u.wide[14] = a42; \
		(state)->u.wide[15] = a03; \
		(state)->u.wide[16] = a13; \
		(state)->u.wide[17] = a23; \
		(state)->u.wide[18] = a33; \
		(state)->u.wide[19] = a43; \
		(state)->u.wide[20] = a04; \
		(state)->u.wide[21] = a14; \
		(state)->u.wide[22] = a24; \
		(state)->u.wide[23] = a34; \
		(state)->u.wide[24] = a44; \
	} while (0)
|
|
259
|
+
|
|
260
|
+
/*
 * Copy mode: XOR 144 bytes of "buf" (18 words of 8 bytes) into the
 * first 18 local state words, in state-array order (a00, a10, ..., a23).
 * "buf" and the aXY locals must be in scope; each word is read with
 * sph_dec64le_aligned(), which is declared elsewhere.
 */
#define INPUT_BUF144   do { \
		a00 ^= sph_dec64le_aligned(buf +   0); \
		a10 ^= sph_dec64le_aligned(buf +   8); \
		a20 ^= sph_dec64le_aligned(buf +  16); \
		a30 ^= sph_dec64le_aligned(buf +  24); \
		a40 ^= sph_dec64le_aligned(buf +  32); \
		a01 ^= sph_dec64le_aligned(buf +  40); \
		a11 ^= sph_dec64le_aligned(buf +  48); \
		a21 ^= sph_dec64le_aligned(buf +  56); \
		a31 ^= sph_dec64le_aligned(buf +  64); \
		a41 ^= sph_dec64le_aligned(buf +  72); \
		a02 ^= sph_dec64le_aligned(buf +  80); \
		a12 ^= sph_dec64le_aligned(buf +  88); \
		a22 ^= sph_dec64le_aligned(buf +  96); \
		a32 ^= sph_dec64le_aligned(buf + 104); \
		a42 ^= sph_dec64le_aligned(buf + 112); \
		a03 ^= sph_dec64le_aligned(buf + 120); \
		a13 ^= sph_dec64le_aligned(buf + 128); \
		a23 ^= sph_dec64le_aligned(buf + 136); \
	} while (0)
|
|
280
|
+
|
|
281
|
+
/*
 * Copy mode: XOR 136 bytes of "buf" (17 words of 8 bytes) into the
 * first 17 local state words, in state-array order (a00, a10, ..., a13).
 * "buf" and the aXY locals must be in scope; each word is read with
 * sph_dec64le_aligned(), which is declared elsewhere.
 */
#define INPUT_BUF136   do { \
		a00 ^= sph_dec64le_aligned(buf +   0); \
		a10 ^= sph_dec64le_aligned(buf +   8); \
		a20 ^= sph_dec64le_aligned(buf +  16); \
		a30 ^= sph_dec64le_aligned(buf +  24); \
		a40 ^= sph_dec64le_aligned(buf +  32); \
		a01 ^= sph_dec64le_aligned(buf +  40); \
		a11 ^= sph_dec64le_aligned(buf +  48); \
		a21 ^= sph_dec64le_aligned(buf +  56); \
		a31 ^= sph_dec64le_aligned(buf +  64); \
		a41 ^= sph_dec64le_aligned(buf +  72); \
		a02 ^= sph_dec64le_aligned(buf +  80); \
		a12 ^= sph_dec64le_aligned(buf +  88); \
		a22 ^= sph_dec64le_aligned(buf +  96); \
		a32 ^= sph_dec64le_aligned(buf + 104); \
		a42 ^= sph_dec64le_aligned(buf + 112); \
		a03 ^= sph_dec64le_aligned(buf + 120); \
		a13 ^= sph_dec64le_aligned(buf + 128); \
	} while (0)
|
|
300
|
+
|
|
301
|
+
/*
 * Copy mode: XOR 104 bytes of "buf" (13 words of 8 bytes) into the
 * first 13 local state words, in state-array order (a00, a10, ..., a22).
 * "buf" and the aXY locals must be in scope; each word is read with
 * sph_dec64le_aligned(), which is declared elsewhere.
 */
#define INPUT_BUF104   do { \
		a00 ^= sph_dec64le_aligned(buf +   0); \
		a10 ^= sph_dec64le_aligned(buf +   8); \
		a20 ^= sph_dec64le_aligned(buf +  16); \
		a30 ^= sph_dec64le_aligned(buf +  24); \
		a40 ^= sph_dec64le_aligned(buf +  32); \
		a01 ^= sph_dec64le_aligned(buf +  40); \
		a11 ^= sph_dec64le_aligned(buf +  48); \
		a21 ^= sph_dec64le_aligned(buf +  56); \
		a31 ^= sph_dec64le_aligned(buf +  64); \
		a41 ^= sph_dec64le_aligned(buf +  72); \
		a02 ^= sph_dec64le_aligned(buf +  80); \
		a12 ^= sph_dec64le_aligned(buf +  88); \
		a22 ^= sph_dec64le_aligned(buf +  96); \
	} while (0)
|
|
316
|
+
|
|
317
|
+
/*
 * Copy mode: XOR 72 bytes of "buf" (9 words of 8 bytes) into the first
 * 9 local state words, in state-array order (a00, a10, ..., a31).
 * "buf" and the aXY locals must be in scope; each word is read with
 * sph_dec64le_aligned(), which is declared elsewhere.
 */
#define INPUT_BUF72   do { \
		a00 ^= sph_dec64le_aligned(buf +  0); \
		a10 ^= sph_dec64le_aligned(buf +  8); \
		a20 ^= sph_dec64le_aligned(buf + 16); \
		a30 ^= sph_dec64le_aligned(buf + 24); \
		a40 ^= sph_dec64le_aligned(buf + 32); \
		a01 ^= sph_dec64le_aligned(buf + 40); \
		a11 ^= sph_dec64le_aligned(buf + 48); \
		a21 ^= sph_dec64le_aligned(buf + 56); \
		a31 ^= sph_dec64le_aligned(buf + 64); \
	} while (0)
|
|
328
|
+
|
|
329
|
+
/*
 * Copy mode: generic absorb helper. XORs 8-byte words of "buf" into the
 * local state words in state-array order and stops early according to
 * "lim":
 *   lim == 72   ->  9 words (a00 .. a31)
 *   lim == 104  -> 13 words (a00 .. a22)
 *   lim == 136  -> 17 words (a00 .. a13)
 *   otherwise   -> 18 words (a00 .. a23), i.e. 144 bytes
 * The "break" statements leave the enclosing do/while(0). "buf" and the
 * aXY locals must be in scope; "lim" may be evaluated up to three times.
 */
#define INPUT_BUF(lim)   do { \
		a00 ^= sph_dec64le_aligned(buf +   0); \
		a10 ^= sph_dec64le_aligned(buf +   8); \
		a20 ^= sph_dec64le_aligned(buf +  16); \
		a30 ^= sph_dec64le_aligned(buf +  24); \
		a40 ^= sph_dec64le_aligned(buf +  32); \
		a01 ^= sph_dec64le_aligned(buf +  40); \
		a11 ^= sph_dec64le_aligned(buf +  48); \
		a21 ^= sph_dec64le_aligned(buf +  56); \
		a31 ^= sph_dec64le_aligned(buf +  64); \
		if ((lim) == 72) \
			break; \
		a41 ^= sph_dec64le_aligned(buf +  72); \
		a02 ^= sph_dec64le_aligned(buf +  80); \
		a12 ^= sph_dec64le_aligned(buf +  88); \
		a22 ^= sph_dec64le_aligned(buf +  96); \
		if ((lim) == 104) \
			break; \
		a32 ^= sph_dec64le_aligned(buf + 104); \
		a42 ^= sph_dec64le_aligned(buf + 112); \
		a03 ^= sph_dec64le_aligned(buf + 120); \
		a13 ^= sph_dec64le_aligned(buf + 128); \
		if ((lim) == 136) \
			break; \
		a23 ^= sph_dec64le_aligned(buf + 136); \
	} while (0)
|
|
355
|
+
|
|
356
|
+
#endif
|
|
357
|
+
|
|
358
|
+
/*
 * 64-bit implementation of the word operations used by the rest of the
 * file. Each one is a single native expression on sph_u64 values and
 * yields the value assigned to "d".
 *
 * Every argument is parenthesized in the expansion so that passing a
 * compound expression (e.g. "x | y") cannot re-associate with the
 * operator inside the macro.
 */
#define DECL64(x)        sph_u64 x
#define MOV64(d, s)      ((d) = (s))
#define XOR64(d, a, b)   ((d) = (a) ^ (b))
#define AND64(d, a, b)   ((d) = (a) & (b))
#define OR64(d, a, b)    ((d) = (a) | (b))
/* SPH_T64 and SPH_ROTL64 are provided elsewhere (not defined in this file). */
#define NOT64(d, s)      ((d) = SPH_T64(~(s)))
#define ROL64(d, v, n)   ((d) = SPH_ROTL64((v), (n)))
/* With native 64-bit words the round-constant XOR is a plain XOR. */
#define XOR64_IOTA       XOR64
|
|
366
|
+
|
|
367
|
+
#else
|
|
368
|
+
|
|
369
|
+
static const struct {
|
|
370
|
+
sph_u32 high, low;
|
|
371
|
+
} RC[] = {
|
|
372
|
+
#if SPH_KECCAK_INTERLEAVE
|
|
373
|
+
{ SPH_C32(0x00000000), SPH_C32(0x00000001) },
|
|
374
|
+
{ SPH_C32(0x00000089), SPH_C32(0x00000000) },
|
|
375
|
+
{ SPH_C32(0x8000008B), SPH_C32(0x00000000) },
|
|
376
|
+
{ SPH_C32(0x80008080), SPH_C32(0x00000000) },
|
|
377
|
+
{ SPH_C32(0x0000008B), SPH_C32(0x00000001) },
|
|
378
|
+
{ SPH_C32(0x00008000), SPH_C32(0x00000001) },
|
|
379
|
+
{ SPH_C32(0x80008088), SPH_C32(0x00000001) },
|
|
380
|
+
{ SPH_C32(0x80000082), SPH_C32(0x00000001) },
|
|
381
|
+
{ SPH_C32(0x0000000B), SPH_C32(0x00000000) },
|
|
382
|
+
{ SPH_C32(0x0000000A), SPH_C32(0x00000000) },
|
|
383
|
+
{ SPH_C32(0x00008082), SPH_C32(0x00000001) },
|
|
384
|
+
{ SPH_C32(0x00008003), SPH_C32(0x00000000) },
|
|
385
|
+
{ SPH_C32(0x0000808B), SPH_C32(0x00000001) },
|
|
386
|
+
{ SPH_C32(0x8000000B), SPH_C32(0x00000001) },
|
|
387
|
+
{ SPH_C32(0x8000008A), SPH_C32(0x00000001) },
|
|
388
|
+
{ SPH_C32(0x80000081), SPH_C32(0x00000001) },
|
|
389
|
+
{ SPH_C32(0x80000081), SPH_C32(0x00000000) },
|
|
390
|
+
{ SPH_C32(0x80000008), SPH_C32(0x00000000) },
|
|
391
|
+
{ SPH_C32(0x00000083), SPH_C32(0x00000000) },
|
|
392
|
+
{ SPH_C32(0x80008003), SPH_C32(0x00000000) },
|
|
393
|
+
{ SPH_C32(0x80008088), SPH_C32(0x00000001) },
|
|
394
|
+
{ SPH_C32(0x80000088), SPH_C32(0x00000000) },
|
|
395
|
+
{ SPH_C32(0x00008000), SPH_C32(0x00000001) },
|
|
396
|
+
{ SPH_C32(0x80008082), SPH_C32(0x00000000) }
|
|
397
|
+
#else
|
|
398
|
+
{ SPH_C32(0x00000000), SPH_C32(0x00000001) },
|
|
399
|
+
{ SPH_C32(0x00000000), SPH_C32(0x00008082) },
|
|
400
|
+
{ SPH_C32(0x80000000), SPH_C32(0x0000808A) },
|
|
401
|
+
{ SPH_C32(0x80000000), SPH_C32(0x80008000) },
|
|
402
|
+
{ SPH_C32(0x00000000), SPH_C32(0x0000808B) },
|
|
403
|
+
{ SPH_C32(0x00000000), SPH_C32(0x80000001) },
|
|
404
|
+
{ SPH_C32(0x80000000), SPH_C32(0x80008081) },
|
|
405
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00008009) },
|
|
406
|
+
{ SPH_C32(0x00000000), SPH_C32(0x0000008A) },
|
|
407
|
+
{ SPH_C32(0x00000000), SPH_C32(0x00000088) },
|
|
408
|
+
{ SPH_C32(0x00000000), SPH_C32(0x80008009) },
|
|
409
|
+
{ SPH_C32(0x00000000), SPH_C32(0x8000000A) },
|
|
410
|
+
{ SPH_C32(0x00000000), SPH_C32(0x8000808B) },
|
|
411
|
+
{ SPH_C32(0x80000000), SPH_C32(0x0000008B) },
|
|
412
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00008089) },
|
|
413
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00008003) },
|
|
414
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00008002) },
|
|
415
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00000080) },
|
|
416
|
+
{ SPH_C32(0x00000000), SPH_C32(0x0000800A) },
|
|
417
|
+
{ SPH_C32(0x80000000), SPH_C32(0x8000000A) },
|
|
418
|
+
{ SPH_C32(0x80000000), SPH_C32(0x80008081) },
|
|
419
|
+
{ SPH_C32(0x80000000), SPH_C32(0x00008080) },
|
|
420
|
+
{ SPH_C32(0x00000000), SPH_C32(0x80000001) },
|
|
421
|
+
{ SPH_C32(0x80000000), SPH_C32(0x80008008) }
|
|
422
|
+
#endif
|
|
423
|
+
};
|
|
424
|
+
|
|
425
|
+
#if SPH_KECCAK_INTERLEAVE
|
|
426
|
+
|
|
427
|
+
/*
 * INTERLEAVE(xl, xh): convert a 64-bit value held as two 32-bit words
 * (xl = low word, xh = high word) to the bit-interleaved representation,
 * in place. Both arguments must be lvalues.
 *
 * Each word is first permuted on its own by four masked swap steps
 * (distances 1, 2, 4, 8), which gather its even-indexed bits in the low
 * 16 bits and its odd-indexed bits in the high 16 bits. The last step
 * exchanges the high half of l with the low half of h, so that xl ends up
 * with the even-indexed bits of the 64-bit value and xh with the
 * odd-indexed ones.
 *
 * The macro declares locals named l, h and t; do not pass arguments
 * with those names.
 */
#define INTERLEAVE(xl, xh) do { \
|
|
428
|
+
sph_u32 l, h, t; \
|
|
429
|
+
l = (xl); h = (xh); \
|
|
430
|
+
t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
|
|
431
|
+
t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
|
|
432
|
+
t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
|
|
433
|
+
t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
|
|
434
|
+
t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
|
|
435
|
+
t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
|
|
436
|
+
t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
|
|
437
|
+
t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
|
|
438
|
+
t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
|
|
439
|
+
l ^= t; h ^= t >> 16; \
|
|
440
|
+
(xl) = l; (xh) = h; \
|
|
441
|
+
} while (0)
|
|
442
|
+
|
|
443
|
+
/*
 * UNINTERLEAVE(xl, xh): inverse of INTERLEAVE. Applies the same masked
 * swap steps in the opposite order: first the 16-bit exchange between
 * the two words, then the per-word swaps at distances 8, 4, 2 and 1.
 * Each step is its own inverse, so running them in reverse order undoes
 * INTERLEAVE and restores the plain low-word/high-word layout in place.
 *
 * Both arguments must be lvalues. The macro declares locals named l, h
 * and t; do not pass arguments with those names.
 */
#define UNINTERLEAVE(xl, xh) do { \
|
|
444
|
+
sph_u32 l, h, t; \
|
|
445
|
+
l = (xl); h = (xh); \
|
|
446
|
+
t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
|
|
447
|
+
l ^= t; h ^= t >> 16; \
|
|
448
|
+
t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
|
|
449
|
+
t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
|
|
450
|
+
t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
|
|
451
|
+
t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
|
|
452
|
+
t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
|
|
453
|
+
t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
|
|
454
|
+
t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
|
|
455
|
+
t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
|
|
456
|
+
(xl) = l; (xh) = h; \
|
|
457
|
+
} while (0)
|
|
458
|
+
|
|
459
|
+
#else
|
|
460
|
+
|
|
461
|
+
#define INTERLEAVE(l, h)
|
|
462
|
+
#define UNINTERLEAVE(l, h)
|
|
463
|
+
|
|
464
|
+
#endif
|
|
465
|
+
|
|
466
|
+
#if SPH_KECCAK_NOCOPY
|
|
467
|
+
|
|
468
|
+
#define a00l (kc->u.narrow[2 * 0 + 0])
|
|
469
|
+
#define a00h (kc->u.narrow[2 * 0 + 1])
|
|
470
|
+
#define a10l (kc->u.narrow[2 * 1 + 0])
|
|
471
|
+
#define a10h (kc->u.narrow[2 * 1 + 1])
|
|
472
|
+
#define a20l (kc->u.narrow[2 * 2 + 0])
|
|
473
|
+
#define a20h (kc->u.narrow[2 * 2 + 1])
|
|
474
|
+
#define a30l (kc->u.narrow[2 * 3 + 0])
|
|
475
|
+
#define a30h (kc->u.narrow[2 * 3 + 1])
|
|
476
|
+
#define a40l (kc->u.narrow[2 * 4 + 0])
|
|
477
|
+
#define a40h (kc->u.narrow[2 * 4 + 1])
|
|
478
|
+
#define a01l (kc->u.narrow[2 * 5 + 0])
|
|
479
|
+
#define a01h (kc->u.narrow[2 * 5 + 1])
|
|
480
|
+
#define a11l (kc->u.narrow[2 * 6 + 0])
|
|
481
|
+
#define a11h (kc->u.narrow[2 * 6 + 1])
|
|
482
|
+
#define a21l (kc->u.narrow[2 * 7 + 0])
|
|
483
|
+
#define a21h (kc->u.narrow[2 * 7 + 1])
|
|
484
|
+
#define a31l (kc->u.narrow[2 * 8 + 0])
|
|
485
|
+
#define a31h (kc->u.narrow[2 * 8 + 1])
|
|
486
|
+
#define a41l (kc->u.narrow[2 * 9 + 0])
|
|
487
|
+
#define a41h (kc->u.narrow[2 * 9 + 1])
|
|
488
|
+
#define a02l (kc->u.narrow[2 * 10 + 0])
|
|
489
|
+
#define a02h (kc->u.narrow[2 * 10 + 1])
|
|
490
|
+
#define a12l (kc->u.narrow[2 * 11 + 0])
|
|
491
|
+
#define a12h (kc->u.narrow[2 * 11 + 1])
|
|
492
|
+
#define a22l (kc->u.narrow[2 * 12 + 0])
|
|
493
|
+
#define a22h (kc->u.narrow[2 * 12 + 1])
|
|
494
|
+
#define a32l (kc->u.narrow[2 * 13 + 0])
|
|
495
|
+
#define a32h (kc->u.narrow[2 * 13 + 1])
|
|
496
|
+
#define a42l (kc->u.narrow[2 * 14 + 0])
|
|
497
|
+
#define a42h (kc->u.narrow[2 * 14 + 1])
|
|
498
|
+
#define a03l (kc->u.narrow[2 * 15 + 0])
|
|
499
|
+
#define a03h (kc->u.narrow[2 * 15 + 1])
|
|
500
|
+
#define a13l (kc->u.narrow[2 * 16 + 0])
|
|
501
|
+
#define a13h (kc->u.narrow[2 * 16 + 1])
|
|
502
|
+
#define a23l (kc->u.narrow[2 * 17 + 0])
|
|
503
|
+
#define a23h (kc->u.narrow[2 * 17 + 1])
|
|
504
|
+
#define a33l (kc->u.narrow[2 * 18 + 0])
|
|
505
|
+
#define a33h (kc->u.narrow[2 * 18 + 1])
|
|
506
|
+
#define a43l (kc->u.narrow[2 * 19 + 0])
|
|
507
|
+
#define a43h (kc->u.narrow[2 * 19 + 1])
|
|
508
|
+
#define a04l (kc->u.narrow[2 * 20 + 0])
|
|
509
|
+
#define a04h (kc->u.narrow[2 * 20 + 1])
|
|
510
|
+
#define a14l (kc->u.narrow[2 * 21 + 0])
|
|
511
|
+
#define a14h (kc->u.narrow[2 * 21 + 1])
|
|
512
|
+
#define a24l (kc->u.narrow[2 * 22 + 0])
|
|
513
|
+
#define a24h (kc->u.narrow[2 * 22 + 1])
|
|
514
|
+
#define a34l (kc->u.narrow[2 * 23 + 0])
|
|
515
|
+
#define a34h (kc->u.narrow[2 * 23 + 1])
|
|
516
|
+
#define a44l (kc->u.narrow[2 * 24 + 0])
|
|
517
|
+
#define a44h (kc->u.narrow[2 * 24 + 1])
|
|
518
|
+
|
|
519
|
+
#define DECL_STATE
|
|
520
|
+
#define READ_STATE(state)
|
|
521
|
+
#define WRITE_STATE(state)
|
|
522
|
+
|
|
523
|
+
/*
 * INPUT_BUF(size) -- SPH_KECCAK_NOCOPY variant for the 32-bit code path.
 *
 * XORs the first `size` bytes of `buf` into the state array
 * kc->u.narrow[], 8 bytes per iteration. Each 8-byte group is decoded as
 * two little-endian 32-bit words, passed through INTERLEAVE() (which
 * expands to nothing when SPH_KECCAK_INTERLEAVE is off), and XORed into
 * the two consecutive 32-bit state words at index j / 4.
 *
 * `buf` and `kc` are not parameters: they are taken from the scope where
 * the macro is expanded. `size` is used as the loop bound with a step
 * of 8.
 */
#define INPUT_BUF(size) do { \
|
|
524
|
+
size_t j; \
|
|
525
|
+
for (j = 0; j < (size); j += 8) { \
|
|
526
|
+
sph_u32 tl, th; \
|
|
527
|
+
tl = sph_dec32le_aligned(buf + j + 0); \
|
|
528
|
+
th = sph_dec32le_aligned(buf + j + 4); \
|
|
529
|
+
INTERLEAVE(tl, th); \
|
|
530
|
+
kc->u.narrow[(j >> 2) + 0] ^= tl; \
|
|
531
|
+
kc->u.narrow[(j >> 2) + 1] ^= th; \
|
|
532
|
+
} \
|
|
533
|
+
} while (0)
|
|
534
|
+
|
|
535
|
+
#define INPUT_BUF144 INPUT_BUF(144)
|
|
536
|
+
#define INPUT_BUF136 INPUT_BUF(136)
|
|
537
|
+
#define INPUT_BUF104 INPUT_BUF(104)
|
|
538
|
+
#define INPUT_BUF72 INPUT_BUF(72)
|
|
539
|
+
|
|
540
|
+
#else
|
|
541
|
+
|
|
542
|
+
#define DECL_STATE \
|
|
543
|
+
sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \
|
|
544
|
+
sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \
|
|
545
|
+
sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \
|
|
546
|
+
sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \
|
|
547
|
+
sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;
|
|
548
|
+
|
|
549
|
+
#define READ_STATE(state) do { \
|
|
550
|
+
a00l = (state)->u.narrow[2 * 0 + 0]; \
|
|
551
|
+
a00h = (state)->u.narrow[2 * 0 + 1]; \
|
|
552
|
+
a10l = (state)->u.narrow[2 * 1 + 0]; \
|
|
553
|
+
a10h = (state)->u.narrow[2 * 1 + 1]; \
|
|
554
|
+
a20l = (state)->u.narrow[2 * 2 + 0]; \
|
|
555
|
+
a20h = (state)->u.narrow[2 * 2 + 1]; \
|
|
556
|
+
a30l = (state)->u.narrow[2 * 3 + 0]; \
|
|
557
|
+
a30h = (state)->u.narrow[2 * 3 + 1]; \
|
|
558
|
+
a40l = (state)->u.narrow[2 * 4 + 0]; \
|
|
559
|
+
a40h = (state)->u.narrow[2 * 4 + 1]; \
|
|
560
|
+
a01l = (state)->u.narrow[2 * 5 + 0]; \
|
|
561
|
+
a01h = (state)->u.narrow[2 * 5 + 1]; \
|
|
562
|
+
a11l = (state)->u.narrow[2 * 6 + 0]; \
|
|
563
|
+
a11h = (state)->u.narrow[2 * 6 + 1]; \
|
|
564
|
+
a21l = (state)->u.narrow[2 * 7 + 0]; \
|
|
565
|
+
a21h = (state)->u.narrow[2 * 7 + 1]; \
|
|
566
|
+
a31l = (state)->u.narrow[2 * 8 + 0]; \
|
|
567
|
+
a31h = (state)->u.narrow[2 * 8 + 1]; \
|
|
568
|
+
a41l = (state)->u.narrow[2 * 9 + 0]; \
|
|
569
|
+
a41h = (state)->u.narrow[2 * 9 + 1]; \
|
|
570
|
+
a02l = (state)->u.narrow[2 * 10 + 0]; \
|
|
571
|
+
a02h = (state)->u.narrow[2 * 10 + 1]; \
|
|
572
|
+
a12l = (state)->u.narrow[2 * 11 + 0]; \
|
|
573
|
+
a12h = (state)->u.narrow[2 * 11 + 1]; \
|
|
574
|
+
a22l = (state)->u.narrow[2 * 12 + 0]; \
|
|
575
|
+
a22h = (state)->u.narrow[2 * 12 + 1]; \
|
|
576
|
+
a32l = (state)->u.narrow[2 * 13 + 0]; \
|
|
577
|
+
a32h = (state)->u.narrow[2 * 13 + 1]; \
|
|
578
|
+
a42l = (state)->u.narrow[2 * 14 + 0]; \
|
|
579
|
+
a42h = (state)->u.narrow[2 * 14 + 1]; \
|
|
580
|
+
a03l = (state)->u.narrow[2 * 15 + 0]; \
|
|
581
|
+
a03h = (state)->u.narrow[2 * 15 + 1]; \
|
|
582
|
+
a13l = (state)->u.narrow[2 * 16 + 0]; \
|
|
583
|
+
a13h = (state)->u.narrow[2 * 16 + 1]; \
|
|
584
|
+
a23l = (state)->u.narrow[2 * 17 + 0]; \
|
|
585
|
+
a23h = (state)->u.narrow[2 * 17 + 1]; \
|
|
586
|
+
a33l = (state)->u.narrow[2 * 18 + 0]; \
|
|
587
|
+
a33h = (state)->u.narrow[2 * 18 + 1]; \
|
|
588
|
+
a43l = (state)->u.narrow[2 * 19 + 0]; \
|
|
589
|
+
a43h = (state)->u.narrow[2 * 19 + 1]; \
|
|
590
|
+
a04l = (state)->u.narrow[2 * 20 + 0]; \
|
|
591
|
+
a04h = (state)->u.narrow[2 * 20 + 1]; \
|
|
592
|
+
a14l = (state)->u.narrow[2 * 21 + 0]; \
|
|
593
|
+
a14h = (state)->u.narrow[2 * 21 + 1]; \
|
|
594
|
+
a24l = (state)->u.narrow[2 * 22 + 0]; \
|
|
595
|
+
a24h = (state)->u.narrow[2 * 22 + 1]; \
|
|
596
|
+
a34l = (state)->u.narrow[2 * 23 + 0]; \
|
|
597
|
+
a34h = (state)->u.narrow[2 * 23 + 1]; \
|
|
598
|
+
a44l = (state)->u.narrow[2 * 24 + 0]; \
|
|
599
|
+
a44h = (state)->u.narrow[2 * 24 + 1]; \
|
|
600
|
+
} while (0)
|
|
601
|
+
|
|
602
|
+
#define WRITE_STATE(state) do { \
|
|
603
|
+
(state)->u.narrow[2 * 0 + 0] = a00l; \
|
|
604
|
+
(state)->u.narrow[2 * 0 + 1] = a00h; \
|
|
605
|
+
(state)->u.narrow[2 * 1 + 0] = a10l; \
|
|
606
|
+
(state)->u.narrow[2 * 1 + 1] = a10h; \
|
|
607
|
+
(state)->u.narrow[2 * 2 + 0] = a20l; \
|
|
608
|
+
(state)->u.narrow[2 * 2 + 1] = a20h; \
|
|
609
|
+
(state)->u.narrow[2 * 3 + 0] = a30l; \
|
|
610
|
+
(state)->u.narrow[2 * 3 + 1] = a30h; \
|
|
611
|
+
(state)->u.narrow[2 * 4 + 0] = a40l; \
|
|
612
|
+
(state)->u.narrow[2 * 4 + 1] = a40h; \
|
|
613
|
+
(state)->u.narrow[2 * 5 + 0] = a01l; \
|
|
614
|
+
(state)->u.narrow[2 * 5 + 1] = a01h; \
|
|
615
|
+
(state)->u.narrow[2 * 6 + 0] = a11l; \
|
|
616
|
+
(state)->u.narrow[2 * 6 + 1] = a11h; \
|
|
617
|
+
(state)->u.narrow[2 * 7 + 0] = a21l; \
|
|
618
|
+
(state)->u.narrow[2 * 7 + 1] = a21h; \
|
|
619
|
+
(state)->u.narrow[2 * 8 + 0] = a31l; \
|
|
620
|
+
(state)->u.narrow[2 * 8 + 1] = a31h; \
|
|
621
|
+
(state)->u.narrow[2 * 9 + 0] = a41l; \
|
|
622
|
+
(state)->u.narrow[2 * 9 + 1] = a41h; \
|
|
623
|
+
(state)->u.narrow[2 * 10 + 0] = a02l; \
|
|
624
|
+
(state)->u.narrow[2 * 10 + 1] = a02h; \
|
|
625
|
+
(state)->u.narrow[2 * 11 + 0] = a12l; \
|
|
626
|
+
(state)->u.narrow[2 * 11 + 1] = a12h; \
|
|
627
|
+
(state)->u.narrow[2 * 12 + 0] = a22l; \
|
|
628
|
+
(state)->u.narrow[2 * 12 + 1] = a22h; \
|
|
629
|
+
(state)->u.narrow[2 * 13 + 0] = a32l; \
|
|
630
|
+
(state)->u.narrow[2 * 13 + 1] = a32h; \
|
|
631
|
+
(state)->u.narrow[2 * 14 + 0] = a42l; \
|
|
632
|
+
(state)->u.narrow[2 * 14 + 1] = a42h; \
|
|
633
|
+
(state)->u.narrow[2 * 15 + 0] = a03l; \
|
|
634
|
+
(state)->u.narrow[2 * 15 + 1] = a03h; \
|
|
635
|
+
(state)->u.narrow[2 * 16 + 0] = a13l; \
|
|
636
|
+
(state)->u.narrow[2 * 16 + 1] = a13h; \
|
|
637
|
+
(state)->u.narrow[2 * 17 + 0] = a23l; \
|
|
638
|
+
(state)->u.narrow[2 * 17 + 1] = a23h; \
|
|
639
|
+
(state)->u.narrow[2 * 18 + 0] = a33l; \
|
|
640
|
+
(state)->u.narrow[2 * 18 + 1] = a33h; \
|
|
641
|
+
(state)->u.narrow[2 * 19 + 0] = a43l; \
|
|
642
|
+
(state)->u.narrow[2 * 19 + 1] = a43h; \
|
|
643
|
+
(state)->u.narrow[2 * 20 + 0] = a04l; \
|
|
644
|
+
(state)->u.narrow[2 * 20 + 1] = a04h; \
|
|
645
|
+
(state)->u.narrow[2 * 21 + 0] = a14l; \
|
|
646
|
+
(state)->u.narrow[2 * 21 + 1] = a14h; \
|
|
647
|
+
(state)->u.narrow[2 * 22 + 0] = a24l; \
|
|
648
|
+
(state)->u.narrow[2 * 22 + 1] = a24h; \
|
|
649
|
+
(state)->u.narrow[2 * 23 + 0] = a34l; \
|
|
650
|
+
(state)->u.narrow[2 * 23 + 1] = a34h; \
|
|
651
|
+
(state)->u.narrow[2 * 24 + 0] = a44l; \
|
|
652
|
+
(state)->u.narrow[2 * 24 + 1] = a44h; \
|
|
653
|
+
} while (0)
|
|
654
|
+
|
|
655
|
+
/*
 * READ64(d, off): absorb one 64-bit input word into lane `d` (32-bit
 * code path, state held in local variables).
 *
 * Decodes two little-endian 32-bit words from buf + off and
 * buf + off + 4, passes them through INTERLEAVE() (a no-op unless
 * SPH_KECCAK_INTERLEAVE is set), and XORs them into the variables
 * d##l and d##h. `d` must therefore be a lane base name such as a00,
 * not an expression. `buf` is taken from the expansion site.
 */
#define READ64(d, off) do { \
|
|
656
|
+
sph_u32 tl, th; \
|
|
657
|
+
tl = sph_dec32le_aligned(buf + (off)); \
|
|
658
|
+
th = sph_dec32le_aligned(buf + (off) + 4); \
|
|
659
|
+
INTERLEAVE(tl, th); \
|
|
660
|
+
d ## l ^= tl; \
|
|
661
|
+
d ## h ^= th; \
|
|
662
|
+
} while (0)
|
|
663
|
+
|
|
664
|
+
#define INPUT_BUF144 do { \
|
|
665
|
+
READ64(a00, 0); \
|
|
666
|
+
READ64(a10, 8); \
|
|
667
|
+
READ64(a20, 16); \
|
|
668
|
+
READ64(a30, 24); \
|
|
669
|
+
READ64(a40, 32); \
|
|
670
|
+
READ64(a01, 40); \
|
|
671
|
+
READ64(a11, 48); \
|
|
672
|
+
READ64(a21, 56); \
|
|
673
|
+
READ64(a31, 64); \
|
|
674
|
+
READ64(a41, 72); \
|
|
675
|
+
READ64(a02, 80); \
|
|
676
|
+
READ64(a12, 88); \
|
|
677
|
+
READ64(a22, 96); \
|
|
678
|
+
READ64(a32, 104); \
|
|
679
|
+
READ64(a42, 112); \
|
|
680
|
+
READ64(a03, 120); \
|
|
681
|
+
READ64(a13, 128); \
|
|
682
|
+
READ64(a23, 136); \
|
|
683
|
+
} while (0)
|
|
684
|
+
|
|
685
|
+
#define INPUT_BUF136 do { \
|
|
686
|
+
READ64(a00, 0); \
|
|
687
|
+
READ64(a10, 8); \
|
|
688
|
+
READ64(a20, 16); \
|
|
689
|
+
READ64(a30, 24); \
|
|
690
|
+
READ64(a40, 32); \
|
|
691
|
+
READ64(a01, 40); \
|
|
692
|
+
READ64(a11, 48); \
|
|
693
|
+
READ64(a21, 56); \
|
|
694
|
+
READ64(a31, 64); \
|
|
695
|
+
READ64(a41, 72); \
|
|
696
|
+
READ64(a02, 80); \
|
|
697
|
+
READ64(a12, 88); \
|
|
698
|
+
READ64(a22, 96); \
|
|
699
|
+
READ64(a32, 104); \
|
|
700
|
+
READ64(a42, 112); \
|
|
701
|
+
READ64(a03, 120); \
|
|
702
|
+
READ64(a13, 128); \
|
|
703
|
+
} while (0)
|
|
704
|
+
|
|
705
|
+
#define INPUT_BUF104 do { \
|
|
706
|
+
READ64(a00, 0); \
|
|
707
|
+
READ64(a10, 8); \
|
|
708
|
+
READ64(a20, 16); \
|
|
709
|
+
READ64(a30, 24); \
|
|
710
|
+
READ64(a40, 32); \
|
|
711
|
+
READ64(a01, 40); \
|
|
712
|
+
READ64(a11, 48); \
|
|
713
|
+
READ64(a21, 56); \
|
|
714
|
+
READ64(a31, 64); \
|
|
715
|
+
READ64(a41, 72); \
|
|
716
|
+
READ64(a02, 80); \
|
|
717
|
+
READ64(a12, 88); \
|
|
718
|
+
READ64(a22, 96); \
|
|
719
|
+
} while (0)
|
|
720
|
+
|
|
721
|
+
#define INPUT_BUF72 do { \
|
|
722
|
+
READ64(a00, 0); \
|
|
723
|
+
READ64(a10, 8); \
|
|
724
|
+
READ64(a20, 16); \
|
|
725
|
+
READ64(a30, 24); \
|
|
726
|
+
READ64(a40, 32); \
|
|
727
|
+
READ64(a01, 40); \
|
|
728
|
+
READ64(a11, 48); \
|
|
729
|
+
READ64(a21, 56); \
|
|
730
|
+
READ64(a31, 64); \
|
|
731
|
+
} while (0)
|
|
732
|
+
|
|
733
|
+
#define INPUT_BUF(lim) do { \
|
|
734
|
+
READ64(a00, 0); \
|
|
735
|
+
READ64(a10, 8); \
|
|
736
|
+
READ64(a20, 16); \
|
|
737
|
+
READ64(a30, 24); \
|
|
738
|
+
READ64(a40, 32); \
|
|
739
|
+
READ64(a01, 40); \
|
|
740
|
+
READ64(a11, 48); \
|
|
741
|
+
READ64(a21, 56); \
|
|
742
|
+
READ64(a31, 64); \
|
|
743
|
+
if ((lim) == 72) \
|
|
744
|
+
break; \
|
|
745
|
+
READ64(a41, 72); \
|
|
746
|
+
READ64(a02, 80); \
|
|
747
|
+
READ64(a12, 88); \
|
|
748
|
+
READ64(a22, 96); \
|
|
749
|
+
if ((lim) == 104) \
|
|
750
|
+
break; \
|
|
751
|
+
READ64(a32, 104); \
|
|
752
|
+
READ64(a42, 112); \
|
|
753
|
+
READ64(a03, 120); \
|
|
754
|
+
READ64(a13, 128); \
|
|
755
|
+
if ((lim) == 136) \
|
|
756
|
+
break; \
|
|
757
|
+
READ64(a23, 136); \
|
|
758
|
+
} while (0)
|
|
759
|
+
|
|
760
|
+
#endif
|
|
761
|
+
|
|
762
|
+
/*
 * DECL64(x): declare the two 32-bit halves (x##l = low, x##h = high) of
 * an emulated 64-bit temporary. The halves are sph_u32, matching the
 * state variables, the RC[] fields and the SPH_T32() truncation used by
 * the other macros of this 32-bit code path; no 64-bit type is needed.
 */
#define DECL64(x) sph_u32 x ## l, x ## h
|
|
763
|
+
#define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h)
|
|
764
|
+
#define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h)
|
|
765
|
+
#define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)
|
|
766
|
+
#define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)
|
|
767
|
+
#define NOT64(d, s) (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h))
|
|
768
|
+
#define ROL64(d, v, n) ROL64_ ## n(d, v)
|
|
769
|
+
|
|
770
|
+
#if SPH_KECCAK_INTERLEAVE
|
|
771
|
+
|
|
772
|
+
/*
 * ROL64_odd1(d, v): 64-bit rotate-left by 1 on the bit-interleaved
 * representation (used by ROL64_1). The new low word is the old high
 * word rotated left by 1, and the new high word is the old low word
 * unchanged. This is a separate macro because ROL64_odd() with n == 1
 * would shift a 32-bit value by 32. `tmp` lets d and v name the same
 * lane.
 */
#define ROL64_odd1(d, v) do { \
|
|
773
|
+
sph_u32 tmp; \
|
|
774
|
+
tmp = v ## l; \
|
|
775
|
+
d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \
|
|
776
|
+
d ## h = tmp; \
|
|
777
|
+
} while (0)
|
|
778
|
+
|
|
779
|
+
/*
 * ROL64_odd63(d, v): 64-bit rotate-left by 63 on the bit-interleaved
 * representation (used by ROL64_63). The new high word is the old low
 * word rotated left by 31, and the new low word is the old high word
 * unchanged. This is a separate macro because ROL64_odd() with n == 32
 * would shift a 32-bit value by 32. `tmp` lets d and v name the same
 * lane.
 */
#define ROL64_odd63(d, v) do { \
|
|
780
|
+
sph_u32 tmp; \
|
|
781
|
+
tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); \
|
|
782
|
+
d ## l = v ## h; \
|
|
783
|
+
d ## h = tmp; \
|
|
784
|
+
} while (0)
|
|
785
|
+
|
|
786
|
+
/*
 * ROL64_odd(d, v, n): 64-bit rotate-left by the odd amount 2n - 1 on
 * the bit-interleaved representation (ROL64_3 uses n = 2, ...,
 * ROL64_61 uses n = 31). The new low word is the old high word rotated
 * left by n; the new high word is the old low word rotated left by
 * n - 1 (note 33 - n == 32 - (n - 1)). n must be in 2..31 so that every
 * shift count stays in 1..31. `tmp` lets d and v name the same lane.
 */
#define ROL64_odd(d, v, n) do { \
|
|
787
|
+
sph_u32 tmp; \
|
|
788
|
+
tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \
|
|
789
|
+
d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
|
|
790
|
+
d ## h = tmp; \
|
|
791
|
+
} while (0)
|
|
792
|
+
|
|
793
|
+
/*
 * ROL64_even(d, v, n): 64-bit rotate-left by the even amount 2n on the
 * bit-interleaved representation (ROL64_2 uses n = 1, ..., ROL64_62
 * uses n = 31). Both 32-bit words are rotated left by n independently.
 * n must be in 1..31 so that the shift counts are valid.
 */
#define ROL64_even(d, v, n) do { \
|
|
794
|
+
d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \
|
|
795
|
+
d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
|
|
796
|
+
} while (0)
|
|
797
|
+
|
|
798
|
+
/*
 * ROL64_0(d, v): rotation by 0 is a plain copy of both 32-bit halves.
 * Copying (rather than expanding to nothing) keeps ROL64(d, v, 0)
 * correct when d and v are different lanes.
 */
#define ROL64_0(d, v) (d ## l = v ## l, d ## h = v ## h)
|
|
799
|
+
#define ROL64_1(d, v) ROL64_odd1(d, v)
|
|
800
|
+
#define ROL64_2(d, v) ROL64_even(d, v, 1)
|
|
801
|
+
#define ROL64_3(d, v) ROL64_odd( d, v, 2)
|
|
802
|
+
#define ROL64_4(d, v) ROL64_even(d, v, 2)
|
|
803
|
+
#define ROL64_5(d, v) ROL64_odd( d, v, 3)
|
|
804
|
+
#define ROL64_6(d, v) ROL64_even(d, v, 3)
|
|
805
|
+
#define ROL64_7(d, v) ROL64_odd( d, v, 4)
|
|
806
|
+
#define ROL64_8(d, v) ROL64_even(d, v, 4)
|
|
807
|
+
#define ROL64_9(d, v) ROL64_odd( d, v, 5)
|
|
808
|
+
#define ROL64_10(d, v) ROL64_even(d, v, 5)
|
|
809
|
+
#define ROL64_11(d, v) ROL64_odd( d, v, 6)
|
|
810
|
+
#define ROL64_12(d, v) ROL64_even(d, v, 6)
|
|
811
|
+
#define ROL64_13(d, v) ROL64_odd( d, v, 7)
|
|
812
|
+
#define ROL64_14(d, v) ROL64_even(d, v, 7)
|
|
813
|
+
#define ROL64_15(d, v) ROL64_odd( d, v, 8)
|
|
814
|
+
#define ROL64_16(d, v) ROL64_even(d, v, 8)
|
|
815
|
+
#define ROL64_17(d, v) ROL64_odd( d, v, 9)
|
|
816
|
+
#define ROL64_18(d, v) ROL64_even(d, v, 9)
|
|
817
|
+
#define ROL64_19(d, v) ROL64_odd( d, v, 10)
|
|
818
|
+
#define ROL64_20(d, v) ROL64_even(d, v, 10)
|
|
819
|
+
#define ROL64_21(d, v) ROL64_odd( d, v, 11)
|
|
820
|
+
#define ROL64_22(d, v) ROL64_even(d, v, 11)
|
|
821
|
+
#define ROL64_23(d, v) ROL64_odd( d, v, 12)
|
|
822
|
+
#define ROL64_24(d, v) ROL64_even(d, v, 12)
|
|
823
|
+
#define ROL64_25(d, v) ROL64_odd( d, v, 13)
|
|
824
|
+
#define ROL64_26(d, v) ROL64_even(d, v, 13)
|
|
825
|
+
#define ROL64_27(d, v) ROL64_odd( d, v, 14)
|
|
826
|
+
#define ROL64_28(d, v) ROL64_even(d, v, 14)
|
|
827
|
+
#define ROL64_29(d, v) ROL64_odd( d, v, 15)
|
|
828
|
+
#define ROL64_30(d, v) ROL64_even(d, v, 15)
|
|
829
|
+
#define ROL64_31(d, v) ROL64_odd( d, v, 16)
|
|
830
|
+
#define ROL64_32(d, v) ROL64_even(d, v, 16)
|
|
831
|
+
#define ROL64_33(d, v) ROL64_odd( d, v, 17)
|
|
832
|
+
#define ROL64_34(d, v) ROL64_even(d, v, 17)
|
|
833
|
+
#define ROL64_35(d, v) ROL64_odd( d, v, 18)
|
|
834
|
+
#define ROL64_36(d, v) ROL64_even(d, v, 18)
|
|
835
|
+
#define ROL64_37(d, v) ROL64_odd( d, v, 19)
|
|
836
|
+
#define ROL64_38(d, v) ROL64_even(d, v, 19)
|
|
837
|
+
#define ROL64_39(d, v) ROL64_odd( d, v, 20)
|
|
838
|
+
#define ROL64_40(d, v) ROL64_even(d, v, 20)
|
|
839
|
+
#define ROL64_41(d, v) ROL64_odd( d, v, 21)
|
|
840
|
+
#define ROL64_42(d, v) ROL64_even(d, v, 21)
|
|
841
|
+
#define ROL64_43(d, v) ROL64_odd( d, v, 22)
|
|
842
|
+
#define ROL64_44(d, v) ROL64_even(d, v, 22)
|
|
843
|
+
#define ROL64_45(d, v) ROL64_odd( d, v, 23)
|
|
844
|
+
#define ROL64_46(d, v) ROL64_even(d, v, 23)
|
|
845
|
+
#define ROL64_47(d, v) ROL64_odd( d, v, 24)
|
|
846
|
+
#define ROL64_48(d, v) ROL64_even(d, v, 24)
|
|
847
|
+
#define ROL64_49(d, v) ROL64_odd( d, v, 25)
|
|
848
|
+
#define ROL64_50(d, v) ROL64_even(d, v, 25)
|
|
849
|
+
#define ROL64_51(d, v) ROL64_odd( d, v, 26)
|
|
850
|
+
#define ROL64_52(d, v) ROL64_even(d, v, 26)
|
|
851
|
+
#define ROL64_53(d, v) ROL64_odd( d, v, 27)
|
|
852
|
+
#define ROL64_54(d, v) ROL64_even(d, v, 27)
|
|
853
|
+
#define ROL64_55(d, v) ROL64_odd( d, v, 28)
|
|
854
|
+
#define ROL64_56(d, v) ROL64_even(d, v, 28)
|
|
855
|
+
#define ROL64_57(d, v) ROL64_odd( d, v, 29)
|
|
856
|
+
#define ROL64_58(d, v) ROL64_even(d, v, 29)
|
|
857
|
+
#define ROL64_59(d, v) ROL64_odd( d, v, 30)
|
|
858
|
+
#define ROL64_60(d, v) ROL64_even(d, v, 30)
|
|
859
|
+
#define ROL64_61(d, v) ROL64_odd( d, v, 31)
|
|
860
|
+
#define ROL64_62(d, v) ROL64_even(d, v, 31)
|
|
861
|
+
#define ROL64_63(d, v) ROL64_odd63(d, v)
|
|
862
|
+
|
|
863
|
+
#else
|
|
864
|
+
|
|
865
|
+
/*
 * ROL64_small(d, v, n): 64-bit rotate-left by n on the plain
 * (non-interleaved) two-word representation, for n in 1..31. Each
 * result word takes its upper bits from the same source word shifted
 * left by n and its lower n bits from the top of the other source word.
 * `tmp` holds the new low word until the new high word has been
 * computed, so d and v may name the same lane.
 */
#define ROL64_small(d, v, n) do { \
|
|
866
|
+
sph_u32 tmp; \
|
|
867
|
+
tmp = SPH_T32(v ## l << n) | (v ## h >> (32 - n)); \
|
|
868
|
+
d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \
|
|
869
|
+
d ## l = tmp; \
|
|
870
|
+
} while (0)
|
|
871
|
+
|
|
872
|
+
/*
 * ROL64_0(d, v): rotation by 0 is a plain copy of both 32-bit halves.
 * Copying (rather than expanding to the constant 0) keeps
 * ROL64(d, v, 0) correct when d and v are different lanes.
 */
#define ROL64_0(d, v) (d ## l = v ## l, d ## h = v ## h)
|
|
873
|
+
#define ROL64_1(d, v) ROL64_small(d, v, 1)
|
|
874
|
+
#define ROL64_2(d, v) ROL64_small(d, v, 2)
|
|
875
|
+
#define ROL64_3(d, v) ROL64_small(d, v, 3)
|
|
876
|
+
#define ROL64_4(d, v) ROL64_small(d, v, 4)
|
|
877
|
+
#define ROL64_5(d, v) ROL64_small(d, v, 5)
|
|
878
|
+
#define ROL64_6(d, v) ROL64_small(d, v, 6)
|
|
879
|
+
#define ROL64_7(d, v) ROL64_small(d, v, 7)
|
|
880
|
+
#define ROL64_8(d, v) ROL64_small(d, v, 8)
|
|
881
|
+
#define ROL64_9(d, v) ROL64_small(d, v, 9)
|
|
882
|
+
#define ROL64_10(d, v) ROL64_small(d, v, 10)
|
|
883
|
+
#define ROL64_11(d, v) ROL64_small(d, v, 11)
|
|
884
|
+
#define ROL64_12(d, v) ROL64_small(d, v, 12)
|
|
885
|
+
#define ROL64_13(d, v) ROL64_small(d, v, 13)
|
|
886
|
+
#define ROL64_14(d, v) ROL64_small(d, v, 14)
|
|
887
|
+
#define ROL64_15(d, v) ROL64_small(d, v, 15)
|
|
888
|
+
#define ROL64_16(d, v) ROL64_small(d, v, 16)
|
|
889
|
+
#define ROL64_17(d, v) ROL64_small(d, v, 17)
|
|
890
|
+
#define ROL64_18(d, v) ROL64_small(d, v, 18)
|
|
891
|
+
#define ROL64_19(d, v) ROL64_small(d, v, 19)
|
|
892
|
+
#define ROL64_20(d, v) ROL64_small(d, v, 20)
|
|
893
|
+
#define ROL64_21(d, v) ROL64_small(d, v, 21)
|
|
894
|
+
#define ROL64_22(d, v) ROL64_small(d, v, 22)
|
|
895
|
+
#define ROL64_23(d, v) ROL64_small(d, v, 23)
|
|
896
|
+
#define ROL64_24(d, v) ROL64_small(d, v, 24)
|
|
897
|
+
#define ROL64_25(d, v) ROL64_small(d, v, 25)
|
|
898
|
+
#define ROL64_26(d, v) ROL64_small(d, v, 26)
|
|
899
|
+
#define ROL64_27(d, v) ROL64_small(d, v, 27)
|
|
900
|
+
#define ROL64_28(d, v) ROL64_small(d, v, 28)
|
|
901
|
+
#define ROL64_29(d, v) ROL64_small(d, v, 29)
|
|
902
|
+
#define ROL64_30(d, v) ROL64_small(d, v, 30)
|
|
903
|
+
#define ROL64_31(d, v) ROL64_small(d, v, 31)
|
|
904
|
+
|
|
905
|
+
/*
 * ROL64_32(d, v): 64-bit rotate-left by 32 on the plain two-word
 * representation, i.e. swap the low and high words. `tmp` lets d and v
 * name the same lane.
 */
#define ROL64_32(d, v) do { \
|
|
906
|
+
sph_u32 tmp; \
|
|
907
|
+
tmp = v ## l; \
|
|
908
|
+
d ## l = v ## h; \
|
|
909
|
+
d ## h = tmp; \
|
|
910
|
+
} while (0)
|
|
911
|
+
|
|
912
|
+
/*
 * ROL64_big(d, v, n): 64-bit rotate-left by 32 + n on the plain
 * two-word representation (ROL64_33 uses n = 1, ..., ROL64_63 uses
 * n = 31). Rotates by n into the temporaries trl/trh with
 * ROL64_small(), then stores them with the words swapped, which adds
 * the remaining rotation by 32.
 */
#define ROL64_big(d, v, n) do { \
|
|
913
|
+
sph_u32 trl, trh; \
|
|
914
|
+
ROL64_small(tr, v, n); \
|
|
915
|
+
d ## h = trl; \
|
|
916
|
+
d ## l = trh; \
|
|
917
|
+
} while (0)
|
|
918
|
+
|
|
919
|
+
#define ROL64_33(d, v) ROL64_big(d, v, 1)
|
|
920
|
+
#define ROL64_34(d, v) ROL64_big(d, v, 2)
|
|
921
|
+
#define ROL64_35(d, v) ROL64_big(d, v, 3)
|
|
922
|
+
#define ROL64_36(d, v) ROL64_big(d, v, 4)
|
|
923
|
+
#define ROL64_37(d, v) ROL64_big(d, v, 5)
|
|
924
|
+
#define ROL64_38(d, v) ROL64_big(d, v, 6)
|
|
925
|
+
#define ROL64_39(d, v) ROL64_big(d, v, 7)
|
|
926
|
+
#define ROL64_40(d, v) ROL64_big(d, v, 8)
|
|
927
|
+
#define ROL64_41(d, v) ROL64_big(d, v, 9)
|
|
928
|
+
#define ROL64_42(d, v) ROL64_big(d, v, 10)
|
|
929
|
+
#define ROL64_43(d, v) ROL64_big(d, v, 11)
|
|
930
|
+
#define ROL64_44(d, v) ROL64_big(d, v, 12)
|
|
931
|
+
#define ROL64_45(d, v) ROL64_big(d, v, 13)
|
|
932
|
+
#define ROL64_46(d, v) ROL64_big(d, v, 14)
|
|
933
|
+
#define ROL64_47(d, v) ROL64_big(d, v, 15)
|
|
934
|
+
#define ROL64_48(d, v) ROL64_big(d, v, 16)
|
|
935
|
+
#define ROL64_49(d, v) ROL64_big(d, v, 17)
|
|
936
|
+
#define ROL64_50(d, v) ROL64_big(d, v, 18)
|
|
937
|
+
#define ROL64_51(d, v) ROL64_big(d, v, 19)
|
|
938
|
+
#define ROL64_52(d, v) ROL64_big(d, v, 20)
|
|
939
|
+
#define ROL64_53(d, v) ROL64_big(d, v, 21)
|
|
940
|
+
#define ROL64_54(d, v) ROL64_big(d, v, 22)
|
|
941
|
+
#define ROL64_55(d, v) ROL64_big(d, v, 23)
|
|
942
|
+
#define ROL64_56(d, v) ROL64_big(d, v, 24)
|
|
943
|
+
#define ROL64_57(d, v) ROL64_big(d, v, 25)
|
|
944
|
+
#define ROL64_58(d, v) ROL64_big(d, v, 26)
|
|
945
|
+
#define ROL64_59(d, v) ROL64_big(d, v, 27)
|
|
946
|
+
#define ROL64_60(d, v) ROL64_big(d, v, 28)
|
|
947
|
+
#define ROL64_61(d, v) ROL64_big(d, v, 29)
|
|
948
|
+
#define ROL64_62(d, v) ROL64_big(d, v, 30)
|
|
949
|
+
#define ROL64_63(d, v) ROL64_big(d, v, 31)
|
|
950
|
+
|
|
951
|
+
#endif
|
|
952
|
+
|
|
953
|
+
/*
 * XOR64_IOTA(d, s, k): d = s ^ k for the 32-bit code path, where d and
 * s are lane base names (their ##l / ##h halves are used) and k is a
 * round-constant record with `low` and `high` members, such as an
 * element of RC[]. k is parenthesized so that any expression yielding
 * such a record (e.g. *p) can be passed.
 */
#define XOR64_IOTA(d, s, k) \
	(d ## l = s ## l ^ (k).low, d ## h = s ## h ^ (k).high)
|
|
955
|
+
|
|
956
|
+
#endif
|
|
957
|
+
|
|
958
|
+
/*
 * TH_ELT(t, c0..c4, d0..d4): compute one theta column value,
 *
 *     t = (c0 ^ c1 ^ c2 ^ c3 ^ c4) ^ ROL64(d0 ^ d1 ^ d2 ^ d3 ^ d4, 1)
 *
 * using the DECL64/XOR64/ROL64 abstraction so the same text works for
 * both the 64-bit and the two-word code paths. THETA() calls it five
 * times, passing the five lanes of one column as c0..c4 and the five
 * lanes of another column as d0..d4. tt0..tt3 are scratch values local
 * to the do-block.
 */
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
|
|
959
|
+
DECL64(tt0); \
|
|
960
|
+
DECL64(tt1); \
|
|
961
|
+
DECL64(tt2); \
|
|
962
|
+
DECL64(tt3); \
|
|
963
|
+
XOR64(tt0, d0, d1); \
|
|
964
|
+
XOR64(tt1, d2, d3); \
|
|
965
|
+
XOR64(tt0, tt0, d4); \
|
|
966
|
+
XOR64(tt0, tt0, tt1); \
|
|
967
|
+
ROL64(tt0, tt0, 1); \
|
|
968
|
+
XOR64(tt2, c0, c1); \
|
|
969
|
+
XOR64(tt3, c2, c3); \
|
|
970
|
+
XOR64(tt0, tt0, c4); \
|
|
971
|
+
XOR64(tt2, tt2, tt3); \
|
|
972
|
+
XOR64(t, tt0, tt2); \
|
|
973
|
+
} while (0)
|
|
974
|
+
|
|
975
|
+
#define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
|
976
|
+
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
|
977
|
+
b40, b41, b42, b43, b44) \
|
|
978
|
+
do { \
|
|
979
|
+
DECL64(t0); \
|
|
980
|
+
DECL64(t1); \
|
|
981
|
+
DECL64(t2); \
|
|
982
|
+
DECL64(t3); \
|
|
983
|
+
DECL64(t4); \
|
|
984
|
+
TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
|
|
985
|
+
TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
|
|
986
|
+
TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
|
|
987
|
+
TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
|
|
988
|
+
TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
|
|
989
|
+
XOR64(b00, b00, t0); \
|
|
990
|
+
XOR64(b01, b01, t0); \
|
|
991
|
+
XOR64(b02, b02, t0); \
|
|
992
|
+
XOR64(b03, b03, t0); \
|
|
993
|
+
XOR64(b04, b04, t0); \
|
|
994
|
+
XOR64(b10, b10, t1); \
|
|
995
|
+
XOR64(b11, b11, t1); \
|
|
996
|
+
XOR64(b12, b12, t1); \
|
|
997
|
+
XOR64(b13, b13, t1); \
|
|
998
|
+
XOR64(b14, b14, t1); \
|
|
999
|
+
XOR64(b20, b20, t2); \
|
|
1000
|
+
XOR64(b21, b21, t2); \
|
|
1001
|
+
XOR64(b22, b22, t2); \
|
|
1002
|
+
XOR64(b23, b23, t2); \
|
|
1003
|
+
XOR64(b24, b24, t2); \
|
|
1004
|
+
XOR64(b30, b30, t3); \
|
|
1005
|
+
XOR64(b31, b31, t3); \
|
|
1006
|
+
XOR64(b32, b32, t3); \
|
|
1007
|
+
XOR64(b33, b33, t3); \
|
|
1008
|
+
XOR64(b34, b34, t3); \
|
|
1009
|
+
XOR64(b40, b40, t4); \
|
|
1010
|
+
XOR64(b41, b41, t4); \
|
|
1011
|
+
XOR64(b42, b42, t4); \
|
|
1012
|
+
XOR64(b43, b43, t4); \
|
|
1013
|
+
XOR64(b44, b44, t4); \
|
|
1014
|
+
} while (0)
|
|
1015
|
+
|
|
1016
|
+
#define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
|
1017
|
+
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
|
1018
|
+
b40, b41, b42, b43, b44) \
|
|
1019
|
+
do { \
|
|
1020
|
+
/* ROL64(b00, b00, 0); */ \
|
|
1021
|
+
ROL64(b01, b01, 36); \
|
|
1022
|
+
ROL64(b02, b02, 3); \
|
|
1023
|
+
ROL64(b03, b03, 41); \
|
|
1024
|
+
ROL64(b04, b04, 18); \
|
|
1025
|
+
ROL64(b10, b10, 1); \
|
|
1026
|
+
ROL64(b11, b11, 44); \
|
|
1027
|
+
ROL64(b12, b12, 10); \
|
|
1028
|
+
ROL64(b13, b13, 45); \
|
|
1029
|
+
ROL64(b14, b14, 2); \
|
|
1030
|
+
ROL64(b20, b20, 62); \
|
|
1031
|
+
ROL64(b21, b21, 6); \
|
|
1032
|
+
ROL64(b22, b22, 43); \
|
|
1033
|
+
ROL64(b23, b23, 15); \
|
|
1034
|
+
ROL64(b24, b24, 61); \
|
|
1035
|
+
ROL64(b30, b30, 28); \
|
|
1036
|
+
ROL64(b31, b31, 55); \
|
|
1037
|
+
ROL64(b32, b32, 25); \
|
|
1038
|
+
ROL64(b33, b33, 21); \
|
|
1039
|
+
ROL64(b34, b34, 56); \
|
|
1040
|
+
ROL64(b40, b40, 27); \
|
|
1041
|
+
ROL64(b41, b41, 20); \
|
|
1042
|
+
ROL64(b42, b42, 39); \
|
|
1043
|
+
ROL64(b43, b43, 8); \
|
|
1044
|
+
ROL64(b44, b44, 14); \
|
|
1045
|
+
} while (0)
|
|
1046
|
+
|
|
1047
|
+
/*
|
|
1048
|
+
* The KHI macro integrates the "lane complement" optimization. On input,
|
|
1049
|
+
* some words are complemented:
|
|
1050
|
+
* a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
|
|
1051
|
+
* On output, the following words are complemented:
|
|
1052
|
+
* a04 a10 a20 a22 a23 a31
|
|
1053
|
+
*
|
|
1054
|
+
* The (implicit) permutation and the theta expansion will bring back
|
|
1055
|
+
* the input mask for the next round.
|
|
1056
|
+
*/
|
|
1057
|
+
|
|
1058
|
+
/*
 * KHI_XO(d, a, b, c): d = a ^ (b | c). One of the two building blocks
 * of KHI(); `kt` is a scratch value local to the do-block.
 */
#define KHI_XO(d, a, b, c) do { \
|
|
1059
|
+
DECL64(kt); \
|
|
1060
|
+
OR64(kt, b, c); \
|
|
1061
|
+
XOR64(d, a, kt); \
|
|
1062
|
+
} while (0)
|
|
1063
|
+
|
|
1064
|
+
/*
 * KHI_XA(d, a, b, c): d = a ^ (b & c). One of the two building blocks
 * of KHI(); `kt` is a scratch value local to the do-block.
 */
#define KHI_XA(d, a, b, c) do { \
|
|
1065
|
+
DECL64(kt); \
|
|
1066
|
+
AND64(kt, b, c); \
|
|
1067
|
+
XOR64(d, a, kt); \
|
|
1068
|
+
} while (0)
|
|
1069
|
+
|
|
1070
|
+
/*
 * KHI: process the 25 lane arguments in five groups of five, one group
 * per second index digit: {b00,b10,b20,b30,b40}, {b01,...,b41}, and so
 * on. For each group, five new values c0..c4 are computed with
 * KHI_XO / KHI_XA from the group's current values and only afterwards
 * written back with MOV64, so every output depends on the pre-update
 * inputs. In each group one lane is first complemented into bnn with
 * NOT64 and bnn is used in place of that lane in some terms; this is
 * part of the "lane complement" scheme described in the comment that
 * precedes KHI_XO.
 */
#define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
|
|
1071
|
+
b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
|
|
1072
|
+
b40, b41, b42, b43, b44) \
|
|
1073
|
+
do { \
|
|
1074
|
+
DECL64(c0); \
|
|
1075
|
+
DECL64(c1); \
|
|
1076
|
+
DECL64(c2); \
|
|
1077
|
+
DECL64(c3); \
|
|
1078
|
+
DECL64(c4); \
|
|
1079
|
+
DECL64(bnn); \
|
|
1080
|
+
NOT64(bnn, b20); \
|
|
1081
|
+
KHI_XO(c0, b00, b10, b20); \
|
|
1082
|
+
KHI_XO(c1, b10, bnn, b30); \
|
|
1083
|
+
KHI_XA(c2, b20, b30, b40); \
|
|
1084
|
+
KHI_XO(c3, b30, b40, b00); \
|
|
1085
|
+
KHI_XA(c4, b40, b00, b10); \
|
|
1086
|
+
MOV64(b00, c0); \
|
|
1087
|
+
MOV64(b10, c1); \
|
|
1088
|
+
MOV64(b20, c2); \
|
|
1089
|
+
MOV64(b30, c3); \
|
|
1090
|
+
MOV64(b40, c4); \
|
|
1091
|
+
NOT64(bnn, b41); \
|
|
1092
|
+
KHI_XO(c0, b01, b11, b21); \
|
|
1093
|
+
KHI_XA(c1, b11, b21, b31); \
|
|
1094
|
+
KHI_XO(c2, b21, b31, bnn); \
|
|
1095
|
+
KHI_XO(c3, b31, b41, b01); \
|
|
1096
|
+
KHI_XA(c4, b41, b01, b11); \
|
|
1097
|
+
MOV64(b01, c0); \
|
|
1098
|
+
MOV64(b11, c1); \
|
|
1099
|
+
MOV64(b21, c2); \
|
|
1100
|
+
MOV64(b31, c3); \
|
|
1101
|
+
MOV64(b41, c4); \
|
|
1102
|
+
NOT64(bnn, b32); \
|
|
1103
|
+
KHI_XO(c0, b02, b12, b22); \
|
|
1104
|
+
KHI_XA(c1, b12, b22, b32); \
|
|
1105
|
+
KHI_XA(c2, b22, bnn, b42); \
|
|
1106
|
+
KHI_XO(c3, bnn, b42, b02); \
|
|
1107
|
+
KHI_XA(c4, b42, b02, b12); \
|
|
1108
|
+
MOV64(b02, c0); \
|
|
1109
|
+
MOV64(b12, c1); \
|
|
1110
|
+
MOV64(b22, c2); \
|
|
1111
|
+
MOV64(b32, c3); \
|
|
1112
|
+
MOV64(b42, c4); \
|
|
1113
|
+
NOT64(bnn, b33); \
|
|
1114
|
+
KHI_XA(c0, b03, b13, b23); \
|
|
1115
|
+
KHI_XO(c1, b13, b23, b33); \
|
|
1116
|
+
KHI_XO(c2, b23, bnn, b43); \
|
|
1117
|
+
KHI_XA(c3, bnn, b43, b03); \
|
|
1118
|
+
KHI_XO(c4, b43, b03, b13); \
|
|
1119
|
+
MOV64(b03, c0); \
|
|
1120
|
+
MOV64(b13, c1); \
|
|
1121
|
+
MOV64(b23, c2); \
|
|
1122
|
+
MOV64(b33, c3); \
|
|
1123
|
+
MOV64(b43, c4); \
|
|
1124
|
+
NOT64(bnn, b14); \
|
|
1125
|
+
KHI_XA(c0, b04, bnn, b24); \
|
|
1126
|
+
KHI_XO(c1, bnn, b24, b34); \
|
|
1127
|
+
KHI_XA(c2, b24, b34, b44); \
|
|
1128
|
+
KHI_XO(c3, b34, b44, b04); \
|
|
1129
|
+
KHI_XA(c4, b44, b04, b14); \
|
|
1130
|
+
MOV64(b04, c0); \
|
|
1131
|
+
MOV64(b14, c1); \
|
|
1132
|
+
MOV64(b24, c2); \
|
|
1133
|
+
MOV64(b34, c3); \
|
|
1134
|
+
MOV64(b44, c4); \
|
|
1135
|
+
} while (0)
|
|
1136
|
+
|
|
1137
|
+
/*
 * IOTA(r): pass the state variable a00 (as both destination and source)
 * together with r to XOR64_IOTA, which is defined elsewhere in this
 * file -- presumably XORing the round constant r into a00.
 */
#define IOTA(r) XOR64_IOTA(a00, a00, r)
|
|
1138
|
+
|
|
1139
|
+
/*
 * P0 .. P23: 25-name argument lists of the state variables a00..a44,
 * each in a different order (P0 is the plain row-by-row order).
 * KF_ELT pastes the round index onto "P" to pick one of these lists as
 * the argument list for THETA, RHO and KHI, so the lane reordering
 * between rounds is expressed by renaming rather than by moving data.
 */
#define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
|
|
1140
|
+
a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
|
|
1141
|
+
#define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
|
|
1142
|
+
a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
|
|
1143
|
+
#define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
|
|
1144
|
+
a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
|
|
1145
|
+
#define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
|
|
1146
|
+
a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
|
|
1147
|
+
#define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
|
|
1148
|
+
a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
|
|
1149
|
+
#define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
|
|
1150
|
+
a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
|
|
1151
|
+
#define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
|
|
1152
|
+
a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
|
|
1153
|
+
#define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
|
|
1154
|
+
a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
|
|
1155
|
+
#define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
|
|
1156
|
+
a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
|
|
1157
|
+
#define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
|
|
1158
|
+
a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
|
|
1159
|
+
#define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
|
|
1160
|
+
a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
|
|
1161
|
+
#define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
|
|
1162
|
+
a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
|
|
1163
|
+
#define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
|
|
1164
|
+
a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
|
|
1165
|
+
#define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
|
|
1166
|
+
a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
|
|
1167
|
+
#define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
|
|
1168
|
+
a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
|
|
1169
|
+
#define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
|
|
1170
|
+
a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
|
|
1171
|
+
#define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
|
|
1172
|
+
a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
|
|
1173
|
+
#define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
|
|
1174
|
+
a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
|
|
1175
|
+
#define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
|
|
1176
|
+
a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
|
|
1177
|
+
#define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
|
|
1178
|
+
a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
|
|
1179
|
+
#define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
|
|
1180
|
+
a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
|
|
1181
|
+
#define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
|
|
1182
|
+
a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
|
|
1183
|
+
#define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
|
|
1184
|
+
a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
|
|
1185
|
+
#define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
|
|
1186
|
+
a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
|
|
1187
|
+
|
|
1188
|
+
/*
 * P1_TO_P0: move values among the state variables along a single chain
 * of MOV64 operations, using t to hold the initial a01 until it is
 * stored into a13 at the end. Invoked after each round by the
 * KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 1 -- presumably to
 * bring the values back to the P0 variable layout (TODO confirm).
 */
#define P1_TO_P0 do { \
|
|
1189
|
+
DECL64(t); \
|
|
1190
|
+
MOV64(t, a01); \
|
|
1191
|
+
MOV64(a01, a30); \
|
|
1192
|
+
MOV64(a30, a33); \
|
|
1193
|
+
MOV64(a33, a23); \
|
|
1194
|
+
MOV64(a23, a12); \
|
|
1195
|
+
MOV64(a12, a21); \
|
|
1196
|
+
MOV64(a21, a02); \
|
|
1197
|
+
MOV64(a02, a10); \
|
|
1198
|
+
MOV64(a10, a11); \
|
|
1199
|
+
MOV64(a11, a41); \
|
|
1200
|
+
MOV64(a41, a24); \
|
|
1201
|
+
MOV64(a24, a42); \
|
|
1202
|
+
MOV64(a42, a04); \
|
|
1203
|
+
MOV64(a04, a20); \
|
|
1204
|
+
MOV64(a20, a22); \
|
|
1205
|
+
MOV64(a22, a32); \
|
|
1206
|
+
MOV64(a32, a43); \
|
|
1207
|
+
MOV64(a43, a34); \
|
|
1208
|
+
MOV64(a34, a03); \
|
|
1209
|
+
MOV64(a03, a40); \
|
|
1210
|
+
MOV64(a40, a44); \
|
|
1211
|
+
MOV64(a44, a14); \
|
|
1212
|
+
MOV64(a14, a31); \
|
|
1213
|
+
MOV64(a31, a13); \
|
|
1214
|
+
MOV64(a13, t); \
|
|
1215
|
+
} while (0)
|
|
1216
|
+
|
|
1217
|
+
/*
 * P2_TO_P0: move values among the state variables in two chains of
 * MOV64 operations; t holds the first value of each chain until the
 * chain's last store. Invoked at the end of each loop iteration of the
 * KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 2 -- presumably to
 * bring the values back to the P0 variable layout (TODO confirm).
 */
#define P2_TO_P0 do { \
|
|
1218
|
+
DECL64(t); \
|
|
1219
|
+
MOV64(t, a01); \
|
|
1220
|
+
MOV64(a01, a33); \
|
|
1221
|
+
MOV64(a33, a12); \
|
|
1222
|
+
MOV64(a12, a02); \
|
|
1223
|
+
MOV64(a02, a11); \
|
|
1224
|
+
MOV64(a11, a24); \
|
|
1225
|
+
MOV64(a24, a04); \
|
|
1226
|
+
MOV64(a04, a22); \
|
|
1227
|
+
MOV64(a22, a43); \
|
|
1228
|
+
MOV64(a43, a03); \
|
|
1229
|
+
MOV64(a03, a44); \
|
|
1230
|
+
MOV64(a44, a31); \
|
|
1231
|
+
MOV64(a31, t); \
|
|
1232
|
+
MOV64(t, a10); \
|
|
1233
|
+
MOV64(a10, a41); \
|
|
1234
|
+
MOV64(a41, a42); \
|
|
1235
|
+
MOV64(a42, a20); \
|
|
1236
|
+
MOV64(a20, a32); \
|
|
1237
|
+
MOV64(a32, a34); \
|
|
1238
|
+
MOV64(a34, a40); \
|
|
1239
|
+
MOV64(a40, a14); \
|
|
1240
|
+
MOV64(a14, a13); \
|
|
1241
|
+
MOV64(a13, a30); \
|
|
1242
|
+
MOV64(a30, a23); \
|
|
1243
|
+
MOV64(a23, a21); \
|
|
1244
|
+
MOV64(a21, t); \
|
|
1245
|
+
} while (0)
|
|
1246
|
+
|
|
1247
|
+
/*
 * P4_TO_P0: move values among the state variables in four chains of
 * MOV64 operations; t holds the first value of each chain until the
 * chain's last store. Invoked at the end of each loop iteration of the
 * KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 4 -- presumably to
 * bring the values back to the P0 variable layout (TODO confirm).
 */
#define P4_TO_P0 do { \
|
|
1248
|
+
DECL64(t); \
|
|
1249
|
+
MOV64(t, a01); \
|
|
1250
|
+
MOV64(a01, a12); \
|
|
1251
|
+
MOV64(a12, a11); \
|
|
1252
|
+
MOV64(a11, a04); \
|
|
1253
|
+
MOV64(a04, a43); \
|
|
1254
|
+
MOV64(a43, a44); \
|
|
1255
|
+
MOV64(a44, t); \
|
|
1256
|
+
MOV64(t, a02); \
|
|
1257
|
+
MOV64(a02, a24); \
|
|
1258
|
+
MOV64(a24, a22); \
|
|
1259
|
+
MOV64(a22, a03); \
|
|
1260
|
+
MOV64(a03, a31); \
|
|
1261
|
+
MOV64(a31, a33); \
|
|
1262
|
+
MOV64(a33, t); \
|
|
1263
|
+
MOV64(t, a10); \
|
|
1264
|
+
MOV64(a10, a42); \
|
|
1265
|
+
MOV64(a42, a32); \
|
|
1266
|
+
MOV64(a32, a40); \
|
|
1267
|
+
MOV64(a40, a13); \
|
|
1268
|
+
MOV64(a13, a23); \
|
|
1269
|
+
MOV64(a23, t); \
|
|
1270
|
+
MOV64(t, a14); \
|
|
1271
|
+
MOV64(a14, a30); \
|
|
1272
|
+
MOV64(a30, a21); \
|
|
1273
|
+
MOV64(a21, a41); \
|
|
1274
|
+
MOV64(a41, a20); \
|
|
1275
|
+
MOV64(a20, a34); \
|
|
1276
|
+
MOV64(a34, t); \
|
|
1277
|
+
} while (0)
|
|
1278
|
+
|
|
1279
|
+
/*
 * P6_TO_P0: move values among the state variables in six chains of
 * four variables each; t holds the first value of each chain until the
 * chain's last store. Invoked at the end of each loop iteration of the
 * KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 6 -- presumably to
 * bring the values back to the P0 variable layout (TODO confirm).
 */
#define P6_TO_P0 do { \
|
|
1280
|
+
DECL64(t); \
|
|
1281
|
+
MOV64(t, a01); \
|
|
1282
|
+
MOV64(a01, a02); \
|
|
1283
|
+
MOV64(a02, a04); \
|
|
1284
|
+
MOV64(a04, a03); \
|
|
1285
|
+
MOV64(a03, t); \
|
|
1286
|
+
MOV64(t, a10); \
|
|
1287
|
+
MOV64(a10, a20); \
|
|
1288
|
+
MOV64(a20, a40); \
|
|
1289
|
+
MOV64(a40, a30); \
|
|
1290
|
+
MOV64(a30, t); \
|
|
1291
|
+
MOV64(t, a11); \
|
|
1292
|
+
MOV64(a11, a22); \
|
|
1293
|
+
MOV64(a22, a44); \
|
|
1294
|
+
MOV64(a44, a33); \
|
|
1295
|
+
MOV64(a33, t); \
|
|
1296
|
+
MOV64(t, a12); \
|
|
1297
|
+
MOV64(a12, a24); \
|
|
1298
|
+
MOV64(a24, a43); \
|
|
1299
|
+
MOV64(a43, a31); \
|
|
1300
|
+
MOV64(a31, t); \
|
|
1301
|
+
MOV64(t, a13); \
|
|
1302
|
+
MOV64(a13, a21); \
|
|
1303
|
+
MOV64(a21, a42); \
|
|
1304
|
+
MOV64(a42, a34); \
|
|
1305
|
+
MOV64(a34, t); \
|
|
1306
|
+
MOV64(t, a14); \
|
|
1307
|
+
MOV64(a14, a23); \
|
|
1308
|
+
MOV64(a23, a41); \
|
|
1309
|
+
MOV64(a41, a32); \
|
|
1310
|
+
MOV64(a32, t); \
|
|
1311
|
+
} while (0)
|
|
1312
|
+
|
|
1313
|
+
/*
 * P8_TO_P0: move values among the state variables in eight chains of
 * three variables each; t holds the first value of each chain until
 * the chain's last store. Invoked at the end of each loop iteration of
 * the KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 8 -- presumably
 * to bring the values back to the P0 variable layout (TODO confirm).
 */
#define P8_TO_P0 do { \
|
|
1314
|
+
DECL64(t); \
|
|
1315
|
+
MOV64(t, a01); \
|
|
1316
|
+
MOV64(a01, a11); \
|
|
1317
|
+
MOV64(a11, a43); \
|
|
1318
|
+
MOV64(a43, t); \
|
|
1319
|
+
MOV64(t, a02); \
|
|
1320
|
+
MOV64(a02, a22); \
|
|
1321
|
+
MOV64(a22, a31); \
|
|
1322
|
+
MOV64(a31, t); \
|
|
1323
|
+
MOV64(t, a03); \
|
|
1324
|
+
MOV64(a03, a33); \
|
|
1325
|
+
MOV64(a33, a24); \
|
|
1326
|
+
MOV64(a24, t); \
|
|
1327
|
+
MOV64(t, a04); \
|
|
1328
|
+
MOV64(a04, a44); \
|
|
1329
|
+
MOV64(a44, a12); \
|
|
1330
|
+
MOV64(a12, t); \
|
|
1331
|
+
MOV64(t, a10); \
|
|
1332
|
+
MOV64(a10, a32); \
|
|
1333
|
+
MOV64(a32, a13); \
|
|
1334
|
+
MOV64(a13, t); \
|
|
1335
|
+
MOV64(t, a14); \
|
|
1336
|
+
MOV64(a14, a21); \
|
|
1337
|
+
MOV64(a21, a20); \
|
|
1338
|
+
MOV64(a20, t); \
|
|
1339
|
+
MOV64(t, a23); \
|
|
1340
|
+
MOV64(a23, a42); \
|
|
1341
|
+
MOV64(a42, a40); \
|
|
1342
|
+
MOV64(a40, t); \
|
|
1343
|
+
MOV64(t, a30); \
|
|
1344
|
+
MOV64(a30, a41); \
|
|
1345
|
+
MOV64(a41, a34); \
|
|
1346
|
+
MOV64(a34, t); \
|
|
1347
|
+
} while (0)
|
|
1348
|
+
|
|
1349
|
+
/*
 * P12_TO_P0: exchange twelve pairs of state variables, each swap going
 * through the temporary t. Invoked at the end of each loop iteration of
 * the KECCAK_F_1600_ variant for SPH_KECCAK_UNROLL == 12 -- presumably
 * to bring the values back to the P0 variable layout (TODO confirm).
 */
#define P12_TO_P0 do { \
|
|
1350
|
+
DECL64(t); \
|
|
1351
|
+
MOV64(t, a01); \
|
|
1352
|
+
MOV64(a01, a04); \
|
|
1353
|
+
MOV64(a04, t); \
|
|
1354
|
+
MOV64(t, a02); \
|
|
1355
|
+
MOV64(a02, a03); \
|
|
1356
|
+
MOV64(a03, t); \
|
|
1357
|
+
MOV64(t, a10); \
|
|
1358
|
+
MOV64(a10, a40); \
|
|
1359
|
+
MOV64(a40, t); \
|
|
1360
|
+
MOV64(t, a11); \
|
|
1361
|
+
MOV64(a11, a44); \
|
|
1362
|
+
MOV64(a44, t); \
|
|
1363
|
+
MOV64(t, a12); \
|
|
1364
|
+
MOV64(a12, a43); \
|
|
1365
|
+
MOV64(a43, t); \
|
|
1366
|
+
MOV64(t, a13); \
|
|
1367
|
+
MOV64(a13, a42); \
|
|
1368
|
+
MOV64(a42, t); \
|
|
1369
|
+
MOV64(t, a14); \
|
|
1370
|
+
MOV64(a14, a41); \
|
|
1371
|
+
MOV64(a41, t); \
|
|
1372
|
+
MOV64(t, a20); \
|
|
1373
|
+
MOV64(a20, a30); \
|
|
1374
|
+
MOV64(a30, t); \
|
|
1375
|
+
MOV64(t, a21); \
|
|
1376
|
+
MOV64(a21, a34); \
|
|
1377
|
+
MOV64(a34, t); \
|
|
1378
|
+
MOV64(t, a22); \
|
|
1379
|
+
MOV64(a22, a33); \
|
|
1380
|
+
MOV64(a33, t); \
|
|
1381
|
+
MOV64(t, a23); \
|
|
1382
|
+
MOV64(a23, a32); \
|
|
1383
|
+
MOV64(a32, t); \
|
|
1384
|
+
MOV64(t, a24); \
|
|
1385
|
+
MOV64(a24, a31); \
|
|
1386
|
+
MOV64(a31, t); \
|
|
1387
|
+
} while (0)
|
|
1388
|
+
|
|
1389
|
+
/*
 * LPAR / RPAR expand to a bare "(" and ")". KF_ELT writes its calls as
 * "THETA LPAR P ## r RPAR" instead of "THETA(P ## r)" -- presumably so
 * that the pasted P<r> name is expanded into its 25 separate arguments
 * before the THETA/RHO/KHI invocation is recognized on a later rescan
 * (NOTE(review): confirm against the DO() wrapper used by
 * KECCAK_F_1600).
 *
 * KF_ELT(r, s, k): one round element -- THETA and RHO on the variable
 * ordering P<r>, KHI on the ordering P<s>, then IOTA with constant k.
 */
#define LPAR (
|
|
1390
|
+
#define RPAR )
|
|
1391
|
+
|
|
1392
|
+
#define KF_ELT(r, s, k) do { \
|
|
1393
|
+
THETA LPAR P ## r RPAR; \
|
|
1394
|
+
RHO LPAR P ## r RPAR; \
|
|
1395
|
+
KHI LPAR P ## s RPAR; \
|
|
1396
|
+
IOTA(k); \
|
|
1397
|
+
} while (0)
|
|
1398
|
+
|
|
1399
|
+
/*
 * DO(x) expands to its argument unchanged. KECCAK_F_1600 is the name
 * used by callers; it routes KECCAK_F_1600_ (selected below according
 * to SPH_KECCAK_UNROLL) through DO() -- presumably to force one more
 * macro expansion pass over the round body (TODO confirm).
 */
#define DO(x) x
|
|
1400
|
+
|
|
1401
|
+
#define KECCAK_F_1600 DO(KECCAK_F_1600_)
|
|
1402
|
+
|
|
1403
|
+
/*
 * Select the body of KECCAK_F_1600_ from SPH_KECCAK_UNROLL.
 *
 * For 1, 2, 4, 6, 8 and 12, the body is a loop over j from 0 to 23 that
 * runs that many KF_ELT rounds per iteration (with constants RC[j + i])
 * and ends each iteration with the matching Pn_TO_P0 macro.
 * For 0, all 24 KF_ELT rounds are written out without a loop and
 * without any Pn_TO_P0 step; the last round uses P0 as its KHI
 * ordering.
 * Any other value stops compilation with #error.
 */
#if SPH_KECCAK_UNROLL == 1
|
|
1404
|
+
|
|
1405
|
+
#define KECCAK_F_1600_ do { \
|
|
1406
|
+
int j; \
|
|
1407
|
+
for (j = 0; j < 24; j ++) { \
|
|
1408
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1409
|
+
P1_TO_P0; \
|
|
1410
|
+
} \
|
|
1411
|
+
} while (0)
|
|
1412
|
+
|
|
1413
|
+
#elif SPH_KECCAK_UNROLL == 2
|
|
1414
|
+
|
|
1415
|
+
#define KECCAK_F_1600_ do { \
|
|
1416
|
+
int j; \
|
|
1417
|
+
for (j = 0; j < 24; j += 2) { \
|
|
1418
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1419
|
+
KF_ELT( 1, 2, RC[j + 1]); \
|
|
1420
|
+
P2_TO_P0; \
|
|
1421
|
+
} \
|
|
1422
|
+
} while (0)
|
|
1423
|
+
|
|
1424
|
+
#elif SPH_KECCAK_UNROLL == 4
|
|
1425
|
+
|
|
1426
|
+
#define KECCAK_F_1600_ do { \
|
|
1427
|
+
int j; \
|
|
1428
|
+
for (j = 0; j < 24; j += 4) { \
|
|
1429
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1430
|
+
KF_ELT( 1, 2, RC[j + 1]); \
|
|
1431
|
+
KF_ELT( 2, 3, RC[j + 2]); \
|
|
1432
|
+
KF_ELT( 3, 4, RC[j + 3]); \
|
|
1433
|
+
P4_TO_P0; \
|
|
1434
|
+
} \
|
|
1435
|
+
} while (0)
|
|
1436
|
+
|
|
1437
|
+
#elif SPH_KECCAK_UNROLL == 6
|
|
1438
|
+
|
|
1439
|
+
#define KECCAK_F_1600_ do { \
|
|
1440
|
+
int j; \
|
|
1441
|
+
for (j = 0; j < 24; j += 6) { \
|
|
1442
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1443
|
+
KF_ELT( 1, 2, RC[j + 1]); \
|
|
1444
|
+
KF_ELT( 2, 3, RC[j + 2]); \
|
|
1445
|
+
KF_ELT( 3, 4, RC[j + 3]); \
|
|
1446
|
+
KF_ELT( 4, 5, RC[j + 4]); \
|
|
1447
|
+
KF_ELT( 5, 6, RC[j + 5]); \
|
|
1448
|
+
P6_TO_P0; \
|
|
1449
|
+
} \
|
|
1450
|
+
} while (0)
|
|
1451
|
+
|
|
1452
|
+
#elif SPH_KECCAK_UNROLL == 8
|
|
1453
|
+
|
|
1454
|
+
#define KECCAK_F_1600_ do { \
|
|
1455
|
+
int j; \
|
|
1456
|
+
for (j = 0; j < 24; j += 8) { \
|
|
1457
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1458
|
+
KF_ELT( 1, 2, RC[j + 1]); \
|
|
1459
|
+
KF_ELT( 2, 3, RC[j + 2]); \
|
|
1460
|
+
KF_ELT( 3, 4, RC[j + 3]); \
|
|
1461
|
+
KF_ELT( 4, 5, RC[j + 4]); \
|
|
1462
|
+
KF_ELT( 5, 6, RC[j + 5]); \
|
|
1463
|
+
KF_ELT( 6, 7, RC[j + 6]); \
|
|
1464
|
+
KF_ELT( 7, 8, RC[j + 7]); \
|
|
1465
|
+
P8_TO_P0; \
|
|
1466
|
+
} \
|
|
1467
|
+
} while (0)
|
|
1468
|
+
|
|
1469
|
+
#elif SPH_KECCAK_UNROLL == 12
|
|
1470
|
+
|
|
1471
|
+
#define KECCAK_F_1600_ do { \
|
|
1472
|
+
int j; \
|
|
1473
|
+
for (j = 0; j < 24; j += 12) { \
|
|
1474
|
+
KF_ELT( 0, 1, RC[j + 0]); \
|
|
1475
|
+
KF_ELT( 1, 2, RC[j + 1]); \
|
|
1476
|
+
KF_ELT( 2, 3, RC[j + 2]); \
|
|
1477
|
+
KF_ELT( 3, 4, RC[j + 3]); \
|
|
1478
|
+
KF_ELT( 4, 5, RC[j + 4]); \
|
|
1479
|
+
KF_ELT( 5, 6, RC[j + 5]); \
|
|
1480
|
+
KF_ELT( 6, 7, RC[j + 6]); \
|
|
1481
|
+
KF_ELT( 7, 8, RC[j + 7]); \
|
|
1482
|
+
KF_ELT( 8, 9, RC[j + 8]); \
|
|
1483
|
+
KF_ELT( 9, 10, RC[j + 9]); \
|
|
1484
|
+
KF_ELT(10, 11, RC[j + 10]); \
|
|
1485
|
+
KF_ELT(11, 12, RC[j + 11]); \
|
|
1486
|
+
P12_TO_P0; \
|
|
1487
|
+
} \
|
|
1488
|
+
} while (0)
|
|
1489
|
+
|
|
1490
|
+
#elif SPH_KECCAK_UNROLL == 0
|
|
1491
|
+
|
|
1492
|
+
#define KECCAK_F_1600_ do { \
|
|
1493
|
+
KF_ELT( 0, 1, RC[ 0]); \
|
|
1494
|
+
KF_ELT( 1, 2, RC[ 1]); \
|
|
1495
|
+
KF_ELT( 2, 3, RC[ 2]); \
|
|
1496
|
+
KF_ELT( 3, 4, RC[ 3]); \
|
|
1497
|
+
KF_ELT( 4, 5, RC[ 4]); \
|
|
1498
|
+
KF_ELT( 5, 6, RC[ 5]); \
|
|
1499
|
+
KF_ELT( 6, 7, RC[ 6]); \
|
|
1500
|
+
KF_ELT( 7, 8, RC[ 7]); \
|
|
1501
|
+
KF_ELT( 8, 9, RC[ 8]); \
|
|
1502
|
+
KF_ELT( 9, 10, RC[ 9]); \
|
|
1503
|
+
KF_ELT(10, 11, RC[10]); \
|
|
1504
|
+
KF_ELT(11, 12, RC[11]); \
|
|
1505
|
+
KF_ELT(12, 13, RC[12]); \
|
|
1506
|
+
KF_ELT(13, 14, RC[13]); \
|
|
1507
|
+
KF_ELT(14, 15, RC[14]); \
|
|
1508
|
+
KF_ELT(15, 16, RC[15]); \
|
|
1509
|
+
KF_ELT(16, 17, RC[16]); \
|
|
1510
|
+
KF_ELT(17, 18, RC[17]); \
|
|
1511
|
+
KF_ELT(18, 19, RC[18]); \
|
|
1512
|
+
KF_ELT(19, 20, RC[19]); \
|
|
1513
|
+
KF_ELT(20, 21, RC[20]); \
|
|
1514
|
+
KF_ELT(21, 22, RC[21]); \
|
|
1515
|
+
KF_ELT(22, 23, RC[22]); \
|
|
1516
|
+
KF_ELT(23, 0, RC[23]); \
|
|
1517
|
+
} while (0)
|
|
1518
|
+
|
|
1519
|
+
#else
|
|
1520
|
+
|
|
1521
|
+
#error Unimplemented unroll count for Keccak.
|
|
1522
|
+
|
|
1523
|
+
#endif
|
|
1524
|
+
|
|
1525
|
+
static void
|
|
1526
|
+
keccak_init(sph_keccak_context *kc, unsigned out_size)
|
|
1527
|
+
{
|
|
1528
|
+
int i;
|
|
1529
|
+
|
|
1530
|
+
#if SPH_KECCAK_64
|
|
1531
|
+
for (i = 0; i < 25; i ++)
|
|
1532
|
+
kc->u.wide[i] = 0;
|
|
1533
|
+
/*
|
|
1534
|
+
* Initialization for the "lane complement".
|
|
1535
|
+
*/
|
|
1536
|
+
kc->u.wide[ 1] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1537
|
+
kc->u.wide[ 2] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1538
|
+
kc->u.wide[ 8] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1539
|
+
kc->u.wide[12] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1540
|
+
kc->u.wide[17] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1541
|
+
kc->u.wide[20] = SPH_C64(0xFFFFFFFFFFFFFFFF);
|
|
1542
|
+
#else
|
|
1543
|
+
|
|
1544
|
+
for (i = 0; i < 50; i ++)
|
|
1545
|
+
kc->u.narrow[i] = 0;
|
|
1546
|
+
/*
|
|
1547
|
+
* Initialization for the "lane complement".
|
|
1548
|
+
* Note: since we set to all-one full 64-bit words,
|
|
1549
|
+
* interleaving (if applicable) is a no-op.
|
|
1550
|
+
*/
|
|
1551
|
+
kc->u.narrow[ 2] = SPH_C32(0xFFFFFFFF);
|
|
1552
|
+
kc->u.narrow[ 3] = SPH_C32(0xFFFFFFFF);
|
|
1553
|
+
kc->u.narrow[ 4] = SPH_C32(0xFFFFFFFF);
|
|
1554
|
+
kc->u.narrow[ 5] = SPH_C32(0xFFFFFFFF);
|
|
1555
|
+
kc->u.narrow[16] = SPH_C32(0xFFFFFFFF);
|
|
1556
|
+
kc->u.narrow[17] = SPH_C32(0xFFFFFFFF);
|
|
1557
|
+
kc->u.narrow[24] = SPH_C32(0xFFFFFFFF);
|
|
1558
|
+
kc->u.narrow[25] = SPH_C32(0xFFFFFFFF);
|
|
1559
|
+
kc->u.narrow[34] = SPH_C32(0xFFFFFFFF);
|
|
1560
|
+
kc->u.narrow[35] = SPH_C32(0xFFFFFFFF);
|
|
1561
|
+
kc->u.narrow[40] = SPH_C32(0xFFFFFFFF);
|
|
1562
|
+
kc->u.narrow[41] = SPH_C32(0xFFFFFFFF);
|
|
1563
|
+
#endif
|
|
1564
|
+
kc->ptr = 0;
|
|
1565
|
+
kc->lim = 200 - (out_size >> 2);
|
|
1566
|
+
}
|
|
1567
|
+
|
|
1568
|
+
/*
 * Absorb len bytes from data into the context, lim bytes per block.
 *
 * Input is accumulated in kc->buf starting at offset kc->ptr. If the
 * new data does not fill the buffer up to lim, it is only copied and
 * the offset is advanced. Otherwise the data is consumed in chunks:
 * each time the buffer reaches lim bytes, INPUT_BUF(lim) and
 * KECCAK_F_1600 are run and the offset restarts at 0. Any remainder
 * stays in the buffer and kc->ptr records its length.
 *
 * DECL_STATE, READ_STATE, INPUT_BUF, KECCAK_F_1600 and WRITE_STATE are
 * macros defined elsewhere in this file; they presumably declare the
 * working state variables, load them from kc, mix the buffered block
 * into them, permute them, and store them back (confirm against their
 * definitions). They may refer to the locals of this function by name.
 */
static void
|
|
1569
|
+
keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
|
|
1570
|
+
{
|
|
1571
|
+
unsigned char *buf;
|
|
1572
|
+
size_t ptr;
|
|
1573
|
+
DECL_STATE
|
|
1574
|
+
|
|
1575
|
+
buf = kc->buf;
|
|
1576
|
+
ptr = kc->ptr;
|
|
1577
|
+
|
|
1578
|
+
/* Fast path: not enough data to complete a block, just buffer it. */
if (len < (lim - ptr)) {
|
|
1579
|
+
memcpy(buf + ptr, data, len);
|
|
1580
|
+
kc->ptr = ptr + len;
|
|
1581
|
+
return;
|
|
1582
|
+
}
|
|
1583
|
+
|
|
1584
|
+
READ_STATE(kc);
|
|
1585
|
+
while (len > 0) {
|
|
1586
|
+
size_t clen;
|
|
1587
|
+
|
|
1588
|
+
/* Copy at most the free space left in the current block. */
clen = (lim - ptr);
|
|
1589
|
+
if (clen > len)
|
|
1590
|
+
clen = len;
|
|
1591
|
+
memcpy(buf + ptr, data, clen);
|
|
1592
|
+
ptr += clen;
|
|
1593
|
+
data = (const unsigned char *)data + clen;
|
|
1594
|
+
len -= clen;
|
|
1595
|
+
/* A full block is buffered: process it and start a new one. */
if (ptr == lim) {
|
|
1596
|
+
INPUT_BUF(lim);
|
|
1597
|
+
KECCAK_F_1600;
|
|
1598
|
+
ptr = 0;
|
|
1599
|
+
}
|
|
1600
|
+
}
|
|
1601
|
+
WRITE_STATE(kc);
|
|
1602
|
+
kc->ptr = ptr;
|
|
1603
|
+
}
|
|
1604
|
+
|
|
1605
|
+
/*
 * DEFCLOSE(d, lim) defines the static function keccak_close<d>(), which
 * finishes a computation whose output is d bytes long and whose block
 * size is lim bytes:
 *
 *  - the n extra bits taken from the top of ub are followed by a 1 bit;
 *    zero bytes and a final 0x80 byte complete the block. When only one
 *    byte is free in the buffer, the final bit is merged into that same
 *    byte, except for n == 7 where an additional full block is needed;
 *  - the padding is fed through keccak_core();
 *  - the lanes stored complemented are inverted back;
 *  - the first d bytes of the state are encoded little-endian and
 *    copied to dst;
 *  - the context is re-initialized for the same output size.
 *
 * The index variable is kept as "j" because it appears inside the
 * arguments of helper macros defined elsewhere in this file.
 */
#if SPH_KECCAK_64

#define DEFCLOSE(d, lim) \
	static void keccak_close ## d( \
		sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
	{ \
		static const unsigned char inverted[6] = \
			{ 1, 2, 8, 12, 17, 20 }; \
		union { \
			unsigned char bytes[lim + 1]; \
			sph_u64 align;  /* for alignment */ \
		} pad; \
		unsigned tail; \
		size_t len; \
		size_t j; \
		\
		tail = (0x100 | (ub & 0xFF)) >> (8 - n); \
		if (kc->ptr != (lim - 1)) { \
			len = lim - kc->ptr; \
			pad.bytes[0] = tail; \
			memset(pad.bytes + 1, 0, len - 2); \
			pad.bytes[len - 1] = 0x80; \
		} else if (n != 7) { \
			pad.bytes[0] = tail | 0x80; \
			len = 1; \
		} else { \
			pad.bytes[0] = tail; \
			memset(pad.bytes + 1, 0, lim - 1); \
			pad.bytes[lim] = 0x80; \
			len = 1 + lim; \
		} \
		keccak_core(kc, pad.bytes, len, lim); \
		/* Finalize the "lane complement" */ \
		for (j = 0; j < 6; j ++) \
			kc->u.wide[inverted[j]] = ~kc->u.wide[inverted[j]]; \
		for (j = 0; j < d; j += 8) \
			sph_enc64le_aligned(pad.bytes + j, kc->u.wide[j >> 3]); \
		memcpy(dst, pad.bytes, d); \
		keccak_init(kc, (unsigned)d << 3); \
	}

#else

#define DEFCLOSE(d, lim) \
	static void keccak_close ## d( \
		sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
	{ \
		static const unsigned char inverted[6] = \
			{ 1, 2, 8, 12, 17, 20 }; \
		union { \
			unsigned char bytes[lim + 1]; \
			sph_u64 align;  /* for alignment */ \
		} pad; \
		unsigned tail; \
		size_t len; \
		size_t j; \
		\
		tail = (0x100 | (ub & 0xFF)) >> (8 - n); \
		if (kc->ptr != (lim - 1)) { \
			len = lim - kc->ptr; \
			pad.bytes[0] = tail; \
			memset(pad.bytes + 1, 0, len - 2); \
			pad.bytes[len - 1] = 0x80; \
		} else if (n != 7) { \
			pad.bytes[0] = tail | 0x80; \
			len = 1; \
		} else { \
			pad.bytes[0] = tail; \
			memset(pad.bytes + 1, 0, lim - 1); \
			pad.bytes[lim] = 0x80; \
			len = 1 + lim; \
		} \
		keccak_core(kc, pad.bytes, len, lim); \
		/* Finalize the "lane complement": both halves of each lane */ \
		for (j = 0; j < 6; j ++) { \
			kc->u.narrow[2 * inverted[j]] = \
				~kc->u.narrow[2 * inverted[j]]; \
			kc->u.narrow[2 * inverted[j] + 1] = \
				~kc->u.narrow[2 * inverted[j] + 1]; \
		} \
		/* un-interleave */ \
		for (j = 0; j < 50; j += 2) \
			UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
		for (j = 0; j < d; j += 4) \
			sph_enc32le_aligned(pad.bytes + j, kc->u.narrow[j >> 2]); \
		memcpy(dst, pad.bytes, d); \
		keccak_init(kc, (unsigned)d << 3); \
	}

#endif
|
|
1703
|
+
|
|
1704
|
+
/*
 * Instantiate keccak_close28, keccak_close32, keccak_close48 and
 * keccak_close64. The first argument is the number of output bytes
 * copied to dst; the second is the block size in bytes, matching the
 * limit that the corresponding sph_keccakNNN() passes to keccak_core().
 */
DEFCLOSE(28, 144)
|
|
1705
|
+
DEFCLOSE(32, 136)
|
|
1706
|
+
DEFCLOSE(48, 104)
|
|
1707
|
+
DEFCLOSE(64, 72)
|
|
1708
|
+
|
|
1709
|
+
/* see sph_keccak.h */
|
|
1710
|
+
void
|
|
1711
|
+
sph_keccak224_init(void *cc)
|
|
1712
|
+
{
|
|
1713
|
+
keccak_init(cc, 224);
|
|
1714
|
+
}
|
|
1715
|
+
|
|
1716
|
+
/* see sph_keccak.h */
|
|
1717
|
+
void
|
|
1718
|
+
sph_keccak224(void *cc, const void *data, size_t len)
|
|
1719
|
+
{
|
|
1720
|
+
keccak_core(cc, data, len, 144);
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
/*
 * Finish the Keccak-224 computation held in cc and write the result to
 * dst. Equivalent to sph_keccak224_addbits_and_close() with no extra
 * bits (ub = 0, n = 0).
 * See sph_keccak.h.
 */
void
sph_keccak224_close(void *cc, void *dst)
{
	const unsigned no_extra = 0;

	sph_keccak224_addbits_and_close(cc, no_extra, no_extra, dst);
}
|
|
1729
|
+
|
|
1730
|
+
/* see sph_keccak.h */
|
|
1731
|
+
void
|
|
1732
|
+
sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|
1733
|
+
{
|
|
1734
|
+
keccak_close28(cc, ub, n, dst);
|
|
1735
|
+
}
|
|
1736
|
+
|
|
1737
|
+
/* see sph_keccak.h */
|
|
1738
|
+
void
|
|
1739
|
+
sph_keccak256_init(void *cc)
|
|
1740
|
+
{
|
|
1741
|
+
keccak_init(cc, 256);
|
|
1742
|
+
}
|
|
1743
|
+
|
|
1744
|
+
/* see sph_keccak.h */
|
|
1745
|
+
void
|
|
1746
|
+
sph_keccak256(void *cc, const void *data, size_t len)
|
|
1747
|
+
{
|
|
1748
|
+
keccak_core(cc, data, len, 136);
|
|
1749
|
+
}
|
|
1750
|
+
|
|
1751
|
+
/*
 * Finish the Keccak-256 computation held in cc and write the result to
 * dst. Equivalent to sph_keccak256_addbits_and_close() with no extra
 * bits (ub = 0, n = 0).
 * See sph_keccak.h.
 */
void
sph_keccak256_close(void *cc, void *dst)
{
	const unsigned no_extra = 0;

	sph_keccak256_addbits_and_close(cc, no_extra, no_extra, dst);
}
|
|
1757
|
+
|
|
1758
|
+
/* see sph_keccak.h */
|
|
1759
|
+
void
|
|
1760
|
+
sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|
1761
|
+
{
|
|
1762
|
+
keccak_close32(cc, ub, n, dst);
|
|
1763
|
+
}
|
|
1764
|
+
|
|
1765
|
+
/* see sph_keccak.h */
|
|
1766
|
+
void
|
|
1767
|
+
sph_keccak384_init(void *cc)
|
|
1768
|
+
{
|
|
1769
|
+
keccak_init(cc, 384);
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
/* see sph_keccak.h */
|
|
1773
|
+
void
|
|
1774
|
+
sph_keccak384(void *cc, const void *data, size_t len)
|
|
1775
|
+
{
|
|
1776
|
+
keccak_core(cc, data, len, 104);
|
|
1777
|
+
}
|
|
1778
|
+
|
|
1779
|
+
/*
 * Finish the Keccak-384 computation held in cc and write the result to
 * dst. Equivalent to sph_keccak384_addbits_and_close() with no extra
 * bits (ub = 0, n = 0).
 * See sph_keccak.h.
 */
void
sph_keccak384_close(void *cc, void *dst)
{
	const unsigned no_extra = 0;

	sph_keccak384_addbits_and_close(cc, no_extra, no_extra, dst);
}
|
|
1785
|
+
|
|
1786
|
+
/* see sph_keccak.h */
|
|
1787
|
+
void
|
|
1788
|
+
sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|
1789
|
+
{
|
|
1790
|
+
keccak_close48(cc, ub, n, dst);
|
|
1791
|
+
}
|
|
1792
|
+
|
|
1793
|
+
/* see sph_keccak.h */
|
|
1794
|
+
void
|
|
1795
|
+
sph_keccak512_init(void *cc)
|
|
1796
|
+
{
|
|
1797
|
+
keccak_init(cc, 512);
|
|
1798
|
+
}
|
|
1799
|
+
|
|
1800
|
+
/* see sph_keccak.h */
|
|
1801
|
+
void
|
|
1802
|
+
sph_keccak512(void *cc, const void *data, size_t len)
|
|
1803
|
+
{
|
|
1804
|
+
keccak_core(cc, data, len, 72);
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
/*
 * Finish the Keccak-512 computation held in cc and write the result to
 * dst. Equivalent to sph_keccak512_addbits_and_close() with no extra
 * bits (ub = 0, n = 0).
 * See sph_keccak.h.
 */
void
sph_keccak512_close(void *cc, void *dst)
{
	const unsigned no_extra = 0;

	sph_keccak512_addbits_and_close(cc, no_extra, no_extra, dst);
}
|
|
1813
|
+
|
|
1814
|
+
/* see sph_keccak.h */
|
|
1815
|
+
void
|
|
1816
|
+
sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
|
1817
|
+
{
|
|
1818
|
+
keccak_close64(cc, ub, n, dst);
|
|
1819
|
+
}
|
|
1820
|
+
|
|
1821
|
+
|
|
1822
|
+
#ifdef __cplusplus
|
|
1823
|
+
}
|
|
1824
|
+
#endif
|