sleeping_kangaroo12 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +127 -0
- data/ext/Rakefile +73 -0
- data/ext/binding/sleeping_kangaroo12.c +39 -0
- data/ext/config/xkcp.build +17 -0
- data/ext/xkcp/LICENSE +1 -0
- data/ext/xkcp/Makefile +15 -0
- data/ext/xkcp/Makefile.build +200 -0
- data/ext/xkcp/README.markdown +296 -0
- data/ext/xkcp/lib/HighLevel.build +143 -0
- data/ext/xkcp/lib/LowLevel.build +757 -0
- data/ext/xkcp/lib/common/align.h +33 -0
- data/ext/xkcp/lib/common/brg_endian.h +143 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
- data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
- data/ext/xkcp/lib/high/common/Phases.h +25 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
- data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
- data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
- data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
- data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
- data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
- data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
- data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
- data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
- data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
- data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
- data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
- data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
- data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
- data/ext/xkcp/util/KeccakSum/base64.c +86 -0
- data/ext/xkcp/util/KeccakSum/base64.h +12 -0
- data/lib/sleeping_kangaroo12/binding.rb +15 -0
- data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
- data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
- data/lib/sleeping_kangaroo12/build.rb +4 -0
- data/lib/sleeping_kangaroo12/digest.rb +103 -0
- data/lib/sleeping_kangaroo12/version.rb +5 -0
- data/lib/sleeping_kangaroo12.rb +7 -0
- metadata +372 -0
|
@@ -0,0 +1,823 @@
|
|
|
1
|
+
;
|
|
2
|
+
; The eXtended Keccak Code Package (XKCP)
|
|
3
|
+
; https://github.com/XKCP/XKCP
|
|
4
|
+
;
|
|
5
|
+
; The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
|
|
6
|
+
;
|
|
7
|
+
; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
|
8
|
+
;
|
|
9
|
+
; For more information, feedback or questions, please refer to the Keccak Team website:
|
|
10
|
+
; https://keccak.team/
|
|
11
|
+
;
|
|
12
|
+
; To the extent possible under law, the implementer has waived all copyright
|
|
13
|
+
; and related or neighboring rights to the source code in this file.
|
|
14
|
+
; http://creativecommons.org/publicdomain/zero/1.0/
|
|
15
|
+
;
|
|
16
|
+
; ---
|
|
17
|
+
;
|
|
18
|
+
; This file implements Keccak-p[1600] in a SnP-compatible way.
|
|
19
|
+
; Please refer to SnP-documentation.h for more details.
|
|
20
|
+
;
|
|
21
|
+
; This implementation comes with KeccakP-1600-SnP.h in the same folder.
|
|
22
|
+
; Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
|
23
|
+
;
|
|
24
|
+
|
|
25
|
+
; WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
|
|
26
|
+
; WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
|
|
27
|
+
; INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
|
|
28
|
+
|
|
29
|
+
PRESERVE8 ; armasm build attribute: this file preserves 8-byte stack alignment
|
|
30
|
+
AREA |.text|, CODE, READONLY ; everything below is assembled into a read-only code area named .text
|
|
31
|
+
|
|
32
|
+
; conditional assembly settings
|
|
33
|
+
LoopUnroll equ 1 ; possible values 1, 2, 4, 6, 12 (NOTE(review): presumably rounds emitted per loop pass - confirm at its use site)
|
|
34
|
+
|
|
35
|
+
; offsets in state: byte offset of each lane, 8 bytes per lane, lane index 0..24 (_ba = lane 0 ... _su = lane 24)
|
|
36
|
+
_ba equ 0*8
|
|
37
|
+
_be equ 1*8
|
|
38
|
+
_bi equ 2*8
|
|
39
|
+
_bo equ 3*8
|
|
40
|
+
_bu equ 4*8
|
|
41
|
+
_ga equ 5*8
|
|
42
|
+
_ge equ 6*8
|
|
43
|
+
_gi equ 7*8
|
|
44
|
+
_go equ 8*8
|
|
45
|
+
_gu equ 9*8
|
|
46
|
+
_ka equ 10*8
|
|
47
|
+
_ke equ 11*8
|
|
48
|
+
_ki equ 12*8
|
|
49
|
+
_ko equ 13*8
|
|
50
|
+
_ku equ 14*8
|
|
51
|
+
_ma equ 15*8
|
|
52
|
+
_me equ 16*8
|
|
53
|
+
_mi equ 17*8
|
|
54
|
+
_mo equ 18*8
|
|
55
|
+
_mu equ 19*8
|
|
56
|
+
_sa equ 20*8
|
|
57
|
+
_se equ 21*8
|
|
58
|
+
_si equ 22*8
|
|
59
|
+
_so equ 23*8
|
|
60
|
+
_su equ 24*8
|
|
61
|
+
|
|
62
|
+
; macros
|
|
63
|
+
|
|
64
|
+
MACRO ; LoadState: load the 25 lanes at [r0] into d0-d24; r0 is rewound to the state start before the macro ends
|
|
65
|
+
LoadState
|
|
66
|
+
vld1.64 d0, [r0:64]! ; lanes 0-4 (_ba.._bu) go to d0,d2,d4,d6,d8 = low halves of q0-q4
|
|
67
|
+
vld1.64 d2, [r0:64]!
|
|
68
|
+
vld1.64 d4, [r0:64]!
|
|
69
|
+
vld1.64 d6, [r0:64]!
|
|
70
|
+
vld1.64 d8, [r0:64]!
|
|
71
|
+
vld1.64 d1, [r0:64]! ; lanes 5-9 (_ga.._gu) go to d1,d3,d5,d7,d9 = high halves of q0-q4
|
|
72
|
+
vld1.64 d3, [r0:64]!
|
|
73
|
+
vld1.64 d5, [r0:64]!
|
|
74
|
+
vld1.64 d7, [r0:64]!
|
|
75
|
+
vld1.64 d9, [r0:64]!
|
|
76
|
+
vld1.64 d10, [r0:64]! ; lanes 10-14 (_ka.._ku) go to d10,d12,d14,d16,d18 = low halves of q5-q9
|
|
77
|
+
vld1.64 d12, [r0:64]!
|
|
78
|
+
vld1.64 d14, [r0:64]!
|
|
79
|
+
vld1.64 d16, [r0:64]!
|
|
80
|
+
vld1.64 d18, [r0:64]!
|
|
81
|
+
vld1.64 d11, [r0:64]! ; lanes 15-19 (_ma.._mu) go to d11,d13,d15,d17,d19 = high halves of q5-q9
|
|
82
|
+
vld1.64 d13, [r0:64]!
|
|
83
|
+
vld1.64 d15, [r0:64]!
|
|
84
|
+
vld1.64 d17, [r0:64]!
|
|
85
|
+
vld1.64 d19, [r0:64]!
|
|
86
|
+
vld1.64 { d20, d21 }, [r0:128]! ; lanes 20,21 (_sa,_se) in order; the :128 hint needs this address 16-byte aligned
|
|
87
|
+
vld1.64 { d22, d23 }, [r0:128]! ; lanes 22,23 (_si,_so)
|
|
88
|
+
vld1.64 d24, [r0:64] ; lane 24 (_su); no post-increment on this last load
|
|
89
|
+
sub r0, r0, #24*8 ; the loads above advanced r0 by 24 lanes; point it at the state start again
|
|
90
|
+
MEND
|
|
91
|
+
|
|
92
|
+
MACRO ; StoreState: write d0-d24 back to the 25 lanes at [r0], same lane/register mapping as LoadState; r0 is NOT rewound (ends at lane 24)
|
|
93
|
+
StoreState
|
|
94
|
+
vst1.64 d0, [r0:64]! ; lanes 0-4 (_ba.._bu) from d0,d2,d4,d6,d8
|
|
95
|
+
vst1.64 d2, [r0:64]!
|
|
96
|
+
vst1.64 d4, [r0:64]!
|
|
97
|
+
vst1.64 d6, [r0:64]!
|
|
98
|
+
vst1.64 d8, [r0:64]!
|
|
99
|
+
vst1.64 d1, [r0:64]! ; lanes 5-9 (_ga.._gu) from d1,d3,d5,d7,d9
|
|
100
|
+
vst1.64 d3, [r0:64]!
|
|
101
|
+
vst1.64 d5, [r0:64]!
|
|
102
|
+
vst1.64 d7, [r0:64]!
|
|
103
|
+
vst1.64 d9, [r0:64]!
|
|
104
|
+
vst1.64 d10, [r0:64]! ; lanes 10-14 (_ka.._ku) from d10,d12,d14,d16,d18
|
|
105
|
+
vst1.64 d12, [r0:64]!
|
|
106
|
+
vst1.64 d14, [r0:64]!
|
|
107
|
+
vst1.64 d16, [r0:64]!
|
|
108
|
+
vst1.64 d18, [r0:64]!
|
|
109
|
+
vst1.64 d11, [r0:64]! ; lanes 15-19 (_ma.._mu) from d11,d13,d15,d17,d19
|
|
110
|
+
vst1.64 d13, [r0:64]!
|
|
111
|
+
vst1.64 d15, [r0:64]!
|
|
112
|
+
vst1.64 d17, [r0:64]!
|
|
113
|
+
vst1.64 d19, [r0:64]!
|
|
114
|
+
vst1.64 { d20, d21 }, [r0:128]! ; lanes 20,21 (_sa,_se); the :128 hint needs this address 16-byte aligned
|
|
115
|
+
vst1.64 { d22, d23 }, [r0:128]! ; lanes 22,23 (_si,_so)
|
|
116
|
+
vst1.64 d24, [r0:64] ; lane 24 (_su); r0 stays at state + 24*8 from here on
|
|
117
|
+
MEND
|
|
118
|
+
|
|
119
|
+
MACRO ; RhoPi4: four independent 64-bit left rotations, dstN = srcN rotated left by rotN bits (dstN must differ from srcN: src is read again after dst is written)
|
|
120
|
+
RhoPi4 $dst1, $src1, $rot1, $dst2, $src2, $rot2, $dst3, $src3, $rot3, $dst4, $src4, $rot4
|
|
121
|
+
if ($rot1 :AND: 7) != 0 ; rotation 1 is not a whole number of bytes: use a shift/insert pair
|
|
122
|
+
vshl.u64 $dst1, $src1, #$rot1 ; first half: dst = src shifted left, low bits zero for now
|
|
123
|
+
else
|
|
124
|
+
vext.8 $dst1, $src1, $src1, #8-$rot1/8 ; whole-byte rotation: one byte extract from src:src performs the full rotate
|
|
125
|
+
endif
|
|
126
|
+
if ($rot2 :AND: 7) != 0 ; same choice for rotation 2
|
|
127
|
+
vshl.u64 $dst2, $src2, #$rot2
|
|
128
|
+
else
|
|
129
|
+
vext.8 $dst2, $src2, $src2, #8-$rot2/8
|
|
130
|
+
endif
|
|
131
|
+
if ($rot3 :AND: 7) != 0 ; same choice for rotation 3
|
|
132
|
+
vshl.u64 $dst3, $src3, #$rot3
|
|
133
|
+
else
|
|
134
|
+
vext.8 $dst3, $src3, $src3, #8-$rot3/8
|
|
135
|
+
endif
|
|
136
|
+
if ($rot4 :AND: 7) != 0 ; same choice for rotation 4
|
|
137
|
+
vshl.u64 $dst4, $src4, #$rot4
|
|
138
|
+
else
|
|
139
|
+
vext.8 $dst4, $src4, $src4, #8-$rot4/8
|
|
140
|
+
endif
|
|
141
|
+
if ($rot1 :AND: 7) != 0 ; second half of each shift/insert pair (nothing is emitted for the vext cases)
|
|
142
|
+
vsri.u64 $dst1, $src1, #64-$rot1 ; insert the bits shifted out at the top into the low bits of dst, completing the rotate
|
|
143
|
+
endif
|
|
144
|
+
if ($rot2 :AND: 7) != 0
|
|
145
|
+
vsri.u64 $dst2, $src2, #64-$rot2
|
|
146
|
+
endif
|
|
147
|
+
if ($rot3 :AND: 7) != 0
|
|
148
|
+
vsri.u64 $dst3, $src3, #64-$rot3
|
|
149
|
+
endif
|
|
150
|
+
if ($rot4 :AND: 7) != 0
|
|
151
|
+
vsri.u64 $dst4, $src4, #64-$rot4
|
|
152
|
+
endif
|
|
153
|
+
MEND
|
|
154
|
+
|
|
155
|
+
MACRO ; KeccakRound: one round on the state held in d0-d24 (layout as loaded by LoadState); r0 = state buffer (its first 48 bytes serve as spill space), r1 = pointer to the round constant (post-incremented)
|
|
156
|
+
KeccakRound
|
|
157
|
+
|
|
158
|
+
;Prepare Theta: C[x] = XOR of the five lanes of column x; ends with d26=C[a], d27=C[e], d28=C[i], d29=C[o], d25=C[u]
|
|
159
|
+
veor.64 q13, q0, q5 ; d26 = ba^ka, d27 = ga^ma
|
|
160
|
+
vst1.64 {q12}, [r0:128]! ; spill q12 (d24 = su): d24/d25 are reused as temporaries below
|
|
161
|
+
veor.64 q14, q1, q6 ; d28 = be^ke, d29 = ge^me
|
|
162
|
+
vst1.64 {q4}, [r0:128]! ; spill q4 (bu, gu)
|
|
163
|
+
veor.64 d26, d26, d27 ; d26 = ba^ga^ka^ma
|
|
164
|
+
vst1.64 {q9}, [r0:128] ; spill q9 (ku, mu); r0 is now 32 bytes past the state start
|
|
165
|
+
veor.64 d28, d28, d29 ; d28 = be^ge^ke^me
|
|
166
|
+
veor.64 d26, d26, d20 ; d26 = C[a] (sa folded in)
|
|
167
|
+
veor.64 d27, d28, d21 ; d27 = C[e] (se folded in)
|
|
168
|
+
|
|
169
|
+
veor.64 q14, q2, q7 ; d28 = bi^ki, d29 = gi^mi
|
|
170
|
+
veor.64 q15, q3, q8 ; d30 = bo^ko, d31 = go^mo
|
|
171
|
+
veor.64 q4, q4, q9 ; d8 = bu^ku, d9 = gu^mu (q4 is overwritten; its original value was spilled above)
|
|
172
|
+
veor.64 d28, d28, d29
|
|
173
|
+
veor.64 d30, d30, d31
|
|
174
|
+
veor.64 d25, d8, d9 ; d25 = bu^gu^ku^mu
|
|
175
|
+
veor.64 d28, d28, d22 ; d28 = C[i] (si folded in)
|
|
176
|
+
veor.64 d29, d30, d23 ; d29 = C[o] (so folded in)
|
|
177
|
+
veor.64 d25, d25, d24 ; d25 = C[u] (su folded in)
|
|
178
|
+
sub r0, r0, #32 ; r0 back to the state start
|
|
179
|
+
|
|
180
|
+
;Apply Theta: D[x] = C[x-1] ^ rol(C[x+1], 1) is XORed into every lane of column x
|
|
181
|
+
vadd.u64 d30, d27, d27 ; x + x = x << 1; together with the vsri below this builds rol(C[e], 1)
|
|
182
|
+
vadd.u64 d24, d28, d28
|
|
183
|
+
vadd.u64 d8, d29, d29
|
|
184
|
+
vadd.u64 d18, d25, d25
|
|
185
|
+
|
|
186
|
+
vsri.64 d30, d27, #63 ; d30 = rol(C[e], 1): the old top bit is inserted at bit 0
|
|
187
|
+
vsri.64 d24, d28, #63 ; d24 = rol(C[i], 1)
|
|
188
|
+
vsri.64 d8, d29, #63 ; d8 = rol(C[o], 1)
|
|
189
|
+
vsri.64 d18, d25, #63 ; d18 = rol(C[u], 1)
|
|
190
|
+
|
|
191
|
+
veor.64 d30, d30, d25 ; d30 = D[a] = C[u] ^ rol(C[e], 1)
|
|
192
|
+
veor.64 d24, d24, d26 ; d24 = D[e] = C[a] ^ rol(C[i], 1)
|
|
193
|
+
veor.64 d8, d8, d27 ; d8 = D[i] = C[e] ^ rol(C[o], 1)
|
|
194
|
+
vadd.u64 d27, d26, d26 ;u
|
|
195
|
+
veor.64 d18, d18, d28 ; d18 = D[o] = C[i] ^ rol(C[u], 1)
|
|
196
|
+
|
|
197
|
+
vmov.i64 d31, d30 ; duplicate each D into both halves of a q register so one veor updates two lanes
|
|
198
|
+
vmov.i64 d25, d24
|
|
199
|
+
vsri.64 d27, d26, #63 ;u d27 = rol(C[a], 1)
|
|
200
|
+
vmov.i64 d9, d8
|
|
201
|
+
vmov.i64 d19, d18
|
|
202
|
+
|
|
203
|
+
veor.64 d20, d20, d30 ; sa ^= D[a]
|
|
204
|
+
veor.64 d21, d21, d24 ; se ^= D[e]
|
|
205
|
+
veor.64 d27, d27, d29 ;u d27 = D[u] = C[o] ^ rol(C[a], 1)
|
|
206
|
+
veor.64 d22, d22, d8 ; si ^= D[i]
|
|
207
|
+
veor.64 d23, d23, d18 ; so ^= D[o]
|
|
208
|
+
vmov.i64 d26, d27 ;u q13 = D[u] in both halves
|
|
209
|
+
|
|
210
|
+
veor.64 q0, q0, q15 ; ba, ga ^= D[a]
|
|
211
|
+
veor.64 q1, q1, q12 ; be, ge ^= D[e]
|
|
212
|
+
veor.64 q2, q2, q4 ; bi, gi ^= D[i]
|
|
213
|
+
veor.64 q3, q3, q9 ; bo, go ^= D[o]
|
|
214
|
+
|
|
215
|
+
veor.64 q5, q5, q15 ; ka, ma ^= D[a]
|
|
216
|
+
veor.64 q6, q6, q12 ; ke, me ^= D[e]
|
|
217
|
+
vld1.64 {q12}, [r0:128]! ; reload the spilled q12 (d24 = su)
|
|
218
|
+
veor.64 q7, q7, q4 ; ki, mi ^= D[i]
|
|
219
|
+
vld1.64 {q4}, [r0:128]! ; reload the spilled bu, gu
|
|
220
|
+
veor.64 q8, q8, q9 ; ko, mo ^= D[o]
|
|
221
|
+
vld1.64 {q9}, [r0:128] ; reload the spilled ku, mu
|
|
222
|
+
veor.64 d24, d24, d26 ;u su ^= D[u]
|
|
223
|
+
sub r0, r0, #32 ; r0 back to the state start
|
|
224
|
+
veor.64 q4, q4, q13 ;u bu, gu ^= D[u]
|
|
225
|
+
veor.64 q9, q9, q13 ;u ku, mu ^= D[u]
|
|
226
|
+
|
|
227
|
+
;Rho Pi: "x < y" below means lane x receives rotated lane y (lane number = state offset / 8, not the register number)
|
|
228
|
+
vmov.i64 d27, d2 ; keep lanes 1-4 (d2, d4, d6, d8): their registers are overwritten before the last RhoPi4 reads them
|
|
229
|
+
vmov.i64 d28, d4
|
|
230
|
+
vmov.i64 d29, d6
|
|
231
|
+
vmov.i64 d25, d8
|
|
232
|
+
|
|
233
|
+
RhoPi4 d2, d3, 44, d4, d14, 43, d8, d24, 14, d6, d17, 21 ; 1 < 6, 2 < 12, 4 < 24, 3 < 18
|
|
234
|
+
RhoPi4 d3, d9, 20, d14, d16, 25, d24, d21, 2, d17, d15, 15 ; 6 < 9, 12 < 13, 24 < 21, 18 < 17
|
|
235
|
+
RhoPi4 d9, d22, 61, d16, d19, 8, d21, d7, 55, d15, d12, 10 ; 9 < 22, 13 < 19, 21 < 8, 17 < 11
|
|
236
|
+
RhoPi4 d22, d18, 39, d19, d23, 56, d7, d13, 45, d12, d5, 6 ; 22 < 14, 19 < 23, 8 < 16, 11 < 7
|
|
237
|
+
RhoPi4 d18, d20, 18, d23, d11, 41, d13, d1, 36, d5, d10, 3 ; 14 < 20, 23 < 15, 16 < 5, 7 < 10
|
|
238
|
+
RhoPi4 d20, d28, 62, d11, d25, 27, d1, d29, 28, d10, d27, 1 ; 20 < 2, 15 < 4, 5 < 3, 10 < 1
|
|
239
|
+
|
|
240
|
+
;Chi b+g: rows b and g are processed together, one row in each half of q0-q4
|
|
241
|
+
vmov.i64 q13, q0 ; keep the old ba/ga for the last two terms
|
|
242
|
+
vbic.64 q15, q2, q1 ; ba ^= ~be & bi (and ga ^= ~ge & gi in the high half)
|
|
243
|
+
veor.64 q0, q15
|
|
244
|
+
vmov.i64 q14, q1 ; keep the old be/ge for the last term
|
|
245
|
+
vbic.64 q15, q3, q2 ; be ^= ~bi & bo (and ge ^= ~gi & go)
|
|
246
|
+
veor.64 q1, q15
|
|
247
|
+
vbic.64 q15, q4, q3 ; bi ^= ~bo & bu (and gi ^= ~go & gu)
|
|
248
|
+
veor.64 q2, q15
|
|
249
|
+
vbic.64 q15, q13, q4 ; bo ^= ~bu & ba (and go ^= ~gu & ga), using the saved old ba/ga
|
|
250
|
+
vbic.64 q13, q14, q13 ; bu ^= ~ba & be (and gu ^= ~ga & ge), using the saved old values
|
|
251
|
+
veor.64 q3, q15
|
|
252
|
+
veor.64 q4, q13
|
|
253
|
+
|
|
254
|
+
;Chi k+m: rows k and m are processed together, one row in each half of q5-q9
|
|
255
|
+
vmov.i64 q13, q5 ; keep the old ka/ma for the last two terms
|
|
256
|
+
vbic.64 q15, q7, q6 ; ka ^= ~ke & ki (and ma ^= ~me & mi in the high half)
|
|
257
|
+
veor.64 q5, q15
|
|
258
|
+
vmov.i64 q14, q6 ; keep the old ke/me for the last term
|
|
259
|
+
vbic.64 q15, q8, q7 ; ke ^= ~ki & ko (and me ^= ~mi & mo)
|
|
260
|
+
veor.64 q6, q15
|
|
261
|
+
vbic.64 q15, q9, q8 ; ki ^= ~ko & ku (and mi ^= ~mo & mu)
|
|
262
|
+
veor.64 q7, q15
|
|
263
|
+
vbic.64 q15, q13, q9 ; ko ^= ~ku & ka (and mo ^= ~mu & ma), using the saved old ka/ma
|
|
264
|
+
vbic.64 q13, q14, q13 ; ku ^= ~ka & ke (and mu ^= ~ma & me), using the saved old values
|
|
265
|
+
veor.64 q8, q15
|
|
266
|
+
veor.64 q9, q13
|
|
267
|
+
|
|
268
|
+
;Chi s: row s lives in d20-d24 and is processed with d-register operations
|
|
269
|
+
vmov.i64 q13, q10 ; d26 = old sa, d27 = old se
|
|
270
|
+
vbic.64 d30, d22, d21 ; sa ^= ~se & si
|
|
271
|
+
vbic.64 d31, d23, d22 ; se ^= ~si & so
|
|
272
|
+
veor.64 q10, q15
|
|
273
|
+
vbic.64 d30, d24, d23 ; si ^= ~so & su
|
|
274
|
+
vbic.64 d31, d26, d24 ; so ^= ~su & sa (old sa from d26)
|
|
275
|
+
vbic.64 d26, d27, d26 ; su ^= ~sa & se (old sa/se from d26/d27)
|
|
276
|
+
veor.64 q11, q15
|
|
277
|
+
vld1.64 d30, [r1:64]! ; Iota: fetch the 64-bit round constant at r1, then advance r1 by 8
|
|
278
|
+
veor.64 d24, d26
|
|
279
|
+
veor.64 d0, d0, d30 ; Iota: ba ^= round constant
|
|
280
|
+
MEND
|
|
281
|
+
|
|
282
|
+
;----------------------------------------------------------------------------
|
|
283
|
+
;
|
|
284
|
+
; void KeccakP1600_StaticInitialize( void )
|
|
285
|
+
; Does nothing in this implementation: the body is a single return.
|
|
286
|
+
ALIGN
|
|
287
|
+
EXPORT KeccakP1600_StaticInitialize
|
|
288
|
+
KeccakP1600_StaticInitialize PROC
|
|
289
|
+
bx lr ; return immediately
|
|
290
|
+
ENDP
|
|
291
|
+
|
|
292
|
+
;----------------------------------------------------------------------------
|
|
293
|
+
;
|
|
294
|
+
; void KeccakP1600_Initialize(void *state)
|
|
295
|
+
; Zeroes the 25 lanes (200 bytes) at state. In: r0 = state. Overwrites q0-q3; r0 ends just past the state.
|
|
296
|
+
ALIGN
|
|
297
|
+
EXPORT KeccakP1600_Initialize
|
|
298
|
+
KeccakP1600_Initialize PROC
|
|
299
|
+
vmov.i64 q0, #0 ; q0-q3 (= d0-d7) become 64 bytes of zero
|
|
300
|
+
vmov.i64 q1, #0
|
|
301
|
+
vmov.i64 q2, #0
|
|
302
|
+
vmov.i64 q3, #0
|
|
303
|
+
vstm r0!, { d0 - d7 } ; clear 8 lanes at a time
|
|
304
|
+
vstm r0!, { d0 - d7 } ; lanes 8-15
|
|
305
|
+
vstm r0!, { d0 - d7 } ; lanes 16-23
|
|
306
|
+
vstm r0!, { d0 } ; lane 24, the last one
|
|
307
|
+
bx lr
|
|
308
|
+
ENDP
|
|
309
|
+
|
|
310
|
+
; ----------------------------------------------------------------------------
|
|
311
|
+
;
|
|
312
|
+
; void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
|
|
313
|
+
; XORs byte into the state byte at state + offset. In: r0 = state, r1 = byte, r2 = offset. Scratch: r3, flags.
|
|
314
|
+
ALIGN
|
|
315
|
+
EXPORT KeccakP1600_AddByte
|
|
316
|
+
KeccakP1600_AddByte PROC
|
|
317
|
+
ldrb r3, [r0, r2] ; r3 = current state byte at the given offset
|
|
318
|
+
eors r3, r3, r1 ; mix in the new byte
|
|
319
|
+
strb r3, [r0, r2] ; write the result back to the same place
|
|
320
|
+
bx lr
|
|
321
|
+
ENDP
|
|
322
|
+
|
|
323
|
+
; ----------------------------------------------------------------------------
|
|
324
|
+
;
|
|
325
|
+
; void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
|
|
326
|
+
; XORs length bytes from data into the state starting at byte offset: 8 bytes per pass while at least 8 remain, then one byte at a time. In: r0 = state, r1 = data, r2 = offset, r3 = length.
|
|
327
|
+
ALIGN
|
|
328
|
+
EXPORT KeccakP1600_AddBytes
|
|
329
|
+
KeccakP1600_AddBytes PROC
|
|
330
|
+
push {r4,lr} ; r4 and lr are used as scratch registers below
|
|
331
|
+
adds r0, r0, r2 ; state += offset
|
|
332
|
+
subs r3, r3, #8 ; if length >= lane size
|
|
333
|
+
bcc KeccakP1600_AddBytes_Bytes ; borrow: fewer than 8 bytes in total, skip the 8-byte loop
|
|
334
|
+
KeccakP1600_AddBytes_LanesLoop ; then, perform on lanes
|
|
335
|
+
ldr r2, [r0] ; first 4 state bytes (NOTE(review): word accesses use whatever alignment state+offset and data have - confirm the target allows unaligned ldr/str)
|
|
336
|
+
ldr r4, [r1], #4 ; first 4 data bytes, data += 4
|
|
337
|
+
ldr r12, [r0, #4] ; next 4 state bytes
|
|
338
|
+
ldr lr, [r1], #4 ; next 4 data bytes, data += 4
|
|
339
|
+
eors r2, r2, r4
|
|
340
|
+
eors r12, r12, lr
|
|
341
|
+
subs r3, r3, #8 ; 8 bytes done; carry stays set while at least 8 more remain (this is the flag the bcs below tests)
|
|
342
|
+
str r2, [r0], #4 ; write both words back, state += 8 in total
|
|
343
|
+
str r12, [r0], #4
|
|
344
|
+
bcs KeccakP1600_AddBytes_LanesLoop
|
|
345
|
+
KeccakP1600_AddBytes_Bytes
|
|
346
|
+
adds r3, r3, #7 ; r3 was (remaining - 8); now (remaining - 1), and carry is clear only when nothing remains
|
|
347
|
+
bcc KeccakP1600_AddBytes_Exit
|
|
348
|
+
KeccakP1600_AddBytes_BytesLoop
|
|
349
|
+
ldrb r2, [r0] ; one state byte
|
|
350
|
+
ldrb r4, [r1], #1 ; one data byte, data += 1
|
|
351
|
+
eors r2, r2, r4
|
|
352
|
+
strb r2, [r0], #1 ; write back, state += 1
|
|
353
|
+
subs r3, r3, #1 ; borrow (carry clear) once the last byte has been handled
|
|
354
|
+
bcs KeccakP1600_AddBytes_BytesLoop
|
|
355
|
+
KeccakP1600_AddBytes_Exit
|
|
356
|
+
pop {r4,pc} ; restore r4 and return through the saved lr
|
|
357
|
+
ENDP
|
|
358
|
+
|
|
359
|
+
; ----------------------------------------------------------------------------
|
|
360
|
+
;
|
|
361
|
+
; void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
|
|
362
|
+
;
|
|
363
|
+
; In:    r0 = state, r1 = data, r2 = byte offset into the state, r3 = length in bytes
; Does:  state[offset + i] = data[i] for i in 0 .. length-1
;        Runs of 8 bytes are copied as two 32-bit words, the remainder byte by byte.
; Uses:  r2, r12 as scratch, flags
    ALIGN
    EXPORT  KeccakP1600_OverwriteBytes
KeccakP1600_OverwriteBytes   PROC
    adds    r0, r0, r2                      ; r0 = first state byte to overwrite
    subs    r3, r3, #8
    blo     KeccakP1600_OverwriteBytes_Tail ; fewer than 8 bytes: skip the word loop
KeccakP1600_OverwriteBytes_Lanes
    ldr     r2,  [r1], #4
    ldr     r12, [r1], #4
    str     r2,  [r0], #4
    str     r12, [r0], #4
    subs    r3, r3, #8                      ; carry stays set while another 8 bytes remain
    bhs     KeccakP1600_OverwriteBytes_Lanes
KeccakP1600_OverwriteBytes_Tail
    adds    r3, r3, #7                      ; r3 = leftover - 1; carry clear when nothing is left
    blo     KeccakP1600_OverwriteBytes_Done
KeccakP1600_OverwriteBytes_TailLoop
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bhs     KeccakP1600_OverwriteBytes_TailLoop
KeccakP1600_OverwriteBytes_Done
    bx      lr
    ENDP
|
|
387
|
+
|
|
388
|
+
;----------------------------------------------------------------------------
|
|
389
|
+
;
|
|
390
|
+
; void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
|
|
391
|
+
;
|
|
392
|
+
; In:    r0 = state, r1 = byteCount
; Does:  clears the first byteCount bytes of the state: whole 8-byte lanes through d0,
;        then the remaining byteCount % 8 bytes one at a time.
; Uses:  r2, r3, d0 as scratch, flags
    ALIGN
    EXPORT  KeccakP1600_OverwriteWithZeroes
KeccakP1600_OverwriteWithZeroes   PROC
    lsrs    r2, r1, #3                      ; r2 = number of whole lanes
    beq     KeccakP1600_OverwriteWithZeroes_Tail
    vmov.i64 d0, #0
KeccakP1600_OverwriteWithZeroes_Lanes
    vstm    r0!, {d0}                       ; one zero lane, pointer advances by 8
    subs    r2, r2, #1
    bne     KeccakP1600_OverwriteWithZeroes_Lanes
KeccakP1600_OverwriteWithZeroes_Tail
    ands    r1, r1, #7                      ; r1 = bytes left over after the lanes
    beq     KeccakP1600_OverwriteWithZeroes_Done
    movs    r3, #0
KeccakP1600_OverwriteWithZeroes_TailLoop
    strb    r3, [r0], #1
    subs    r1, r1, #1
    bne     KeccakP1600_OverwriteWithZeroes_TailLoop
KeccakP1600_OverwriteWithZeroes_Done
    bx      lr
    ENDP
|
|
413
|
+
|
|
414
|
+
; ----------------------------------------------------------------------------
|
|
415
|
+
;
|
|
416
|
+
; void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
|
|
417
|
+
;
|
|
418
|
+
; In:    r0 = state, r1 = destination buffer, r2 = byte offset into the state, r3 = length
; Does:  data[i] = state[offset + i] for i in 0 .. length-1 (the state is only read)
;        Runs of 8 bytes are copied as two 32-bit words, the remainder byte by byte.
; Uses:  r2, r12 as scratch, flags
    ALIGN
    EXPORT  KeccakP1600_ExtractBytes
KeccakP1600_ExtractBytes   PROC
    adds    r0, r0, r2                      ; r0 = first state byte to read
    subs    r3, r3, #8
    blo     KeccakP1600_ExtractBytes_Tail   ; fewer than 8 bytes: skip the word loop
KeccakP1600_ExtractBytes_Lanes
    ldr     r2,  [r0], #4
    ldr     r12, [r0], #4
    str     r2,  [r1], #4
    str     r12, [r1], #4
    subs    r3, r3, #8                      ; carry stays set while another 8 bytes remain
    bhs     KeccakP1600_ExtractBytes_Lanes
KeccakP1600_ExtractBytes_Tail
    adds    r3, r3, #7                      ; r3 = leftover - 1; carry clear when nothing is left
    blo     KeccakP1600_ExtractBytes_Done
KeccakP1600_ExtractBytes_TailLoop
    ldrb    r2, [r0], #1
    strb    r2, [r1], #1
    subs    r3, r3, #1
    bhs     KeccakP1600_ExtractBytes_TailLoop
KeccakP1600_ExtractBytes_Done
    bx      lr
    ENDP
|
|
442
|
+
|
|
443
|
+
; ----------------------------------------------------------------------------
|
|
444
|
+
;
|
|
445
|
+
; void KeccakP1600_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
|
446
|
+
;
|
|
447
|
+
; In:    r0 = state, r1 = input, r2 = output, r3 = byte offset into the state,
;        [sp] = length (fifth argument, passed on the stack)
; Does:  output[i] = state[offset + i] ^ input[i] for i in 0 .. length-1
;        Each word is stored before the next one is loaded, so the per-word
;        load/store order is part of the behaviour and is left as it was.
; Uses:  r4, r5 as scratch (saved and restored), flags
    ALIGN
    EXPORT  KeccakP1600_ExtractAndAddBytes
KeccakP1600_ExtractAndAddBytes   PROC
    push    {r4, r5}
    add     r0, r0, r3                      ; r0 = first state byte to read; r3 is free again
    ldr     r3, [sp, #8]                    ; length sits just above the two registers pushed
    subs    r3, r3, #8
    blo     KeccakP1600_ExtractAndAddBytes_Tail
KeccakP1600_ExtractAndAddBytes_Lanes
    ldr     r5, [r0], #4                    ; low word: state ^ input -> output
    ldr     r4, [r1], #4
    eor     r5, r5, r4
    str     r5, [r2], #4
    ldr     r5, [r0], #4                    ; high word
    ldr     r4, [r1], #4
    eor     r5, r5, r4
    str     r5, [r2], #4
    subs    r3, r3, #8                      ; carry stays set while another 8 bytes remain
    bhs     KeccakP1600_ExtractAndAddBytes_Lanes
KeccakP1600_ExtractAndAddBytes_Tail
    adds    r3, r3, #7                      ; r3 = leftover - 1; carry clear when nothing is left
    blo     KeccakP1600_ExtractAndAddBytes_Done
KeccakP1600_ExtractAndAddBytes_TailLoop
    ldrb    r5, [r0], #1
    ldrb    r4, [r1], #1
    eor     r5, r5, r4
    strb    r5, [r2], #1
    subs    r3, r3, #1
    bhs     KeccakP1600_ExtractAndAddBytes_TailLoop
KeccakP1600_ExtractAndAddBytes_Done
    pop     {r4, r5}
    bx      lr
    ENDP
|
|
480
|
+
|
|
481
|
+
; ----------------------------------------------------------------------------
|
|
482
|
+
;
|
|
483
|
+
; void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds)
|
|
484
|
+
;
|
|
485
|
+
; In:    r0 = state, r1 = nrounds
; Does:  tail-calls KeccakP1600_Permute with
;          r1 = KeccakP1600_Permute_RoundConstants0 - 8 * nrounds  (the last nrounds constants)
;          r2 = nrounds
; A request for zero rounds now returns immediately with the state untouched. The shared
; round loop decrements and tests r2 only after a pass has run, so entering it with
; r2 = 0 would execute rounds and wrap the counter instead of stopping.
; NOTE(review): that loop steps r2 by LoopUnroll and exits on exactly zero, so nrounds
; presumably has to be a multiple of LoopUnroll (defined outside this block) - confirm.
    ALIGN
    EXPORT  KeccakP1600_Permute_Nrounds
KeccakP1600_Permute_Nrounds   PROC
    movs    r2, r1                          ; r2 = round count; Z is set when it is zero
    bxeq    lr                              ; zero rounds: nothing to permute
    adr     r1, KeccakP1600_Permute_RoundConstants0
    sub     r1, r1, r2, LSL #3              ; step back one 8-byte constant per round
    b       KeccakP1600_Permute
    ENDP
|
|
493
|
+
|
|
494
|
+
; ----------------------------------------------------------------------------
|
|
495
|
+
;
|
|
496
|
+
; void KeccakP1600_Permute_12rounds( void *state )
|
|
497
|
+
;
|
|
498
|
+
; In:    r0 = state
; Does:  tail-calls KeccakP1600_Permute with r2 = 12 rounds and r1 pointing at the
;        constant table entry labelled KeccakP1600_Permute_RoundConstants12.
    ALIGN
    EXPORT  KeccakP1600_Permute_12rounds
KeccakP1600_Permute_12rounds   PROC
    movs    r2, #12                                     ; round count
    adr     r1, KeccakP1600_Permute_RoundConstants12    ; first constant to use
    b       KeccakP1600_Permute
    ENDP
|
|
505
|
+
|
|
506
|
+
; ----------------------------------------------------------------------------
|
|
507
|
+
;
|
|
508
|
+
; void KeccakP1600_Permute_24rounds( void *state )
|
|
509
|
+
;
|
|
510
|
+
; In:    r0 = state
; Does:  tail-calls KeccakP1600_Permute with r2 = 24 rounds and r1 pointing at the
;        start of the constant table (KeccakP1600_Permute_RoundConstants24).
    ALIGN
    EXPORT  KeccakP1600_Permute_24rounds
KeccakP1600_Permute_24rounds   PROC
    movs    r2, #24                                     ; round count
    adr     r1, KeccakP1600_Permute_RoundConstants24    ; first constant to use
    b       KeccakP1600_Permute
    ENDP
|
|
517
|
+
|
|
518
|
+
; Table of 24 64-bit round constants, 8 bytes each, stored in the order they are used.
; Three labels mark entry points into it:
;   ..._RoundConstants24  start of the table (24 constants follow)
;   ..._RoundConstants12  half-way point     (12 constants follow)
;   ..._RoundConstants0   end of the table; KeccakP1600_Permute_Nrounds subtracts
;                         8 * nrounds from this address to select the last nrounds entries
    ALIGN
KeccakP1600_Permute_RoundConstants24
    DCQ     0x0000000000000001              ; entry  0
    DCQ     0x0000000000008082              ; entry  1
    DCQ     0x800000000000808a              ; entry  2
    DCQ     0x8000000080008000              ; entry  3
    DCQ     0x000000000000808b              ; entry  4
    DCQ     0x0000000080000001              ; entry  5
    DCQ     0x8000000080008081              ; entry  6
    DCQ     0x8000000000008009              ; entry  7
    DCQ     0x000000000000008a              ; entry  8
    DCQ     0x0000000000000088              ; entry  9
    DCQ     0x0000000080008009              ; entry 10
    DCQ     0x000000008000000a              ; entry 11
KeccakP1600_Permute_RoundConstants12
    DCQ     0x000000008000808b              ; entry 12
    DCQ     0x800000000000008b              ; entry 13
    DCQ     0x8000000000008089              ; entry 14
    DCQ     0x8000000000008003              ; entry 15
    DCQ     0x8000000000008002              ; entry 16
    DCQ     0x8000000000000080              ; entry 17
    DCQ     0x000000000000800a              ; entry 18
    DCQ     0x800000008000000a              ; entry 19
    DCQ     0x8000000080008081              ; entry 20
    DCQ     0x8000000000008080              ; entry 21
    DCQ     0x0000000080000001              ; entry 22
    DCQ     0x8000000080008008              ; entry 23
KeccakP1600_Permute_RoundConstants0
|
|
546
|
+
|
|
547
|
+
; Two assembly-only entry points that share one body.
;
; KeccakP1600_XORandPermuteAsmOnly
;   In:  r5  = slot index into the table below
;        d30 = 64-bit value to XOR into the register selected by that slot
;   Jumps into a table of 8-byte slots (one veor plus one branch each), then runs
;   the round loop. The instruction sequence must stay exactly as is: the computed
;   jump relies on every slot being two instructions, and on the final slot falling
;   through into the loop.
;
; KeccakP1600_PermuteAsmOnly
;   In:  r2 = number of rounds still to run (decremented by LoopUnroll per pass)
;   The KeccakRound macro is defined outside this block.
;   NOTE(review): callers in this file put a round-constant pointer in r1, so
;   KeccakRound presumably consumes r1 - confirm against the macro definition.
;   Returns with bx lr, so it is meant to be reached with bl.
    ALIGN
KeccakP1600_XORandPermuteAsmOnly   PROC

    add     pc, pc, r5, LSL #3              ; computed jump: target = first slot + 8 * r5
    mov     r1, #0                          ; dummy instruction for PC alignment, not executed
    veor.64 d0, d0, d30                     ; slot 0
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d2, d2, d30                     ; slot 1
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d4, d4, d30                     ; slot 2
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d6, d6, d30                     ; slot 3
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d8, d8, d30                     ; slot 4
    b       KeccakP1600_PermuteAsmOnly

    veor.64 d1, d1, d30                     ; slot 5
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d3, d3, d30                     ; slot 6
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d5, d5, d30                     ; slot 7
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d7, d7, d30                     ; slot 8
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d9, d9, d30                     ; slot 9
    b       KeccakP1600_PermuteAsmOnly

    veor.64 d10, d10, d30                   ; slot 10
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d12, d12, d30                   ; slot 11
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d14, d14, d30                   ; slot 12
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d16, d16, d30                   ; slot 13
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d18, d18, d30                   ; slot 14
    b       KeccakP1600_PermuteAsmOnly

    veor.64 d11, d11, d30                   ; slot 15
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d13, d13, d30                   ; slot 16
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d15, d15, d30                   ; slot 17
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d17, d17, d30                   ; slot 18
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d19, d19, d30                   ; slot 19
    b       KeccakP1600_PermuteAsmOnly

    veor.64 d20, d20, d30                   ; slot 20
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d21, d21, d30                   ; slot 21
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d22, d22, d30                   ; slot 22
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d23, d23, d30                   ; slot 23
    b       KeccakP1600_PermuteAsmOnly
    veor.64 d24, d24, d30                   ; slot 24: no branch, falls into the loop
KeccakP1600_PermuteAsmOnly
KeccakP1600_Permute_RoundLoop
    ; The nested conditionals expand to 1, 2, 4, 6 or 12 rounds per pass,
    ; for LoopUnroll = 1, 2, 3..4, 5..6 and 7 or more respectively.
    KeccakRound
    if LoopUnroll > 1
    KeccakRound
        if LoopUnroll > 2
    KeccakRound
    KeccakRound
            if LoopUnroll > 4
    KeccakRound
    KeccakRound
                if LoopUnroll > 6
    KeccakRound
    KeccakRound
    KeccakRound
    KeccakRound
    KeccakRound
    KeccakRound
                endif
            endif
        endif
    endif
    subs    r2, #LoopUnroll                 ; rounds left; the loop ends only on exactly zero
    bne     KeccakP1600_Permute_RoundLoop
    bx      lr
    ENDP
|
|
631
|
+
|
|
632
|
+
;----------------------------------------------------------------------------
|
|
633
|
+
;
|
|
634
|
+
; void KeccakP1600_Permute( void *state, void *roundConstants, unsigned int numberOfRounds )
|
|
635
|
+
;
|
|
636
|
+
; In:    r0 = state, r1 = roundConstants, r2 = numberOfRounds
; Does:  wraps KeccakP1600_PermuteAsmOnly with the LoadState / StoreState macros.
;        Both macros are defined outside this block; presumably they move the state
;        between memory at r0 and the NEON registers - confirm.
; Saves: q4-q7 (d8-d15) across the call.
; Uses:  r3 to hold the return address; this relies on the macros and the round loop
;        leaving r3 untouched (not verifiable from this block).
    ALIGN
    EXPORT  KeccakP1600_Permute
KeccakP1600_Permute   PROC
    mov     r3, lr                          ; the bl below overwrites lr, so park it in r3
    vpush   {q4-q7}
    LoadState
    bl      KeccakP1600_PermuteAsmOnly
    StoreState
    vpop    {q4-q7}
    bx      r3                              ; return through the parked address
    ENDP
|
|
647
|
+
|
|
648
|
+
if FastLoop != 0
|
|
649
|
+
|
|
650
|
+
;----------------------------------------------------------------------------
|
|
651
|
+
;
|
|
652
|
+
; size_t KeccakF1600_FastLoop_Absorb( void *state, unsigned int laneCount, unsigned char *data,
|
|
653
|
+
; size_t dataByteLen, unsigned char trailingBits )
|
|
654
|
+
;
|
|
655
|
+
; In:    r0 = state, r1 = laneCount, r2 = data, r3 = dataByteLen,
;        [sp] = trailingBits (fifth argument, passed on the stack)
; Out:   r0 = number of data bytes consumed (final data pointer - initial data pointer)
;
; Register roles after the prologue:
;   r3 = lanes still available minus laneCount (carry from subs drives the loops)
;   r4 = initial data pointer        r5 = laneCount
;   r6 = running data pointer        r7 = -8 (post-decrement stride, non-21-lane paths)
;   [sp] = one 8-byte slot holding trailingBits in its lowest byte, rest zero
;
; Each pass XORs laneCount lanes of data into the NEON-resident state, XORs the
; trailingBits slot into the next register, and runs the permutation. Three variants:
;   laneCount == 21        straight-line code
;   laneCount >= 16        16 lanes straight-line, remaining lanes via a computed jump
;   laneCount <  16        all lanes via a computed jump
; The computed jumps depend on every table slot being exactly two instructions, so
; the instruction sequence is kept exactly as it was.
;
; NOTE(review): the calls into KeccakP1600_PermuteAsmOnly / KeccakP1600_XORandPermuteAsmOnly
; are made with r1 and r2 holding laneCount / data-derived values, while the round loop
; counts rounds in r2 and KeccakP1600_Permute passes a constant pointer in r1. No setup
; of r1/r2 for the permutation is visible in this routine - confirm against the
; KeccakRound macro and the FastLoop setting before enabling this path.
    ALIGN
    EXPORT  KeccakF1600_FastLoop_Absorb
KeccakF1600_FastLoop_Absorb   PROC
    push    {r4-r8,lr}                      ; 6 core registers = 24 bytes
    lsr     r3, r3, #3                      ; r3 = dataByteLen / 8 = whole lanes available
    mov     r6, r2                          ; r6 = running data pointer
    subs    r3, r3, r1                      ; carry clear when fewer than laneCount lanes
    mov     r4, r2                          ; r4 = initial data pointer
    bcc     KeccakF1600_FastLoop_Absorb_Exit
    mov     r5, r1                          ; r5 = laneCount
    vpush   {q4-q7}                         ; 4 quad registers = 64 bytes
    LoadState

    sub     sp, sp, #8                      ; reserve the trailingBits lane
    veor.64 d30, d30, d30
    add     r7, sp, #(6+16+2)*4             ; skip 6 + 16 + 2 words pushed since entry
    vld1.8  {d30[0]}, [r7]                  ; lowest byte of d30 = trailingBits
    vst1.64 {d30}, [sp:64]

    cmp     r5, #21
    bne     KeccakF1600_FastLoop_Absorb_Not21Lanes
KeccakF1600_FastLoop_Absorb_Loop21Lanes
    vld1.64 {d26-d29}, [r6]!                ; XOR 21 lanes of data
    veor.64 d0, d0, d26
    veor.64 d2, d2, d27
    veor.64 d4, d4, d28
    veor.64 d6, d6, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d8, d8, d26
    veor.64 d1, d1, d27
    veor.64 d3, d3, d28
    veor.64 d5, d5, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d7, d7, d26
    veor.64 d9, d9, d27
    veor.64 d10, d10, d28
    veor.64 d12, d12, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d14, d14, d26
    veor.64 d16, d16, d27
    veor.64 d18, d18, d28
    veor.64 d11, d11, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d13, d13, d26
    veor.64 d15, d15, d27
    veor.64 d17, d17, d28
    veor.64 d19, d19, d29
    vld1.64 {d26}, [r6]!
    veor.64 d20, d20, d26

    vld1.64 {d30}, [sp:64]                  ; XOR the trailingBits lane
    veor.64 d21, d21, d30
    bl      KeccakP1600_PermuteAsmOnly
    subs    r3, r3, r5                      ; lanes left -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_Loop21Lanes
KeccakF1600_FastLoop_Absorb_Done
    add     sp, sp, #8                      ; release the trailingBits lane
    StoreState
    vpop    {q4-q7}
KeccakF1600_FastLoop_Absorb_Exit
    sub     r0, r6, r4                      ; bytes consumed
    pop     {r4-r8,pc}
KeccakF1600_FastLoop_Absorb_Not21Lanes
    cmp     r5, #16
    mvn     r7, #7                          ; r7 = -8
    blo     KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes
KeccakF1600_FastLoop_Absorb_Loop16OrMoreLanes
    vld1.64 {d26-d29}, [r6]!                ; XOR the first 16 lanes
    veor.64 d0, d0, d26
    veor.64 d2, d2, d27
    veor.64 d4, d4, d28
    veor.64 d6, d6, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d8, d8, d26
    veor.64 d1, d1, d27
    veor.64 d3, d3, d28
    veor.64 d5, d5, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d7, d7, d26
    veor.64 d9, d9, d27
    veor.64 d10, d10, d28
    veor.64 d12, d12, d29
    vld1.64 {d26-d29}, [r6]!
    veor.64 d14, d14, d26
    veor.64 d16, d16, d27
    veor.64 d18, d18, d28
    veor.64 d11, d11, d29

    sub     r2, r5, #16                     ; n = lanes beyond the first 16, at most 9
    rsb     r1, r2, #9                      ; table slots to skip = 9 - n
    add     r6, r6, r2, LSL #3              ; data += n * 8
    sub     r2, r6, #8                      ; r2 walks backwards from the last lane
    add     pc, pc, r1, LSL #3              ; computed jump, 8 bytes per slot
    mov     r1, #0                          ; dummy instruction for PC alignment, not executed
    vld1.64 d30, [r2], r7
    veor.64 d24, d24, d30
    vld1.64 d30, [r2], r7
    veor.64 d23, d23, d30
    vld1.64 d30, [r2], r7
    veor.64 d22, d22, d30
    vld1.64 d30, [r2], r7
    veor.64 d21, d21, d30
    vld1.64 d30, [r2], r7
    veor.64 d20, d20, d30

    vld1.64 d30, [r2], r7
    veor.64 d19, d19, d30
    vld1.64 d30, [r2], r7
    veor.64 d17, d17, d30
    vld1.64 d30, [r2], r7
    veor.64 d15, d15, d30
    vld1.64 d30, [r2], r7
    veor.64 d13, d13, d30

    vld1.64 {d30}, [sp:64]                  ; d30 = trailingBits lane, r5 selects its slot
    bl      KeccakP1600_XORandPermuteAsmOnly
    subs    r3, r3, r5                      ; lanes left -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_Loop16OrMoreLanes
    b       KeccakF1600_FastLoop_Absorb_Done
KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes
    rsb     r1, r5, #15                     ; table slots to skip = 15 - laneCount
    add     r6, r6, r5, LSL #3              ; data += laneCount * 8
    sub     r2, r6, #8                      ; r2 walks backwards from the last lane
    add     pc, pc, r1, LSL #3              ; computed jump, 8 bytes per slot
    mov     r1, #0                          ; dummy instruction for PC alignment, not executed

    vld1.64 d30, [r2], r7
    veor.64 d18, d18, d30
    vld1.64 d30, [r2], r7
    veor.64 d16, d16, d30
    vld1.64 d30, [r2], r7
    veor.64 d14, d14, d30
    vld1.64 d30, [r2], r7
    veor.64 d12, d12, d30
    vld1.64 d30, [r2], r7
    veor.64 d10, d10, d30

    vld1.64 d30, [r2], r7
    veor.64 d9, d9, d30
    vld1.64 d30, [r2], r7
    veor.64 d7, d7, d30
    vld1.64 d30, [r2], r7
    veor.64 d5, d5, d30
    vld1.64 d30, [r2], r7
    veor.64 d3, d3, d30
    vld1.64 d30, [r2], r7
    veor.64 d1, d1, d30

    vld1.64 d30, [r2], r7
    veor.64 d8, d8, d30
    vld1.64 d30, [r2], r7
    veor.64 d6, d6, d30
    vld1.64 d30, [r2], r7
    veor.64 d4, d4, d30
    vld1.64 d30, [r2], r7
    veor.64 d2, d2, d30
    vld1.64 d30, [r2], r7
    veor.64 d0, d0, d30

    vld1.64 {d30}, [sp:64]                  ; d30 = trailingBits lane, r5 selects its slot
    bl      KeccakP1600_XORandPermuteAsmOnly
    subs    r3, r3, r5                      ; lanes left -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes
    b       KeccakF1600_FastLoop_Absorb_Done
    ENDP
|
|
820
|
+
|
|
821
|
+
endif
|
|
822
|
+
|
|
823
|
+
END
|