sleeping_kangaroo12 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +127 -0
- data/ext/Rakefile +73 -0
- data/ext/binding/sleeping_kangaroo12.c +39 -0
- data/ext/config/xkcp.build +17 -0
- data/ext/xkcp/LICENSE +1 -0
- data/ext/xkcp/Makefile +15 -0
- data/ext/xkcp/Makefile.build +200 -0
- data/ext/xkcp/README.markdown +296 -0
- data/ext/xkcp/lib/HighLevel.build +143 -0
- data/ext/xkcp/lib/LowLevel.build +757 -0
- data/ext/xkcp/lib/common/align.h +33 -0
- data/ext/xkcp/lib/common/brg_endian.h +143 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
- data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
- data/ext/xkcp/lib/high/common/Phases.h +25 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
- data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
- data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
- data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
- data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
- data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
- data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
- data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
- data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
- data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
- data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
- data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
- data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
- data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
- data/ext/xkcp/util/KeccakSum/base64.c +86 -0
- data/ext/xkcp/util/KeccakSum/base64.h +12 -0
- data/lib/sleeping_kangaroo12/binding.rb +15 -0
- data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
- data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
- data/lib/sleeping_kangaroo12/build.rb +4 -0
- data/lib/sleeping_kangaroo12/digest.rb +103 -0
- data/lib/sleeping_kangaroo12/version.rb +5 -0
- data/lib/sleeping_kangaroo12.rb +7 -0
- metadata +372 -0
|
@@ -0,0 +1,1031 @@
|
|
|
1
|
+
# The eXtended Keccak Code Package (XKCP)
|
|
2
|
+
# https://github.com/XKCP/XKCP
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
|
|
5
|
+
# Copyright (c) 2018 Ronny Van Keer
|
|
6
|
+
# All rights reserved.
|
|
7
|
+
#
|
|
8
|
+
# The source code in this file is licensed under the CRYPTOGAMS license.
|
|
9
|
+
# For further details see http://www.openssl.org/~appro/cryptogams/.
|
|
10
|
+
#
|
|
11
|
+
# Notes:
|
|
12
|
+
# The code for the permutation (__KeccakF1600) was generated with
|
|
13
|
+
# Andy Polyakov's keccak1600-avx512.pl from the CRYPTOGAMS project
|
|
14
|
+
# (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx512.pl).
|
|
15
|
+
# The rest of the code was written by Ronny Van Keer.
|
|
16
|
+
# Adaptations for macOS by Stéphane Léon.
|
|
17
|
+
# Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
|
|
18
|
+
|
|
19
|
+
.text
|
|
20
|
+
|
|
21
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Initialize(void *state);
#
# Sets the 200 bytes (25 lanes of 64 bits) starting at `state` to zero.
#
# In:       %rdi = state
# Clobbers: %zmm0
#
# The alignment directive comes before the entry labels so that the exported
# symbol itself is 32-byte aligned; placed after the labels, the padding would
# sit inside the function and be executed on every call.
#
.globl  KeccakP1600_Initialize
.globl _KeccakP1600_Initialize
.ifndef old_gas_syntax
.type   KeccakP1600_Initialize,@function
.endif
.balign 32
KeccakP1600_Initialize:
_KeccakP1600_Initialize:
    vpxorq      %zmm0,%zmm0,%zmm0       # zmm0 = 0
    vmovdqu64   %zmm0,0*64(%rdi)        # state bytes   0..63
    vmovdqu64   %zmm0,1*64(%rdi)        # state bytes  64..127
    vmovdqu64   %zmm0,2*64(%rdi)        # state bytes 128..191
    movq        $0,3*64(%rdi)           # state bytes 192..199 (25th lane)
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_Initialize,.-KeccakP1600_Initialize
.endif
|
|
42
|
+
|
|
43
|
+
# -----------------------------------------------------------------------------
|
|
44
|
+
#
|
|
45
|
+
# void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
|
|
46
|
+
# %rdi %rsi %rdx
|
|
47
|
+
#!!
|
|
48
|
+
#.globl KeccakP1600_AddByte
|
|
49
|
+
#.type KeccakP1600_AddByte,@function
|
|
50
|
+
#.align 32
|
|
51
|
+
#KeccakP1600_AddByte:
|
|
52
|
+
# mov %rdx, %rax
|
|
53
|
+
# and $7, %rax
|
|
54
|
+
# and $0xFFFFFFF8, %edx
|
|
55
|
+
# mov mapState(%rdx), %rdx
|
|
56
|
+
# add %rdx, %rdi
|
|
57
|
+
# add %rax, %rdi
|
|
58
|
+
# xorb %sil, (%rdi)
|
|
59
|
+
# ret
|
|
60
|
+
#.size KeccakP1600_AddByte,.-KeccakP1600_AddByte
|
|
61
|
+
|
|
62
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
#                                %rdi                         %rsi               %rdx                 %rcx
#
# XORs `length` bytes read from `data` into the state, starting at byte
# `offset` of the state. The work is split in three phases:
#   1. single bytes until `offset` reaches a multiple of 8 (or length runs out),
#   2. whole 64-bit lanes,
#   3. single bytes of a trailing partial lane.
#
# Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
#
# `offset` and `length` are 32-bit arguments; the upper halves of %rdx/%rcx are
# not guaranteed to be zero on entry, so both are zero-extended before they are
# used in 64-bit pointer and counter arithmetic.
#
.globl  KeccakP1600_AddBytes
.globl _KeccakP1600_AddBytes
.ifndef old_gas_syntax
.type   KeccakP1600_AddBytes,@function
.endif
.balign 32                              # align the entry point itself
KeccakP1600_AddBytes:
_KeccakP1600_AddBytes:
    mov         %ecx, %ecx              # length = (uint32_t)length
    mov         %edx, %edx              # offset = (uint32_t)offset
    test        %rcx, %rcx
    jz          KeccakP1600_AddBytes_Exit
    add         %rdx, %rdi              # state += offset
    and         $7, %rdx                # position inside the current lane
    jz          KeccakP1600_AddBytes_LaneAlignedCheck
    mov         $8, %r9                 # r9 is (max) length of incomplete lane
    sub         %rdx, %r9
    cmp         %rcx, %r9
    cmovae      %rcx, %r9               # r9 = min(r9, length)
    sub         %r9, %rcx               # length -= length of incomplete lane
KeccakP1600_AddBytes_NotAlignedLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    xorb        %r8b, (%rdi)
    inc         %rdi
    dec         %r9
    jnz         KeccakP1600_AddBytes_NotAlignedLoop
    jmp         KeccakP1600_AddBytes_LaneAlignedCheck
KeccakP1600_AddBytes_LaneAlignedLoop:
    mov         (%rsi), %r8
    add         $8, %rsi
    xor         %r8, (%rdi)
    add         $8, %rdi
KeccakP1600_AddBytes_LaneAlignedCheck:
    sub         $8, %rcx                # borrow (CF=1) means fewer than 8 bytes were left
    jnc         KeccakP1600_AddBytes_LaneAlignedLoop
KeccakP1600_AddBytes_LastIncompleteLane:
    add         $8, %rcx                # undo the last subtraction: rcx = remaining bytes (0..7)
    jz          KeccakP1600_AddBytes_Exit
KeccakP1600_AddBytes_LastIncompleteLaneLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    xor         %r8b, (%rdi)
    inc         %rdi
    dec         %rcx
    jnz         KeccakP1600_AddBytes_LastIncompleteLaneLoop
KeccakP1600_AddBytes_Exit:
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
.endif
|
|
116
|
+
|
|
117
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
#                                       %rdi                        %rsi               %rdx                 %rcx
#
# Copies `length` bytes from `data` over the state, starting at byte `offset`
# of the state. Same three-phase walk as KeccakP1600_AddBytes, but the bytes
# are stored instead of XORed:
#   1. single bytes until `offset` reaches a multiple of 8 (or length runs out),
#   2. whole 64-bit lanes,
#   3. single bytes of a trailing partial lane.
#
# Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
#
# `offset` and `length` are 32-bit arguments; the upper halves of %rdx/%rcx are
# not guaranteed to be zero on entry, so both are zero-extended first.
#
.globl  KeccakP1600_OverwriteBytes
.globl _KeccakP1600_OverwriteBytes
.ifndef old_gas_syntax
.type   KeccakP1600_OverwriteBytes,@function
.endif
.balign 32                              # align the entry point itself
KeccakP1600_OverwriteBytes:
_KeccakP1600_OverwriteBytes:
    mov         %ecx, %ecx              # length = (uint32_t)length
    mov         %edx, %edx              # offset = (uint32_t)offset
    test        %rcx, %rcx
    jz          KeccakP1600_OverwriteBytes_Exit
    add         %rdx, %rdi              # state += offset
    and         $7, %rdx                # position inside the current lane
    jz          KeccakP1600_OverwriteBytes_LaneAlignedCheck
    mov         $8, %r9                 # r9 is (max) length of incomplete lane
    sub         %rdx, %r9
    cmp         %rcx, %r9
    cmovae      %rcx, %r9               # r9 = min(r9, length)
    sub         %r9, %rcx               # length -= length of incomplete lane
KeccakP1600_OverwriteBytes_NotAlignedLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    mov         %r8b, (%rdi)
    inc         %rdi
    dec         %r9
    jnz         KeccakP1600_OverwriteBytes_NotAlignedLoop
    jmp         KeccakP1600_OverwriteBytes_LaneAlignedCheck
KeccakP1600_OverwriteBytes_LaneAlignedLoop:
    mov         (%rsi), %r8
    add         $8, %rsi
    mov         %r8, (%rdi)
    add         $8, %rdi
KeccakP1600_OverwriteBytes_LaneAlignedCheck:
    sub         $8, %rcx                # borrow (CF=1) means fewer than 8 bytes were left
    jnc         KeccakP1600_OverwriteBytes_LaneAlignedLoop
KeccakP1600_OverwriteBytes_LastIncompleteLane:
    add         $8, %rcx                # undo the last subtraction: rcx = remaining bytes (0..7)
    jz          KeccakP1600_OverwriteBytes_Exit
KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    mov         %r8b, (%rdi)
    inc         %rdi
    dec         %rcx
    jnz         KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
KeccakP1600_OverwriteBytes_Exit:
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
.endif
|
|
171
|
+
|
|
172
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
#                                            %rdi                %rsi
#
# Sets the first `byteCount` bytes of the state to zero: whole 64-bit lanes
# first, then the bytes of a trailing partial lane one at a time.
#
# Clobbers: %rsi, %rdi, flags
#
# `byteCount` is a 32-bit argument; the upper half of %rsi is not guaranteed to
# be zero on entry, so it is zero-extended before being used as a 64-bit count.
#
.globl  KeccakP1600_OverwriteWithZeroes
.globl _KeccakP1600_OverwriteWithZeroes
.ifndef old_gas_syntax
.type   KeccakP1600_OverwriteWithZeroes,@function
.endif
.balign 32                              # align the entry point itself
KeccakP1600_OverwriteWithZeroes:
_KeccakP1600_OverwriteWithZeroes:
    mov         %esi, %esi              # byteCount = (uint32_t)byteCount
    test        %rsi, %rsi
    jz          KeccakP1600_OverwriteWithZeroes_Exit
    jmp         KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
    movq        $0, (%rdi)
    add         $8, %rdi
KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
    sub         $8, %rsi                # borrow (CF=1) means fewer than 8 bytes were left
    jnc         KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
    add         $8, %rsi                # undo the last subtraction: rsi = remaining bytes (0..7)
    jz          KeccakP1600_OverwriteWithZeroes_Exit
KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
    movb        $0, (%rdi)
    inc         %rdi
    dec         %rsi
    jnz         KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
KeccakP1600_OverwriteWithZeroes_Exit:
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
.endif
|
|
207
|
+
|
|
208
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
#                                           %rdi                  %rsi               %rdx                 %rcx
#
# Copies `length` bytes of the state, starting at byte `offset`, to `data`.
# The work is split in three phases:
#   1. single bytes until `offset` reaches a multiple of 8 (or length runs out),
#   2. whole 64-bit lanes,
#   3. a trailing partial lane: the full lane is loaded once and its low bytes
#      are shifted out one at a time (so 8 state bytes are read even though
#      fewer are written to `data`).
#
# Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, flags
#
# `offset` and `length` are 32-bit arguments; the upper halves of %rdx/%rcx are
# not guaranteed to be zero on entry, so both are zero-extended first.
#
.globl  KeccakP1600_ExtractBytes
.globl _KeccakP1600_ExtractBytes
.ifndef old_gas_syntax
.type   KeccakP1600_ExtractBytes,@function
.endif
.balign 32                              # align the entry point itself
KeccakP1600_ExtractBytes:
_KeccakP1600_ExtractBytes:
    mov         %ecx, %ecx              # length = (uint32_t)length
    mov         %edx, %edx              # offset = (uint32_t)offset
    test        %rcx, %rcx
    jz          KeccakP1600_ExtractBytes_Exit
    add         %rdx, %rdi              # state += offset
    and         $7, %rdx                # position inside the current lane
    jz          KeccakP1600_ExtractBytes_LaneAlignedCheck
    mov         $8, %rax                # rax is (max) length of incomplete lane
    sub         %rdx, %rax
    cmp         %rcx, %rax
    cmovae      %rcx, %rax              # rax = min(rax, length)
    sub         %rax, %rcx              # length -= length of incomplete lane
KeccakP1600_ExtractBytes_NotAlignedLoop:
    mov         (%rdi), %r8b
    inc         %rdi
    mov         %r8b, (%rsi)
    inc         %rsi
    dec         %rax
    jnz         KeccakP1600_ExtractBytes_NotAlignedLoop
    jmp         KeccakP1600_ExtractBytes_LaneAlignedCheck
KeccakP1600_ExtractBytes_LaneAlignedLoop:
    mov         (%rdi), %r8
    add         $8, %rdi
    mov         %r8, (%rsi)
    add         $8, %rsi
KeccakP1600_ExtractBytes_LaneAlignedCheck:
    sub         $8, %rcx                # borrow (CF=1) means fewer than 8 bytes were left
    jnc         KeccakP1600_ExtractBytes_LaneAlignedLoop
KeccakP1600_ExtractBytes_LastIncompleteLane:
    add         $8, %rcx                # undo the last subtraction: rcx = remaining bytes (0..7)
    jz          KeccakP1600_ExtractBytes_Exit
    mov         (%rdi), %r8             # load the whole last lane once
KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
    mov         %r8b, (%rsi)            # emit its lowest remaining byte
    shr         $8, %r8                 # bring the next byte down
    inc         %rsi
    dec         %rcx
    jnz         KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractBytes_Exit:
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
.endif
|
|
262
|
+
|
|
263
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#                                                 %rdi                        %rsi                  %rdx                 %rcx                 %r8
#
# For each of `length` bytes: output[i] = state[offset + i] XOR input[i].
# The work is split in three phases:
#   1. single bytes until `offset` reaches a multiple of 8 (or length runs out),
#   2. whole 64-bit lanes,
#   3. a trailing partial lane: the full state lane is loaded once and its low
#      bytes are consumed one at a time.
#
# Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
#
# `offset` and `length` are 32-bit arguments; the upper halves of %rcx/%r8 are
# not guaranteed to be zero on entry, so both are zero-extended first.
# The head-of-lane byte counter lives in volatile %rax (as in
# KeccakP1600_ExtractBytes), so no callee-saved register has to be spilled and
# the function does not touch the stack.
#
.globl  KeccakP1600_ExtractAndAddBytes
.globl _KeccakP1600_ExtractAndAddBytes
.ifndef old_gas_syntax
.type   KeccakP1600_ExtractAndAddBytes,@function
.endif
.balign 32                              # align the entry point itself
KeccakP1600_ExtractAndAddBytes:
_KeccakP1600_ExtractAndAddBytes:
    mov         %r8d, %r8d              # length = (uint32_t)length
    mov         %ecx, %ecx              # offset = (uint32_t)offset
    test        %r8, %r8
    jz          KeccakP1600_ExtractAndAddBytes_Exit
    add         %rcx, %rdi              # state += offset
    and         $7, %rcx                # position inside the current lane
    jz          KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
    mov         $8, %rax                # rax is (max) length of incomplete lane
    sub         %rcx, %rax
    cmp         %r8, %rax
    cmovae      %r8, %rax               # rax = min(rax, length)
    sub         %rax, %r8               # length -= length of incomplete lane
KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
    mov         (%rdi), %r9b
    inc         %rdi
    xor         (%rsi), %r9b
    inc         %rsi
    mov         %r9b, (%rdx)
    inc         %rdx
    dec         %rax
    jnz         KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
    jmp         KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
    mov         (%rdi), %r9
    add         $8, %rdi
    xor         (%rsi), %r9
    add         $8, %rsi
    mov         %r9, (%rdx)
    add         $8, %rdx
KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
    sub         $8, %r8                 # borrow (CF=1) means fewer than 8 bytes were left
    jnc         KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
    add         $8, %r8                 # undo the last subtraction: r8 = remaining bytes (0..7)
    jz          KeccakP1600_ExtractAndAddBytes_Exit
    mov         (%rdi), %r9             # load the whole last state lane once
KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
    xor         (%rsi), %r9b            # low state byte ^= input byte
    inc         %rsi
    mov         %r9b, (%rdx)
    inc         %rdx
    shr         $8, %r9                 # bring the next state byte down
    dec         %r8
    jnz         KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractAndAddBytes_Exit:
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
.endif
|
|
325
|
+
|
|
326
|
+
# -----------------------------------------------------------------------------
#
# __KeccakF1600 -- internal round engine (not callable from C)
#
# Each loop iteration performs two rounds (an "even" and an "odd" one) on the
# state held in registers.
#
# In:   %zmm0-%zmm4   state, one 5-lane row per register (lanes 0..4)
#       %eax          number of round pairs to run (must be non-zero)
#       %r10          pointer to the round constants; 16 bytes are consumed
#                     per iteration
#       %k1           mask selecting lane 0; %k2-%k5 select lanes 1..4
#       %zmm13-%zmm16 rows 1..4 of theta_perm
#       %zmm17-%zmm21 the five rows of pi0_perm
#       %zmm22-%zmm26 the five rows of rhotates0 (even round)
#       %zmm27-%zmm31 the five rows of rhotates1 (odd round)
# Out:  %zmm0-%zmm4   updated state, %r10 advanced, %eax = 0
# Clobbers: %zmm5-%zmm12, flags.
#
# vpternlogq immediates used below: 0x96 is a three-way XOR,
# 0xD2 computes dst ^ (~src2 & src3) (written "$0xD2,src3,src2,dst").
#
.text
.ifndef old_gas_syntax
.type   __KeccakF1600,@function
.endif
.balign 32
__KeccakF1600:
.Lkeccak_round_pair:
    ######################################### Theta, even round
    vmovdqa64   %zmm0,%zmm5                     # keep a copy of row 0 ...
    vpternlogq  $0x96,%zmm2,%zmm1,%zmm0         # ... and accumulate the column parity in zmm0
    vpternlogq  $0x96,%zmm4,%zmm3,%zmm0
    vprolq      $1,%zmm0,%zmm6                  # parity rotated left by one bit
    vpermq      %zmm0,%zmm13,%zmm0
    vpermq      %zmm6,%zmm16,%zmm6
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm5         # row 0 lives in zmm5 for now
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm1
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm2
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm3
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm4
    ######################################### Rho
    vprolvq     %zmm22,%zmm5,%zmm0              # row 0 moves back into zmm0
    vprolvq     %zmm23,%zmm1,%zmm1
    vprolvq     %zmm24,%zmm2,%zmm2
    vprolvq     %zmm25,%zmm3,%zmm3
    vprolvq     %zmm26,%zmm4,%zmm4
    ######################################### Pi
    vpermq      %zmm0,%zmm17,%zmm0
    vpermq      %zmm1,%zmm18,%zmm1
    vpermq      %zmm2,%zmm19,%zmm2
    vpermq      %zmm3,%zmm20,%zmm3
    vpermq      %zmm4,%zmm21,%zmm4
    ######################################### Chi
    vmovdqa64   %zmm0,%zmm5                     # zmm0/zmm1 are overwritten before their
    vmovdqa64   %zmm1,%zmm6                     # last use, so work from copies
    vpternlogq  $0xD2,%zmm2,%zmm1,%zmm0
    vpternlogq  $0xD2,%zmm3,%zmm2,%zmm1
    vpternlogq  $0xD2,%zmm4,%zmm3,%zmm2
    vpternlogq  $0xD2,%zmm5,%zmm4,%zmm3
    vpternlogq  $0xD2,%zmm6,%zmm5,%zmm4
    ######################################### Iota
    vpxorq      (%r10),%zmm0,%zmm0{%k1}         # constant goes into lane 0 only
    lea         16(%r10),%r10                   # step past both constants of this pair
    ######################################### Harmonize rounds
    # Build zmm5-zmm9 by picking one lane at a time (masks k2..k5) from
    # zmm0-zmm4, then permute the result back into zmm1-zmm4.
    vpblendmq   %zmm2,%zmm1,%zmm6{%k2}
    vpblendmq   %zmm3,%zmm2,%zmm7{%k2}
    vpblendmq   %zmm4,%zmm3,%zmm8{%k2}
    vpblendmq   %zmm1,%zmm0,%zmm5{%k2}
    vpblendmq   %zmm0,%zmm4,%zmm9{%k2}
    vpblendmq   %zmm3,%zmm6,%zmm6{%k3}
    vpblendmq   %zmm4,%zmm7,%zmm7{%k3}
    vpblendmq   %zmm2,%zmm5,%zmm5{%k3}
    vpblendmq   %zmm0,%zmm8,%zmm8{%k3}
    vpblendmq   %zmm1,%zmm9,%zmm9{%k3}
    vpblendmq   %zmm4,%zmm6,%zmm6{%k4}
    vpblendmq   %zmm3,%zmm5,%zmm5{%k4}
    vpblendmq   %zmm0,%zmm7,%zmm7{%k4}
    vpblendmq   %zmm1,%zmm8,%zmm8{%k4}
    vpblendmq   %zmm2,%zmm9,%zmm9{%k4}
    vpblendmq   %zmm4,%zmm5,%zmm5{%k5}
    vpblendmq   %zmm0,%zmm6,%zmm6{%k5}
    vpblendmq   %zmm1,%zmm7,%zmm7{%k5}
    vpblendmq   %zmm2,%zmm8,%zmm8{%k5}
    vpblendmq   %zmm3,%zmm9,%zmm9{%k5}
    # zmm5 needs no permutation (it would use the identity row of theta_perm)
    vpermq      %zmm6,%zmm13,%zmm1
    vpermq      %zmm7,%zmm14,%zmm2
    vpermq      %zmm8,%zmm15,%zmm3
    vpermq      %zmm9,%zmm16,%zmm4
    ######################################### Theta, odd round
    vmovdqa64   %zmm5,%zmm0                     # zmm0 = row 0
    vpternlogq  $0x96,%zmm2,%zmm1,%zmm5         # zmm5 doubles as the parity accumulator
    vpternlogq  $0x96,%zmm4,%zmm3,%zmm5
    vprolq      $1,%zmm5,%zmm6
    vpermq      %zmm5,%zmm13,%zmm5
    vpermq      %zmm6,%zmm16,%zmm6
    vpternlogq  $0x96,%zmm5,%zmm6,%zmm0
    vpternlogq  $0x96,%zmm5,%zmm6,%zmm3
    vpternlogq  $0x96,%zmm5,%zmm6,%zmm1
    vpternlogq  $0x96,%zmm5,%zmm6,%zmm4
    vpternlogq  $0x96,%zmm5,%zmm6,%zmm2
    ######################################### Rho
    vprolvq     %zmm27,%zmm0,%zmm0
    vprolvq     %zmm30,%zmm3,%zmm6
    vprolvq     %zmm28,%zmm1,%zmm7
    vprolvq     %zmm31,%zmm4,%zmm8
    vprolvq     %zmm29,%zmm2,%zmm9
    vpermq      %zmm0,%zmm16,%zmm10             # Chi operands for row 0, taken
    vpermq      %zmm0,%zmm15,%zmm11             # before the constant is added
    ######################################### Iota
    vpxorq      -8(%r10),%zmm0,%zmm0{%k1}       # second constant of the pair
    ######################################### Pi
    vpermq      %zmm6,%zmm14,%zmm1
    vpermq      %zmm7,%zmm16,%zmm2
    vpermq      %zmm8,%zmm13,%zmm3
    vpermq      %zmm9,%zmm15,%zmm4
    ######################################### Chi
    vpternlogq  $0xD2,%zmm11,%zmm10,%zmm0
    vpermq      %zmm6,%zmm13,%zmm12
    vpternlogq  $0xD2,%zmm6,%zmm12,%zmm1        # zmm6 is used unpermuted
    vpermq      %zmm7,%zmm15,%zmm5
    vpermq      %zmm7,%zmm14,%zmm7
    vpternlogq  $0xD2,%zmm7,%zmm5,%zmm2
    vpermq      %zmm8,%zmm16,%zmm6
    vpternlogq  $0xD2,%zmm6,%zmm8,%zmm3         # zmm8 is used unpermuted
    vpermq      %zmm9,%zmm14,%zmm5
    vpermq      %zmm9,%zmm13,%zmm9
    vpternlogq  $0xD2,%zmm9,%zmm5,%zmm4
    dec         %eax                            # one round pair done
    jnz         .Lkeccak_round_pair
    ret
.ifndef old_gas_syntax
.size   __KeccakF1600,.-__KeccakF1600
.endif
|
|
445
|
+
|
|
446
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_24rounds(void *state);
#                                        %rdi
#
# Loads the 200-byte state as five 40-byte rows into %zmm0-%zmm4, runs
# 24 rounds (12 round pairs of __KeccakF1600, starting at the first entry of
# iotas) and stores the rows back.
#
# ABI:      System V AMD64.
# Clobbers: %rax, %rdi (left at state+96), %r8, %r10, %k1-%k6,
#           %zmm0-%zmm31, flags.
#
.globl  KeccakP1600_Permute_24rounds
.globl _KeccakP1600_Permute_24rounds
.ifndef old_gas_syntax
.type   KeccakP1600_Permute_24rounds,@function
.endif
.balign 32                                      # align the entry point itself so callers do not run through padding
KeccakP1600_Permute_24rounds:
_KeccakP1600_Permute_24rounds:
    lea         96(%rdi),%rdi                   # bias the pointer; all state accesses below use N-96(%rdi)
    lea         theta_perm(%rip),%r8            # base of the constant tables
    # Lane masks: k1 = lane 0, k2..k5 = lanes 1..4, k6 = lanes 0..4.
    kxnorw      %k6,%k6,%k6                     # k6 = 0xFFFF
    kshiftrw    $15,%k6,%k1                     # k1 = 0x0001
    kshiftrw    $11,%k6,%k6                     # k6 = 0x001F
    kshiftlw    $1,%k1,%k2
    kshiftlw    $2,%k1,%k3
    kshiftlw    $3,%k1,%k4
    kshiftlw    $4,%k1,%k5
    # theta_perm rows 1..4 (row 0 is marked "not used" in the table)
    vmovdqa64   64*1(%r8),%zmm13
    vmovdqa64   64*2(%r8),%zmm14
    vmovdqa64   64*3(%r8),%zmm15
    vmovdqa64   64*4(%r8),%zmm16
    # rhotates1
    vmovdqa64   64*5(%r8),%zmm27
    vmovdqa64   64*6(%r8),%zmm28
    vmovdqa64   64*7(%r8),%zmm29
    vmovdqa64   64*8(%r8),%zmm30
    vmovdqa64   64*9(%r8),%zmm31
    # rhotates0
    vmovdqa64   64*10(%r8),%zmm22
    vmovdqa64   64*11(%r8),%zmm23
    vmovdqa64   64*12(%r8),%zmm24
    vmovdqa64   64*13(%r8),%zmm25
    vmovdqa64   64*14(%r8),%zmm26
    # pi0_perm
    vmovdqa64   64*15(%r8),%zmm17
    vmovdqa64   64*16(%r8),%zmm18
    vmovdqa64   64*17(%r8),%zmm19
    vmovdqa64   64*18(%r8),%zmm20
    vmovdqa64   64*19(%r8),%zmm21
    # State rows: five lanes each, upper three lanes zeroed by {z}.
    vmovdqu64   40*0-96(%rdi),%zmm0{%k6}{z}
    vmovdqu64   40*1-96(%rdi),%zmm1{%k6}{z}
    vmovdqu64   40*2-96(%rdi),%zmm2{%k6}{z}
    vmovdqu64   40*3-96(%rdi),%zmm3{%k6}{z}
    vmovdqu64   40*4-96(%rdi),%zmm4{%k6}{z}
    lea         iotas(%rip), %r10               # start at the first round constant
    mov         $24/2, %eax                     # __KeccakF1600 counts round pairs
    call        __KeccakF1600
    vmovdqu64   %zmm0,40*0-96(%rdi){%k6}
    vmovdqu64   %zmm1,40*1-96(%rdi){%k6}
    vmovdqu64   %zmm2,40*2-96(%rdi){%k6}
    vmovdqu64   %zmm3,40*3-96(%rdi){%k6}
    vmovdqu64   %zmm4,40*4-96(%rdi){%k6}
    vzeroupper
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
.endif
|
|
507
|
+
|
|
508
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_12rounds(void *state);
#                                        %rdi
#
# Same as the 24-round permutation but runs only 12 rounds: 6 round pairs of
# __KeccakF1600, starting at entry 12 of iotas (i.e. the last 12 constants).
#
# ABI:      System V AMD64.
# Clobbers: %rax, %rdi (left at state+96), %r8, %r10, %k1-%k6,
#           %zmm0-%zmm31, flags.
#
.globl  KeccakP1600_Permute_12rounds
.globl _KeccakP1600_Permute_12rounds
.ifndef old_gas_syntax
.type   KeccakP1600_Permute_12rounds,@function
.endif
.balign 32                                      # align the entry point itself so callers do not run through padding
KeccakP1600_Permute_12rounds:
_KeccakP1600_Permute_12rounds:
    lea         96(%rdi),%rdi                   # bias the pointer; all state accesses below use N-96(%rdi)
    lea         theta_perm(%rip),%r8            # base of the constant tables
    # Lane masks: k1 = lane 0, k2..k5 = lanes 1..4, k6 = lanes 0..4.
    kxnorw      %k6,%k6,%k6                     # k6 = 0xFFFF
    kshiftrw    $15,%k6,%k1                     # k1 = 0x0001
    kshiftrw    $11,%k6,%k6                     # k6 = 0x001F
    kshiftlw    $1,%k1,%k2
    kshiftlw    $2,%k1,%k3
    kshiftlw    $3,%k1,%k4
    kshiftlw    $4,%k1,%k5
    # theta_perm rows 1..4 (row 0 is marked "not used" in the table)
    vmovdqa64   64*1(%r8),%zmm13
    vmovdqa64   64*2(%r8),%zmm14
    vmovdqa64   64*3(%r8),%zmm15
    vmovdqa64   64*4(%r8),%zmm16
    # rhotates1
    vmovdqa64   64*5(%r8),%zmm27
    vmovdqa64   64*6(%r8),%zmm28
    vmovdqa64   64*7(%r8),%zmm29
    vmovdqa64   64*8(%r8),%zmm30
    vmovdqa64   64*9(%r8),%zmm31
    # rhotates0
    vmovdqa64   64*10(%r8),%zmm22
    vmovdqa64   64*11(%r8),%zmm23
    vmovdqa64   64*12(%r8),%zmm24
    vmovdqa64   64*13(%r8),%zmm25
    vmovdqa64   64*14(%r8),%zmm26
    # pi0_perm
    vmovdqa64   64*15(%r8),%zmm17
    vmovdqa64   64*16(%r8),%zmm18
    vmovdqa64   64*17(%r8),%zmm19
    vmovdqa64   64*18(%r8),%zmm20
    vmovdqa64   64*19(%r8),%zmm21
    # State rows: five lanes each, upper three lanes zeroed by {z}.
    vmovdqu64   40*0-96(%rdi),%zmm0{%k6}{z}
    vmovdqu64   40*1-96(%rdi),%zmm1{%k6}{z}
    vmovdqu64   40*2-96(%rdi),%zmm2{%k6}{z}
    vmovdqu64   40*3-96(%rdi),%zmm3{%k6}{z}
    vmovdqu64   40*4-96(%rdi),%zmm4{%k6}{z}
    lea         iotas+12*8(%rip), %r10          # skip the first 12 round constants
    mov         $12/2, %eax                     # __KeccakF1600 counts round pairs
    call        __KeccakF1600
    vmovdqu64   %zmm0,40*0-96(%rdi){%k6}
    vmovdqu64   %zmm1,40*1-96(%rdi){%k6}
    vmovdqu64   %zmm2,40*2-96(%rdi){%k6}
    vmovdqu64   %zmm3,40*3-96(%rdi){%k6}
    vmovdqu64   %zmm4,40*4-96(%rdi){%k6}
    vzeroupper
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
.endif
|
|
569
|
+
|
|
570
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
#                                       %rdi                %esi
#
# Runs the last `nrounds` rounds of the permutation: the round-constant
# pointer starts at iotas_end - 8*nrounds. When nrounds is odd, one single
# round is executed inline first; the remaining nrounds/2 round pairs are
# delegated to __KeccakF1600.
#
# ABI:      System V AMD64.
# Clobbers: %rax, %rsi, %rdi (left at state+96), %r8, %r10, %k1-%k6,
#           %zmm0-%zmm31, flags.
#
.globl  KeccakP1600_Permute_Nrounds
.globl _KeccakP1600_Permute_Nrounds
.ifndef old_gas_syntax
.type   KeccakP1600_Permute_Nrounds,@function
.endif
.balign 32                                      # align the entry point itself so callers do not run through padding
KeccakP1600_Permute_Nrounds:
_KeccakP1600_Permute_Nrounds:
    lea         96(%rdi),%rdi                   # bias the pointer; all state accesses below use N-96(%rdi)
    lea         theta_perm(%rip),%r8            # base of the constant tables
    # Lane masks: k1 = lane 0, k2..k5 = lanes 1..4, k6 = lanes 0..4.
    kxnorw      %k6,%k6,%k6                     # k6 = 0xFFFF
    kshiftrw    $15,%k6,%k1                     # k1 = 0x0001
    kshiftrw    $11,%k6,%k6                     # k6 = 0x001F
    kshiftlw    $1,%k1,%k2
    kshiftlw    $2,%k1,%k3
    kshiftlw    $3,%k1,%k4
    kshiftlw    $4,%k1,%k5
    # theta_perm rows 1..4
    vmovdqa64   64*1(%r8),%zmm13
    vmovdqa64   64*2(%r8),%zmm14
    vmovdqa64   64*3(%r8),%zmm15
    vmovdqa64   64*4(%r8),%zmm16
    # rhotates1
    vmovdqa64   64*5(%r8),%zmm27
    vmovdqa64   64*6(%r8),%zmm28
    vmovdqa64   64*7(%r8),%zmm29
    vmovdqa64   64*8(%r8),%zmm30
    vmovdqa64   64*9(%r8),%zmm31
    # rhotates0
    vmovdqa64   64*10(%r8),%zmm22
    vmovdqa64   64*11(%r8),%zmm23
    vmovdqa64   64*12(%r8),%zmm24
    vmovdqa64   64*13(%r8),%zmm25
    vmovdqa64   64*14(%r8),%zmm26
    # pi0_perm
    vmovdqa64   64*15(%r8),%zmm17
    vmovdqa64   64*16(%r8),%zmm18
    vmovdqa64   64*17(%r8),%zmm19
    vmovdqa64   64*18(%r8),%zmm20
    vmovdqa64   64*19(%r8),%zmm21
    # State rows: five lanes each, upper three lanes zeroed by {z}.
    vmovdqu64   40*0-96(%rdi),%zmm0{%k6}{z}
    vmovdqu64   40*1-96(%rdi),%zmm1{%k6}{z}
    vmovdqu64   40*2-96(%rdi),%zmm2{%k6}{z}
    vmovdqu64   40*3-96(%rdi),%zmm3{%k6}{z}
    vmovdqu64   40*4-96(%rdi),%zmm4{%k6}{z}
    mov         %esi, %eax                      # rax = nrounds; 32-bit move zero-extends (upper half of %rsi is unspecified)
    lea         iotas_end(%rip), %r10
    lea         (,%rax,8), %rsi                 # rsi = 8 * nrounds
    sub         %rsi, %r10                      # r10 = first of the last nrounds constants
    test        $1, %eax
    jz          .KeccakP1600_Permute_Nrounds_DoubleRound
    # nrounds is odd: do one single round here
    ######################################### Theta
    vmovdqa64   %zmm0,%zmm5                     # keep a copy of row 0 ...
    vpternlogq  $0x96,%zmm2,%zmm1,%zmm0         # ... and accumulate the column parity in zmm0
    vpternlogq  $0x96,%zmm4,%zmm3,%zmm0
    vprolq      $1,%zmm0,%zmm6
    vpermq      %zmm0,%zmm13,%zmm0
    vpermq      %zmm6,%zmm16,%zmm6
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm5         # row 0 lives in zmm5 for now
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm1
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm2
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm3
    vpternlogq  $0x96,%zmm0,%zmm6,%zmm4
    ######################################### Rho
    vprolvq     %zmm22,%zmm5,%zmm0              # row 0 moves back into zmm0
    vprolvq     %zmm23,%zmm1,%zmm1
    vprolvq     %zmm24,%zmm2,%zmm2
    vprolvq     %zmm25,%zmm3,%zmm3
    vprolvq     %zmm26,%zmm4,%zmm4
    ######################################### Pi
    vpermq      %zmm0,%zmm17,%zmm0
    vpermq      %zmm1,%zmm18,%zmm1
    vpermq      %zmm2,%zmm19,%zmm2
    vpermq      %zmm3,%zmm20,%zmm3
    vpermq      %zmm4,%zmm21,%zmm4
    ######################################### Chi
    vmovdqa64   %zmm0,%zmm5
    vmovdqa64   %zmm1,%zmm6
    vpternlogq  $0xD2,%zmm2,%zmm1,%zmm0
    vpternlogq  $0xD2,%zmm3,%zmm2,%zmm1
    vpternlogq  $0xD2,%zmm4,%zmm3,%zmm2
    vpternlogq  $0xD2,%zmm5,%zmm4,%zmm3
    vpternlogq  $0xD2,%zmm6,%zmm5,%zmm4
    ######################################### Iota
    vpxorq      (%r10),%zmm0,%zmm0{%k1}         # constant goes into lane 0 only
    lea         8(%r10),%r10                    # a single round consumes one constant
    ######################################### Harmonize single round
    vpermq      %zmm1,%zmm13,%zmm1
    vpermq      %zmm2,%zmm14,%zmm2
    vpermq      %zmm3,%zmm15,%zmm3
    vpermq      %zmm4,%zmm16,%zmm4
    vpblendmq   %zmm1,%zmm0,%zmm5{%k2}
    vpblendmq   %zmm2,%zmm1,%zmm6{%k2}
    vpblendmq   %zmm3,%zmm2,%zmm7{%k2}
    vpblendmq   %zmm4,%zmm3,%zmm8{%k2}
    vpblendmq   %zmm0,%zmm4,%zmm9{%k2}
    vpblendmq   %zmm2,%zmm5,%zmm5{%k3}
    vpblendmq   %zmm3,%zmm6,%zmm6{%k3}
    vpblendmq   %zmm4,%zmm7,%zmm7{%k3}
    vpblendmq   %zmm0,%zmm8,%zmm8{%k3}
    vpblendmq   %zmm1,%zmm9,%zmm9{%k3}
    vpblendmq   %zmm3,%zmm5,%zmm5{%k4}
    vpblendmq   %zmm4,%zmm6,%zmm6{%k4}
    vpblendmq   %zmm0,%zmm7,%zmm7{%k4}
    vpblendmq   %zmm1,%zmm8,%zmm8{%k4}
    vpblendmq   %zmm2,%zmm9,%zmm9{%k4}
    vpblendmq   %zmm0,%zmm6,%zmm6{%k5}          # must read zmm0 before the next line overwrites it
    vpblendmq   %zmm4,%zmm5,%zmm0{%k5}
    vpblendmq   %zmm1,%zmm7,%zmm7{%k5}
    vpblendmq   %zmm2,%zmm8,%zmm8{%k5}
    vpblendmq   %zmm3,%zmm9,%zmm9{%k5}
    vpermq      %zmm6,%zmm13,%zmm4
    vpermq      %zmm7,%zmm14,%zmm3
    vpermq      %zmm8,%zmm15,%zmm2
    vpermq      %zmm9,%zmm16,%zmm1
.KeccakP1600_Permute_Nrounds_DoubleRound:
    shr         $1, %eax                        # eax = number of round pairs left
    jz          .KeccakP1600_Permute_Nrounds_End
    call        __KeccakF1600
.KeccakP1600_Permute_Nrounds_End:
    vmovdqu64   %zmm0,40*0-96(%rdi){%k6}
    vmovdqu64   %zmm1,40*1-96(%rdi){%k6}
    vmovdqu64   %zmm2,40*2-96(%rdi){%k6}
    vmovdqu64   %zmm3,40*3-96(%rdi){%k6}
    vmovdqu64   %zmm4,40*4-96(%rdi){%k6}
    vzeroupper
    ret
.ifndef old_gas_syntax
.size   KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
.endif
|
|
702
|
+
|
|
703
|
+
# -----------------------------------------------------------------------------
#
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                          %rdi                %esi                         %rdx         %rcx
#
# Repeatedly XORs laneCount lanes (8 bytes each) of data into the state and
# applies the 24-round permutation, for as long as at least laneCount whole
# lanes of data remain. Returns the number of bytes consumed.
#
# laneCount 21 and 17 have dedicated paths that keep the state in
# %zmm0-%zmm4 across blocks; any other value works on the state in memory
# and calls KeccakP1600_Permute_24rounds per block.
#
# ABI:      System V AMD64. %rbx and %r10 are saved and restored.
# Clobbers: %rax (result), %rcx, %rdx, %rsi, %rdi, %r8, %k1-%k6,
#           %zmm0-%zmm31, flags.
#
.globl  KeccakF1600_FastLoop_Absorb
.globl _KeccakF1600_FastLoop_Absorb
.ifndef old_gas_syntax
.type   KeccakF1600_FastLoop_Absorb,@function
.endif
.balign 32                                      # align the entry point itself so callers do not run through padding
KeccakF1600_FastLoop_Absorb:
_KeccakF1600_FastLoop_Absorb:
    push        %rbx
    push        %r10
    mov         %esi, %esi                      # laneCount is a 32-bit argument: zero-extend before 64-bit use
    shr         $3, %rcx                        # rcx = data length in lanes
    mov         %rdx, %rbx                      # rbx = initial data pointer
    cmp         %rsi, %rcx
    jb          KeccakF1600_FastLoop_Absorb_Exit    # not even one full block
    lea         96(%rdi),%rdi                   # bias the pointer; vector state accesses use N-96(%rdi)
    lea         theta_perm(%rip),%r8            # base of the constant tables
    # Lane masks: k1 = lane 0, k2..k5 = lanes 1..4, k6 = lanes 0..4.
    kxnorw      %k6,%k6,%k6                     # k6 = 0xFFFF
    kshiftrw    $15,%k6,%k1                     # k1 = 0x0001
    kshiftrw    $11,%k6,%k6                     # k6 = 0x001F
    kshiftlw    $1,%k1,%k2
    kshiftlw    $2,%k1,%k3
    kshiftlw    $3,%k1,%k4
    kshiftlw    $4,%k1,%k5
    # theta_perm rows 1..4
    vmovdqa64   64*1(%r8),%zmm13
    vmovdqa64   64*2(%r8),%zmm14
    vmovdqa64   64*3(%r8),%zmm15
    vmovdqa64   64*4(%r8),%zmm16
    # rhotates1
    vmovdqa64   64*5(%r8),%zmm27
    vmovdqa64   64*6(%r8),%zmm28
    vmovdqa64   64*7(%r8),%zmm29
    vmovdqa64   64*8(%r8),%zmm30
    vmovdqa64   64*9(%r8),%zmm31
    # rhotates0
    vmovdqa64   64*10(%r8),%zmm22
    vmovdqa64   64*11(%r8),%zmm23
    vmovdqa64   64*12(%r8),%zmm24
    vmovdqa64   64*13(%r8),%zmm25
    vmovdqa64   64*14(%r8),%zmm26
    # pi0_perm
    vmovdqa64   64*15(%r8),%zmm17
    vmovdqa64   64*16(%r8),%zmm18
    vmovdqa64   64*17(%r8),%zmm19
    vmovdqa64   64*18(%r8),%zmm20
    vmovdqa64   64*19(%r8),%zmm21
    # State rows: five lanes each, upper three lanes zeroed by {z}.
    vmovdqu64   40*0-96(%rdi),%zmm0{%k6}{z}
    vmovdqu64   40*1-96(%rdi),%zmm1{%k6}{z}
    vmovdqu64   40*2-96(%rdi),%zmm2{%k6}{z}
    vmovdqu64   40*3-96(%rdi),%zmm3{%k6}{z}
    vmovdqu64   40*4-96(%rdi),%zmm4{%k6}{z}
    cmp         $21, %rsi
    jnz         KeccakF1600_FastLoop_Absorb_Not21Lanes
    sub         $21, %rcx                       # pre-decrement: loop continues while no borrow
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
    vmovdqu64   8*0(%rdx),%zmm5{%k6}{z}         # data lanes 0..4
    vmovdqu64   8*5(%rdx),%zmm6{%k6}{z}         # 5..9
    vmovdqu64   8*10(%rdx),%zmm7{%k6}{z}        # 10..14
    vmovdqu64   8*15(%rdx),%zmm8{%k6}{z}        # 15..19
    vmovdqu64   8*20(%rdx),%zmm9{%k1}{z}        # lane 20 only
    vpxorq      %zmm5,%zmm0,%zmm0
    vpxorq      %zmm6,%zmm1,%zmm1
    vpxorq      %zmm7,%zmm2,%zmm2
    vpxorq      %zmm8,%zmm3,%zmm3
    vpxorq      %zmm9,%zmm4,%zmm4
    add         $21*8, %rdx
    lea         iotas(%rip), %r10
    mov         $24/2, %eax                     # 24 rounds = 12 round pairs
    call        __KeccakF1600
    sub         $21, %rcx
    jnc         KeccakF1600_FastLoop_Absorb_Loop21Lanes
KeccakF1600_FastLoop_Absorb_SaveAndExit:
    vmovdqu64   %zmm0,40*0-96(%rdi){%k6}
    vmovdqu64   %zmm1,40*1-96(%rdi){%k6}
    vmovdqu64   %zmm2,40*2-96(%rdi){%k6}
    vmovdqu64   %zmm3,40*3-96(%rdi){%k6}
    vmovdqu64   %zmm4,40*4-96(%rdi){%k6}
KeccakF1600_FastLoop_Absorb_Exit:
    vzeroupper
    mov         %rdx, %rax                      # return number of bytes processed
    sub         %rbx, %rax
    pop         %r10
    pop         %rbx
    ret
KeccakF1600_FastLoop_Absorb_Not21Lanes:
    cmp         $17, %rsi
    jnz         KeccakF1600_FastLoop_Absorb_Not17Lanes
    sub         $17, %rcx                       # pre-decrement: loop continues while no borrow
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
    vmovdqu64   8*0(%rdx),%zmm5{%k6}{z}         # data lanes 0..4
    vmovdqu64   8*5(%rdx),%zmm6{%k6}{z}         # 5..9
    vmovdqu64   8*10(%rdx),%zmm7{%k6}{z}        # 10..14
    vmovdqu64   8*15(%rdx),%zmm8{%k1}{z}        # lane 15, rest zeroed
    vmovdqu64   8*15(%rdx),%zmm8{%k2}           # merge in lane 16
    vpxorq      %zmm5,%zmm0,%zmm0
    vpxorq      %zmm6,%zmm1,%zmm1
    vpxorq      %zmm7,%zmm2,%zmm2
    vpxorq      %zmm8,%zmm3,%zmm3
    add         $17*8, %rdx
    lea         iotas(%rip), %r10
    mov         $24/2, %eax                     # 24 rounds = 12 round pairs
    call        __KeccakF1600
    sub         $17, %rcx
    jnc         KeccakF1600_FastLoop_Absorb_Loop17Lanes
    jmp         KeccakF1600_FastLoop_Absorb_SaveAndExit
KeccakF1600_FastLoop_Absorb_Not17Lanes:
    lea         -96(%rdi), %rdi                 # generic path works on the state in memory: undo the bias
KeccakF1600_FastLoop_Absorb_LanesLoop:
    mov         %rsi, %rax                      # rax = lanes left in this block
    mov         %rdi, %r10                      # r10 = cursor into the state
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
    mov         (%rdx), %r8
    add         $8, %rdx
    xor         %r8, (%r10)
    add         $8, %r10
    sub         $1, %rax
    jnz         KeccakF1600_FastLoop_Absorb_LanesAddLoop
    sub         %rsi, %rcx
    push        %rdi                            # the permutation clobbers these
    push        %rsi
    push        %rdx
    push        %rcx
    sub         $8, %rsp                        # six pushes since entry leave %rsp 8 off; realign to 16 for the call
.ifdef no_plt
    call        KeccakP1600_Permute_24rounds
.else
    call        KeccakP1600_Permute_24rounds@PLT
.endif
    add         $8, %rsp
    pop         %rcx
    pop         %rdx
    pop         %rsi
    pop         %rdi
    cmp         %rsi, %rcx
    jae         KeccakF1600_FastLoop_Absorb_LanesLoop
    jmp         KeccakF1600_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
.size   KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
.endif
|
|
841
|
+
|
|
842
|
+
# -----------------------------------------------------------------------------
#
# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                                   %rdi                %esi                         %rdx         %rcx
#
# Repeatedly XORs laneCount lanes (8 bytes each) of data into the state and
# applies the 12-round permutation, for as long as at least laneCount whole
# lanes of data remain. Returns the number of bytes consumed.
#
# laneCount 21 and 17 have dedicated paths that keep the state in
# %zmm0-%zmm4 across blocks; any other value works on the state in memory
# and calls KeccakP1600_Permute_12rounds per block.
#
# ABI:      System V AMD64. %rbx and %r10 are saved and restored.
# Clobbers: %rax (result), %rcx, %rdx, %rsi, %rdi, %r8, %k1-%k6,
#           %zmm0-%zmm31, flags.
#
.globl  KeccakP1600_12rounds_FastLoop_Absorb
.globl _KeccakP1600_12rounds_FastLoop_Absorb
.ifndef old_gas_syntax
.type   KeccakP1600_12rounds_FastLoop_Absorb,@function
.endif
.balign 32                                      # align the entry point itself so callers do not run through padding
KeccakP1600_12rounds_FastLoop_Absorb:
_KeccakP1600_12rounds_FastLoop_Absorb:
    push        %rbx
    push        %r10
    mov         %esi, %esi                      # laneCount is a 32-bit argument: zero-extend before 64-bit use
    shr         $3, %rcx                        # rcx = data length in lanes
    mov         %rdx, %rbx                      # rbx = initial data pointer
    cmp         %rsi, %rcx
    jb          KeccakP1600_FastLoop_Absorb_Exit    # not even one full block
    lea         96(%rdi),%rdi                   # bias the pointer; vector state accesses use N-96(%rdi)
    lea         theta_perm(%rip),%r8            # base of the constant tables
    # Lane masks: k1 = lane 0, k2..k5 = lanes 1..4, k6 = lanes 0..4.
    kxnorw      %k6,%k6,%k6                     # k6 = 0xFFFF
    kshiftrw    $15,%k6,%k1                     # k1 = 0x0001
    kshiftrw    $11,%k6,%k6                     # k6 = 0x001F
    kshiftlw    $1,%k1,%k2
    kshiftlw    $2,%k1,%k3
    kshiftlw    $3,%k1,%k4
    kshiftlw    $4,%k1,%k5
    # theta_perm rows 1..4
    vmovdqa64   64*1(%r8),%zmm13
    vmovdqa64   64*2(%r8),%zmm14
    vmovdqa64   64*3(%r8),%zmm15
    vmovdqa64   64*4(%r8),%zmm16
    # rhotates1
    vmovdqa64   64*5(%r8),%zmm27
    vmovdqa64   64*6(%r8),%zmm28
    vmovdqa64   64*7(%r8),%zmm29
    vmovdqa64   64*8(%r8),%zmm30
    vmovdqa64   64*9(%r8),%zmm31
    # rhotates0
    vmovdqa64   64*10(%r8),%zmm22
    vmovdqa64   64*11(%r8),%zmm23
    vmovdqa64   64*12(%r8),%zmm24
    vmovdqa64   64*13(%r8),%zmm25
    vmovdqa64   64*14(%r8),%zmm26
    # pi0_perm
    vmovdqa64   64*15(%r8),%zmm17
    vmovdqa64   64*16(%r8),%zmm18
    vmovdqa64   64*17(%r8),%zmm19
    vmovdqa64   64*18(%r8),%zmm20
    vmovdqa64   64*19(%r8),%zmm21
    # State rows: five lanes each, upper three lanes zeroed by {z}.
    vmovdqu64   40*0-96(%rdi),%zmm0{%k6}{z}
    vmovdqu64   40*1-96(%rdi),%zmm1{%k6}{z}
    vmovdqu64   40*2-96(%rdi),%zmm2{%k6}{z}
    vmovdqu64   40*3-96(%rdi),%zmm3{%k6}{z}
    vmovdqu64   40*4-96(%rdi),%zmm4{%k6}{z}
    cmp         $21, %rsi
    jnz         KeccakP1600_FastLoop_Absorb_Not21Lanes
    sub         $21, %rcx                       # pre-decrement: loop continues while no borrow
KeccakP1600_FastLoop_Absorb_Loop21Lanes:
    vmovdqu64   8*0(%rdx),%zmm5{%k6}{z}         # data lanes 0..4
    vmovdqu64   8*5(%rdx),%zmm6{%k6}{z}         # 5..9
    vmovdqu64   8*10(%rdx),%zmm7{%k6}{z}        # 10..14
    vmovdqu64   8*15(%rdx),%zmm8{%k6}{z}        # 15..19
    vmovdqu64   8*20(%rdx),%zmm9{%k1}{z}        # lane 20 only
    vpxorq      %zmm5,%zmm0,%zmm0
    vpxorq      %zmm6,%zmm1,%zmm1
    vpxorq      %zmm7,%zmm2,%zmm2
    vpxorq      %zmm8,%zmm3,%zmm3
    vpxorq      %zmm9,%zmm4,%zmm4
    add         $21*8, %rdx
    lea         iotas+12*8(%rip), %r10          # skip the first 12 round constants
    mov         $12/2, %eax                     # 12 rounds = 6 round pairs
    call        __KeccakF1600
    sub         $21, %rcx
    jnc         KeccakP1600_FastLoop_Absorb_Loop21Lanes
KeccakP1600_FastLoop_Absorb_SaveAndExit:
    vmovdqu64   %zmm0,40*0-96(%rdi){%k6}
    vmovdqu64   %zmm1,40*1-96(%rdi){%k6}
    vmovdqu64   %zmm2,40*2-96(%rdi){%k6}
    vmovdqu64   %zmm3,40*3-96(%rdi){%k6}
    vmovdqu64   %zmm4,40*4-96(%rdi){%k6}
KeccakP1600_FastLoop_Absorb_Exit:
    vzeroupper
    mov         %rdx, %rax                      # return number of bytes processed
    sub         %rbx, %rax
    pop         %r10
    pop         %rbx
    ret
KeccakP1600_FastLoop_Absorb_Not21Lanes:
    cmp         $17, %rsi
    jnz         KeccakP1600_FastLoop_Absorb_Not17Lanes
    sub         $17, %rcx                       # pre-decrement: loop continues while no borrow
KeccakP1600_FastLoop_Absorb_Loop17Lanes:
    vmovdqu64   8*0(%rdx),%zmm5{%k6}{z}         # data lanes 0..4
    vmovdqu64   8*5(%rdx),%zmm6{%k6}{z}         # 5..9
    vmovdqu64   8*10(%rdx),%zmm7{%k6}{z}        # 10..14
    vmovdqu64   8*15(%rdx),%zmm8{%k1}{z}        # lane 15, rest zeroed
    vmovdqu64   8*15(%rdx),%zmm8{%k2}           # merge in lane 16
    vpxorq      %zmm5,%zmm0,%zmm0
    vpxorq      %zmm6,%zmm1,%zmm1
    vpxorq      %zmm7,%zmm2,%zmm2
    vpxorq      %zmm8,%zmm3,%zmm3
    add         $17*8, %rdx
    lea         iotas+12*8(%rip), %r10          # skip the first 12 round constants
    mov         $12/2, %eax                     # 12 rounds = 6 round pairs
    call        __KeccakF1600
    sub         $17, %rcx
    jnc         KeccakP1600_FastLoop_Absorb_Loop17Lanes
    jmp         KeccakP1600_FastLoop_Absorb_SaveAndExit
KeccakP1600_FastLoop_Absorb_Not17Lanes:
    lea         -96(%rdi), %rdi                 # generic path works on the state in memory: undo the bias
KeccakP1600_FastLoop_Absorb_LanesLoop:
    mov         %rsi, %rax                      # rax = lanes left in this block
    mov         %rdi, %r10                      # r10 = cursor into the state
KeccakP1600_FastLoop_Absorb_LanesAddLoop:
    mov         (%rdx), %r8
    add         $8, %rdx
    xor         %r8, (%r10)
    add         $8, %r10
    sub         $1, %rax
    jnz         KeccakP1600_FastLoop_Absorb_LanesAddLoop
    sub         %rsi, %rcx
    push        %rdi                            # the permutation clobbers these
    push        %rsi
    push        %rdx
    push        %rcx
    sub         $8, %rsp                        # six pushes since entry leave %rsp 8 off; realign to 16 for the call
.ifdef no_plt
    call        KeccakP1600_Permute_12rounds
.else
    call        KeccakP1600_Permute_12rounds@PLT
.endif
    add         $8, %rsp
    pop         %rcx
    pop         %rdx
    pop         %rsi
    pop         %rdi
    cmp         %rsi, %rcx
    jae         KeccakP1600_FastLoop_Absorb_LanesLoop
    jmp         KeccakP1600_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
.size   KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
.endif
|
|
980
|
+
.balign 64
|
|
981
|
+
theta_perm:
|
|
982
|
+
.quad 0, 1, 2, 3, 4, 5, 6, 7 # [not used]
|
|
983
|
+
.quad 4, 0, 1, 2, 3, 5, 6, 7
|
|
984
|
+
.quad 3, 4, 0, 1, 2, 5, 6, 7
|
|
985
|
+
.quad 2, 3, 4, 0, 1, 5, 6, 7
|
|
986
|
+
.quad 1, 2, 3, 4, 0, 5, 6, 7
|
|
987
|
+
rhotates1:
|
|
988
|
+
.quad 0, 44, 43, 21, 14, 0, 0, 0 # [0][0] [1][1] [2][2] [3][3] [4][4]
|
|
989
|
+
.quad 18, 1, 6, 25, 8, 0, 0, 0 # [4][0] [0][1] [1][2] [2][3] [3][4]
|
|
990
|
+
.quad 41, 2, 62, 55, 39, 0, 0, 0 # [3][0] [4][1] [0][2] [1][3] [2][4]
|
|
991
|
+
.quad 3, 45, 61, 28, 20, 0, 0, 0 # [2][0] [3][1] [4][2] [0][3] [1][4]
|
|
992
|
+
.quad 36, 10, 15, 56, 27, 0, 0, 0 # [1][0] [2][1] [3][2] [4][3] [0][4]
|
|
993
|
+
rhotates0:
|
|
994
|
+
.quad 0, 1, 62, 28, 27, 0, 0, 0
|
|
995
|
+
.quad 36, 44, 6, 55, 20, 0, 0, 0
|
|
996
|
+
.quad 3, 10, 43, 25, 39, 0, 0, 0
|
|
997
|
+
.quad 41, 45, 15, 21, 8, 0, 0, 0
|
|
998
|
+
.quad 18, 2, 61, 56, 14, 0, 0, 0
|
|
999
|
+
pi0_perm:
|
|
1000
|
+
.quad 0, 3, 1, 4, 2, 5, 6, 7
|
|
1001
|
+
.quad 1, 4, 2, 0, 3, 5, 6, 7
|
|
1002
|
+
.quad 2, 0, 3, 1, 4, 5, 6, 7
|
|
1003
|
+
.quad 3, 1, 4, 2, 0, 5, 6, 7
|
|
1004
|
+
.quad 4, 2, 0, 3, 1, 5, 6, 7
|
|
1005
|
+
iotas:
|
|
1006
|
+
.quad 0x0000000000000001
|
|
1007
|
+
.quad 0x0000000000008082
|
|
1008
|
+
.quad 0x800000000000808a
|
|
1009
|
+
.quad 0x8000000080008000
|
|
1010
|
+
.quad 0x000000000000808b
|
|
1011
|
+
.quad 0x0000000080000001
|
|
1012
|
+
.quad 0x8000000080008081
|
|
1013
|
+
.quad 0x8000000000008009
|
|
1014
|
+
.quad 0x000000000000008a
|
|
1015
|
+
.quad 0x0000000000000088
|
|
1016
|
+
.quad 0x0000000080008009
|
|
1017
|
+
.quad 0x000000008000000a
|
|
1018
|
+
.quad 0x000000008000808b
|
|
1019
|
+
.quad 0x800000000000008b
|
|
1020
|
+
.quad 0x8000000000008089
|
|
1021
|
+
.quad 0x8000000000008003
|
|
1022
|
+
.quad 0x8000000000008002
|
|
1023
|
+
.quad 0x8000000000000080
|
|
1024
|
+
.quad 0x000000000000800a
|
|
1025
|
+
.quad 0x800000008000000a
|
|
1026
|
+
.quad 0x8000000080008081
|
|
1027
|
+
.quad 0x8000000000008080
|
|
1028
|
+
.quad 0x0000000080000001
|
|
1029
|
+
.quad 0x8000000080008008
|
|
1030
|
+
iotas_end:
|
|
1031
|
+
.asciz "Keccak-1600 for AVX-512F, CRYPTOGAMS by <appro@openssl.org>"
|