sleeping_kangaroo12 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +127 -0
- data/ext/Rakefile +73 -0
- data/ext/binding/sleeping_kangaroo12.c +39 -0
- data/ext/config/xkcp.build +17 -0
- data/ext/xkcp/LICENSE +1 -0
- data/ext/xkcp/Makefile +15 -0
- data/ext/xkcp/Makefile.build +200 -0
- data/ext/xkcp/README.markdown +296 -0
- data/ext/xkcp/lib/HighLevel.build +143 -0
- data/ext/xkcp/lib/LowLevel.build +757 -0
- data/ext/xkcp/lib/common/align.h +33 -0
- data/ext/xkcp/lib/common/brg_endian.h +143 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
- data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
- data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
- data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
- data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
- data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
- data/ext/xkcp/lib/high/common/Phases.h +25 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
- data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
- data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
- data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
- data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
- data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
- data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
- data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
- data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
- data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
- data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
- data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
- data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
- data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
- data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
- data/ext/xkcp/util/KeccakSum/base64.c +86 -0
- data/ext/xkcp/util/KeccakSum/base64.h +12 -0
- data/lib/sleeping_kangaroo12/binding.rb +15 -0
- data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
- data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
- data/lib/sleeping_kangaroo12/build.rb +4 -0
- data/lib/sleeping_kangaroo12/digest.rb +103 -0
- data/lib/sleeping_kangaroo12/version.rb +5 -0
- data/lib/sleeping_kangaroo12.rb +7 -0
- metadata +372 -0
|
@@ -0,0 +1,1341 @@
|
|
|
1
|
+
;
|
|
2
|
+
; The eXtended Keccak Code Package (XKCP)
|
|
3
|
+
; https://github.com/XKCP/XKCP
|
|
4
|
+
;
|
|
5
|
+
; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
|
|
6
|
+
;
|
|
7
|
+
; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
|
8
|
+
;
|
|
9
|
+
; For more information, feedback or questions, please refer to the Keccak Team website:
|
|
10
|
+
; https://keccak.team/
|
|
11
|
+
;
|
|
12
|
+
; To the extent possible under law, the implementer has waived all copyright
|
|
13
|
+
; and related or neighboring rights to the source code in this file.
|
|
14
|
+
; http://creativecommons.org/publicdomain/zero/1.0/
|
|
15
|
+
;
|
|
16
|
+
; ---
|
|
17
|
+
;
|
|
18
|
+
; This file implements Xoodoo in a SnP-compatible way.
|
|
19
|
+
; Please refer to SnP-documentation.h for more details.
|
|
20
|
+
;
|
|
21
|
+
; This implementation comes with Xoodoo-SnP.h in the same folder.
|
|
22
|
+
; Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
|
23
|
+
;
|
|
24
|
+
|
|
25
|
+
; INFO: Tested on ATmega1280 simulator
|
|
26
|
+
|
|
27
|
+
; Register aliases shared by every routine in this file.
; Each alias expands to a bare register number; the pointer aliases name the
; low register of a pair (X = r26:r27, Y = r28:r29, Z = r30:r31).
; Comments on the define lines use C comment syntax so that they are removed
; by the preprocessor and never become part of an instruction operand.
#define zero    1       /* r1, stored wherever a zero byte is needed (assumed to hold 0) */
#define rpState 24      /* r24:r25, where the state pointer argument arrives */
#define rX      26      /* low half of X */
#define rY      28      /* low half of Y */
#define rZ      30      /* low half of Z */
#define sp      0x3D    /* presumably the I/O address of the stack pointer low byte */
|
|
34
|
+
|
|
35
|
+
;----------------------------------------------------------------------------
|
|
36
|
+
;
|
|
37
|
+
; void Xoodoo_StaticInitialize( void )
|
|
38
|
+
;
|
|
39
|
+
.global Xoodoo_StaticInitialize
|
|
40
|
+
|
|
41
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_Initialize(void *state)
;
; Writes the zero register to the first 48 bytes at state.
;
; In:       r24:r25 = state
; Clobbers: r23, Z (r30:r31), flags
; Assumes:  r1 holds 0 (the avr-gcc zero register convention)
;
; The final ret is shared with Xoodoo_StaticInitialize, which does nothing
; but return.
;
.global Xoodoo_Initialize
Xoodoo_Initialize:
    movw    rZ, r24                 ; Z = state
    ldi     r23, 3*4/2              ; 6 passes x 8 bytes = 3*4 lanes of 4 bytes
Xoodoo_Initialize_Loop:
    .rept   8                       ; clear 8 bytes (2 lanes) per pass
    st      Z+, zero
    .endr
    dec     r23
    brne    Xoodoo_Initialize_Loop
Xoodoo_StaticInitialize:
    ret
|
|
64
|
+
|
|
65
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
;
; XORs one byte into the state: state[offset] ^= data.
;
; In:       r24:r25 = state
;           r22     = data   (r23 is ignored)
;           r20     = offset (r21 is ignored, so offset is taken modulo 256)
; Clobbers: r0, Z (r30:r31), flags
;
.global Xoodoo_AddByte
Xoodoo_AddByte:
    movw    rZ, r24                 ; Z = state
    add     rZ, r20                 ; Z += offset, 16-bit add with the
    adc     rZ+1, zero              ;   carry folded into the high byte
    ld      r0, Z
    eor     r0, r22                 ; r0 = state[offset] ^ data
    st      Z, r0
    ret
|
|
82
|
+
|
|
83
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_AddBytes(void *state, const unsigned char *data,
;                      unsigned int offset, unsigned int length)
;
; XORs length bytes from data into the state, starting at state + offset.
;
; In:       r24:r25 = state
;           r22:r23 = data
;           r20     = offset (r21 is ignored)
;           r18     = length (r19 is ignored, so at most 255 bytes)
; Clobbers: r0, r18, r19, r21, X (r26:r27), Z (r30:r31), flags
;
.global Xoodoo_AddBytes
Xoodoo_AddBytes:
    movw    rX, r22                 ; X = data
    movw    rZ, r24                 ; Z = state + offset
    add     rZ, r20
    adc     rZ+1, zero
    subi    r18, 8                  ; r18 = length - 8
    brcs    Xoodoo_AddBytes_Byte    ; borrow: fewer than 8 bytes in total
    ; 8 bytes per pass while at least 8 remain
Xoodoo_AddBytes_Loop8:
    .rept   8
    ld      r21, X+
    ld      r0, Z
    eor     r0, r21
    st      Z+, r0
    .endr
    subi    r18, 8
    brcc    Xoodoo_AddBytes_Loop8
Xoodoo_AddBytes_Byte:
    ldi     r19, 8                  ; undo the last subtraction:
    add     r18, r19                ;   r18 = bytes left over (0..7)
    breq    Xoodoo_AddBytes_End
Xoodoo_AddBytes_Loop1:
    ld      r21, X+
    ld      r0, Z
    eor     r0, r21
    st      Z+, r0
    dec     r18
    brne    Xoodoo_AddBytes_Loop1
Xoodoo_AddBytes_End:
    ret
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data,
;                            unsigned int offset, unsigned int length)
;
; Copies length bytes from data over the state, starting at state + offset.
;
; In:       r24:r25 = state
;           r22:r23 = data
;           r20     = offset (r21 is ignored)
;           r18     = length (r19 is ignored, so at most 255 bytes)
; Clobbers: r0, r18, r19, X (r26:r27), Z (r30:r31), flags
;
.global Xoodoo_OverwriteBytes
Xoodoo_OverwriteBytes:
    movw    rX, r22                 ; X = data
    movw    rZ, r24                 ; Z = state + offset
    add     rZ, r20
    adc     rZ+1, zero
    subi    r18, 8                  ; r18 = length - 8
    brcs    Xoodoo_OverwriteBytes_Byte  ; borrow: fewer than 8 bytes in total
    ; 8 bytes per pass while at least 8 remain
Xoodoo_OverwriteBytes_Loop8:
    .rept   8
    ld      r0, X+
    st      Z+, r0
    .endr
    subi    r18, 8
    brcc    Xoodoo_OverwriteBytes_Loop8
Xoodoo_OverwriteBytes_Byte:
    ldi     r19, 8                  ; undo the last subtraction:
    add     r18, r19                ;   r18 = bytes left over (0..7)
    breq    Xoodoo_OverwriteBytes_End
Xoodoo_OverwriteBytes_Loop1:
    ld      r0, X+
    st      Z+, r0
    dec     r18
    brne    Xoodoo_OverwriteBytes_Loop1
Xoodoo_OverwriteBytes_End:
    ret
|
|
199
|
+
|
|
200
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
;
; Writes the zero register to the first byteCount bytes at state.
;
; In:       r24:r25 = state
;           r22     = byteCount (r23 is overwritten, so at most 255 bytes)
; Clobbers: r22, r23, Z (r30:r31), flags
; Assumes:  r1 holds 0 (the avr-gcc zero register convention)
;
.global Xoodoo_OverwriteWithZeroes
Xoodoo_OverwriteWithZeroes:
    movw    rZ, r24                 ; Z = state
    mov     r23, r22
    .rept   3                       ; r23 = byteCount / 8 = whole 8-byte groups
    lsr     r23
    .endr
    breq    Xoodoo_OverwriteWithZeroes_Bytes    ; no whole group to clear
Xoodoo_OverwriteWithZeroes_LoopLanes:
    .rept   8
    st      Z+, zero
    .endr
    dec     r23
    brne    Xoodoo_OverwriteWithZeroes_LoopLanes
Xoodoo_OverwriteWithZeroes_Bytes:
    andi    r22, 7                  ; r22 = byteCount % 8 = bytes left over
    breq    Xoodoo_OverwriteWithZeroes_End
Xoodoo_OverwriteWithZeroes_LoopBytes:
    st      Z+, zero
    dec     r22
    brne    Xoodoo_OverwriteWithZeroes_LoopBytes
Xoodoo_OverwriteWithZeroes_End:
    ret
|
|
235
|
+
|
|
236
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_ExtractBytes(void *state, unsigned char *data,
;                          unsigned int offset, unsigned int length)
;
; Copies length bytes out of the state, starting at state + offset, into
; data. The state is only read; data is the buffer being written.
;
; In:       r24:r25 = state
;           r22:r23 = data
;           r20     = offset (r21 is ignored)
;           r18     = length (r19 is ignored, so at most 255 bytes)
; Clobbers: r0, r18, r19, X (r26:r27), Z (r30:r31), flags
;
.global Xoodoo_ExtractBytes
Xoodoo_ExtractBytes:
    movw    rX, r22                 ; X = data (destination)
    movw    rZ, r24                 ; Z = state + offset (source)
    add     rZ, r20
    adc     rZ+1, zero
    subi    r18, 8                  ; r18 = length - 8
    brcs    Xoodoo_ExtractBytes_Byte    ; borrow: fewer than 8 bytes in total
    ; 8 bytes per pass while at least 8 remain
Xoodoo_ExtractBytes_Loop8:
    .rept   8
    ld      r0, Z+
    st      X+, r0
    .endr
    subi    r18, 8
    brcc    Xoodoo_ExtractBytes_Loop8
Xoodoo_ExtractBytes_Byte:
    ldi     r19, 8                  ; undo the last subtraction:
    add     r18, r19                ;   r18 = bytes left over (0..7)
    breq    Xoodoo_ExtractBytes_End
Xoodoo_ExtractBytes_Loop1:
    ld      r0, Z+
    st      X+, r0
    dec     r18
    brne    Xoodoo_ExtractBytes_Loop1
Xoodoo_ExtractBytes_End:
    ret
|
|
284
|
+
|
|
285
|
+
;----------------------------------------------------------------------------
;
; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input,
;                                unsigned char *output,
;                                unsigned int offset, unsigned int length)
;
; For each of length bytes: output[i] = input[i] ^ state[offset + i].
; The state itself is only read.
;
; In:       r24:r25 = state
;           r22:r23 = input
;           r20:r21 = output
;           r18     = offset (r19 is ignored)
;           r16     = length (r17 is ignored, so at most 255 bytes)
; Saved:    r16, r28, r29 are pushed and restored (three bytes of stack)
; Clobbers: r0, r19, r21, X (r26:r27), Z (r30:r31), flags
;
.global Xoodoo_ExtractAndAddBytes
Xoodoo_ExtractAndAddBytes:
    tst     r16
    breq    Xoodoo_ExtractAndAddBytes_End   ; nothing to do, nothing pushed
    push    r16
    push    r28
    push    r29
    movw    rY, r20                 ; Y = output, copied before r21 is reused
    movw    rX, r22                 ; X = input
    movw    rZ, r24                 ; Z = state + offset
    add     rZ, r18
    adc     rZ+1, zero
    subi    r16, 8                  ; r16 = length - 8
    brcs    Xoodoo_ExtractAndAddBytes_Byte  ; borrow: fewer than 8 bytes
    ; 8 bytes per pass while at least 8 remain
Xoodoo_ExtractAndAddBytes_LoopLane:
    .rept   8
    ld      r21, Z+
    ld      r0, X+
    eor     r0, r21
    st      Y+, r0
    .endr
    subi    r16, 8
    brcc    Xoodoo_ExtractAndAddBytes_LoopLane
Xoodoo_ExtractAndAddBytes_Byte:
    ldi     r19, 8                  ; undo the last subtraction:
    add     r16, r19                ;   r16 = bytes left over (0..7)
    breq    Xoodoo_ExtractAndAddBytes_Done
Xoodoo_ExtractAndAddBytes_Loop1:
    ld      r21, Z+
    ld      r0, X+
    eor     r0, r21
    st      Y+, r0
    dec     r16
    brne    Xoodoo_ExtractAndAddBytes_Loop1
Xoodoo_ExtractAndAddBytes_Done:
    pop     r29                     ; restore in reverse push order
    pop     r28
    pop     r16
Xoodoo_ExtractAndAddBytes_End:
    ret
|
|
361
|
+
|
|
362
|
+
; Round constant table, two bytes per round (presumably low byte first of a
; 16-bit constant; the consumer is not shown here, confirm against it).
; The three labels mark where a 12-round run starts, where a 6-round run
; starts, and the terminator entry that follows the last round.
Xoodoo_RoundConstants_12:
    .byte   0x58, 0x00              ; 0x0058
    .byte   0x38, 0x00              ; 0x0038
    .byte   0xC0, 0x03              ; 0x03C0
    .byte   0xD0, 0x00              ; 0x00D0
    .byte   0x20, 0x01              ; 0x0120
    .byte   0x14, 0x00              ; 0x0014
Xoodoo_RoundConstants_6:
    .byte   0x60, 0x00              ; 0x0060
    .byte   0x2C, 0x00              ; 0x002C
    .byte   0x80, 0x03              ; 0x0380
    .byte   0xF0, 0x00              ; 0x00F0
    .byte   0xA0, 0x01              ; 0x01A0
    .byte   0x12, 0x00              ; 0x0012
Xoodoo_RoundConstants_0:
    .byte   0xFF, 0                 ; terminator
|
|
378
|
+
|
|
379
|
+
.text
|
|
380
|
+
|
|
381
|
+
; Register variables used in permutation.
; Each alias is the lowest register number of a group of four registers.
#define rC0     2       /* 4 regs (2-5)   */
#define rC1     6       /* 4 regs (6-9)   */
#define rC2     10      /* 4 regs (10-13) */
#define rC3     14      /* 4 regs (14-17) */
#define rVv     18      /* 4 regs (18-21) */
#define rTt     22      /* 4 regs (22-25) */
                        /* r26-27 free    */

; Byte offsets into the state in steps of 4, covering 12 entries (48 bytes).
; The names look like aYX = plane Y, column X; confirm against the permutation.
#define a00     0
#define a01     4
#define a02     8
#define a03     12
#define a10     16
#define a11     20
#define a12     24
#define a13     28
#define a20     32
#define a21     36
#define a22     40
#define a23     44
|
|
401
|
+
|
|
402
|
+
;----------------------------------------------------------------------------
|
|
403
|
+
;
|
|
404
|
+
; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
|
|
405
|
+
; Runs the last nrounds rounds of the 12-entry round-constant table on state.
|
|
406
|
+
; argument state is passed in r24:r25
|
|
407
|
+
; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
|
|
408
|
+
; nrounds must be 1..12: Z is placed nrounds entries before the terminator, and the round loop tests for the terminator only after completing a round.
|
|
409
|
+
.global Xoodoo_Permute_Nrounds
|
|
410
|
+
Xoodoo_Permute_Nrounds:
|
|
411
|
+
mov r26, r22 ; r26 = nrounds (low byte only), used as scratch
|
|
412
|
+
ldi rZ+0, lo8(Xoodoo_RoundConstants_0) ; Z = address of the table terminator
|
|
413
|
+
ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
|
|
414
|
+
lsl r26 ; r26 = 2*nrounds, each table entry is 2 bytes
|
|
415
|
+
sub rZ, r26 ; Z -= 2*nrounds (low byte)
|
|
416
|
+
sbc rZ+1, zero ; propagate the borrow (zero: presumably the always-zero register, defined outside this chunk)
|
|
417
|
+
rjmp Xoodoo_Permute ; continue in the shared body with Z at the first constant to use
|
|
418
|
+
|
|
419
|
+
;----------------------------------------------------------------------------
|
|
420
|
+
;
|
|
421
|
+
; void Xoodoo_Permute_6rounds( void *state )
|
|
422
|
+
; Runs the 6 rounds whose constants start at Xoodoo_RoundConstants_6 on state.
|
|
423
|
+
; argument state is passed in r24:r25
|
|
424
|
+
;
|
|
425
|
+
.global Xoodoo_Permute_6rounds
|
|
426
|
+
Xoodoo_Permute_6rounds:
|
|
427
|
+
ldi rZ+0, lo8(Xoodoo_RoundConstants_6) ; Z = address of the first of the last six round constants
|
|
428
|
+
ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
|
|
429
|
+
rjmp Xoodoo_Permute ; continue in the shared body
|
|
430
|
+
|
|
431
|
+
;----------------------------------------------------------------------------
|
|
432
|
+
;
|
|
433
|
+
; void Xoodoo_Permute_12rounds( void *state )
|
|
434
|
+
; Runs all 12 rounds on state; falls through into the shared body Xoodoo_Permute.
|
|
435
|
+
; argument state is passed in r24:r25
|
|
436
|
+
;
|
|
437
|
+
.global Xoodoo_Permute_12rounds
|
|
438
|
+
Xoodoo_Permute_12rounds:
|
|
439
|
+
ldi rZ+0, lo8(Xoodoo_RoundConstants_12) ; Z = address of the first round constant
|
|
440
|
+
ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
|
|
441
|
+
Xoodoo_Permute: ; shared body: expects the state pointer in rpState and Z pointing at the first round constant in program memory (read with lpm)
|
|
442
|
+
push r2 ; save r2-r17, which hold rC0..rC3 in the body (call-saved registers under the avr-gcc ABI)
|
|
443
|
+
push r3
|
|
444
|
+
push r4
|
|
445
|
+
push r5
|
|
446
|
+
push r6
|
|
447
|
+
push r7
|
|
448
|
+
push r8
|
|
449
|
+
push r9
|
|
450
|
+
push r10
|
|
451
|
+
push r11
|
|
452
|
+
push r12
|
|
453
|
+
push r13
|
|
454
|
+
push r14
|
|
455
|
+
push r15
|
|
456
|
+
push r16
|
|
457
|
+
push r17
|
|
458
|
+
push r28 ; save r28:r29 (the Y pointer), used as the state pointer in the body
|
|
459
|
+
push r29
|
|
460
|
+
|
|
461
|
+
; Initial Prepare Theta: load row 0 into c0..c3, then XOR in rows 1 and 2, giving cN = a0N ^ a1N ^ a2N
|
|
462
|
+
movw rY, rpState ; Y = state pointer
|
|
463
|
+
ld rC0+0, Y+ ; c0 = a00
|
|
464
|
+
ld rC0+1, Y+
|
|
465
|
+
ld rC0+2, Y+
|
|
466
|
+
ld rC0+3, Y+
|
|
467
|
+
ld rC1+0, Y+ ; c1 = a01
|
|
468
|
+
ld rC1+1, Y+
|
|
469
|
+
ld rC1+2, Y+
|
|
470
|
+
ld rC1+3, Y+
|
|
471
|
+
ld rC2+0, Y+ ; c2 = a02
|
|
472
|
+
ld rC2+1, Y+
|
|
473
|
+
ld rC2+2, Y+
|
|
474
|
+
ld rC2+3, Y+
|
|
475
|
+
ld rC3+0, Y+ ; c3 = a03
|
|
476
|
+
ld rC3+1, Y+
|
|
477
|
+
ld rC3+2, Y+
|
|
478
|
+
ld rC3+3, Y+
|
|
479
|
+
|
|
480
|
+
ld r0, Y+ ; c0 ^= a10
|
|
481
|
+
eor rC0+0, r0
|
|
482
|
+
ld r0, Y+
|
|
483
|
+
eor rC0+1, r0
|
|
484
|
+
ld r0, Y+
|
|
485
|
+
eor rC0+2, r0
|
|
486
|
+
ld r0, Y+
|
|
487
|
+
eor rC0+3, r0
|
|
488
|
+
ld r0, Y+ ; c1 ^= a11
|
|
489
|
+
eor rC1+0, r0
|
|
490
|
+
ld r0, Y+
|
|
491
|
+
eor rC1+1, r0
|
|
492
|
+
ld r0, Y+
|
|
493
|
+
eor rC1+2, r0
|
|
494
|
+
ld r0, Y+
|
|
495
|
+
eor rC1+3, r0
|
|
496
|
+
ld r0, Y+ ; c2 ^= a12
|
|
497
|
+
eor rC2+0, r0
|
|
498
|
+
ld r0, Y+
|
|
499
|
+
eor rC2+1, r0
|
|
500
|
+
ld r0, Y+
|
|
501
|
+
eor rC2+2, r0
|
|
502
|
+
ld r0, Y+
|
|
503
|
+
eor rC2+3, r0
|
|
504
|
+
ld r0, Y+ ; c3 ^= a13
|
|
505
|
+
eor rC3+0, r0
|
|
506
|
+
ld r0, Y+
|
|
507
|
+
eor rC3+1, r0
|
|
508
|
+
ld r0, Y+
|
|
509
|
+
eor rC3+2, r0
|
|
510
|
+
ld r0, Y+
|
|
511
|
+
eor rC3+3, r0
|
|
512
|
+
|
|
513
|
+
ld r0, Y+ ; c0 ^= a20
|
|
514
|
+
eor rC0+0, r0
|
|
515
|
+
ld r0, Y+
|
|
516
|
+
eor rC0+1, r0
|
|
517
|
+
ld r0, Y+
|
|
518
|
+
eor rC0+2, r0
|
|
519
|
+
ld r0, Y+
|
|
520
|
+
eor rC0+3, r0
|
|
521
|
+
ld r0, Y+ ; c1 ^= a21
|
|
522
|
+
eor rC1+0, r0
|
|
523
|
+
ld r0, Y+
|
|
524
|
+
eor rC1+1, r0
|
|
525
|
+
ld r0, Y+
|
|
526
|
+
eor rC1+2, r0
|
|
527
|
+
ld r0, Y+
|
|
528
|
+
eor rC1+3, r0
|
|
529
|
+
ld r0, Y+ ; c2 ^= a22
|
|
530
|
+
eor rC2+0, r0
|
|
531
|
+
ld r0, Y+
|
|
532
|
+
eor rC2+1, r0
|
|
533
|
+
ld r0, Y+
|
|
534
|
+
eor rC2+2, r0
|
|
535
|
+
ld r0, Y+
|
|
536
|
+
eor rC2+3, r0
|
|
537
|
+
ld r0, Y+ ; c3 ^= a23
|
|
538
|
+
eor rC3+0, r0
|
|
539
|
+
ld r0, Y+
|
|
540
|
+
eor rC3+1, r0
|
|
541
|
+
ld r0, Y+
|
|
542
|
+
eor rC3+2, r0
|
|
543
|
+
ld r0, Y+
|
|
544
|
+
eor rC3+3, r0
|
|
545
|
+
sbiw rY, 48 ; rewind Y over the 48 bytes just read so Y+aNN displacements address the lanes again
|
|
546
|
+
|
|
547
|
+
Xoodoo_RoundLoop:
|
|
548
|
+
; Theta + Rho west
|
|
549
|
+
; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
|
|
550
|
+
mov rVv+1, rC0+0 ; rol 9
|
|
551
|
+
mov rVv+2, rC0+1
|
|
552
|
+
mov rVv+3, rC0+2
|
|
553
|
+
mov rVv+0, rC0+3
|
|
554
|
+
lsl rVv+0
|
|
555
|
+
rol rVv+1
|
|
556
|
+
rol rVv+2
|
|
557
|
+
rol rVv+3
|
|
558
|
+
adc rVv+0, zero
|
|
559
|
+
eor rVv+0, rC0+0
|
|
560
|
+
eor rVv+1, rC0+1
|
|
561
|
+
eor rVv+2, rC0+2
|
|
562
|
+
eor rVv+3, rC0+3
|
|
563
|
+
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
|
|
564
|
+
ror rVv+3
|
|
565
|
+
ror rVv+2
|
|
566
|
+
ror rVv+1
|
|
567
|
+
ror rVv
|
|
568
|
+
bld rVv+3, 7
|
|
569
|
+
bst rVv, 0
|
|
570
|
+
ror rVv+3
|
|
571
|
+
ror rVv+2
|
|
572
|
+
ror rVv+1
|
|
573
|
+
ror rVv
|
|
574
|
+
bld rVv+3, 7
|
|
575
|
+
bst rVv, 0
|
|
576
|
+
ror rVv+3
|
|
577
|
+
ror rVv+2
|
|
578
|
+
ror rVv+1
|
|
579
|
+
ror rVv
|
|
580
|
+
bld rVv+3, 7
|
|
581
|
+
mov rC0+0, rVv+3
|
|
582
|
+
mov rC0+1, rVv+0
|
|
583
|
+
mov rC0+2, rVv+1
|
|
584
|
+
mov rC0+3, rVv+2
|
|
585
|
+
|
|
586
|
+
; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
|
|
587
|
+
mov rVv+1, rC1+0 ; rol 9
|
|
588
|
+
mov rVv+2, rC1+1
|
|
589
|
+
mov rVv+3, rC1+2
|
|
590
|
+
mov rVv+0, rC1+3
|
|
591
|
+
lsl rVv+0
|
|
592
|
+
rol rVv+1
|
|
593
|
+
rol rVv+2
|
|
594
|
+
rol rVv+3
|
|
595
|
+
adc rVv+0, zero
|
|
596
|
+
eor rVv+0, rC1+0
|
|
597
|
+
eor rVv+1, rC1+1
|
|
598
|
+
eor rVv+2, rC1+2
|
|
599
|
+
eor rVv+3, rC1+3
|
|
600
|
+
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
|
|
601
|
+
ror rVv+3
|
|
602
|
+
ror rVv+2
|
|
603
|
+
ror rVv+1
|
|
604
|
+
ror rVv
|
|
605
|
+
bld rVv+3, 7
|
|
606
|
+
bst rVv, 0
|
|
607
|
+
ror rVv+3
|
|
608
|
+
ror rVv+2
|
|
609
|
+
ror rVv+1
|
|
610
|
+
ror rVv
|
|
611
|
+
bld rVv+3, 7
|
|
612
|
+
bst rVv, 0
|
|
613
|
+
ror rVv+3
|
|
614
|
+
ror rVv+2
|
|
615
|
+
ror rVv+1
|
|
616
|
+
ror rVv
|
|
617
|
+
bld rVv+3, 7
|
|
618
|
+
mov rC1+0, rVv+3
|
|
619
|
+
mov rC1+1, rVv+0
|
|
620
|
+
mov rC1+2, rVv+1
|
|
621
|
+
mov rC1+3, rVv+2
|
|
622
|
+
|
|
623
|
+
; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
|
|
624
|
+
mov rVv+1, rC2+0 ; rol 9
|
|
625
|
+
mov rVv+2, rC2+1
|
|
626
|
+
mov rVv+3, rC2+2
|
|
627
|
+
mov rVv+0, rC2+3
|
|
628
|
+
lsl rVv+0
|
|
629
|
+
rol rVv+1
|
|
630
|
+
rol rVv+2
|
|
631
|
+
rol rVv+3
|
|
632
|
+
adc rVv+0, zero
|
|
633
|
+
eor rVv+0, rC2+0
|
|
634
|
+
eor rVv+1, rC2+1
|
|
635
|
+
eor rVv+2, rC2+2
|
|
636
|
+
eor rVv+3, rC2+3
|
|
637
|
+
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
|
|
638
|
+
ror rVv+3
|
|
639
|
+
ror rVv+2
|
|
640
|
+
ror rVv+1
|
|
641
|
+
ror rVv
|
|
642
|
+
bld rVv+3, 7
|
|
643
|
+
bst rVv, 0
|
|
644
|
+
ror rVv+3
|
|
645
|
+
ror rVv+2
|
|
646
|
+
ror rVv+1
|
|
647
|
+
ror rVv
|
|
648
|
+
bld rVv+3, 7
|
|
649
|
+
bst rVv, 0
|
|
650
|
+
ror rVv+3
|
|
651
|
+
ror rVv+2
|
|
652
|
+
ror rVv+1
|
|
653
|
+
ror rVv
|
|
654
|
+
bld rVv+3, 7
|
|
655
|
+
mov rC2+0, rVv+3
|
|
656
|
+
mov rC2+1, rVv+0
|
|
657
|
+
mov rC2+2, rVv+1
|
|
658
|
+
mov rC2+3, rVv+2
|
|
659
|
+
|
|
660
|
+
; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
|
|
661
|
+
mov rVv+1, rC3+0 ; rol 9
|
|
662
|
+
mov rVv+2, rC3+1
|
|
663
|
+
mov rVv+3, rC3+2
|
|
664
|
+
mov rVv+0, rC3+3
|
|
665
|
+
lsl rVv+0
|
|
666
|
+
rol rVv+1
|
|
667
|
+
rol rVv+2
|
|
668
|
+
rol rVv+3
|
|
669
|
+
adc rVv+0, zero
|
|
670
|
+
eor rVv+0, rC3+0
|
|
671
|
+
eor rVv+1, rC3+1
|
|
672
|
+
eor rVv+2, rC3+2
|
|
673
|
+
eor rVv+3, rC3+3
|
|
674
|
+
bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
|
|
675
|
+
ror rVv+3
|
|
676
|
+
ror rVv+2
|
|
677
|
+
ror rVv+1
|
|
678
|
+
ror rVv
|
|
679
|
+
bld rVv+3, 7
|
|
680
|
+
bst rVv, 0
|
|
681
|
+
ror rVv+3
|
|
682
|
+
ror rVv+2
|
|
683
|
+
ror rVv+1
|
|
684
|
+
ror rVv
|
|
685
|
+
bld rVv+3, 7
|
|
686
|
+
bst rVv, 0
|
|
687
|
+
ror rVv+3
|
|
688
|
+
ror rVv+2
|
|
689
|
+
ror rVv+1
|
|
690
|
+
ror rVv
|
|
691
|
+
bld rVv+3, 7
|
|
692
|
+
mov rC3+0, rVv+3
|
|
693
|
+
mov rC3+1, rVv+0
|
|
694
|
+
mov rC3+2, rVv+1
|
|
695
|
+
mov rC3+3, rVv+2
|
|
696
|
+
|
|
697
|
+
; v1 = a13;
|
|
698
|
+
ldd rVv+0, Y+a13+0
|
|
699
|
+
ldd rVv+1, Y+a13+1
|
|
700
|
+
ldd rVv+2, Y+a13+2
|
|
701
|
+
ldd rVv+3, Y+a13+3
|
|
702
|
+
|
|
703
|
+
; a13 = a12 ^ c1;
|
|
704
|
+
ldd r0, Y+a12+0
|
|
705
|
+
eor r0, rC1+0
|
|
706
|
+
std Y+a13+0, r0
|
|
707
|
+
ldd r0, Y+a12+1
|
|
708
|
+
eor r0, rC1+1
|
|
709
|
+
std Y+a13+1, r0
|
|
710
|
+
ldd r0, Y+a12+2
|
|
711
|
+
eor r0, rC1+2
|
|
712
|
+
std Y+a13+2, r0
|
|
713
|
+
ldd r0, Y+a12+3
|
|
714
|
+
eor r0, rC1+3
|
|
715
|
+
std Y+a13+3, r0
|
|
716
|
+
|
|
717
|
+
; a12 = a11 ^ c0;
|
|
718
|
+
ldd r0, Y+a11+0
|
|
719
|
+
eor r0, rC0+0
|
|
720
|
+
std Y+a12+0, r0
|
|
721
|
+
ldd r0, Y+a11+1
|
|
722
|
+
eor r0, rC0+1
|
|
723
|
+
std Y+a12+1, r0
|
|
724
|
+
ldd r0, Y+a11+2
|
|
725
|
+
eor r0, rC0+2
|
|
726
|
+
std Y+a12+2, r0
|
|
727
|
+
ldd r0, Y+a11+3
|
|
728
|
+
eor r0, rC0+3
|
|
729
|
+
std Y+a12+3, r0
|
|
730
|
+
|
|
731
|
+
; a11 = a10 ^ c3;
|
|
732
|
+
ldd r0, Y+a10+0
|
|
733
|
+
eor r0, rC3+0
|
|
734
|
+
std Y+a11+0, r0
|
|
735
|
+
ldd r0, Y+a10+1
|
|
736
|
+
eor r0, rC3+1
|
|
737
|
+
std Y+a11+1, r0
|
|
738
|
+
ldd r0, Y+a10+2
|
|
739
|
+
eor r0, rC3+2
|
|
740
|
+
std Y+a11+2, r0
|
|
741
|
+
ldd r0, Y+a10+3
|
|
742
|
+
eor r0, rC3+3
|
|
743
|
+
std Y+a11+3, r0
|
|
744
|
+
|
|
745
|
+
; a10 = v1 ^ c2;
|
|
746
|
+
eor rVv+0, rC2+0
|
|
747
|
+
std Y+a10+0, rVv+0
|
|
748
|
+
eor rVv+1, rC2+1
|
|
749
|
+
std Y+a10+1, rVv+1
|
|
750
|
+
eor rVv+2, rC2+2
|
|
751
|
+
std Y+a10+2, rVv+2
|
|
752
|
+
eor rVv+3, rC2+3
|
|
753
|
+
std Y+a10+3, rVv+3
|
|
754
|
+
|
|
755
|
+
; a20 = ROTL32(a20 ^ c3, 11);
|
|
756
|
+
ldd rVv+0, Y+a20+3
|
|
757
|
+
eor rVv+0, rC3+3
|
|
758
|
+
ldd rVv+1, Y+a20+0
|
|
759
|
+
eor rVv+1, rC3+0
|
|
760
|
+
ldd rVv+2, Y+a20+1
|
|
761
|
+
eor rVv+2, rC3+1
|
|
762
|
+
ldd rVv+3, Y+a20+2
|
|
763
|
+
eor rVv+3, rC3+2
|
|
764
|
+
lsl rVv+0
|
|
765
|
+
rol rVv+1
|
|
766
|
+
rol rVv+2
|
|
767
|
+
rol rVv+3
|
|
768
|
+
adc rVv+0, zero
|
|
769
|
+
lsl rVv+0
|
|
770
|
+
rol rVv+1
|
|
771
|
+
rol rVv+2
|
|
772
|
+
rol rVv+3
|
|
773
|
+
adc rVv+0, zero
|
|
774
|
+
lsl rVv+0
|
|
775
|
+
rol rVv+1
|
|
776
|
+
rol rVv+2
|
|
777
|
+
rol rVv+3
|
|
778
|
+
adc rVv+0, zero
|
|
779
|
+
std Y+a20+0, rVv+0
|
|
780
|
+
std Y+a20+1, rVv+1
|
|
781
|
+
std Y+a20+2, rVv+2
|
|
782
|
+
std Y+a20+3, rVv+3
|
|
783
|
+
|
|
784
|
+
; a21 = ROTL32(a21 ^ c0, 11);
|
|
785
|
+
ldd rVv+0, Y+a21+3
|
|
786
|
+
eor rVv+0, rC0+3
|
|
787
|
+
ldd rVv+1, Y+a21+0
|
|
788
|
+
eor rVv+1, rC0+0
|
|
789
|
+
ldd rVv+2, Y+a21+1
|
|
790
|
+
eor rVv+2, rC0+1
|
|
791
|
+
ldd rVv+3, Y+a21+2
|
|
792
|
+
eor rVv+3, rC0+2
|
|
793
|
+
lsl rVv+0
|
|
794
|
+
rol rVv+1
|
|
795
|
+
rol rVv+2
|
|
796
|
+
rol rVv+3
|
|
797
|
+
adc rVv+0, zero
|
|
798
|
+
lsl rVv+0
|
|
799
|
+
rol rVv+1
|
|
800
|
+
rol rVv+2
|
|
801
|
+
rol rVv+3
|
|
802
|
+
adc rVv+0, zero
|
|
803
|
+
lsl rVv+0
|
|
804
|
+
rol rVv+1
|
|
805
|
+
rol rVv+2
|
|
806
|
+
rol rVv+3
|
|
807
|
+
adc rVv+0, zero
|
|
808
|
+
std Y+a21+0, rVv+0
|
|
809
|
+
std Y+a21+1, rVv+1
|
|
810
|
+
std Y+a21+2, rVv+2
|
|
811
|
+
std Y+a21+3, rVv+3
|
|
812
|
+
|
|
813
|
+
; a22 = ROTL32(a22 ^ c1, 11);
|
|
814
|
+
ldd rVv+0, Y+a22+3
|
|
815
|
+
eor rVv+0, rC1+3
|
|
816
|
+
ldd rVv+1, Y+a22+0
|
|
817
|
+
eor rVv+1, rC1+0
|
|
818
|
+
ldd rVv+2, Y+a22+1
|
|
819
|
+
eor rVv+2, rC1+1
|
|
820
|
+
ldd rVv+3, Y+a22+2
|
|
821
|
+
eor rVv+3, rC1+2
|
|
822
|
+
lsl rVv+0
|
|
823
|
+
rol rVv+1
|
|
824
|
+
rol rVv+2
|
|
825
|
+
rol rVv+3
|
|
826
|
+
adc rVv+0, zero
|
|
827
|
+
lsl rVv+0
|
|
828
|
+
rol rVv+1
|
|
829
|
+
rol rVv+2
|
|
830
|
+
rol rVv+3
|
|
831
|
+
adc rVv+0, zero
|
|
832
|
+
lsl rVv+0
|
|
833
|
+
rol rVv+1
|
|
834
|
+
rol rVv+2
|
|
835
|
+
rol rVv+3
|
|
836
|
+
adc rVv+0, zero
|
|
837
|
+
std Y+a22+0, rVv+0
|
|
838
|
+
std Y+a22+1, rVv+1
|
|
839
|
+
std Y+a22+2, rVv+2
|
|
840
|
+
std Y+a22+3, rVv+3
|
|
841
|
+
|
|
842
|
+
; a23 = ROTL32(a23 ^ c2, 11);
|
|
843
|
+
ldd rVv+0, Y+a23+3
|
|
844
|
+
eor rVv+0, rC2+3
|
|
845
|
+
ldd rVv+1, Y+a23+0
|
|
846
|
+
eor rVv+1, rC2+0
|
|
847
|
+
ldd rVv+2, Y+a23+1
|
|
848
|
+
eor rVv+2, rC2+1
|
|
849
|
+
ldd rVv+3, Y+a23+2
|
|
850
|
+
eor rVv+3, rC2+2
|
|
851
|
+
lsl rVv+0
|
|
852
|
+
rol rVv+1
|
|
853
|
+
rol rVv+2
|
|
854
|
+
rol rVv+3
|
|
855
|
+
adc rVv+0, zero
|
|
856
|
+
lsl rVv+0
|
|
857
|
+
rol rVv+1
|
|
858
|
+
rol rVv+2
|
|
859
|
+
rol rVv+3
|
|
860
|
+
adc rVv+0, zero
|
|
861
|
+
lsl rVv+0
|
|
862
|
+
rol rVv+1
|
|
863
|
+
rol rVv+2
|
|
864
|
+
rol rVv+3
|
|
865
|
+
adc rVv+0, zero
|
|
866
|
+
std Y+a23+0, rVv+0
|
|
867
|
+
std Y+a23+1, rVv+1
|
|
868
|
+
std Y+a23+2, rVv+2
|
|
869
|
+
std Y+a23+3, rVv+3
|
|
870
|
+
|
|
871
|
+
; v1 = c3;
|
|
872
|
+
movw rVv+0, rC3+0
|
|
873
|
+
movw rVv+2, rC3+2
|
|
874
|
+
|
|
875
|
+
; c3 = a03 ^ c2; /* a03 resides in c3 */
|
|
876
|
+
ldd rC3+0, Y+a03+0
|
|
877
|
+
eor rC3+0, rC2+0
|
|
878
|
+
ldd rC3+1, Y+a03+1
|
|
879
|
+
eor rC3+1, rC2+1
|
|
880
|
+
ldd rC3+2, Y+a03+2
|
|
881
|
+
eor rC3+2, rC2+2
|
|
882
|
+
ldd rC3+3, Y+a03+3
|
|
883
|
+
eor rC3+3, rC2+3
|
|
884
|
+
|
|
885
|
+
; c2 = a02 ^ c1; /* a02 resides in c2 */
|
|
886
|
+
ldd rC2+0, Y+a02+0
|
|
887
|
+
eor rC2+0, rC1+0
|
|
888
|
+
ldd rC2+1, Y+a02+1
|
|
889
|
+
eor rC2+1, rC1+1
|
|
890
|
+
ldd rC2+2, Y+a02+2
|
|
891
|
+
eor rC2+2, rC1+2
|
|
892
|
+
ldd rC2+3, Y+a02+3
|
|
893
|
+
eor rC2+3, rC1+3
|
|
894
|
+
|
|
895
|
+
; c1 = a01 ^ c0; /* a01 resides in c1 */
|
|
896
|
+
ldd rC1+0, Y+a01+0
|
|
897
|
+
eor rC1+0, rC0+0
|
|
898
|
+
ldd rC1+1, Y+a01+1
|
|
899
|
+
eor rC1+1, rC0+1
|
|
900
|
+
ldd rC1+2, Y+a01+2
|
|
901
|
+
eor rC1+2, rC0+2
|
|
902
|
+
ldd rC1+3, Y+a01+3
|
|
903
|
+
eor rC1+3, rC0+3
|
|
904
|
+
|
|
905
|
+
; c0 = a00 ^ v1; /* a00 resides in c0 */
|
|
906
|
+
ldd rC0+0, Y+a00+0
|
|
907
|
+
eor rC0+0, rVv+0
|
|
908
|
+
ldd rC0+1, Y+a00+1
|
|
909
|
+
eor rC0+1, rVv+1
|
|
910
|
+
ldd rC0+2, Y+a00+2
|
|
911
|
+
eor rC0+2, rVv+2
|
|
912
|
+
ldd rC0+3, Y+a00+3
|
|
913
|
+
eor rC0+3, rVv+3
|
|
914
|
+
|
|
915
|
+
; c0 ^= __rc; /* +Iota */
|
|
916
|
+
lpm rVv+0, Z+
|
|
917
|
+
lpm rVv+1, Z+
|
|
918
|
+
eor rC0+0, rVv+0
|
|
919
|
+
eor rC0+1, rVv+1
|
|
920
|
+
|
|
921
|
+
; Chi + Rho east + Early Theta
|
|
922
|
+
; a00 = c0 ^= ~a10 & a20;
|
|
923
|
+
ldd r0, Y+a10+0
|
|
924
|
+
com r0
|
|
925
|
+
ldd rTt+0, Y+a20+0 ; a20 in rTt
|
|
926
|
+
and r0, rTt+0
|
|
927
|
+
eor rC0+0, r0
|
|
928
|
+
std Y+a00+0, rC0+0
|
|
929
|
+
ldd r0, Y+a10+1
|
|
930
|
+
com r0
|
|
931
|
+
ldd rTt+1, Y+a20+1
|
|
932
|
+
and r0, rTt+1
|
|
933
|
+
eor rC0+1, r0
|
|
934
|
+
std Y+a00+1, rC0+1
|
|
935
|
+
ldd r0, Y+a10+2
|
|
936
|
+
com r0
|
|
937
|
+
ldd rTt+2, Y+a20+2
|
|
938
|
+
and r0, rTt+2
|
|
939
|
+
eor rC0+2, r0
|
|
940
|
+
std Y+a00+2, rC0+2
|
|
941
|
+
ldd r0, Y+a10+3
|
|
942
|
+
com r0
|
|
943
|
+
ldd rTt+3, Y+a20+3
|
|
944
|
+
and r0, rTt+3
|
|
945
|
+
eor rC0+3, r0
|
|
946
|
+
std Y+a00+3, rC0+3
|
|
947
|
+
|
|
948
|
+
; a10 ^= ~a20 & c0;
|
|
949
|
+
com rTt+0
|
|
950
|
+
and rTt+0, rC0+0
|
|
951
|
+
ldd r0, Y+a10+0
|
|
952
|
+
eor rTt+0, r0 ; new a10 in rTt
|
|
953
|
+
std Y+a10+0, rTt+0
|
|
954
|
+
com rTt+1
|
|
955
|
+
and rTt+1, rC0+1
|
|
956
|
+
ldd r0, Y+a10+1
|
|
957
|
+
eor rTt+1, r0
|
|
958
|
+
std Y+a10+1, rTt+1
|
|
959
|
+
com rTt+2
|
|
960
|
+
and rTt+2, rC0+2
|
|
961
|
+
ldd r0, Y+a10+2
|
|
962
|
+
eor rTt+2, r0
|
|
963
|
+
std Y+a10+2, rTt+2
|
|
964
|
+
com rTt+3
|
|
965
|
+
and rTt+3, rC0+3
|
|
966
|
+
ldd r0, Y+a10+3
|
|
967
|
+
eor rTt+3, r0
|
|
968
|
+
std Y+a10+3, rTt+3
|
|
969
|
+
|
|
970
|
+
; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
|
|
971
|
+
movw rVv+0, rTt+0 ; a10 in rVv
|
|
972
|
+
movw rVv+2, rTt+2
|
|
973
|
+
mov r0, rC0+0
|
|
974
|
+
com r0
|
|
975
|
+
and rTt+0, r0
|
|
976
|
+
ldd r0, Y+a20+0
|
|
977
|
+
eor rTt+0, r0
|
|
978
|
+
|
|
979
|
+
mov r0, rC0+1
|
|
980
|
+
com r0
|
|
981
|
+
and rTt+1, r0
|
|
982
|
+
ldd r0, Y+a20+1
|
|
983
|
+
eor rTt+1, r0
|
|
984
|
+
|
|
985
|
+
mov r0, rC0+2
|
|
986
|
+
com r0
|
|
987
|
+
and rTt+2, r0
|
|
988
|
+
ldd r0, Y+a20+2
|
|
989
|
+
eor rTt+2, r0
|
|
990
|
+
|
|
991
|
+
mov r0, rC0+3
|
|
992
|
+
com r0
|
|
993
|
+
and rTt+3, r0
|
|
994
|
+
ldd r0, Y+a20+3
|
|
995
|
+
eor rTt+3, r0
|
|
996
|
+
std Y+a20+0, rTt+3
|
|
997
|
+
std Y+a20+1, rTt+0
|
|
998
|
+
std Y+a20+2, rTt+1
|
|
999
|
+
std Y+a20+3, rTt+2
|
|
1000
|
+
|
|
1001
|
+
; c0 ^= a10 = ROTL32(a10, 1);
|
|
1002
|
+
lsl rVv+0
|
|
1003
|
+
rol rVv+1
|
|
1004
|
+
std Y+a10+1, rVv+1
|
|
1005
|
+
eor rC0+1, rVv+1
|
|
1006
|
+
rol rVv+2
|
|
1007
|
+
std Y+a10+2, rVv+2
|
|
1008
|
+
eor rC0+2, rVv+2
|
|
1009
|
+
rol rVv+3
|
|
1010
|
+
std Y+a10+3, rVv+3
|
|
1011
|
+
eor rC0+3, rVv+3
|
|
1012
|
+
adc rVv+0, zero
|
|
1013
|
+
std Y+a10+0, rVv+0
|
|
1014
|
+
eor rC0+0, rVv+0
|
|
1015
|
+
|
|
1016
|
+
; a02 = c2 ^= ~a12 & a22;
|
|
1017
|
+
ldd r0, Y+a12+0
|
|
1018
|
+
com r0
|
|
1019
|
+
ldd rVv+0, Y+a22+0 ; a22 in rVv
|
|
1020
|
+
and r0, rVv+0
|
|
1021
|
+
eor rC2+0, r0
|
|
1022
|
+
std Y+a02+0, rC2+0
|
|
1023
|
+
ldd r0, Y+a12+1
|
|
1024
|
+
com r0
|
|
1025
|
+
ldd rVv+1, Y+a22+1
|
|
1026
|
+
and r0, rVv+1
|
|
1027
|
+
eor rC2+1, r0
|
|
1028
|
+
std Y+a02+1, rC2+1
|
|
1029
|
+
ldd r0, Y+a12+2
|
|
1030
|
+
com r0
|
|
1031
|
+
ldd rVv+2, Y+a22+2
|
|
1032
|
+
and r0, rVv+2
|
|
1033
|
+
eor rC2+2, r0
|
|
1034
|
+
std Y+a02+2, rC2+2
|
|
1035
|
+
ldd r0, Y+a12+3
|
|
1036
|
+
com r0
|
|
1037
|
+
ldd rVv+3, Y+a22+3
|
|
1038
|
+
and r0, rVv+3
|
|
1039
|
+
eor rC2+3, r0
|
|
1040
|
+
std Y+a02+3, rC2+3
|
|
1041
|
+
|
|
1042
|
+
; a12 ^= ~a22 & c2;
|
|
1043
|
+
mov r0, rVv+0 ; a12 in rTt
|
|
1044
|
+
com r0
|
|
1045
|
+
and r0, rC2+0
|
|
1046
|
+
ldd rTt+0, Y+a12+0
|
|
1047
|
+
eor rTt+0, r0
|
|
1048
|
+
std Y+a12+0, rTt+0
|
|
1049
|
+
mov r0, rVv+1
|
|
1050
|
+
com r0
|
|
1051
|
+
and r0, rC2+1
|
|
1052
|
+
ldd rTt+1, Y+a12+1
|
|
1053
|
+
eor rTt+1, r0
|
|
1054
|
+
std Y+a12+1, rTt+1
|
|
1055
|
+
mov r0, rVv+2
|
|
1056
|
+
com r0
|
|
1057
|
+
and r0, rC2+2
|
|
1058
|
+
ldd rTt+2, Y+a12+2
|
|
1059
|
+
eor rTt+2, r0
|
|
1060
|
+
std Y+a12+2, rTt+2
|
|
1061
|
+
mov r0, rVv+3
|
|
1062
|
+
com r0
|
|
1063
|
+
and r0, rC2+3
|
|
1064
|
+
ldd rTt+3, Y+a12+3
|
|
1065
|
+
eor rTt+3, r0
|
|
1066
|
+
std Y+a12+3, rTt+3
|
|
1067
|
+
|
|
1068
|
+
; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
|
|
1069
|
+
mov r0, rC2+0
|
|
1070
|
+
com r0
|
|
1071
|
+
and r0, rTt+0
|
|
1072
|
+
eor r0, rVv+0
|
|
1073
|
+
ldd rVv+0, Y+a20+1 ; rVv = a22
|
|
1074
|
+
std Y+a20+1, r0
|
|
1075
|
+
eor rC0+1, r0
|
|
1076
|
+
mov r0, rC2+1
|
|
1077
|
+
com r0
|
|
1078
|
+
and r0, rTt+1
|
|
1079
|
+
eor r0, rVv+1
|
|
1080
|
+
ldd rVv+1, Y+a20+2
|
|
1081
|
+
std Y+a20+2, r0
|
|
1082
|
+
eor rC0+2, r0
|
|
1083
|
+
mov r0, rC2+2
|
|
1084
|
+
com r0
|
|
1085
|
+
and r0, rTt+2
|
|
1086
|
+
eor r0, rVv+2
|
|
1087
|
+
ldd rVv+2, Y+a20+3
|
|
1088
|
+
std Y+a20+3, r0
|
|
1089
|
+
eor rC0+3, r0
|
|
1090
|
+
mov r0, rC2+3
|
|
1091
|
+
com r0
|
|
1092
|
+
and r0, rTt+3
|
|
1093
|
+
eor r0, rVv+3
|
|
1094
|
+
ldd rVv+3, Y+a20+0
|
|
1095
|
+
std Y+a20+0, r0
|
|
1096
|
+
eor rC0+0, r0
|
|
1097
|
+
|
|
1098
|
+
; c2 ^= a12 = ROTL32(a12, 1);
|
|
1099
|
+
lsl rTt+0
|
|
1100
|
+
rol rTt+1
|
|
1101
|
+
eor rC2+1, rTt+1
|
|
1102
|
+
std Y+a12+1, rTt+1
|
|
1103
|
+
rol rTt+2
|
|
1104
|
+
eor rC2+2, rTt+2
|
|
1105
|
+
std Y+a12+2, rTt+2
|
|
1106
|
+
rol rTt+3
|
|
1107
|
+
eor rC2+3, rTt+3
|
|
1108
|
+
std Y+a12+3, rTt+3
|
|
1109
|
+
adc rTt+0, zero
|
|
1110
|
+
eor rC2+0, rTt+0
|
|
1111
|
+
std Y+a12+0, rTt+0
|
|
1112
|
+
|
|
1113
|
+
; a22 = v1;
|
|
1114
|
+
std Y+a22+0, rVv+3
|
|
1115
|
+
std Y+a22+1, rVv+0
|
|
1116
|
+
std Y+a22+2, rVv+1
|
|
1117
|
+
std Y+a22+3, rVv+2
|
|
1118
|
+
|
|
1119
|
+
; c2 ^= v1;
|
|
1120
|
+
eor rC2+0, rVv+3
|
|
1121
|
+
eor rC2+1, rVv+0
|
|
1122
|
+
eor rC2+2, rVv+1
|
|
1123
|
+
eor rC2+3, rVv+2
|
|
1124
|
+
|
|
1125
|
+
; a01 = c1 ^= ~a11 & a21;
|
|
1126
|
+
ldd rTt+0, Y+a11+0 ;rTt holds a11
|
|
1127
|
+
mov r0, rTt+0
|
|
1128
|
+
com r0
|
|
1129
|
+
ldd rVv+0, Y+a21+0 ;rVv holds a21
|
|
1130
|
+
and r0, rVv+0
|
|
1131
|
+
eor rC1+0, r0
|
|
1132
|
+
std Y+a01+0, rC1+0
|
|
1133
|
+
ldd rTt+1, Y+a11+1
|
|
1134
|
+
mov r0, rTt+1
|
|
1135
|
+
com r0
|
|
1136
|
+
ldd rVv+1, Y+a21+1
|
|
1137
|
+
and r0, rVv+1
|
|
1138
|
+
eor rC1+1, r0
|
|
1139
|
+
std Y+a01+1, rC1+1
|
|
1140
|
+
ldd rTt+2, Y+a11+2
|
|
1141
|
+
mov r0, rTt+2
|
|
1142
|
+
com r0
|
|
1143
|
+
ldd rVv+2, Y+a21+2
|
|
1144
|
+
and r0, rVv+2
|
|
1145
|
+
eor rC1+2, r0
|
|
1146
|
+
std Y+a01+2, rC1+2
|
|
1147
|
+
ldd rTt+3, Y+a11+3
|
|
1148
|
+
mov r0, rTt+3
|
|
1149
|
+
com r0
|
|
1150
|
+
ldd rVv+3, Y+a21+3
|
|
1151
|
+
and r0, rVv+3
|
|
1152
|
+
eor rC1+3, r0
|
|
1153
|
+
std Y+a01+3, rC1+3
|
|
1154
|
+
|
|
1155
|
+
; a11 ^= ~a21 & c1;
|
|
1156
|
+
mov r0, rVv+0
|
|
1157
|
+
com r0
|
|
1158
|
+
and r0, rC1+0
|
|
1159
|
+
eor rTt+0, r0
|
|
1160
|
+
std Y+a11+0, rTt+0
|
|
1161
|
+
mov r0, rVv+1
|
|
1162
|
+
com r0
|
|
1163
|
+
and r0, rC1+1
|
|
1164
|
+
eor rTt+1, r0
|
|
1165
|
+
std Y+a11+1, rTt+1
|
|
1166
|
+
mov r0, rVv+2
|
|
1167
|
+
com r0
|
|
1168
|
+
and r0, rC1+2
|
|
1169
|
+
eor rTt+2, r0
|
|
1170
|
+
std Y+a11+2, rTt+2
|
|
1171
|
+
mov r0, rVv+3
|
|
1172
|
+
com r0
|
|
1173
|
+
and r0, rC1+3
|
|
1174
|
+
eor rTt+3, r0
|
|
1175
|
+
std Y+a11+3, rTt+3
|
|
1176
|
+
|
|
1177
|
+
; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
|
|
1178
|
+
mov r0, rC1+0
|
|
1179
|
+
com r0
|
|
1180
|
+
and r0, rTt+0
|
|
1181
|
+
eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
|
|
1182
|
+
mov r0, rC1+1
|
|
1183
|
+
com r0
|
|
1184
|
+
and r0, rTt+1
|
|
1185
|
+
eor rVv+1, r0
|
|
1186
|
+
mov r0, rC1+2
|
|
1187
|
+
com r0
|
|
1188
|
+
and r0, rTt+2
|
|
1189
|
+
eor rVv+2, r0
|
|
1190
|
+
mov r0, rC1+3
|
|
1191
|
+
com r0
|
|
1192
|
+
and r0, rTt+3
|
|
1193
|
+
eor rVv+3, r0
|
|
1194
|
+
|
|
1195
|
+
; c1 ^= a11 = ROTL32(a11, 1);
|
|
1196
|
+
lsl rTt+0
|
|
1197
|
+
rol rTt+1
|
|
1198
|
+
eor rC1+1, rTt+1
|
|
1199
|
+
std Y+a11+1, rTt+1
|
|
1200
|
+
rol rTt+2
|
|
1201
|
+
eor rC1+2, rTt+2
|
|
1202
|
+
std Y+a11+2, rTt+2
|
|
1203
|
+
rol rTt+3
|
|
1204
|
+
eor rC1+3, rTt+3
|
|
1205
|
+
std Y+a11+3, rTt+3
|
|
1206
|
+
adc rTt+0, zero
|
|
1207
|
+
eor rC1+0, rTt+0
|
|
1208
|
+
std Y+a11+0, rTt+0
|
|
1209
|
+
|
|
1210
|
+
; a03 = c3 ^= ~a13 & a23;
|
|
1211
|
+
ldd r0, Y+a13+0
|
|
1212
|
+
com r0
|
|
1213
|
+
ldd rTt+0, Y+a23+0 ; a23 in rTt
|
|
1214
|
+
and r0, rTt+0
|
|
1215
|
+
eor rC3+0, r0
|
|
1216
|
+
std Y+a03+0, rC3+0
|
|
1217
|
+
ldd r0, Y+a13+1
|
|
1218
|
+
com r0
|
|
1219
|
+
ldd rTt+1, Y+a23+1
|
|
1220
|
+
and r0, rTt+1
|
|
1221
|
+
eor rC3+1, r0
|
|
1222
|
+
std Y+a03+1, rC3+1
|
|
1223
|
+
ldd r0, Y+a13+2
|
|
1224
|
+
com r0
|
|
1225
|
+
ldd rTt+2, Y+a23+2
|
|
1226
|
+
and r0, rTt+2
|
|
1227
|
+
eor rC3+2, r0
|
|
1228
|
+
std Y+a03+2, rC3+2
|
|
1229
|
+
ldd r0, Y+a13+3
|
|
1230
|
+
com r0
|
|
1231
|
+
ldd rTt+3, Y+a23+3
|
|
1232
|
+
and r0, rTt+3
|
|
1233
|
+
eor rC3+3, r0
|
|
1234
|
+
std Y+a03+3, rC3+3
|
|
1235
|
+
|
|
1236
|
+
; a13 ^= ~a23 & c3;
|
|
1237
|
+
mov r0, rTt+0
|
|
1238
|
+
com r0
|
|
1239
|
+
and r0, rC3+0
|
|
1240
|
+
ldd rTt+0, Y+a13+0 ; a13 in rTt
|
|
1241
|
+
eor rTt+0, r0
|
|
1242
|
+
mov r0, rTt+1
|
|
1243
|
+
com r0
|
|
1244
|
+
and r0, rC3+1
|
|
1245
|
+
ldd rTt+1, Y+a13+1
|
|
1246
|
+
eor rTt+1, r0
|
|
1247
|
+
mov r0, rTt+2
|
|
1248
|
+
com r0
|
|
1249
|
+
and r0, rC3+2
|
|
1250
|
+
ldd rTt+2, Y+a13+2
|
|
1251
|
+
eor rTt+2, r0
|
|
1252
|
+
mov r0, rTt+3
|
|
1253
|
+
com r0
|
|
1254
|
+
and r0, rC3+3
|
|
1255
|
+
ldd rTt+3, Y+a13+3
|
|
1256
|
+
eor rTt+3, r0
|
|
1257
|
+
|
|
1258
|
+
; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
|
|
1259
|
+
push rVv
|
|
1260
|
+
mov r0, rC3+0
|
|
1261
|
+
com r0
|
|
1262
|
+
and r0, rTt+0
|
|
1263
|
+
ldd rVv, Y+a23+0
|
|
1264
|
+
eor r0, rVv
|
|
1265
|
+
eor rC1+1, r0
|
|
1266
|
+
std Y+a21+1, r0
|
|
1267
|
+
mov r0, rC3+1
|
|
1268
|
+
com r0
|
|
1269
|
+
and r0, rTt+1
|
|
1270
|
+
ldd rVv, Y+a23+1
|
|
1271
|
+
eor r0, rVv
|
|
1272
|
+
eor rC1+2, r0
|
|
1273
|
+
std Y+a21+2, r0
|
|
1274
|
+
mov r0, rC3+2
|
|
1275
|
+
com r0
|
|
1276
|
+
and r0, rTt+2
|
|
1277
|
+
ldd rVv, Y+a23+2
|
|
1278
|
+
eor r0, rVv
|
|
1279
|
+
eor rC1+3, r0
|
|
1280
|
+
std Y+a21+3, r0
|
|
1281
|
+
mov r0, rC3+3
|
|
1282
|
+
com r0
|
|
1283
|
+
and r0, rTt+3
|
|
1284
|
+
ldd rVv, Y+a23+3
|
|
1285
|
+
eor r0, rVv
|
|
1286
|
+
eor rC1+0, r0
|
|
1287
|
+
std Y+a21+0, r0
|
|
1288
|
+
pop rVv
|
|
1289
|
+
|
|
1290
|
+
; a23 = v1;
|
|
1291
|
+
std Y+a23+0, rVv+3 ; rol8(rVv)
|
|
1292
|
+
std Y+a23+1, rVv+0
|
|
1293
|
+
std Y+a23+2, rVv+1
|
|
1294
|
+
std Y+a23+3, rVv+2
|
|
1295
|
+
|
|
1296
|
+
; c3 ^= v1;
|
|
1297
|
+
eor rC3+0, rVv+3
|
|
1298
|
+
eor rC3+1, rVv+0
|
|
1299
|
+
eor rC3+2, rVv+1
|
|
1300
|
+
eor rC3+3, rVv+2
|
|
1301
|
+
|
|
1302
|
+
; c3 ^= a13 = ROTL32(a13, 1);
|
|
1303
|
+
lsl rTt+0
|
|
1304
|
+
rol rTt+1
|
|
1305
|
+
std Y+a13+1, rTt+1
|
|
1306
|
+
eor rC3+1, rTt+1
|
|
1307
|
+
rol rTt+2
|
|
1308
|
+
std Y+a13+2, rTt+2
|
|
1309
|
+
eor rC3+2, rTt+2
|
|
1310
|
+
rol rTt+3
|
|
1311
|
+
std Y+a13+3, rTt+3
|
|
1312
|
+
eor rC3+3, rTt+3
|
|
1313
|
+
adc rTt+0, zero
|
|
1314
|
+
std Y+a13+0, rTt+0
|
|
1315
|
+
eor rC3+0, rTt+0
|
|
1316
|
+
|
|
1317
|
+
; Check for terminator: loop until the next table entry starts with 0xFF
|
|
1318
|
+
lpm r0, Z ; peek at the first byte of the next round constant (Z is not advanced)
|
|
1319
|
+
inc r0 ; 0xFF + 1 wraps to 0 and sets the zero flag
|
|
1320
|
+
breq Xoodoo_Done ; terminator reached: all requested rounds are done
|
|
1321
|
+
rjmp Xoodoo_RoundLoop ; otherwise run another round (rjmp because the loop body exceeds the reach of a conditional branch)
|
|
1322
|
+
Xoodoo_Done:
|
|
1323
|
+
pop r29 ; restore the saved registers in reverse order of the pushes at Xoodoo_Permute
|
|
1324
|
+
pop r28
|
|
1325
|
+
pop r17
|
|
1326
|
+
pop r16
|
|
1327
|
+
pop r15
|
|
1328
|
+
pop r14
|
|
1329
|
+
pop r13
|
|
1330
|
+
pop r12
|
|
1331
|
+
pop r11
|
|
1332
|
+
pop r10
|
|
1333
|
+
pop r9
|
|
1334
|
+
pop r8
|
|
1335
|
+
pop r7
|
|
1336
|
+
pop r6
|
|
1337
|
+
pop r5
|
|
1338
|
+
pop r4
|
|
1339
|
+
pop r3
|
|
1340
|
+
pop r2
|
|
1341
|
+
|
+
ret
|