digest-kangarootwelve 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -11
- data/Rakefile +2 -2
- data/digest-kangarootwelve.gemspec +322 -42
- data/ext/digest/kangarootwelve/ext.c +1 -1
- data/ext/digest/kangarootwelve/extconf.rb +13 -1
- data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
- data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
- data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
- data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
- data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
- data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
- data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
- data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
- data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
- data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
- data/lib/digest/kangarootwelve/version.rb +1 -1
- metadata +299 -21
@@ -0,0 +1,1245 @@
|
|
1
|
+
@
|
2
|
+
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
@
|
4
|
+
@ For more information, feedback or questions, please refer to our website:
|
5
|
+
@ https://keccak.team/
|
6
|
+
@
|
7
|
+
@ To the extent possible under law, the implementer has waived all copyright
|
8
|
+
@ and related or neighboring rights to the source code in this file.
|
9
|
+
@ http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
@
|
11
|
+
@ ---
|
12
|
+
@
|
13
|
+
@ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
|
14
|
+
@ Please refer to PlSnP-documentation.h for more details.
|
15
|
+
@
|
16
|
+
@ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
|
17
|
+
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
18
|
+
@
|
19
|
+
|
20
|
+
@ WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
|
21
|
+
@ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
|
22
|
+
|
23
|
+
@ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
|
24
|
+
@ INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
|
25
|
+
|
26
|
+
@ INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
.text
|
31
|
+
|
32
|
+
@----------------------------------------------------------------------------
|
33
|
+
|
34
|
+
@ --- offsets in state
@ Byte offsets of the 25 lane slots. Each name is an underscore followed by two
@ letters: the first from b,g,k,m,s and the second from a,e,i,o,u.
@ Consecutive slots are 16 bytes apart.
@ NOTE(review): presumably each 16-byte slot holds the same lane of both
@ interleaved states as two 64-bit words (see the "2 lane interleaved states"
@ INFO line in the file header) - confirm against KeccakP-1600-times2-SnP.h.
|
35
|
+
.equ _ba , 0*16
|
36
|
+
.equ _be , 1*16
|
37
|
+
.equ _bi , 2*16
|
38
|
+
.equ _bo , 3*16
|
39
|
+
.equ _bu , 4*16
|
40
|
+
.equ _ga , 5*16
|
41
|
+
.equ _ge , 6*16
|
42
|
+
.equ _gi , 7*16
|
43
|
+
.equ _go , 8*16
|
44
|
+
.equ _gu , 9*16
|
45
|
+
.equ _ka , 10*16
|
46
|
+
.equ _ke , 11*16
|
47
|
+
.equ _ki , 12*16
|
48
|
+
.equ _ko , 13*16
|
49
|
+
.equ _ku , 14*16
|
50
|
+
.equ _ma , 15*16
|
51
|
+
.equ _me , 16*16
|
52
|
+
.equ _mi , 17*16
|
53
|
+
.equ _mo , 18*16
|
54
|
+
.equ _mu , 19*16
|
55
|
+
.equ _sa , 20*16
|
56
|
+
.equ _se , 21*16
|
57
|
+
.equ _si , 22*16
|
58
|
+
.equ _so , 23*16
|
59
|
+
.equ _su , 24*16
|
60
|
+
|
61
|
+
@ --- macros for Single permutation
|
62
|
+
|
63
|
+
.macro KeccakS_ThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
@ Processes one plane of a single state: derives the Theta D values for the
@ round, then applies Theta/Rho/Pi/Chi to argA1..argA5 and XORs the round
@ constant (Iota) into the new argA1 lane.
@   argA1        : byte offset from r0 of the lane kept in memory (vldr/vstr).
@   argA2..argA5 : D registers holding the other four lanes; rewritten in place.
@ On entry d5 is used as Ca; Ce, Ci, Co and Cu are formed by XORing d12-d16,
@ d17-d21, d22-d26 and d27-d31 respectively.
@ On exit d7 = Da, d8 = De, d9 = Di, d10 = Do, d11 = Du, and d5 holds the new
@ argA1 lane. d0-d4 and d6 are scratch; r1 is advanced past one 64-bit constant.
@ The instruction order interleaves independent operations; scratch registers
@ are reused as soon as their previous value has been consumed.
|
64
|
+
|
65
|
+
@Prepare Theta
|
66
|
+
@ Ca = Aba^Aga^Aka^Ama^Asa
|
67
|
+
@ Ce = Abe^Age^Ake^Ame^Ase
|
68
|
+
@ Ci = Abi^Agi^Aki^Ami^Asi
|
69
|
+
@ Co = Abo^Ago^Ako^Amo^Aso
|
70
|
+
@ Cu = Abu^Agu^Aku^Amu^Asu
|
71
|
+
@ De = Ca^ROL64(Ci, 1)
|
72
|
+
@ Di = Ce^ROL64(Co, 1)
|
73
|
+
@ Do = Ci^ROL64(Cu, 1)
|
74
|
+
@ Du = Co^ROL64(Ca, 1)
|
75
|
+
@ Da = Cu^ROL64(Ce, 1)
|
76
|
+
@ Fold d12-d16 into d1 (Ce) and d17-d21 into d2 (Ci), using q4/q5 as temporaries.
veor.64 q4, q6, q7
|
77
|
+
veor.64 q5, q9, q10
|
78
|
+
veor.64 d8, d8, d9
|
79
|
+
veor.64 d10, d10, d11
|
80
|
+
veor.64 d1, d8, d16
|
81
|
+
veor.64 d2, d10, d17
|
82
|
+
|
83
|
+
@ Fold d22-d26 into d3 (Co); d27-d31 are folded into d4 (Cu) a few lines below.
veor.64 q4, q11, q12
|
84
|
+
veor.64 q5, q14, q15
|
85
|
+
veor.64 d8, d8, d9
|
86
|
+
veor.64 d10, d10, d11
|
87
|
+
veor.64 d3, d8, d26
|
88
|
+
|
89
|
+
@ vadd x,x followed by vsri #63 forms ROL64(x, 1).
@ q4 = ROL64({Ci, Co}, 1), XORed with q0 = {Ca, Ce} below to give {De, Di}.
vadd.u64 q4, q1, q1
|
90
|
+
veor.64 d4, d10, d27
|
91
|
+
@ d0 = Ca (carried in d5), so that q0 = {Ca, Ce}.
vmov.64 d0, d5
|
92
|
+
vsri.64 q4, q1, #63
|
93
|
+
|
94
|
+
@ q5 = ROL64({Cu, Ca}, 1), XORed with q1 = {Ci, Co} below to give {Do, Du}.
vadd.u64 q5, q2, q2
|
95
|
+
veor.64 q4, q4, q0
|
96
|
+
vsri.64 q5, q2, #63
|
97
|
+
@ d7 = ROL64(Ce, 1), XORed with Cu (d4) below to give Da.
vadd.u64 d7, d1, d1
|
98
|
+
veor.64 \argA2, \argA2,d8
|
99
|
+
veor.64 q5, q5, q1
|
100
|
+
|
101
|
+
vsri.64 d7, d1, #63
|
102
|
+
vshl.u64 d1, \argA2,#44
|
103
|
+
veor.64 \argA3, \argA3,d9
|
104
|
+
veor.64 d7, d7, d4
|
105
|
+
|
106
|
+
@ Ba = argA1^Da
|
107
|
+
@ Be = ROL64((argA2^De), 44)
|
108
|
+
@ Bi = ROL64((argA3^Di), 43)
|
109
|
+
@ Bo = ROL64((argA4^Do), 21)
|
110
|
+
@ Bu = ROL64((argA5^Du), 14)
|
111
|
+
@ argA2 = Be ^((~Bi)& Bo )
|
112
|
+
@ argA3 = Bi ^((~Bo)& Bu )
|
113
|
+
@ argA4 = Bo ^((~Bu)& Ba )
|
114
|
+
@ argA5 = Bu ^((~Ba)& Be )
|
115
|
+
@ argA1 = Ba ^((~Be)& Bi )
|
116
|
+
@ argA1 ^= KeccakP1600RoundConstants[i+round]
|
117
|
+
@ Each rotation is a vshl by n followed by a vsri by 64-n into the same register.
@ Register roles from here on: d0 = Ba, d1 = Be, d2 = Bi, d3 = Bo, d4 = Bu.
vsri.64 d1, \argA2, #64-44
|
118
|
+
vshl.u64 d2, \argA3, #43
|
119
|
+
vldr.64 d0, [r0, #\argA1]
|
120
|
+
veor.64 \argA4, \argA4, d10
|
121
|
+
vsri.64 d2, \argA3, #64-43
|
122
|
+
vshl.u64 d3, \argA4, #21
|
123
|
+
veor.64 \argA5, \argA5, d11
|
124
|
+
veor.64 d0, d0, d7
|
125
|
+
vsri.64 d3, \argA4, #64-21
|
126
|
+
@ vbic Dd, Dn, Dm computes Dn & ~Dm; d5 starts the new argA1 lane as (~Be) & Bi.
vbic.64 d5, d2, d1
|
127
|
+
vshl.u64 d4, \argA5, #14
|
128
|
+
vbic.64 \argA2, d3, d2
|
129
|
+
@ Fetch the round constant into d6 and advance r1.
vld1.64 d6, [r1]!
|
130
|
+
veor.64 d5, d0
|
131
|
+
vsri.64 d4, \argA5, #64-14
|
132
|
+
veor.64 d5, d6
|
133
|
+
vbic.64 \argA5, d1, d0
|
134
|
+
vbic.64 \argA3, d4, d3
|
135
|
+
vbic.64 \argA4, d0, d4
|
136
|
+
veor.64 \argA2, d1
|
137
|
+
@ Write the new argA1 lane back to the state; the value also stays in d5, which
@ KeccakS_ThetaRhoPiChi1 and KeccakS_ThetaRhoPiChi2 XOR their own new argA1 lanes into (Ca).
vstr.64 d5, [r0, #\argA1]
|
138
|
+
veor.64 \argA3, d2
|
139
|
+
veor.64 \argA4, d3
|
140
|
+
veor.64 \argA5, d4
|
141
|
+
.endm
|
142
|
+
|
143
|
+
.macro KeccakS_ThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
@ Theta/Rho/Pi/Chi for one plane of a single state, with rotation amounts
@ 3, 45, 61, 28 and 20 for argA1..argA5.
@   argA1        : byte offset from r0 of the lane kept in memory (vldr/vstr).
@   argA2..argA5 : D registers holding the other four lanes; rewritten in place.
@ Reads d7 = Da, d8 = De, d9 = Di, d10 = Do, d11 = Du and XORs the new argA1
@ lane into d5 (Ca). d0-d4 and d6 are scratch.
@ NOTE(review): d7-d11 are the registers KeccakS_ThetaRhoPiChiIota leaves the
@ D values in; the call sites are not visible here - confirm the calling order.
|
144
|
+
|
145
|
+
@ Bi = ROL64((argA1^Da), 3)
|
146
|
+
@ Bo = ROL64((argA2^De), 45)
|
147
|
+
@ Bu = ROL64((argA3^Di), 61)
|
148
|
+
@ Ba = ROL64((argA4^Do), 28)
|
149
|
+
@ Be = ROL64((argA5^Du), 20)
|
150
|
+
@ argA1 = Ba ^((~Be)& Bi )
|
151
|
+
@ Ca ^= argA1
|
152
|
+
@ argA2 = Be ^((~Bi)& Bo )
|
153
|
+
@ argA3 = Bi ^((~Bo)& Bu )
|
154
|
+
@ argA4 = Bo ^((~Bu)& Ba )
|
155
|
+
@ argA5 = Bu ^((~Ba)& Be )
|
156
|
+
@ Register roles below: d0 = Ba, d1 = Be, d2 = Bi, d3 = Bo, d4 = Bu.
@ Each rotation is a vshl by n followed by a vsri by 64-n into the same register.
veor.64 \argA2, \argA2, d8
|
157
|
+
veor.64 \argA3, \argA3, d9
|
158
|
+
vshl.u64 d3, \argA2, #45
|
159
|
+
@ The argA1 lane is fetched from memory into d6.
vldr.64 d6, [r0, #\argA1]
|
160
|
+
vshl.u64 d4, \argA3, #61
|
161
|
+
veor.64 \argA4, \argA4, d10
|
162
|
+
vsri.64 d3, \argA2, #64-45
|
163
|
+
veor.64 \argA5, \argA5, d11
|
164
|
+
vsri.64 d4, \argA3, #64-61
|
165
|
+
vshl.u64 d0, \argA4, #28
|
166
|
+
veor.64 d6, d6, d7
|
167
|
+
vshl.u64 d1, \argA5, #20
|
168
|
+
@ vbic Dd, Dn, Dm computes Dn & ~Dm.
vbic.64 \argA3, d4, d3
|
169
|
+
vsri.64 d0, \argA4, #64-28
|
170
|
+
vbic.64 \argA4, d0, d4
|
171
|
+
vshl.u64 d2, d6, #3
|
172
|
+
vsri.64 d1, \argA5, #64-20
|
173
|
+
veor.64 \argA4, d3
|
174
|
+
vsri.64 d2, d6, #64-3
|
175
|
+
vbic.64 \argA5, d1, d0
|
176
|
+
@ d6 = (~Be) & Bi, XORed with Ba below: the new argA1 lane.
vbic.64 d6, d2, d1
|
177
|
+
vbic.64 \argA2, d3, d2
|
178
|
+
veor.64 d6, d0
|
179
|
+
veor.64 \argA2, d1
|
180
|
+
vstr.64 d6, [r0, #\argA1]
|
181
|
+
veor.64 \argA3, d2
|
182
|
+
@ Ca ^= argA1
veor.64 d5, d6
|
183
|
+
veor.64 \argA5, d4
|
184
|
+
.endm
|
185
|
+
|
186
|
+
.macro KeccakS_ThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
@ Theta/Rho/Pi/Chi for one plane of a single state, with rotation amounts
@ 18, 1, 6, 25 and 8 for argA1..argA5.
@   argA1        : byte offset from r0 of the lane kept in memory (vldr/vstr).
@   argA2..argA5 : D registers holding the other four lanes; rewritten in place.
@ Reads d7 = Da, d8 = De, d9 = Di, d10 = Do, d11 = Du and XORs the new argA1
@ lane into d5 (Ca). d0-d4 and d6 are scratch.
@ NOTE(review): d7-d11 are the registers KeccakS_ThetaRhoPiChiIota leaves the
@ D values in; the call sites are not visible here - confirm the calling order.
|
187
|
+
|
188
|
+
@ Bu = ROL64((argA1^Da), 18)
|
189
|
+
@ Ba = ROL64((argA2^De), 1)
|
190
|
+
@ Be = ROL64((argA3^Di), 6)
|
191
|
+
@ Bi = ROL64((argA4^Do), 25)
|
192
|
+
@ Bo = ROL64((argA5^Du), 8)
|
193
|
+
@ argA1 = Ba ^((~Be)& Bi )
|
194
|
+
@ Ca ^= argA1
|
195
|
+
@ argA2 = Be ^((~Bi)& Bo )
|
196
|
+
@ argA3 = Bi ^((~Bo)& Bu )
|
197
|
+
@ argA4 = Bo ^((~Bu)& Ba )
|
198
|
+
@ argA5 = Bu ^((~Ba)& Be )
|
199
|
+
@ Register roles below: d0 = Ba, d1 = Be, d2 = Bi, d3 = Bo, d4 = Bu.
veor.64 \argA3, \argA3, d9
|
200
|
+
veor.64 \argA4, \argA4, d10
|
201
|
+
vshl.u64 d1, \argA3, #6
|
202
|
+
@ The argA1 lane is fetched from memory into d6.
vldr.64 d6, [r0, #\argA1]
|
203
|
+
vshl.u64 d2, \argA4, #25
|
204
|
+
veor.64 \argA5, \argA5, d11
|
205
|
+
vsri.64 d1, \argA3, #64-6
|
206
|
+
veor.64 \argA2, \argA2, d8
|
207
|
+
vsri.64 d2, \argA4, #64-25
|
208
|
+
@ A rotation by 8 bits is a whole-byte rotation, done with one vext.8
@ instead of a vshl/vsri pair.
vext.8 d3, \argA5, \argA5, #7
|
209
|
+
veor.64 d6, d6, d7
|
210
|
+
@ argA3 is reused as a temporary for the new argA1 lane ((~Be) & Bi, XORed
@ with Ba below) until that lane has been stored and folded into d5.
vbic.64 \argA3, d2, d1
|
211
|
+
@ vadd x,x followed by vsri #63 forms ROL64(x, 1).
vadd.u64 d0, \argA2, \argA2
|
212
|
+
@ argA4 temporarily holds (~Bi) & Bo, consumed by the three-operand veor
@ below; its final value is computed further down.
vbic.64 \argA4, d3, d2
|
213
|
+
vsri.64 d0, \argA2, #64-1
|
214
|
+
vshl.u64 d4, d6, #18
|
215
|
+
veor.64 \argA2, d1, \argA4
|
216
|
+
veor.64 \argA3, d0
|
217
|
+
vsri.64 d4, d6, #64-18
|
218
|
+
vstr.64 \argA3, [r0, #\argA1]
|
219
|
+
@ Ca ^= argA1
veor.64 d5, \argA3
|
220
|
+
vbic.64 \argA5, d1, d0
|
221
|
+
vbic.64 \argA3, d4, d3
|
222
|
+
vbic.64 \argA4, d0, d4
|
223
|
+
veor.64 \argA3, d2
|
224
|
+
veor.64 \argA4, d3
|
225
|
+
veor.64 \argA5, d4
|
226
|
+
.endm
|
227
|
+
|
228
|
+
.macro KeccakS_ThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5

    @ Theta-xor, rho-rotate and chi for five lanes of a single state.
    @   argA1        : immediate byte offset from r0 of the lane kept in memory
    @   argA2..argA5 : D registers holding the other four lanes (updated in place)
    @ Reads d7, d8, d9, d10, d11 as Da, De, Di, Do, Du.
    @ Xors the new argA1 lane into d5 (Ca) and stores it back at [r0, #argA1].
    @ Scratch: d0 = Ba, d1 = Be, d2 = Bi, d3 = Bo, d4 = Bu, d6 = argA1 lane.
    @
    @   Be = ROL64(argA1 ^ Da, 36)
    @   Bi = ROL64(argA2 ^ De, 10)
    @   Bo = ROL64(argA3 ^ Di, 15)
    @   Bu = ROL64(argA4 ^ Do, 56)
    @   Ba = ROL64(argA5 ^ Du, 27)
    @   argA1 = Ba ^ (~Be & Bi),  Ca ^= argA1
    @   argA2 = Be ^ (~Bi & Bo)
    @   argA3 = Bi ^ (~Bo & Bu)
    @   argA4 = Bo ^ (~Bu & Ba)
    @   argA5 = Bu ^ (~Ba & Be)

    veor.64     \argA2, d8                  @ argA2 ^= De
    veor.64     \argA3, d9                  @ argA3 ^= Di
    vshl.u64    d2, \argA2, #10             @ Bi, high part
    vldr.64     d6, [r0, #\argA1]           @ fetch the in-memory lane
    vshl.u64    d3, \argA3, #15             @ Bo, high part
    veor.64     \argA4, d10                 @ argA4 ^= Do
    vsri.64     d2, \argA2, #54             @ Bi complete (64 - 10)
    vsri.64     d3, \argA3, #49             @ Bo complete (64 - 15)
    veor.64     \argA5, d11                 @ argA5 ^= Du
    vext.8      d4, \argA4, \argA4, #1      @ Bu: rotate left by 56 via byte extract
    vbic.64     \argA2, d3, d2              @ ~Bi & Bo
    vshl.u64    d0, \argA5, #27             @ Ba, high part
    veor.64     d6, d7                      @ argA1 ^= Da
    vbic.64     \argA3, d4, d3              @ ~Bo & Bu
    vsri.64     d0, \argA5, #37             @ Ba complete (64 - 27)
    vshl.u64    d1, d6, #36                 @ Be, high part
    veor.64     \argA3, \argA3, d2          @ argA3 = Bi ^ (~Bo & Bu)
    vbic.64     \argA4, d0, d4              @ ~Bu & Ba
    vsri.64     d1, d6, #28                 @ Be complete (64 - 36)
    veor.64     \argA4, \argA4, d3          @ argA4 = Bo ^ (~Bu & Ba)
    vbic.64     d6, d2, d1                  @ ~Be & Bi
    vbic.64     \argA5, d1, d0              @ ~Ba & Be
    veor.64     d6, d6, d0                  @ argA1 = Ba ^ (~Be & Bi)
    veor.64     \argA2, \argA2, d1          @ argA2 = Be ^ (~Bi & Bo)
    vstr.64     d6, [r0, #\argA1]           @ write argA1 back to memory
    veor.64     d5, d5, d6                  @ Ca ^= argA1
    veor.64     \argA5, \argA5, d4          @ argA5 = Bu ^ (~Ba & Be)
.endm
|
269
|
+
|
270
|
+
.macro KeccakS_ThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5

    @ Theta-xor, rho-rotate and chi for five lanes of a single state.
    @   argA1        : immediate byte offset from r0 of the lane kept in memory
    @   argA2..argA5 : D registers holding the other four lanes (updated in place)
    @ Reads d7, d8, d9, d10, d11 as Da, De, Di, Do, Du.
    @ Xors the new argA1 lane into d5 (Ca) and stores it back at [r0, #argA1].
    @ Scratch: d0 = Ba, d1 = Be, d2 = Bi, d3 = Bo, d4 = Bu, d6 = argA1 lane.
    @ argA2 is reused as a temporary for several chi terms before it
    @ receives its own final value.
    @
    @   Bo = ROL64(argA1 ^ Da, 41)
    @   Bu = ROL64(argA2 ^ De,  2)
    @   Ba = ROL64(argA3 ^ Di, 62)
    @   Be = ROL64(argA4 ^ Do, 55)
    @   Bi = ROL64(argA5 ^ Du, 39)
    @   argA1 = Ba ^ (~Be & Bi),  Ca ^= argA1
    @   argA2 = Be ^ (~Bi & Bo)
    @   argA3 = Bi ^ (~Bo & Bu)
    @   argA4 = Bo ^ (~Bu & Ba)
    @   argA5 = Bu ^ (~Ba & Be)

    veor.64     \argA2, d8                  @ argA2 ^= De
    veor.64     \argA3, d9                  @ argA3 ^= Di
    vshl.u64    d4, \argA2, #2              @ Bu, high part
    veor.64     \argA5, d11                 @ argA5 ^= Du
    vshl.u64    d0, \argA3, #62             @ Ba, high part
    vldr.64     d6, [r0, #\argA1]           @ fetch the in-memory lane
    vsri.64     d4, \argA2, #62             @ Bu complete (64 - 2)
    veor.64     \argA4, d10                 @ argA4 ^= Do
    vsri.64     d0, \argA3, #2              @ Ba complete (64 - 62)
    vshl.u64    d1, \argA4, #55             @ Be, high part
    veor.64     d6, d7                      @ argA1 ^= Da
    vshl.u64    d2, \argA5, #39             @ Bi, high part
    vsri.64     d1, \argA4, #9              @ Be complete (64 - 55)
    vbic.64     \argA4, d0, d4              @ ~Bu & Ba
    vsri.64     d2, \argA5, #25             @ Bi complete (64 - 39)
    vbic.64     \argA2, d1, d0              @ temporary: ~Ba & Be
    vshl.u64    d3, d6, #41                 @ Bo, high part
    veor.64     \argA5, d4, \argA2          @ argA5 = Bu ^ (~Ba & Be)
    vbic.64     \argA2, d2, d1              @ temporary: ~Be & Bi
    vsri.64     d3, d6, #23                 @ Bo complete (64 - 41)
    veor.64     d6, d0, \argA2              @ argA1 = Ba ^ (~Be & Bi)
    vbic.64     \argA2, d3, d2              @ ~Bi & Bo
    vbic.64     \argA3, d4, d3              @ ~Bo & Bu
    veor.64     \argA2, \argA2, d1          @ argA2 = Be ^ (~Bi & Bo)
    vstr.64     d6, [r0, #\argA1]           @ write argA1 back to memory
    veor.64     d5, d5, d6                  @ Ca ^= argA1
    veor.64     \argA3, \argA3, d2          @ argA3 = Bi ^ (~Bo & Bu)
    veor.64     \argA4, \argA4, d3          @ argA4 = Bo ^ (~Bu & Ba)
.endm
|
312
|
+
|
313
|
+
@ --- macros for Parallel permutation
|
314
|
+
|
315
|
+
.macro m_pls start
    @ Point r3 at the lane at byte offset `start` from r0.
    @ A start value of -1 means "leave r3 alone" and emits no instruction.
    .if \start == -1
    .else
    add         r3, r0, #\start
    .endif
.endm
|
320
|
+
|
321
|
+
.macro m_ld qreg, next
    @ Load 16 bytes from the 128-bit aligned address in r3 into qreg, then
    @ advance r3: by r4 when next is not 16 (the caller must have put the
    @ stride in r4), otherwise by the transfer size via writeback.
    .if \next != 16
    vld1.64     { \qreg }, [r3:128], r4
    .else
    vld1.64     { \qreg }, [r3:128]!
    .endif
.endm
|
328
|
+
|
329
|
+
.macro m_st qreg, next
    @ Store qreg (16 bytes) to the 128-bit aligned address in r3, then
    @ advance r3: by r4 when next is not 16 (the caller must have put the
    @ stride in r4), otherwise by the transfer size via writeback.
    .if \next != 16
    vst1.64     { \qreg }, [r3:128], r4
    .else
    vst1.64     { \qreg }, [r3:128]!
    .endif
.endm
|
336
|
+
|
337
|
+
.macro KeccakP_ThetaRhoPiChiIota ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1

    @ First five-lane step of a round on two interleaved states (one Q
    @ register = the same lane of both states): theta, rho, chi and iota.
    @
    @   ofs1..ofs5 : byte offsets from r0 handed to m_pls (-1 = keep r3)
    @   next       : r3 stride between consecutive loads/stores; when it is
    @                not 16 it is first placed in r4
    @   ofsn1      : offset handed to m_pls at the end, for whatever follows
    @
    @ On entry q0..q4 are read as Ca, Ce, Ci, Co, Cu and r3 is used as the
    @ address of the first lane without being set here.
    @ r1 addresses the round constant and is advanced by 8 bytes.
    @ r5 addresses a buffer that receives De, Di, Do, Du (q6..q9, 64 bytes);
    @ r6 is clobbered as the running pointer into it. Da stays in q5.
    @ On exit q0..q4 hold the five new lanes, which were also stored.
    @ Scratch: q10 = Ba, q11 = Be, q12 = Bi, q13 = Bo, q14 = Bu, q15 = constant.

    @ --- theta: D values from the column parities
    @   De = Ca ^ ROL64(Ci, 1)      -> q6
    @   Di = Ce ^ ROL64(Co, 1)      -> q7
    @   Do = Ci ^ ROL64(Cu, 1)      -> q8
    @   Du = Co ^ ROL64(Ca, 1)      -> q9
    @   Da = Cu ^ ROL64(Ce, 1)      -> q5
    vadd.u64    q6, q2, q2
    vadd.u64    q7, q3, q3
    vadd.u64    q8, q4, q4
    vadd.u64    q9, q0, q0
    vadd.u64    q5, q1, q1

    vsri.64     q6, q2, #63
    vsri.64     q7, q3, #63
    vsri.64     q8, q4, #63
    vsri.64     q9, q0, #63
    vsri.64     q5, q1, #63

    veor.64     q6, q0
    veor.64     q7, q1
    veor.64     q8, q2
    .if \next != 16
    mov         r4, #\next                  @ stride used by m_ld / m_st
    .endif
    veor.64     q9, q3
    veor.64     q5, q4

    @ --- load the five lanes and apply the D values
    @   Ba = argA1 ^ Da
    @   Be = ROL64(argA2 ^ De, 44)
    @   Bi = ROL64(argA3 ^ Di, 43)
    @   Bo = ROL64(argA4 ^ Do, 21)
    @   Bu = ROL64(argA5 ^ Du, 14)
    m_ld        q10, \next
    m_pls       \ofs2
    m_ld        q1, \next
    m_pls       \ofs3
    veor.64     q10, q5
    m_ld        q2, \next
    m_pls       \ofs4
    veor.64     q1, q6
    m_ld        q3, \next
    m_pls       \ofs5
    veor.64     q2, q7
    m_ld        q4, \next
    veor.64     q3, q8
    mov         r6, r5                      @ r6 walks the D-value buffer
    veor.64     q4, q9

    @ --- rho rotations, interleaved with saving De..Du
    vst1.64     { q6 }, [r6:128]!
    vshl.u64    q11, q1, #44
    vshl.u64    q12, q2, #43
    vst1.64     { q7 }, [r6:128]!
    vshl.u64    q13, q3, #21
    vshl.u64    q14, q4, #14
    vst1.64     { q8 }, [r6:128]!
    vsri.64     q11, q1, #20                @ 64 - 44
    vsri.64     q12, q2, #21                @ 64 - 43
    vst1.64     { q9 }, [r6:128]!
    vsri.64     q13, q3, #43                @ 64 - 21
    vsri.64     q14, q4, #50                @ 64 - 14

    @ --- chi and iota
    @   argA1 = Ba ^ (~Be & Bi) ^ KeccakP1600RoundConstants[round]
    @   argA2 = Be ^ (~Bi & Bo)
    @   argA3 = Bi ^ (~Bo & Bu)
    @   argA4 = Bo ^ (~Bu & Ba)
    @   argA5 = Bu ^ (~Ba & Be)
    vld1.64     { d30 }, [r1:64]            @ same constant into both halves of q15
    vbic.64     q0, q12, q11
    vbic.64     q1, q13, q12
    vld1.64     { d31 }, [r1:64]!           @ ... and step r1 to the next constant
    veor.64     q0, q0, q10
    vbic.64     q4, q11, q10
    veor.64     q0, q0, q15
    vbic.64     q2, q14, q13
    vbic.64     q3, q10, q14

    @ --- finish chi while storing the results
    m_pls       \ofs1
    veor.64     q1, q1, q11
    m_st        q0, \next
    m_pls       \ofs2
    veor.64     q2, q2, q12
    m_st        q1, \next
    m_pls       \ofs3
    veor.64     q3, q3, q13
    m_st        q2, \next
    m_pls       \ofs4
    veor.64     q4, q4, q14
    m_st        q3, \next
    m_pls       \ofs5
    m_st        q4, \next
    m_pls       \ofsn1
.endm
|
430
|
+
|
431
|
+
.macro KeccakP_ThetaRhoPiChi ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1, Bb1, Bb2, Bb3, Bb4, Bb5, Rr1, Rr2, Rr3, Rr4, Rr5

    @ Generic five-lane step on two interleaved states: theta xor, rho
    @ rotation, chi, and accumulation into the running values in q0..q4.
    @
    @   ofs1..ofs5 : byte offsets from r0 handed to m_pls (-1 = keep r3)
    @   next       : r3 stride between loads/stores; placed in r4 unless 16
    @   ofsn1      : offset handed to m_pls at the end, for whatever follows
    @   Bb1..Bb5   : Q registers that receive the rotated lanes; chi below
    @                reads q10..q14 as Ba, Be, Bi, Bo, Bu, so these must be
    @                some ordering of q10..q14
    @   Rr1..Rr5   : left-rotation amounts for the five lanes
    @
    @ Reads q5..q9 as Da, De, Di, Do, Du. q6..q9 are used as temporaries and
    @ then reloaded from the 64-byte buffer at r5 (r6 is clobbered as the
    @ running pointer); q5 is left untouched. q15 is scratch.
    @ r3 is used as the address of the first lane without being set here.
    @
    @   Bb1 = ROL64(argA1 ^ Da, Rr1)
    @   Bb2 = ROL64(argA2 ^ De, Rr2)
    @   Bb3 = ROL64(argA3 ^ Di, Rr3)
    @   Bb4 = ROL64(argA4 ^ Do, Rr4)
    @   Bb5 = ROL64(argA5 ^ Du, Rr5)

    .if \next != 16
    mov         r4, #\next                  @ stride used by m_ld / m_st
    .endif

    @ --- load the lanes and xor in the D values
    m_ld        \Bb1, \next
    m_pls       \ofs2
    m_ld        \Bb2, \next
    m_pls       \ofs3
    veor.64     q15, q5, \Bb1
    m_ld        \Bb3, \next
    m_pls       \ofs4
    veor.64     q6, \Bb2
    m_ld        \Bb4, \next
    m_pls       \ofs5
    veor.64     q7, \Bb3
    m_ld        \Bb5, \next
    veor.64     q8, \Bb4
    veor.64     q9, \Bb5

    @ --- rho: rotate left as (x << r) with (x >> (64 - r)) inserted
    vshl.u64    \Bb1, q15, #\Rr1
    vshl.u64    \Bb2, q6, #\Rr2
    vshl.u64    \Bb3, q7, #\Rr3
    vshl.u64    \Bb4, q8, #\Rr4
    vshl.u64    \Bb5, q9, #\Rr5

    vsri.64     \Bb1, q15, #64-\Rr1
    vsri.64     \Bb2, q6, #64-\Rr2
    vsri.64     \Bb3, q7, #64-\Rr3
    vsri.64     \Bb4, q8, #64-\Rr4
    vsri.64     \Bb5, q9, #64-\Rr5

    @ --- chi
    @   argA1 = Ba ^ (~Be & Bi),  Ca ^= argA1
    @   argA2 = Be ^ (~Bi & Bo),  Ce ^= argA2
    @   argA3 = Bi ^ (~Bo & Bu),  Ci ^= argA3
    @   argA4 = Bo ^ (~Bu & Ba),  Co ^= argA4
    @   argA5 = Bu ^ (~Ba & Be),  Cu ^= argA5
    vbic.64     q15, q12, q11
    mov         r6, r5                      @ r6 walks the saved D values
    vbic.64     q6, q13, q12
    m_pls       \ofs1
    vbic.64     q7, q14, q13
    vbic.64     q8, q10, q14
    vbic.64     q9, q11, q10

    veor.64     q15, q10
    veor.64     q6, q11

    @ --- store results, fold them into q0..q4, reload De..Du
    m_st        q15, \next
    m_pls       \ofs2
    veor.64     q7, q12

    m_st        q6, \next
    m_pls       \ofs3
    veor.64     q1, q6
    vld1.64     { q6 }, [r6:128]!
    veor.64     q8, q13

    m_st        q7, \next
    m_pls       \ofs4
    veor.64     q2, q7
    vld1.64     { q7 }, [r6:128]!
    veor.64     q9, q14

    m_st        q8, \next
    m_pls       \ofs5
    veor.64     q3, q8

    m_st        q9, \next

    vld1.64     { q8 }, [r6:128]!
    veor.64     q4, q9
    m_pls       \ofsn1
    vld1.64     { q9 }, [r6:128]!
    veor.64     q0, q15
.endm
|
514
|
+
|
515
|
+
.macro KeccakP_ThetaRhoPiChi1 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
    @ KeccakP_ThetaRhoPiChi with the five lanes landing in q12, q13, q14,
    @ q10, q11 (Bi, Bo, Bu, Ba, Be in the generic macro's chi naming) and
    @ rotation amounts 3, 45, 61, 28, 20.
    KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, \
                          q12, q13, q14, q10, q11, \
                          3, 45, 61, 28, 20
.endm
|
518
|
+
|
519
|
+
.macro KeccakP_ThetaRhoPiChi2 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
    @ KeccakP_ThetaRhoPiChi with the five lanes landing in q14, q10, q11,
    @ q12, q13 (Bu, Ba, Be, Bi, Bo in the generic macro's chi naming) and
    @ rotation amounts 18, 1, 6, 25, 8.
    KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, \
                          q14, q10, q11, q12, q13, \
                          18, 1, 6, 25, 8
.endm
|
522
|
+
|
523
|
+
.macro KeccakP_ThetaRhoPiChi3 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
    @ KeccakP_ThetaRhoPiChi with the five lanes landing in q11, q12, q13,
    @ q14, q10 (Be, Bi, Bo, Bu, Ba in the generic macro's chi naming) and
    @ rotation amounts 36, 10, 15, 56, 27.
    KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, \
                          q11, q12, q13, q14, q10, \
                          36, 10, 15, 56, 27
.endm
|
526
|
+
|
527
|
+
.macro KeccakP_ThetaRhoPiChi4 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1

    @ Five-lane step on two interleaved states: theta xor, rho rotation,
    @ chi, and accumulation into the running values in q0..q4.
    @ Unlike the generic KeccakP_ThetaRhoPiChi, q5..q9 (Da, De, Di, Do, Du)
    @ are consumed as temporaries and NOT restored, and r5/r6 are not used.
    @
    @   ofs1..ofs5 : byte offsets from r0 handed to m_pls (-1 = keep r3)
    @   next       : r3 stride between loads/stores; placed in r4 unless 16
    @   ofsn1      : offset handed to m_pls at the end, for whatever follows
    @ r3 is used as the address of the first lane without being set here.
    @
    @   Bo = ROL64(argA1 ^ Da, 41)      -> q13
    @   Bu = ROL64(argA2 ^ De,  2)      -> q14
    @   Ba = ROL64(argA3 ^ Di, 62)      -> q10
    @   Be = ROL64(argA4 ^ Do, 55)      -> q11
    @   Bi = ROL64(argA5 ^ Du, 39)      -> q12

    .if \next != 16
    mov         r4, #\next                  @ stride used by m_ld / m_st
    .endif

    @ --- load the lanes and xor in the D values
    m_ld        q13, \next
    m_pls       \ofs2
    m_ld        q14, \next
    m_pls       \ofs3
    veor.64     q5, q13
    m_ld        q10, \next
    m_pls       \ofs4
    veor.64     q6, q14
    m_ld        q11, \next
    m_pls       \ofs5
    veor.64     q7, q10
    m_ld        q12, \next
    veor.64     q8, q11
    veor.64     q9, q12

    @ --- rho
    vshl.u64    q13, q5, #41
    vshl.u64    q14, q6, #2
    vshl.u64    q10, q7, #62
    vshl.u64    q11, q8, #55
    vshl.u64    q12, q9, #39

    vsri.64     q13, q5, #23                @ 64 - 41
    vsri.64     q14, q6, #62                @ 64 - 2
    vsri.64     q11, q8, #9                 @ 64 - 55
    vsri.64     q12, q9, #25                @ 64 - 39
    vsri.64     q10, q7, #2                 @ 64 - 62

    @ --- chi: q5..q9 become the new argA1..argA5
    vbic.64     q5, q12, q11                @ ~Be & Bi
    vbic.64     q6, q13, q12                @ ~Bi & Bo
    vbic.64     q7, q14, q13                @ ~Bo & Bu
    vbic.64     q8, q10, q14                @ ~Bu & Ba
    vbic.64     q9, q11, q10                @ ~Ba & Be
    veor.64     q5, q10
    veor.64     q6, q11
    veor.64     q7, q12
    veor.64     q8, q13
    m_pls       \ofs1
    veor.64     q9, q14

    @ --- store results and fold them into q0..q4
    m_st        q5, \next
    m_pls       \ofs2
    veor.64     q0, q5
    m_st        q6, \next
    m_pls       \ofs3
    veor.64     q1, q6
    m_st        q7, \next
    m_pls       \ofs4
    veor.64     q2, q7
    m_st        q8, \next
    m_pls       \ofs5
    veor.64     q3, q8
    m_st        q9, \next
    m_pls       \ofsn1
    veor.64     q4, q9
.endm
|
594
|
+
|
595
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600_Pl_StaticInitialize( void )
@
@ Nothing to set up in this implementation: returns immediately.
@
    .global KeccakP1600_Pl_StaticInitialize
    .type   KeccakP1600_Pl_StaticInitialize, %function
    .align  8
KeccakP1600_Pl_StaticInitialize:
    bx      lr
|
604
|
+
|
605
|
+
|
606
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_InitializeAll( void *states )
@
@ Zeroes 50 consecutive 64-bit lanes (400 bytes) starting at states.
@ In:      r0 = states
@ Clobber: r0 (advanced past the cleared area), q0-q3
@
    .global KeccakP1600times2_InitializeAll
    .type   KeccakP1600times2_InitializeAll, %function
    .align  8
KeccakP1600times2_InitializeAll:
    vmov.i64    q0, #0
    vmov.i64    q1, #0
    vmov.i64    q2, #0
    vmov.i64    q3, #0
    .rept   6                               @ 6 x 8 lanes = 48 lanes
    vstm        r0!, { d0 - d7 }
    .endr
    vstm        r0!, { d0 - d1 }            @ remaining 2 lanes -> 50
    bx          lr
|
626
|
+
|
627
|
+
|
628
|
+
|
629
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
@
@ Xors one byte into the state of one instance. Lanes of the two instances
@ are interleaved: lane i of instance k is at states + 16*i + 8*k.
@ In:      r0 = states, r1 = instanceIndex, r2 = byte, r3 = offset
@ Clobber: r0, r1, r3
@
    .global KeccakP1600times2_AddByte
    .type   KeccakP1600times2_AddByte, %function
    .align  8
KeccakP1600times2_AddByte:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    lsr     r1, r3, #3                      @ r1 = lane index (offset / 8)
    and     r3, r3, #7                      @ r3 = byte position inside the lane
    add     r0, r0, r1, LSL #4              @ states += 16 * lane index
    add     r0, r0, r3                      @ states += offset & 7
    ldrb    r1, [r0]
    eor     r1, r1, r2
    strb    r1, [r0]
    bx      lr
|
646
|
+
|
647
|
+
|
648
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
@                                   unsigned int offset, unsigned int length )
@
@ Xors length bytes from data into the state of one instance, starting at
@ byte offset `offset`. Lanes of the two instances are interleaved (16 bytes
@ per lane pair), so moving to the next lane of the same instance skips 8
@ bytes belonging to the other instance.
@ In:      r0 = states, r1 = instanceIndex, r2 = data, r3 = offset,
@          [sp] = length
@ Clobber: r0-r3; r4-r7 are saved and restored
@
    .global KeccakP1600times2_AddBytes
    .type   KeccakP1600times2_AddBytes, %function
    .align  8
KeccakP1600times2_AddBytes:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    ldr     r1, [sp]                        @ r1 = length (read before the push)
    cmp     r1, #0
    beq     KeccakP1600times2_AddBytes_Exit
    push    { r4 - r7 }
    lsr     r4, r3, #3                      @ states += (offset & ~7) * 2
    add     r0, r0, r4, LSL #4
    ands    r3, r3, #7                      @ start inside a lane?
    beq     KeccakP1600times2_AddBytes_CheckLanes
    add     r0, r0, r3                      @ states += offset & 7
    rsb     r3, r3, #8                      @ r3 = bytes left in this lane
KeccakP1600times2_AddBytes_LoopBytesFirst:
    ldrb    r5, [r2], #1
    ldrb    r4, [r0]
    eor     r4, r4, r5
    strb    r4, [r0], #1
    subs    r1, r1, #1                      @ all requested bytes consumed?
    beq     KeccakP1600times2_AddBytes_Done
    subs    r3, r3, #1
    bne     KeccakP1600times2_AddBytes_LoopBytesFirst
    add     r0, r0, #8                      @ skip the other instance's lane
KeccakP1600times2_AddBytes_CheckLanes:
    lsrs    r3, r1, #3                      @ r3 = number of whole lanes
    beq     KeccakP1600times2_AddBytes_CheckBytesLast
KeccakP1600times2_AddBytes_LoopLanes:
    ldr     r6, [r2], #4                    @ word loads: data need not be 8-byte aligned
    ldr     r7, [r2], #4
    ldr     r4, [r0]
    ldr     r5, [r0, #4]
    eor     r4, r4, r6
    eor     r5, r5, r7
    str     r4, [r0], #4
    str     r5, [r0], #12                   @ 4 + 8: on to this instance's next lane
    subs    r3, r3, #1
    bne     KeccakP1600times2_AddBytes_LoopLanes
KeccakP1600times2_AddBytes_CheckBytesLast:
    ands    r1, r1, #7                      @ trailing bytes after the whole lanes
    beq     KeccakP1600times2_AddBytes_Done
KeccakP1600times2_AddBytes_LoopBytesLast:
    ldrb    r5, [r2], #1
    ldrb    r4, [r0]
    eor     r4, r4, r5
    strb    r4, [r0], #1
    subs    r1, r1, #1
    bne     KeccakP1600times2_AddBytes_LoopBytesLast
KeccakP1600times2_AddBytes_Done:
    pop     { r4 - r7 }
KeccakP1600times2_AddBytes_Exit:
    bx      lr
|
706
|
+
|
707
|
+
|
708
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
@
@ Xors laneCount lanes into both instances. Instance 0 reads from data,
@ instance 1 from data + 8 * laneOffset; results go to the interleaved state.
@ In:      r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset
@ Clobber: r0-r3; r4-r7 are saved and restored
@ The state is accessed with ldrd/strd; data is read with word loads.
@
    .align  8
    .global KeccakP1600times2_AddLanesAll
    .type   KeccakP1600times2_AddLanesAll, %function
KeccakP1600times2_AddLanesAll:
    cmp     r2, #0
    beq     KeccakP1600times2_AddLanesAll_Exit
    push    { r4 - r7 }
    add     r3, r1, r3, LSL #3              @ r3 = data + 8 * laneOffset (instance 1)
KeccakP1600times2_AddLanesAll_Loop:
    ldrd    r6, r7, [r0]                    @ instance 0 lane
    ldr     r4, [r1], #4
    ldr     r5, [r1], #4
    eor     r6, r6, r4
    eor     r7, r7, r5
    strd    r6, r7, [r0], #8
    ldrd    r6, r7, [r0]                    @ instance 1 lane
    ldr     r4, [r3], #4
    ldr     r5, [r3], #4
    eor     r6, r6, r4
    eor     r7, r7, r5
    strd    r6, r7, [r0], #8
    subs    r2, r2, #1
    bne     KeccakP1600times2_AddLanesAll_Loop
    pop     { r4 - r7 }
KeccakP1600times2_AddLanesAll_Exit:
    bx      lr
|
738
|
+
|
739
|
+
|
740
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
@                                         unsigned int offset, unsigned int length )
@
@ Copies length bytes from data over the state of one instance, starting at
@ byte offset `offset`. Lanes of the two instances are interleaved, so moving
@ to the next lane of the same instance skips 8 bytes.
@ In:      r0 = states, r1 = instanceIndex, r2 = data, r3 = offset,
@          [sp] = length
@ Clobber: r0-r3; r4-r5 are saved and restored
@
    .global KeccakP1600times2_OverwriteBytes
    .type   KeccakP1600times2_OverwriteBytes, %function
    .align  8
KeccakP1600times2_OverwriteBytes:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    ldr     r1, [sp]                        @ r1 = length (read before the push)
    cmp     r1, #0
    beq     KeccakP1600times2_OverwriteBytes_Exit
    push    { r4 - r5 }
    lsr     r4, r3, #3                      @ states += (offset & ~7) * 2
    add     r0, r0, r4, LSL #4
    ands    r3, r3, #7                      @ start inside a lane?
    beq     KeccakP1600times2_OverwriteBytes_CheckLanes
    add     r0, r0, r3                      @ states += offset & 7
    rsb     r3, r3, #8                      @ r3 = bytes left in this lane
KeccakP1600times2_OverwriteBytes_LoopBytesFirst:
    ldrb    r4, [r2], #1
    strb    r4, [r0], #1
    subs    r1, r1, #1                      @ all requested bytes written?
    beq     KeccakP1600times2_OverwriteBytes_Done
    subs    r3, r3, #1
    bne     KeccakP1600times2_OverwriteBytes_LoopBytesFirst
    add     r0, r0, #8                      @ skip the other instance's lane
KeccakP1600times2_OverwriteBytes_CheckLanes:
    lsrs    r3, r1, #3                      @ r3 = number of whole lanes
    beq     KeccakP1600times2_OverwriteBytes_CheckBytesLast
KeccakP1600times2_OverwriteBytes_LoopLanes:
    ldr     r4, [r2], #4
    ldr     r5, [r2], #4
    subs    r3, r3, #1
    str     r4, [r0], #4
    str     r5, [r0], #12                   @ 4 + 8: on to this instance's next lane
    bne     KeccakP1600times2_OverwriteBytes_LoopLanes
KeccakP1600times2_OverwriteBytes_CheckBytesLast:
    ands    r1, r1, #7                      @ trailing bytes after the whole lanes
    beq     KeccakP1600times2_OverwriteBytes_Done
KeccakP1600times2_OverwriteBytes_LoopBytesLast:
    ldrb    r4, [r2], #1
    strb    r4, [r0], #1
    subs    r1, r1, #1
    bne     KeccakP1600times2_OverwriteBytes_LoopBytesLast
KeccakP1600times2_OverwriteBytes_Done:
    pop     { r4 - r5 }
KeccakP1600times2_OverwriteBytes_Exit:
    bx      lr
|
790
|
+
|
791
|
+
|
792
|
+
@----------------------------------------------------------------------------
@
@ KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
@
@ Copies laneCount lanes into both instances. Instance 0 reads from data,
@ instance 1 from data + 8 * laneOffset; lanes are written interleaved.
@ In:      r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset
@ Clobber: r0-r3, r12, d0-d3 (aligned path); r4-r5 are saved and restored
@          on the unaligned path
@ Two paths: NEON transfers when data is 8-byte aligned, word loads otherwise.
@
    .global KeccakP1600times2_OverwriteLanesAll
    .type   KeccakP1600times2_OverwriteLanesAll, %function
    .align  8
KeccakP1600times2_OverwriteLanesAll:
    cmp     r2, #0
    beq     KeccakP1600times2_OverwriteLanesAll_Exit
    lsls    r12, r1, #29                    @ non-zero iff data & 7 != 0
    bne     KeccakP1600times2_OverwriteLanesAll_Unaligned
    add     r3, r1, r3, LSL #3              @ r3 = data + 8 * laneOffset (instance 1)
    lsrs    r2, r2, #1                      @ r2 = lane pairs, C = odd lane, Z = no pairs
    bcc     KeccakP1600times2_OverwriteLanesAll_LoopAligned
    vldm    r1!, { d0 }                     @ odd count: handle one lane first
    vldm    r3!, { d1 }
    vstm    r0!, { d0 - d1 }
    beq     KeccakP1600times2_OverwriteLanesAll_Exit   @ Z still from the lsrs above
KeccakP1600times2_OverwriteLanesAll_LoopAligned:
    vldm    r1!, { d0 }                     @ instance 0, lane i
    vldm    r3!, { d1 }                     @ instance 1, lane i
    vldm    r1!, { d2 }                     @ instance 0, lane i+1
    vldm    r3!, { d3 }                     @ instance 1, lane i+1
    vstm    r0!, { d0 - d3 }
    subs    r2, r2, #1
    bne     KeccakP1600times2_OverwriteLanesAll_LoopAligned
    bx      lr
KeccakP1600times2_OverwriteLanesAll_Unaligned:
    push    { r4, r5 }
    add     r3, r1, r3, LSL #3              @ r3 = data + 8 * laneOffset (instance 1)
KeccakP1600times2_OverwriteLanesAll_LoopUnaligned:
    ldr     r4, [r1], #4
    ldr     r5, [r1], #4
    strd    r4, r5, [r0], #8
    ldr     r4, [r3], #4
    ldr     r5, [r3], #4
    strd    r4, r5, [r0], #8
    subs    r2, r2, #1
    bne     KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
    pop     { r4, r5 }
KeccakP1600times2_OverwriteLanesAll_Exit:
    bx      lr
|
835
|
+
|
836
|
+
|
837
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
@
@ Zeroes the first byteCount bytes of one instance's state: whole lanes
@ first (skipping the other instance's interleaved lanes), then the
@ remaining 0..7 bytes one at a time.
@ In:      r0 = states, r1 = instanceIndex, r2 = byteCount
@ Clobber: r0-r3, d0
@
    .global KeccakP1600times2_OverwriteWithZeroes
    .type   KeccakP1600times2_OverwriteWithZeroes, %function
    .align  8
KeccakP1600times2_OverwriteWithZeroes:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    lsrs    r1, r2, #3                      @ r1 = number of whole lanes
    beq     KeccakP1600times2_OverwriteWithZeroes_Bytes
    vmov.i64    d0, #0
KeccakP1600times2_OverwriteWithZeroes_LoopLanes:
    vstm    r0!, { d0 }                     @ clear this instance's lane
    add     r0, r0, #8                      @ skip the other instance's lane
    subs    r1, r1, #1
    bne     KeccakP1600times2_OverwriteWithZeroes_LoopLanes
KeccakP1600times2_OverwriteWithZeroes_Bytes:
    ands    r2, r2, #7                      @ r2 = bytes still to clear
    beq     KeccakP1600times2_OverwriteWithZeroes_Exit
    movs    r3, #0
KeccakP1600times2_OverwriteWithZeroes_LoopBytes:
    subs    r2, r2, #1
    strb    r3, [r0], #1
    bne     KeccakP1600times2_OverwriteWithZeroes_LoopBytes
KeccakP1600times2_OverwriteWithZeroes_Exit:
    bx      lr
|
864
|
+
|
865
|
+
|
866
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
@                                       unsigned int offset, unsigned int length )
@
@ Copies length bytes of one instance's state, starting at byte offset
@ `offset`, out to data (data is written, the state is only read).
@ Lanes of the two instances are interleaved, so moving to the next lane of
@ the same instance skips 8 bytes.
@ In:      r0 = states, r1 = instanceIndex, r2 = data, r3 = offset,
@          [sp] = length
@ Clobber: r0-r3; r4-r5 are saved and restored
@
    .global KeccakP1600times2_ExtractBytes
    .type   KeccakP1600times2_ExtractBytes, %function
    .align  8
KeccakP1600times2_ExtractBytes:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    ldr     r1, [sp]                        @ r1 = length (read before the push)
    cmp     r1, #0
    beq     KeccakP1600times2_ExtractBytes_Exit
    push    { r4 - r5 }
    lsr     r4, r3, #3                      @ states += (offset & ~7) * 2
    add     r0, r0, r4, LSL #4
    ands    r3, r3, #7                      @ start inside a lane?
    beq     KeccakP1600times2_ExtractBytes_CheckLanes
    add     r0, r0, r3                      @ states += offset & 7
    rsb     r3, r3, #8                      @ r3 = bytes left in this lane
KeccakP1600times2_ExtractBytes_LoopBytesFirst:
    ldrb    r4, [r0], #1
    strb    r4, [r2], #1
    subs    r1, r1, #1                      @ all requested bytes copied?
    beq     KeccakP1600times2_ExtractBytes_Done
    subs    r3, r3, #1
    bne     KeccakP1600times2_ExtractBytes_LoopBytesFirst
    add     r0, r0, #8                      @ skip the other instance's lane
KeccakP1600times2_ExtractBytes_CheckLanes:
    lsrs    r3, r1, #3                      @ r3 = number of whole lanes
    beq     KeccakP1600times2_ExtractBytes_CheckBytesLast
KeccakP1600times2_ExtractBytes_LoopLanes:
    ldr     r4, [r0], #4
    ldr     r5, [r0], #12                   @ 4 + 8: on to this instance's next lane
    subs    r3, r3, #1
    str     r4, [r2], #4
    str     r5, [r2], #4
    bne     KeccakP1600times2_ExtractBytes_LoopLanes
KeccakP1600times2_ExtractBytes_CheckBytesLast:
    ands    r1, r1, #7                      @ trailing bytes after the whole lanes
    beq     KeccakP1600times2_ExtractBytes_Done
KeccakP1600times2_ExtractBytes_LoopBytesLast:
    ldrb    r4, [r0], #1
    strb    r4, [r2], #1
    subs    r1, r1, #1
    bne     KeccakP1600times2_ExtractBytes_LoopBytesLast
KeccakP1600times2_ExtractBytes_Done:
    pop     { r4 - r5 }
KeccakP1600times2_ExtractBytes_Exit:
    bx      lr
|
916
|
+
|
917
|
+
|
918
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
@
@ Copies laneCount lanes of both instances out of the interleaved state.
@ Instance 0 goes to data, instance 1 to data + 8 * laneOffset.
@ In:      r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset
@ Clobber: r0-r3, r12, d0-d3 (aligned path); r4-r5 are saved and restored
@          on the unaligned path
@ Two paths: NEON transfers when data is 8-byte aligned, word stores otherwise.
@
    .global KeccakP1600times2_ExtractLanesAll
    .type   KeccakP1600times2_ExtractLanesAll, %function
    .align  8
KeccakP1600times2_ExtractLanesAll:
    cmp     r2, #0
    beq     KeccakP1600times2_ExtractLanesAll_Exit
    lsls    r12, r1, #29                    @ non-zero iff data & 7 != 0
    bne     KeccakP1600times2_ExtractLanesAll_Unaligned
    add     r3, r1, r3, LSL #3              @ r3 = data + 8 * laneOffset (instance 1)
    lsrs    r2, r2, #1                      @ r2 = lane pairs, C = odd lane, Z = no pairs
    bcc     KeccakP1600times2_ExtractLanesAll_LoopAligned
    vldm    r0!, { d0 - d1 }                @ odd count: handle one lane first
    vstm    r1!, { d0 }
    vstm    r3!, { d1 }
    beq     KeccakP1600times2_ExtractLanesAll_Exit     @ Z still from the lsrs above
KeccakP1600times2_ExtractLanesAll_LoopAligned:
    vldm    r0!, { d0 - d3 }                @ d0/d1 = lane i, d2/d3 = lane i+1
    vstm    r1!, { d0 }                     @ instance 0, lane i
    vstm    r1!, { d2 }                     @ instance 0, lane i+1
    vstm    r3!, { d1 }                     @ instance 1, lane i
    vstm    r3!, { d3 }                     @ instance 1, lane i+1
    subs    r2, r2, #1
    bne     KeccakP1600times2_ExtractLanesAll_LoopAligned
    bx      lr
KeccakP1600times2_ExtractLanesAll_Unaligned:
    push    { r4, r5 }
    add     r3, r1, r3, LSL #3              @ r3 = data + 8 * laneOffset (instance 1)
KeccakP1600times2_ExtractLanesAll_LoopUnaligned:
    ldrd    r4, r5, [r0], #8                @ instance 0 lane
    str     r4, [r1], #4
    str     r5, [r1], #4
    ldrd    r4, r5, [r0], #8                @ instance 1 lane
    str     r4, [r3], #4
    str     r5, [r3], #4
    subs    r2, r2, #1
    bne     KeccakP1600times2_ExtractLanesAll_LoopUnaligned
    pop     { r4, r5 }
KeccakP1600times2_ExtractLanesAll_Exit:
    bx      lr
|
961
|
+
|
962
|
+
|
963
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
@                                             const unsigned char *input, unsigned char *output,
@                                             unsigned int offset, unsigned int length )
@
@ Writes output[i] = state byte ^ input[i] for length bytes of one instance's
@ state, starting at byte offset `offset`. The state itself is only read.
@ Lanes of the two instances are interleaved, so moving to the next lane of
@ the same instance skips 8 bytes.
@ In:      r0 = states, r1 = instanceIndex, r2 = input, r3 = output,
@          [sp] = offset, [sp, #4] = length
@ Clobber: r0-r3; r4-r9 are saved and restored
@
    .global KeccakP1600times2_ExtractAndAddBytes
    .type   KeccakP1600times2_ExtractAndAddBytes, %function
    .align  8
KeccakP1600times2_ExtractAndAddBytes:
    add     r0, r0, r1, LSL #3              @ states += 8 * instanceIndex
    ldr     r1, [sp, #4]                    @ r1 = length (read before the push)
    cmp     r1, #0
    beq     KeccakP1600times2_ExtractAndAddBytes_Exit
    push    { r4 - r9 }
    ldr     r8, [sp, #24]                   @ r8 = offset (6 pushed words above it)
    lsr     r4, r8, #3                      @ states += (offset & ~7) * 2
    add     r0, r0, r4, LSL #4
    ands    r8, r8, #7                      @ start inside a lane?
    beq     KeccakP1600times2_ExtractAndAddBytes_CheckLanes
    add     r0, r0, r8                      @ states += offset & 7
    rsb     r8, r8, #8                      @ r8 = bytes left in this lane
KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst:
    ldrb    r5, [r2], #1
    ldrb    r4, [r0], #1
    eor     r4, r4, r5
    strb    r4, [r3], #1
    subs    r1, r1, #1                      @ all requested bytes produced?
    beq     KeccakP1600times2_ExtractAndAddBytes_Done
    subs    r8, r8, #1
    bne     KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
    add     r0, r0, #8                      @ skip the other instance's lane
KeccakP1600times2_ExtractAndAddBytes_CheckLanes:
    lsrs    r8, r1, #3                      @ r8 = number of whole lanes
    beq     KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
KeccakP1600times2_ExtractAndAddBytes_LoopLanes:
    ldr     r6, [r2], #4
    ldr     r7, [r2], #4
    ldr     r4, [r0], #4
    ldr     r5, [r0], #12                   @ 4 + 8: on to this instance's next lane
    eor     r4, r4, r6
    eor     r5, r5, r7
    subs    r8, r8, #1
    str     r4, [r3], #4
    str     r5, [r3], #4
    bne     KeccakP1600times2_ExtractAndAddBytes_LoopLanes
KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast:
    ands    r1, r1, #7                      @ trailing bytes after the whole lanes
    beq     KeccakP1600times2_ExtractAndAddBytes_Done
KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast:
    ldrb    r5, [r2], #1
    ldrb    r4, [r0], #1
    eor     r4, r4, r5
    strb    r4, [r3], #1
    subs    r1, r1, #1
    bne     KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
KeccakP1600times2_ExtractAndAddBytes_Done:
    pop     { r4 - r9 }
KeccakP1600times2_ExtractAndAddBytes_Exit:
    bx      lr
|
1023
|
+
|
1024
|
+
|
1025
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
@                                                const unsigned char *input, unsigned char *output,
@                                                unsigned int laneCount, unsigned int laneOffset )
@
@ For laneCount lanes of both instances, writes output = state lane ^ input.
@ Instance 0 uses input / output, instance 1 uses the same pointers advanced
@ by 8 * laneOffset bytes. The state itself is only read.
@ In:      r0 = states, r1 = input, r2 = output, r3 = laneCount,
@          [sp] = laneOffset
@ Clobber: r0-r3, r12; aligned path also q0-q3 with r4-r5 saved/restored;
@          unaligned path saves/restores r4-r9
@ Two paths: NEON when both input and output are 8-byte aligned, word
@ accesses otherwise.
@
    .global KeccakP1600times2_ExtractAndAddLanesAll
    .type   KeccakP1600times2_ExtractAndAddLanesAll, %function
    .align  8
KeccakP1600times2_ExtractAndAddLanesAll:
    cmp     r3, #0
    beq     KeccakP1600times2_ExtractAndAddLanesAll_Exit
    orr     r12, r1, r2                     @ combine low bits of both pointers
    lsls    r12, r12, #29                   @ non-zero iff either is not 8-byte aligned
    bne     KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
    push    { r4, r5 }
    ldr     r12, [sp, #8]                   @ r12 = laneOffset (2 pushed words above it)
    lsrs    r3, r3, #1                      @ r3 = lane pairs, C = odd lane, Z = no pairs
    add     r4, r1, r12, LSL #3             @ r4 = input for instance 1
    add     r5, r2, r12, LSL #3             @ r5 = output for instance 1
    bcc     KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
    vldm    r0!, { d0 - d1 }                @ odd count: handle one lane first
    vldm    r1!, { d2 }
    vldm    r4!, { d3 }
    veor    q0, q1
    vstm    r2!, { d0 }
    vstm    r5!, { d1 }
    beq     KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone   @ Z still from the lsrs
KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned:
    vldm    r0!, { d0 - d3 }                @ q0 = lane i, q1 = lane i+1 (both instances)
    vldm    r1!, { d4 }                     @ input 0, lane i
    vldm    r4!, { d5 }                     @ input 1, lane i
    vldm    r1!, { d6 }                     @ input 0, lane i+1
    vldm    r4!, { d7 }                     @ input 1, lane i+1
    veor    q0, q2
    veor    q1, q3
    vstm    r2!, { d0 }
    vstm    r2!, { d2 }
    vstm    r5!, { d1 }
    vstm    r5!, { d3 }
    subs    r3, r3, #1
    bne     KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone:
    pop     { r4, r5 }
    bx      lr
KeccakP1600times2_ExtractAndAddLanesAll_Unaligned:
    push    { r4 - r9 }
    ldr     r12, [sp, #24]                  @ r12 = laneOffset (6 pushed words above it)
    add     r4, r1, r12, LSL #3             @ r4 = input for instance 1
    add     r5, r2, r12, LSL #3             @ r5 = output for instance 1
KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned:
    ldr     r6, [r1], #4                    @ instance 0
    ldr     r7, [r1], #4
    ldrd    r8, r9, [r0], #8
    eor     r8, r8, r6
    eor     r9, r9, r7
    str     r8, [r2], #4
    str     r9, [r2], #4
    ldr     r6, [r4], #4                    @ instance 1
    ldr     r7, [r4], #4
    ldrd    r8, r9, [r0], #8
    eor     r8, r8, r6
    eor     r9, r9, r7
    str     r8, [r5], #4
    str     r9, [r5], #4
    subs    r3, r3, #1
    bne     KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
    pop     { r4 - r9 }
KeccakP1600times2_ExtractAndAddLanesAll_Exit:
    bx      lr
|
1095
|
+
|
1096
|
+
|
1097
|
+
@----------------------------------------------------------------------------
|
1098
|
+
@ 24-round entry point: sets r1 = round-constant table and r2 = 24, then branches to KeccakP1600times2_PermuteAll.
|
1099
|
+
@ void KeccakP1600times2_PermuteAll_24rounds( void *states )
|
1100
|
+
@ r0 (states) is passed through untouched; 'b' (not 'bl') leaves lr alone, so the shared body's 'bx lr' returns to our caller.
|
1101
|
+
.align 8 @ GNU as on ARM takes a power of two here, i.e. a 256-byte boundary
|
1102
|
+
.global KeccakP1600times2_PermuteAll_24rounds
|
1103
|
+
.type KeccakP1600times2_PermuteAll_24rounds, %function;
|
1104
|
+
KeccakP1600times2_PermuteAll_24rounds:
|
1105
|
+
adr r1, KeccakP1600times2_Permute_RoundConstants24 @ r1 (rc) = PC-relative address of the first table entry
|
1106
|
+
movs r2, #24 @ r2 (nr) = 24 rounds
|
1107
|
+
b KeccakP1600times2_PermuteAll @ tail-branch into the shared permutation body
|
1108
|
+
|
1109
|
+
|
1110
|
+
@----------------------------------------------------------------------------
|
1111
|
+
@ 12-round entry point: sets r1 = second half of the round-constant table and r2 = 12, then branches to KeccakP1600times2_PermuteAll.
|
1112
|
+
@ void KeccakP1600times2_PermuteAll_12rounds( void *states )
|
1113
|
+
@ r0 (states) is passed through untouched; 'b' (not 'bl') leaves lr alone, so the shared body's 'bx lr' returns to our caller.
|
1114
|
+
.align 8 @ GNU as on ARM takes a power of two here, i.e. a 256-byte boundary
|
1115
|
+
.global KeccakP1600times2_PermuteAll_12rounds
|
1116
|
+
.type KeccakP1600times2_PermuteAll_12rounds, %function;
|
1117
|
+
KeccakP1600times2_PermuteAll_12rounds:
|
1118
|
+
adr r1, KeccakP1600times2_Permute_RoundConstants12 @ r1 (rc) = PC-relative address of the 13th table entry (label sits after the first 12 .quad values)
|
1119
|
+
movs r2, #12 @ r2 (nr) = 12 rounds
|
1120
|
+
b KeccakP1600times2_PermuteAll @ tail-branch into the shared permutation body
|
1121
|
+
|
1122
|
+
|
1123
|
+
.align 8 @ GNU as on ARM takes a power of two here, i.e. a 256-byte boundary
|
1124
|
+
KeccakP1600times2_Permute_RoundConstants24: @ 24 consecutive 64-bit round constants; values match the Keccak-f[1600] constants RC[0..23]
|
1125
|
+
.quad 0x0000000000000001 @ RC[0]
|
1126
|
+
.quad 0x0000000000008082 @ RC[1]
|
1127
|
+
.quad 0x800000000000808a @ RC[2]
|
1128
|
+
.quad 0x8000000080008000 @ RC[3]
|
1129
|
+
.quad 0x000000000000808b @ RC[4]
|
1130
|
+
.quad 0x0000000080000001 @ RC[5]
|
1131
|
+
.quad 0x8000000080008081 @ RC[6]
|
1132
|
+
.quad 0x8000000000008009 @ RC[7]
|
1133
|
+
.quad 0x000000000000008a @ RC[8]
|
1134
|
+
.quad 0x0000000000000088 @ RC[9]
|
1135
|
+
.quad 0x0000000080008009 @ RC[10]
|
1136
|
+
.quad 0x000000008000000a @ RC[11]
|
1137
|
+
KeccakP1600times2_Permute_RoundConstants12: @ alias into the same table: the 12-round variant starts at RC[12] and uses the last 12 entries
|
1138
|
+
.quad 0x000000008000808b @ RC[12]
|
1139
|
+
.quad 0x800000000000008b @ RC[13]
|
1140
|
+
.quad 0x8000000000008089 @ RC[14]
|
1141
|
+
.quad 0x8000000000008003 @ RC[15]
|
1142
|
+
.quad 0x8000000000008002 @ RC[16]
|
1143
|
+
.quad 0x8000000000000080 @ RC[17]
|
1144
|
+
.quad 0x000000000000800a @ RC[18]
|
1145
|
+
.quad 0x800000008000000a @ RC[19]
|
1146
|
+
.quad 0x8000000080008081 @ RC[20]
|
1147
|
+
.quad 0x8000000000008080 @ RC[21]
|
1148
|
+
.quad 0x0000000080000001 @ RC[22]
|
1149
|
+
.quad 0x8000000080008008 @ RC[23]
|
1150
|
+
|
1151
|
+
@----------------------------------------------------------------------------
|
1152
|
+
@ Shared permutation body reached by branch from the _24rounds / _12rounds entry points (no .global for it in this section).
|
1153
|
+
@ void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
|
1154
|
+
@ In: r0 = states, r1 = rc, r2 = nr. r2 is decremented by 4 per loop pass and tested for zero, so nr is expected to be a positive multiple of 4.
|
1155
|
+
.align 8
|
1156
|
+
KeccakP1600times2_PermuteAll:
|
1157
|
+
vpush {q4-q7} @ save q4-q7 (d8-d15, callee-saved under the AAPCS); restored by the vpop before return
|
1158
|
+
push {r4-r7} @ save r4-r7; restored by the pop before return (only r5 is written directly here, the rest presumably by the round macros)
|
1159
|
+
sub sp, #4*2*8+8 @ reserve 4*2*8+8 = 72 bytes: 64 bytes of scratch plus 8 bytes of slack so a 16-byte-aligned pointer fits inside
|
1160
|
+
mov r3, r0 @ r3 = read cursor over the state; r0 keeps the base pointer
|
1161
|
+
add r5, sp, #8 @ r5 = sp + 8, rounded down to a 16-byte boundary by the 'bic' below
|
1162
|
+
|
1163
|
+
@ PrepareTheta: XOR the five rows together so that q0..q4 end up holding the column parities Ca, Ce, Ci, Co, Cu
|
1164
|
+
@ Ca = ba ^ ga ^ ka ^ ma ^ sa
|
1165
|
+
@ Ce = be ^ ge ^ ke ^ me ^ se
|
1166
|
+
@ Ci = bi ^ gi ^ ki ^ mi ^ si
|
1167
|
+
@ Co = bo ^ go ^ ko ^ mo ^ so
|
1168
|
+
@ Cu = bu ^ gu ^ ku ^ mu ^ su
|
1169
|
+
vld1.64 { d0, d1, d2, d3 }, [r3:256]! @ _ba _be -> q0, q1 (r3 post-incremented; the :256 qualifier requires a 32-byte-aligned address)
|
1170
|
+
bic r5, #15 @ r5 = 16-byte-aligned scratch pointer inside the reserved stack area (presumably consumed by the round macros)
|
1171
|
+
vld1.64 { d4, d5, d6, d7 }, [r3:256]! @ _bi _bo -> q2, q3
|
1172
|
+
vld1.64 { d8, d9, d10, d11 }, [r3:256]! @ _bu _ga -> q4, q5
|
1173
|
+
vld1.64 { d12, d13 }, [r3:128]! @ _ge
|
1174
|
+
veor.64 q0, q0, q5 @ Ca = _ba ^ _ga
|
1175
|
+
vld1.64 { d14, d15 }, [r3:128]! @ _gi
|
1176
|
+
veor.64 q1, q1, q6 @ Ce = _be ^ _ge
|
1177
|
+
vld1.64 { d16, d17 }, [r3:128]! @ _go
|
1178
|
+
veor.64 q2, q2, q7 @ Ci = _bi ^ _gi
|
1179
|
+
vld1.64 { d18, d19 }, [r3:128]! @ _gu
|
1180
|
+
veor.64 q3, q3, q8 @ Co = _bo ^ _go
|
1181
|
+
vld1.64 { d10, d11 }, [r3:128]! @ _ka
|
1182
|
+
veor.64 q4, q4, q9 @ Cu = _bu ^ _gu
|
1183
|
+
vld1.64 { d12, d13 }, [r3:128]! @ _ke
|
1184
|
+
veor.64 q0, q0, q5 @ Ca ^= _ka
|
1185
|
+
vld1.64 { d14, d15 }, [r3:128]! @ _ki
|
1186
|
+
veor.64 q1, q1, q6 @ Ce ^= _ke
|
1187
|
+
vld1.64 { d16, d17 }, [r3:128]! @ _ko
|
1188
|
+
veor.64 q2, q2, q7 @ Ci ^= _ki
|
1189
|
+
vld1.64 { d18, d19 }, [r3:128]! @ _ku
|
1190
|
+
veor.64 q3, q3, q8 @ Co ^= _ko
|
1191
|
+
vld1.64 { d10, d11 }, [r3:128]! @ _ma
|
1192
|
+
veor.64 q4, q4, q9 @ Cu ^= _ku
|
1193
|
+
vld1.64 { d12, d13 }, [r3:128]! @ _me
|
1194
|
+
veor.64 q0, q0, q5 @ Ca ^= _ma
|
1195
|
+
vld1.64 { d14, d15 }, [r3:128]! @ _mi
|
1196
|
+
veor.64 q1, q1, q6 @ Ce ^= _me
|
1197
|
+
vld1.64 { d16, d17 }, [r3:128]! @ _mo
|
1198
|
+
veor.64 q2, q2, q7 @ Ci ^= _mi
|
1199
|
+
vld1.64 { d18, d19 }, [r3:128]! @ _mu
|
1200
|
+
veor.64 q3, q3, q8 @ Co ^= _mo
|
1201
|
+
vld1.64 { d10, d11 }, [r3:128]! @ _sa
|
1202
|
+
veor.64 q4, q4, q9 @ Cu ^= _mu
|
1203
|
+
vld1.64 { d12, d13 }, [r3:128]! @ _se
|
1204
|
+
veor.64 q0, q0, q5 @ Ca ^= _sa
|
1205
|
+
vld1.64 { d14, d15 }, [r3:128]! @ _si
|
1206
|
+
veor.64 q1, q1, q6 @ Ce ^= _se
|
1207
|
+
vld1.64 { d16, d17 }, [r3:128]! @ _so
|
1208
|
+
veor.64 q2, q2, q7 @ Ci ^= _si
|
1209
|
+
vld1.64 { d18, d19 }, [r3:128]! @ _su
|
1210
|
+
mov r3, r0 @ rewind r3 to the start of the state before entering the round loop
|
1211
|
+
veor.64 q3, q3, q8 @ Co ^= _so
|
1212
|
+
veor.64 q4, q4, q9 @ Cu ^= _su
|
1213
|
+
|
1214
|
+
KeccakP1600times2_PermuteAll_RoundLoop: @ each pass holds four ...Iota groups (presumably one round each), matching the 'subs r2, #4' below
|
1215
|
+
KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka @ _ba, _ge, _ki, _mo, _su
|
1216
|
+
KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa @ _ka, _me, _si, _bo, _gu
|
1217
|
+
KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga @ _sa, _be, _gi, _ko, _mu
|
1218
|
+
KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma @ _ga, _ke, _mi, _so, _bu
|
1219
|
+
KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba @ _ma, _se, _bi, _go, _ku
|
1220
|
+
|
1221
|
+
KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa @ _ba, _me, _gi, _so, _ku
|
1222
|
+
KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma @ _sa, _ke, _bi, _mo, _gu
|
1223
|
+
KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka @ _ma, _ge, _si, _ko, _bu
|
1224
|
+
KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga @ _ka, _be, _mi, _go, _su
|
1225
|
+
KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba @ _ga, _se, _ki, _bo, _mu
|
1226
|
+
|
1227
|
+
KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma @ _ba, _ke, _si, _go, _mu
|
1228
|
+
KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga @ _ma, _be, _ki, _so, _gu
|
1229
|
+
KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa @ _ga, _me, _bi, _ko, _su
|
1230
|
+
KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka @ _sa, _ge, _mi, _bo, _ku
|
1231
|
+
KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba @ _ka, _se, _gi, _mo, _bu
|
1232
|
+
|
1233
|
+
KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga @ _ba, _be, _bi, _bo, _bu
|
1234
|
+
KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka @ _ga, _ge, _gi, _go, _gu
|
1235
|
+
KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma @ _ka, _ke, _ki, _ko, _ku
|
1236
|
+
KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa @ _ma, _me, _mi, _mo, _mu
|
1237
|
+
subs r2, #4 @ count down 4 rounds; issued before the last macro, so that macro must leave the flags intact for the 'bne'
|
1238
|
+
KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba @ _sa, _se, _si, _so, _su
|
1239
|
+
bne KeccakP1600times2_PermuteAll_RoundLoop @ loop again while r2 != 0
|
1240
|
+
add sp, #4*2*8+8 @ release the 72-byte scratch area reserved in the prologue
|
1241
|
+
pop {r4-r7} @ restore r4-r7
|
1242
|
+
vpop {q4-q7} @ restore q4-q7
|
1243
|
+
bx lr @ return to the caller of the _24rounds / _12rounds entry point
|
1244
|
+
|
1245
|
+
|