digest-kangarootwelve 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -11
- data/Rakefile +2 -2
- data/digest-kangarootwelve.gemspec +322 -42
- data/ext/digest/kangarootwelve/ext.c +1 -1
- data/ext/digest/kangarootwelve/extconf.rb +13 -1
- data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
- data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
- data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
- data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
- data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
- data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
- data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
- data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
- data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
- data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
- data/lib/digest/kangarootwelve/version.rb +1 -1
- metadata +299 -21
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakSpongeWidth1600.c"
|
@@ -1,16 +1,20 @@
|
|
1
1
|
/*
|
2
|
-
Implementation by
|
3
|
-
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
|
4
|
-
denoted as "the implementer".
|
2
|
+
Implementation by Gilles Van Assche, hereby denoted as "the implementer".
|
5
3
|
|
6
|
-
For more information, feedback or questions, please refer to our
|
7
|
-
|
8
|
-
http://keyak.noekeon.org/
|
9
|
-
http://ketje.noekeon.org/
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
10
6
|
|
11
7
|
To the extent possible under law, the implementer has waived all copyright
|
12
8
|
and related or neighboring rights to the source code in this file.
|
13
9
|
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
This file contains macros that help make a PlSnP-compatible implementation by
|
14
|
+
serially falling back on a SnP-compatible implementation or on a PlSnP-compatible
|
15
|
+
implementation of lower parallelism degree.
|
16
|
+
|
17
|
+
Please refer to PlSnP-documentation.h for more details.
|
14
18
|
*/
|
15
19
|
|
16
20
|
/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../../ext.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KangarooTwelve.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakDuplexWidth1600.c"
|
@@ -0,0 +1,37 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
Please refer to SnP-documentation.h for more details.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#ifndef _KeccakP_1600_SnP_h_
|
17
|
+
#define _KeccakP_1600_SnP_h_
|
18
|
+
|
19
|
+
#define KeccakP1600_implementation "64-bit optimized ARM assembler implementation"
|
20
|
+
#define KeccakP1600_stateSizeInBytes 200
|
21
|
+
#define KeccakP1600_stateAlignment 32
|
22
|
+
|
23
|
+
/* void KeccakP1600_StaticInitialize( void ); */
|
24
|
+
#define KeccakP1600_StaticInitialize()
|
25
|
+
void KeccakP1600_Initialize(void *state);
|
26
|
+
/* void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); */
|
27
|
+
#define KeccakP1600_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData)
|
28
|
+
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
29
|
+
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
30
|
+
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
|
31
|
+
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
|
32
|
+
void KeccakP1600_Permute_12rounds(void *state);
|
33
|
+
void KeccakP1600_Permute_24rounds(void *state);
|
34
|
+
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
|
35
|
+
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
|
36
|
+
|
37
|
+
#endif
|
@@ -0,0 +1,826 @@
|
|
1
|
+
@
|
2
|
+
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
@
|
4
|
+
@ For more information, feedback or questions, please refer to our website:
|
5
|
+
@ https://keccak.team/
|
6
|
+
@
|
7
|
+
@ To the extent possible under law, the implementer has waived all copyright
|
8
|
+
@ and related or neighboring rights to the source code in this file.
|
9
|
+
@ http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
@
|
11
|
+
@ ---
|
12
|
+
@
|
13
|
+
@ This file implements Keccak-p[1600] in a SnP-compatible way.
|
14
|
+
@ Please refer to SnP-documentation.h for more details.
|
15
|
+
@
|
16
|
+
@ This implementation comes with KeccakP-1600-SnP.h in the same folder.
|
17
|
+
@ Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
18
|
+
@
|
19
|
+
|
20
|
+
@ WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
|
21
|
+
@ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
|
22
|
+
@ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
|
23
|
+
|
24
|
+
|
25
|
+
.text
|
26
|
+
|
27
|
+
@ conditional assembly settings
|
28
|
+
.equ LoopUnroll , 1 @ possible values 1, 2, 4, 6, 12
|
29
|
+
|
30
|
+
@ offsets in state
|
31
|
+
.equ _ba , 0*8
|
32
|
+
.equ _be , 1*8
|
33
|
+
.equ _bi , 2*8
|
34
|
+
.equ _bo , 3*8
|
35
|
+
.equ _bu , 4*8
|
36
|
+
.equ _ga , 5*8
|
37
|
+
.equ _ge , 6*8
|
38
|
+
.equ _gi , 7*8
|
39
|
+
.equ _go , 8*8
|
40
|
+
.equ _gu , 9*8
|
41
|
+
.equ _ka , 10*8
|
42
|
+
.equ _ke , 11*8
|
43
|
+
.equ _ki , 12*8
|
44
|
+
.equ _ko , 13*8
|
45
|
+
.equ _ku , 14*8
|
46
|
+
.equ _ma , 15*8
|
47
|
+
.equ _me , 16*8
|
48
|
+
.equ _mi , 17*8
|
49
|
+
.equ _mo , 18*8
|
50
|
+
.equ _mu , 19*8
|
51
|
+
.equ _sa , 20*8
|
52
|
+
.equ _se , 21*8
|
53
|
+
.equ _si , 22*8
|
54
|
+
.equ _so , 23*8
|
55
|
+
.equ _su , 24*8
|
56
|
+
|
57
|
+
@ macros
|
58
|
+
|
59
|
+
.macro LoadState
|
60
|
+
vld1.64 d0, [r0:64]!
|
61
|
+
vld1.64 d2, [r0:64]!
|
62
|
+
vld1.64 d4, [r0:64]!
|
63
|
+
vld1.64 d6, [r0:64]!
|
64
|
+
vld1.64 d8, [r0:64]!
|
65
|
+
vld1.64 d1, [r0:64]!
|
66
|
+
vld1.64 d3, [r0:64]!
|
67
|
+
vld1.64 d5, [r0:64]!
|
68
|
+
vld1.64 d7, [r0:64]!
|
69
|
+
vld1.64 d9, [r0:64]!
|
70
|
+
vld1.64 d10, [r0:64]!
|
71
|
+
vld1.64 d12, [r0:64]!
|
72
|
+
vld1.64 d14, [r0:64]!
|
73
|
+
vld1.64 d16, [r0:64]!
|
74
|
+
vld1.64 d18, [r0:64]!
|
75
|
+
vld1.64 d11, [r0:64]!
|
76
|
+
vld1.64 d13, [r0:64]!
|
77
|
+
vld1.64 d15, [r0:64]!
|
78
|
+
vld1.64 d17, [r0:64]!
|
79
|
+
vld1.64 d19, [r0:64]!
|
80
|
+
vld1.64 { d20, d21 }, [r0:128]!
|
81
|
+
vld1.64 { d22, d23 }, [r0:128]!
|
82
|
+
vld1.64 d24, [r0:64]
|
83
|
+
sub r0, r0, #24*8
|
84
|
+
.endm
|
85
|
+
|
86
|
+
.macro StoreState
|
87
|
+
vst1.64 d0, [r0:64]!
|
88
|
+
vst1.64 d2, [r0:64]!
|
89
|
+
vst1.64 d4, [r0:64]!
|
90
|
+
vst1.64 d6, [r0:64]!
|
91
|
+
vst1.64 d8, [r0:64]!
|
92
|
+
vst1.64 d1, [r0:64]!
|
93
|
+
vst1.64 d3, [r0:64]!
|
94
|
+
vst1.64 d5, [r0:64]!
|
95
|
+
vst1.64 d7, [r0:64]!
|
96
|
+
vst1.64 d9, [r0:64]!
|
97
|
+
vst1.64 d10, [r0:64]!
|
98
|
+
vst1.64 d12, [r0:64]!
|
99
|
+
vst1.64 d14, [r0:64]!
|
100
|
+
vst1.64 d16, [r0:64]!
|
101
|
+
vst1.64 d18, [r0:64]!
|
102
|
+
vst1.64 d11, [r0:64]!
|
103
|
+
vst1.64 d13, [r0:64]!
|
104
|
+
vst1.64 d15, [r0:64]!
|
105
|
+
vst1.64 d17, [r0:64]!
|
106
|
+
vst1.64 d19, [r0:64]!
|
107
|
+
vst1.64 { d20, d21 }, [r0:128]!
|
108
|
+
vst1.64 { d22, d23 }, [r0:128]!
|
109
|
+
vst1.64 d24, [r0:64]
|
110
|
+
.endm
|
111
|
+
|
112
|
+
.macro RhoPi4 dst1, src1, rot1, dst2, src2, rot2, dst3, src3, rot3, dst4, src4, rot4
|
113
|
+
.if (\rot1 & 7) != 0
|
114
|
+
vshl.u64 \dst1, \src1, #\rot1
|
115
|
+
.else
|
116
|
+
vext.8 \dst1, \src1, \src1, #8-\rot1/8
|
117
|
+
.endif
|
118
|
+
.if (\rot2 & 7) != 0
|
119
|
+
vshl.u64 \dst2, \src2, #\rot2
|
120
|
+
.else
|
121
|
+
vext.8 \dst2, \src2, \src2, #8-\rot2/8
|
122
|
+
.endif
|
123
|
+
.if (\rot3 & 7) != 0
|
124
|
+
vshl.u64 \dst3, \src3, #\rot3
|
125
|
+
.else
|
126
|
+
vext.8 \dst3, \src3, \src3, #8-\rot3/8
|
127
|
+
.endif
|
128
|
+
.if (\rot4 & 7) != 0
|
129
|
+
vshl.u64 \dst4, \src4, #\rot4
|
130
|
+
.else
|
131
|
+
vext.8 \dst4, \src4, \src4, #8-\rot4/8
|
132
|
+
.endif
|
133
|
+
.if (\rot1 & 7) != 0
|
134
|
+
vsri.u64 \dst1, \src1, #64-\rot1
|
135
|
+
.endif
|
136
|
+
.if (\rot2 & 7) != 0
|
137
|
+
vsri.u64 \dst2, \src2, #64-\rot2
|
138
|
+
.endif
|
139
|
+
.if (\rot3 & 7) != 0
|
140
|
+
vsri.u64 \dst3, \src3, #64-\rot3
|
141
|
+
.endif
|
142
|
+
.if (\rot4 & 7) != 0
|
143
|
+
vsri.u64 \dst4, \src4, #64-\rot4
|
144
|
+
.endif
|
145
|
+
.endm
|
146
|
+
|
147
|
+
@ KeccakRound: one permutation round (Theta, Rho+Pi, Chi, Iota) applied to the
@ 25 lanes held in d0-d24.
@   r0  base of a 16-byte aligned buffer (":128" qualifiers). Its first 48
@       bytes are overwritten with spilled copies of q12, q4 and q9, which are
@       reloaded later in the round; r0 itself is restored after each
@       store/load group by "sub r0, r0, #32".
@   r1  points at this round's 64-bit constant and is post-incremented by 8.
@   d25-d31 are used as temporaries.
.macro KeccakRound
|
148
|
+
|
149
|
+
@Prepare Theta: column parities, interleaved with the spill of q12/q4/q9 to [r0]
|
150
|
+
veor.64 q13, q0, q5
|
151
|
+
vst1.64 {q12}, [r0:128]! @ spill q12 (d24/d25) to [r0]
|
152
|
+
veor.64 q14, q1, q6
|
153
|
+
vst1.64 {q4}, [r0:128]! @ spill q4 (d8/d9) to [r0+16]
|
154
|
+
veor.64 d26, d26, d27
|
155
|
+
vst1.64 {q9}, [r0:128] @ spill q9 (d18/d19) to [r0+32], no writeback
|
156
|
+
veor.64 d28, d28, d29
|
157
|
+
veor.64 d26, d26, d20
|
158
|
+
veor.64 d27, d28, d21
|
159
|
+
|
160
|
+
veor.64 q14, q2, q7
|
161
|
+
veor.64 q15, q3, q8
|
162
|
+
veor.64 q4, q4, q9
|
163
|
+
veor.64 d28, d28, d29
|
164
|
+
veor.64 d30, d30, d31
|
165
|
+
veor.64 d25, d8, d9
|
166
|
+
veor.64 d28, d28, d22
|
167
|
+
veor.64 d29, d30, d23
|
168
|
+
veor.64 d25, d25, d24
|
169
|
+
sub r0, r0, #32 @ undo the two post-increments of the spill stores
|
170
|
+
|
171
|
+
@Apply Theta (each vadd x,x followed by vsri #63 forms a rotate-left by 1)
|
172
|
+
vadd.u64 d30, d27, d27
|
173
|
+
vadd.u64 d24, d28, d28
|
174
|
+
vadd.u64 d8, d29, d29
|
175
|
+
vadd.u64 d18, d25, d25
|
176
|
+
|
177
|
+
vsri.64 d30, d27, #63
|
178
|
+
vsri.64 d24, d28, #63
|
179
|
+
vsri.64 d8, d29, #63
|
180
|
+
vsri.64 d18, d25, #63
|
181
|
+
|
182
|
+
veor.64 d30, d30, d25
|
183
|
+
veor.64 d24, d24, d26
|
184
|
+
veor.64 d8, d8, d27
|
185
|
+
vadd.u64 d27, d26, d26 @u
|
186
|
+
veor.64 d18, d18, d28
|
187
|
+
|
188
|
+
vmov.i64 d31, d30
|
189
|
+
vmov.i64 d25, d24
|
190
|
+
vsri.64 d27, d26, #63 @u
|
191
|
+
vmov.i64 d9, d8
|
192
|
+
vmov.i64 d19, d18
|
193
|
+
|
194
|
+
veor.64 d20, d20, d30
|
195
|
+
veor.64 d21, d21, d24
|
196
|
+
veor.64 d27, d27, d29 @u
|
197
|
+
veor.64 d22, d22, d8
|
198
|
+
veor.64 d23, d23, d18
|
199
|
+
vmov.i64 d26, d27 @u
|
200
|
+
|
201
|
+
veor.64 q0, q0, q15
|
202
|
+
veor.64 q1, q1, q12
|
203
|
+
veor.64 q2, q2, q4
|
204
|
+
veor.64 q3, q3, q9
|
205
|
+
|
206
|
+
veor.64 q5, q5, q15
|
207
|
+
veor.64 q6, q6, q12
|
208
|
+
vld1.64 {q12}, [r0:128]! @ reload spilled q12
|
209
|
+
veor.64 q7, q7, q4
|
210
|
+
vld1.64 {q4}, [r0:128]! @ reload spilled q4
|
211
|
+
veor.64 q8, q8, q9
|
212
|
+
vld1.64 {q9}, [r0:128] @ reload spilled q9
|
213
|
+
veor.64 d24, d24, d26 @u
|
214
|
+
sub r0, r0, #32 @ undo the two post-increments of the reloads
|
215
|
+
veor.64 q4, q4, q13 @u
|
216
|
+
veor.64 q9, q9, q13 @u
|
217
|
+
|
218
|
+
@Rho Pi: d2, d4, d6 and d8 are copied first because the chain below overwrites them before reading them
|
219
|
+
vmov.i64 d27, d2
|
220
|
+
vmov.i64 d28, d4
|
221
|
+
vmov.i64 d29, d6
|
222
|
+
vmov.i64 d25, d8
|
223
|
+
|
224
|
+
RhoPi4 d2, d3, 44, d4, d14, 43, d8, d24, 14, d6, d17, 21 @ 1 < 6, 2 < 12, 4 < 24, 3 < 18
|
225
|
+
RhoPi4 d3, d9, 20, d14, d16, 25, d24, d21, 2, d17, d15, 15 @ 6 < 9, 12 < 13, 24 < 21, 18 < 17
|
226
|
+
RhoPi4 d9, d22, 61, d16, d19, 8, d21, d7, 55, d15, d12, 10 @ 9 < 22, 13 < 19, 21 < 8, 17 < 11
|
227
|
+
RhoPi4 d22, d18, 39, d19, d23, 56, d7, d13, 45, d12, d5, 6 @ 22 < 14, 19 < 23, 8 < 16, 11 < 7
|
228
|
+
RhoPi4 d18, d20, 18, d23, d11, 41, d13, d1, 36, d5, d10, 3 @ 14 < 20, 23 < 15, 16 < 5, 7 < 10
|
229
|
+
RhoPi4 d20, d28, 62, d11, d25, 27, d1, d29, 28, d10, d27, 1 @ 20 < 2, 15 < 4, 5 < 3, 10 < 1
|
230
|
+
|
231
|
+
@Chi b+g (two rows at once: each q register pairs a lane of row b with a lane of row g)
|
232
|
+
vmov.i64 q13, q0
|
233
|
+
vbic.64 q15, q2, q1 @ ba ^= ~be & bi
|
234
|
+
veor.64 q0, q15
|
235
|
+
vmov.i64 q14, q1
|
236
|
+
vbic.64 q15, q3, q2 @ be ^= ~bi & bo
|
237
|
+
veor.64 q1, q15
|
238
|
+
vbic.64 q15, q4, q3 @ bi ^= ~bo & bu
|
239
|
+
veor.64 q2, q15
|
240
|
+
vbic.64 q15, q13, q4 @ bo ^= ~bu & ba
|
241
|
+
vbic.64 q13, q14, q13 @ bu ^= ~ba & be
|
242
|
+
veor.64 q3, q15
|
243
|
+
veor.64 q4, q13
|
244
|
+
|
245
|
+
@Chi k+m (same pattern on q5-q9; the per-line comments keep the b-row lane names)
|
246
|
+
vmov.i64 q13, q5
|
247
|
+
vbic.64 q15, q7, q6 @ ba ^= ~be & bi
|
248
|
+
veor.64 q5, q15
|
249
|
+
vmov.i64 q14, q6
|
250
|
+
vbic.64 q15, q8, q7 @ be ^= ~bi & bo
|
251
|
+
veor.64 q6, q15
|
252
|
+
vbic.64 q15, q9, q8 @ bi ^= ~bo & bu
|
253
|
+
veor.64 q7, q15
|
254
|
+
vbic.64 q15, q13, q9 @ bo ^= ~bu & ba
|
255
|
+
vbic.64 q13, q14, q13 @ bu ^= ~ba & be
|
256
|
+
veor.64 q8, q15
|
257
|
+
veor.64 q9, q13
|
258
|
+
|
259
|
+
@Chi s (single row in d20-d24; the per-line comments keep the b-row lane names)
|
260
|
+
vmov.i64 q13, q10
|
261
|
+
vbic.64 d30, d22, d21 @ ba ^= ~be & bi
|
262
|
+
vbic.64 d31, d23, d22 @ be ^= ~bi & bo
|
263
|
+
veor.64 q10, q15
|
264
|
+
vbic.64 d30, d24, d23 @ bi ^= ~bo & bu
|
265
|
+
vbic.64 d31, d26, d24 @ bo ^= ~bu & ba
|
266
|
+
vbic.64 d26, d27, d26 @ bu ^= ~ba & be
|
267
|
+
veor.64 q11, q15
|
268
|
+
vld1.64 d30, [r1:64]! @ Iota: fetch the round constant, r1 += 8
|
269
|
+
veor.64 d24, d26
|
270
|
+
veor.64 d0, d0, d30 @ Iota
|
271
|
+
.endm
|
272
|
+
|
273
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600_StaticInitialize( void )
@
@ This implementation has nothing to prepare: the routine returns at once.
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_StaticInitialize
    .type   KeccakP1600_StaticInitialize, %function
KeccakP1600_StaticInitialize:
    bx      lr
|
282
|
+
|
283
|
+
|
284
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600_Initialize(void *state)
@
@ Fills the 25 lanes (200 bytes) starting at r0 with zero.
@ In:    r0 = state
@ Clobb: q0-q3 (d0-d7); r0 is left 200 bytes past its entry value.
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_Initialize
    .type   KeccakP1600_Initialize, %function
KeccakP1600_Initialize:
    .irp    zreg, q0, q1, q2, q3
    vmov.i64 \zreg, #0
    .endr
    .rept   3                               @ 3 x 8 lanes = 24 lanes
    vstm    r0!, { d0 - d7 }
    .endr
    vstm    r0!, { d0 }                     @ 25th lane
    bx      lr
|
301
|
+
|
302
|
+
|
303
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
@
@ XORs one byte into the state: state[offset] ^= byte.
@ In:    r0 = state, r1 = byte, r2 = offset
@ Clobb: r3, flags
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_AddByte
    .type   KeccakP1600_AddByte, %function
KeccakP1600_AddByte:
    ldrb    r3, [r0, r2]                    @ r3 = state[offset]
    eors    r3, r3, r1
    strb    r3, [r0, r2]                    @ state[offset] = r3
    bx      lr
|
315
|
+
|
316
|
+
|
317
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs length bytes of data into the state, starting at byte offset:
@ state[offset + i] ^= data[i].
@ In:    r0 = state, r1 = data, r2 = offset, r3 = length
@ Work:  r3 holds (bytes remaining - 8) in the lane loop and
@        (bytes remaining - 1) in the byte loop; the carry flag of each
@        subtraction decides whether another iteration runs.
@ Clobb: r0-r3, r12, flags (r4 and lr are saved and restored)
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_AddBytes
    .type   KeccakP1600_AddBytes, %function
KeccakP1600_AddBytes:
    push    {r4,lr}
    adds    r0, r0, r2                      @ r0 = state + offset
    subs    r3, r3, #8                      @ fewer than 8 bytes in total?
    bcc     .LAddBytes_tail
.LAddBytes_lane:                            @ 8 bytes per pass, as two words
    ldr     r2, [r0]
    ldr     r4, [r1], #4
    ldr     r12, [r0, #4]
    ldr     lr, [r1], #4
    eors    r2, r2, r4
    eors    r12, r12, lr
    subs    r3, r3, #8
    str     r2, [r0], #4
    str     r12, [r0], #4
    bcs     .LAddBytes_lane
.LAddBytes_tail:
    adds    r3, r3, #7                      @ r3 = leftover bytes - 1; no carry means none left
    bcc     .LAddBytes_done
.LAddBytes_byte:
    ldrb    r2, [r0]
    ldrb    r4, [r1], #1
    eors    r2, r2, r4
    strb    r2, [r0], #1
    subs    r3, r3, #1
    bcs     .LAddBytes_byte
.LAddBytes_done:
    pop     {r4,pc}
|
352
|
+
|
353
|
+
|
354
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes of data over the state, starting at byte offset:
@ state[offset + i] = data[i].
@ In:    r0 = state, r1 = data, r2 = offset, r3 = length
@ Clobb: r0-r3, r12, flags
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_OverwriteBytes
    .type   KeccakP1600_OverwriteBytes, %function
KeccakP1600_OverwriteBytes:
    adds    r0, r0, r2                      @ r0 = state + offset
    subs    r3, r3, #8                      @ fewer than 8 bytes in total?
    bcc     .LOverwriteBytes_tail
.LOverwriteBytes_lane:                      @ 8 bytes per pass, as two words
    ldr     r2, [r1], #4
    ldr     r12, [r1], #4
    subs    r3, r3, #8
    str     r2, [r0], #4
    str     r12, [r0], #4
    bcs     .LOverwriteBytes_lane
.LOverwriteBytes_tail:
    adds    r3, r3, #7                      @ r3 = leftover bytes - 1; no carry means none left
    bcc     .LOverwriteBytes_done
.LOverwriteBytes_byte:
    ldrb    r2, [r1], #1
    subs    r3, r3, #1
    strb    r2, [r0], #1
    bcs     .LOverwriteBytes_byte
.LOverwriteBytes_done:
    bx      lr
|
382
|
+
|
383
|
+
|
384
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
@
@ Zeroes the first byteCount bytes of the state: byteCount / 8 whole lanes
@ through d0, then byteCount % 8 single bytes.
@ In:    r0 = state, r1 = byteCount
@ Clobb: r0-r3, d0 (only when at least one whole lane is cleared), flags
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_OverwriteWithZeroes
    .type   KeccakP1600_OverwriteWithZeroes, %function
KeccakP1600_OverwriteWithZeroes:
    lsrs    r2, r1, #3                      @ r2 = number of whole lanes
    beq     .LOverwriteWithZeroes_tail
    vmov.i64 d0, #0
.LOverwriteWithZeroes_lane:
    subs    r2, r2, #1
    vstm    r0!, { d0 }
    bne     .LOverwriteWithZeroes_lane
.LOverwriteWithZeroes_tail:
    ands    r1, #7                          @ r1 = leftover bytes
    beq     .LOverwriteWithZeroes_done
    movs    r3, #0
.LOverwriteWithZeroes_byte:
    subs    r1, r1, #1
    strb    r3, [r0], #1
    bne     .LOverwriteWithZeroes_byte
.LOverwriteWithZeroes_done:
    bx      lr
|
409
|
+
|
410
|
+
|
411
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies length bytes out of the state, starting at byte offset, into data:
@ data[i] = state[offset + i]. The state is only read; data is written.
@ In:    r0 = state, r1 = data, r2 = offset, r3 = length
@ Clobb: r0-r3, r12, flags
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_ExtractBytes
    .type   KeccakP1600_ExtractBytes, %function
KeccakP1600_ExtractBytes:
    adds    r0, r0, r2                      @ r0 = state + offset
    subs    r3, r3, #8                      @ fewer than 8 bytes in total?
    bcc     .LExtractBytes_tail
.LExtractBytes_lane:                        @ 8 bytes per pass, as two words
    ldr     r2, [r0], #4
    ldr     r12, [r0], #4
    subs    r3, r3, #8
    str     r2, [r1], #4
    str     r12, [r1], #4
    bcs     .LExtractBytes_lane
.LExtractBytes_tail:
    adds    r3, r3, #7                      @ r3 = leftover bytes - 1; no carry means none left
    bcc     .LExtractBytes_done
.LExtractBytes_byte:
    ldrb    r2, [r0], #1
    subs    r3, r3, #1
    strb    r2, [r1], #1
    bcs     .LExtractBytes_byte
.LExtractBytes_done:
    bx      lr
|
439
|
+
|
440
|
+
|
441
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
@
@ output[i] = state[offset + i] ^ input[i] for i in [0, length).
@ In:    r0 = state, r1 = input, r2 = output, r3 = offset,
@        first stack argument = length
@ Clobb: r0-r3, flags (r4 and r5 are saved and restored)
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_ExtractAndAddBytes
    .type   KeccakP1600_ExtractAndAddBytes, %function
KeccakP1600_ExtractAndAddBytes:
    push    {r4,r5}
    add     r0, r0, r3                      @ r0 = state + offset; r3 is free from here on
    ldr     r3, [sp, #8]                    @ length sits above the two saved registers
    subs    r3, r3, #8                      @ fewer than 8 bytes in total?
    bcc     .LExtractAndAddBytes_tail
.LExtractAndAddBytes_lane:                  @ 8 bytes per pass, as two words
    ldr     r5, [r0], #4
    ldr     r4, [r1], #4
    eor     r5, r5, r4
    str     r5, [r2], #4
    subs    r3, r3, #8                      @ carry is consumed by the bcs below; nothing in between changes flags
    ldr     r5, [r0], #4
    ldr     r4, [r1], #4
    eor     r5, r5, r4
    str     r5, [r2], #4
    bcs     .LExtractAndAddBytes_lane
.LExtractAndAddBytes_tail:
    adds    r3, r3, #7                      @ r3 = leftover bytes - 1; no carry means none left
    bcc     .LExtractAndAddBytes_done
.LExtractAndAddBytes_byte:
    ldrb    r5, [r0], #1
    ldrb    r4, [r1], #1
    eor     r5, r5, r4
    strb    r5, [r2], #1
    subs    r3, r3, #1
    bcs     .LExtractAndAddBytes_byte
.LExtractAndAddBytes_done:
    pop     {r4,r5}
    bx      lr
|
478
|
+
|
479
|
+
|
480
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds)
@
@ Applies the last nrounds rounds of the permutation to the state.
@ In:    r0 = state, r1 = nrounds
@ The round-constant pointer is taken nrounds entries before
@ KeccakP1600_Permute_RoundConstants0 (the end of the table), so nrounds must
@ not exceed the 24 entries of the table. The round loop counts down in steps
@ of LoopUnroll and stops only on exactly zero, so nrounds must also be a
@ multiple of LoopUnroll.
@ nrounds == 0 returns immediately with the state untouched; without this
@ guard the countdown would start at zero and never terminate properly.
@
.align 8
.global KeccakP1600_Permute_Nrounds
.type KeccakP1600_Permute_Nrounds, %function;
KeccakP1600_Permute_Nrounds:
    movs    r2, r1                          @ r2 = round count; Z set when it is zero
    bxeq    lr                              @ zero rounds: nothing to do
    adr     r1, KeccakP1600_Permute_RoundConstants0
    sub     r1, r1, r2, LSL #3              @ r1 = table end - 8 * nrounds
    b       KeccakP1600_Permute
|
492
|
+
|
493
|
+
|
494
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_Permute_12rounds( void *state )
@
@ In:  r0 = state
@ Sets r1 to the constants that start at KeccakP1600_Permute_RoundConstants12
@ and r2 to a round count of 12, then continues in KeccakP1600_Permute.
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_Permute_12rounds
    .type   KeccakP1600_Permute_12rounds, %function
KeccakP1600_Permute_12rounds:
    adr     r1, KeccakP1600_Permute_RoundConstants12
    movs    r2, #12
    b       KeccakP1600_Permute
|
505
|
+
|
506
|
+
|
507
|
+
@ ----------------------------------------------------------------------------
@
@ void KeccakP1600_Permute_24rounds( void *state )
@
@ In:  r0 = state
@ Sets r1 to the start of the constant table
@ (KeccakP1600_Permute_RoundConstants24) and r2 to a round count of 24, then
@ continues in KeccakP1600_Permute.
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_Permute_24rounds
    .type   KeccakP1600_Permute_24rounds, %function
KeccakP1600_Permute_24rounds:
    adr     r1, KeccakP1600_Permute_RoundConstants24
    movs    r2, #24
    b       KeccakP1600_Permute
|
518
|
+
|
519
|
+
|
520
|
+
@ Round constants, one 64-bit word per round, in the order the rounds consume
@ them (each round post-increments its pointer by 8).
@   KeccakP1600_Permute_RoundConstants24  start of the table (24 entries follow)
@   KeccakP1600_Permute_RoundConstants12  entry 12 (12 entries follow)
@   KeccakP1600_Permute_RoundConstants0   end of the table (no entries follow)
    .p2align 8                              @ 2^8 = 256-byte boundary
KeccakP1600_Permute_RoundConstants24:
    .quad   0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000  @ entries 0-3
    .quad   0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009  @ entries 4-7
    .quad   0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a  @ entries 8-11
KeccakP1600_Permute_RoundConstants12:
    .quad   0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003  @ entries 12-15
    .quad   0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a  @ entries 16-19
    .quad   0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008  @ entries 20-23
KeccakP1600_Permute_RoundConstants0:
|
548
|
+
|
549
|
+
@ KeccakP1600_XORandPermuteAsmOnly (internal entry point, register state only)
@
@ XORs d30 into the register that holds lane number r5, then runs the round
@ loop at KeccakP1600_PermuteAsmOnly.
@ In:  r5 = lane index (0..24), d30 = value to XOR, d0-d24 = state,
@      plus everything KeccakP1600_PermuteAsmOnly expects.
@
@ "add pc, pc, r5, LSL #3" is a computed jump into a table of 8-byte entries
@ (one veor plus one branch). pc reads as the address of the add plus 8, which
@ is the first table entry, so the mov in between is padding and never runs.
@ The last entry (lane 24) has no branch: it falls straight through into
@ KeccakP1600_PermuteAsmOnly, which must therefore follow immediately.
    .p2align 8                              @ 2^8 = 256-byte boundary
KeccakP1600_XORandPermuteAsmOnly:
    add     pc, pc, r5, LSL #3
    mov     r1, #0                          @ padding so the table starts at pc + 8; never executed
    .irp    lane, d0, d2, d4, d6, d8, d1, d3, d5, d7, d9, d10, d12, d14, d16, d18, d11, d13, d15, d17, d19, d20, d21, d22, d23
    veor.64 \lane, \lane, d30
    b       KeccakP1600_PermuteAsmOnly
    .endr
    veor.64 d24, d24, d30                   @ lane 24: fall through
|
607
|
+
@ KeccakP1600_PermuteAsmOnly (internal entry point, register state only)
@
@ Runs r2 rounds on the state held in d0-d24.
@ In:  r0 = 16-byte aligned buffer used by KeccakRound as spill space
@      r1 = pointer to the first round constant to use (advanced by 8 per round)
@      r2 = number of rounds; must be a non-zero multiple of LoopUnroll,
@           because the loop ends only when the counter reaches exactly zero
@      lr = return address
@ The loop body holds exactly LoopUnroll rounds, so the number of rounds
@ executed per pass always equals the amount subtracted from r2, whatever
@ value LoopUnroll is set to.
KeccakP1600_PermuteAsmOnly:
KeccakP1600_Permute_RoundLoop:
    .rept   LoopUnroll
    KeccakRound
    .endr
    subs    r2, #LoopUnroll
    bne     KeccakP1600_Permute_RoundLoop
    bx      lr
|
632
|
+
|
633
|
+
|
634
|
+
@----------------------------------------------------------------------------
@
@ void KeccakP1600_Permute( void *state, void *roundConstants, unsigned int numberOfRounds )
@
@ In:    r0 = state, r1 = first round constant to use, r2 = number of rounds
@ Loads the state into NEON registers with LoadState, runs the round loop and
@ writes the result back with StoreState.
@ q4-q7 are saved around the call because the round code overwrites them.
@ The return address is parked in r3, since "bl" replaces lr and the round
@ loop leaves r3 alone.
@
    .p2align 8                              @ 2^8 = 256-byte boundary
    .global KeccakP1600_Permute
    .type   KeccakP1600_Permute, %function
KeccakP1600_Permute:
    mov     r3, lr                          @ keep the caller's return address
    vpush   {q4-q7}
    LoadState
    bl      KeccakP1600_PermuteAsmOnly
    StoreState
    vpop    {q4-q7}
    bx      r3
|
649
|
+
|
650
|
+
|
651
|
+
.if FastLoop != 0

@----------------------------------------------------------------------------
@
@ size_t KeccakF1600_FastLoop_Absorb( void *state, unsigned int laneCount, unsigned char *data,
@                                       size_t dataByteLen, unsigned char trailingBits )
@
@ Absorbs as many whole blocks of laneCount lanes as dataByteLen provides.
@ For each block: XOR laneCount lanes of data into the state, XOR the
@ trailingBits byte into the low byte of lane number laneCount, then run the
@ 24-round permutation. Returns the number of data bytes consumed.
@
@ In:    r0 = state, r1 = laneCount, r2 = data, r3 = dataByteLen,
@        first stack argument = trailingBits
@ Out:   r0 = bytes processed (0 when not even one block is available)
@ Roles: r3 = lanes left minus laneCount (borrow ends the loop)
@        r4 = initial data pointer, r5 = laneCount, r6 = running data pointer
@        r7 = address of trailingBits, later the constant -8 (load stride)
@        [sp] = 8-byte lane holding trailingBits in its low byte
@
@ Before every call into the round loop r1 is loaded with the start of the
@ constant table and r2 with 24. KeccakP1600_PermuteAsmOnly reads the round
@ constants through r1 and counts rounds in r2, and at these call sites both
@ registers would otherwise still hold laneCount / a jump index and a data
@ pointer. adrl is used because the table may be out of range of a plain adr.
@
@ NOTE(review): laneCount == 25 would index one entry past the table in
@ KeccakP1600_XORandPermuteAsmOnly - presumably callers never pass it; confirm.
@
.align 8
.global KeccakF1600_FastLoop_Absorb
.type KeccakF1600_FastLoop_Absorb, %function;
KeccakF1600_FastLoop_Absorb:
    push    {r4-r8,lr}                      @ 6 CPU registers (24 bytes)
    lsr     r3, r3, #3                      @ r3 nbrLanes = dataByteLen / SnP_laneLengthInBytes
    mov     r6, r2                          @ r6 data pointer
    subs    r3, r3, r1                      @ .if (nbrLanes >= laneCount)
    mov     r4, r2                          @ r4 initial data pointer
    bcc     KeccakF1600_FastLoop_Absorb_Exit
    mov     r5, r1
    vpush   {q4-q7}                         @ 4 quad registers (64 bytes)
    LoadState

    sub     sp, sp, #8                      @ alloc space for trailingBits lane
    veor.64 d30, d30, d30
    add     r7, sp, #(6+16+2)*4             @ skip local lane, q4-q7 and the 6 saved registers
    vld1.8  {d30[0]}, [r7]                  @ low byte of d30 = trailingBits
    vst1.64 {d30}, [sp:64]

    cmp     r5, #21
    bne     KeccakF1600_FastLoop_Absorb_Not21Lanes
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
    vld1.64 { d26, d27, d28, d29 }, [r6]!   @ XOR first 21 lanes
    veor.64 d0, d0, d26
    veor.64 d2, d2, d27
    veor.64 d4, d4, d28
    veor.64 d6, d6, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d8, d8, d26
    veor.64 d1, d1, d27
    veor.64 d3, d3, d28
    veor.64 d5, d5, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d7, d7, d26
    veor.64 d9, d9, d27
    veor.64 d10, d10, d28
    veor.64 d12, d12, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d14, d14, d26
    veor.64 d16, d16, d27
    veor.64 d18, d18, d28
    veor.64 d11, d11, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d13, d13, d26
    veor.64 d15, d15, d27
    veor.64 d17, d17, d28
    veor.64 d19, d19, d29
    vld1.64 { d26 }, [r6]!
    veor.64 d20, d20, d26

    vld1.64 {d30}, [sp:64]                  @ xor trailingBits
    veor.64 d21, d21, d30
    adrl    r1, KeccakP1600_Permute_RoundConstants24    @ round loop inputs: constants ...
    mov     r2, #24                                     @ ... and round count
    bl      KeccakP1600_PermuteAsmOnly
    subs    r3, r3, r5                      @ nbrLanes -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_Loop21Lanes
KeccakF1600_FastLoop_Absorb_Done:
    add     sp, sp, #8                      @ free trailingBits lane
    StoreState
    vpop    {q4-q7}
KeccakF1600_FastLoop_Absorb_Exit:
    sub     r0, r6, r4                      @ processed = data pointer - initial data pointer
    pop     {r4-r8,pc}
KeccakF1600_FastLoop_Absorb_Not21Lanes:
    cmp     r5, #16
    mvn     r7, #7                          @ r7 = -8
    blo     KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes
KeccakF1600_FastLoop_Absorb_Loop16OrMoreLanes:
    vld1.64 { d26, d27, d28, d29 }, [r6]!   @ XOR first 16 lanes
    veor.64 d0, d0, d26
    veor.64 d2, d2, d27
    veor.64 d4, d4, d28
    veor.64 d6, d6, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d8, d8, d26
    veor.64 d1, d1, d27
    veor.64 d3, d3, d28
    veor.64 d5, d5, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d7, d7, d26
    veor.64 d9, d9, d27
    veor.64 d10, d10, d28
    veor.64 d12, d12, d29
    vld1.64 { d26, d27, d28, d29 }, [r6]!
    veor.64 d14, d14, d26
    veor.64 d16, d16, d27
    veor.64 d18, d18, d28
    veor.64 d11, d11, d29

    sub     r2, r5, #16                     @ XOR last n lanes, maximum 9
    rsb     r1, r2, #9                      @ r1 = number of table entries to skip
    add     r6, r6, r2, LSL #3              @ data += n lanes * 8
    sub     r2, r6, #8                      @ r2 tempdata = data - 8
    add     pc, pc, r1, LSL #3              @ jump into the table below (8 bytes per entry)
    mov     r1, #0                          @ dummy instruction for PC alignment, not executed
    vld1.64 d30, [r2], r7                   @ entries walk the data backwards (r7 = -8)
    veor.64 d24, d24, d30
    vld1.64 d30, [r2], r7
    veor.64 d23, d23, d30
    vld1.64 d30, [r2], r7
    veor.64 d22, d22, d30
    vld1.64 d30, [r2], r7
    veor.64 d21, d21, d30
    vld1.64 d30, [r2], r7
    veor.64 d20, d20, d30

    vld1.64 d30, [r2], r7
    veor.64 d19, d19, d30
    vld1.64 d30, [r2], r7
    veor.64 d17, d17, d30
    vld1.64 d30, [r2], r7
    veor.64 d15, d15, d30
    vld1.64 d30, [r2], r7
    veor.64 d13, d13, d30

    vld1.64 {d30}, [sp:64]                  @ d30 = trailingBits lane for the XOR table
    adrl    r1, KeccakP1600_Permute_RoundConstants24    @ round loop inputs: constants ...
    mov     r2, #24                                     @ ... and round count
    bl      KeccakP1600_XORandPermuteAsmOnly
    subs    r3, r3, r5                      @ nbrLanes -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_Loop16OrMoreLanes
    b       KeccakF1600_FastLoop_Absorb_Done
KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes:
    rsb     r1, r5, #15                     @ XOR up to 15 lanes
    add     r6, r6, r5, LSL #3              @ data += laneCount * 8
    sub     r2, r6, #8                      @ r2 tempdata = data - 8
    add     pc, pc, r1, LSL #3              @ jump into the table below (8 bytes per entry)
    mov     r1, #0                          @ dummy instruction for PC alignment, not executed

    vld1.64 d30, [r2], r7                   @ entries walk the data backwards (r7 = -8)
    veor.64 d18, d18, d30
    vld1.64 d30, [r2], r7
    veor.64 d16, d16, d30
    vld1.64 d30, [r2], r7
    veor.64 d14, d14, d30
    vld1.64 d30, [r2], r7
    veor.64 d12, d12, d30
    vld1.64 d30, [r2], r7
    veor.64 d10, d10, d30

    vld1.64 d30, [r2], r7
    veor.64 d9, d9, d30
    vld1.64 d30, [r2], r7
    veor.64 d7, d7, d30
    vld1.64 d30, [r2], r7
    veor.64 d5, d5, d30
    vld1.64 d30, [r2], r7
    veor.64 d3, d3, d30
    vld1.64 d30, [r2], r7
    veor.64 d1, d1, d30

    vld1.64 d30, [r2], r7
    veor.64 d8, d8, d30
    vld1.64 d30, [r2], r7
    veor.64 d6, d6, d30
    vld1.64 d30, [r2], r7
    veor.64 d4, d4, d30
    vld1.64 d30, [r2], r7
    veor.64 d2, d2, d30
    vld1.64 d30, [r2], r7
    veor.64 d0, d0, d30

    vld1.64 {d30}, [sp:64]                  @ d30 = trailingBits lane for the XOR table
    adrl    r1, KeccakP1600_Permute_RoundConstants24    @ round loop inputs: constants ...
    mov     r2, #24                                     @ ... and round count
    bl      KeccakP1600_XORandPermuteAsmOnly
    subs    r3, r3, r5                      @ nbrLanes -= laneCount
    bcs     KeccakF1600_FastLoop_Absorb_LoopLessThan16Lanes
    b       KeccakF1600_FastLoop_Absorb_Done


.endif
|
826
|
+
|