digest-kangarootwelve 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -11
- data/Rakefile +2 -2
- data/digest-kangarootwelve.gemspec +322 -42
- data/ext/digest/kangarootwelve/ext.c +1 -1
- data/ext/digest/kangarootwelve/extconf.rb +13 -1
- data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
- data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
- data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
- data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
- data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
- data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
- data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
- data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
- data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
- data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
- data/lib/digest/kangarootwelve/version.rb +1 -1
- metadata +299 -21
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakSpongeWidth1600.c"
|
@@ -0,0 +1,287 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Gilles Van Assche, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
This file contains macros that help make a PlSnP-compatible implementation by
|
14
|
+
serially falling back on a SnP-compatible implementation or on a PlSnP-compatible
|
15
|
+
implementation of lower parallelism degree.
|
16
|
+
|
17
|
+
Please refer to PlSnP-documentation.h for more details.
|
18
|
+
*/
|
19
|
+
|
20
|
+
/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
|
21
|
+
/* expect SnP_stateSizeInBytes, SnP_stateAlignment */
|
22
|
+
/* expect prefix */
|
23
|
+
/* expect SnP_* */
|
24
|
+
|
25
|
+
#define JOIN0(a, b) a ## b
|
26
|
+
#define JOIN(a, b) JOIN0(a, b)
|
27
|
+
|
28
|
+
#define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize)
|
29
|
+
#define PlSnP_InitializeAll JOIN(prefix, _InitializeAll)
|
30
|
+
#define PlSnP_AddByte JOIN(prefix, _AddByte)
|
31
|
+
#define PlSnP_AddBytes JOIN(prefix, _AddBytes)
|
32
|
+
#define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll)
|
33
|
+
#define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes)
|
34
|
+
#define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll)
|
35
|
+
#define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes)
|
36
|
+
#define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes)
|
37
|
+
#define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll)
|
38
|
+
#define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes)
|
39
|
+
#define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll)
|
40
|
+
|
41
|
+
#if (PlSnP_baseParallelism == 1)
|
42
|
+
#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
|
43
|
+
#define SnP_stateAlignment JOIN(SnP, _stateAlignment)
|
44
|
+
#else
|
45
|
+
#define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes)
|
46
|
+
#define SnP_stateAlignment JOIN(SnP, _statesAlignment)
|
47
|
+
#endif
|
48
|
+
#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism))
|
49
|
+
#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment)
|
50
|
+
#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset))
|
51
|
+
|
52
|
+
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
|
53
|
+
#define SnP_Initialize JOIN(SnP, _Initialize)
|
54
|
+
#define SnP_InitializeAll JOIN(SnP, _InitializeAll)
|
55
|
+
#define SnP_AddByte JOIN(SnP, _AddByte)
|
56
|
+
#define SnP_AddBytes JOIN(SnP, _AddBytes)
|
57
|
+
#define SnP_AddLanesAll JOIN(SnP, _AddLanesAll)
|
58
|
+
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
|
59
|
+
#define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll)
|
60
|
+
#define SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes)
|
61
|
+
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
|
62
|
+
#define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll)
|
63
|
+
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
|
64
|
+
#define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll)
|
65
|
+
|
66
|
+
void PlSnP_StaticInitialize( void )
|
67
|
+
{
|
68
|
+
SnP_StaticInitialize();
|
69
|
+
}
|
70
|
+
|
71
|
+
void PlSnP_InitializeAll(void *states)
|
72
|
+
{
|
73
|
+
unsigned int i;
|
74
|
+
|
75
|
+
for(i=0; i<PlSnP_factor; i++)
|
76
|
+
#if (PlSnP_baseParallelism == 1)
|
77
|
+
SnP_Initialize(stateWithIndex(i));
|
78
|
+
#else
|
79
|
+
SnP_InitializeAll(stateWithIndex(i));
|
80
|
+
#endif
|
81
|
+
}
|
82
|
+
|
83
|
+
void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset)
|
84
|
+
{
|
85
|
+
#if (PlSnP_baseParallelism == 1)
|
86
|
+
SnP_AddByte(stateWithIndex(instanceIndex), byte, offset);
|
87
|
+
#else
|
88
|
+
SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset);
|
89
|
+
#endif
|
90
|
+
}
|
91
|
+
|
92
|
+
void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
93
|
+
{
|
94
|
+
#if (PlSnP_baseParallelism == 1)
|
95
|
+
SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length);
|
96
|
+
#else
|
97
|
+
SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
98
|
+
#endif
|
99
|
+
}
|
100
|
+
|
101
|
+
void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
102
|
+
{
|
103
|
+
unsigned int i;
|
104
|
+
|
105
|
+
for(i=0; i<PlSnP_factor; i++) {
|
106
|
+
#if (PlSnP_baseParallelism == 1)
|
107
|
+
SnP_AddBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
108
|
+
#else
|
109
|
+
SnP_AddLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
110
|
+
#endif
|
111
|
+
data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
116
|
+
{
|
117
|
+
#if (PlSnP_baseParallelism == 1)
|
118
|
+
SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length);
|
119
|
+
#else
|
120
|
+
SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
121
|
+
#endif
|
122
|
+
}
|
123
|
+
|
124
|
+
void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
125
|
+
{
|
126
|
+
unsigned int i;
|
127
|
+
|
128
|
+
for(i=0; i<PlSnP_factor; i++) {
|
129
|
+
#if (PlSnP_baseParallelism == 1)
|
130
|
+
SnP_OverwriteBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
131
|
+
#else
|
132
|
+
SnP_OverwriteLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
133
|
+
#endif
|
134
|
+
data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
|
139
|
+
{
|
140
|
+
#if (PlSnP_baseParallelism == 1)
|
141
|
+
SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount);
|
142
|
+
#else
|
143
|
+
SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount);
|
144
|
+
#endif
|
145
|
+
}
|
146
|
+
|
147
|
+
void PlSnP_PermuteAll(void *states)
|
148
|
+
{
|
149
|
+
unsigned int i;
|
150
|
+
|
151
|
+
for(i=0; i<PlSnP_factor; i++) {
|
152
|
+
#if (PlSnP_baseParallelism == 1)
|
153
|
+
SnP_Permute(stateWithIndex(i));
|
154
|
+
#else
|
155
|
+
SnP_PermuteAll(stateWithIndex(i));
|
156
|
+
#endif
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds))
/* Apply the 12-round permutation to every state block in `states`.
 * Only compiled when the underlying layer provides a 12-round primitive. */
void PlSnP_PermuteAll_12rounds(void *states)
{
    unsigned int blockIndex = 0;

    while (blockIndex < PlSnP_factor) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_12rounds(stateWithIndex(blockIndex));
#else
        SnP_Permute_12rounds(stateWithIndex(blockIndex));
#endif
        blockIndex++;
    }
}
#endif
|
174
|
+
|
175
|
+
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_6rounds))
/* Apply the 6-round permutation to every state block in `states`.
 * With a non-parallel base this goes through the generic N-round primitive. */
void PlSnP_PermuteAll_6rounds(void *states)
{
    unsigned int blockIndex = 0;

    while (blockIndex < PlSnP_factor) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_6rounds(stateWithIndex(blockIndex));
#else
        SnP_Permute_Nrounds(stateWithIndex(blockIndex), 6);
#endif
        blockIndex++;
    }
}
#endif
|
189
|
+
|
190
|
+
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_4rounds))
/* Apply the 4-round permutation to every state block in `states`.
 * With a non-parallel base this goes through the generic N-round primitive. */
void PlSnP_PermuteAll_4rounds(void *states)
{
    unsigned int blockIndex = 0;

    while (blockIndex < PlSnP_factor) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_4rounds(stateWithIndex(blockIndex));
#else
        SnP_Permute_Nrounds(stateWithIndex(blockIndex), 4);
#endif
        blockIndex++;
    }
}
#endif
|
204
|
+
|
205
|
+
void PlSnP_ExtractBytes(void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
|
206
|
+
{
|
207
|
+
#if (PlSnP_baseParallelism == 1)
|
208
|
+
SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length);
|
209
|
+
#else
|
210
|
+
SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
211
|
+
#endif
|
212
|
+
}
|
213
|
+
|
214
|
+
void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
215
|
+
{
|
216
|
+
unsigned int i;
|
217
|
+
|
218
|
+
for(i=0; i<PlSnP_factor; i++) {
|
219
|
+
#if (PlSnP_baseParallelism == 1)
|
220
|
+
SnP_ExtractBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
221
|
+
#else
|
222
|
+
SnP_ExtractLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
223
|
+
#endif
|
224
|
+
data += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
void PlSnP_ExtractAndAddBytes(void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
229
|
+
{
|
230
|
+
#if (PlSnP_baseParallelism == 1)
|
231
|
+
SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length);
|
232
|
+
#else
|
233
|
+
SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length);
|
234
|
+
#endif
|
235
|
+
}
|
236
|
+
|
237
|
+
void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
|
238
|
+
{
|
239
|
+
unsigned int i;
|
240
|
+
|
241
|
+
for(i=0; i<PlSnP_factor; i++) {
|
242
|
+
#if (PlSnP_baseParallelism == 1)
|
243
|
+
SnP_ExtractAndAddBytes(stateWithIndex(i), input, output, 0, laneCount*SnP_laneLengthInBytes);
|
244
|
+
#else
|
245
|
+
SnP_ExtractAndAddLanesAll(stateWithIndex(i), input, output, laneCount, laneOffset);
|
246
|
+
#endif
|
247
|
+
input += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
248
|
+
output += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
/* Release the helper macros of this fallback. */
#undef PlSnP_factor
#undef SnP_stateOffset
#undef stateWithIndex
#undef JOIN0
#undef JOIN

/* Release the PlSnP_* names. */
#undef PlSnP_StaticInitialize
#undef PlSnP_InitializeAll
#undef PlSnP_AddByte
#undef PlSnP_AddBytes
#undef PlSnP_AddLanesAll
#undef PlSnP_OverwriteBytes
#undef PlSnP_OverwriteLanesAll
#undef PlSnP_OverwriteWithZeroes
#undef PlSnP_PermuteAll
#undef PlSnP_ExtractBytes
#undef PlSnP_ExtractLanesAll
#undef PlSnP_ExtractAndAddBytes
#undef PlSnP_ExtractAndAddLanesAll

/* Release the SnP_* names. */
#undef SnP_stateAlignment
#undef SnP_stateSizeInBytes
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_InitializeAll
#undef SnP_AddByte
#undef SnP_AddBytes
#undef SnP_AddLanesAll
#undef SnP_OverwriteBytes
#undef SnP_OverwriteWithZeroes
#undef SnP_OverwriteLanesAll
#undef SnP_ExtractBytes
#undef SnP_ExtractLanesAll
#undef SnP_ExtractAndAddBytes
#undef SnP_ExtractAndAddLanesAll
|
@@ -0,0 +1,140 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
|
3
|
+
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
|
4
|
+
hereby denoted as "the implementer".
|
5
|
+
|
6
|
+
For more information, feedback or questions, please refer to our website:
|
7
|
+
https://keccak.team/
|
8
|
+
|
9
|
+
To the extent possible under law, the implementer has waived all copyright
|
10
|
+
and related or neighboring rights to the source code in this file.
|
11
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
12
|
+
|
13
|
+
---
|
14
|
+
|
15
|
+
This file contains macros that help implement a permutation in a SnP-compatible way.
|
16
|
+
It converts an implementation that implements state input/output functions
|
17
|
+
in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane(),
|
18
|
+
and similarly for Overwrite, Extract and ExtractAndAdd) to the byte-oriented SnP.
|
19
|
+
Please refer to SnP-documentation.h for more details.
|
20
|
+
*/
|
21
|
+
|
22
|
+
#ifndef _SnP_Relaned_h_
|
23
|
+
#define _SnP_Relaned_h_
|
24
|
+
|
25
|
+
/*
 * Byte-oriented add built from two lane-oriented primitives.
 *   - offset != 0: walk lane by lane with SnP_AddBytesInLane; only the first
 *     lane starts at a non-zero position inside the lane.
 *   - offset == 0: hand all whole lanes to SnP_AddLanes, then the remaining
 *     (length % lane size) bytes to SnP_AddBytesInLane.
 * Expands to a compound statement; the arguments are evaluated more than once.
 */
#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) != 0) { \
            unsigned int _snpTodo = (length); \
            unsigned int _snpLane = (offset)/SnP_laneLengthInBytes; \
            unsigned int _snpSkip = (offset)%SnP_laneLengthInBytes; \
            const unsigned char *_snpSrc = (data); \
            for ( ; _snpTodo > 0; _snpLane++, _snpSkip = 0) { \
                unsigned int _snpChunk = SnP_laneLengthInBytes - _snpSkip; \
                if (_snpChunk > _snpTodo) \
                    _snpChunk = _snpTodo; \
                SnP_AddBytesInLane(state, _snpLane, _snpSrc, _snpSkip, _snpChunk); \
                _snpTodo -= _snpChunk; \
                _snpSrc += _snpChunk; \
            } \
        } \
        else { \
            SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \
            SnP_AddBytesInLane(state, \
                (length)/SnP_laneLengthInBytes, \
                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
                0, \
                (length)%SnP_laneLengthInBytes); \
        } \
    }
|
52
|
+
|
53
|
+
/*
 * Byte-oriented overwrite built from two lane-oriented primitives.
 *   - offset != 0: walk lane by lane with SnP_OverwriteBytesInLane; only the
 *     first lane starts at a non-zero position inside the lane.
 *   - offset == 0: hand all whole lanes to SnP_OverwriteLanes, then the
 *     remaining (length % lane size) bytes to SnP_OverwriteBytesInLane.
 * Expands to a compound statement; the arguments are evaluated more than once.
 */
#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) != 0) { \
            unsigned int _snpTodo = (length); \
            unsigned int _snpLane = (offset)/SnP_laneLengthInBytes; \
            unsigned int _snpSkip = (offset)%SnP_laneLengthInBytes; \
            const unsigned char *_snpSrc = (data); \
            for ( ; _snpTodo > 0; _snpLane++, _snpSkip = 0) { \
                unsigned int _snpChunk = SnP_laneLengthInBytes - _snpSkip; \
                if (_snpChunk > _snpTodo) \
                    _snpChunk = _snpTodo; \
                SnP_OverwriteBytesInLane(state, _snpLane, _snpSrc, _snpSkip, _snpChunk); \
                _snpTodo -= _snpChunk; \
                _snpSrc += _snpChunk; \
            } \
        } \
        else { \
            SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \
            SnP_OverwriteBytesInLane(state, \
                (length)/SnP_laneLengthInBytes, \
                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
                0, \
                (length)%SnP_laneLengthInBytes); \
        } \
    }
|
80
|
+
|
81
|
+
/*
 * Byte-oriented extraction built from two lane-oriented primitives.
 *   - offset != 0: walk lane by lane with SnP_ExtractBytesInLane; only the
 *     first lane starts at a non-zero position inside the lane.
 *   - offset == 0: hand all whole lanes to SnP_ExtractLanes, then the
 *     remaining (length % lane size) bytes to SnP_ExtractBytesInLane.
 * Expands to a compound statement; the arguments are evaluated more than once.
 */
#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) != 0) { \
            unsigned int _snpTodo = (length); \
            unsigned int _snpLane = (offset)/SnP_laneLengthInBytes; \
            unsigned int _snpSkip = (offset)%SnP_laneLengthInBytes; \
            unsigned char *_snpDst = (data); \
            for ( ; _snpTodo > 0; _snpLane++, _snpSkip = 0) { \
                unsigned int _snpChunk = SnP_laneLengthInBytes - _snpSkip; \
                if (_snpChunk > _snpTodo) \
                    _snpChunk = _snpTodo; \
                SnP_ExtractBytesInLane(state, _snpLane, _snpDst, _snpSkip, _snpChunk); \
                _snpTodo -= _snpChunk; \
                _snpDst += _snpChunk; \
            } \
        } \
        else { \
            SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \
            SnP_ExtractBytesInLane(state, \
                (length)/SnP_laneLengthInBytes, \
                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
                0, \
                (length)%SnP_laneLengthInBytes); \
        } \
    }
|
108
|
+
|
109
|
+
/*
 * Byte-oriented extract-and-add built from two lane-oriented primitives.
 *   - offset != 0: walk lane by lane with SnP_ExtractAndAddBytesInLane; only
 *     the first lane starts at a non-zero position inside the lane.
 *   - offset == 0: hand all whole lanes to SnP_ExtractAndAddLanes, then the
 *     remaining (length % lane size) bytes to SnP_ExtractAndAddBytesInLane.
 * `input` and `output` advance in lockstep.
 * Expands to a compound statement; the arguments are evaluated more than once.
 */
#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) != 0) { \
            unsigned int _snpTodo = (length); \
            unsigned int _snpLane = (offset)/SnP_laneLengthInBytes; \
            unsigned int _snpSkip = (offset)%SnP_laneLengthInBytes; \
            const unsigned char *_snpSrc = (input); \
            unsigned char *_snpDst = (output); \
            for ( ; _snpTodo > 0; _snpLane++, _snpSkip = 0) { \
                unsigned int _snpChunk = SnP_laneLengthInBytes - _snpSkip; \
                if (_snpChunk > _snpTodo) \
                    _snpChunk = _snpTodo; \
                SnP_ExtractAndAddBytesInLane(state, _snpLane, _snpSrc, _snpDst, _snpSkip, _snpChunk); \
                _snpTodo -= _snpChunk; \
                _snpSrc += _snpChunk; \
                _snpDst += _snpChunk; \
            } \
        } \
        else { \
            SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \
            SnP_ExtractAndAddBytesInLane(state, \
                (length)/SnP_laneLengthInBytes, \
                (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
                (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
                0, \
                (length)%SnP_laneLengthInBytes); \
        } \
    }
|
139
|
+
|
140
|
+
#endif
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../../ext.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KangarooTwelve.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakDuplexWidth1600.c"
|
@@ -0,0 +1,993 @@
|
|
1
|
+
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
|
2
|
+
# Copyright (c) 2017 Ronny Van Keer
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# The source code in this file is licensed under the CRYPTOGAMS license.
|
6
|
+
# For further details see http://www.openssl.org/~appro/cryptogams/.
|
7
|
+
#
|
8
|
+
# Notes:
|
9
|
+
# The code for the permutation (__KeccakF1600) was generated with
|
10
|
+
# Andy Polyakov's keccak1600-avx2.pl from the CRYPTOGAMS project
|
11
|
+
# (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
|
12
|
+
# The rest of the code was written by Ronny Van Keer.
|
13
|
+
|
14
|
+
.text
|
15
|
+
|
16
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Initialize(void *state);
#                                   %rdi
#
# Zeroes the state: six 32-byte vector stores plus one 8-byte store, i.e.
# 200 bytes starting at %rdi.
# In:    %rdi = state; the vmovdqa stores require 32-byte alignment.
# Clobb: %ymm0
#
    .globl  KeccakP1600_Initialize
    .type   KeccakP1600_Initialize,@function
    .align  32
KeccakP1600_Initialize:
    vpxor       %ymm0,%ymm0,%ymm0
    vmovdqa     %ymm0,0*32(%rdi)
    vmovdqa     %ymm0,1*32(%rdi)
    vmovdqa     %ymm0,2*32(%rdi)
    vmovdqa     %ymm0,3*32(%rdi)
    vmovdqa     %ymm0,4*32(%rdi)
    vmovdqa     %ymm0,5*32(%rdi)
    movq        $0,6*32(%rdi)
    vzeroupper                          # do not return to the caller with dirty upper YMM halves
    ret
    .size   KeccakP1600_Initialize,.-KeccakP1600_Initialize
|
34
|
+
|
35
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
#                               %rdi                 %rsi               %rdx
#
# XORs one byte into the state at byte position `offset`.
# The lane is located through mapState: the 8-byte entry found at
# mapState + (offset & ~7) is added to the state pointer.
# Clobb: %rax, %rcx, %rdx, %rdi, flags
#
    .globl  KeccakP1600_AddByte
    .type   KeccakP1600_AddByte,@function
    .align  32
KeccakP1600_AddByte:
    mov         %rdx, %rax
    and         $7, %rax                                # rax = offset inside the lane
    and         $0xFFFFFFF8, %edx                       # rdx = offset of the mapState entry (32-bit op clears bits 63..32)
    lea         mapState(%rip), %rcx                    # rip-relative: no absolute relocation in a shared object
    mov         (%rcx,%rdx), %rdx                       # rdx = mapState entry for this lane
    add         %rdx, %rdi
    add         %rax, %rdi                              # rdi = address of the target byte
    xorb        %sil, (%rdi)
    ret
    .size   KeccakP1600_AddByte,.-KeccakP1600_AddByte
|
53
|
+
|
54
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
#                                %rdi                         %rsi               %rdx                 %rcx
#
# XORs `length` bytes from `data` into the state, starting at byte `offset`.
# Three phases: bytes up to the next lane boundary, whole 8-byte lanes, then
# the remaining tail bytes. Each lane is located through an 8-byte mapState
# entry that is added to the state pointer.
# Clobb: %rax, %rcx, %rdx, %rsi, %r8, %r9, flags
#
    .globl  KeccakP1600_AddBytes
    .type   KeccakP1600_AddBytes,@function
    .align  32
KeccakP1600_AddBytes:
    mov         %ecx, %ecx                              # length is a 32-bit argument: clear bits 63..32 before 64-bit use
    test        %rcx, %rcx
    jz          KeccakP1600_AddBytes_Exit
    mov         %rdx, %rax                              # rax offset in lane
    and         $0xFFFFFFF8, %edx                       # rdx = offset of the first mapState entry
    lea         mapState(%rip), %r9                     # rip-relative: no absolute relocation in a shared object
    add         %r9, %rdx                               # rdx pointer into state index mapper
    and         $7, %rax
    jz          KeccakP1600_AddBytes_LaneAlignedCheck
    mov         $8, %r9                                 # r9 is (max) length of incomplete lane
    sub         %rax, %r9
    cmp         %rcx, %r9
    cmovae      %rcx, %r9                               # r9 = min(8 - offset in lane, length)
    sub         %r9, %rcx                               # length -= length of incomplete lane
    add         (%rdx), %rax                            # rax = pointer to state lane
    add         $8, %rdx
    add         %rdi, %rax
KeccakP1600_AddBytes_NotAlignedLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    xorb        %r8b, (%rax)
    inc         %rax
    dec         %r9
    jnz         KeccakP1600_AddBytes_NotAlignedLoop
    jmp         KeccakP1600_AddBytes_LaneAlignedCheck
KeccakP1600_AddBytes_LaneAlignedLoop:
    mov         (%rsi), %r8
    add         $8, %rsi
    mov         (%rdx), %rax
    add         $8, %rdx
    add         %rdi, %rax
    xor         %r8, (%rax)
KeccakP1600_AddBytes_LaneAlignedCheck:
    sub         $8, %rcx                                # borrow (carry set) means fewer than 8 bytes were left
    jnc         KeccakP1600_AddBytes_LaneAlignedLoop
KeccakP1600_AddBytes_LastIncompleteLane:
    add         $8, %rcx                                # undo the last subtraction: rcx = tail length (0..7)
    jz          KeccakP1600_AddBytes_Exit
    mov         (%rdx), %rax
    add         %rdi, %rax
KeccakP1600_AddBytes_LastIncompleteLaneLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    xor         %r8b, (%rax)
    inc         %rax
    dec         %rcx
    jnz         KeccakP1600_AddBytes_LastIncompleteLaneLoop
KeccakP1600_AddBytes_Exit:
    ret
    .size   KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
|
111
|
+
|
112
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
#                                      %rdi                         %rsi               %rdx                 %rcx
#
# Copies `length` bytes from `data` over the state, starting at byte `offset`.
# Three phases: bytes up to the next lane boundary, whole 8-byte lanes, then
# the remaining tail bytes. Each lane is located through an 8-byte mapState
# entry that is added to the state pointer.
# Clobb: %rax, %rcx, %rdx, %rsi, %r8, %r9, flags
#
    .globl  KeccakP1600_OverwriteBytes
    .type   KeccakP1600_OverwriteBytes,@function
    .align  32
KeccakP1600_OverwriteBytes:
    mov         %ecx, %ecx                              # length is a 32-bit argument: clear bits 63..32 before 64-bit use
    test        %rcx, %rcx
    jz          KeccakP1600_OverwriteBytes_Exit
    mov         %rdx, %rax                              # rax offset in lane
    and         $0xFFFFFFF8, %edx                       # rdx = offset of the first mapState entry
    lea         mapState(%rip), %r9                     # rip-relative: no absolute relocation in a shared object
    add         %r9, %rdx                               # rdx pointer into state index mapper
    and         $7, %rax
    jz          KeccakP1600_OverwriteBytes_LaneAlignedCheck
    mov         $8, %r9                                 # r9 is (max) length of incomplete lane
    sub         %rax, %r9
    cmp         %rcx, %r9
    cmovae      %rcx, %r9                               # r9 = min(8 - offset in lane, length)
    sub         %r9, %rcx                               # length -= length of incomplete lane
    add         (%rdx), %rax                            # rax = pointer to state lane
    add         $8, %rdx
    add         %rdi, %rax
KeccakP1600_OverwriteBytes_NotAlignedLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    mov         %r8b, (%rax)
    inc         %rax
    dec         %r9
    jnz         KeccakP1600_OverwriteBytes_NotAlignedLoop
    jmp         KeccakP1600_OverwriteBytes_LaneAlignedCheck
KeccakP1600_OverwriteBytes_LaneAlignedLoop:
    mov         (%rsi), %r8
    add         $8, %rsi
    mov         (%rdx), %rax
    add         $8, %rdx
    add         %rdi, %rax
    mov         %r8, (%rax)
KeccakP1600_OverwriteBytes_LaneAlignedCheck:
    sub         $8, %rcx                                # borrow (carry set) means fewer than 8 bytes were left
    jnc         KeccakP1600_OverwriteBytes_LaneAlignedLoop
KeccakP1600_OverwriteBytes_LastIncompleteLane:
    add         $8, %rcx                                # undo the last subtraction: rcx = tail length (0..7)
    jz          KeccakP1600_OverwriteBytes_Exit
    mov         (%rdx), %rax
    add         %rdi, %rax
KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
    mov         (%rsi), %r8b
    inc         %rsi
    mov         %r8b, (%rax)
    inc         %rax
    dec         %rcx
    jnz         KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
KeccakP1600_OverwriteBytes_Exit:
    ret
    .size   KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
|
169
|
+
|
170
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
#                                           %rdi               %rsi
#
# Zeroes the first `byteCount` bytes of the state: whole 8-byte lanes first,
# then the remaining tail bytes of the next lane. Lanes are visited in
# mapState order; each 8-byte entry is added to the state pointer.
# Clobb: %rax, %rdx, %rsi, flags
#
    .globl  KeccakP1600_OverwriteWithZeroes
    .type   KeccakP1600_OverwriteWithZeroes,@function
    .align  32
KeccakP1600_OverwriteWithZeroes:
    mov         %esi, %esi                              # byteCount is a 32-bit argument: clear bits 63..32 before 64-bit use
    test        %rsi, %rsi
    jz          KeccakP1600_OverwriteWithZeroes_Exit
    lea         mapState(%rip), %rdx                    # rdx pointer into state index mapper (rip-relative)
    jmp         KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
    mov         (%rdx), %rax
    add         $8, %rdx
    add         %rdi, %rax
    movq        $0, (%rax)
KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
    sub         $8, %rsi                                # borrow (carry set) means fewer than 8 bytes were left
    jnc         KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
    add         $8, %rsi                                # undo the last subtraction: rsi = tail length (0..7)
    jz          KeccakP1600_OverwriteWithZeroes_Exit
    mov         (%rdx), %rax
    add         %rdi, %rax
KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
    movb        $0, (%rax)
    inc         %rax
    dec         %rsi
    jnz         KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
KeccakP1600_OverwriteWithZeroes_Exit:
    ret
    .size   KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
|
204
|
+
|
205
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
#                                          %rdi                  %rsi               %rdx                 %rcx
#
# Copies `length` state bytes, starting at byte `offset`, into `data`.
# Three phases: bytes up to the next lane boundary, whole 8-byte lanes, then
# the remaining tail bytes. Each lane is located through an 8-byte mapState
# entry that is added to the state pointer.
# Saves/restores %rbx (callee-saved). Clobb: %rax, %rcx, %rdx, %rsi, %r8, %r9, flags
#
    .globl  KeccakP1600_ExtractBytes
    .type   KeccakP1600_ExtractBytes,@function
    .align  32
KeccakP1600_ExtractBytes:
    push        %rbx
    mov         %ecx, %ecx                              # length is a 32-bit argument: clear bits 63..32 before 64-bit use
    test        %rcx, %rcx
    jz          KeccakP1600_ExtractBytes_Exit
    mov         %rdx, %rax                              # rax offset in lane
    and         $0xFFFFFFF8, %edx                       # rdx = offset of the first mapState entry
    lea         mapState(%rip), %r9                     # rip-relative: no absolute relocation in a shared object
    add         %r9, %rdx                               # rdx pointer into state index mapper
    and         $7, %rax
    jz          KeccakP1600_ExtractBytes_LaneAlignedCheck
    mov         $8, %rbx                                # rbx is (max) length of incomplete lane
    sub         %rax, %rbx
    cmp         %rcx, %rbx
    cmovae      %rcx, %rbx                              # rbx = min(8 - offset in lane, length)
    sub         %rbx, %rcx                              # length -= length of incomplete lane
    mov         (%rdx), %r9
    add         $8, %rdx
    add         %rdi, %r9
    add         %rax, %r9                               # r9 = address of the first state byte to read
KeccakP1600_ExtractBytes_NotAlignedLoop:
    mov         (%r9), %r8b
    inc         %r9
    mov         %r8b, (%rsi)
    inc         %rsi
    dec         %rbx
    jnz         KeccakP1600_ExtractBytes_NotAlignedLoop
    jmp         KeccakP1600_ExtractBytes_LaneAlignedCheck
KeccakP1600_ExtractBytes_LaneAlignedLoop:
    mov         (%rdx), %rax
    add         $8, %rdx
    add         %rdi, %rax
    mov         (%rax), %r8
    mov         %r8, (%rsi)
    add         $8, %rsi
KeccakP1600_ExtractBytes_LaneAlignedCheck:
    sub         $8, %rcx                                # borrow (carry set) means fewer than 8 bytes were left
    jnc         KeccakP1600_ExtractBytes_LaneAlignedLoop
KeccakP1600_ExtractBytes_LastIncompleteLane:
    add         $8, %rcx                                # undo the last subtraction: rcx = tail length (0..7)
    jz          KeccakP1600_ExtractBytes_Exit
    mov         (%rdx), %rax
    add         %rdi, %rax
    mov         (%rax), %r8                             # load the whole lane once, then peel bytes off the low end
KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
    mov         %r8b, (%rsi)
    shr         $8, %r8
    inc         %rsi
    dec         %rcx
    jnz         KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractBytes_Exit:
    pop         %rbx
    ret
    .size   KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
|
265
|
+
|
266
|
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#                                                %rdi                         %rsi                  %rdx                 %rcx                 %r8
#
# Writes output[i] = input[i] ^ (state byte at offset + i) for `length` bytes.
# Three phases: bytes up to the next lane boundary, whole 8-byte lanes, then
# the remaining tail bytes. Each lane is located through an 8-byte mapState
# entry that is added to the state pointer.
# Saves/restores %rbx and %r10. Clobb: %rax, %rcx, %rdx, %rsi, %r8, %r9, flags
#
    .globl  KeccakP1600_ExtractAndAddBytes
    .type   KeccakP1600_ExtractAndAddBytes,@function
    .align  32
KeccakP1600_ExtractAndAddBytes:
    push        %rbx
    push        %r10
    mov         %r8d, %r8d                              # length is a 32-bit argument: clear bits 63..32 before 64-bit use
    test        %r8, %r8
    jz          KeccakP1600_ExtractAndAddBytes_Exit
    mov         %rcx, %rax                              # rax offset in lane
    and         $0xFFFFFFF8, %ecx                       # rcx = offset of the first mapState entry
    lea         mapState(%rip), %r9                     # rip-relative: no absolute relocation in a shared object
    add         %r9, %rcx                               # rcx pointer into state index mapper
    and         $7, %rax
    jz          KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
    mov         $8, %rbx                                # rbx is (max) length of incomplete lane
    sub         %rax, %rbx
    cmp         %r8, %rbx
    cmovae      %r8, %rbx                               # rbx = min(8 - offset in lane, length)
    sub         %rbx, %r8                               # length -= length of incomplete lane
    mov         (%rcx), %r9
    add         $8, %rcx
    add         %rdi, %r9
    add         %rax, %r9                               # r9 = address of the first state byte to read
KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
    mov         (%r9), %r10b
    inc         %r9
    xor         (%rsi), %r10b
    inc         %rsi
    mov         %r10b, (%rdx)
    inc         %rdx
    dec         %rbx
    jnz         KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
    jmp         KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
    mov         (%rcx), %rax
    add         $8, %rcx
    add         %rdi, %rax
    mov         (%rax), %r10
    xor         (%rsi), %r10
    add         $8, %rsi
    mov         %r10, (%rdx)
    add         $8, %rdx
KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
    sub         $8, %r8                                 # borrow (carry set) means fewer than 8 bytes were left
    jnc         KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
    add         $8, %r8                                 # undo the last subtraction: r8 = tail length (0..7)
    jz          KeccakP1600_ExtractAndAddBytes_Exit
    mov         (%rcx), %rax
    add         %rdi, %rax
    mov         (%rax), %r10                            # load the whole lane once, then peel bytes off the low end
KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
    xor         (%rsi), %r10b
    inc         %rsi
    mov         %r10b, (%rdx)
    inc         %rdx
    shr         $8, %r10
    dec         %r8
    jnz         KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractAndAddBytes_Exit:
    pop         %r10
    pop         %rbx
    ret
    .size   KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
|
334
|
+
|
335
|
+
# -----------------------------------------------------------------------------
#
# internal
#
# Round loop shared by the KeccakP1600_Permute_* entry points; it is not
# exported (.type only, no .globl). Register contract, as set up by those
# entry points and used below:
#   %ymm0..%ymm6  state, loaded by the caller and stored back by it afterwards
#   %r8, %r9      rhotates_left+96 / rhotates_right+96 (hence the -96 bias)
#   %r10          current round-constant row; advanced by 32 bytes per round
#   %eax          number of rounds to run; must be non-zero on entry because
#                 it is decremented before it is tested
#   %ymm7..%ymm15 scratch, overwritten every round
#
.type __KeccakF1600,@function
.align 32
__KeccakF1600:
.Loop_avx2:
######################################### Theta
vpshufd $0b01001110,%ymm2,%ymm13
vpxor %ymm3,%ymm5,%ymm12
vpxor %ymm6,%ymm4,%ymm9
vpxor %ymm1,%ymm12,%ymm12
vpxor %ymm9,%ymm12,%ymm12 # C[1..4]

vpermq $0b10010011,%ymm12,%ymm11
vpxor %ymm2,%ymm13,%ymm13
vpermq $0b01001110,%ymm13,%ymm7

vpsrlq $63,%ymm12,%ymm8
vpaddq %ymm12,%ymm12,%ymm9
vpor %ymm9,%ymm8,%ymm8 # ROL64(C[1..4],1)

vpermq $0b00111001,%ymm8,%ymm15
vpxor %ymm11,%ymm8,%ymm14
vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4]

vpxor %ymm0,%ymm13,%ymm13
vpxor %ymm7,%ymm13,%ymm13 # C[0..0]

vpsrlq $63,%ymm13,%ymm7
vpaddq %ymm13,%ymm13,%ymm8
vpor %ymm7,%ymm8,%ymm8 # ROL64(C[0..0],1)

vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0]
vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0]

vpblendd $0b11000000,%ymm8,%ymm15,%ymm15
vpblendd $0b00000011,%ymm13,%ymm11,%ymm11
vpxor %ymm11,%ymm15,%ymm15 # D[1..4] = ROL64(C[2..4,0],1) ^ C[0..3]

######################################### Rho + Pi + pre-Chi shuffle
vpsllvq 0*32-96(%r8),%ymm2,%ymm10
vpsrlvq 0*32-96(%r9),%ymm2,%ymm2
vpor %ymm10,%ymm2,%ymm2

vpxor %ymm15,%ymm3,%ymm3 # ^= D[1..4] from Theta
vpsllvq 2*32-96(%r8),%ymm3,%ymm11
vpsrlvq 2*32-96(%r9),%ymm3,%ymm3
vpor %ymm11,%ymm3,%ymm3

vpxor %ymm15,%ymm4,%ymm4 # ^= D[1..4] from Theta
vpsllvq 3*32-96(%r8),%ymm4,%ymm12
vpsrlvq 3*32-96(%r9),%ymm4,%ymm4
vpor %ymm12,%ymm4,%ymm4

vpxor %ymm15,%ymm5,%ymm5 # ^= D[1..4] from Theta
vpsllvq 4*32-96(%r8),%ymm5,%ymm13
vpsrlvq 4*32-96(%r9),%ymm5,%ymm5
vpor %ymm13,%ymm5,%ymm5

vpxor %ymm15,%ymm6,%ymm6 # ^= D[1..4] from Theta
vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3
vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4
vpsllvq 5*32-96(%r8),%ymm6,%ymm14
vpsrlvq 5*32-96(%r9),%ymm6,%ymm8
vpor %ymm14,%ymm8,%ymm8 # %ymm6 -> future %ymm1

vpxor %ymm15,%ymm1,%ymm1 # ^= D[1..4] from Theta
vpermq $0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5
vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6
vpsllvq 1*32-96(%r8),%ymm1,%ymm15
vpsrlvq 1*32-96(%r9),%ymm1,%ymm9
vpor %ymm15,%ymm9,%ymm9 # %ymm1 -> future %ymm2

######################################### Chi
vpsrldq $8,%ymm8,%ymm14
vpandn %ymm14,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0]

vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0]
vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1]
vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4]
vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0]
vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0]
vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1]
vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4]
vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0]
vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0]
vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1]
vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4]
vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0]
vpandn %ymm15,%ymm3,%ymm3 # tgting [3][1] [1][2] [4][3] [2][4]
vpandn %ymm14,%ymm5,%ymm5 # tgting [3][2] [1][4] [4][1] [2][3]

vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3]
vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4]
vpxor %ymm10,%ymm3,%ymm3
vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3]
vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4]
vpxor %ymm12,%ymm5,%ymm5
vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3]
vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4]
vpandn %ymm15,%ymm6,%ymm6 # tgting [3][3] [1][1] [4][4] [2][2]
vpxor %ymm13,%ymm6,%ymm6

vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3]
vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3]
vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2]
vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2]
vpandn %ymm15,%ymm1,%ymm1 # tgting [0][4] [0][3] [0][2] [0][1]

vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1]
vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2]
vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1]
vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2]
vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1]
vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2]
vpandn %ymm14,%ymm2,%ymm2 # tgting [3][0] [1][0] [4][0] [2][0]
vpxor %ymm9,%ymm2,%ymm2

vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0]
vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle
vpermq $0b10001101,%ymm5,%ymm5
vpermq $0b01110010,%ymm6,%ymm6

vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2]
vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3]
vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2]
vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3]
vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2]
vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3]
vpandn %ymm14,%ymm4,%ymm4 # tgting [3][4] [1][3] [4][2] [2][1]

vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm11,%ymm4,%ymm4

######################################### Iota
vpxor (%r10),%ymm0,%ymm0
lea 32(%r10),%r10 # next round-constant row

dec %eax # one round done; loop until the counter reaches zero
jnz .Loop_avx2
ret
.size __KeccakF1600,.-__KeccakF1600
|
480
|
+
|
481
|
+
|
482
|
+
|
483
|
+
#
# KeccakP1600_Permute_24rounds: runs 24 rounds of __KeccakF1600 on the state at %rdi.
# (C prototype presumably `void KeccakP1600_Permute_24rounds(void *state);` -- TODO confirm against the header.)
#
# The state is read as one broadcast lane at byte 0 plus six unaligned
# 32-byte rows starting at byte 8, and written back the same way.
# Clobb: %rax, %rdi (left at state+96), %r8, %r9, %r10, %ymm0-%ymm15, flags
#
    .globl  KeccakP1600_Permute_24rounds
    .type   KeccakP1600_Permute_24rounds,@function
    .p2align 5                                          # 32-byte alignment
KeccakP1600_Permute_24rounds:
    movl        $24,%eax                                # round counter for __KeccakF1600
    leaq        iotas(%rip),%r10                        # start at the first round-constant row
    leaq        rhotates_right+96(%rip),%r9
    leaq        rhotates_left+96(%rip),%r8
    leaq        96(%rdi),%rdi                           # biased state pointer; every access below uses -96
    vzeroupper
    vpbroadcastq -96(%rdi),%ymm0                        # load A[5][5]
    vmovdqu     8+32*0-96(%rdi),%ymm1
    vmovdqu     8+32*1-96(%rdi),%ymm2
    vmovdqu     8+32*2-96(%rdi),%ymm3
    vmovdqu     8+32*3-96(%rdi),%ymm4
    vmovdqu     8+32*4-96(%rdi),%ymm5
    vmovdqu     8+32*5-96(%rdi),%ymm6
    call        __KeccakF1600
    vmovdqu     %ymm6,8+32*5-96(%rdi)                   # the stores do not overlap, so their order is free
    vmovdqu     %ymm5,8+32*4-96(%rdi)
    vmovdqu     %ymm4,8+32*3-96(%rdi)
    vmovdqu     %ymm3,8+32*2-96(%rdi)
    vmovdqu     %ymm2,8+32*1-96(%rdi)
    vmovdqu     %ymm1,8+32*0-96(%rdi)
    vmovq       %xmm0,-96(%rdi)
    vzeroupper
    ret
    .size   KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
|
511
|
+
|
512
|
+
#
# KeccakP1600_Permute_12rounds: runs 12 rounds of __KeccakF1600 on the state at %rdi.
# (C prototype presumably `void KeccakP1600_Permute_12rounds(void *state);` -- TODO confirm against the header.)
#
# Starts 12 rows (12*4*8 bytes) into the iotas table, so the 12 rows consumed
# are rows 12..23. The state is read as one broadcast lane at byte 0 plus six
# unaligned 32-byte rows starting at byte 8, and written back the same way.
# Clobb: %rax, %rdi (left at state+96), %r8, %r9, %r10, %ymm0-%ymm15, flags
#
    .globl  KeccakP1600_Permute_12rounds
    .type   KeccakP1600_Permute_12rounds,@function
    .p2align 5                                          # 32-byte alignment
KeccakP1600_Permute_12rounds:
    movl        $12,%eax                                # round counter for __KeccakF1600
    leaq        iotas+12*4*8(%rip),%r10                 # skip the first 12 round-constant rows
    leaq        rhotates_right+96(%rip),%r9
    leaq        rhotates_left+96(%rip),%r8
    leaq        96(%rdi),%rdi                           # biased state pointer; every access below uses -96
    vzeroupper
    vpbroadcastq -96(%rdi),%ymm0                        # load A[5][5]
    vmovdqu     8+32*0-96(%rdi),%ymm1
    vmovdqu     8+32*1-96(%rdi),%ymm2
    vmovdqu     8+32*2-96(%rdi),%ymm3
    vmovdqu     8+32*3-96(%rdi),%ymm4
    vmovdqu     8+32*4-96(%rdi),%ymm5
    vmovdqu     8+32*5-96(%rdi),%ymm6
    call        __KeccakF1600
    vmovdqu     %ymm6,8+32*5-96(%rdi)                   # the stores do not overlap, so their order is free
    vmovdqu     %ymm5,8+32*4-96(%rdi)
    vmovdqu     %ymm4,8+32*3-96(%rdi)
    vmovdqu     %ymm3,8+32*2-96(%rdi)
    vmovdqu     %ymm2,8+32*1-96(%rdi)
    vmovdqu     %ymm1,8+32*0-96(%rdi)
    vmovq       %xmm0,-96(%rdi)
    vzeroupper
    ret
    .size   KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
|
540
|
+
|
541
|
+
# -----------------------------------------------------------------------------
# KeccakP1600_Permute_Nrounds
#   In:    %rdi = pointer to the state (8 + 6*32 = 200 bytes are read and
#                 written back)
#          %esi = number of rounds; only the low 32 bits are used
#   Out:   state updated in place; nothing is returned
#   Note:  the rounds use the LAST nrounds rows of the iotas table, so the
#          start pointer is (end of iotas) - nrounds*32. The table has 24
#          rows, so nrounds > 24 would index before it.
#          NOTE(review): nrounds == 0 is not guarded here; the round loop
#          decrements before testing - confirm callers never pass 0.
#          %rdi is left advanced by 96 on return; %rax, %rsi, %r8, %r9, %r10
#          and %ymm0-%ymm6 are overwritten here (plus whatever
#          __KeccakF1600 itself uses).
# -----------------------------------------------------------------------------
.globl	KeccakP1600_Permute_Nrounds
.type	KeccakP1600_Permute_Nrounds,@function
.align	32
KeccakP1600_Permute_Nrounds:
	lea	rhotates_left+96(%rip),%r8
	lea	rhotates_right+96(%rip),%r9
	lea	iotas+24*4*8(%rip),%r10		# one past the last round-constant row
	# The round loop counts with the 32-bit %eax, so the table offset must be
	# derived from the same 32-bit value. Zero-extend first: bits 63..32 of
	# %rsi are not guaranteed to be clear for a 32-bit argument.
	mov	%esi,%esi
	mov	%rsi,%rax			# round counter
	shl	$2+3,%rsi			# nrounds * 4 qwords * 8 bytes
	sub	%rsi,%r10			# first row used = end - nrounds rows
	lea	96(%rdi),%rdi			# bias the pointer; all state accesses below use -96(%rdi)
	vzeroupper
	vpbroadcastq	-96(%rdi),%ymm0		# first lane replicated into all four qwords
	vmovdqu	8+32*0-96(%rdi),%ymm1
	vmovdqu	8+32*1-96(%rdi),%ymm2
	vmovdqu	8+32*2-96(%rdi),%ymm3
	vmovdqu	8+32*3-96(%rdi),%ymm4
	vmovdqu	8+32*4-96(%rdi),%ymm5
	vmovdqu	8+32*5-96(%rdi),%ymm6
	call	__KeccakF1600
	vmovq	%xmm0,-96(%rdi)			# only the low qword of ymm0 is stored
	vmovdqu	%ymm1,8+32*0-96(%rdi)
	vmovdqu	%ymm2,8+32*1-96(%rdi)
	vmovdqu	%ymm3,8+32*2-96(%rdi)
	vmovdqu	%ymm4,8+32*3-96(%rdi)
	vmovdqu	%ymm5,8+32*4-96(%rdi)
	vmovdqu	%ymm6,8+32*5-96(%rdi)
	vzeroupper
	ret
.size	KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
|
571
|
+
|
572
|
+
# -----------------------------------------------------------------------------
#
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                          %rdi                %rsi                          %rdx          %rcx
#
# While at least laneCount 8-byte lanes of input remain, XORs the next
# laneCount lanes of data into the state and applies 24 rounds.
# Returns in %rax the number of bytes consumed (final data pointer minus the
# initial one). Any trailing partial block is left untouched.
#
# Three code paths:
#   laneCount == 21, laneCount == 17 : state kept in ymm0..ymm6 across blocks,
#                                      input fetched with masked gathers;
#   anything else                    : lanes XORed into memory one by one via
#                                      the mapState offset table, then
#                                      KeccakP1600_Permute_24rounds is called.
# NOTE(review): laneCount == 0 and laneCount > 25 are not guarded in the
# generic path (mapState has 25 entries) - confirm callers never pass them.
#
.globl	KeccakF1600_FastLoop_Absorb
.type	KeccakF1600_FastLoop_Absorb,@function
.align	32
KeccakF1600_FastLoop_Absorb:
	push	%rbx
	push	%r10
	# laneCount is a 32-bit argument; the ABI leaves bits 63..32 of %rsi
	# undefined, and every use below is 64-bit. Zero-extend it once here.
	mov	%esi,%esi
	shr	$3,%rcx				# rcx = data length in lanes
	mov	%rdx,%rbx			# rbx = initial data pointer
	cmp	%rsi,%rcx
	jb	KeccakF1600_FastLoop_Absorb_Exit	# less than one full block: consume nothing
	vzeroupper
	cmp	$21,%rsi
	jnz	KeccakF1600_FastLoop_Absorb_Not21Lanes

	# ---- laneCount == 21 ------------------------------------------------
	sub	$21,%rcx			# rcx = lanes left after the block about to be absorbed
	lea	rhotates_left+96(%rip),%r8
	lea	rhotates_right+96(%rip),%r9
	lea	96(%rdi),%rdi			# bias the pointer; state accesses use -96(%rdi)
	vpbroadcastq	-96(%rdi),%ymm0		# first state lane replicated into all four qwords
	vmovdqu	8+32*0-96(%rdi),%ymm1
	vmovdqu	8+32*1-96(%rdi),%ymm2
	vmovdqu	8+32*2-96(%rdi),%ymm3
	vmovdqu	8+32*3-96(%rdi),%ymm4
	vmovdqu	8+32*4-96(%rdi),%ymm5
	vmovdqu	8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
	vpbroadcastq	(%rdx),%ymm7		# data lane 0
	vmovdqu	8(%rdx),%ymm8			# data lanes 1..4

	vmovdqa	map2(%rip),%xmm15		# byte offsets of data lanes 10,20,5,15
	vpcmpeqq	%ymm14,%ymm14,%ymm14	# all-ones mask: gather all four lanes
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm9

	# For the remaining rows one element each is masked off; the destination
	# is zeroed first so the skipped element contributes nothing to the XOR.
	vmovdqa	mask3_21(%rip),%ymm14
	vpxor	%ymm10,%ymm10,%ymm10
	vmovdqa	map3(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm10

	vmovdqa	mask4_21(%rip),%ymm14
	vpxor	%ymm11,%ymm11,%ymm11
	vmovdqa	map4(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm11

	vmovdqa	mask5_21(%rip),%ymm14
	vpxor	%ymm12,%ymm12,%ymm12
	vmovdqa	map5(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm12

	vmovdqa	mask6_21(%rip),%ymm14
	vpxor	%ymm13,%ymm13,%ymm13
	vmovdqa	map6(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm13

	vpxor	%ymm7,%ymm0,%ymm0		# state ^= input block
	vpxor	%ymm8,%ymm1,%ymm1
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm11,%ymm4,%ymm4
	vpxor	%ymm12,%ymm5,%ymm5
	vpxor	%ymm13,%ymm6,%ymm6
	add	$21*8,%rdx			# advance past the absorbed block
	lea	iotas(%rip),%r10		# reloaded every block: the round loop advances r10
	mov	$24,%eax			# reloaded every block: the round loop counts eax down
	call	__KeccakF1600
	sub	$21,%rcx
	jnc	KeccakF1600_FastLoop_Absorb_Loop21Lanes	# no borrow: another full block is available
KeccakF1600_FastLoop_Absorb_SaveAndExit:
	vmovq	%xmm0,-96(%rdi)			# only the low qword of ymm0 is stored
	vmovdqu	%ymm1,8+32*0-96(%rdi)
	vmovdqu	%ymm2,8+32*1-96(%rdi)
	vmovdqu	%ymm3,8+32*2-96(%rdi)
	vmovdqu	%ymm4,8+32*3-96(%rdi)
	vmovdqu	%ymm5,8+32*4-96(%rdi)
	vmovdqu	%ymm6,8+32*5-96(%rdi)
KeccakF1600_FastLoop_Absorb_Exit:
	vzeroupper
	mov	%rdx,%rax			# return number of bytes processed
	sub	%rbx,%rax
	pop	%r10
	pop	%rbx
	ret

	# ---- laneCount == 17 ------------------------------------------------
KeccakF1600_FastLoop_Absorb_Not21Lanes:
	cmp	$17,%rsi
	jnz	KeccakF1600_FastLoop_Absorb_Not17Lanes
	sub	$17,%rcx			# rcx = lanes left after the block about to be absorbed
	lea	rhotates_left+96(%rip),%r8
	lea	rhotates_right+96(%rip),%r9
	lea	96(%rdi),%rdi			# bias the pointer; state accesses use -96(%rdi)
	vpbroadcastq	-96(%rdi),%ymm0		# first state lane replicated into all four qwords
	vmovdqu	8+32*0-96(%rdi),%ymm1
	vmovdqu	8+32*1-96(%rdi),%ymm2
	vmovdqu	8+32*2-96(%rdi),%ymm3
	vmovdqu	8+32*3-96(%rdi),%ymm4
	vmovdqu	8+32*4-96(%rdi),%ymm5
	vmovdqu	8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
	vpbroadcastq	(%rdx),%ymm7		# data lane 0
	vmovdqu	8(%rdx),%ymm8			# data lanes 1..4

	# Every gathered row has masked-off elements here, so each destination
	# is zeroed before its gather.
	vmovdqa	mask2_17(%rip),%ymm14
	vpxor	%ymm9,%ymm9,%ymm9
	vmovdqa	map2(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm9

	vmovdqa	mask3_17(%rip),%ymm14
	vpxor	%ymm10,%ymm10,%ymm10
	vmovdqa	map3(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm10

	vmovdqa	mask4_17(%rip),%ymm14
	vpxor	%ymm11,%ymm11,%ymm11
	vmovdqa	map4(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm11

	vmovdqa	mask5_17(%rip),%ymm14
	vpxor	%ymm12,%ymm12,%ymm12
	vmovdqa	map5(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm12

	vmovdqa	mask6_17(%rip),%ymm14
	vpxor	%ymm13,%ymm13,%ymm13
	vmovdqa	map6(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm13

	vpxor	%ymm7,%ymm0,%ymm0		# state ^= input block
	vpxor	%ymm8,%ymm1,%ymm1
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm11,%ymm4,%ymm4
	vpxor	%ymm12,%ymm5,%ymm5
	vpxor	%ymm13,%ymm6,%ymm6
	add	$17*8,%rdx			# advance past the absorbed block
	lea	iotas(%rip),%r10		# reloaded every block: the round loop advances r10
	mov	$24,%eax			# reloaded every block: the round loop counts eax down
	call	__KeccakF1600
	sub	$17,%rcx
	jnc	KeccakF1600_FastLoop_Absorb_Loop17Lanes	# no borrow: another full block is available
	jmp	KeccakF1600_FastLoop_Absorb_SaveAndExit

	# ---- any other laneCount --------------------------------------------
	# The state stays in memory: each input lane is XORed at the byte offset
	# mapState gives for it, then the full permutation is called.
KeccakF1600_FastLoop_Absorb_Not17Lanes:
	lea	mapState(%rip),%r9		# restart at the first table entry for every block
	mov	%rsi,%rax			# rax = lanes still to add in this block
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
	mov	(%rdx),%r8			# next input lane
	add	$8,%rdx
	mov	(%r9),%r10			# byte offset of that lane inside the state
	add	$8,%r9
	add	%rdi,%r10
	xor	%r8,(%r10)
	sub	$1,%rax
	jnz	KeccakF1600_FastLoop_Absorb_LanesAddLoop
	sub	%rsi,%rcx			# one block fewer remaining
	# KeccakP1600_Permute_24rounds leaves %rdi advanced and does not
	# preserve the other argument registers, so save them around the call.
	push	%rdi
	push	%rsi
	push	%rdx
	push	%rcx
	call	KeccakP1600_Permute_24rounds
	pop	%rcx
	pop	%rdx
	pop	%rsi
	pop	%rdi
	cmp	%rsi,%rcx
	jae	KeccakF1600_FastLoop_Absorb_Not17Lanes	# at least one more full block
	jmp	KeccakF1600_FastLoop_Absorb_Exit
.size	KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
|
740
|
+
|
741
|
+
# -----------------------------------------------------------------------------
#
# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                                  %rdi                %rsi                          %rdx          %rcx
#
# While at least laneCount 8-byte lanes of input remain, XORs the next
# laneCount lanes of data into the state and applies 12 rounds (using rows
# 12..23 of the iotas table).
# Returns in %rax the number of bytes consumed (final data pointer minus the
# initial one). Any trailing partial block is left untouched.
#
# Three code paths:
#   laneCount == 21, laneCount == 17 : state kept in ymm0..ymm6 across blocks,
#                                      input fetched with masked gathers;
#   anything else                    : lanes XORed into memory one by one via
#                                      the mapState offset table, then
#                                      KeccakP1600_Permute_12rounds is called.
# NOTE(review): laneCount == 0 and laneCount > 25 are not guarded in the
# generic path (mapState has 25 entries) - confirm callers never pass them.
#
.globl	KeccakP1600_12rounds_FastLoop_Absorb
.type	KeccakP1600_12rounds_FastLoop_Absorb,@function
.align	32
KeccakP1600_12rounds_FastLoop_Absorb:
	push	%rbx
	push	%r10
	# laneCount is a 32-bit argument; the ABI leaves bits 63..32 of %rsi
	# undefined, and every use below is 64-bit. Zero-extend it once here.
	mov	%esi,%esi
	shr	$3,%rcx				# rcx = data length in lanes
	mov	%rdx,%rbx			# rbx = initial data pointer
	cmp	%rsi,%rcx
	jb	KeccakP1600_12rounds_FastLoop_Absorb_Exit	# less than one full block: consume nothing
	vzeroupper
	cmp	$21,%rsi
	jnz	KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes

	# ---- laneCount == 21 ------------------------------------------------
	sub	$21,%rcx			# rcx = lanes left after the block about to be absorbed
	lea	rhotates_left+96(%rip),%r8
	lea	rhotates_right+96(%rip),%r9
	lea	96(%rdi),%rdi			# bias the pointer; state accesses use -96(%rdi)
	vpbroadcastq	-96(%rdi),%ymm0		# first state lane replicated into all four qwords
	vmovdqu	8+32*0-96(%rdi),%ymm1
	vmovdqu	8+32*1-96(%rdi),%ymm2
	vmovdqu	8+32*2-96(%rdi),%ymm3
	vmovdqu	8+32*3-96(%rdi),%ymm4
	vmovdqu	8+32*4-96(%rdi),%ymm5
	vmovdqu	8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
	vpbroadcastq	(%rdx),%ymm7		# data lane 0
	vmovdqu	8(%rdx),%ymm8			# data lanes 1..4

	vmovdqa	map2(%rip),%xmm15		# byte offsets of data lanes 10,20,5,15
	vpcmpeqq	%ymm14,%ymm14,%ymm14	# all-ones mask: gather all four lanes
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm9

	# For the remaining rows one element each is masked off; the destination
	# is zeroed first so the skipped element contributes nothing to the XOR.
	vmovdqa	mask3_21(%rip),%ymm14
	vpxor	%ymm10,%ymm10,%ymm10
	vmovdqa	map3(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm10

	vmovdqa	mask4_21(%rip),%ymm14
	vpxor	%ymm11,%ymm11,%ymm11
	vmovdqa	map4(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm11

	vmovdqa	mask5_21(%rip),%ymm14
	vpxor	%ymm12,%ymm12,%ymm12
	vmovdqa	map5(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm12

	vmovdqa	mask6_21(%rip),%ymm14
	vpxor	%ymm13,%ymm13,%ymm13
	vmovdqa	map6(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm13

	vpxor	%ymm7,%ymm0,%ymm0		# state ^= input block
	vpxor	%ymm8,%ymm1,%ymm1
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm11,%ymm4,%ymm4
	vpxor	%ymm12,%ymm5,%ymm5
	vpxor	%ymm13,%ymm6,%ymm6
	add	$21*8,%rdx			# advance past the absorbed block
	lea	iotas+12*4*8(%rip),%r10		# reloaded every block: the round loop advances r10
	mov	$12,%eax			# reloaded every block: the round loop counts eax down
	call	__KeccakF1600
	sub	$21,%rcx
	jnc	KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes	# no borrow: another full block is available
KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
	vmovq	%xmm0,-96(%rdi)			# only the low qword of ymm0 is stored
	vmovdqu	%ymm1,8+32*0-96(%rdi)
	vmovdqu	%ymm2,8+32*1-96(%rdi)
	vmovdqu	%ymm3,8+32*2-96(%rdi)
	vmovdqu	%ymm4,8+32*3-96(%rdi)
	vmovdqu	%ymm5,8+32*4-96(%rdi)
	vmovdqu	%ymm6,8+32*5-96(%rdi)
KeccakP1600_12rounds_FastLoop_Absorb_Exit:
	vzeroupper
	mov	%rdx,%rax			# return number of bytes processed
	sub	%rbx,%rax
	pop	%r10
	pop	%rbx
	ret

	# ---- laneCount == 17 ------------------------------------------------
KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
	cmp	$17,%rsi
	jnz	KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
	sub	$17,%rcx			# rcx = lanes left after the block about to be absorbed
	lea	rhotates_left+96(%rip),%r8
	lea	rhotates_right+96(%rip),%r9
	lea	96(%rdi),%rdi			# bias the pointer; state accesses use -96(%rdi)
	vpbroadcastq	-96(%rdi),%ymm0		# first state lane replicated into all four qwords
	vmovdqu	8+32*0-96(%rdi),%ymm1
	vmovdqu	8+32*1-96(%rdi),%ymm2
	vmovdqu	8+32*2-96(%rdi),%ymm3
	vmovdqu	8+32*3-96(%rdi),%ymm4
	vmovdqu	8+32*4-96(%rdi),%ymm5
	vmovdqu	8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
	vpbroadcastq	(%rdx),%ymm7		# data lane 0
	vmovdqu	8(%rdx),%ymm8			# data lanes 1..4

	# Every gathered row has masked-off elements here, so each destination
	# is zeroed before its gather.
	vmovdqa	mask2_17(%rip),%ymm14
	vpxor	%ymm9,%ymm9,%ymm9
	vmovdqa	map2(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm9

	vmovdqa	mask3_17(%rip),%ymm14
	vpxor	%ymm10,%ymm10,%ymm10
	vmovdqa	map3(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm10

	vmovdqa	mask4_17(%rip),%ymm14
	vpxor	%ymm11,%ymm11,%ymm11
	vmovdqa	map4(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm11

	vmovdqa	mask5_17(%rip),%ymm14
	vpxor	%ymm12,%ymm12,%ymm12
	vmovdqa	map5(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm12

	vmovdqa	mask6_17(%rip),%ymm14
	vpxor	%ymm13,%ymm13,%ymm13
	vmovdqa	map6(%rip),%xmm15
	vpgatherdq	%ymm14,(%rdx,%xmm15,1),%ymm13

	vpxor	%ymm7,%ymm0,%ymm0		# state ^= input block
	vpxor	%ymm8,%ymm1,%ymm1
	vpxor	%ymm9,%ymm2,%ymm2
	vpxor	%ymm10,%ymm3,%ymm3
	vpxor	%ymm11,%ymm4,%ymm4
	vpxor	%ymm12,%ymm5,%ymm5
	vpxor	%ymm13,%ymm6,%ymm6
	add	$17*8,%rdx			# advance past the absorbed block
	lea	iotas+12*4*8(%rip),%r10		# reloaded every block: the round loop advances r10
	mov	$12,%eax			# reloaded every block: the round loop counts eax down
	call	__KeccakF1600
	sub	$17,%rcx
	jnc	KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes	# no borrow: another full block is available
	jmp	KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit

	# ---- any other laneCount --------------------------------------------
	# The state stays in memory: each input lane is XORed at the byte offset
	# mapState gives for it, then the 12-round permutation is called.
KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
	lea	mapState(%rip),%r9		# restart at the first table entry for every block
	mov	%rsi,%rax			# rax = lanes still to add in this block
KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
	mov	(%rdx),%r8			# next input lane
	add	$8,%rdx
	mov	(%r9),%r10			# byte offset of that lane inside the state
	add	$8,%r9
	add	%rdi,%r10
	xor	%r8,(%r10)
	sub	$1,%rax
	jnz	KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
	sub	%rsi,%rcx			# one block fewer remaining
	# KeccakP1600_Permute_12rounds leaves %rdi advanced and does not
	# preserve the other argument registers, so save them around the call.
	push	%rdi
	push	%rsi
	push	%rdx
	push	%rcx
	call	KeccakP1600_Permute_12rounds
	pop	%rcx
	pop	%rdx
	pop	%rsi
	pop	%rdi
	cmp	%rsi,%rcx
	jae	KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes	# at least one more full block
	jmp	KeccakP1600_12rounds_FastLoop_Absorb_Exit
.size	KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
|
909
|
+
|
910
|
+
# All-ones qword: marks an enabled element in the mask*_21 / mask*_17 tables.
.set	ALLON, 0xFFFFFFFFFFFFFFFF
|
911
|
+
|
912
|
+
# Left-rotation amounts, six rows of four qwords each.
# The code addresses this table as rhotates_left+96, i.e. from its fourth row.
# The bracketed pairs are the lane coordinates each amount belongs to.
.p2align	6
rhotates_left:
	.quad	 3, 18, 36, 41		# [2][0] [4][0] [1][0] [3][0]
	.quad	 1, 62, 28, 27		# [0][1] [0][2] [0][3] [0][4]
	.quad	45,  6, 56, 39		# [3][1] [1][2] [4][3] [2][4]
	.quad	10, 61, 55,  8		# [2][1] [4][2] [1][3] [3][4]
	.quad	 2, 15, 25, 20		# [4][1] [3][2] [2][3] [1][4]
	.quad	44, 43, 21, 14		# [1][1] [2][2] [3][3] [4][4]
|
920
|
+
# Complementary shift amounts: every entry is 64 minus the entry at the same
# position in rhotates_left. Addressed by the code as rhotates_right+96.
rhotates_right:
	.quad	64-3,  64-18, 64-36, 64-41
	.quad	64-1,  64-62, 64-28, 64-27
	.quad	64-45, 64-6,  64-56, 64-39
	.quad	64-10, 64-61, 64-55, 64-8
	.quad	64-2,  64-15, 64-25, 64-20
	.quad	64-44, 64-43, 64-21, 64-14
|
927
|
+
# Round constants, 24 rows. Each row holds one constant repeated four times
# (32 bytes), because the Iota step XORs a whole 32-byte row into ymm0 and then
# advances the table pointer by 32. The 12-round entry points start at
# iotas+12*4*8, i.e. at the thirteenth row.
.macro	KECCAK_IOTA_ROW rc
	.quad	\rc, \rc, \rc, \rc
.endm
iotas:
	KECCAK_IOTA_ROW	0x0000000000000001
	KECCAK_IOTA_ROW	0x0000000000008082
	KECCAK_IOTA_ROW	0x800000000000808a
	KECCAK_IOTA_ROW	0x8000000080008000
	KECCAK_IOTA_ROW	0x000000000000808b
	KECCAK_IOTA_ROW	0x0000000080000001
	KECCAK_IOTA_ROW	0x8000000080008081
	KECCAK_IOTA_ROW	0x8000000000008009
	KECCAK_IOTA_ROW	0x000000000000008a
	KECCAK_IOTA_ROW	0x0000000000000088
	KECCAK_IOTA_ROW	0x0000000080008009
	KECCAK_IOTA_ROW	0x000000008000000a
	KECCAK_IOTA_ROW	0x000000008000808b
	KECCAK_IOTA_ROW	0x800000000000008b
	KECCAK_IOTA_ROW	0x8000000000008089
	KECCAK_IOTA_ROW	0x8000000000008003
	KECCAK_IOTA_ROW	0x8000000000008002
	KECCAK_IOTA_ROW	0x8000000000000080
	KECCAK_IOTA_ROW	0x000000000000800a
	KECCAK_IOTA_ROW	0x800000008000000a
	KECCAK_IOTA_ROW	0x8000000080008081
	KECCAK_IOTA_ROW	0x8000000000008080
	KECCAK_IOTA_ROW	0x0000000080000001
	KECCAK_IOTA_ROW	0x8000000080008008
.purgem	KECCAK_IOTA_ROW
|
952
|
+
|
953
|
+
# Byte offset, inside the in-memory state, at which input lane i is XORed by
# the generic (neither 17- nor 21-lane) absorb loops. Entry i is read for the
# i-th lane of a block; there are 25 entries.
mapState:
	.quad	 0*8,  1*8,  2*8,  3*8,  4*8	# input lanes  0..4
	.quad	 7*8, 21*8, 10*8, 15*8, 20*8	# input lanes  5..9
	.quad	 5*8, 13*8, 22*8, 19*8, 12*8	# input lanes 10..14
	.quad	 8*8,  9*8, 18*8, 23*8, 16*8	# input lanes 15..19
	.quad	 6*8, 17*8, 14*8, 11*8, 24*8	# input lanes 20..24
|
959
|
+
|
960
|
+
# Gather index vectors for the 17- and 21-lane absorb loops. Each holds four
# 32-bit byte offsets relative to the data pointer (input lane number * 8);
# mapN supplies the input lanes that are XORed into ymmN.
# 16-byte alignment is required because they are loaded with vmovdqa.
.p2align	4
map2:
	.long	10*8, 20*8,  5*8, 15*8
map3:
	.long	16*8,  7*8, 23*8, 14*8
map4:
	.long	11*8, 22*8,  8*8, 19*8
map5:
	.long	21*8, 17*8, 13*8,  9*8
map6:
	.long	 6*8, 12*8, 18*8, 24*8
|
971
|
+
|
972
|
+
# Per-element gather masks (ALLON = fetch the element, 0 = skip it), paired
# with the index vector of the same number (maskN_* goes with mapN).
# A zero sits wherever the index names an input lane at or beyond the block
# size, so nothing past the block is read.
# 32-byte alignment is required because they are loaded with vmovdqa.
.p2align	5

# 21-lane blocks (map2 needs no mask: all four of its lanes are below 21).
mask3_21:
	.quad	ALLON, ALLON, 0,     ALLON	# skips lane 23
mask4_21:
	.quad	ALLON, 0,     ALLON, ALLON	# skips lane 22
mask5_21:
	.quad	0,     ALLON, ALLON, ALLON	# skips lane 21
mask6_21:
	.quad	ALLON, ALLON, ALLON, 0		# skips lane 24

# 17-lane blocks.
mask2_17:
	.quad	ALLON, 0,     ALLON, ALLON	# skips lane 20
mask3_17:
	.quad	ALLON, ALLON, 0,     ALLON	# skips lane 23
mask4_17:
	.quad	ALLON, 0,     ALLON, 0		# skips lanes 22 and 19
mask5_17:
	.quad	0,     0,     ALLON, ALLON	# skips lanes 21 and 17
mask6_17:
	.quad	ALLON, ALLON, 0,     0		# skips lanes 18 and 24
|
992
|
+
|
993
|
+
.asciz "Keccak-1600 for AVX2, CRYPTOGAMS by <appro@openssl.org>"
|