digest-kangarootwelve 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -11
- data/Rakefile +2 -2
- data/digest-kangarootwelve.gemspec +322 -42
- data/ext/digest/kangarootwelve/ext.c +1 -1
- data/ext/digest/kangarootwelve/extconf.rb +13 -1
- data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
- data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
- data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
- data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
- data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
- data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
- data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
- data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
- data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
- data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
- data/lib/digest/kangarootwelve/version.rb +1 -1
- metadata +299 -21
@@ -0,0 +1,41 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
Please refer to SnP-documentation.h for more details.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#ifndef _KeccakP_1600_SnP_h_
|
17
|
+
#define _KeccakP_1600_SnP_h_
|
18
|
+
|
19
|
+
#include <stddef.h>
|
20
|
+
|
21
|
+
#define KeccakP1600_implementation "AVX2 optimized implementation"
|
22
|
+
#define KeccakP1600_stateSizeInBytes 200
|
23
|
+
#define KeccakP1600_stateAlignment 32
|
24
|
+
#define KeccakF1600_FastLoop_supported
|
25
|
+
#define KeccakP1600_12rounds_FastLoop_supported
|
26
|
+
|
27
|
+
#define KeccakP1600_StaticInitialize()
|
28
|
+
void KeccakP1600_Initialize(void *state);
|
29
|
+
void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
|
30
|
+
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
31
|
+
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
32
|
+
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
|
33
|
+
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
|
34
|
+
void KeccakP1600_Permute_12rounds(void *state);
|
35
|
+
void KeccakP1600_Permute_24rounds(void *state);
|
36
|
+
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
|
37
|
+
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
|
38
|
+
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
|
39
|
+
size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
|
40
|
+
|
41
|
+
#endif
|
@@ -0,0 +1,954 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Gilles Van Assche, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
|
14
|
+
Please refer to PlSnP-documentation.h for more details.
|
15
|
+
|
16
|
+
This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
|
17
|
+
Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <stdio.h>
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <string.h>
|
23
|
+
#include <x86intrin.h>
|
24
|
+
#include "align.h"
|
25
|
+
#include "KeccakP-1600-times2-SnP.h"
|
26
|
+
#include "SIMD128-config.h"
|
27
|
+
|
28
|
+
#include "brg_endian.h"
|
29
|
+
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
|
30
|
+
#error Expecting a little-endian platform
|
31
|
+
#endif
|
32
|
+
|
33
|
+
typedef unsigned char UINT8;
|
34
|
+
typedef unsigned long long int UINT64;
|
35
|
+
typedef __m128i V128;
|
36
|
+
|
37
|
+
/* Index of the 64-bit lane at lanePosition for the given instance in the
 * interleaved two-instance state: lanes of the two instances alternate, so
 * each lane position occupies two consecutive 64-bit slots.
 * Both arguments are parenthesized so that any expression can be passed. */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + (instanceIndex))
|
38
|
+
|
39
|
+
#if defined(KeccakP1600times2_useSSE)
|
40
|
+
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
|
41
|
+
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
|
42
|
+
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
|
43
|
+
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
|
44
|
+
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
|
45
|
+
#define CONST128_64(a) _mm_set1_epi64((__m64)(a))
|
46
|
+
#if defined(KeccakP1600times2_useXOP)
|
47
|
+
#define ROL64in128(a, o) _mm_roti_epi64(a, o)
|
48
|
+
#define ROL64in128_8(a) ROL64in128(a, 8)
|
49
|
+
#define ROL64in128_56(a) ROL64in128(a, 56)
|
50
|
+
#else
|
51
|
+
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
|
52
|
+
#define ROL64in128_8(a) _mm_shuffle_epi8(a, CONST128(rho8))
|
53
|
+
#define ROL64in128_56(a) _mm_shuffle_epi8(a, CONST128(rho56))
|
54
|
+
static const UINT64 rho8[2] = {0x0605040302010007, 0x0E0D0C0B0A09080F};
|
55
|
+
static const UINT64 rho56[2] = {0x0007060504030201, 0x080F0E0D0C0B0A09};
|
56
|
+
#endif
|
57
|
+
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
|
58
|
+
#define STORE128u(a, b) _mm_storeu_si128((V128 *)&(a), b)
|
59
|
+
#define STORE64L(a, b) _mm_storel_pi((__m64 *)&(a), (__m128)b)
|
60
|
+
#define STORE64H(a, b) _mm_storeh_pi((__m64 *)&(a), (__m128)b)
|
61
|
+
#define XOR128(a, b) _mm_xor_si128(a, b)
|
62
|
+
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
|
63
|
+
#define ZERO128() _mm_setzero_si128()
|
64
|
+
#if defined(KeccakP1600times2_useSSE2)
|
65
|
+
#define UNPACKL( a, b ) _mm_unpacklo_epi64((a), (b))
|
66
|
+
#define UNPACKH( a, b ) _mm_unpackhi_epi64((a), (b))
|
67
|
+
#endif
|
68
|
+
#endif
|
69
|
+
|
70
|
+
#define SnP_laneLengthInBytes 8
|
71
|
+
|
72
|
+
void KeccakP1600times2_InitializeAll(void *states)
|
73
|
+
{
|
74
|
+
memset(states, 0, KeccakP1600times2_statesSizeInBytes);
|
75
|
+
}
|
76
|
+
|
77
|
+
void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
78
|
+
{
|
79
|
+
unsigned int sizeLeft = length;
|
80
|
+
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
81
|
+
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
82
|
+
const unsigned char *curData = data;
|
83
|
+
UINT64 *statesAsLanes = (UINT64 *)states;
|
84
|
+
|
85
|
+
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
86
|
+
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
87
|
+
UINT64 lane = 0;
|
88
|
+
if (bytesInLane > sizeLeft)
|
89
|
+
bytesInLane = sizeLeft;
|
90
|
+
memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
|
91
|
+
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
92
|
+
sizeLeft -= bytesInLane;
|
93
|
+
lanePosition++;
|
94
|
+
curData += bytesInLane;
|
95
|
+
}
|
96
|
+
|
97
|
+
while(sizeLeft >= SnP_laneLengthInBytes) {
|
98
|
+
UINT64 lane = *((const UINT64*)curData);
|
99
|
+
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
100
|
+
sizeLeft -= SnP_laneLengthInBytes;
|
101
|
+
lanePosition++;
|
102
|
+
curData += SnP_laneLengthInBytes;
|
103
|
+
}
|
104
|
+
|
105
|
+
if (sizeLeft > 0) {
|
106
|
+
UINT64 lane = 0;
|
107
|
+
memcpy(&lane, curData, sizeLeft);
|
108
|
+
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
113
|
+
{
|
114
|
+
V128 *stateAsLanes = (V128 *)states;
|
115
|
+
unsigned int i;
|
116
|
+
const UINT64 *curData0 = (const UINT64 *)data;
|
117
|
+
const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
|
118
|
+
#define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
|
119
|
+
if ( laneCount >= 17 ) {
|
120
|
+
XOR_In( 0 );
|
121
|
+
XOR_In( 1 );
|
122
|
+
XOR_In( 2 );
|
123
|
+
XOR_In( 3 );
|
124
|
+
XOR_In( 4 );
|
125
|
+
XOR_In( 5 );
|
126
|
+
XOR_In( 6 );
|
127
|
+
XOR_In( 7 );
|
128
|
+
XOR_In( 8 );
|
129
|
+
XOR_In( 9 );
|
130
|
+
XOR_In( 10 );
|
131
|
+
XOR_In( 11 );
|
132
|
+
XOR_In( 12 );
|
133
|
+
XOR_In( 13 );
|
134
|
+
XOR_In( 14 );
|
135
|
+
XOR_In( 15 );
|
136
|
+
XOR_In( 16 );
|
137
|
+
if ( laneCount >= 21 ) {
|
138
|
+
XOR_In( 17 );
|
139
|
+
XOR_In( 18 );
|
140
|
+
XOR_In( 19 );
|
141
|
+
XOR_In( 20 );
|
142
|
+
for(i=21; i<laneCount; i++)
|
143
|
+
XOR_In( i );
|
144
|
+
}
|
145
|
+
else {
|
146
|
+
for(i=17; i<laneCount; i++)
|
147
|
+
XOR_In( i );
|
148
|
+
}
|
149
|
+
}
|
150
|
+
else {
|
151
|
+
for(i=0; i<laneCount; i++)
|
152
|
+
XOR_In( i );
|
153
|
+
}
|
154
|
+
#undef XOR_In
|
155
|
+
}
|
156
|
+
|
157
|
+
void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
158
|
+
{
|
159
|
+
unsigned int sizeLeft = length;
|
160
|
+
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
161
|
+
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
162
|
+
const unsigned char *curData = data;
|
163
|
+
UINT64 *statesAsLanes = (UINT64 *)states;
|
164
|
+
|
165
|
+
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
166
|
+
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
167
|
+
if (bytesInLane > sizeLeft)
|
168
|
+
bytesInLane = sizeLeft;
|
169
|
+
memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
|
170
|
+
sizeLeft -= bytesInLane;
|
171
|
+
lanePosition++;
|
172
|
+
curData += bytesInLane;
|
173
|
+
}
|
174
|
+
|
175
|
+
while(sizeLeft >= SnP_laneLengthInBytes) {
|
176
|
+
UINT64 lane = *((const UINT64*)curData);
|
177
|
+
statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
|
178
|
+
sizeLeft -= SnP_laneLengthInBytes;
|
179
|
+
lanePosition++;
|
180
|
+
curData += SnP_laneLengthInBytes;
|
181
|
+
}
|
182
|
+
|
183
|
+
if (sizeLeft > 0) {
|
184
|
+
memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
|
188
|
+
void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
189
|
+
{
|
190
|
+
V128 *stateAsLanes = (V128 *)states;
|
191
|
+
unsigned int i;
|
192
|
+
const UINT64 *curData0 = (const UINT64 *)data;
|
193
|
+
const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
|
194
|
+
#define OverWr( argIndex ) STORE128(stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
|
195
|
+
if ( laneCount >= 17 ) {
|
196
|
+
OverWr( 0 );
|
197
|
+
OverWr( 1 );
|
198
|
+
OverWr( 2 );
|
199
|
+
OverWr( 3 );
|
200
|
+
OverWr( 4 );
|
201
|
+
OverWr( 5 );
|
202
|
+
OverWr( 6 );
|
203
|
+
OverWr( 7 );
|
204
|
+
OverWr( 8 );
|
205
|
+
OverWr( 9 );
|
206
|
+
OverWr( 10 );
|
207
|
+
OverWr( 11 );
|
208
|
+
OverWr( 12 );
|
209
|
+
OverWr( 13 );
|
210
|
+
OverWr( 14 );
|
211
|
+
OverWr( 15 );
|
212
|
+
OverWr( 16 );
|
213
|
+
if ( laneCount >= 21 ) {
|
214
|
+
OverWr( 17 );
|
215
|
+
OverWr( 18 );
|
216
|
+
OverWr( 19 );
|
217
|
+
OverWr( 20 );
|
218
|
+
for(i=21; i<laneCount; i++)
|
219
|
+
OverWr( i );
|
220
|
+
}
|
221
|
+
else {
|
222
|
+
for(i=17; i<laneCount; i++)
|
223
|
+
OverWr( i );
|
224
|
+
}
|
225
|
+
}
|
226
|
+
else {
|
227
|
+
for(i=0; i<laneCount; i++)
|
228
|
+
OverWr( i );
|
229
|
+
}
|
230
|
+
#undef OverWr
|
231
|
+
}
|
232
|
+
|
233
|
+
void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
|
234
|
+
{
|
235
|
+
unsigned int sizeLeft = byteCount;
|
236
|
+
unsigned int lanePosition = 0;
|
237
|
+
UINT64 *statesAsLanes = (UINT64 *)states;
|
238
|
+
|
239
|
+
while(sizeLeft >= SnP_laneLengthInBytes) {
|
240
|
+
statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
|
241
|
+
sizeLeft -= SnP_laneLengthInBytes;
|
242
|
+
lanePosition++;
|
243
|
+
}
|
244
|
+
|
245
|
+
if (sizeLeft > 0) {
|
246
|
+
memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
|
251
|
+
{
|
252
|
+
unsigned int sizeLeft = length;
|
253
|
+
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
254
|
+
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
255
|
+
unsigned char *curData = data;
|
256
|
+
const UINT64 *statesAsLanes = (const UINT64 *)states;
|
257
|
+
|
258
|
+
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
259
|
+
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
260
|
+
if (bytesInLane > sizeLeft)
|
261
|
+
bytesInLane = sizeLeft;
|
262
|
+
memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
|
263
|
+
sizeLeft -= bytesInLane;
|
264
|
+
lanePosition++;
|
265
|
+
curData += bytesInLane;
|
266
|
+
}
|
267
|
+
|
268
|
+
while(sizeLeft >= SnP_laneLengthInBytes) {
|
269
|
+
*(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
270
|
+
sizeLeft -= SnP_laneLengthInBytes;
|
271
|
+
lanePosition++;
|
272
|
+
curData += SnP_laneLengthInBytes;
|
273
|
+
}
|
274
|
+
|
275
|
+
if (sizeLeft > 0) {
|
276
|
+
memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
|
277
|
+
}
|
278
|
+
}
|
279
|
+
|
280
|
+
void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
281
|
+
{
|
282
|
+
const V128 *stateAsLanes = (const V128 *)states;
|
283
|
+
V128 lanes;
|
284
|
+
unsigned int i;
|
285
|
+
UINT64 *curData0 = (UINT64 *)data;
|
286
|
+
UINT64 *curData1 = (UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
|
287
|
+
|
288
|
+
#define Extr( argIndex ) lanes = LOAD128( stateAsLanes[argIndex] ), \
|
289
|
+
STORE64L( curData0[argIndex], lanes ), \
|
290
|
+
STORE64H( curData1[argIndex], lanes )
|
291
|
+
|
292
|
+
#if defined(KeccakP1600times2_useSSE2)
|
293
|
+
#define Extr2( argIndex ) lanes0 = LOAD128( stateAsLanes[argIndex] ), \
|
294
|
+
lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ), \
|
295
|
+
lanes = UNPACKL( lanes0, lanes1 ), \
|
296
|
+
lanes0 = UNPACKH( lanes0, lanes1 ), \
|
297
|
+
STORE128u( *(V128*)&curData0[argIndex], lanes ), \
|
298
|
+
STORE128u( *(V128*)&curData1[argIndex], lanes0 )
|
299
|
+
if ( laneCount >= 16 ) {
|
300
|
+
V128 lanes0, lanes1;
|
301
|
+
Extr2( 0 );
|
302
|
+
Extr2( 2 );
|
303
|
+
Extr2( 4 );
|
304
|
+
Extr2( 6 );
|
305
|
+
Extr2( 8 );
|
306
|
+
Extr2( 10 );
|
307
|
+
Extr2( 12 );
|
308
|
+
Extr2( 14 );
|
309
|
+
if ( laneCount >= 20 ) {
|
310
|
+
Extr2( 16 );
|
311
|
+
Extr2( 18 );
|
312
|
+
for(i=20; i<laneCount; i++)
|
313
|
+
Extr( i );
|
314
|
+
}
|
315
|
+
else {
|
316
|
+
for(i=16; i<laneCount; i++)
|
317
|
+
Extr( i );
|
318
|
+
}
|
319
|
+
}
|
320
|
+
#undef Extr2
|
321
|
+
#else
|
322
|
+
if ( laneCount >= 17 ) {
|
323
|
+
Extr( 0 );
|
324
|
+
Extr( 1 );
|
325
|
+
Extr( 2 );
|
326
|
+
Extr( 3 );
|
327
|
+
Extr( 4 );
|
328
|
+
Extr( 5 );
|
329
|
+
Extr( 6 );
|
330
|
+
Extr( 7 );
|
331
|
+
Extr( 8 );
|
332
|
+
Extr( 9 );
|
333
|
+
Extr( 10 );
|
334
|
+
Extr( 11 );
|
335
|
+
Extr( 12 );
|
336
|
+
Extr( 13 );
|
337
|
+
Extr( 14 );
|
338
|
+
Extr( 15 );
|
339
|
+
Extr( 16 );
|
340
|
+
if ( laneCount >= 21 ) {
|
341
|
+
Extr( 17 );
|
342
|
+
Extr( 18 );
|
343
|
+
Extr( 19 );
|
344
|
+
Extr( 20 );
|
345
|
+
for(i=21; i<laneCount; i++)
|
346
|
+
Extr( i );
|
347
|
+
}
|
348
|
+
else {
|
349
|
+
for(i=17; i<laneCount; i++)
|
350
|
+
Extr( i );
|
351
|
+
}
|
352
|
+
}
|
353
|
+
#endif
|
354
|
+
else {
|
355
|
+
for(i=0; i<laneCount; i++)
|
356
|
+
Extr( i );
|
357
|
+
}
|
358
|
+
#undef Extr
|
359
|
+
}
|
360
|
+
|
361
|
+
void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
362
|
+
{
|
363
|
+
unsigned int sizeLeft = length;
|
364
|
+
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
365
|
+
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
366
|
+
const unsigned char *curInput = input;
|
367
|
+
unsigned char *curOutput = output;
|
368
|
+
const UINT64 *statesAsLanes = (const UINT64 *)states;
|
369
|
+
|
370
|
+
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
371
|
+
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
372
|
+
UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
|
373
|
+
if (bytesInLane > sizeLeft)
|
374
|
+
bytesInLane = sizeLeft;
|
375
|
+
sizeLeft -= bytesInLane;
|
376
|
+
do {
|
377
|
+
*(curOutput++) = *(curInput++) ^ (unsigned char)lane;
|
378
|
+
lane >>= 8;
|
379
|
+
} while ( --bytesInLane != 0);
|
380
|
+
lanePosition++;
|
381
|
+
}
|
382
|
+
|
383
|
+
while(sizeLeft >= SnP_laneLengthInBytes) {
|
384
|
+
*((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
385
|
+
sizeLeft -= SnP_laneLengthInBytes;
|
386
|
+
lanePosition++;
|
387
|
+
curInput += SnP_laneLengthInBytes;
|
388
|
+
curOutput += SnP_laneLengthInBytes;
|
389
|
+
}
|
390
|
+
|
391
|
+
if (sizeLeft != 0) {
|
392
|
+
UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
393
|
+
do {
|
394
|
+
*(curOutput++) = *(curInput++) ^ (unsigned char)lane;
|
395
|
+
lane >>= 8;
|
396
|
+
} while ( --sizeLeft != 0);
|
397
|
+
}
|
398
|
+
}
|
399
|
+
|
400
|
+
void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
|
401
|
+
{
|
402
|
+
const UINT64 *stateAsLanes = (const UINT64 *)states;
|
403
|
+
unsigned int i;
|
404
|
+
const UINT64 *curInput0 = (UINT64 *)input;
|
405
|
+
const UINT64 *curInput1 = (UINT64 *)(input+laneOffset*SnP_laneLengthInBytes);
|
406
|
+
UINT64 *curOutput0 = (UINT64 *)output;
|
407
|
+
UINT64 *curOutput1 = (UINT64 *)(output+laneOffset*SnP_laneLengthInBytes);
|
408
|
+
|
409
|
+
#define ExtrXOR( argIndex ) curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes[2*(argIndex)], curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes[2*(argIndex)+1]
|
410
|
+
|
411
|
+
if ( laneCount >= 17 ) {
|
412
|
+
ExtrXOR( 0 );
|
413
|
+
ExtrXOR( 1 );
|
414
|
+
ExtrXOR( 2 );
|
415
|
+
ExtrXOR( 3 );
|
416
|
+
ExtrXOR( 4 );
|
417
|
+
ExtrXOR( 5 );
|
418
|
+
ExtrXOR( 6 );
|
419
|
+
ExtrXOR( 7 );
|
420
|
+
ExtrXOR( 8 );
|
421
|
+
ExtrXOR( 9 );
|
422
|
+
ExtrXOR( 10 );
|
423
|
+
ExtrXOR( 11 );
|
424
|
+
ExtrXOR( 12 );
|
425
|
+
ExtrXOR( 13 );
|
426
|
+
ExtrXOR( 14 );
|
427
|
+
ExtrXOR( 15 );
|
428
|
+
ExtrXOR( 16 );
|
429
|
+
if ( laneCount >= 21 ) {
|
430
|
+
ExtrXOR( 17 );
|
431
|
+
ExtrXOR( 18 );
|
432
|
+
ExtrXOR( 19 );
|
433
|
+
ExtrXOR( 20 );
|
434
|
+
for(i=21; i<laneCount; i++)
|
435
|
+
ExtrXOR( i );
|
436
|
+
}
|
437
|
+
else {
|
438
|
+
for(i=17; i<laneCount; i++)
|
439
|
+
ExtrXOR( i );
|
440
|
+
}
|
441
|
+
}
|
442
|
+
else {
|
443
|
+
for(i=0; i<laneCount; i++)
|
444
|
+
ExtrXOR( i );
|
445
|
+
}
|
446
|
+
#undef ExtrXOR
|
447
|
+
}
|
448
|
+
|
449
|
+
/*
 * Declares the V128 working variables used by the round macros in this file:
 *   A.. (25)  state lanes loaded by copyFromState and updated in place,
 *   B.. (25)  per-round temporaries holding the rotated lanes,
 *   C.. (5)   column XORs produced by prepareTheta / the PrepareTheta round,
 *   D.. (5)   per-column values derived from C at the start of a round,
 *   E.. (25)  lanes written by a round macro as its output state.
 * The two-letter suffix (ba..su) names the lane position.
 */
#define declareABCDE \
    V128 Aba, Abe, Abi, Abo, Abu; \
    V128 Aga, Age, Agi, Ago, Agu; \
    V128 Aka, Ake, Aki, Ako, Aku; \
    V128 Ama, Ame, Ami, Amo, Amu; \
    V128 Asa, Ase, Asi, Aso, Asu; \
    V128 Bba, Bbe, Bbi, Bbo, Bbu; \
    V128 Bga, Bge, Bgi, Bgo, Bgu; \
    V128 Bka, Bke, Bki, Bko, Bku; \
    V128 Bma, Bme, Bmi, Bmo, Bmu; \
    V128 Bsa, Bse, Bsi, Bso, Bsu; \
    V128 Ca, Ce, Ci, Co, Cu; \
    V128 Da, De, Di, Do, Du; \
    V128 Eba, Ebe, Ebi, Ebo, Ebu; \
    V128 Ega, Ege, Egi, Ego, Egu; \
    V128 Eka, Eke, Eki, Eko, Eku; \
    V128 Ema, Eme, Emi, Emo, Emu; \
    V128 Esa, Ese, Esi, Eso, Esu; \
|
467
|
+
|
468
|
+
/*
 * Computes the five column values Ca..Cu from the A lanes: each C<x> is the
 * XOR of the five lanes A{b,g,k,m,s}<x> that share the same second letter.
 * Expects the A and C variables from declareABCDE to be in scope.
 */
#define prepareTheta \
    Ca = XOR128(Aba, XOR128(Aga, XOR128(Aka, XOR128(Ama, Asa)))); \
    Ce = XOR128(Abe, XOR128(Age, XOR128(Ake, XOR128(Ame, Ase)))); \
    Ci = XOR128(Abi, XOR128(Agi, XOR128(Aki, XOR128(Ami, Asi)))); \
    Co = XOR128(Abo, XOR128(Ago, XOR128(Ako, XOR128(Amo, Aso)))); \
    Cu = XOR128(Abu, XOR128(Agu, XOR128(Aku, XOR128(Amu, Asu)))); \
|
474
|
+
|
475
|
+
/* --- Theta Rho Pi Chi Iota Prepare-theta */
/* --- 64-bit lanes mapped to 64-bit words */
/*
 * One round from state A into state E, which also rebuilds the column XORs
 * Ca..Cu from the freshly written E lanes so the next round can start
 * without calling prepareTheta.
 *
 *   i  index into KeccakF1600RoundConstants for this round.
 *   A  prefix of the input lane variables (modified: D values are XORed in).
 *   E  prefix of the output lane variables.
 *
 * Requires Ca..Cu to hold the column XORs of A on entry. Statement order
 * matters: each B value is computed right after its A lane is updated, and
 * each C value is accumulated as soon as the matching E lane exists.
 * ANDnu128, ROL64in128, ROL64in128_8, ROL64in128_56 and CONST128_64 are
 * defined outside this block.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = XOR128(Cu, ROL64in128(Ce, 1)); \
    De = XOR128(Ca, ROL64in128(Ci, 1)); \
    Di = XOR128(Ce, ROL64in128(Co, 1)); \
    Do = XOR128(Ci, ROL64in128(Cu, 1)); \
    Du = XOR128(Co, ROL64in128(Ca, 1)); \
/* Output lanes E##b*: E##ba also receives the round constant; Ca..Cu are (re)initialised here. */ \
    XOReq128(A##ba, Da); \
    Bba = A##ba; \
    XOReq128(A##ge, De); \
    Bbe = ROL64in128(A##ge, 44); \
    XOReq128(A##ki, Di); \
    Bbi = ROL64in128(A##ki, 43); \
    E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
    XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
    Ca = E##ba; \
    XOReq128(A##mo, Do); \
    Bbo = ROL64in128(A##mo, 21); \
    E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
    Ce = E##be; \
    XOReq128(A##su, Du); \
    Bbu = ROL64in128(A##su, 14); \
    E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
    Ci = E##bi; \
    E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
    Co = E##bo; \
    E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
    Cu = E##bu; \
/* Output lanes E##g*, each XORed into the running C values. */ \
    XOReq128(A##bo, Do); \
    Bga = ROL64in128(A##bo, 28); \
    XOReq128(A##gu, Du); \
    Bge = ROL64in128(A##gu, 20); \
    XOReq128(A##ka, Da); \
    Bgi = ROL64in128(A##ka, 3); \
    E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
    XOReq128(Ca, E##ga); \
    XOReq128(A##me, De); \
    Bgo = ROL64in128(A##me, 45); \
    E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
    XOReq128(Ce, E##ge); \
    XOReq128(A##si, Di); \
    Bgu = ROL64in128(A##si, 61); \
    E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
    XOReq128(Ci, E##gi); \
    E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
    XOReq128(Co, E##go); \
    E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
    XOReq128(Cu, E##gu); \
/* Output lanes E##k*; the rotation by 8 uses the dedicated ROL64in128_8 macro. */ \
    XOReq128(A##be, De); \
    Bka = ROL64in128(A##be, 1); \
    XOReq128(A##gi, Di); \
    Bke = ROL64in128(A##gi, 6); \
    XOReq128(A##ko, Do); \
    Bki = ROL64in128(A##ko, 25); \
    E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
    XOReq128(Ca, E##ka); \
    XOReq128(A##mu, Du); \
    Bko = ROL64in128_8(A##mu); \
    E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
    XOReq128(Ce, E##ke); \
    XOReq128(A##sa, Da); \
    Bku = ROL64in128(A##sa, 18); \
    E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
    XOReq128(Ci, E##ki); \
    E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
    XOReq128(Co, E##ko); \
    E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
    XOReq128(Cu, E##ku); \
/* Output lanes E##m*; the rotation by 56 uses the dedicated ROL64in128_56 macro. */ \
    XOReq128(A##bu, Du); \
    Bma = ROL64in128(A##bu, 27); \
    XOReq128(A##ga, Da); \
    Bme = ROL64in128(A##ga, 36); \
    XOReq128(A##ke, De); \
    Bmi = ROL64in128(A##ke, 10); \
    E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
    XOReq128(Ca, E##ma); \
    XOReq128(A##mi, Di); \
    Bmo = ROL64in128(A##mi, 15); \
    E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
    XOReq128(Ce, E##me); \
    XOReq128(A##so, Do); \
    Bmu = ROL64in128_56(A##so); \
    E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
    XOReq128(Ci, E##mi); \
    E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
    XOReq128(Co, E##mo); \
    E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
    XOReq128(Cu, E##mu); \
/* Output lanes E##s*; after this group Ca..Cu cover all 25 E lanes. */ \
    XOReq128(A##bi, Di); \
    Bsa = ROL64in128(A##bi, 62); \
    XOReq128(A##go, Do); \
    Bse = ROL64in128(A##go, 55); \
    XOReq128(A##ku, Du); \
    Bsi = ROL64in128(A##ku, 39); \
    E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
    XOReq128(Ca, E##sa); \
    XOReq128(A##ma, Da); \
    Bso = ROL64in128(A##ma, 41); \
    E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
    XOReq128(Ce, E##se); \
    XOReq128(A##se, De); \
    Bsu = ROL64in128(A##se, 2); \
    E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
    XOReq128(Ci, E##si); \
    E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
    XOReq128(Co, E##so); \
    E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
    XOReq128(Cu, E##su); \
\
|
590
|
+
|
591
|
+
/* --- Theta Rho Pi Chi Iota */
/* --- 64-bit lanes mapped to 64-bit words */
/*
 * One round from state A into state E, without rebuilding Ca..Cu afterwards.
 *
 *   i  index into KeccakF1600RoundConstants for this round.
 *   A  prefix of the input lane variables (modified: D values are XORed in).
 *   E  prefix of the output lane variables.
 *
 * Requires Ca..Cu to hold the column XORs of A on entry; they are only read.
 * The statement sequence is the same as thetaRhoPiChiIotaPrepareTheta minus
 * the C updates.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = XOR128(Cu, ROL64in128(Ce, 1)); \
    De = XOR128(Ca, ROL64in128(Ci, 1)); \
    Di = XOR128(Ce, ROL64in128(Co, 1)); \
    Do = XOR128(Ci, ROL64in128(Cu, 1)); \
    Du = XOR128(Co, ROL64in128(Ca, 1)); \
/* Output lanes E##b*: E##ba also receives the round constant. */ \
    XOReq128(A##ba, Da); \
    Bba = A##ba; \
    XOReq128(A##ge, De); \
    Bbe = ROL64in128(A##ge, 44); \
    XOReq128(A##ki, Di); \
    Bbi = ROL64in128(A##ki, 43); \
    E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
    XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
    XOReq128(A##mo, Do); \
    Bbo = ROL64in128(A##mo, 21); \
    E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
    XOReq128(A##su, Du); \
    Bbu = ROL64in128(A##su, 14); \
    E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
    E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
    E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
/* Output lanes E##g*. */ \
    XOReq128(A##bo, Do); \
    Bga = ROL64in128(A##bo, 28); \
    XOReq128(A##gu, Du); \
    Bge = ROL64in128(A##gu, 20); \
    XOReq128(A##ka, Da); \
    Bgi = ROL64in128(A##ka, 3); \
    E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
    XOReq128(A##me, De); \
    Bgo = ROL64in128(A##me, 45); \
    E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
    XOReq128(A##si, Di); \
    Bgu = ROL64in128(A##si, 61); \
    E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
    E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
    E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
/* Output lanes E##k*; the rotation by 8 uses the dedicated ROL64in128_8 macro. */ \
    XOReq128(A##be, De); \
    Bka = ROL64in128(A##be, 1); \
    XOReq128(A##gi, Di); \
    Bke = ROL64in128(A##gi, 6); \
    XOReq128(A##ko, Do); \
    Bki = ROL64in128(A##ko, 25); \
    E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
    XOReq128(A##mu, Du); \
    Bko = ROL64in128_8(A##mu); \
    E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
    XOReq128(A##sa, Da); \
    Bku = ROL64in128(A##sa, 18); \
    E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
    E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
    E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
/* Output lanes E##m*; the rotation by 56 uses the dedicated ROL64in128_56 macro. */ \
    XOReq128(A##bu, Du); \
    Bma = ROL64in128(A##bu, 27); \
    XOReq128(A##ga, Da); \
    Bme = ROL64in128(A##ga, 36); \
    XOReq128(A##ke, De); \
    Bmi = ROL64in128(A##ke, 10); \
    E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
    XOReq128(A##mi, Di); \
    Bmo = ROL64in128(A##mi, 15); \
    E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
    XOReq128(A##so, Do); \
    Bmu = ROL64in128_56(A##so); \
    E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
    E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
    E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
/* Output lanes E##s*. */ \
    XOReq128(A##bi, Di); \
    Bsa = ROL64in128(A##bi, 62); \
    XOReq128(A##go, Do); \
    Bse = ROL64in128(A##go, 55); \
    XOReq128(A##ku, Du); \
    Bsi = ROL64in128(A##ku, 39); \
    E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
    XOReq128(A##ma, Da); \
    Bso = ROL64in128(A##ma, 41); \
    E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
    XOReq128(A##se, De); \
    Bsu = ROL64in128(A##se, 2); \
    E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
    E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
    E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
\
|
681
|
+
|
682
|
+
/*
 * Table of 24 64-bit constants. The round macros XOR entry [i] into the
 * E##ba lane (through CONST128_64) for the index i they are given.
 */
static ALIGN(KeccakP1600times2_statesAlignment) const UINT64 KeccakF1600RoundConstants[24] = {
    /* [ 0.. 1] */ 0x0000000000000001ULL, 0x0000000000008082ULL,
    /* [ 2.. 3] */ 0x800000000000808aULL, 0x8000000080008000ULL,
    /* [ 4.. 5] */ 0x000000000000808bULL, 0x0000000080000001ULL,
    /* [ 6.. 7] */ 0x8000000080008081ULL, 0x8000000000008009ULL,
    /* [ 8.. 9] */ 0x000000000000008aULL, 0x0000000000000088ULL,
    /* [10..11] */ 0x0000000080008009ULL, 0x000000008000000aULL,
    /* [12..13] */ 0x000000008000808bULL, 0x800000000000008bULL,
    /* [14..15] */ 0x8000000000008089ULL, 0x8000000000008003ULL,
    /* [16..17] */ 0x8000000000008002ULL, 0x8000000000000080ULL,
    /* [18..19] */ 0x000000000000800aULL, 0x800000008000000aULL,
    /* [20..21] */ 0x8000000080008081ULL, 0x8000000000008080ULL,
    /* [22..23] */ 0x0000000080000001ULL, 0x8000000080008008ULL
};
|
707
|
+
|
708
|
+
/*
 * Loads the 25 entries state[0..24] into the lane variables X##ba .. X##su,
 * in the order ba, be, bi, bo, bu, ga, ... , su.
 *   X      prefix of the destination variables (e.g. A).
 *   state  array of V128 entries, read with LOAD128.
 */
#define copyFromState(X, state) \
    X##ba = LOAD128(state[ 0]); \
    X##be = LOAD128(state[ 1]); \
    X##bi = LOAD128(state[ 2]); \
    X##bo = LOAD128(state[ 3]); \
    X##bu = LOAD128(state[ 4]); \
    X##ga = LOAD128(state[ 5]); \
    X##ge = LOAD128(state[ 6]); \
    X##gi = LOAD128(state[ 7]); \
    X##go = LOAD128(state[ 8]); \
    X##gu = LOAD128(state[ 9]); \
    X##ka = LOAD128(state[10]); \
    X##ke = LOAD128(state[11]); \
    X##ki = LOAD128(state[12]); \
    X##ko = LOAD128(state[13]); \
    X##ku = LOAD128(state[14]); \
    X##ma = LOAD128(state[15]); \
    X##me = LOAD128(state[16]); \
    X##mi = LOAD128(state[17]); \
    X##mo = LOAD128(state[18]); \
    X##mu = LOAD128(state[19]); \
    X##sa = LOAD128(state[20]); \
    X##se = LOAD128(state[21]); \
    X##si = LOAD128(state[22]); \
    X##so = LOAD128(state[23]); \
    X##su = LOAD128(state[24]); \
|
734
|
+
|
735
|
+
/*
 * Stores the lane variables X##ba .. X##su to state[0..24], using the same
 * index assignment as copyFromState.
 *   state  array of V128 entries, written with STORE128.
 *   X      prefix of the source variables (e.g. A).
 */
#define copyToState(state, X) \
    STORE128(state[ 0], X##ba); \
    STORE128(state[ 1], X##be); \
    STORE128(state[ 2], X##bi); \
    STORE128(state[ 3], X##bo); \
    STORE128(state[ 4], X##bu); \
    STORE128(state[ 5], X##ga); \
    STORE128(state[ 6], X##ge); \
    STORE128(state[ 7], X##gi); \
    STORE128(state[ 8], X##go); \
    STORE128(state[ 9], X##gu); \
    STORE128(state[10], X##ka); \
    STORE128(state[11], X##ke); \
    STORE128(state[12], X##ki); \
    STORE128(state[13], X##ko); \
    STORE128(state[14], X##ku); \
    STORE128(state[15], X##ma); \
    STORE128(state[16], X##me); \
    STORE128(state[17], X##mi); \
    STORE128(state[18], X##mo); \
    STORE128(state[19], X##mu); \
    STORE128(state[20], X##sa); \
    STORE128(state[21], X##se); \
    STORE128(state[22], X##si); \
    STORE128(state[23], X##so); \
    STORE128(state[24], X##su); \
|
761
|
+
|
762
|
+
/*
 * Assigns all 25 lane variables with prefix Y to the matching variables
 * with prefix X (X##ba = Y##ba, ..., X##su = Y##su).
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su; \
|
788
|
+
|
789
|
+
/*
 * Translate this implementation's configuration macros into the names
 * defined just before including the unrolling macros file: FullUnrolling
 * when KeccakP1600times2_fullUnrolling is set, otherwise Unrolling set to
 * KeccakP1600times2_unrolling.
 * NOTE(review): the included file presumably provides rounds24, rounds12,
 * rounds6 and rounds4 used by the functions in this file; confirm there.
 */
#ifdef KeccakP1600times2_fullUnrolling
#define FullUnrolling
#else
#define Unrolling KeccakP1600times2_unrolling
#endif
#include "KeccakP-1600-unrolling.macros"
|
795
|
+
|
796
|
+
/*
 * Permutes all interleaved states in place using the rounds24 macro.
 *
 * states  interleaved states, accessed as 25 V128 entries.
 *
 * The state is loaded into local lane variables, transformed by rounds24
 * (not defined in this block), and stored back.
 */
void KeccakP1600times2_PermuteAll_24rounds(void *states)
{
    V128 *statesAsLanes = (V128 *)states;
    declareABCDE
    #ifndef KeccakP1600times2_fullUnrolling
    /* Only declared when not fully unrolled; presumably the round counter used inside rounds24 (confirm in the unrolling macros). */
    unsigned int i;
    #endif

    copyFromState(A, statesAsLanes)
    rounds24
    copyToState(statesAsLanes, A)
#if defined(UseMMX)
    /* Compiled in only when UseMMX is defined. */
    _mm_empty();
#endif
}
|
811
|
+
|
812
|
+
/*
 * Permutes all interleaved states in place using the rounds12 macro.
 *
 * states  interleaved states, accessed as 25 V128 entries.
 *
 * The state is loaded into local lane variables, transformed by rounds12
 * (not defined in this block), and stored back.
 */
void KeccakP1600times2_PermuteAll_12rounds(void *states)
{
    V128 *statesAsLanes = (V128 *)states;
    declareABCDE
    #ifndef KeccakP1600times2_fullUnrolling
    /* Only declared when not fully unrolled; presumably the round counter used inside rounds12 (confirm in the unrolling macros). */
    unsigned int i;
    #endif

    copyFromState(A, statesAsLanes)
    rounds12
    copyToState(statesAsLanes, A)
#if defined(UseMMX)
    /* Compiled in only when UseMMX is defined. */
    _mm_empty();
#endif
}
|
827
|
+
|
828
|
+
/*
 * Permutes all interleaved states in place using the rounds6 macro.
 *
 * states  interleaved states, accessed as 25 V128 entries.
 *
 * The state is loaded into local lane variables, transformed by rounds6
 * (not defined in this block), and stored back.
 */
void KeccakP1600times2_PermuteAll_6rounds(void *states)
{
    V128 *statesAsLanes = (V128 *)states;
    declareABCDE
    #ifndef KeccakP1600times2_fullUnrolling
    /* Only declared when not fully unrolled; presumably the round counter used inside rounds6 (confirm in the unrolling macros). */
    unsigned int i;
    #endif

    copyFromState(A, statesAsLanes)
    rounds6
    copyToState(statesAsLanes, A)
#if defined(UseMMX)
    /* Compiled in only when UseMMX is defined. */
    _mm_empty();
#endif
}
|
843
|
+
|
844
|
+
/*
 * Permutes all interleaved states in place using the rounds4 macro.
 *
 * states  interleaved states, accessed as 25 V128 entries.
 *
 * The state is loaded into local lane variables, transformed by rounds4
 * (not defined in this block), and stored back.
 */
void KeccakP1600times2_PermuteAll_4rounds(void *states)
{
    V128 *statesAsLanes = (V128 *)states;
    declareABCDE
    #ifndef KeccakP1600times2_fullUnrolling
    /* Only declared when not fully unrolled; presumably the round counter used inside rounds4 (confirm in the unrolling macros). */
    unsigned int i;
    #endif

    copyFromState(A, statesAsLanes)
    rounds4
    copyToState(statesAsLanes, A)
#if defined(UseMMX)
    /* Compiled in only when UseMMX is defined. */
    _mm_empty();
#endif
}
|
859
|
+
|
860
|
+
size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
|
861
|
+
{
|
862
|
+
if (laneCount == 21) {
|
863
|
+
#if 1
|
864
|
+
unsigned int i;
|
865
|
+
const unsigned char *dataStart = data;
|
866
|
+
|
867
|
+
while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
|
868
|
+
V128 *stateAsLanes = (V128 *)states;
|
869
|
+
unsigned int i;
|
870
|
+
const UINT64 *curData0 = (const UINT64 *)data;
|
871
|
+
const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
|
872
|
+
#define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
|
873
|
+
XOR_In( 0 );
|
874
|
+
XOR_In( 1 );
|
875
|
+
XOR_In( 2 );
|
876
|
+
XOR_In( 3 );
|
877
|
+
XOR_In( 4 );
|
878
|
+
XOR_In( 5 );
|
879
|
+
XOR_In( 6 );
|
880
|
+
XOR_In( 7 );
|
881
|
+
XOR_In( 8 );
|
882
|
+
XOR_In( 9 );
|
883
|
+
XOR_In( 10 );
|
884
|
+
XOR_In( 11 );
|
885
|
+
XOR_In( 12 );
|
886
|
+
XOR_In( 13 );
|
887
|
+
XOR_In( 14 );
|
888
|
+
XOR_In( 15 );
|
889
|
+
XOR_In( 16 );
|
890
|
+
XOR_In( 17 );
|
891
|
+
XOR_In( 18 );
|
892
|
+
XOR_In( 19 );
|
893
|
+
XOR_In( 20 );
|
894
|
+
#undef XOR_In
|
895
|
+
KeccakP1600times2_PermuteAll_24rounds(states);
|
896
|
+
data += laneOffsetSerial*8;
|
897
|
+
dataByteLen -= laneOffsetSerial*8;
|
898
|
+
}
|
899
|
+
return data - dataStart;
|
900
|
+
#else
|
901
|
+
unsigned int i;
|
902
|
+
const unsigned char *dataStart = data;
|
903
|
+
const UINT64 *curData0 = (const UINT64 *)data;
|
904
|
+
const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
|
905
|
+
V128 *statesAsLanes = (V128 *)states;
|
906
|
+
declareABCDE
|
907
|
+
|
908
|
+
copyFromState(A, statesAsLanes)
|
909
|
+
while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
|
910
|
+
#define XOR_In( Xxx, argIndex ) XOReq128( Xxx, LOAD6464(curData1[argIndex], curData0[argIndex]))
|
911
|
+
XOR_In( Aba, 0 );
|
912
|
+
XOR_In( Abe, 1 );
|
913
|
+
XOR_In( Abi, 2 );
|
914
|
+
XOR_In( Abo, 3 );
|
915
|
+
XOR_In( Abu, 4 );
|
916
|
+
XOR_In( Aga, 5 );
|
917
|
+
XOR_In( Age, 6 );
|
918
|
+
XOR_In( Agi, 7 );
|
919
|
+
XOR_In( Ago, 8 );
|
920
|
+
XOR_In( Agu, 9 );
|
921
|
+
XOR_In( Aka, 10 );
|
922
|
+
XOR_In( Ake, 11 );
|
923
|
+
XOR_In( Aki, 12 );
|
924
|
+
XOR_In( Ako, 13 );
|
925
|
+
XOR_In( Aku, 14 );
|
926
|
+
XOR_In( Ama, 15 );
|
927
|
+
XOR_In( Ame, 16 );
|
928
|
+
XOR_In( Ami, 17 );
|
929
|
+
XOR_In( Amo, 18 );
|
930
|
+
XOR_In( Amu, 19 );
|
931
|
+
XOR_In( Asa, 20 );
|
932
|
+
#undef XOR_In
|
933
|
+
rounds24
|
934
|
+
curData0 += laneOffsetSerial;
|
935
|
+
curData1 += laneOffsetSerial;
|
936
|
+
dataByteLen -= laneOffsetSerial*8;
|
937
|
+
}
|
938
|
+
copyToState(statesAsLanes, A)
|
939
|
+
return (const unsigned char *)curData0 - dataStart;
|
940
|
+
#endif
|
941
|
+
}
|
942
|
+
else {
|
943
|
+
unsigned int i;
|
944
|
+
const unsigned char *dataStart = data;
|
945
|
+
|
946
|
+
while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
|
947
|
+
KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
|
948
|
+
KeccakP1600times2_PermuteAll_24rounds(states);
|
949
|
+
data += laneOffsetSerial*8;
|
950
|
+
dataByteLen -= laneOffsetSerial*8;
|
951
|
+
}
|
952
|
+
return data - dataStart;
|
953
|
+
}
|
954
|
+
}
|