digest-kangarootwelve 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -11
- data/Rakefile +2 -2
- data/digest-kangarootwelve.gemspec +322 -42
- data/ext/digest/kangarootwelve/ext.c +1 -1
- data/ext/digest/kangarootwelve/extconf.rb +13 -1
- data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
- data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
- data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
- data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
- data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
- data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
- data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
- data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
- data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
- data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
- data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
- data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
- data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
- data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
- data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
- data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
- data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
- data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
- data/lib/digest/kangarootwelve/version.rb +1 -1
- metadata +299 -21
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakSpongeWidth1600.c"
|
@@ -0,0 +1,287 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Gilles Van Assche, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
This file contains macros that help make a PlSnP-compatible implementation by
|
14
|
+
serially falling back on a SnP-compatible implementation or on a PlSnP-compatible
|
15
|
+
implementation of lower parallelism degree.
|
16
|
+
|
17
|
+
Please refer to PlSnP-documentation.h for more details.
|
18
|
+
*/
|
19
|
+
|
20
|
+
/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
|
21
|
+
/* expect SnP_stateSizeInBytes, SnP_stateAlignment */
|
22
|
+
/* expect prefix */
|
23
|
+
/* expect SnP_* */
|
24
|
+
|
25
|
+
#define JOIN0(a, b) a ## b
|
26
|
+
#define JOIN(a, b) JOIN0(a, b)
|
27
|
+
|
28
|
+
#define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize)
|
29
|
+
#define PlSnP_InitializeAll JOIN(prefix, _InitializeAll)
|
30
|
+
#define PlSnP_AddByte JOIN(prefix, _AddByte)
|
31
|
+
#define PlSnP_AddBytes JOIN(prefix, _AddBytes)
|
32
|
+
#define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll)
|
33
|
+
#define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes)
|
34
|
+
#define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll)
|
35
|
+
#define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes)
|
36
|
+
#define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes)
|
37
|
+
#define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll)
|
38
|
+
#define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes)
|
39
|
+
#define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll)
|
40
|
+
|
41
|
+
#if (PlSnP_baseParallelism == 1)
|
42
|
+
#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
|
43
|
+
#define SnP_stateAlignment JOIN(SnP, _stateAlignment)
|
44
|
+
#else
|
45
|
+
#define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes)
|
46
|
+
#define SnP_stateAlignment JOIN(SnP, _statesAlignment)
|
47
|
+
#endif
|
48
|
+
#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism))
|
49
|
+
#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment)
|
50
|
+
#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset))
|
51
|
+
|
52
|
+
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
|
53
|
+
#define SnP_Initialize JOIN(SnP, _Initialize)
|
54
|
+
#define SnP_InitializeAll JOIN(SnP, _InitializeAll)
|
55
|
+
#define SnP_AddByte JOIN(SnP, _AddByte)
|
56
|
+
#define SnP_AddBytes JOIN(SnP, _AddBytes)
|
57
|
+
#define SnP_AddLanesAll JOIN(SnP, _AddLanesAll)
|
58
|
+
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
|
59
|
+
#define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll)
|
60
|
+
#define SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes)
|
61
|
+
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
|
62
|
+
#define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll)
|
63
|
+
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
|
64
|
+
#define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll)
|
65
|
+
|
66
|
+
void PlSnP_StaticInitialize( void )
|
67
|
+
{
|
68
|
+
SnP_StaticInitialize();
|
69
|
+
}
|
70
|
+
|
71
|
+
void PlSnP_InitializeAll(void *states)
|
72
|
+
{
|
73
|
+
unsigned int i;
|
74
|
+
|
75
|
+
for(i=0; i<PlSnP_factor; i++)
|
76
|
+
#if (PlSnP_baseParallelism == 1)
|
77
|
+
SnP_Initialize(stateWithIndex(i));
|
78
|
+
#else
|
79
|
+
SnP_InitializeAll(stateWithIndex(i));
|
80
|
+
#endif
|
81
|
+
}
|
82
|
+
|
83
|
+
void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset)
|
84
|
+
{
|
85
|
+
#if (PlSnP_baseParallelism == 1)
|
86
|
+
SnP_AddByte(stateWithIndex(instanceIndex), byte, offset);
|
87
|
+
#else
|
88
|
+
SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset);
|
89
|
+
#endif
|
90
|
+
}
|
91
|
+
|
92
|
+
void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
93
|
+
{
|
94
|
+
#if (PlSnP_baseParallelism == 1)
|
95
|
+
SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length);
|
96
|
+
#else
|
97
|
+
SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
98
|
+
#endif
|
99
|
+
}
|
100
|
+
|
101
|
+
void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
102
|
+
{
|
103
|
+
unsigned int i;
|
104
|
+
|
105
|
+
for(i=0; i<PlSnP_factor; i++) {
|
106
|
+
#if (PlSnP_baseParallelism == 1)
|
107
|
+
SnP_AddBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
108
|
+
#else
|
109
|
+
SnP_AddLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
110
|
+
#endif
|
111
|
+
data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
116
|
+
{
|
117
|
+
#if (PlSnP_baseParallelism == 1)
|
118
|
+
SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length);
|
119
|
+
#else
|
120
|
+
SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
121
|
+
#endif
|
122
|
+
}
|
123
|
+
|
124
|
+
void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
125
|
+
{
|
126
|
+
unsigned int i;
|
127
|
+
|
128
|
+
for(i=0; i<PlSnP_factor; i++) {
|
129
|
+
#if (PlSnP_baseParallelism == 1)
|
130
|
+
SnP_OverwriteBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
131
|
+
#else
|
132
|
+
SnP_OverwriteLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
133
|
+
#endif
|
134
|
+
data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
|
139
|
+
{
|
140
|
+
#if (PlSnP_baseParallelism == 1)
|
141
|
+
SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount);
|
142
|
+
#else
|
143
|
+
SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount);
|
144
|
+
#endif
|
145
|
+
}
|
146
|
+
|
147
|
+
/*
 * Runs the underlying permutation once on each of the PlSnP_factor states:
 * SnP_Permute when PlSnP_baseParallelism is 1, SnP_PermuteAll otherwise.
 */
void PlSnP_PermuteAll(void *states)
|
148
|
+
{
|
149
|
+
unsigned int i;
|
150
|
+
|
151
|
+
for(i=0; i<PlSnP_factor; i++) {
|
152
|
+
#if (PlSnP_baseParallelism == 1)
|
153
|
+
SnP_Permute(stateWithIndex(i));
|
154
|
+
#else
|
155
|
+
SnP_PermuteAll(stateWithIndex(i));
|
156
|
+
#endif
|
157
|
+
}
|
158
|
+
}
|
159
|
+
|
160
|
+
/*
 * 12-round variant of PlSnP_PermuteAll: calls SnP_Permute_12rounds (base
 * parallelism 1) or SnP_PermuteAll_12rounds on each of the PlSnP_factor states.
 * The function is only emitted when at least one of those two macros is defined.
 * NOTE(review): the guard is an OR, so it does not check that the macro actually
 * used by the selected PlSnP_baseParallelism branch is the one defined.
 */
#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds))
|
161
|
+
void PlSnP_PermuteAll_12rounds(void *states)
|
162
|
+
{
|
163
|
+
unsigned int i;
|
164
|
+
|
165
|
+
for(i=0; i<PlSnP_factor; i++) {
|
166
|
+
#if (PlSnP_baseParallelism == 1)
|
167
|
+
SnP_Permute_12rounds(stateWithIndex(i));
|
168
|
+
#else
|
169
|
+
SnP_PermuteAll_12rounds(stateWithIndex(i));
|
170
|
+
#endif
|
171
|
+
}
|
172
|
+
}
|
173
|
+
#endif
|
174
|
+
|
175
|
+
/*
 * 6-round variant of PlSnP_PermuteAll. With a base parallelism of 1 it calls
 * the generic SnP_Permute_Nrounds with a round count of 6; otherwise it calls
 * SnP_PermuteAll_6rounds. Each of the PlSnP_factor states is processed once.
 * The function is only emitted when at least one of those two macros is defined.
 * NOTE(review): the guard is an OR, so it does not check that the macro actually
 * used by the selected PlSnP_baseParallelism branch is the one defined.
 */
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_6rounds))
|
176
|
+
void PlSnP_PermuteAll_6rounds(void *states)
|
177
|
+
{
|
178
|
+
unsigned int i;
|
179
|
+
|
180
|
+
for(i=0; i<PlSnP_factor; i++) {
|
181
|
+
#if (PlSnP_baseParallelism == 1)
|
182
|
+
SnP_Permute_Nrounds(stateWithIndex(i), 6);
|
183
|
+
#else
|
184
|
+
SnP_PermuteAll_6rounds(stateWithIndex(i));
|
185
|
+
#endif
|
186
|
+
}
|
187
|
+
}
|
188
|
+
#endif
|
189
|
+
|
190
|
+
/*
 * 4-round variant of PlSnP_PermuteAll. With a base parallelism of 1 it calls
 * the generic SnP_Permute_Nrounds with a round count of 4; otherwise it calls
 * SnP_PermuteAll_4rounds. Each of the PlSnP_factor states is processed once.
 * The function is only emitted when at least one of those two macros is defined.
 * NOTE(review): the guard is an OR, so it does not check that the macro actually
 * used by the selected PlSnP_baseParallelism branch is the one defined.
 */
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_4rounds))
|
191
|
+
void PlSnP_PermuteAll_4rounds(void *states)
|
192
|
+
{
|
193
|
+
unsigned int i;
|
194
|
+
|
195
|
+
for(i=0; i<PlSnP_factor; i++) {
|
196
|
+
#if (PlSnP_baseParallelism == 1)
|
197
|
+
SnP_Permute_Nrounds(stateWithIndex(i), 4);
|
198
|
+
#else
|
199
|
+
SnP_PermuteAll_4rounds(stateWithIndex(i));
|
200
|
+
#endif
|
201
|
+
}
|
202
|
+
}
|
203
|
+
#endif
|
204
|
+
|
205
|
+
/*
 * Forwards to SnP_ExtractBytes for the single instance `instanceIndex`,
 * passing the output buffer `data`, `offset` and `length` through unchanged.
 * The instance is found with stateWithIndex(instanceIndex) when
 * PlSnP_baseParallelism is 1, otherwise as underlying state
 * instanceIndex / PlSnP_baseParallelism and inner instance
 * instanceIndex % PlSnP_baseParallelism.
 * NOTE(review): `states` is not const here although PlSnP_ExtractLanesAll
 * takes a const pointer; left as is to keep the interface unchanged.
 */
void PlSnP_ExtractBytes(void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
|
206
|
+
{
|
207
|
+
#if (PlSnP_baseParallelism == 1)
|
208
|
+
SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length);
|
209
|
+
#else
|
210
|
+
SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
|
211
|
+
#endif
|
212
|
+
}
|
213
|
+
|
214
|
+
/*
 * Walks the PlSnP_factor underlying states and extracts the leading
 * `laneCount` lanes of each one into `data`.
 * With a base parallelism of 1 the lanes are read as
 * laneCount*SnP_laneLengthInBytes bytes from byte offset 0; otherwise the
 * underlying SnP_ExtractLanesAll receives `laneCount` and `laneOffset`.
 * After each underlying state, `data` advances by
 * laneOffset*PlSnP_baseParallelism lanes (expressed in bytes).
 */
void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
215
|
+
{
|
216
|
+
unsigned int i;
|
217
|
+
|
218
|
+
for(i=0; i<PlSnP_factor; i++) {
|
219
|
+
#if (PlSnP_baseParallelism == 1)
|
220
|
+
SnP_ExtractBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
|
221
|
+
#else
|
222
|
+
SnP_ExtractLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
|
223
|
+
#endif
|
224
|
+
data += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
/*
 * Forwards to SnP_ExtractAndAddBytes for the single instance `instanceIndex`,
 * passing `input`, `output`, `offset` and `length` through unchanged.
 * The instance is found with stateWithIndex(instanceIndex) when
 * PlSnP_baseParallelism is 1, otherwise as underlying state
 * instanceIndex / PlSnP_baseParallelism and inner instance
 * instanceIndex % PlSnP_baseParallelism.
 */
void PlSnP_ExtractAndAddBytes(void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
229
|
+
{
|
230
|
+
#if (PlSnP_baseParallelism == 1)
|
231
|
+
SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length);
|
232
|
+
#else
|
233
|
+
SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length);
|
234
|
+
#endif
|
235
|
+
}
|
236
|
+
|
237
|
+
/*
 * Walks the PlSnP_factor underlying states and, for each one, calls the
 * extract-and-add primitive on the leading `laneCount` lanes with the
 * current `input` and `output` pointers.
 * With a base parallelism of 1 the lanes are handled as
 * laneCount*SnP_laneLengthInBytes bytes from byte offset 0; otherwise the
 * underlying SnP_ExtractAndAddLanesAll receives `laneCount` and `laneOffset`.
 * After each underlying state, both `input` and `output` advance by
 * laneOffset*PlSnP_baseParallelism lanes (expressed in bytes).
 */
void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
|
238
|
+
{
|
239
|
+
unsigned int i;
|
240
|
+
|
241
|
+
for(i=0; i<PlSnP_factor; i++) {
|
242
|
+
#if (PlSnP_baseParallelism == 1)
|
243
|
+
SnP_ExtractAndAddBytes(stateWithIndex(i), input, output, 0, laneCount*SnP_laneLengthInBytes);
|
244
|
+
#else
|
245
|
+
SnP_ExtractAndAddLanesAll(stateWithIndex(i), input, output, laneCount, laneOffset);
|
246
|
+
#endif
|
247
|
+
input += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
248
|
+
output += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
#undef PlSnP_factor
|
253
|
+
#undef SnP_stateOffset
|
254
|
+
#undef stateWithIndex
|
255
|
+
#undef JOIN0
|
256
|
+
#undef JOIN
|
257
|
+
#undef PlSnP_StaticInitialize
|
258
|
+
#undef PlSnP_InitializeAll
|
259
|
+
#undef PlSnP_AddByte
|
260
|
+
#undef PlSnP_AddBytes
|
261
|
+
#undef PlSnP_AddLanesAll
|
262
|
+
#undef PlSnP_OverwriteBytes
|
263
|
+
#undef PlSnP_OverwriteLanesAll
|
264
|
+
#undef PlSnP_OverwriteWithZeroes
|
265
|
+
#undef PlSnP_PermuteAll
|
266
|
+
#undef PlSnP_ExtractBytes
|
267
|
+
#undef PlSnP_ExtractLanesAll
|
268
|
+
#undef PlSnP_ExtractAndAddBytes
|
269
|
+
#undef PlSnP_ExtractAndAddLanesAll
|
270
|
+
#undef SnP_stateAlignment
|
271
|
+
#undef SnP_stateSizeInBytes
|
272
|
+
#undef PlSnP_factor
|
273
|
+
#undef SnP_stateOffset
|
274
|
+
#undef stateWithIndex
|
275
|
+
#undef SnP_StaticInitialize
|
276
|
+
#undef SnP_Initialize
|
277
|
+
#undef SnP_InitializeAll
|
278
|
+
#undef SnP_AddByte
|
279
|
+
#undef SnP_AddBytes
|
280
|
+
#undef SnP_AddLanesAll
|
281
|
+
#undef SnP_OverwriteBytes
|
282
|
+
#undef SnP_OverwriteWithZeroes
|
283
|
+
#undef SnP_OverwriteLanesAll
|
284
|
+
#undef SnP_ExtractBytes
|
285
|
+
#undef SnP_ExtractLanesAll
|
286
|
+
#undef SnP_ExtractAndAddBytes
|
287
|
+
#undef SnP_ExtractAndAddLanesAll
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../../ext.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KangarooTwelve.c"
|
@@ -0,0 +1 @@
|
|
1
|
+
#include "../common/KeccakDuplexWidth1600.c"
|
@@ -0,0 +1,37 @@
|
|
1
|
+
/*
|
2
|
+
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
|
4
|
+
For more information, feedback or questions, please refer to our website:
|
5
|
+
https://keccak.team/
|
6
|
+
|
7
|
+
To the extent possible under law, the implementer has waived all copyright
|
8
|
+
and related or neighboring rights to the source code in this file.
|
9
|
+
http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
|
11
|
+
---
|
12
|
+
|
13
|
+
Please refer to SnP-documentation.h for more details.
|
14
|
+
*/
|
15
|
+
|
16
|
+
#ifndef _KeccakP_1600_SnP_h_
|
17
|
+
#define _KeccakP_1600_SnP_h_
|
18
|
+
|
19
|
+
#define KeccakP1600_implementation "8-bit optimized AVR assembler implementation"
|
20
|
+
#define KeccakP1600_stateSizeInBytes 200
|
21
|
+
#define KeccakP1600_stateAlignment 8
|
22
|
+
|
23
|
+
void KeccakP1600_StaticInitialize( void );
|
24
|
+
/* #define KeccakP1600_StaticInitialize() */
|
25
|
+
void KeccakP1600_Initialize(void *state);
|
26
|
+
void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
|
27
|
+
/* #define KeccakP1600_AddByte(argS, argData, argOffset) ((unsigned char*)argS)[argOffset] ^= (argData) */
|
28
|
+
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
29
|
+
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
30
|
+
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
|
31
|
+
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
|
32
|
+
void KeccakP1600_Permute_12rounds(void *state);
|
33
|
+
void KeccakP1600_Permute_24rounds(void *state);
|
34
|
+
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
|
35
|
+
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
|
36
|
+
|
37
|
+
#endif
|
@@ -0,0 +1,1116 @@
|
|
1
|
+
;
|
2
|
+
; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
3
|
+
;
|
4
|
+
; For more information, feedback or questions, please refer to our website:
|
5
|
+
; https://keccak.team/
|
6
|
+
;
|
7
|
+
; To the extent possible under law, the implementer has waived all copyright
|
8
|
+
; and related or neighboring rights to the source code in this file.
|
9
|
+
; http://creativecommons.org/publicdomain/zero/1.0/
|
10
|
+
;
|
11
|
+
; ---
|
12
|
+
;
|
13
|
+
; This file implements Keccak-p[1600] in a SnP-compatible way.
|
14
|
+
; Please refer to SnP-documentation.h for more details.
|
15
|
+
;
|
16
|
+
; This implementation comes with KeccakP-1600-SnP.h in the same folder.
|
17
|
+
; Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
18
|
+
;
|
19
|
+
|
20
|
+
; INFO: Tested on ATmega1280 simulator
|
21
|
+
|
22
|
+
; Registers used in all routines
|
23
|
+
#define zero 1
|
24
|
+
#define rpState 24
|
25
|
+
#define rX 26
|
26
|
+
#define rY 28
|
27
|
+
#define rZ 30
|
28
|
+
#define sp 0x3D
|
29
|
+
|
30
|
+
;----------------------------------------------------------------------------
|
31
|
+
;
|
32
|
+
; void KeccakP1600_StaticInitialize( void )
|
33
|
+
;
|
34
|
+
.global KeccakP1600_StaticInitialize
|
35
|
+
|
36
|
+
;----------------------------------------------------------------------------
|
37
|
+
;
|
38
|
+
; void KeccakP1600_Initialize(void *state)
|
39
|
+
;
|
40
|
+
; argument state is passed in r24:r25
|
41
|
+
;
|
42
|
+
.global KeccakP1600_Initialize
|
43
|
+
; Stores 25 lanes * 8 bytes = 200 bytes from register zero (r1) into the state at r24:r25.
; Assumes r1 holds 0 on entry (presumably the avr-gcc convention) -- TODO confirm.
; Scratch: r23 (lane counter) and Z (write pointer).
KeccakP1600_Initialize:
|
44
|
+
movw rZ, r24 ; Z = state
|
45
|
+
ldi r23, 5*5 ; clear state (8 bytes/1 lane per iteration)
|
46
|
+
KeccakP1600_Initialize_Loop:
|
47
|
+
st z+, zero ; eight unrolled stores = one lane
|
48
|
+
st z+, zero
|
49
|
+
st z+, zero
|
50
|
+
st z+, zero
|
51
|
+
st z+, zero
|
52
|
+
st z+, zero
|
53
|
+
st z+, zero
|
54
|
+
st z+, zero
|
55
|
+
dec r23 ; one lane done
|
56
|
+
brne KeccakP1600_Initialize_Loop
|
57
|
+
KeccakP1600_StaticInitialize: ; static initialization is a no-op: it only shares this ret
|
58
|
+
ret
|
59
|
+
|
60
|
+
;----------------------------------------------------------------------------
|
61
|
+
;
|
62
|
+
; void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset)
|
63
|
+
;
|
64
|
+
; argument state is passed in r24:r25
|
65
|
+
; argument data is passed in r22:r23, only LSB (r22) is used
|
66
|
+
; argument offset is passed in r20:r21, only LSB (r20) is used
|
67
|
+
;
|
68
|
+
.global KeccakP1600_AddByte
|
69
|
+
; XORs the byte in r22 into the state byte at state + (offset & 0xFF).
; Scratch: r0 and Z. Relies on register zero (r1) holding 0 for the carry propagation.
KeccakP1600_AddByte:
|
70
|
+
movw rZ, r24 ; Z = state
|
71
|
+
add rZ, r20 ; Z += low byte of offset
|
72
|
+
adc rZ+1, zero ; propagate the carry into the high byte
|
73
|
+
ld r0, Z
|
74
|
+
eor r0, r22 ; state byte ^= data
|
75
|
+
st Z, r0
|
76
|
+
ret
|
77
|
+
|
78
|
+
;----------------------------------------------------------------------------
|
79
|
+
;
|
80
|
+
; void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
|
81
|
+
;
|
82
|
+
; argument state is passed in r24:r25
|
83
|
+
; argument data is passed in r22:r23
|
84
|
+
; argument offset is passed in r20:r21, only LSB (r20) is used
|
85
|
+
; argument length is passed in r18:r19, only LSB (r18) is used
|
86
|
+
;
|
87
|
+
.global KeccakP1600_AddBytes
|
88
|
+
; XORs length bytes read from data (X) into the state starting at state + offset (Z).
; Only the low bytes of offset (r20) and length (r18) are used, so both must be below 256.
; Runs of 8 bytes go through an unrolled loop, the remaining 0..7 bytes through a byte loop.
; Scratch: r0, r18, r19, r21, X, Z.
KeccakP1600_AddBytes:
|
89
|
+
movw rZ, r24 ; Z = state
|
90
|
+
add rZ, r20 ; Z += low byte of offset
|
91
|
+
adc rZ+1, zero
|
92
|
+
movw rX, r22 ; X = data
|
93
|
+
subi r18, 8 ; borrow means fewer than 8 bytes are left
|
94
|
+
brcs KeccakP1600_AddBytes_Byte
|
95
|
+
;do 8 bytes per iteration
|
96
|
+
KeccakP1600_AddBytes_Loop8:
|
97
|
+
ld r21, X+ ; r21 = next data byte (offset high byte is no longer needed)
|
98
|
+
ld r0, Z
|
99
|
+
eor r0, r21
|
100
|
+
st Z+, r0 ; state byte ^= data byte
|
101
|
+
ld r21, X+
|
102
|
+
ld r0, Z
|
103
|
+
eor r0, r21
|
104
|
+
st Z+, r0
|
105
|
+
ld r21, X+
|
106
|
+
ld r0, Z
|
107
|
+
eor r0, r21
|
108
|
+
st Z+, r0
|
109
|
+
ld r21, X+
|
110
|
+
ld r0, Z
|
111
|
+
eor r0, r21
|
112
|
+
st Z+, r0
|
113
|
+
ld r21, X+
|
114
|
+
ld r0, Z
|
115
|
+
eor r0, r21
|
116
|
+
st Z+, r0
|
117
|
+
ld r21, X+
|
118
|
+
ld r0, Z
|
119
|
+
eor r0, r21
|
120
|
+
st Z+, r0
|
121
|
+
ld r21, X+
|
122
|
+
ld r0, Z
|
123
|
+
eor r0, r21
|
124
|
+
st Z+, r0
|
125
|
+
ld r21, X+
|
126
|
+
ld r0, Z
|
127
|
+
eor r0, r21
|
128
|
+
st Z+, r0
|
129
|
+
subi r18, 8 ; loop while at least 8 more bytes remain
|
130
|
+
brcc KeccakP1600_AddBytes_Loop8
|
131
|
+
KeccakP1600_AddBytes_Byte:
|
132
|
+
ldi r19, 8
|
133
|
+
add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
|
134
|
+
breq KeccakP1600_AddBytes_End
|
135
|
+
KeccakP1600_AddBytes_Loop1:
|
136
|
+
ld r21, X+
|
137
|
+
ld r0, Z
|
138
|
+
eor r0, r21
|
139
|
+
st Z+, r0
|
140
|
+
dec r18
|
141
|
+
brne KeccakP1600_AddBytes_Loop1
|
142
|
+
KeccakP1600_AddBytes_End:
|
143
|
+
ret
|
144
|
+
|
145
|
+
|
146
|
+
;----------------------------------------------------------------------------
|
147
|
+
;
|
148
|
+
; void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
|
149
|
+
;
|
150
|
+
; argument state is passed in r24:r25
|
151
|
+
; argument data is passed in r22:r23
|
152
|
+
; argument offset is passed in r20:r21, only LSB (r20) is used
|
153
|
+
; argument length is passed in r18:r19, only LSB (r18) is used
|
154
|
+
;
|
155
|
+
.global KeccakP1600_OverwriteBytes
|
156
|
+
; Copies length bytes from data (X) over the state starting at state + offset (Z).
; Only the low bytes of offset (r20) and length (r18) are used, so both must be below 256.
; Runs of 8 bytes go through an unrolled loop, the remaining 0..7 bytes through a byte loop.
; Scratch: r0, r18, r19, X, Z.
KeccakP1600_OverwriteBytes:
|
157
|
+
movw rZ, r24 ; Z = state
|
158
|
+
add rZ, r20 ; Z += low byte of offset
|
159
|
+
adc rZ+1, zero
|
160
|
+
movw rX, r22 ; X = data
|
161
|
+
subi r18, 8 ; borrow means fewer than 8 bytes are left
|
162
|
+
brcs KeccakP1600_OverwriteBytes_Byte
|
163
|
+
;do 8 bytes per iteration
|
164
|
+
KeccakP1600_OverwriteBytes_Loop8:
|
165
|
+
ld r0, X+ ; copy one byte: data -> state
|
166
|
+
st Z+, r0
|
167
|
+
ld r0, X+
|
168
|
+
st Z+, r0
|
169
|
+
ld r0, X+
|
170
|
+
st Z+, r0
|
171
|
+
ld r0, X+
|
172
|
+
st Z+, r0
|
173
|
+
ld r0, X+
|
174
|
+
st Z+, r0
|
175
|
+
ld r0, X+
|
176
|
+
st Z+, r0
|
177
|
+
ld r0, X+
|
178
|
+
st Z+, r0
|
179
|
+
ld r0, X+
|
180
|
+
st Z+, r0
|
181
|
+
subi r18, 8 ; loop while at least 8 more bytes remain
|
182
|
+
brcc KeccakP1600_OverwriteBytes_Loop8
|
183
|
+
KeccakP1600_OverwriteBytes_Byte:
|
184
|
+
ldi r19, 8
|
185
|
+
add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
|
186
|
+
breq KeccakP1600_OverwriteBytes_End
|
187
|
+
KeccakP1600_OverwriteBytes_Loop1:
|
188
|
+
ld r0, X+
|
189
|
+
st Z+, r0
|
190
|
+
dec r18
|
191
|
+
brne KeccakP1600_OverwriteBytes_Loop1
|
192
|
+
KeccakP1600_OverwriteBytes_End:
|
193
|
+
ret
|
194
|
+
|
195
|
+
;----------------------------------------------------------------------------
|
196
|
+
;
|
197
|
+
; void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
|
198
|
+
;
|
199
|
+
; argument state is passed in r24:r25
|
200
|
+
; argument byteCount is passed in r22:r23, only LSB (r22) is used
|
201
|
+
;
|
202
|
+
.global KeccakP1600_OverwriteWithZeroes
|
203
|
+
; Stores r1 into the first byteCount bytes of the state: byteCount / 8 whole lanes
; through an unrolled loop, then the byteCount % 8 leftover bytes one at a time.
; Only the low byte of byteCount (r22) is used. r1 is the same register as the zero
; alias and is assumed to hold 0 -- TODO confirm against the calling convention.
; Scratch: r22, r23, Z.
KeccakP1600_OverwriteWithZeroes:
|
204
|
+
movw rZ, r24 ; rZ = state
|
205
|
+
mov r23, r22
|
206
|
+
lsr r23
|
207
|
+
lsr r23
|
208
|
+
lsr r23 ; r23 = byteCount / 8 (number of whole lanes)
|
209
|
+
breq KeccakP1600_OverwriteWithZeroes_Bytes ; Z flag from the last lsr: no whole lane
|
210
|
+
KeccakP1600_OverwriteWithZeroes_LoopLanes:
|
211
|
+
st Z+, r1
|
212
|
+
st Z+, r1
|
213
|
+
st Z+, r1
|
214
|
+
st Z+, r1
|
215
|
+
st Z+, r1
|
216
|
+
st Z+, r1
|
217
|
+
st Z+, r1
|
218
|
+
st Z+, r1
|
219
|
+
dec r23
|
220
|
+
brne KeccakP1600_OverwriteWithZeroes_LoopLanes
|
221
|
+
KeccakP1600_OverwriteWithZeroes_Bytes:
|
222
|
+
andi r22, 7 ; r22 = byteCount % 8 (leftover bytes)
|
223
|
+
breq KeccakP1600_OverwriteWithZeroes_End
|
224
|
+
KeccakP1600_OverwriteWithZeroes_LoopBytes:
|
225
|
+
st Z+, r1
|
226
|
+
dec r22
|
227
|
+
brne KeccakP1600_OverwriteWithZeroes_LoopBytes
|
228
|
+
KeccakP1600_OverwriteWithZeroes_End:
|
229
|
+
ret
|
230
|
+
|
231
|
+
;----------------------------------------------------------------------------
|
232
|
+
;
|
233
|
+
; void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
|
234
|
+
;
|
235
|
+
; argument state is passed in r24:r25
|
236
|
+
; argument data is passed in r22:r23
|
237
|
+
; argument offset is passed in r20:r21, only LSB (r20) is used
|
238
|
+
; argument length is passed in r18:r19, only LSB (r18) is used
|
239
|
+
;
|
240
|
+
.global KeccakP1600_ExtractBytes
|
241
|
+
; Copies length bytes of the state, starting at state + offset (Z), to the buffer data (X).
; Only the low bytes of offset (r20) and length (r18) are used, so both must be below 256.
; Runs of 8 bytes go through an unrolled loop, the remaining 0..7 bytes through a byte loop.
; Scratch: r0, r18, r19, X, Z.
KeccakP1600_ExtractBytes:
|
242
|
+
movw rZ, r24 ; Z = state
|
243
|
+
add rZ, r20 ; Z += low byte of offset
|
244
|
+
adc rZ+1, zero
|
245
|
+
movw rX, r22 ; X = data (destination)
|
246
|
+
subi r18, 8 ; borrow means fewer than 8 bytes are left
|
247
|
+
brcs KeccakP1600_ExtractBytes_Byte
|
248
|
+
;do 8 bytes per iteration
|
249
|
+
KeccakP1600_ExtractBytes_Loop8:
|
250
|
+
ld r0, Z+ ; copy one byte: state -> data
|
251
|
+
st X+, r0
|
252
|
+
ld r0, Z+
|
253
|
+
st X+, r0
|
254
|
+
ld r0, Z+
|
255
|
+
st X+, r0
|
256
|
+
ld r0, Z+
|
257
|
+
st X+, r0
|
258
|
+
ld r0, Z+
|
259
|
+
st X+, r0
|
260
|
+
ld r0, Z+
|
261
|
+
st X+, r0
|
262
|
+
ld r0, Z+
|
263
|
+
st X+, r0
|
264
|
+
ld r0, Z+
|
265
|
+
st X+, r0
|
266
|
+
subi r18, 8 ; loop while at least 8 more bytes remain
|
267
|
+
brcc KeccakP1600_ExtractBytes_Loop8
|
268
|
+
KeccakP1600_ExtractBytes_Byte:
|
269
|
+
ldi r19, 8
|
270
|
+
add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
|
271
|
+
breq KeccakP1600_ExtractBytes_End
|
272
|
+
KeccakP1600_ExtractBytes_Loop1:
|
273
|
+
ld r0, Z+
|
274
|
+
st X+, r0
|
275
|
+
dec r18
|
276
|
+
brne KeccakP1600_ExtractBytes_Loop1
|
277
|
+
KeccakP1600_ExtractBytes_End:
|
278
|
+
ret
|
279
|
+
|
280
|
+
;----------------------------------------------------------------------------
|
281
|
+
;
|
282
|
+
; void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
283
|
+
;
|
284
|
+
; argument state is passed in r24:r25
|
285
|
+
; argument input is passed in r22:r23
|
286
|
+
; argument output is passed in r20:r21
|
287
|
+
; argument offset is passed in r18:r19, only LSB (r18) is used
|
288
|
+
; argument length is passed in r16:r17, only LSB (r16) is used
|
289
|
+
;
|
290
|
+
.global KeccakP1600_ExtractAndAddBytes
|
291
|
+
; Writes output[i] = input[i] ^ state[offset + i] for length bytes.
; Z walks the state, X the input and Y the output. Only the low bytes of offset (r18)
; and length (r16) are used, so both must be below 256.
; A zero length returns immediately, before anything is pushed.
; r16 and Y (r28:r29) are saved and restored. Scratch: r0, r19, r21, X, Z.
KeccakP1600_ExtractAndAddBytes:
|
292
|
+
tst r16
|
293
|
+
breq KeccakP1600_ExtractAndAddBytes_End ; length == 0: nothing to do
|
294
|
+
push r16
|
295
|
+
push r28
|
296
|
+
push r29
|
297
|
+
movw rZ, r24 ; Z = state
|
298
|
+
add rZ, r18 ; Z += low byte of offset
|
299
|
+
adc rZ+1, zero
|
300
|
+
movw rX, r22 ; X = input
|
301
|
+
movw rY, r20 ; Y = output (r21 is free to reuse from here on)
|
302
|
+
subi r16, 8 ; borrow means fewer than 8 bytes are left
|
303
|
+
brcs KeccakP1600_ExtractAndAddBytes_Byte
|
304
|
+
KeccakP1600_ExtractAndAddBytes_LoopLane:
|
305
|
+
ld r21, Z+ ; r21 = state byte
|
306
|
+
ld r0, X+ ; r0 = input byte
|
307
|
+
eor r0, r21
|
308
|
+
st Y+, r0 ; output byte = input ^ state
|
309
|
+
ld r21, Z+
|
310
|
+
ld r0, X+
|
311
|
+
eor r0, r21
|
312
|
+
st Y+, r0
|
313
|
+
ld r21, Z+
|
314
|
+
ld r0, X+
|
315
|
+
eor r0, r21
|
316
|
+
st Y+, r0
|
317
|
+
ld r21, Z+
|
318
|
+
ld r0, X+
|
319
|
+
eor r0, r21
|
320
|
+
st Y+, r0
|
321
|
+
ld r21, Z+
|
322
|
+
ld r0, X+
|
323
|
+
eor r0, r21
|
324
|
+
st Y+, r0
|
325
|
+
ld r21, Z+
|
326
|
+
ld r0, X+
|
327
|
+
eor r0, r21
|
328
|
+
st Y+, r0
|
329
|
+
ld r21, Z+
|
330
|
+
ld r0, X+
|
331
|
+
eor r0, r21
|
332
|
+
st Y+, r0
|
333
|
+
ld r21, Z+
|
334
|
+
ld r0, X+
|
335
|
+
eor r0, r21
|
336
|
+
st Y+, r0
|
337
|
+
subi r16, 8 ; loop while at least 8 more bytes remain
|
338
|
+
brcc KeccakP1600_ExtractAndAddBytes_LoopLane
|
339
|
+
KeccakP1600_ExtractAndAddBytes_Byte:
|
340
|
+
ldi r19, 8
|
341
|
+
add r16, r19 ; undo the last subtraction: r16 = remaining bytes (0..7)
|
342
|
+
breq KeccakP1600_ExtractAndAddBytes_Done
|
343
|
+
KeccakP1600_ExtractAndAddBytes_Loop1:
|
344
|
+
ld r21, Z+
|
345
|
+
ld r0, X+
|
346
|
+
eor r0, r21
|
347
|
+
st Y+, r0
|
348
|
+
dec r16
|
349
|
+
brne KeccakP1600_ExtractAndAddBytes_Loop1
|
350
|
+
KeccakP1600_ExtractAndAddBytes_Done:
|
351
|
+
pop r29 ; restore in reverse push order
|
352
|
+
pop r28
|
353
|
+
pop r16
|
354
|
+
KeccakP1600_ExtractAndAddBytes_End:
|
355
|
+
ret
|
356
|
+
|
357
|
+
|
358
|
+
; Rho Pi step table, read with lpm by the KeccakRhoPi loop: 24 rows of 3 bytes each.
;   byte 0 = ROT_BIT(r):  r modulo 8, added to the address of bit_rot_jmp_table before an ijmp
;   byte 1 = ROT_BYTE(r): added to the address of rotate64_0byte_left before an ijmp
;   byte 2 = lane index * 8: byte offset into the state; the loop ends after the row
;            whose offset is 1 * 8 (the last row)
; ROT_BYTE yields the whole-byte part of r, rounded up when r % 8 is above 4 and wrapped
; modulo 8, then scaled by 9 -- presumably the size in words of one byte-rotate routine; TODO confirm.
#define ROT_BIT(a) ((a) & 7)
|
359
|
+
#define ROT_BYTE(a) ((((a)/8 + !!(((a)%8) > 4)) & 7) * 9)
|
360
|
+
|
361
|
+
KeccakP1600_RhoPiConstants:
|
362
|
+
.BYTE ROT_BIT( 1), ROT_BYTE( 3), 10 * 8
|
363
|
+
.BYTE ROT_BIT( 3), ROT_BYTE( 6), 7 * 8
|
364
|
+
.BYTE ROT_BIT( 6), ROT_BYTE(10), 11 * 8
|
365
|
+
.BYTE ROT_BIT(10), ROT_BYTE(15), 17 * 8
|
366
|
+
.BYTE ROT_BIT(15), ROT_BYTE(21), 18 * 8
|
367
|
+
.BYTE ROT_BIT(21), ROT_BYTE(28), 3 * 8
|
368
|
+
.BYTE ROT_BIT(28), ROT_BYTE(36), 5 * 8
|
369
|
+
.BYTE ROT_BIT(36), ROT_BYTE(45), 16 * 8
|
370
|
+
.BYTE ROT_BIT(45), ROT_BYTE(55), 8 * 8
|
371
|
+
.BYTE ROT_BIT(55), ROT_BYTE( 2), 21 * 8
|
372
|
+
.BYTE ROT_BIT( 2), ROT_BYTE(14), 24 * 8
|
373
|
+
.BYTE ROT_BIT(14), ROT_BYTE(27), 4 * 8
|
374
|
+
.BYTE ROT_BIT(27), ROT_BYTE(41), 15 * 8
|
375
|
+
.BYTE ROT_BIT(41), ROT_BYTE(56), 23 * 8
|
376
|
+
.BYTE ROT_BIT(56), ROT_BYTE( 8), 19 * 8
|
377
|
+
.BYTE ROT_BIT( 8), ROT_BYTE(25), 13 * 8
|
378
|
+
.BYTE ROT_BIT(25), ROT_BYTE(43), 12 * 8
|
379
|
+
.BYTE ROT_BIT(43), ROT_BYTE(62), 2 * 8
|
380
|
+
.BYTE ROT_BIT(62), ROT_BYTE(18), 20 * 8
|
381
|
+
.BYTE ROT_BIT(18), ROT_BYTE(39), 14 * 8
|
382
|
+
.BYTE ROT_BIT(39), ROT_BYTE(61), 22 * 8
|
383
|
+
.BYTE ROT_BIT(61), ROT_BYTE(20), 9 * 8
|
384
|
+
.BYTE ROT_BIT(20), ROT_BYTE(44), 6 * 8
|
385
|
+
.BYTE ROT_BIT(44), ROT_BYTE( 1), 1 * 8
|
386
|
+
|
387
|
+
; Round constant table: 24 rows of 8 bytes, one lane per round, least significant byte first.
; The labels mark entry points counted from the END of the table:
;   KeccakP1600_RoundConstants_24 = 24 rows before the end (full table)
;   KeccakP1600_RoundConstants_12 = 12 rows before the end
;   KeccakP1600_RoundConstants_0  = end of the table
; KeccakP1600_Permute_Nrounds computes its start address as RoundConstants_0 - 8 * nrounds.
KeccakP1600_RoundConstants_24:
|
388
|
+
.BYTE 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
389
|
+
.BYTE 0x82, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
390
|
+
.BYTE 0x8a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
391
|
+
.BYTE 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
|
392
|
+
.BYTE 0x8b, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
393
|
+
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
|
394
|
+
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
|
395
|
+
.BYTE 0x09, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
396
|
+
.BYTE 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
397
|
+
.BYTE 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
398
|
+
.BYTE 0x09, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
|
399
|
+
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
|
400
|
+
KeccakP1600_RoundConstants_12:
|
401
|
+
.BYTE 0x8b, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
|
402
|
+
.BYTE 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
403
|
+
.BYTE 0x89, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
404
|
+
.BYTE 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
405
|
+
.BYTE 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
406
|
+
.BYTE 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
407
|
+
.BYTE 0x0a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
408
|
+
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
|
409
|
+
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
|
410
|
+
.BYTE 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
|
411
|
+
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
|
412
|
+
.BYTE 0x08, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
|
413
|
+
KeccakP1600_RoundConstants_0:
|
414
|
+
.BYTE 0xFF, 0 ; terminator
|
415
|
+
|
416
|
+
.text
|
417
|
+
|
418
|
+
#define pRound 22 // 2 regs (22-23)
|
419
|
+
|
420
|
+
;----------------------------------------------------------------------------
|
421
|
+
;
|
422
|
+
; void KeccakP1600_Permute_Nrounds( void *state, unsigned int nrounds )
|
423
|
+
;
|
424
|
+
; argument state is passed in r24:r25
|
425
|
+
; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
|
426
|
+
;
|
427
|
+
.global KeccakP1600_Permute_Nrounds
|
428
|
+
; Entry point for a caller-chosen round count. Sets pRound (r22:r23) to
; KeccakP1600_RoundConstants_0 - 8 * nrounds, i.e. nrounds rows before the end of the
; round constant table, then jumps to the shared KeccakP1600_Permute body.
; 8 * nrounds is computed in 8 bits and the table holds 24 rows, so nrounds must not exceed 24.
; Scratch: r26.
KeccakP1600_Permute_Nrounds:
|
429
|
+
mov r26, r22 ; keep nrounds: r22 is about to be overwritten as pRound
|
430
|
+
ldi pRound, lo8(KeccakP1600_RoundConstants_0)
|
431
|
+
ldi pRound+1, hi8(KeccakP1600_RoundConstants_0)
|
432
|
+
lsl r26
|
433
|
+
lsl r26
|
434
|
+
lsl r26 ; r26 = nrounds * 8 (bytes per round constant)
|
435
|
+
sub pRound, r26 ; step back nrounds rows from the end of the table
|
436
|
+
sbc pRound+1, zero
|
437
|
+
rjmp KeccakP1600_Permute
|
438
|
+
|
439
|
+
;----------------------------------------------------------------------------
|
440
|
+
;
|
441
|
+
; void KeccakP1600_Permute_24rounds( void *state )
|
442
|
+
;
|
443
|
+
.global KeccakP1600_Permute_24rounds
|
444
|
+
; Entry point for the 24-round permutation: points pRound (r22:r23) at the first row of
; the round constant table and jumps to the shared KeccakP1600_Permute body.
KeccakP1600_Permute_24rounds:
|
445
|
+
ldi pRound, lo8(KeccakP1600_RoundConstants_24)
|
446
|
+
ldi pRound+1, hi8(KeccakP1600_RoundConstants_24)
|
447
|
+
rjmp KeccakP1600_Permute
|
448
|
+
|
449
|
+
;----------------------------------------------------------------------------
|
450
|
+
;
|
451
|
+
; void KeccakP1600_Permute_12rounds( void *state )
|
452
|
+
;
|
453
|
+
.global KeccakP1600_Permute_12rounds
|
454
|
+
KeccakP1600_Permute_12rounds:
|
455
|
+
ldi pRound, lo8(KeccakP1600_RoundConstants_12)
|
456
|
+
ldi pRound+1, hi8(KeccakP1600_RoundConstants_12)
|
457
|
+
KeccakP1600_Permute:
|
458
|
+
push r2
|
459
|
+
push r3
|
460
|
+
push r4
|
461
|
+
push r5
|
462
|
+
push r6
|
463
|
+
push r7
|
464
|
+
push r8
|
465
|
+
push r9
|
466
|
+
push r10
|
467
|
+
push r11
|
468
|
+
push r12
|
469
|
+
push r13
|
470
|
+
push r14
|
471
|
+
push r15
|
472
|
+
push r16
|
473
|
+
push r17
|
474
|
+
push r28
|
475
|
+
push r29
|
476
|
+
|
477
|
+
; Allocate C variables (5*8)
|
478
|
+
in rZ, sp
|
479
|
+
in rZ+1, sp+1
|
480
|
+
sbiw rZ, 40
|
481
|
+
in r0, 0x3F
|
482
|
+
cli
|
483
|
+
out sp+1, rZ+1
|
484
|
+
out sp, rZ ; Z points to 5 C lanes
|
485
|
+
out 0x3F, r0
|
486
|
+
|
487
|
+
; Variables used in multiple operations
|
488
|
+
#define rTemp 2 // 8 regs (2-9)
|
489
|
+
#define rTempBis 10 // 8 regs (10-17)
|
490
|
+
#define rTempTer 18 // 4 regs (18-21)
|
491
|
+
|
492
|
+
; Initial Prepare Theta
|
493
|
+
#define TCIPx rTempTer
|
494
|
+
|
495
|
+
ldi TCIPx, 5
|
496
|
+
movw rY, rpState
|
497
|
+
KeccakInitialPrepTheta_Loop:
|
498
|
+
ld rTemp+0, Y+ ; state[x]
|
499
|
+
ld rTemp+1, Y+
|
500
|
+
ld rTemp+2, Y+
|
501
|
+
ld rTemp+3, Y+
|
502
|
+
ld rTemp+4, Y+
|
503
|
+
ld rTemp+5, Y+
|
504
|
+
ld rTemp+6, Y+
|
505
|
+
ld rTemp+7, Y+
|
506
|
+
|
507
|
+
adiw rY, 32
|
508
|
+
ld r0, Y+ ; state[5+x]
|
509
|
+
eor rTemp+0, r0
|
510
|
+
ld r0, Y+
|
511
|
+
eor rTemp+1, r0
|
512
|
+
ld r0, Y+
|
513
|
+
eor rTemp+2, r0
|
514
|
+
ld r0, Y+
|
515
|
+
eor rTemp+3, r0
|
516
|
+
ld r0, Y+
|
517
|
+
eor rTemp+4, r0
|
518
|
+
ld r0, Y+
|
519
|
+
eor rTemp+5, r0
|
520
|
+
ld r0, Y+
|
521
|
+
eor rTemp+6, r0
|
522
|
+
ld r0, Y+
|
523
|
+
eor rTemp+7, r0
|
524
|
+
|
525
|
+
adiw rY, 32
|
526
|
+
ld r0, Y+ ; state[10+x]
|
527
|
+
eor rTemp+0, r0
|
528
|
+
ld r0, Y+
|
529
|
+
eor rTemp+1, r0
|
530
|
+
ld r0, Y+
|
531
|
+
eor rTemp+2, r0
|
532
|
+
ld r0, Y+
|
533
|
+
eor rTemp+3, r0
|
534
|
+
ld r0, Y+
|
535
|
+
eor rTemp+4, r0
|
536
|
+
ld r0, Y+
|
537
|
+
eor rTemp+5, r0
|
538
|
+
ld r0, Y+
|
539
|
+
eor rTemp+6, r0
|
540
|
+
ld r0, Y+
|
541
|
+
eor rTemp+7, r0
|
542
|
+
|
543
|
+
adiw rY, 32
|
544
|
+
ld r0, Y+ ; state[15+x]
|
545
|
+
eor rTemp+0, r0
|
546
|
+
ld r0, Y+
|
547
|
+
eor rTemp+1, r0
|
548
|
+
ld r0, Y+
|
549
|
+
eor rTemp+2, r0
|
550
|
+
ld r0, Y+
|
551
|
+
eor rTemp+3, r0
|
552
|
+
ld r0, Y+
|
553
|
+
eor rTemp+4, r0
|
554
|
+
ld r0, Y+
|
555
|
+
eor rTemp+5, r0
|
556
|
+
ld r0, Y+
|
557
|
+
eor rTemp+6, r0
|
558
|
+
ld r0, Y+
|
559
|
+
eor rTemp+7, r0
|
560
|
+
|
561
|
+
adiw rY, 32
|
562
|
+
ld r0, Y+ ; state[20+x]
|
563
|
+
eor rTemp+0, r0
|
564
|
+
ld r0, Y+
|
565
|
+
eor rTemp+1, r0
|
566
|
+
ld r0, Y+
|
567
|
+
eor rTemp+2, r0
|
568
|
+
ld r0, Y+
|
569
|
+
eor rTemp+3, r0
|
570
|
+
ld r0, Y+
|
571
|
+
eor rTemp+4, r0
|
572
|
+
ld r0, Y+
|
573
|
+
eor rTemp+5, r0
|
574
|
+
ld r0, Y+
|
575
|
+
eor rTemp+6, r0
|
576
|
+
ld r0, Y+
|
577
|
+
eor rTemp+7, r0
|
578
|
+
|
579
|
+
st Z+, rTemp+0
|
580
|
+
st Z+, rTemp+1
|
581
|
+
st Z+, rTemp+2
|
582
|
+
st Z+, rTemp+3
|
583
|
+
st Z+, rTemp+4
|
584
|
+
st Z+, rTemp+5
|
585
|
+
st Z+, rTemp+6
|
586
|
+
st Z+, rTemp+7
|
587
|
+
|
588
|
+
subi rY, 160
|
589
|
+
sbc rY+1, zero
|
590
|
+
|
591
|
+
subi TCIPx, 1
|
592
|
+
breq KeccakInitialPrepTheta_Done
|
593
|
+
rjmp KeccakInitialPrepTheta_Loop
|
594
|
+
KeccakInitialPrepTheta_Done:
|
595
|
+
#undef TCIPx
|
596
|
+
|
597
|
+
Keccak_RoundLoop:
|
598
|
+
|
599
|
+
; Theta
|
600
|
+
#define TCplus rX
|
601
|
+
#define TCminus rZ
|
602
|
+
#define TCcoordX rTempTer
|
603
|
+
#define TCcoordY rTempTer+1
|
604
|
+
|
605
|
+
in TCminus, sp
|
606
|
+
in TCminus+1, sp+1
|
607
|
+
movw TCplus, TCminus
|
608
|
+
adiw TCminus, 4*8
|
609
|
+
adiw TCplus, 1*8
|
610
|
+
movw rY, rpState
|
611
|
+
|
612
|
+
ldi TCcoordX, 0x16
|
613
|
+
KeccakTheta_Loop1:
|
614
|
+
ld rTemp+0, X+
|
615
|
+
ld rTemp+1, X+
|
616
|
+
ld rTemp+2, X+
|
617
|
+
ld rTemp+3, X+
|
618
|
+
ld rTemp+4, X+
|
619
|
+
ld rTemp+5, X+
|
620
|
+
ld rTemp+6, X+
|
621
|
+
ld rTemp+7, X+
|
622
|
+
|
623
|
+
lsl rTemp+0
|
624
|
+
rol rTemp+1
|
625
|
+
rol rTemp+2
|
626
|
+
rol rTemp+3
|
627
|
+
rol rTemp+4
|
628
|
+
rol rTemp+5
|
629
|
+
rol rTemp+6
|
630
|
+
rol rTemp+7
|
631
|
+
adc rTemp+0, zero
|
632
|
+
|
633
|
+
ld r0, Z+
|
634
|
+
eor rTemp+0, r0
|
635
|
+
ld r0, Z+
|
636
|
+
eor rTemp+1, r0
|
637
|
+
ld r0, Z+
|
638
|
+
eor rTemp+2, r0
|
639
|
+
ld r0, Z+
|
640
|
+
eor rTemp+3, r0
|
641
|
+
ld r0, Z+
|
642
|
+
eor rTemp+4, r0
|
643
|
+
ld r0, Z+
|
644
|
+
eor rTemp+5, r0
|
645
|
+
ld r0, Z+
|
646
|
+
eor rTemp+6, r0
|
647
|
+
ld r0, Z+
|
648
|
+
eor rTemp+7, r0
|
649
|
+
|
650
|
+
ldi TCcoordY, 5
|
651
|
+
KeccakTheta_Loop2:
|
652
|
+
ld r0, Y
|
653
|
+
eor r0, rTemp+0
|
654
|
+
st Y+, r0
|
655
|
+
ld r0, Y
|
656
|
+
eor r0, rTemp+1
|
657
|
+
st Y+, r0
|
658
|
+
ld r0, Y
|
659
|
+
eor r0, rTemp+2
|
660
|
+
st Y+, r0
|
661
|
+
ld r0, Y
|
662
|
+
eor r0, rTemp+3
|
663
|
+
st Y+, r0
|
664
|
+
ld r0, Y
|
665
|
+
eor r0, rTemp+4
|
666
|
+
st Y+, r0
|
667
|
+
ld r0, Y
|
668
|
+
eor r0, rTemp+5
|
669
|
+
st Y+, r0
|
670
|
+
ld r0, Y
|
671
|
+
eor r0, rTemp+6
|
672
|
+
st Y+, r0
|
673
|
+
ld r0, Y
|
674
|
+
eor r0, rTemp+7
|
675
|
+
st Y+, r0
|
676
|
+
adiw rY, 32
|
677
|
+
|
678
|
+
dec TCcoordY
|
679
|
+
brne KeccakTheta_Loop2
|
680
|
+
|
681
|
+
subi rY, 200-8
|
682
|
+
sbc rY+1, zero
|
683
|
+
|
684
|
+
lsr TCcoordX
|
685
|
+
brcc 1f
|
686
|
+
breq KeccakTheta_End
|
687
|
+
rjmp KeccakTheta_Loop1
|
688
|
+
1:
|
689
|
+
cpi TCcoordX, 0x0B
|
690
|
+
brne 2f
|
691
|
+
sbiw TCminus, 40
|
692
|
+
rjmp KeccakTheta_Loop1
|
693
|
+
2:
|
694
|
+
sbiw TCplus, 40
|
695
|
+
rjmp KeccakTheta_Loop1
|
696
|
+
|
697
|
+
KeccakTheta_End:
|
698
|
+
#undef TCplus
|
699
|
+
#undef TCminus
|
700
|
+
#undef TCcoordX
|
701
|
+
#undef TCcoordY
|
702
|
+
|
703
|
+
; Rho Pi
|
704
|
+
#define RPpConst rTempTer // 2 regs
|
705
|
+
#define RPindex rTempTer+2
|
706
|
+
#define RPpBitRot rX
|
707
|
+
#define RPpByteRot pRound
|
708
|
+
|
709
|
+
sbiw rY, 32
|
710
|
+
|
711
|
+
ld rTemp+0, Y+
|
712
|
+
ld rTemp+1, Y+
|
713
|
+
ld rTemp+2, Y+
|
714
|
+
ld rTemp+3, Y+
|
715
|
+
ld rTemp+4, Y+
|
716
|
+
ld rTemp+5, Y+
|
717
|
+
ld rTemp+6, Y+
|
718
|
+
ld rTemp+7, Y+
|
719
|
+
|
720
|
+
push pRound
|
721
|
+
push pRound+1
|
722
|
+
ldi RPpConst, lo8(KeccakP1600_RhoPiConstants)
|
723
|
+
ldi RPpConst+1, hi8(KeccakP1600_RhoPiConstants)
|
724
|
+
ldi RPpBitRot, pm_lo8(bit_rot_jmp_table)
|
725
|
+
ldi RPpBitRot+1, pm_hi8(bit_rot_jmp_table)
|
726
|
+
ldi RPpByteRot, pm_lo8(rotate64_0byte_left)
|
727
|
+
ldi RPpByteRot+1, pm_hi8(rotate64_0byte_left)
|
728
|
+
|
729
|
+
KeccakRhoPi_Loop:
|
730
|
+
; get rotation codes and state index
|
731
|
+
movw rZ, RPpConst
|
732
|
+
lpm r0, Z+ ; bits
|
733
|
+
lpm rTempBis, Z+ ; bytes
|
734
|
+
lpm RPindex, Z+
|
735
|
+
movw RPpConst, rZ
|
736
|
+
|
737
|
+
; do bit rotation
|
738
|
+
movw rZ, RPpBitRot
|
739
|
+
add rZ, r0
|
740
|
+
adc rZ+1, zero
|
741
|
+
ijmp
|
742
|
+
|
743
|
+
KeccakRhoPi_RhoBitRotateDone:
|
744
|
+
movw rY, rpState
|
745
|
+
add rY, RPindex
|
746
|
+
adc rY+1, zero
|
747
|
+
|
748
|
+
movw rZ, RPpByteRot
|
749
|
+
add rZ, rTempBis
|
750
|
+
adc rZ+1, zero
|
751
|
+
ijmp
|
752
|
+
|
753
|
+
KeccakRhoPi_PiStore:
|
754
|
+
sbiw rY, 8
|
755
|
+
st Y+, rTemp+0
|
756
|
+
st Y+, rTemp+1
|
757
|
+
st Y+, rTemp+2
|
758
|
+
st Y+, rTemp+3
|
759
|
+
st Y+, rTemp+4
|
760
|
+
st Y+, rTemp+5
|
761
|
+
st Y+, rTemp+6
|
762
|
+
st Y+, rTemp+7
|
763
|
+
|
764
|
+
movw rTemp+0, rTempBis+0
|
765
|
+
movw rTemp+2, rTempBis+2
|
766
|
+
movw rTemp+4, rTempBis+4
|
767
|
+
movw rTemp+6, rTempBis+6
|
768
|
+
KeccakRhoPi_RhoDone:
|
769
|
+
subi RPindex, 8
|
770
|
+
brne KeccakRhoPi_Loop
|
771
|
+
pop pRound+1
|
772
|
+
pop pRound
|
773
|
+
|
774
|
+
#undef RPpConst
|
775
|
+
#undef RPindex
|
776
|
+
; Retire the Rho Pi bit-rotate table alias; the name must match the RPpBitRot define exactly (case-sensitive).
#undef RPpBitRot
|
777
|
+
#undef RPpByteRot
|
778
|
+
|
779
|
+
|
780
|
+
; Chi Iota prepare Theta
; Processes one byte position of the lanes per outer pass (CIPTz counts 8
; byte positions) and, inside it, 5 groups of 40 state bytes (CIPTy).
; CIPTa0..a4 hold the same byte of five consecutive lanes, CIPTc0..c4
; accumulate the XOR of the five results per lane slot and are written to
; the C area addressed through X.
|
781
|
+
#define CIPTa0 rTemp
|
782
|
+
#define CIPTa1 rTemp+1
|
783
|
+
#define CIPTa2 rTemp+2
|
784
|
+
#define CIPTa3 rTemp+3
|
785
|
+
#define CIPTa4 rTemp+4
|
786
|
+
#define CIPTc0 rTempBis
|
787
|
+
#define CIPTc1 rTempBis+1
|
788
|
+
#define CIPTc2 rTempBis+2
|
789
|
+
#define CIPTc3 rTempBis+3
|
790
|
+
#define CIPTc4 rTempBis+4
|
791
|
+
#define CIPTz rTempBis+6
|
792
|
+
#define CIPTy rTempBis+7
|
793
|
+
|
794
|
+
in rX, sp ; 5 * C: X = address read from the stack pointer, write cursor for C
|
795
|
+
in rX+1, sp+1
|
796
|
+
movw rY, rpState ; Y = state cursor
|
797
|
+
movw rZ, pRound ; Z = round constant read pointer
|
798
|
+
|
799
|
+
ldi CIPTz, 8 ; 8 byte positions per lane
|
800
|
+
KeccakChiIotaPrepareTheta_zLoop:
|
801
|
+
mov CIPTc0, zero ; clear the five accumulators
|
802
|
+
mov CIPTc1, zero
|
803
|
+
movw CIPTc2, CIPTc0 ; c2:c3 = c0:c1 = 0
|
804
|
+
mov CIPTc4, zero
|
805
|
+
|
806
|
+
ldi CIPTy, 5 ; 5 groups of five lanes
|
807
|
+
KeccakChiIotaPrepareTheta_yLoop:
|
808
|
+
ld CIPTa0, Y ; same byte position of five consecutive 8-byte lanes
|
809
|
+
ldd CIPTa1, Y+8
|
810
|
+
ldd CIPTa2, Y+16
|
811
|
+
ldd CIPTa3, Y+24
|
812
|
+
ldd CIPTa4, Y+32
|
813
|
+
|
814
|
+
;*p = t = a0 ^ ((~a1) & a2); c0 ^= t;
|
815
|
+
mov r0, CIPTa1
|
816
|
+
com r0
|
817
|
+
and r0, CIPTa2
|
818
|
+
eor r0, CIPTa0
|
819
|
+
eor CIPTc0, r0
|
820
|
+
st Y, r0
|
821
|
+
|
822
|
+
;*(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t;
|
823
|
+
mov r0, CIPTa2
|
824
|
+
com r0
|
825
|
+
and r0, CIPTa3
|
826
|
+
eor r0, CIPTa1
|
827
|
+
eor CIPTc1, r0
|
828
|
+
std Y+8, r0
|
829
|
+
|
830
|
+
;*(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2;
|
831
|
+
mov r0, CIPTa3
|
832
|
+
com r0
|
833
|
+
and r0, CIPTa4
|
834
|
+
eor r0, CIPTa2
|
835
|
+
eor CIPTc2, r0
|
836
|
+
std Y+16, r0
|
837
|
+
|
838
|
+
;*(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3;
|
839
|
+
mov r0, CIPTa4
|
840
|
+
com r0
|
841
|
+
and r0, CIPTa0
|
842
|
+
eor r0, CIPTa3
|
843
|
+
eor CIPTc3, r0
|
844
|
+
std Y+24, r0
|
845
|
+
|
846
|
+
;*(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4;
|
847
|
+
com CIPTa0 ; a0 is no longer needed unmodified, so it is reused as the temporary
|
848
|
+
and CIPTa0, CIPTa1
|
849
|
+
eor CIPTa0, CIPTa4
|
850
|
+
eor CIPTc4, CIPTa0
|
851
|
+
std Y+32, CIPTa0
|
852
|
+
|
853
|
+
adiw rY, 40 ; next group of five lanes
|
854
|
+
dec CIPTy
|
855
|
+
brne KeccakChiIotaPrepareTheta_yLoop
|
856
|
+
|
857
|
+
subi rY, 200 ; back to the first group (5 * 40 bytes)
|
858
|
+
sbc rY+1, zero
|
859
|
+
|
860
|
+
lpm r0, Z+ ;Round Constant, one byte per byte position
|
861
|
+
ld CIPTa0, Y ; Iota: XOR it into the current byte of the first lane
|
862
|
+
eor CIPTa0, r0
|
863
|
+
st Y+, CIPTa0 ; Y now points at the next byte position
|
864
|
+
|
865
|
+
movw pRound, rZ ; park the constant pointer, Z is needed for the C stores
|
866
|
+
movw rZ, rX
|
867
|
+
eor CIPTc0, r0 ; c0 was accumulated before Iota, so fold the constant in as well
|
868
|
+
st Z+, CIPTc0 ; C lane 0, current byte position
|
869
|
+
std Z+7, CIPTc1 ; Z already advanced by 1, so +7/+15/+23/+31 are lane offsets 8/16/24/32
|
870
|
+
std Z+15, CIPTc2
|
871
|
+
std Z+23, CIPTc3
|
872
|
+
std Z+31, CIPTc4
|
873
|
+
movw rX, rZ ; X = next byte position of C
|
874
|
+
movw rZ, pRound
|
875
|
+
|
876
|
+
dec CIPTz
|
877
|
+
brne KeccakChiIotaPrepareTheta_zLoop
|
878
|
+
|
879
|
+
#undef CIPTa0
|
880
|
+
#undef CIPTa1
|
881
|
+
#undef CIPTa2
|
882
|
+
#undef CIPTa3
|
883
|
+
#undef CIPTa4
|
884
|
+
#undef CIPTc0
|
885
|
+
#undef CIPTc1
|
886
|
+
#undef CIPTc2
|
887
|
+
#undef CIPTc3
|
888
|
+
#undef CIPTc4
|
889
|
+
#undef CIPTz
|
890
|
+
#undef CIPTy
|
891
|
+
|
892
|
+
;Check for terminator
|
893
|
+
lpm r0, Z ; peek at the next round constant byte without advancing Z
|
894
|
+
inc r0 ; 0xFF + 1 wraps to 0 and sets the zero flag
|
895
|
+
breq Keccak_Done
|
896
|
+
rjmp Keccak_RoundLoop ; otherwise run another round
|
897
|
+
Keccak_Done:
|
898
|
+
|
899
|
+
; Free C(on stack) and registers
|
900
|
+
in rX, sp ; free 5 C lanes
|
901
|
+
in rX+1, sp+1
|
902
|
+
adiw rX, 40 ; 5 lanes * 8 bytes
|
903
|
+
in r0, 0x3F ; save the status register (I/O address 0x3F)
|
904
|
+
cli ; no interrupts while the two stack pointer bytes are inconsistent
|
905
|
+
out sp+1, rX+1
|
906
|
+
out sp, rX
|
907
|
+
out 0x3F, r0 ; restore the previous interrupt enable state
|
908
|
+
|
909
|
+
pop r29 ; restore saved registers - pushes presumably at function entry, TODO confirm order
|
910
|
+
pop r28
|
911
|
+
pop r17
|
912
|
+
pop r16
|
913
|
+
pop r15
|
914
|
+
pop r14
|
915
|
+
pop r13
|
916
|
+
pop r12
|
917
|
+
pop r11
|
918
|
+
pop r10
|
919
|
+
pop r9
|
920
|
+
pop r8
|
921
|
+
pop r7
|
922
|
+
pop r6
|
923
|
+
pop r5
|
924
|
+
pop r4
|
925
|
+
pop r3
|
926
|
+
pop r2
|
927
|
+
ret
|
928
|
+
|
929
|
+
bit_rot_jmp_table: ; entered through ijmp with Z = this address + bit code, one rjmp per code
|
930
|
+
rjmp KeccakRhoPi_RhoBitRotateDone ; code 0: no bit rotation
|
931
|
+
rjmp rotate64_1bit_left ; code 1
|
932
|
+
rjmp rotate64_2bit_left ; code 2
|
933
|
+
rjmp rotate64_3bit_left ; code 3
|
934
|
+
rjmp rotate64_4bit_left ; code 4
|
935
|
+
rjmp rotate64_3bit_right ; code 5
|
936
|
+
rjmp rotate64_2bit_right ; code 6
|
937
|
+
rjmp rotate64_1bit_right ; code 7
|
938
|
+
|
939
|
+
rotate64_4bit_left: ; falls through the three stages below: 4 single-bit left rotations of rTemp..rTemp+7
|
940
|
+
lsl rTemp ; shift the low byte, then ripple the carry up through the other 7 bytes
|
941
|
+
rol rTemp+1
|
942
|
+
rol rTemp+2
|
943
|
+
rol rTemp+3
|
944
|
+
rol rTemp+4
|
945
|
+
rol rTemp+5
|
946
|
+
rol rTemp+6
|
947
|
+
rol rTemp+7
|
948
|
+
adc rTemp, r1 ; wrap the bit shifted out of rTemp+7 into bit 0 (assumes r1 holds 0 - confirm)
|
949
|
+
rotate64_3bit_left: ; entry for 3 rotations, same stage as above
|
950
|
+
lsl rTemp
|
951
|
+
rol rTemp+1
|
952
|
+
rol rTemp+2
|
953
|
+
rol rTemp+3
|
954
|
+
rol rTemp+4
|
955
|
+
rol rTemp+5
|
956
|
+
rol rTemp+6
|
957
|
+
rol rTemp+7
|
958
|
+
adc rTemp, r1
|
959
|
+
rotate64_2bit_left: ; entry for 2 rotations
|
960
|
+
lsl rTemp
|
961
|
+
rol rTemp+1
|
962
|
+
rol rTemp+2
|
963
|
+
rol rTemp+3
|
964
|
+
rol rTemp+4
|
965
|
+
rol rTemp+5
|
966
|
+
rol rTemp+6
|
967
|
+
rol rTemp+7
|
968
|
+
adc rTemp, r1
|
969
|
+
rotate64_1bit_left: ; entry for 1 rotation
|
970
|
+
lsl rTemp
|
971
|
+
rol rTemp+1
|
972
|
+
rol rTemp+2
|
973
|
+
rol rTemp+3
|
974
|
+
rol rTemp+4
|
975
|
+
rol rTemp+5
|
976
|
+
rol rTemp+6
|
977
|
+
rol rTemp+7
|
978
|
+
adc rTemp, r1
|
979
|
+
rjmp KeccakRhoPi_RhoBitRotateDone ; back to the Rho/Pi loop
|
980
|
+
|
981
|
+
rotate64_3bit_right: ; falls through the two stages below: 3 single-bit right rotations of rTemp..rTemp+7
|
982
|
+
bst rTemp, 0 ; remember the bit that will fall off the low end in the T flag
|
983
|
+
ror rTemp+7 ; incoming carry is not set up here, bit 7 is fixed by the bld below
|
984
|
+
ror rTemp+6
|
985
|
+
ror rTemp+5
|
986
|
+
ror rTemp+4
|
987
|
+
ror rTemp+3
|
988
|
+
ror rTemp+2
|
989
|
+
ror rTemp+1
|
990
|
+
ror rTemp
|
991
|
+
bld rTemp+7, 7 ; wrap the saved low bit into the top bit
|
992
|
+
rotate64_2bit_right: ; entry for 2 rotations, same stage as above
|
993
|
+
bst rTemp, 0
|
994
|
+
ror rTemp+7
|
995
|
+
ror rTemp+6
|
996
|
+
ror rTemp+5
|
997
|
+
ror rTemp+4
|
998
|
+
ror rTemp+3
|
999
|
+
ror rTemp+2
|
1000
|
+
ror rTemp+1
|
1001
|
+
ror rTemp
|
1002
|
+
bld rTemp+7, 7
|
1003
|
+
rotate64_1bit_right: ; entry for 1 rotation
|
1004
|
+
bst rTemp, 0
|
1005
|
+
ror rTemp+7
|
1006
|
+
ror rTemp+6
|
1007
|
+
ror rTemp+5
|
1008
|
+
ror rTemp+4
|
1009
|
+
ror rTemp+3
|
1010
|
+
ror rTemp+2
|
1011
|
+
ror rTemp+1
|
1012
|
+
ror rTemp
|
1013
|
+
bld rTemp+7, 7
|
1014
|
+
rjmp KeccakRhoPi_RhoBitRotateDone ; back to the Rho/Pi loop
|
1015
|
+
|
1016
|
+
; Each byte rotate routine must be 9 instructions long.
; (8 loads + 1 rjmp: the Rho/Pi loop reaches them by adding a word offset to
; the address of rotate64_0byte_left, so their spacing must stay uniform.)
|
1017
|
+
|
1018
|
+
rotate64_0byte_left: ; byte i read from Y lands in rTempBis+i (no byte rotation)
|
1019
|
+
ld rTempBis+0, Y+
|
1020
|
+
ld rTempBis+1, Y+
|
1021
|
+
ld rTempBis+2, Y+
|
1022
|
+
ld rTempBis+3, Y+
|
1023
|
+
ld rTempBis+4, Y+
|
1024
|
+
ld rTempBis+5, Y+
|
1025
|
+
ld rTempBis+6, Y+
|
1026
|
+
ld rTempBis+7, Y+
|
1027
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1028
|
+
|
1029
|
+
rotate64_1byte_left: ; byte i read from Y lands in rTempBis+((i+1) mod 8)
|
1030
|
+
ld rTempBis+1, Y+
|
1031
|
+
ld rTempBis+2, Y+
|
1032
|
+
ld rTempBis+3, Y+
|
1033
|
+
ld rTempBis+4, Y+
|
1034
|
+
ld rTempBis+5, Y+
|
1035
|
+
ld rTempBis+6, Y+
|
1036
|
+
ld rTempBis+7, Y+
|
1037
|
+
ld rTempBis+0, Y+
|
1038
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1039
|
+
|
1040
|
+
rotate64_2byte_left: ; byte i read from Y lands in rTempBis+((i+2) mod 8)
|
1041
|
+
ld rTempBis+2, Y+
|
1042
|
+
ld rTempBis+3, Y+
|
1043
|
+
ld rTempBis+4, Y+
|
1044
|
+
ld rTempBis+5, Y+
|
1045
|
+
ld rTempBis+6, Y+
|
1046
|
+
ld rTempBis+7, Y+
|
1047
|
+
ld rTempBis+0, Y+
|
1048
|
+
ld rTempBis+1, Y+
|
1049
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1050
|
+
|
1051
|
+
rotate64_3byte_left: ; byte i read from Y lands in rTempBis+((i+3) mod 8)
|
1052
|
+
ld rTempBis+3, Y+
|
1053
|
+
ld rTempBis+4, Y+
|
1054
|
+
ld rTempBis+5, Y+
|
1055
|
+
ld rTempBis+6, Y+
|
1056
|
+
ld rTempBis+7, Y+
|
1057
|
+
ld rTempBis+0, Y+
|
1058
|
+
ld rTempBis+1, Y+
|
1059
|
+
ld rTempBis+2, Y+
|
1060
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1061
|
+
|
1062
|
+
rotate64_4byte_left: ; byte i read from Y lands in rTempBis+((i+4) mod 8)
|
1063
|
+
ld rTempBis+4, Y+
|
1064
|
+
ld rTempBis+5, Y+
|
1065
|
+
ld rTempBis+6, Y+
|
1066
|
+
ld rTempBis+7, Y+
|
1067
|
+
ld rTempBis+0, Y+
|
1068
|
+
ld rTempBis+1, Y+
|
1069
|
+
ld rTempBis+2, Y+
|
1070
|
+
ld rTempBis+3, Y+
|
1071
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1072
|
+
|
1073
|
+
rotate64_5byte_left: ; byte i read from Y lands in rTempBis+((i+5) mod 8)
|
1074
|
+
ld rTempBis+5, Y+
|
1075
|
+
ld rTempBis+6, Y+
|
1076
|
+
ld rTempBis+7, Y+
|
1077
|
+
ld rTempBis+0, Y+
|
1078
|
+
ld rTempBis+1, Y+
|
1079
|
+
ld rTempBis+2, Y+
|
1080
|
+
ld rTempBis+3, Y+
|
1081
|
+
ld rTempBis+4, Y+
|
1082
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1083
|
+
|
1084
|
+
rotate64_6byte_left: ; byte i read from Y lands in rTempBis+((i+6) mod 8)
|
1085
|
+
ld rTempBis+6, Y+
|
1086
|
+
ld rTempBis+7, Y+
|
1087
|
+
ld rTempBis+0, Y+
|
1088
|
+
ld rTempBis+1, Y+
|
1089
|
+
ld rTempBis+2, Y+
|
1090
|
+
ld rTempBis+3, Y+
|
1091
|
+
ld rTempBis+4, Y+
|
1092
|
+
ld rTempBis+5, Y+
|
1093
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1094
|
+
|
1095
|
+
rotate64_7byte_left: ; byte i read from Y lands in rTempBis+((i+7) mod 8)
|
1096
|
+
ld rTempBis+7, Y+
|
1097
|
+
ld rTempBis+0, Y+
|
1098
|
+
ld rTempBis+1, Y+
|
1099
|
+
ld rTempBis+2, Y+
|
1100
|
+
ld rTempBis+3, Y+
|
1101
|
+
ld rTempBis+4, Y+
|
1102
|
+
ld rTempBis+5, Y+
|
1103
|
+
ld rTempBis+6, Y+
|
1104
|
+
rjmp KeccakRhoPi_PiStore ; Y has advanced by 8 at this point
|
1105
|
+
|
1106
|
+
; Remove the preprocessor aliases used as register and I/O operands by the code above.
#undef rTemp
|
1107
|
+
#undef rTempBis
|
1108
|
+
#undef rTempTer
|
1109
|
+
#undef pRound
|
1110
|
+
|
1111
|
+
#undef rpState
|
1112
|
+
#undef zero
|
1113
|
+
#undef rX
|
1114
|
+
#undef rY
|
1115
|
+
#undef rZ
|
1116
|
+
#undef sp
|