sleeping_kangaroo12 0.0.1 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.md +27 -0
- data/README.md +48 -53
- data/ext/Rakefile +12 -37
- data/ext/binding/sleeping_kangaroo12.c +1 -16
- data/ext/{xkcp → k12}/Makefile +0 -0
- data/ext/k12/Makefile.build +118 -0
- data/ext/k12/README.markdown +86 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
- data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
- data/ext/k12/lib/KangarooTwelve.c +332 -0
- data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
- data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
- data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
- data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
- data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
- data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
- data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
- data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
- data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
- data/lib/sleeping_kangaroo12/binding.rb +2 -1
- data/lib/sleeping_kangaroo12/build/loader.rb +1 -0
- data/lib/sleeping_kangaroo12/build/platform.rb +1 -0
- data/lib/sleeping_kangaroo12/digest.rb +38 -4
- data/lib/sleeping_kangaroo12/version.rb +1 -1
- metadata +48 -288
- data/ext/config/xkcp.build +0 -17
- data/ext/xkcp/LICENSE +0 -1
- data/ext/xkcp/Makefile.build +0 -200
- data/ext/xkcp/README.markdown +0 -296
- data/ext/xkcp/lib/HighLevel.build +0 -143
- data/ext/xkcp/lib/LowLevel.build +0 -757
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
- data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
- data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
- data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
- data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
- data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
- data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
- data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
- data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
- data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
- data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
- data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
- data/ext/xkcp/lib/high/common/Phases.h +0 -25
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
- data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
- data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
- data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
- data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
- data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
- data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
- data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
- data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
- data/ext/xkcp/util/KeccakSum/base64.c +0 -86
- data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,850 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
|
3
|
-
|
4
|
-
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
|
5
|
-
|
6
|
-
For more information, feedback or questions, please refer to the Keccak Team website:
|
7
|
-
https://keccak.team/
|
8
|
-
|
9
|
-
To the extent possible under law, the implementer has waived all copyright
|
10
|
-
and related or neighboring rights to the source code in this file.
|
11
|
-
http://creativecommons.org/publicdomain/zero/1.0/
|
12
|
-
|
13
|
-
---
|
14
|
-
|
15
|
-
This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
|
16
|
-
Please refer to PlSnP-documentation.h for more details.
|
17
|
-
|
18
|
-
This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
|
19
|
-
Please refer to LowLevel.build for the exact list of other files it must be combined with.
|
20
|
-
*/
|
21
|
-
|
22
|
-
#include <stdio.h>
|
23
|
-
#include <stdlib.h>
|
24
|
-
#include <string.h>
|
25
|
-
#include <stdint.h>
|
26
|
-
#include <smmintrin.h>
|
27
|
-
#include <wmmintrin.h>
|
28
|
-
#include <immintrin.h>
|
29
|
-
#include <emmintrin.h>
|
30
|
-
#include "align.h"
|
31
|
-
#include "KeccakP-1600-times2-SnP.h"
|
32
|
-
#include "SIMD512-2-config.h"
|
33
|
-
|
34
|
-
#include "brg_endian.h"
|
35
|
-
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
|
36
|
-
#error Expecting a little-endian platform
|
37
|
-
#endif
|
38
|
-
|
39
|
-
/* Comment out the define below when compiling for a CPU with AVX-512 SIMD */
|
40
|
-
/*
|
41
|
-
* Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined,
|
42
|
-
* errors will occur if we misinterpreted the AVX-512 intrinsics'
|
43
|
-
* API or functionality.
|
44
|
-
*/
|
45
|
-
/* #define SIMULATE_AVX512 */
|
46
|
-
|
47
|
-
#if defined(SIMULATE_AVX512)
|
48
|
-
|
49
|
-
/*
 * Software stand-in for the 512-bit integer vector type, used only when
 * SIMULATE_AVX512 is defined: eight 64-bit lanes kept in a plain array.
 */
typedef struct
{
    uint64_t x[8];  /* x[0] is lane 0, ..., x[7] is lane 7 */
} __m512i;
|
53
|
-
|
54
|
-
/*
 * Simulated lane-wise XOR of two 512-bit vectors.
 * Returns a vector whose lane k is a.x[k] ^ b.x[k] for k = 0..7.
 */
static __m512i _mm512_xor_si512( __m512i a, __m512i b)
{
    unsigned int lane = 0;

    /* a is a by-value copy, so it can double as the accumulator. */
    while ( lane < 8 ) {
        a.x[lane] ^= b.x[lane];
        ++lane;
    }
    return a;
}
|
63
|
-
|
64
|
-
/*
 * Simulated three-input bitwise logic on 128-bit vectors.
 * Only the two truth tables this file needs are provided:
 *   0x96 : a ^ b ^ c
 *   0xD2 : a ^ (~b & c)
 * Any other truth table prints a diagnostic and terminates the program.
 */
static __m128i _mm_ternarylogic_epi64(__m128i a, __m128i b, __m128i c, int imm)
{
    switch (imm) {
    case 0x96:
        return _mm_xor_si128( _mm_xor_si128( a, b ), c );
    case 0xD2:
        return _mm_xor_si128( a, _mm_andnot_si128(b, c) );
    default:
        break;
    }

    printf( "_mm_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
    exit(1);
}
|
74
|
-
|
75
|
-
/*
 * Simulated rotate-left of each 64-bit lane of a by offset bits.
 *
 * The count is reduced modulo 64 first, so any int yields a genuine rotation.
 * Without the reduction, counts above 64 make both shifts produce zero and
 * the result would be 0 instead of a rotated value.
 * A reduced count of 0 is fine: the right shift by 64 yields zero and the
 * left shift by 0 returns a unchanged.
 */
static __m128i _mm_rol_epi64(__m128i a, int offset)
{
    const int count = offset & 63;

    return _mm_or_si128(_mm_slli_epi64(a, count), _mm_srli_epi64(a, 64 - count));
}
|
79
|
-
|
80
|
-
/*
 * Simulated 8-lane gather of 64-bit values.
 *
 * idx   : eight 32-bit indices, interpreted as signed
 * p     : base address
 * scale : byte multiplier applied to every index
 *
 * Lane k of the result is the 64-bit value read from p + idx[k] * scale.
 */
static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
{
    __m512i r;
    unsigned int i;
    int32_t offset[8];

    /* offset[] is an ordinary stack array with no 32-byte alignment
       guarantee, so the unaligned store form is required here. */
    _mm256_storeu_si256( (__m256i*)offset, idx );
    for ( i = 0; i < 8; ++i ) {
        /* Widen before multiplying so the byte offset cannot wrap at 32 bits,
           and read through memcpy so the source needs no particular alignment. */
        memcpy( &r.x[i], (const char*)p + (int64_t)offset[i] * scale, sizeof r.x[i] );
    }
    return(r);
}
|
91
|
-
|
92
|
-
/*
 * Simulated 2-lane scatter of 64-bit values.
 *
 * p     : base address
 * idx   : vector of 32-bit indices (signed); only the two lowest are used
 * value : two 64-bit values to store
 * scale : byte multiplier applied to every index
 *
 * Lane k of value is written to p + idx[k] * scale for k = 0, 1.
 */
static void _mm_i32scatter_epi64( void *p, __m128i idx, __m128i value, int scale)
{
    unsigned int i;
    uint64_t v[2];
    int32_t offset[4];

    /* The temporaries are plain stack arrays without a 16-byte alignment
       guarantee, so spill both vectors with unaligned integer stores. */
    _mm_storeu_si128( (__m128i*)offset, idx );
    _mm_storeu_si128( (__m128i*)v, value );
    for ( i = 0; i < 2; ++i ) {
        /* Widen before multiplying so the byte offset cannot wrap at 32 bits;
           memcpy keeps the store valid for any destination alignment. */
        memcpy( (char*)p + (int64_t)offset[i] * scale, &v[i], sizeof v[i] );
    }
}
|
103
|
-
|
104
|
-
/*
 * Simulated 8-lane scatter of 64-bit values.
 *
 * p     : base address
 * idx   : eight 32-bit indices, interpreted as signed
 * value : eight 64-bit values to store
 * scale : byte multiplier applied to every index
 *
 * Lane k of value is written to p + idx[k] * scale for k = 0..7.
 */
static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
{
    unsigned int i;
    int32_t offset[8];

    /* offset[] is an ordinary stack array with no 32-byte alignment
       guarantee, so the unaligned store form is required here. */
    _mm256_storeu_si256( (__m256i*)offset, idx );
    for ( i = 0; i < 8; ++i ) {
        /* Widen before multiplying so the byte offset cannot wrap at 32 bits;
           memcpy keeps the store valid for any destination alignment. */
        memcpy( (char*)p + (int64_t)offset[i] * scale, &value.x[i], sizeof value.x[i] );
    }
}
|
113
|
-
|
114
|
-
#endif
|
115
|
-
|
116
|
-
typedef __m128i V128;
|
117
|
-
typedef __m256i V256;
|
118
|
-
typedef __m512i V512;
|
119
|
-
|
120
|
-
#if defined(KeccakP1600times2_useAVX512)
|
121
|
-
|
122
|
-
/* Bitwise helpers on 128-bit vectors (two 64-bit lanes) and 512-bit vectors. */
#define XOR(a,b)                    _mm_xor_si128(a,b)
#define XOR3(a,b,c)                 _mm_ternarylogic_epi64(a,b,c,0x96)  /* a ^ b ^ c */
#define XOR5(a,b,c,d,e)             XOR3(XOR3(a,b,c),d,e)
#define XOR512(a,b)                 _mm512_xor_si512(a,b)
#define ROL(a,offset)               _mm_rol_epi64(a,offset)
#define Chi(a,b,c)                  _mm_ternarylogic_epi64(a,b,c,0xD2)  /* a ^ (~b & c) */

/* Constant and index-vector construction. Every element of an index vector is
   a 32-bit quantity, so all arguments get the same 32-bit cast. */
#define CONST128_64(a)              _mm_set1_epi64x(a)
#define LOAD4_32(a,b,c,d)           _mm_set_epi32((uint32_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d))
#define LOAD8_32(a,b,c,d,e,f,g,h)   _mm256_set_epi32((uint32_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d), (uint32_t)(e), (uint32_t)(f), (uint32_t)(g), (uint32_t)(h))

/* Gather/scatter of 64-bit lanes; indices are scaled by 8 bytes (one lane). */
#define LOAD_GATHER2_64(idx,p)      _mm_i32gather_epi64( (const void*)(p), idx, 8)
#define LOAD_GATHER8_64(idx,p)      _mm512_i32gather_epi64( idx, (const void*)(p), 8)
#define STORE_SCATTER2_64(p,idx, v) _mm_i32scatter_epi64( (void*)(p), idx, v, 8)
#define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)
|
136
|
-
|
137
|
-
#endif
|
138
|
-
|
139
|
-
/*
 * Position of a 64-bit lane inside the interleaved state array: the lanes of
 * the two instances alternate, so lane lanePosition of instance instanceIndex
 * lives at 2*lanePosition + instanceIndex.
 * Both arguments are parenthesized so expression arguments expand correctly.
 */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + (instanceIndex))

/* Size of one lane in bytes (lanes are handled as uint64_t). */
#define SnP_laneLengthInBytes 8
|
141
|
-
|
142
|
-
void KeccakP1600times2_InitializeAll(void *states)
|
143
|
-
{
|
144
|
-
memset(states, 0, KeccakP1600times2_statesSizeInBytes);
|
145
|
-
}
|
146
|
-
|
147
|
-
void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
148
|
-
{
|
149
|
-
unsigned int sizeLeft = length;
|
150
|
-
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
151
|
-
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
152
|
-
const unsigned char *curData = data;
|
153
|
-
uint64_t *statesAsLanes = states;
|
154
|
-
|
155
|
-
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
156
|
-
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
157
|
-
uint64_t lane = 0;
|
158
|
-
if (bytesInLane > sizeLeft)
|
159
|
-
bytesInLane = sizeLeft;
|
160
|
-
memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
|
161
|
-
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
162
|
-
sizeLeft -= bytesInLane;
|
163
|
-
lanePosition++;
|
164
|
-
curData += bytesInLane;
|
165
|
-
}
|
166
|
-
|
167
|
-
while(sizeLeft >= SnP_laneLengthInBytes) {
|
168
|
-
uint64_t lane = *((const uint64_t*)curData);
|
169
|
-
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
170
|
-
sizeLeft -= SnP_laneLengthInBytes;
|
171
|
-
lanePosition++;
|
172
|
-
curData += SnP_laneLengthInBytes;
|
173
|
-
}
|
174
|
-
|
175
|
-
if (sizeLeft > 0) {
|
176
|
-
uint64_t lane = 0;
|
177
|
-
memcpy(&lane, curData, sizeLeft);
|
178
|
-
statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
|
179
|
-
}
|
180
|
-
}
|
181
|
-
|
182
|
-
void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
183
|
-
{
|
184
|
-
V128 *stateAsLanes128 = states;
|
185
|
-
V512 *stateAsLanes512 = states;
|
186
|
-
const uint64_t *dataAsLanes = (const uint64_t *)data;
|
187
|
-
unsigned int i;
|
188
|
-
V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
|
189
|
-
V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
|
190
|
-
|
191
|
-
#define Add_In1( argIndex ) stateAsLanes128[argIndex] = XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, dataAsLanes+argIndex))
|
192
|
-
#define Add_In4( argIndex ) stateAsLanes512[argIndex/4] = XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
|
193
|
-
if ( laneCount >= 16 ) {
|
194
|
-
Add_In4( 0 );
|
195
|
-
Add_In4( 4 );
|
196
|
-
Add_In4( 8 );
|
197
|
-
Add_In4( 12 );
|
198
|
-
if ( laneCount >= 20 ) {
|
199
|
-
Add_In4( 16 );
|
200
|
-
for(i=20; i<laneCount; i++)
|
201
|
-
Add_In1( i );
|
202
|
-
}
|
203
|
-
else {
|
204
|
-
for(i=16; i<laneCount; i++)
|
205
|
-
Add_In1( i );
|
206
|
-
}
|
207
|
-
}
|
208
|
-
else {
|
209
|
-
for(i=0; i<laneCount; i++)
|
210
|
-
Add_In1( i );
|
211
|
-
}
|
212
|
-
#undef Add_In1
|
213
|
-
#undef Add_In4
|
214
|
-
}
|
215
|
-
|
216
|
-
void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
|
217
|
-
{
|
218
|
-
unsigned int sizeLeft = length;
|
219
|
-
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
220
|
-
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
221
|
-
const unsigned char *curData = data;
|
222
|
-
uint64_t *statesAsLanes = states;
|
223
|
-
|
224
|
-
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
225
|
-
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
226
|
-
if (bytesInLane > sizeLeft)
|
227
|
-
bytesInLane = sizeLeft;
|
228
|
-
memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
|
229
|
-
sizeLeft -= bytesInLane;
|
230
|
-
lanePosition++;
|
231
|
-
curData += bytesInLane;
|
232
|
-
}
|
233
|
-
|
234
|
-
while(sizeLeft >= SnP_laneLengthInBytes) {
|
235
|
-
uint64_t lane = *((const uint64_t*)curData);
|
236
|
-
statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
|
237
|
-
sizeLeft -= SnP_laneLengthInBytes;
|
238
|
-
lanePosition++;
|
239
|
-
curData += SnP_laneLengthInBytes;
|
240
|
-
}
|
241
|
-
|
242
|
-
if (sizeLeft > 0) {
|
243
|
-
memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
|
244
|
-
}
|
245
|
-
}
|
246
|
-
|
247
|
-
void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
248
|
-
{
|
249
|
-
V128 *stateAsLanes128 = states;
|
250
|
-
V512 *stateAsLanes512 = states;
|
251
|
-
const uint64_t *dataAsLanes = (const uint64_t *)data;
|
252
|
-
unsigned int i;
|
253
|
-
V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
|
254
|
-
V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
|
255
|
-
|
256
|
-
#define OverWr1( argIndex ) stateAsLanes128[argIndex] = LOAD_GATHER2_64(index128, dataAsLanes+argIndex)
|
257
|
-
#define OverWr4( argIndex ) stateAsLanes512[argIndex/4] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
|
258
|
-
if ( laneCount >= 16 ) {
|
259
|
-
OverWr4( 0 );
|
260
|
-
OverWr4( 4 );
|
261
|
-
OverWr4( 8 );
|
262
|
-
OverWr4( 12 );
|
263
|
-
if ( laneCount >= 20 ) {
|
264
|
-
OverWr4( 16 );
|
265
|
-
for(i=20; i<laneCount; i++)
|
266
|
-
OverWr1( i );
|
267
|
-
}
|
268
|
-
else {
|
269
|
-
for(i=16; i<laneCount; i++)
|
270
|
-
OverWr1( i );
|
271
|
-
}
|
272
|
-
}
|
273
|
-
else {
|
274
|
-
for(i=0; i<laneCount; i++)
|
275
|
-
OverWr1( i );
|
276
|
-
}
|
277
|
-
#undef OverWr1
|
278
|
-
#undef OverWr4
|
279
|
-
}
|
280
|
-
|
281
|
-
void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
|
282
|
-
{
|
283
|
-
unsigned int sizeLeft = byteCount;
|
284
|
-
unsigned int lanePosition = 0;
|
285
|
-
uint64_t *statesAsLanes = states;
|
286
|
-
|
287
|
-
while(sizeLeft >= SnP_laneLengthInBytes) {
|
288
|
-
statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
|
289
|
-
sizeLeft -= SnP_laneLengthInBytes;
|
290
|
-
lanePosition++;
|
291
|
-
}
|
292
|
-
|
293
|
-
if (sizeLeft > 0) {
|
294
|
-
memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
|
295
|
-
}
|
296
|
-
}
|
297
|
-
|
298
|
-
void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
|
299
|
-
{
|
300
|
-
unsigned int sizeLeft = length;
|
301
|
-
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
302
|
-
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
303
|
-
unsigned char *curData = data;
|
304
|
-
const uint64_t *statesAsLanes = states;
|
305
|
-
|
306
|
-
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
307
|
-
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
308
|
-
if (bytesInLane > sizeLeft)
|
309
|
-
bytesInLane = sizeLeft;
|
310
|
-
memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
|
311
|
-
sizeLeft -= bytesInLane;
|
312
|
-
lanePosition++;
|
313
|
-
curData += bytesInLane;
|
314
|
-
}
|
315
|
-
|
316
|
-
while(sizeLeft >= SnP_laneLengthInBytes) {
|
317
|
-
*(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
318
|
-
sizeLeft -= SnP_laneLengthInBytes;
|
319
|
-
lanePosition++;
|
320
|
-
curData += SnP_laneLengthInBytes;
|
321
|
-
}
|
322
|
-
|
323
|
-
if (sizeLeft > 0) {
|
324
|
-
memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
|
325
|
-
}
|
326
|
-
}
|
327
|
-
|
328
|
-
void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
|
329
|
-
{
|
330
|
-
const V128 *stateAsLanes128 = states;
|
331
|
-
const V512 *stateAsLanes512 = states;
|
332
|
-
uint64_t *dataAsLanes = (uint64_t *)data;
|
333
|
-
unsigned int i;
|
334
|
-
V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
|
335
|
-
V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
|
336
|
-
|
337
|
-
#define Extr1( argIndex ) STORE_SCATTER2_64(dataAsLanes+argIndex, index128, stateAsLanes128[argIndex])
|
338
|
-
#define Extr4( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/4])
|
339
|
-
if ( laneCount >= 16 ) {
|
340
|
-
Extr4( 0 );
|
341
|
-
Extr4( 4 );
|
342
|
-
Extr4( 8 );
|
343
|
-
Extr4( 12 );
|
344
|
-
if ( laneCount >= 20 ) {
|
345
|
-
Extr4( 16 );
|
346
|
-
for(i=20; i<laneCount; i++)
|
347
|
-
Extr1( i );
|
348
|
-
}
|
349
|
-
else {
|
350
|
-
for(i=16; i<laneCount; i++)
|
351
|
-
Extr1( i );
|
352
|
-
}
|
353
|
-
}
|
354
|
-
else {
|
355
|
-
for(i=0; i<laneCount; i++)
|
356
|
-
Extr1( i );
|
357
|
-
}
|
358
|
-
#undef Extr1
|
359
|
-
#undef Extr4
|
360
|
-
}
|
361
|
-
|
362
|
-
void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
|
363
|
-
{
|
364
|
-
unsigned int sizeLeft = length;
|
365
|
-
unsigned int lanePosition = offset/SnP_laneLengthInBytes;
|
366
|
-
unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
|
367
|
-
const unsigned char *curInput = input;
|
368
|
-
unsigned char *curOutput = output;
|
369
|
-
const uint64_t *statesAsLanes = states;
|
370
|
-
|
371
|
-
if ((sizeLeft > 0) && (offsetInLane != 0)) {
|
372
|
-
unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
|
373
|
-
uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
|
374
|
-
if (bytesInLane > sizeLeft)
|
375
|
-
bytesInLane = sizeLeft;
|
376
|
-
sizeLeft -= bytesInLane;
|
377
|
-
do {
|
378
|
-
*(curOutput++) = *(curInput++) ^ (unsigned char)lane;
|
379
|
-
lane >>= 8;
|
380
|
-
} while ( --bytesInLane != 0);
|
381
|
-
lanePosition++;
|
382
|
-
}
|
383
|
-
|
384
|
-
while(sizeLeft >= SnP_laneLengthInBytes) {
|
385
|
-
*((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
386
|
-
sizeLeft -= SnP_laneLengthInBytes;
|
387
|
-
lanePosition++;
|
388
|
-
curInput += SnP_laneLengthInBytes;
|
389
|
-
curOutput += SnP_laneLengthInBytes;
|
390
|
-
}
|
391
|
-
|
392
|
-
if (sizeLeft != 0) {
|
393
|
-
uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
|
394
|
-
do {
|
395
|
-
*(curOutput++) = *(curInput++) ^ (unsigned char)lane;
|
396
|
-
lane >>= 8;
|
397
|
-
} while ( --sizeLeft != 0);
|
398
|
-
}
|
399
|
-
}
|
400
|
-
|
401
|
-
void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
|
402
|
-
{
|
403
|
-
const V128 *stateAsLanes128 = states;
|
404
|
-
const V512 *stateAsLanes512 = states;
|
405
|
-
const uint64_t *inAsLanes = (const uint64_t *)input;
|
406
|
-
uint64_t *outAsLanes = (uint64_t *)output;
|
407
|
-
unsigned int i;
|
408
|
-
V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
|
409
|
-
V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
|
410
|
-
|
411
|
-
#define ExtrAdd1( argIndex ) STORE_SCATTER2_64(outAsLanes+argIndex, index128, XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, inAsLanes+argIndex)))
|
412
|
-
#define ExtrAdd4( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
|
413
|
-
if ( laneCount >= 16 ) {
|
414
|
-
ExtrAdd4( 0 );
|
415
|
-
ExtrAdd4( 4 );
|
416
|
-
ExtrAdd4( 8 );
|
417
|
-
ExtrAdd4( 12 );
|
418
|
-
if ( laneCount >= 20 ) {
|
419
|
-
ExtrAdd4( 16 );
|
420
|
-
for(i=20; i<laneCount; i++)
|
421
|
-
ExtrAdd1( i );
|
422
|
-
}
|
423
|
-
else {
|
424
|
-
for(i=16; i<laneCount; i++)
|
425
|
-
ExtrAdd1( i );
|
426
|
-
}
|
427
|
-
}
|
428
|
-
else {
|
429
|
-
for(i=0; i<laneCount; i++)
|
430
|
-
ExtrAdd1( i );
|
431
|
-
}
|
432
|
-
#undef ExtrAdd1
|
433
|
-
#undef ExtrAdd4
|
434
|
-
|
435
|
-
}
|
436
|
-
|
437
|
-
/*
 * The 24 round constants, indexed by round number. The round macros XOR
 * entry [i] into the first lane at the end of round i (the Iota step).
 */
static ALIGN(KeccakP1600times2_statesAlignment) const uint64_t KeccakP1600RoundConstants[24] = {
    0x0000000000000001ULL,  /* round  0 */
    0x0000000000008082ULL,  /* round  1 */
    0x800000000000808aULL,  /* round  2 */
    0x8000000080008000ULL,  /* round  3 */
    0x000000000000808bULL,  /* round  4 */
    0x0000000080000001ULL,  /* round  5 */
    0x8000000080008081ULL,  /* round  6 */
    0x8000000000008009ULL,  /* round  7 */
    0x000000000000008aULL,  /* round  8 */
    0x0000000000000088ULL,  /* round  9 */
    0x0000000080008009ULL,  /* round 10 */
    0x000000008000000aULL,  /* round 11 */
    0x000000008000808bULL,  /* round 12 */
    0x800000000000008bULL,  /* round 13 */
    0x8000000000008089ULL,  /* round 14 */
    0x8000000000008003ULL,  /* round 15 */
    0x8000000000008002ULL,  /* round 16 */
    0x8000000000000080ULL,  /* round 17 */
    0x000000000000800aULL,  /* round 18 */
    0x800000008000000aULL,  /* round 19 */
    0x8000000080008081ULL,  /* round 20 */
    0x8000000000008080ULL,  /* round 21 */
    0x0000000080000001ULL,  /* round 22 */
    0x8000000080008008ULL   /* round 23 */
};
|
462
|
-
|
463
|
-
/*
 * Declares every V128 working variable used by the round macros:
 *   _Ba.._Bu  column parities, later reused as the rotated row fed to Chi
 *   _Da.._Du  Theta correction values
 *   _ba.._su  the 25 state lanes
 * The expansion has no trailing semicolon; the caller supplies it.
 */
#define KeccakP_DeclareVars \
    V128 _Ba, _Be, _Bi, _Bo, _Bu; \
    V128 _Da, _De, _Di, _Do, _Du; \
    V128 _ba, _be, _bi, _bo, _bu; \
    V128 _ga, _ge, _gi, _go, _gu; \
    V128 _ka, _ke, _ki, _ko, _ku; \
    V128 _ma, _me, _mi, _mo, _mu; \
    V128 _sa, _se, _si, _so, _su
|
471
|
-
|
472
|
-
/*
 * Process five lanes (_L1.._L5) in place.
 *
 *   1. XOR each lane with its Theta correction (_Da.._Du) into _Bb1.._Bb5.
 *   2. Rotate each result left by _Rr1.._Rr5; the first rotation is skipped
 *      when _Rr1 is 0.
 *   3. Write Chi of the row back into _L1.._L5.
 *
 * Chi always reads _Ba.._Bu by name, so callers must pass some ordering of
 * exactly those five variables as _Bb1.._Bb5. The expansion ends with a
 * semicolon of its own.
 */
#define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
    /* apply the Theta correction */ \
    _Bb1 = XOR(_L1, _Da); \
    _Bb2 = XOR(_L2, _De); \
    _Bb3 = XOR(_L3, _Di); \
    _Bb4 = XOR(_L4, _Do); \
    _Bb5 = XOR(_L5, _Du); \
    /* rotate */ \
    if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
    _Bb2 = ROL(_Bb2, _Rr2); \
    _Bb3 = ROL(_Bb3, _Rr3); \
    _Bb4 = ROL(_Bb4, _Rr4); \
    _Bb5 = ROL(_Bb5, _Rr5); \
    /* Chi over the row, written back into the source lanes */ \
    _L1 = Chi( _Ba, _Be, _Bi); \
    _L2 = Chi( _Be, _Bi, _Bo); \
    _L3 = Chi( _Bi, _Bo, _Bu); \
    _L4 = Chi( _Bo, _Bu, _Ba); \
    _L5 = Chi( _Bu, _Ba, _Be);
|
488
|
-
|
489
|
-
/*
 * First row of a round. Computes the five column parities over the whole
 * state, derives the Theta corrections _Da.._Du from them, processes lanes
 * _L1.._L5 with rotations (0, 44, 43, 21, 14), and finally XORs the round
 * constant _rc into _L1 (Iota).
 *
 * _Da.._Du stay valid afterwards for the KeccakP_ThetaRhoPiChi1..4 rows of
 * the same round. The expansion has no trailing semicolon.
 */
#define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
    /* Theta effect: parity of each column */ \
    _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); \
    _Be = XOR5( _be, _ge, _ke, _me, _se ); \
    _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
    _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
    _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
    /* D[x] = ROL(parity[x+1], 1) ... */ \
    _Da = ROL( _Be, 1 ); \
    _De = ROL( _Bi, 1 ); \
    _Di = ROL( _Bo, 1 ); \
    _Do = ROL( _Bu, 1 ); \
    _Du = ROL( _Ba, 1 ); \
    /* ... XOR parity[x-1] */ \
    _Da = XOR( _Da, _Bu ); \
    _De = XOR( _De, _Ba ); \
    _Di = XOR( _Di, _Be ); \
    _Do = XOR( _Do, _Bi ); \
    _Du = XOR( _Du, _Bo ); \
    KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
    _L1 = XOR(_L1, _rc) /* Iota */
|
507
|
-
|
508
|
-
/*
 * Rows 1..4 of a round. Each forwards to KeccakP_ThetaRhoPiChi with that
 * row's fixed rotation amounts and its own ordering of the temporaries
 * _Ba.._Bu. They rely on _Da.._Du left by KeccakP_ThetaRhoPiChiIota0.
 */
#define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
    KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be,  3, 45, 61, 28, 20 )

#define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
    KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18,  1,  6, 25,  8 )

#define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
    KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )

#define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
    KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41,  2, 62, 55, 39 )
|
519
|
-
|
520
|
-
/*
 * Four consecutive rounds using round constants [i] .. [i+3].
 *
 * Lanes are never moved between variables; instead each round names the
 * lanes in a different order. The fourth round uses the natural order
 * (_ba,_be,_bi,_bo,_bu / _ga.. / _ka.. / _ma.. / _sa..).
 */
#define KeccakP_4rounds( i ) \
    /* round i */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST128_64(KeccakP1600RoundConstants[i]) ); \
    KeccakP_ThetaRhoPiChi1(    _ka, _me, _si, _bo, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _sa, _be, _gi, _ko, _mu ); \
    KeccakP_ThetaRhoPiChi3(    _ga, _ke, _mi, _so, _bu ); \
    KeccakP_ThetaRhoPiChi4(    _ma, _se, _bi, _go, _ku ); \
    \
    /* round i+1 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
    KeccakP_ThetaRhoPiChi1(    _sa, _ke, _bi, _mo, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ma, _ge, _si, _ko, _bu ); \
    KeccakP_ThetaRhoPiChi3(    _ka, _be, _mi, _go, _su ); \
    KeccakP_ThetaRhoPiChi4(    _ga, _se, _ki, _bo, _mu ); \
    \
    /* round i+2 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i+2]) ); \
    KeccakP_ThetaRhoPiChi1(    _ma, _be, _ki, _so, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ga, _me, _bi, _ko, _su ); \
    KeccakP_ThetaRhoPiChi3(    _sa, _ge, _mi, _bo, _ku ); \
    KeccakP_ThetaRhoPiChi4(    _ka, _se, _gi, _mo, _bu ); \
    \
    /* round i+3 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+3]) ); \
    KeccakP_ThetaRhoPiChi1(    _ga, _ge, _gi, _go, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ka, _ke, _ki, _ko, _ku ); \
    KeccakP_ThetaRhoPiChi3(    _ma, _me, _mi, _mo, _mu ); \
    KeccakP_ThetaRhoPiChi4(    _sa, _se, _si, _so, _su )
|
544
|
-
|
545
|
-
/*
 * Two consecutive rounds using round constants [i] and [i+1].
 *
 * The lane orderings are those of the third and fourth rounds of
 * KeccakP_4rounds, so the lanes must already be held in the matching
 * permuted variables on entry (see copyFromState2rounds).
 */
#define KeccakP_2rounds( i ) \
    /* round i */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i]) ); \
    KeccakP_ThetaRhoPiChi1(    _ma, _be, _ki, _so, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ga, _me, _bi, _ko, _su ); \
    KeccakP_ThetaRhoPiChi3(    _sa, _ge, _mi, _bo, _ku ); \
    KeccakP_ThetaRhoPiChi4(    _ka, _se, _gi, _mo, _bu ); \
    \
    /* round i+1 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
    KeccakP_ThetaRhoPiChi1(    _ga, _ge, _gi, _go, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ka, _ke, _ki, _ko, _ku ); \
    KeccakP_ThetaRhoPiChi3(    _ma, _me, _mi, _mo, _mu ); \
    KeccakP_ThetaRhoPiChi4(    _sa, _se, _si, _so, _su )
|
557
|
-
|
558
|
-
/*
 * rounds12 runs rounds 12..23 and rounds24 runs rounds 0..23, both built
 * from KeccakP_4rounds. How far they are unrolled is selected at compile
 * time; the looping variants need an `unsigned int i` in scope at the point
 * of use.
 */
#ifdef KeccakP1600times2_fullUnrolling

/* Fully unrolled: no loop variable needed. */
#define rounds12 \
    KeccakP_4rounds( 12 ); \
    KeccakP_4rounds( 16 ); \
    KeccakP_4rounds( 20 )

#define rounds24 \
    KeccakP_4rounds( 0 ); \
    KeccakP_4rounds( 4 ); \
    KeccakP_4rounds( 8 ); \
    KeccakP_4rounds( 12 ); \
    KeccakP_4rounds( 16 ); \
    KeccakP_4rounds( 20 )

#elif (KeccakP1600times2_unrolling == 4)

/* Four rounds per iteration; both macros loop on i. */
#define rounds12 \
    for (i = 12; i < 24; i += 4) { \
        KeccakP_4rounds( i ); \
    }

#define rounds24 \
    for (i = 0; i < 24; i += 4) { \
        KeccakP_4rounds( i ); \
    }

#elif (KeccakP1600times2_unrolling == 12)

/* Twelve rounds per iteration: rounds12 is straight-line, rounds24 loops twice. */
#define rounds12 \
    KeccakP_4rounds( 12 ); \
    KeccakP_4rounds( 16 ); \
    KeccakP_4rounds( 20 )

#define rounds24 \
    for (i = 0; i < 24; i += 12) { \
        KeccakP_4rounds( i ); \
        KeccakP_4rounds( i+4 ); \
        KeccakP_4rounds( i+8 ); \
    }

#else
#error "Unrolling is not correctly specified!"
#endif
|
605
|
-
|
606
|
-
/*
 * Load the 25 lane slots of pState into the working variables in the
 * permuted order expected on entry to KeccakP_2rounds. The trailing comment
 * on each line names the variable that copyFromState would load from the
 * same slot; lines without one coincide with the natural order.
 */
#define copyFromState2rounds(pState) \
    _ba = pState[ 0]; \
    _be = pState[16]; /* me */ \
    _bi = pState[ 7]; /* gi */ \
    _bo = pState[23]; /* so */ \
    _bu = pState[14]; /* ku */ \
    \
    _ga = pState[20]; /* sa */ \
    _ge = pState[11]; /* ke */ \
    _gi = pState[ 2]; /* bi */ \
    _go = pState[18]; /* mo */ \
    _gu = pState[ 9]; \
    \
    _ka = pState[15]; /* ma */ \
    _ke = pState[ 6]; /* ge */ \
    _ki = pState[22]; /* si */ \
    _ko = pState[13]; \
    _ku = pState[ 4]; /* bu */ \
    \
    _ma = pState[10]; /* ka */ \
    _me = pState[ 1]; /* be */ \
    _mi = pState[17]; \
    _mo = pState[ 8]; /* go */ \
    _mu = pState[24]; /* su */ \
    \
    _sa = pState[ 5]; /* ga */ \
    _se = pState[21]; \
    _si = pState[12]; /* ki */ \
    _so = pState[ 3]; /* bo */ \
    _su = pState[19]  /* mu */
|
632
|
-
|
633
|
-
/*
 * Load the 25 lane slots of pState into the working variables in natural
 * order: slot 0 -> _ba, slot 1 -> _be, ..., slot 24 -> _su.
 * The expansion has no trailing semicolon.
 */
#define copyFromState(pState) \
    _ba = pState[ 0]; \
    _be = pState[ 1]; \
    _bi = pState[ 2]; \
    _bo = pState[ 3]; \
    _bu = pState[ 4]; \
    \
    _ga = pState[ 5]; \
    _ge = pState[ 6]; \
    _gi = pState[ 7]; \
    _go = pState[ 8]; \
    _gu = pState[ 9]; \
    \
    _ka = pState[10]; \
    _ke = pState[11]; \
    _ki = pState[12]; \
    _ko = pState[13]; \
    _ku = pState[14]; \
    \
    _ma = pState[15]; \
    _me = pState[16]; \
    _mi = pState[17]; \
    _mo = pState[18]; \
    _mu = pState[19]; \
    \
    _sa = pState[20]; \
    _se = pState[21]; \
    _si = pState[22]; \
    _so = pState[23]; \
    _su = pState[24]
|
659
|
-
|
660
|
-
/*
 * Store the working variables back into the 25 lane slots of pState in
 * natural order: _ba -> slot 0, _be -> slot 1, ..., _su -> slot 24.
 * The expansion has no trailing semicolon.
 */
#define copyToState(pState) \
    pState[ 0] = _ba; \
    pState[ 1] = _be; \
    pState[ 2] = _bi; \
    pState[ 3] = _bo; \
    pState[ 4] = _bu; \
    \
    pState[ 5] = _ga; \
    pState[ 6] = _ge; \
    pState[ 7] = _gi; \
    pState[ 8] = _go; \
    pState[ 9] = _gu; \
    \
    pState[10] = _ka; \
    pState[11] = _ke; \
    pState[12] = _ki; \
    pState[13] = _ko; \
    pState[14] = _ku; \
    \
    pState[15] = _ma; \
    pState[16] = _me; \
    pState[17] = _mi; \
    pState[18] = _mo; \
    pState[19] = _mu; \
    \
    pState[20] = _sa; \
    pState[21] = _se; \
    pState[22] = _si; \
    pState[23] = _so; \
    pState[24] = _su
|
686
|
-
|
687
|
-
void KeccakP1600times2_PermuteAll_24rounds(void *states)
|
688
|
-
{
|
689
|
-
V128 *statesAsLanes = states;
|
690
|
-
KeccakP_DeclareVars;
|
691
|
-
#ifndef KeccakP1600times2_fullUnrolling
|
692
|
-
unsigned int i;
|
693
|
-
#endif
|
694
|
-
|
695
|
-
copyFromState(statesAsLanes);
|
696
|
-
rounds24;
|
697
|
-
copyToState(statesAsLanes);
|
698
|
-
}
|
699
|
-
|
700
|
-
void KeccakP1600times2_PermuteAll_12rounds(void *states)
|
701
|
-
{
|
702
|
-
V128 *statesAsLanes = states;
|
703
|
-
KeccakP_DeclareVars;
|
704
|
-
#if (KeccakP1600times2_unrolling < 12)
|
705
|
-
unsigned int i;
|
706
|
-
#endif
|
707
|
-
|
708
|
-
copyFromState(statesAsLanes);
|
709
|
-
rounds12;
|
710
|
-
copyToState(statesAsLanes);
|
711
|
-
}
|
712
|
-
|
713
|
-
void KeccakP1600times2_PermuteAll_6rounds(void *states)
|
714
|
-
{
|
715
|
-
V128 *statesAsLanes = states;
|
716
|
-
KeccakP_DeclareVars;
|
717
|
-
|
718
|
-
copyFromState2rounds(statesAsLanes);
|
719
|
-
KeccakP_2rounds( 18 );
|
720
|
-
KeccakP_4rounds( 20 );
|
721
|
-
copyToState(statesAsLanes);
|
722
|
-
}
|
723
|
-
|
724
|
-
void KeccakP1600times2_PermuteAll_4rounds(void *states)
|
725
|
-
{
|
726
|
-
V128 *statesAsLanes = states;
|
727
|
-
KeccakP_DeclareVars;
|
728
|
-
|
729
|
-
copyFromState(statesAsLanes);
|
730
|
-
KeccakP_4rounds( 20 );
|
731
|
-
copyToState(statesAsLanes);
|
732
|
-
}
|
733
|
-
|
734
|
-
size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
|
735
|
-
{
|
736
|
-
size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
|
737
|
-
|
738
|
-
if (laneCount == 21) {
|
739
|
-
#ifndef KeccakP1600times2_fullUnrolling
|
740
|
-
unsigned int i;
|
741
|
-
#endif
|
742
|
-
const unsigned char *dataStart = data;
|
743
|
-
V128 *statesAsLanes = states;
|
744
|
-
const uint64_t *dataAsLanes = (const uint64_t *)data;
|
745
|
-
KeccakP_DeclareVars;
|
746
|
-
V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
|
747
|
-
|
748
|
-
copyFromState(statesAsLanes);
|
749
|
-
while(dataByteLen >= dataMinimumSize) {
|
750
|
-
#define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
|
751
|
-
Add_In( _ba, 0 );
|
752
|
-
Add_In( _be, 1 );
|
753
|
-
Add_In( _bi, 2 );
|
754
|
-
Add_In( _bo, 3 );
|
755
|
-
Add_In( _bu, 4 );
|
756
|
-
Add_In( _ga, 5 );
|
757
|
-
Add_In( _ge, 6 );
|
758
|
-
Add_In( _gi, 7 );
|
759
|
-
Add_In( _go, 8 );
|
760
|
-
Add_In( _gu, 9 );
|
761
|
-
Add_In( _ka, 10 );
|
762
|
-
Add_In( _ke, 11 );
|
763
|
-
Add_In( _ki, 12 );
|
764
|
-
Add_In( _ko, 13 );
|
765
|
-
Add_In( _ku, 14 );
|
766
|
-
Add_In( _ma, 15 );
|
767
|
-
Add_In( _me, 16 );
|
768
|
-
Add_In( _mi, 17 );
|
769
|
-
Add_In( _mo, 18 );
|
770
|
-
Add_In( _mu, 19 );
|
771
|
-
Add_In( _sa, 20 );
|
772
|
-
#undef Add_In
|
773
|
-
rounds24;
|
774
|
-
dataAsLanes += laneOffsetSerial;
|
775
|
-
dataByteLen -= laneOffsetSerial*8;
|
776
|
-
}
|
777
|
-
copyToState(statesAsLanes);
|
778
|
-
return (const unsigned char *)dataAsLanes - dataStart;
|
779
|
-
}
|
780
|
-
else {
|
781
|
-
const unsigned char *dataStart = data;
|
782
|
-
|
783
|
-
while(dataByteLen >= dataMinimumSize) {
|
784
|
-
KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
|
785
|
-
KeccakP1600times2_PermuteAll_24rounds(states);
|
786
|
-
data += laneOffsetSerial*8;
|
787
|
-
dataByteLen -= laneOffsetSerial*8;
|
788
|
-
}
|
789
|
-
return data - dataStart;
|
790
|
-
}
|
791
|
-
}
|
792
|
-
|
793
|
-
size_t KeccakP1600times2_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
|
794
|
-
{
|
795
|
-
size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
|
796
|
-
|
797
|
-
if (laneCount == 21) {
|
798
|
-
#if (KeccakP1600times2_unrolling < 12)
|
799
|
-
unsigned int i;
|
800
|
-
#endif
|
801
|
-
const unsigned char *dataStart = data;
|
802
|
-
V128 *statesAsLanes = states;
|
803
|
-
const uint64_t *dataAsLanes = (const uint64_t *)data;
|
804
|
-
KeccakP_DeclareVars;
|
805
|
-
V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
|
806
|
-
|
807
|
-
copyFromState(statesAsLanes);
|
808
|
-
while(dataByteLen >= dataMinimumSize) {
|
809
|
-
#define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
|
810
|
-
Add_In( _ba, 0 );
|
811
|
-
Add_In( _be, 1 );
|
812
|
-
Add_In( _bi, 2 );
|
813
|
-
Add_In( _bo, 3 );
|
814
|
-
Add_In( _bu, 4 );
|
815
|
-
Add_In( _ga, 5 );
|
816
|
-
Add_In( _ge, 6 );
|
817
|
-
Add_In( _gi, 7 );
|
818
|
-
Add_In( _go, 8 );
|
819
|
-
Add_In( _gu, 9 );
|
820
|
-
Add_In( _ka, 10 );
|
821
|
-
Add_In( _ke, 11 );
|
822
|
-
Add_In( _ki, 12 );
|
823
|
-
Add_In( _ko, 13 );
|
824
|
-
Add_In( _ku, 14 );
|
825
|
-
Add_In( _ma, 15 );
|
826
|
-
Add_In( _me, 16 );
|
827
|
-
Add_In( _mi, 17 );
|
828
|
-
Add_In( _mo, 18 );
|
829
|
-
Add_In( _mu, 19 );
|
830
|
-
Add_In( _sa, 20 );
|
831
|
-
#undef Add_In
|
832
|
-
rounds12;
|
833
|
-
dataAsLanes += laneOffsetSerial;
|
834
|
-
dataByteLen -= laneOffsetSerial*8;
|
835
|
-
}
|
836
|
-
copyToState(statesAsLanes);
|
837
|
-
return (const unsigned char *)dataAsLanes - dataStart;
|
838
|
-
}
|
839
|
-
else {
|
840
|
-
const unsigned char *dataStart = data;
|
841
|
-
|
842
|
-
while(dataByteLen >= dataMinimumSize) {
|
843
|
-
KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
|
844
|
-
KeccakP1600times2_PermuteAll_12rounds(states);
|
845
|
-
data += laneOffsetSerial*8;
|
846
|
-
dataByteLen -= laneOffsetSerial*8;
|
847
|
-
}
|
848
|
-
return data - dataStart;
|
849
|
-
}
|
850
|
-
}
|