sleeping_kangaroo12 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -67
- data/ext/Rakefile +12 -37
- data/ext/binding/sleeping_kangaroo12.c +1 -16
- data/ext/{xkcp → k12}/Makefile +0 -0
- data/ext/k12/Makefile.build +118 -0
- data/ext/k12/README.markdown +86 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
- data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
- data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
- data/ext/k12/lib/KangarooTwelve.c +332 -0
- data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
- data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
- data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
- data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
- data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
- data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
- data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
- data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
- data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
- data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
- data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
- data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
- data/lib/sleeping_kangaroo12/version.rb +1 -1
- metadata +33 -276
- data/ext/config/xkcp.build +0 -17
- data/ext/xkcp/LICENSE +0 -1
- data/ext/xkcp/Makefile.build +0 -200
- data/ext/xkcp/README.markdown +0 -296
- data/ext/xkcp/lib/HighLevel.build +0 -143
- data/ext/xkcp/lib/LowLevel.build +0 -757
- data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
- data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
- data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
- data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
- data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
- data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
- data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
- data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
- data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
- data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
- data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
- data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
- data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
- data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
- data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
- data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
- data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
- data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
- data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
- data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
- data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
- data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
- data/ext/xkcp/lib/high/common/Phases.h +0 -25
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
- data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
- data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
- data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
- data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
- data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
- data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
- data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
- data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
- data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
- data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
- data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
- data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
- data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
- data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
- data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
- data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
- data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
- data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
- data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
- data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
- data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
- data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
- data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
- data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
- data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
- data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
- data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
- data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
- data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
- data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
- data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
- data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
- data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
- data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
- data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
- data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
- data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
- data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
- data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
- data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
- data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
- data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
- data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
- data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
- data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
- data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
- data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
- data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
- data/ext/xkcp/util/KeccakSum/base64.c +0 -86
- data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,6 +1,3 @@
|
|
1
|
-
# The eXtended Keccak Code Package (XKCP)
|
2
|
-
# https://github.com/XKCP/XKCP
|
3
|
-
#
|
4
1
|
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
|
5
2
|
# Copyright (c) 2017 Ronny Van Keer
|
6
3
|
# All rights reserved.
|
@@ -14,22 +11,22 @@
|
|
14
11
|
# (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
|
15
12
|
# The rest of the code was written by Ronny Van Keer.
|
16
13
|
# Adaptations for macOS by Stéphane Léon.
|
17
|
-
# Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
|
18
14
|
|
19
15
|
.text
|
20
16
|
|
21
17
|
# -----------------------------------------------------------------------------
|
22
18
|
#
|
23
|
-
# void
|
19
|
+
# void KeccakP1600_AVX2_Initialize(void *state);
|
24
20
|
#
|
25
|
-
.
|
26
|
-
.globl
|
27
|
-
|
28
|
-
.
|
21
|
+
.ifdef macOS
|
22
|
+
.globl _KeccakP1600_AVX2_Initialize
|
23
|
+
_KeccakP1600_AVX2_Initialize:
|
24
|
+
.else
|
25
|
+
.globl KeccakP1600_AVX2_Initialize
|
26
|
+
.type KeccakP1600_AVX2_Initialize,@function
|
27
|
+
KeccakP1600_AVX2_Initialize:
|
29
28
|
.endif
|
30
|
-
|
31
|
-
_KeccakP1600_Initialize:
|
32
|
-
.balign 32
|
29
|
+
.balign 32
|
33
30
|
vpxor %ymm0,%ymm0,%ymm0
|
34
31
|
vmovdqu %ymm0,0*32(%rdi)
|
35
32
|
vmovdqu %ymm0,1*32(%rdi)
|
@@ -39,22 +36,24 @@ _KeccakP1600_Initialize:
|
|
39
36
|
vmovdqu %ymm0,5*32(%rdi)
|
40
37
|
movq $0,6*32(%rdi)
|
41
38
|
ret
|
42
|
-
.
|
43
|
-
.
|
39
|
+
.ifdef macOS
|
40
|
+
.else
|
41
|
+
.size KeccakP1600_AVX2_Initialize,.-KeccakP1600_AVX2_Initialize
|
44
42
|
.endif
|
45
43
|
|
46
44
|
# -----------------------------------------------------------------------------
|
47
45
|
#
|
48
|
-
# void
|
46
|
+
# void KeccakP1600_AVX2_AddByte(void *state, unsigned char data, unsigned int offset);
|
49
47
|
# %rdi %rsi %rdx
|
50
48
|
#
|
51
|
-
.
|
52
|
-
.globl
|
53
|
-
|
54
|
-
.
|
49
|
+
.ifdef macOS
|
50
|
+
.globl _KeccakP1600_AVX2_AddByte
|
51
|
+
_KeccakP1600_AVX2_AddByte:
|
52
|
+
.else
|
53
|
+
.globl KeccakP1600_AVX2_AddByte
|
54
|
+
.type KeccakP1600_AVX2_AddByte,@function
|
55
|
+
KeccakP1600_AVX2_AddByte:
|
55
56
|
.endif
|
56
|
-
KeccakP1600_AddByte:
|
57
|
-
_KeccakP1600_AddByte:
|
58
57
|
.balign 32
|
59
58
|
mov %rdx, %rax
|
60
59
|
and $7, %rax
|
@@ -65,31 +64,33 @@ _KeccakP1600_AddByte:
|
|
65
64
|
add %rax, %rdi
|
66
65
|
xorb %sil, (%rdi)
|
67
66
|
ret
|
68
|
-
.
|
69
|
-
.
|
67
|
+
.ifdef macOS
|
68
|
+
.else
|
69
|
+
.size KeccakP1600_AVX2_AddByte,.-KeccakP1600_AVX2_AddByte
|
70
70
|
.endif
|
71
71
|
|
72
72
|
# -----------------------------------------------------------------------------
|
73
73
|
#
|
74
|
-
# void
|
74
|
+
# void KeccakP1600_AVX2_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
75
75
|
# %rdi %rsi %rdx %rcx
|
76
76
|
#
|
77
|
-
.
|
78
|
-
.globl
|
79
|
-
|
80
|
-
.
|
77
|
+
.ifdef macOS
|
78
|
+
.globl _KeccakP1600_AVX2_AddBytes
|
79
|
+
_KeccakP1600_AVX2_AddBytes:
|
80
|
+
.else
|
81
|
+
.globl KeccakP1600_AVX2_AddBytes
|
82
|
+
.type KeccakP1600_AVX2_AddBytes,@function
|
83
|
+
KeccakP1600_AVX2_AddBytes:
|
81
84
|
.endif
|
82
|
-
KeccakP1600_AddBytes:
|
83
|
-
_KeccakP1600_AddBytes:
|
84
85
|
.balign 32
|
85
86
|
cmp $0, %rcx
|
86
|
-
jz
|
87
|
+
jz KeccakP1600_AVX2_AddBytes_Exit
|
87
88
|
mov %rdx, %rax # rax offset in lane
|
88
89
|
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
|
89
90
|
lea mapState(%rip), %r9
|
90
91
|
add %r9, %rdx
|
91
92
|
and $7, %rax
|
92
|
-
jz
|
93
|
+
jz KeccakP1600_AVX2_AddBytes_LaneAlignedCheck
|
93
94
|
mov $8, %r9 # r9 is (max) length of incomplete lane
|
94
95
|
sub %rax, %r9
|
95
96
|
cmp %rcx, %r9
|
@@ -98,170 +99,66 @@ _KeccakP1600_AddBytes:
|
|
98
99
|
add (%rdx), %rax # rax = pointer to state lane
|
99
100
|
add $8, %rdx
|
100
101
|
add %rdi, %rax
|
101
|
-
|
102
|
+
KeccakP1600_AVX2_AddBytes_NotAlignedLoop:
|
102
103
|
mov (%rsi), %r8b
|
103
104
|
inc %rsi
|
104
105
|
xorb %r8b, (%rax)
|
105
106
|
inc %rax
|
106
107
|
dec %r9
|
107
|
-
jnz
|
108
|
-
jmp
|
109
|
-
|
108
|
+
jnz KeccakP1600_AVX2_AddBytes_NotAlignedLoop
|
109
|
+
jmp KeccakP1600_AVX2_AddBytes_LaneAlignedCheck
|
110
|
+
KeccakP1600_AVX2_AddBytes_LaneAlignedLoop:
|
110
111
|
mov (%rsi), %r8
|
111
112
|
add $8, %rsi
|
112
113
|
mov (%rdx), %rax
|
113
114
|
add $8, %rdx
|
114
115
|
add %rdi, %rax
|
115
116
|
xor %r8, (%rax)
|
116
|
-
|
117
|
+
KeccakP1600_AVX2_AddBytes_LaneAlignedCheck:
|
117
118
|
sub $8, %rcx
|
118
|
-
jnc
|
119
|
-
|
119
|
+
jnc KeccakP1600_AVX2_AddBytes_LaneAlignedLoop
|
120
|
+
KeccakP1600_AVX2_AddBytes_LastIncompleteLane:
|
120
121
|
add $8, %rcx
|
121
|
-
jz
|
122
|
+
jz KeccakP1600_AVX2_AddBytes_Exit
|
122
123
|
mov (%rdx), %rax
|
123
124
|
add %rdi, %rax
|
124
|
-
|
125
|
+
KeccakP1600_AVX2_AddBytes_LastIncompleteLaneLoop:
|
125
126
|
mov (%rsi), %r8b
|
126
127
|
inc %rsi
|
127
128
|
xor %r8b, (%rax)
|
128
129
|
inc %rax
|
129
130
|
dec %rcx
|
130
|
-
jnz
|
131
|
-
|
131
|
+
jnz KeccakP1600_AVX2_AddBytes_LastIncompleteLaneLoop
|
132
|
+
KeccakP1600_AVX2_AddBytes_Exit:
|
132
133
|
ret
|
133
|
-
.
|
134
|
-
.
|
135
|
-
.
|
136
|
-
|
137
|
-
# -----------------------------------------------------------------------------
|
138
|
-
#
|
139
|
-
# void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
|
140
|
-
# %rdi %rsi %rdx %rcx
|
141
|
-
#
|
142
|
-
.globl KeccakP1600_OverwriteBytes
|
143
|
-
.globl _KeccakP1600_OverwriteBytes
|
144
|
-
.ifndef old_gas_syntax
|
145
|
-
.type KeccakP1600_OverwriteBytes,@function
|
146
|
-
.endif
|
147
|
-
KeccakP1600_OverwriteBytes:
|
148
|
-
_KeccakP1600_OverwriteBytes:
|
149
|
-
.balign 32
|
150
|
-
cmp $0, %rcx
|
151
|
-
jz KeccakP1600_OverwriteBytes_Exit
|
152
|
-
mov %rdx, %rax # rax offset in lane
|
153
|
-
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
|
154
|
-
lea mapState(%rip), %r9
|
155
|
-
add %r9, %rdx
|
156
|
-
and $7, %rax
|
157
|
-
jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
|
158
|
-
mov $8, %r9 # r9 is (max) length of incomplete lane
|
159
|
-
sub %rax, %r9
|
160
|
-
cmp %rcx, %r9
|
161
|
-
cmovae %rcx, %r9
|
162
|
-
sub %r9, %rcx # length -= length of incomplete lane
|
163
|
-
add (%rdx), %rax # rax = pointer to state lane
|
164
|
-
add $8, %rdx
|
165
|
-
add %rdi, %rax
|
166
|
-
KeccakP1600_OverwriteBytes_NotAlignedLoop:
|
167
|
-
mov (%rsi), %r8b
|
168
|
-
inc %rsi
|
169
|
-
mov %r8b, (%rax)
|
170
|
-
inc %rax
|
171
|
-
dec %r9
|
172
|
-
jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
|
173
|
-
jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
|
174
|
-
KeccakP1600_OverwriteBytes_LaneAlignedLoop:
|
175
|
-
mov (%rsi), %r8
|
176
|
-
add $8, %rsi
|
177
|
-
mov (%rdx), %rax
|
178
|
-
add $8, %rdx
|
179
|
-
add %rdi, %rax
|
180
|
-
mov %r8, (%rax)
|
181
|
-
KeccakP1600_OverwriteBytes_LaneAlignedCheck:
|
182
|
-
sub $8, %rcx
|
183
|
-
jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop
|
184
|
-
KeccakP1600_OverwriteBytes_LastIncompleteLane:
|
185
|
-
add $8, %rcx
|
186
|
-
jz KeccakP1600_OverwriteBytes_Exit
|
187
|
-
mov (%rdx), %rax
|
188
|
-
add %rdi, %rax
|
189
|
-
KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
|
190
|
-
mov (%rsi), %r8b
|
191
|
-
inc %rsi
|
192
|
-
mov %r8b, (%rax)
|
193
|
-
inc %rax
|
194
|
-
dec %rcx
|
195
|
-
jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
|
196
|
-
KeccakP1600_OverwriteBytes_Exit:
|
197
|
-
ret
|
198
|
-
.ifndef old_gas_syntax
|
199
|
-
.size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
|
200
|
-
.endif
|
201
|
-
|
202
|
-
# -----------------------------------------------------------------------------
|
203
|
-
#
|
204
|
-
# void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
|
205
|
-
# %rdi %rsi
|
206
|
-
#
|
207
|
-
.globl KeccakP1600_OverwriteWithZeroes
|
208
|
-
.globl _KeccakP1600_OverwriteWithZeroes
|
209
|
-
.ifndef old_gas_syntax
|
210
|
-
.type KeccakP1600_OverwriteWithZeroes,@function
|
211
|
-
.endif
|
212
|
-
KeccakP1600_OverwriteWithZeroes:
|
213
|
-
_KeccakP1600_OverwriteWithZeroes:
|
214
|
-
.balign 32
|
215
|
-
cmp $0, %rsi
|
216
|
-
jz KeccakP1600_OverwriteWithZeroes_Exit
|
217
|
-
lea mapState(%rip), %rdx # rdx pointer into state index mapper
|
218
|
-
jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
|
219
|
-
KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
|
220
|
-
mov (%rdx), %rax
|
221
|
-
add $8, %rdx
|
222
|
-
add %rdi, %rax
|
223
|
-
movq $0, (%rax)
|
224
|
-
KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
|
225
|
-
sub $8, %rsi
|
226
|
-
jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
|
227
|
-
KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
|
228
|
-
add $8, %rsi
|
229
|
-
jz KeccakP1600_OverwriteWithZeroes_Exit
|
230
|
-
mov (%rdx), %rax
|
231
|
-
add %rdi, %rax
|
232
|
-
KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
|
233
|
-
movb $0, (%rax)
|
234
|
-
inc %rax
|
235
|
-
dec %rsi
|
236
|
-
jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
|
237
|
-
KeccakP1600_OverwriteWithZeroes_Exit:
|
238
|
-
ret
|
239
|
-
.ifndef old_gas_syntax
|
240
|
-
.size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
|
134
|
+
.ifdef macOS
|
135
|
+
.else
|
136
|
+
.size KeccakP1600_AVX2_AddBytes,.-KeccakP1600_AVX2_AddBytes
|
241
137
|
.endif
|
242
138
|
|
243
139
|
# -----------------------------------------------------------------------------
|
244
140
|
#
|
245
|
-
# void
|
141
|
+
# void KeccakP1600_AVX2_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
|
246
142
|
# %rdi %rsi %rdx %rcx
|
247
143
|
#
|
248
|
-
.
|
249
|
-
.globl
|
250
|
-
|
251
|
-
.
|
144
|
+
.ifdef macOS
|
145
|
+
.globl _KeccakP1600_AVX2_ExtractBytes
|
146
|
+
_KeccakP1600_AVX2_ExtractBytes:
|
147
|
+
.else
|
148
|
+
.globl KeccakP1600_AVX2_ExtractBytes
|
149
|
+
.type KeccakP1600_AVX2_ExtractBytes,@function
|
150
|
+
KeccakP1600_AVX2_ExtractBytes:
|
252
151
|
.endif
|
253
|
-
|
254
|
-
_KeccakP1600_ExtractBytes:
|
255
|
-
.balign 32
|
152
|
+
.balign 32
|
256
153
|
push %rbx
|
257
154
|
cmp $0, %rcx
|
258
|
-
jz
|
155
|
+
jz KeccakP1600_AVX2_ExtractBytes_Exit
|
259
156
|
mov %rdx, %rax # rax offset in lane
|
260
157
|
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
|
261
158
|
lea mapState(%rip), %r9
|
262
159
|
add %r9, %rdx
|
263
160
|
and $7, %rax
|
264
|
-
jz
|
161
|
+
jz KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck
|
265
162
|
mov $8, %rbx # rbx is (max) length of incomplete lane
|
266
163
|
sub %rax, %rbx
|
267
164
|
cmp %rcx, %rbx
|
@@ -271,127 +168,53 @@ _KeccakP1600_ExtractBytes:
|
|
271
168
|
add $8, %rdx
|
272
169
|
add %rdi, %r9
|
273
170
|
add %rax, %r9
|
274
|
-
|
171
|
+
KeccakP1600_AVX2_ExtractBytes_NotAlignedLoop:
|
275
172
|
mov (%r9), %r8b
|
276
173
|
inc %r9
|
277
174
|
mov %r8b, (%rsi)
|
278
175
|
inc %rsi
|
279
176
|
dec %rbx
|
280
|
-
jnz KeccakP1600_ExtractBytes_NotAlignedLoop
|
281
|
-
jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
|
282
|
-
|
177
|
+
jnz KeccakP1600_AVX2_ExtractBytes_NotAlignedLoop
|
178
|
+
jmp KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck
|
179
|
+
KeccakP1600_AVX2_ExtractBytes_LaneAlignedLoop:
|
283
180
|
mov (%rdx), %rax
|
284
181
|
add $8, %rdx
|
285
182
|
add %rdi, %rax
|
286
183
|
mov (%rax), %r8
|
287
184
|
mov %r8, (%rsi)
|
288
185
|
add $8, %rsi
|
289
|
-
|
186
|
+
KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck:
|
290
187
|
sub $8, %rcx
|
291
|
-
jnc KeccakP1600_ExtractBytes_LaneAlignedLoop
|
292
|
-
|
188
|
+
jnc KeccakP1600_AVX2_ExtractBytes_LaneAlignedLoop
|
189
|
+
KeccakP1600_AVX2_ExtractBytes_LastIncompleteLane:
|
293
190
|
add $8, %rcx
|
294
|
-
jz KeccakP1600_ExtractBytes_Exit
|
191
|
+
jz KeccakP1600_AVX2_ExtractBytes_Exit
|
295
192
|
mov (%rdx), %rax
|
296
193
|
add %rdi, %rax
|
297
194
|
mov (%rax), %r8
|
298
|
-
|
195
|
+
KeccakP1600_AVX2_ExtractBytes_LastIncompleteLaneLoop:
|
299
196
|
mov %r8b, (%rsi)
|
300
197
|
shr $8, %r8
|
301
198
|
inc %rsi
|
302
199
|
dec %rcx
|
303
|
-
jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
|
304
|
-
|
200
|
+
jnz KeccakP1600_AVX2_ExtractBytes_LastIncompleteLaneLoop
|
201
|
+
KeccakP1600_AVX2_ExtractBytes_Exit:
|
305
202
|
pop %rbx
|
306
203
|
ret
|
307
|
-
.
|
308
|
-
.
|
309
|
-
.
|
310
|
-
|
311
|
-
# -----------------------------------------------------------------------------
|
312
|
-
#
|
313
|
-
# void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
|
314
|
-
# %rdi %rsi %rdx %rcx %r8
|
315
|
-
#
|
316
|
-
.globl KeccakP1600_ExtractAndAddBytes
|
317
|
-
.globl _KeccakP1600_ExtractAndAddBytes
|
318
|
-
.ifndef old_gas_syntax
|
319
|
-
.type KeccakP1600_ExtractAndAddBytes,@function
|
320
|
-
.endif
|
321
|
-
KeccakP1600_ExtractAndAddBytes:
|
322
|
-
_KeccakP1600_ExtractAndAddBytes:
|
323
|
-
.balign 32
|
324
|
-
push %rbx
|
325
|
-
push %r10
|
326
|
-
cmp $0, %r8
|
327
|
-
jz KeccakP1600_ExtractAndAddBytes_Exit
|
328
|
-
mov %rcx, %rax # rax offset in lane
|
329
|
-
and $0xFFFFFFF8, %ecx # rcx pointer into state index mapper
|
330
|
-
lea mapState(%rip), %r9
|
331
|
-
add %r9, %rcx
|
332
|
-
and $7, %rax
|
333
|
-
jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
|
334
|
-
mov $8, %rbx # rbx is (max) length of incomplete lane
|
335
|
-
sub %rax, %rbx
|
336
|
-
cmp %r8, %rbx
|
337
|
-
cmovae %r8, %rbx
|
338
|
-
sub %rbx, %r8 # length -= length of incomplete lane
|
339
|
-
mov (%rcx), %r9
|
340
|
-
add $8, %rcx
|
341
|
-
add %rdi, %r9
|
342
|
-
add %rax, %r9
|
343
|
-
KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
|
344
|
-
mov (%r9), %r10b
|
345
|
-
inc %r9
|
346
|
-
xor (%rsi), %r10b
|
347
|
-
inc %rsi
|
348
|
-
mov %r10b, (%rdx)
|
349
|
-
inc %rdx
|
350
|
-
dec %rbx
|
351
|
-
jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
|
352
|
-
jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
|
353
|
-
KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
|
354
|
-
mov (%rcx), %rax
|
355
|
-
add $8, %rcx
|
356
|
-
add %rdi, %rax
|
357
|
-
mov (%rax), %r10
|
358
|
-
xor (%rsi), %r10
|
359
|
-
add $8, %rsi
|
360
|
-
mov %r10, (%rdx)
|
361
|
-
add $8, %rdx
|
362
|
-
KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
|
363
|
-
sub $8, %r8
|
364
|
-
jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
|
365
|
-
KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
|
366
|
-
add $8, %r8
|
367
|
-
jz KeccakP1600_ExtractAndAddBytes_Exit
|
368
|
-
mov (%rcx), %rax
|
369
|
-
add %rdi, %rax
|
370
|
-
mov (%rax), %r10
|
371
|
-
KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
|
372
|
-
xor (%rsi), %r10b
|
373
|
-
inc %rsi
|
374
|
-
mov %r10b, (%rdx)
|
375
|
-
inc %rdx
|
376
|
-
shr $8, %r10
|
377
|
-
dec %r8
|
378
|
-
jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
|
379
|
-
KeccakP1600_ExtractAndAddBytes_Exit:
|
380
|
-
pop %r10
|
381
|
-
pop %rbx
|
382
|
-
ret
|
383
|
-
.ifndef old_gas_syntax
|
384
|
-
.size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
|
204
|
+
.ifdef macOS
|
205
|
+
.else
|
206
|
+
.size KeccakP1600_AVX2_ExtractBytes,.-KeccakP1600_AVX2_ExtractBytes
|
385
207
|
.endif
|
386
208
|
|
387
209
|
# -----------------------------------------------------------------------------
|
388
210
|
#
|
389
211
|
# internal
|
390
212
|
#
|
391
|
-
.
|
213
|
+
.ifdef macOS
|
214
|
+
.else
|
392
215
|
.type __KeccakF1600,@function
|
393
216
|
.endif
|
394
|
-
.balign
|
217
|
+
.balign 32
|
395
218
|
__KeccakF1600:
|
396
219
|
.Loop_avx2:
|
397
220
|
######################################### Theta
|
@@ -530,63 +353,22 @@ __KeccakF1600:
|
|
530
353
|
dec %eax
|
531
354
|
jnz .Loop_avx2
|
532
355
|
ret
|
533
|
-
.
|
356
|
+
.ifdef macOS
|
357
|
+
.else
|
534
358
|
.size __KeccakF1600,.-__KeccakF1600
|
535
359
|
.endif
|
536
360
|
|
537
|
-
# -----------------------------------------------------------------------------
|
538
|
-
#
|
539
|
-
# void KeccakP1600_Permute_24rounds(void *state);
|
540
|
-
# %rdi
|
541
|
-
#
|
542
|
-
.globl KeccakP1600_Permute_24rounds
|
543
|
-
.globl _KeccakP1600_Permute_24rounds
|
544
|
-
.ifndef old_gas_syntax
|
545
|
-
.type KeccakP1600_Permute_24rounds,@function
|
546
|
-
.endif
|
547
|
-
KeccakP1600_Permute_24rounds:
|
548
|
-
_KeccakP1600_Permute_24rounds:
|
549
|
-
.balign 32
|
550
|
-
lea rhotates_left+96(%rip),%r8
|
551
|
-
lea rhotates_right+96(%rip),%r9
|
552
|
-
lea iotas(%rip),%r10
|
553
|
-
mov $24,%eax
|
554
|
-
lea 96(%rdi),%rdi
|
555
|
-
vzeroupper
|
556
|
-
vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
|
557
|
-
vmovdqu 8+32*0-96(%rdi),%ymm1
|
558
|
-
vmovdqu 8+32*1-96(%rdi),%ymm2
|
559
|
-
vmovdqu 8+32*2-96(%rdi),%ymm3
|
560
|
-
vmovdqu 8+32*3-96(%rdi),%ymm4
|
561
|
-
vmovdqu 8+32*4-96(%rdi),%ymm5
|
562
|
-
vmovdqu 8+32*5-96(%rdi),%ymm6
|
563
|
-
call __KeccakF1600
|
564
|
-
vmovq %xmm0,-96(%rdi)
|
565
|
-
vmovdqu %ymm1,8+32*0-96(%rdi)
|
566
|
-
vmovdqu %ymm2,8+32*1-96(%rdi)
|
567
|
-
vmovdqu %ymm3,8+32*2-96(%rdi)
|
568
|
-
vmovdqu %ymm4,8+32*3-96(%rdi)
|
569
|
-
vmovdqu %ymm5,8+32*4-96(%rdi)
|
570
|
-
vmovdqu %ymm6,8+32*5-96(%rdi)
|
571
|
-
vzeroupper
|
572
|
-
ret
|
573
|
-
.ifndef old_gas_syntax
|
574
|
-
.size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
|
575
|
-
.endif
|
576
361
|
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
.globl
|
583
|
-
.
|
584
|
-
|
585
|
-
.type KeccakP1600_Permute_12rounds,@function
|
362
|
+
|
363
|
+
.ifdef macOS
|
364
|
+
.globl _KeccakP1600_AVX2_Permute_12rounds
|
365
|
+
_KeccakP1600_AVX2_Permute_12rounds:
|
366
|
+
.else
|
367
|
+
.globl KeccakP1600_AVX2_Permute_12rounds
|
368
|
+
.type KeccakP1600_AVX2_Permute_12rounds,@function
|
369
|
+
KeccakP1600_AVX2_Permute_12rounds:
|
586
370
|
.endif
|
587
|
-
|
588
|
-
_KeccakP1600_Permute_12rounds:
|
589
|
-
.balign 32
|
371
|
+
.balign 32
|
590
372
|
lea rhotates_left+96(%rip),%r8
|
591
373
|
lea rhotates_right+96(%rip),%r9
|
592
374
|
lea iotas+12*4*8(%rip),%r10
|
@@ -610,253 +392,34 @@ _KeccakP1600_Permute_12rounds:
|
|
610
392
|
vmovdqu %ymm6,8+32*5-96(%rdi)
|
611
393
|
vzeroupper
|
612
394
|
ret
|
613
|
-
.
|
614
|
-
.size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
|
615
|
-
.endif
|
616
|
-
|
617
|
-
# -----------------------------------------------------------------------------
|
618
|
-
#
|
619
|
-
# void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
|
620
|
-
# %rdi %rsi
|
621
|
-
#
|
622
|
-
.globl KeccakP1600_Permute_Nrounds
|
623
|
-
.globl _KeccakP1600_Permute_Nrounds
|
624
|
-
.ifndef old_gas_syntax
|
625
|
-
.type KeccakP1600_Permute_Nrounds,@function
|
626
|
-
.endif
|
627
|
-
KeccakP1600_Permute_Nrounds:
|
628
|
-
_KeccakP1600_Permute_Nrounds:
|
629
|
-
.balign 32
|
630
|
-
lea rhotates_left+96(%rip),%r8
|
631
|
-
lea rhotates_right+96(%rip),%r9
|
632
|
-
lea iotas+24*4*8(%rip),%r10
|
633
|
-
mov %rsi,%rax
|
634
|
-
shl $2+3,%rsi
|
635
|
-
sub %rsi, %r10
|
636
|
-
lea 96(%rdi),%rdi
|
637
|
-
vzeroupper
|
638
|
-
vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
|
639
|
-
vmovdqu 8+32*0-96(%rdi),%ymm1
|
640
|
-
vmovdqu 8+32*1-96(%rdi),%ymm2
|
641
|
-
vmovdqu 8+32*2-96(%rdi),%ymm3
|
642
|
-
vmovdqu 8+32*3-96(%rdi),%ymm4
|
643
|
-
vmovdqu 8+32*4-96(%rdi),%ymm5
|
644
|
-
vmovdqu 8+32*5-96(%rdi),%ymm6
|
645
|
-
call __KeccakF1600
|
646
|
-
vmovq %xmm0,-96(%rdi)
|
647
|
-
vmovdqu %ymm1,8+32*0-96(%rdi)
|
648
|
-
vmovdqu %ymm2,8+32*1-96(%rdi)
|
649
|
-
vmovdqu %ymm3,8+32*2-96(%rdi)
|
650
|
-
vmovdqu %ymm4,8+32*3-96(%rdi)
|
651
|
-
vmovdqu %ymm5,8+32*4-96(%rdi)
|
652
|
-
vmovdqu %ymm6,8+32*5-96(%rdi)
|
653
|
-
vzeroupper
|
654
|
-
ret
|
655
|
-
.ifndef old_gas_syntax
|
656
|
-
.size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
|
657
|
-
.endif
|
658
|
-
|
659
|
-
# -----------------------------------------------------------------------------
|
660
|
-
#
|
661
|
-
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
|
662
|
-
# %rdi %rsi %rdx %rcx
|
663
|
-
#
|
664
|
-
.globl KeccakF1600_FastLoop_Absorb
|
665
|
-
.globl _KeccakF1600_FastLoop_Absorb
|
666
|
-
.ifndef old_gas_syntax
|
667
|
-
.type KeccakF1600_FastLoop_Absorb,@function
|
668
|
-
.endif
|
669
|
-
KeccakF1600_FastLoop_Absorb:
|
670
|
-
_KeccakF1600_FastLoop_Absorb:
|
671
|
-
.balign 32
|
672
|
-
push %rbx
|
673
|
-
push %r10
|
674
|
-
shr $3, %rcx # rcx = data length in lanes
|
675
|
-
mov %rdx, %rbx # rbx = initial data pointer
|
676
|
-
cmp %rsi, %rcx
|
677
|
-
jb KeccakF1600_FastLoop_Absorb_Exit
|
678
|
-
vzeroupper
|
679
|
-
cmp $21, %rsi
|
680
|
-
jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
|
681
|
-
sub $21, %rcx
|
682
|
-
lea rhotates_left+96(%rip),%r8
|
683
|
-
lea rhotates_right+96(%rip),%r9
|
684
|
-
lea 96(%rdi),%rdi
|
685
|
-
vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
|
686
|
-
vmovdqu 8+32*0-96(%rdi),%ymm1
|
687
|
-
vmovdqu 8+32*1-96(%rdi),%ymm2
|
688
|
-
vmovdqu 8+32*2-96(%rdi),%ymm3
|
689
|
-
vmovdqu 8+32*3-96(%rdi),%ymm4
|
690
|
-
vmovdqu 8+32*4-96(%rdi),%ymm5
|
691
|
-
vmovdqu 8+32*5-96(%rdi),%ymm6
|
692
|
-
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
|
693
|
-
vpbroadcastq (%rdx),%ymm7
|
694
|
-
vmovdqu 8(%rdx),%ymm8
|
695
|
-
|
696
|
-
vmovdqa map2(%rip), %xmm15
|
697
|
-
vpcmpeqd %ymm14, %ymm14, %ymm14
|
698
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
|
699
|
-
|
700
|
-
vmovdqa mask3_21(%rip), %ymm14
|
701
|
-
vpxor %ymm10, %ymm10, %ymm10
|
702
|
-
vmovdqa map3(%rip), %xmm15
|
703
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
|
704
|
-
|
705
|
-
vmovdqa mask4_21(%rip), %ymm14
|
706
|
-
vpxor %ymm11, %ymm11, %ymm11
|
707
|
-
vmovdqa map4(%rip), %xmm15
|
708
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
|
709
|
-
|
710
|
-
vmovdqa mask5_21(%rip), %ymm14
|
711
|
-
vpxor %ymm12, %ymm12, %ymm12
|
712
|
-
vmovdqa map5(%rip), %xmm15
|
713
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
|
714
|
-
|
715
|
-
vmovdqa mask6_21(%rip), %ymm14
|
716
|
-
vpxor %ymm13, %ymm13, %ymm13
|
717
|
-
vmovdqa map6(%rip), %xmm15
|
718
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
|
719
|
-
|
720
|
-
vpxor %ymm7,%ymm0,%ymm0
|
721
|
-
vpxor %ymm8,%ymm1,%ymm1
|
722
|
-
vpxor %ymm9,%ymm2,%ymm2
|
723
|
-
vpxor %ymm10,%ymm3,%ymm3
|
724
|
-
vpxor %ymm11,%ymm4,%ymm4
|
725
|
-
vpxor %ymm12,%ymm5,%ymm5
|
726
|
-
vpxor %ymm13,%ymm6,%ymm6
|
727
|
-
add $21*8, %rdx
|
728
|
-
lea iotas(%rip),%r10
|
729
|
-
mov $24,%eax
|
730
|
-
call __KeccakF1600
|
731
|
-
sub $21, %rcx
|
732
|
-
jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes
|
733
|
-
KeccakF1600_FastLoop_Absorb_SaveAndExit:
|
734
|
-
vmovq %xmm0,-96(%rdi)
|
735
|
-
vmovdqu %ymm1,8+32*0-96(%rdi)
|
736
|
-
vmovdqu %ymm2,8+32*1-96(%rdi)
|
737
|
-
vmovdqu %ymm3,8+32*2-96(%rdi)
|
738
|
-
vmovdqu %ymm4,8+32*3-96(%rdi)
|
739
|
-
vmovdqu %ymm5,8+32*4-96(%rdi)
|
740
|
-
vmovdqu %ymm6,8+32*5-96(%rdi)
|
741
|
-
KeccakF1600_FastLoop_Absorb_Exit:
|
742
|
-
vzeroupper
|
743
|
-
mov %rdx, %rax # return number of bytes processed
|
744
|
-
sub %rbx, %rax
|
745
|
-
pop %r10
|
746
|
-
pop %rbx
|
747
|
-
ret
|
748
|
-
KeccakF1600_FastLoop_Absorb_Not21Lanes:
|
749
|
-
cmp $17, %rsi
|
750
|
-
jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
|
751
|
-
sub $17, %rcx
|
752
|
-
lea rhotates_left+96(%rip),%r8
|
753
|
-
lea rhotates_right+96(%rip),%r9
|
754
|
-
lea 96(%rdi),%rdi
|
755
|
-
vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
|
756
|
-
vmovdqu 8+32*0-96(%rdi),%ymm1
|
757
|
-
vmovdqu 8+32*1-96(%rdi),%ymm2
|
758
|
-
vmovdqu 8+32*2-96(%rdi),%ymm3
|
759
|
-
vmovdqu 8+32*3-96(%rdi),%ymm4
|
760
|
-
vmovdqu 8+32*4-96(%rdi),%ymm5
|
761
|
-
vmovdqu 8+32*5-96(%rdi),%ymm6
|
762
|
-
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
|
763
|
-
vpbroadcastq (%rdx),%ymm7
|
764
|
-
vmovdqu 8(%rdx),%ymm8
|
765
|
-
|
766
|
-
vmovdqa mask2_17(%rip), %ymm14
|
767
|
-
vpxor %ymm9, %ymm9, %ymm9
|
768
|
-
vmovdqa map2(%rip), %xmm15
|
769
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
|
770
|
-
|
771
|
-
vmovdqa mask3_17(%rip), %ymm14
|
772
|
-
vpxor %ymm10, %ymm10, %ymm10
|
773
|
-
vmovdqa map3(%rip), %xmm15
|
774
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
|
775
|
-
|
776
|
-
vmovdqa mask4_17(%rip), %ymm14
|
777
|
-
vpxor %ymm11, %ymm11, %ymm11
|
778
|
-
vmovdqa map4(%rip), %xmm15
|
779
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
|
780
|
-
|
781
|
-
vmovdqa mask5_17(%rip), %ymm14
|
782
|
-
vpxor %ymm12, %ymm12, %ymm12
|
783
|
-
vmovdqa map5(%rip), %xmm15
|
784
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
|
785
|
-
|
786
|
-
vmovdqa mask6_17(%rip), %ymm14
|
787
|
-
vpxor %ymm13, %ymm13, %ymm13
|
788
|
-
vmovdqa map6(%rip), %xmm15
|
789
|
-
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
|
790
|
-
|
791
|
-
vpxor %ymm7,%ymm0,%ymm0
|
792
|
-
vpxor %ymm8,%ymm1,%ymm1
|
793
|
-
vpxor %ymm9,%ymm2,%ymm2
|
794
|
-
vpxor %ymm10,%ymm3,%ymm3
|
795
|
-
vpxor %ymm11,%ymm4,%ymm4
|
796
|
-
vpxor %ymm12,%ymm5,%ymm5
|
797
|
-
vpxor %ymm13,%ymm6,%ymm6
|
798
|
-
add $17*8, %rdx
|
799
|
-
lea iotas(%rip),%r10
|
800
|
-
mov $24,%eax
|
801
|
-
call __KeccakF1600
|
802
|
-
sub $17, %rcx
|
803
|
-
jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes
|
804
|
-
jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
|
805
|
-
KeccakF1600_FastLoop_Absorb_Not17Lanes:
|
806
|
-
lea mapState(%rip), %r9
|
807
|
-
mov %rsi, %rax
|
808
|
-
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
|
809
|
-
mov (%rdx), %r8
|
810
|
-
add $8, %rdx
|
811
|
-
mov (%r9), %r10
|
812
|
-
add $8, %r9
|
813
|
-
add %rdi, %r10
|
814
|
-
xor %r8, (%r10)
|
815
|
-
sub $1, %rax
|
816
|
-
jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
|
817
|
-
sub %rsi, %rcx
|
818
|
-
push %rdi
|
819
|
-
push %rsi
|
820
|
-
push %rdx
|
821
|
-
push %rcx
|
822
|
-
.ifdef no_plt
|
823
|
-
call KeccakP1600_Permute_24rounds
|
395
|
+
.ifdef macOS
|
824
396
|
.else
|
825
|
-
|
826
|
-
.endif
|
827
|
-
pop %rcx
|
828
|
-
pop %rdx
|
829
|
-
pop %rsi
|
830
|
-
pop %rdi
|
831
|
-
cmp %rsi, %rcx
|
832
|
-
jae KeccakF1600_FastLoop_Absorb_Not17Lanes
|
833
|
-
jmp KeccakF1600_FastLoop_Absorb_Exit
|
834
|
-
.ifndef old_gas_syntax
|
835
|
-
.size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
|
397
|
+
.size KeccakP1600_AVX2_Permute_12rounds,.-KeccakP1600_AVX2_Permute_12rounds
|
836
398
|
.endif
|
837
399
|
|
838
400
|
# -----------------------------------------------------------------------------
|
839
401
|
#
|
840
|
-
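# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);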
|
402
|
+
# size_t KeccakP1600_AVX2_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
|
841
403
|
# %rdi %rsi %rdx %rcx
|
842
404
|
#
|
843
|
-
.
|
844
|
-
.globl
|
845
|
-
|
846
|
-
.
|
405
|
+
.ifdef macOS
|
406
|
+
.globl _KeccakP1600_AVX2_12rounds_FastLoop_Absorb
|
407
|
+
_KeccakP1600_AVX2_12rounds_FastLoop_Absorb:
|
408
|
+
.else
|
409
|
+
.globl KeccakP1600_AVX2_12rounds_FastLoop_Absorb
|
410
|
+
.type KeccakP1600_AVX2_12rounds_FastLoop_Absorb,@function
|
411
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb:
|
847
412
|
.endif
|
848
|
-
|
849
|
-
_KeccakP1600_12rounds_FastLoop_Absorb:
|
850
|
-
.balign 32
|
413
|
+
.balign 32
|
851
414
|
push %rbx
|
852
415
|
push %r10
|
853
416
|
shr $3, %rcx # rcx = data length in lanes
|
854
417
|
mov %rdx, %rbx # rbx = initial data pointer
|
855
418
|
cmp %rsi, %rcx
|
856
|
-
jb KeccakP1600_12rounds_FastLoop_Absorb_Exit
|
419
|
+
jb KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit
|
857
420
|
vzeroupper
|
858
421
|
cmp $21, %rsi
|
859
|
-
jnz KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes
|
422
|
+
jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not21Lanes
|
860
423
|
sub $21, %rcx
|
861
424
|
lea rhotates_left+96(%rip),%r8
|
862
425
|
lea rhotates_right+96(%rip),%r9
|
@@ -868,7 +431,7 @@ _KeccakP1600_12rounds_FastLoop_Absorb:
|
|
868
431
|
vmovdqu 8+32*3-96(%rdi),%ymm4
|
869
432
|
vmovdqu 8+32*4-96(%rdi),%ymm5
|
870
433
|
vmovdqu 8+32*5-96(%rdi),%ymm6
|
871
|
-
|
434
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop21Lanes:
|
872
435
|
vpbroadcastq (%rdx),%ymm7
|
873
436
|
vmovdqu 8(%rdx),%ymm8
|
874
437
|
|
@@ -908,8 +471,8 @@ KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
|
|
908
471
|
mov $12,%eax
|
909
472
|
call __KeccakF1600
|
910
473
|
sub $21, %rcx
|
911
|
-
jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes
|
912
|
-
|
474
|
+
jnc KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop21Lanes
|
475
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_SaveAndExit:
|
913
476
|
vmovq %xmm0,-96(%rdi)
|
914
477
|
vmovdqu %ymm1,8+32*0-96(%rdi)
|
915
478
|
vmovdqu %ymm2,8+32*1-96(%rdi)
|
@@ -917,16 +480,16 @@ KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
|
|
917
480
|
vmovdqu %ymm4,8+32*3-96(%rdi)
|
918
481
|
vmovdqu %ymm5,8+32*4-96(%rdi)
|
919
482
|
vmovdqu %ymm6,8+32*5-96(%rdi)
|
920
|
-
|
483
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit:
|
921
484
|
vzeroupper
|
922
485
|
mov %rdx, %rax # return number of bytes processed
|
923
486
|
sub %rbx, %rax
|
924
487
|
pop %r10
|
925
488
|
pop %rbx
|
926
489
|
ret
|
927
|
-
|
490
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not21Lanes:
|
928
491
|
cmp $17, %rsi
|
929
|
-
jnz KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
|
492
|
+
jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes
|
930
493
|
sub $17, %rcx
|
931
494
|
lea rhotates_left+96(%rip),%r8
|
932
495
|
lea rhotates_right+96(%rip),%r9
|
@@ -938,7 +501,7 @@ KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
|
|
938
501
|
vmovdqu 8+32*3-96(%rdi),%ymm4
|
939
502
|
vmovdqu 8+32*4-96(%rdi),%ymm5
|
940
503
|
vmovdqu 8+32*5-96(%rdi),%ymm6
|
941
|
-
|
504
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop17Lanes:
|
942
505
|
vpbroadcastq (%rdx),%ymm7
|
943
506
|
vmovdqu 8(%rdx),%ymm8
|
944
507
|
|
@@ -979,12 +542,12 @@ KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
|
|
979
542
|
mov $12,%eax
|
980
543
|
call __KeccakF1600
|
981
544
|
sub $17, %rcx
|
982
|
-
jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes
|
983
|
-
jmp KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit
|
984
|
-
|
545
|
+
jnc KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop17Lanes
|
546
|
+
jmp KeccakP1600_AVX2_12rounds_FastLoop_Absorb_SaveAndExit
|
547
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes:
|
985
548
|
lea mapState(%rip), %r9
|
986
549
|
mov %rsi, %rax
|
987
|
-
|
550
|
+
KeccakP1600_AVX2_12rounds_FastLoop_Absorb_LanesAddLoop:
|
988
551
|
mov (%rdx), %r8
|
989
552
|
add $8, %rdx
|
990
553
|
mov (%r9), %r10
|
@@ -992,31 +555,32 @@ KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
|
|
992
555
|
add %rdi, %r10
|
993
556
|
xor %r8, (%r10)
|
994
557
|
sub $1, %rax
|
995
|
-
jnz KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
|
558
|
+
jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_LanesAddLoop
|
996
559
|
sub %rsi, %rcx
|
997
560
|
push %rdi
|
998
561
|
push %rsi
|
999
562
|
push %rdx
|
1000
563
|
push %rcx
|
1001
|
-
.ifdef
|
1002
|
-
call KeccakP1600_Permute_12rounds
|
564
|
+
.ifdef macOS
|
565
|
+
call _KeccakP1600_AVX2_Permute_12rounds
|
1003
566
|
.else
|
1004
|
-
call KeccakP1600_Permute_12rounds@PLT
|
567
|
+
call KeccakP1600_AVX2_Permute_12rounds@PLT
|
1005
568
|
.endif
|
1006
569
|
pop %rcx
|
1007
570
|
pop %rdx
|
1008
571
|
pop %rsi
|
1009
572
|
pop %rdi
|
1010
573
|
cmp %rsi, %rcx
|
1011
|
-
jae KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
|
1012
|
-
jmp KeccakP1600_12rounds_FastLoop_Absorb_Exit
|
1013
|
-
.
|
1014
|
-
.
|
574
|
+
jae KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes
|
575
|
+
jmp KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit
|
576
|
+
.ifdef macOS
|
577
|
+
.else
|
578
|
+
.size KeccakP1600_AVX2_12rounds_FastLoop_Absorb,.-KeccakP1600_AVX2_12rounds_FastLoop_Absorb
|
1015
579
|
.endif
|
1016
580
|
|
1017
581
|
.equ ALLON, 0xFFFFFFFFFFFFFFFF
|
1018
582
|
|
1019
|
-
.balign
|
583
|
+
.balign 64
|
1020
584
|
rhotates_left:
|
1021
585
|
.quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0]
|
1022
586
|
.quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4]
|
@@ -1064,7 +628,7 @@ mapState:
|
|
1064
628
|
.quad 8*8, 9*8, 18*8, 23*8, 16*8
|
1065
629
|
.quad 6*8, 17*8, 14*8, 11*8, 24*8
|
1066
630
|
|
1067
|
-
.balign
|
631
|
+
.balign 16
|
1068
632
|
map2:
|
1069
633
|
.long 10*8, 20*8, 5*8, 15*8
|
1070
634
|
map3:
|
@@ -1076,7 +640,7 @@ map5:
|
|
1076
640
|
map6:
|
1077
641
|
.long 6*8, 12*8, 18*8, 24*8
|
1078
642
|
|
1079
|
-
.balign
|
643
|
+
.balign 32
|
1080
644
|
mask3_21:
|
1081
645
|
.quad ALLON, ALLON, 0, ALLON
|
1082
646
|
mask4_21:
|