sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291)
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,1394 +0,0 @@
1
- @
2
- @ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
- @
4
- @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
- @
6
- @ For more information, feedback or questions, please refer to the Keccak Team website:
7
- @ https://keccak.team/
8
- @
9
- @ To the extent possible under law, the implementer has waived all copyright
10
- @ and related or neighboring rights to the source code in this file.
11
- @ http://creativecommons.org/publicdomain/zero/1.0/
12
- @
13
- @ ---
14
- @
15
- @ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
- @ Please refer to PlSnP-documentation.h for more details.
17
- @
18
- @ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
- @ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
- @
21
-
22
- @ WARNING: These functions work only on little endian CPU with@ ARMv7A + NEON architecture
23
- @ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
24
-
25
- @ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
26
- @ INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
27
-
28
- @ INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
29
-
30
-
31
-
32
- .text
33
-
34
- @----------------------------------------------------------------------------
35
-
36
- @ --- offsets in state
37
- .equ _ba , 0*16
38
- .equ _be , 1*16
39
- .equ _bi , 2*16
40
- .equ _bo , 3*16
41
- .equ _bu , 4*16
42
- .equ _ga , 5*16
43
- .equ _ge , 6*16
44
- .equ _gi , 7*16
45
- .equ _go , 8*16
46
- .equ _gu , 9*16
47
- .equ _ka , 10*16
48
- .equ _ke , 11*16
49
- .equ _ki , 12*16
50
- .equ _ko , 13*16
51
- .equ _ku , 14*16
52
- .equ _ma , 15*16
53
- .equ _me , 16*16
54
- .equ _mi , 17*16
55
- .equ _mo , 18*16
56
- .equ _mu , 19*16
57
- .equ _sa , 20*16
58
- .equ _se , 21*16
59
- .equ _si , 22*16
60
- .equ _so , 23*16
61
- .equ _su , 24*16
62
-
63
- @ --- macros for Single permutation
64
-
65
- .macro KeccakS_ThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
66
-
67
- @Prepare Theta
68
- @ Ca = Aba^Aga^Aka^Ama^Asa
69
- @ Ce = Abe^Age^Ake^Ame^Ase
70
- @ Ci = Abi^Agi^Aki^Ami^Asi
71
- @ Co = Abo^Ago^Ako^Amo^Aso
72
- @ Cu = Abu^Agu^Aku^Amu^Asu
73
- @ De = Ca^ROL64(Ci, 1)
74
- @ Di = Ce^ROL64(Co, 1)
75
- @ Do = Ci^ROL64(Cu, 1)
76
- @ Du = Co^ROL64(Ca, 1)
77
- @ Da = Cu^ROL64(Ce, 1)
78
- veor.64 q4, q6, q7
79
- veor.64 q5, q9, q10
80
- veor.64 d8, d8, d9
81
- veor.64 d10, d10, d11
82
- veor.64 d1, d8, d16
83
- veor.64 d2, d10, d17
84
-
85
- veor.64 q4, q11, q12
86
- veor.64 q5, q14, q15
87
- veor.64 d8, d8, d9
88
- veor.64 d10, d10, d11
89
- veor.64 d3, d8, d26
90
-
91
- vadd.u64 q4, q1, q1
92
- veor.64 d4, d10, d27
93
- vmov.64 d0, d5
94
- vsri.64 q4, q1, #63
95
-
96
- vadd.u64 q5, q2, q2
97
- veor.64 q4, q4, q0
98
- vsri.64 q5, q2, #63
99
- vadd.u64 d7, d1, d1
100
- veor.64 \argA2, \argA2, d8
101
- veor.64 q5, q5, q1
102
-
103
- vsri.64 d7, d1, #63
104
- vshl.u64 d1, \argA2, #44
105
- veor.64 \argA3, \argA3, d9
106
- veor.64 d7, d7, d4
107
-
108
- @ Ba = argA1^Da
109
- @ Be = ROL64((argA2^De), 44)
110
- @ Bi = ROL64((argA3^Di), 43)
111
- @ Bo = ROL64((argA4^Do), 21)
112
- @ Bu = ROL64((argA5^Du), 14)
113
- @ argA2 = Be ^((~Bi)& Bo )
114
- @ argA3 = Bi ^((~Bo)& Bu )
115
- @ argA4 = Bo ^((~Bu)& Ba )
116
- @ argA5 = Bu ^((~Ba)& Be )
117
- @ argA1 = Ba ^((~Be)& Bi )
118
- @ argA1 ^= KeccakP1600RoundConstants[i+round]
119
- vsri.64 d1, \argA2, #64-44
120
- vshl.u64 d2, \argA3, #43
121
- vldr.64 d0, [r0, #\argA1]
122
- veor.64 \argA4, \argA4, d10
123
- vsri.64 d2, \argA3, #64-43
124
- vshl.u64 d3, \argA4, #21
125
- veor.64 \argA5, \argA5, d11
126
- veor.64 d0, d0, d7
127
- vsri.64 d3, \argA4, #64-21
128
- vbic.64 d5, d2, d1
129
- vshl.u64 d4, \argA5, #14
130
- vbic.64 \argA2, d3, d2
131
- vld1.64 d6, [r1]!
132
- veor.64 d5, d0
133
- vsri.64 d4, \argA5, #64-14
134
- veor.64 d5, d6
135
- vbic.64 \argA5, d1, d0
136
- vbic.64 \argA3, d4, d3
137
- vbic.64 \argA4, d0, d4
138
- veor.64 \argA2, d1
139
- vstr.64 d5, [r0, #\argA1]
140
- veor.64 \argA3, d2
141
- veor.64 \argA4, d3
142
- veor.64 \argA5, d4
143
- .endm
144
-
145
- .macro KeccakS_ThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
146
-
147
- @ Bi = ROL64((argA1^Da), 3)
148
- @ Bo = ROL64((argA2^De), 45)
149
- @ Bu = ROL64((argA3^Di), 61)
150
- @ Ba = ROL64((argA4^Do), 28)
151
- @ Be = ROL64((argA5^Du), 20)
152
- @ argA1 = Ba ^((~Be)& Bi )
153
- @ Ca ^= argA1
154
- @ argA2 = Be ^((~Bi)& Bo )
155
- @ argA3 = Bi ^((~Bo)& Bu )
156
- @ argA4 = Bo ^((~Bu)& Ba )
157
- @ argA5 = Bu ^((~Ba)& Be )
158
- veor.64 \argA2, \argA2, d8
159
- veor.64 \argA3, \argA3, d9
160
- vshl.u64 d3, \argA2, #45
161
- vldr.64 d6, [r0, #\argA1]
162
- vshl.u64 d4, \argA3, #61
163
- veor.64 \argA4, \argA4, d10
164
- vsri.64 d3, \argA2, #64-45
165
- veor.64 \argA5, \argA5, d11
166
- vsri.64 d4, \argA3, #64-61
167
- vshl.u64 d0, \argA4, #28
168
- veor.64 d6, d6, d7
169
- vshl.u64 d1, \argA5, #20
170
- vbic.64 \argA3, d4, d3
171
- vsri.64 d0, \argA4, #64-28
172
- vbic.64 \argA4, d0, d4
173
- vshl.u64 d2, d6, #3
174
- vsri.64 d1, \argA5, #64-20
175
- veor.64 \argA4, d3
176
- vsri.64 d2, d6, #64-3
177
- vbic.64 \argA5, d1, d0
178
- vbic.64 d6, d2, d1
179
- vbic.64 \argA2, d3, d2
180
- veor.64 d6, d0
181
- veor.64 \argA2, d1
182
- vstr.64 d6, [r0, #\argA1]
183
- veor.64 \argA3, d2
184
- veor.64 d5, d6
185
- veor.64 \argA5, d4
186
- .endm
187
-
188
- .macro KeccakS_ThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
189
-
190
- @ Bu = ROL64((argA1^Da), 18)
191
- @ Ba = ROL64((argA2^De), 1)
192
- @ Be = ROL64((argA3^Di), 6)
193
- @ Bi = ROL64((argA4^Do), 25)
194
- @ Bo = ROL64((argA5^Du), 8)
195
- @ argA1 = Ba ^((~Be)& Bi )
196
- @ Ca ^= argA1@
197
- @ argA2 = Be ^((~Bi)& Bo )
198
- @ argA3 = Bi ^((~Bo)& Bu )
199
- @ argA4 = Bo ^((~Bu)& Ba )
200
- @ argA5 = Bu ^((~Ba)& Be )
201
- veor.64 \argA3, \argA3, d9
202
- veor.64 \argA4, \argA4, d10
203
- vshl.u64 d1, \argA3, #6
204
- vldr.64 d6, [r0, #\argA1]
205
- vshl.u64 d2, \argA4, #25
206
- veor.64 \argA5, \argA5, d11
207
- vsri.64 d1, \argA3, #64-6
208
- veor.64 \argA2, \argA2, d8
209
- vsri.64 d2, \argA4, #64-25
210
- vext.8 d3, \argA5, \argA5, #7
211
- veor.64 d6, d6, d7
212
- vbic.64 \argA3, d2, d1
213
- vadd.u64 d0, \argA2, \argA2
214
- vbic.64 \argA4, d3, d2
215
- vsri.64 d0, \argA2, #64-1
216
- vshl.u64 d4, d6, #18
217
- veor.64 \argA2, d1, \argA4
218
- veor.64 \argA3, d0
219
- vsri.64 d4, d6, #64-18
220
- vstr.64 \argA3, [r0, #\argA1]
221
- veor.64 d5, \argA3
222
- vbic.64 \argA5, d1, d0
223
- vbic.64 \argA3, d4, d3
224
- vbic.64 \argA4, d0, d4
225
- veor.64 \argA3, d2
226
- veor.64 \argA4, d3
227
- veor.64 \argA5, d4
228
- .endm
229
-
230
- .macro KeccakS_ThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
231
-
232
- @ Be = ROL64((argA1^Da), 36)
233
- @ Bi = ROL64((argA2^De), 10)
234
- @ Bo = ROL64((argA3^Di), 15)
235
- @ Bu = ROL64((argA4^Do), 56)
236
- @ Ba = ROL64((argA5^Du), 27)
237
- @ argA1 = Ba ^((~Be)& Bi )
238
- @ Ca ^= argA1
239
- @ argA2 = Be ^((~Bi)& Bo )
240
- @ argA3 = Bi ^((~Bo)& Bu )
241
- @ argA4 = Bo ^((~Bu)& Ba )
242
- @ argA5 = Bu ^((~Ba)& Be )
243
- veor.64 \argA2, \argA2, d8
244
- veor.64 \argA3, \argA3, d9
245
- vshl.u64 d2, \argA2, #10
246
- vldr.64 d6, [r0, #\argA1]
247
- vshl.u64 d3, \argA3, #15
248
- veor.64 \argA4, \argA4, d10
249
- vsri.64 d2, \argA2, #64-10
250
- vsri.64 d3, \argA3, #64-15
251
- veor.64 \argA5, \argA5, d11
252
- vext.8 d4, \argA4, \argA4, #1
253
- vbic.64 \argA2, d3, d2
254
- vshl.u64 d0, \argA5, #27
255
- veor.64 d6, d6, d7
256
- vbic.64 \argA3, d4, d3
257
- vsri.64 d0, \argA5, #64-27
258
- vshl.u64 d1, d6, #36
259
- veor.64 \argA3, d2
260
- vbic.64 \argA4, d0, d4
261
- vsri.64 d1, d6, #64-36
262
- veor.64 \argA4, d3
263
- vbic.64 d6, d2, d1
264
- vbic.64 \argA5, d1, d0
265
- veor.64 d6, d0
266
- veor.64 \argA2, d1
267
- vstr.64 d6, [r0, #\argA1]
268
- veor.64 d5, d6
269
- veor.64 \argA5, d4
270
- .endm
271
-
272
- .macro KeccakS_ThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
273
-
274
- @ Bo = ROL64((argA1^Da), 41)
275
- @ Bu = ROL64((argA2^De), 2)
276
- @ Ba = ROL64((argA3^Di), 62)
277
- @ Be = ROL64((argA4^Do), 55)
278
- @ Bi = ROL64((argA5^Du), 39)
279
- @ argA1 = Ba ^((~Be)& Bi )
280
- @ Ca ^= argA1
281
- @ argA2 = Be ^((~Bi)& Bo )
282
- @ argA3 = Bi ^((~Bo)& Bu )
283
- @ argA4 = Bo ^((~Bu)& Ba )
284
- @ argA5 = Bu ^((~Ba)& Be )
285
- veor.64 \argA2, \argA2, d8
286
- veor.64 \argA3, \argA3, d9
287
- vshl.u64 d4, \argA2, #2
288
- veor.64 \argA5, \argA5, d11
289
- vshl.u64 d0, \argA3, #62
290
- vldr.64 d6, [r0, #\argA1]
291
- vsri.64 d4, \argA2, #64-2
292
- veor.64 \argA4, \argA4, d10
293
- vsri.64 d0, \argA3, #64-62
294
- vshl.u64 d1, \argA4, #55
295
- veor.64 d6, d6, d7
296
- vshl.u64 d2, \argA5, #39
297
- vsri.64 d1, \argA4, #64-55
298
- vbic.64 \argA4, d0, d4
299
- vsri.64 d2, \argA5, #64-39
300
- vbic.64 \argA2, d1, d0
301
- vshl.u64 d3, d6, #41
302
- veor.64 \argA5, d4, \argA2
303
- vbic.64 \argA2, d2, d1
304
- vsri.64 d3, d6, #64-41
305
- veor.64 d6, d0, \argA2
306
- vbic.64 \argA2, d3, d2
307
- vbic.64 \argA3, d4, d3
308
- veor.64 \argA2, d1
309
- vstr.64 d6, [r0, #\argA1]
310
- veor.64 d5, d6
311
- veor.64 \argA3, d2
312
- veor.64 \argA4, d3
313
- .endm
314
-
315
- @ --- macros for Parallel permutation
316
-
317
- .macro m_pls start
318
- .if \start != -1
319
- add r3, r0, #\start
320
- .endif
321
- .endm
322
-
323
- .macro m_ld qreg, next
324
- .if \next == 16
325
- vld1.64 { \qreg }, [r3:128]!
326
- .else
327
- vld1.64 { \qreg }, [r3:128], r4
328
- .endif
329
- .endm
330
-
331
- .macro m_st qreg, next
332
- .if \next == 16
333
- vst1.64 { \qreg }, [r3:128]!
334
- .else
335
- vst1.64 { \qreg }, [r3:128], r4
336
- .endif
337
- .endm
338
-
339
- .macro KeccakP_ThetaRhoPiChiIota ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
340
-
341
- @ De = Ca ^ ROL64(Ci, 1)
342
- @ Di = Ce ^ ROL64(Co, 1)
343
- @ Do = Ci ^ ROL64(Cu, 1)
344
- @ Du = Co ^ ROL64(Ca, 1)
345
- @ Da = Cu ^ ROL64(Ce, 1)
346
- vadd.u64 q6, q2, q2
347
- vadd.u64 q7, q3, q3
348
- vadd.u64 q8, q4, q4
349
- vadd.u64 q9, q0, q0
350
- vadd.u64 q5, q1, q1
351
-
352
- vsri.64 q6, q2, #63
353
- vsri.64 q7, q3, #63
354
- vsri.64 q8, q4, #63
355
- vsri.64 q9, q0, #63
356
- vsri.64 q5, q1, #63
357
-
358
- veor.64 q6, q6, q0
359
- veor.64 q7, q7, q1
360
- veor.64 q8, q8, q2
361
- .if \next != 16
362
- mov r4, #\next
363
- .endif
364
- veor.64 q9, q9, q3
365
- veor.64 q5, q5, q4
366
-
367
- @ Ba = argA1^Da
368
- @ Be = ROL64(argA2^De, 44)
369
- @ Bi = ROL64(argA3^Di, 43)
370
- @ Bo = ROL64(argA4^Do, 21)
371
- @ Bu = ROL64(argA5^Du, 14)
372
- m_ld q10, \next
373
- m_pls \ofs2
374
- m_ld q1, \next
375
- m_pls \ofs3
376
- veor.64 q10, q10, q5
377
- m_ld q2, \next
378
- m_pls \ofs4
379
- veor.64 q1, q1, q6
380
- m_ld q3, \next
381
- m_pls \ofs5
382
- veor.64 q2, q2, q7
383
- m_ld q4, \next
384
- veor.64 q3, q3, q8
385
- mov r6, r5
386
- veor.64 q4, q4, q9
387
-
388
- vst1.64 { q6 }, [r6:128]!
389
- vshl.u64 q11, q1, #44
390
- vshl.u64 q12, q2, #43
391
- vst1.64 { q7 }, [r6:128]!
392
- vshl.u64 q13, q3, #21
393
- vshl.u64 q14, q4, #14
394
- vst1.64 { q8 }, [r6:128]!
395
- vsri.64 q11, q1, #64-44
396
- vsri.64 q12, q2, #64-43
397
- vst1.64 { q9 }, [r6:128]!
398
- vsri.64 q13, q3, #64-21
399
- vsri.64 q14, q4, #64-14
400
-
401
- @ argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
402
- @ argA2 = Be ^(~Bi & Bo)
403
- @ argA3 = Bi ^(~Bo & Bu)
404
- @ argA4 = Bo ^(~Bu & Ba)
405
- @ argA5 = Bu ^(~Ba & Be)
406
- vld1.64 { d30 }, [r1:64]
407
- vbic.64 q0, q12, q11
408
- vbic.64 q1, q13, q12
409
- vld1.64 { d31 }, [r1:64]!
410
- veor.64 q0, q10
411
- vbic.64 q4, q11, q10
412
- veor.64 q0, q15
413
- vbic.64 q2, q14, q13
414
- vbic.64 q3, q10, q14
415
-
416
- m_pls \ofs1
417
- veor.64 q1, q11
418
- m_st q0, \next
419
- m_pls \ofs2
420
- veor.64 q2, q12
421
- m_st q1, \next
422
- m_pls \ofs3
423
- veor.64 q3, q13
424
- m_st q2, \next
425
- m_pls \ofs4
426
- veor.64 q4, q14
427
- m_st q3, \next
428
- m_pls \ofs5
429
- m_st q4, \next
430
- m_pls \ofsn1
431
- .endm
432
-
433
- .macro KeccakP_ThetaRhoPiChi ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1, Bb1, Bb2, Bb3, Bb4, Bb5, Rr1, Rr2, Rr3, Rr4, Rr5
434
-
435
- @ Bb1 = ROL64((argA1^Da), Rr1)
436
- @ Bb2 = ROL64((argA2^De), Rr2)
437
- @ Bb3 = ROL64((argA3^Di), Rr3)
438
- @ Bb4 = ROL64((argA4^Do), Rr4)
439
- @ Bb5 = ROL64((argA5^Du), Rr5)
440
-
441
- .if \next != 16
442
- mov r4, #\next
443
- .endif
444
-
445
- m_ld \Bb1, \next
446
- m_pls \ofs2
447
- m_ld \Bb2, \next
448
- m_pls \ofs3
449
- veor.64 q15, q5, \Bb1
450
- m_ld \Bb3, \next
451
- m_pls \ofs4
452
- veor.64 q6, q6, \Bb2
453
- m_ld \Bb4, \next
454
- m_pls \ofs5
455
- veor.64 q7, q7, \Bb3
456
- m_ld \Bb5, \next
457
- veor.64 q8, q8, \Bb4
458
- veor.64 q9, q9, \Bb5
459
-
460
- vshl.u64 \Bb1, q15, #\Rr1
461
- vshl.u64 \Bb2, q6, #\Rr2
462
- vshl.u64 \Bb3, q7, #\Rr3
463
- vshl.u64 \Bb4, q8, #\Rr4
464
- vshl.u64 \Bb5, q9, #\Rr5
465
-
466
- vsri.64 \Bb1, q15, #64-\Rr1
467
- vsri.64 \Bb2, q6, #64-\Rr2
468
- vsri.64 \Bb3, q7, #64-\Rr3
469
- vsri.64 \Bb4, q8, #64-\Rr4
470
- vsri.64 \Bb5, q9, #64-\Rr5
471
-
472
- @ argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
473
- @ argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
474
- @ argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
475
- @ argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
476
- @ argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
477
- vbic.64 q15, q12, q11
478
- mov r6, r5
479
- vbic.64 q6, q13, q12
480
- m_pls \ofs1
481
- vbic.64 q7, q14, q13
482
- vbic.64 q8, q10, q14
483
- vbic.64 q9, q11, q10
484
-
485
- veor.64 q15, q15, q10
486
- veor.64 q6, q6, q11
487
-
488
- m_st q15, \next
489
- m_pls \ofs2
490
- veor.64 q7, q7, q12
491
-
492
- m_st q6, \next
493
- m_pls \ofs3
494
- veor.64 q1, q1, q6
495
- vld1.64 { q6 }, [r6:128]!
496
- veor.64 q8, q8, q13
497
-
498
- m_st q7, \next
499
- m_pls \ofs4
500
- veor.64 q2, q2, q7
501
- vld1.64 { q7 }, [r6:128]!
502
- veor.64 q9, q9, q14
503
-
504
- m_st q8, \next
505
- m_pls \ofs5
506
- veor.64 q3, q3, q8
507
-
508
- m_st q9, \next
509
-
510
- vld1.64 { q8 }, [r6:128]!
511
- veor.64 q4, q4, q9
512
- m_pls \ofsn1
513
- vld1.64 { q9 }, [r6:128]!
514
- veor.64 q0, q0, q15
515
- .endm
516
-
517
- .macro KeccakP_ThetaRhoPiChi1 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
518
- KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20
519
- .endm
520
-
521
- .macro KeccakP_ThetaRhoPiChi2 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
522
- KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8
523
- .endm
524
-
525
- .macro KeccakP_ThetaRhoPiChi3 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
526
- KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27
527
- .endm
528
-
529
- .macro KeccakP_ThetaRhoPiChi4 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
530
-
531
- @ Bo = ROL64((argA1^Da), 41)
532
- @ Bu = ROL64((argA2^De), 2)
533
- @ Ba = ROL64((argA3^Di), 62)
534
- @ Be = ROL64((argA4^Do), 55)
535
- @ Bi = ROL64((argA5^Du), 39)
536
- @ KeccakChi
537
-
538
- .if \next != 16
539
- mov r4, #\next
540
- .endif
541
-
542
- m_ld q13, \next
543
- m_pls \ofs2
544
- m_ld q14, \next
545
- m_pls \ofs3
546
- veor.64 q5, q5, q13
547
- m_ld q10, \next
548
- m_pls \ofs4
549
- veor.64 q6, q6, q14
550
- m_ld q11, \next
551
- m_pls \ofs5
552
- veor.64 q7, q7, q10
553
- m_ld q12, \next
554
- veor.64 q8, q8, q11
555
- veor.64 q9, q9, q12
556
-
557
- vshl.u64 q13, q5, #41
558
- vshl.u64 q14, q6, #2
559
- vshl.u64 q10, q7, #62
560
- vshl.u64 q11, q8, #55
561
- vshl.u64 q12, q9, #39
562
-
563
- vsri.64 q13, q5, #64-41
564
- vsri.64 q14, q6, #64-2
565
- vsri.64 q11, q8, #64-55
566
- vsri.64 q12, q9, #64-39
567
- vsri.64 q10, q7, #64-62
568
-
569
- vbic.64 q5, q12, q11
570
- vbic.64 q6, q13, q12
571
- vbic.64 q7, q14, q13
572
- vbic.64 q8, q10, q14
573
- vbic.64 q9, q11, q10
574
- veor.64 q5, q5, q10
575
- veor.64 q6, q6, q11
576
- veor.64 q7, q7, q12
577
- veor.64 q8, q8, q13
578
- m_pls \ofs1
579
- veor.64 q9, q9, q14
580
- m_st q5, \next
581
- m_pls \ofs2
582
- veor.64 q0, q0, q5
583
- m_st q6, \next
584
- m_pls \ofs3
585
- veor.64 q1, q1, q6
586
- m_st q7, \next
587
- m_pls \ofs4
588
- veor.64 q2, q2, q7
589
- m_st q8, \next
590
- m_pls \ofs5
591
- veor.64 q3, q3, q8
592
- m_st q9, \next
593
- m_pls \ofsn1
594
- veor.64 q4, q4, q9
595
- .endm
596
-
597
- @----------------------------------------------------------------------------
598
- @
599
- @ void KeccakP1600times2_StaticInitialize( void )
600
- @
601
- .align 8
602
- .global KeccakP1600times2_StaticInitialize
603
- .type KeccakP1600times2_StaticInitialize, %function;
604
- KeccakP1600times2_StaticInitialize:
605
- bx lr
606
-
607
-
608
- @----------------------------------------------------------------------------
609
- @
610
- @ void KeccakP1600times2_InitializeAll( void *states )
611
- @
612
- .align 8
613
- .global KeccakP1600times2_InitializeAll
614
- .type KeccakP1600times2_InitializeAll, %function;
615
- KeccakP1600times2_InitializeAll:
616
- vmov.i64 q0, #0
617
- vmov.i64 q1, #0
618
- vmov.i64 q2, #0
619
- vmov.i64 q3, #0
620
- vstm r0!, { d0 - d7 } @ 8 (clear 8 lanes at a time)
621
- vstm r0!, { d0 - d7 } @ 16
622
- vstm r0!, { d0 - d7 } @ 24
623
- vstm r0!, { d0 - d7 } @ 32
624
- vstm r0!, { d0 - d7 } @ 40
625
- vstm r0!, { d0 - d7 } @ 48
626
- vstm r0!, { d0 - d1} @ 50
627
- bx lr
628
-
629
-
630
-
631
- @----------------------------------------------------------------------------
632
- @
633
- @ void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
634
- @
635
- .align 8
636
- .global KeccakP1600times2_AddByte
637
- .type KeccakP1600times2_AddByte, %function;
638
- KeccakP1600times2_AddByte:
639
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
640
- lsr r1, r3, #3 @ states += (offset & ~7) * 2
641
- add r0, r0, r1, LSL #4
642
- and r3, r3, #7
643
- add r0, r0, r3 @ states += offset & 7
644
- ldrb r1, [r0]
645
- eor r1, r1, r2
646
- strb r1, [r0]
647
- bx lr
648
-
649
-
650
- @----------------------------------------------------------------------------
651
- @
652
- @ void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
653
- @ unsigned int offset, unsigned int length )
654
- @
655
- .align 8
656
- .global KeccakP1600times2_AddBytes
657
- .type KeccakP1600times2_AddBytes, %function;
658
- KeccakP1600times2_AddBytes:
659
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
660
- ldr r1, [sp, #0*4] @ r1 = length
661
- cmp r1, #0
662
- beq KeccakP1600times2_AddBytes_Exit
663
- push { r4- r7 }
664
- lsr r4, r3, #3 @ states += (offset & ~7) * 2
665
- add r0, r0, r4, LSL #4
666
- ands r3, r3, #7 @ .if (offset & 7) != 0
667
- beq KeccakP1600times2_AddBytes_CheckLanes
668
- add r0, r0, r3 @ states += offset & 7
669
- rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
670
- KeccakP1600times2_AddBytes_LoopBytesFirst:
671
- ldrb r4, [r0]
672
- ldrb r5, [r2], #1
673
- eor r4, r4, r5
674
- subs r1, r1, #1
675
- strb r4, [r0], #1
676
- beq KeccakP1600times2_AddBytes_Done
677
- subs r3, r3, #1
678
- bne KeccakP1600times2_AddBytes_LoopBytesFirst
679
- add r0, r0, #8 @ states += 8 (next lane of current state part)
680
- KeccakP1600times2_AddBytes_CheckLanes:
681
- lsrs r3, r1, #3
682
- beq KeccakP1600times2_AddBytes_CheckBytesLast
683
- KeccakP1600times2_AddBytes_LoopLanes:
684
- ldr r4, [r0]
685
- ldr r5, [r0, #4]
686
- ldr r6, [r2], #4
687
- ldr r7, [r2], #4
688
- eor r4, r4, r6
689
- eor r5, r5, r7
690
- subs r3, r3, #1
691
- str r4, [r0], #4
692
- str r5, [r0], #12 @ states += 8 (next lane of current state part)
693
- bne KeccakP1600times2_AddBytes_LoopLanes
694
- KeccakP1600times2_AddBytes_CheckBytesLast:
695
- ands r1, r1, #7
696
- beq KeccakP1600times2_AddBytes_Done
697
- KeccakP1600times2_AddBytes_LoopBytesLast:
698
- ldrb r4, [r0]
699
- ldrb r5, [r2], #1
700
- eor r4, r4, r5
701
- subs r1, r1, #1
702
- strb r4, [r0], #1
703
- bne KeccakP1600times2_AddBytes_LoopBytesLast
704
- KeccakP1600times2_AddBytes_Done:
705
- pop { r4- r7 }
706
- KeccakP1600times2_AddBytes_Exit:
707
- bx lr
708
-
709
-
710
- @----------------------------------------------------------------------------
711
- @
712
- @ void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
713
- @
714
- .align 8
715
- .global KeccakP1600times2_AddLanesAll
716
- .type KeccakP1600times2_AddLanesAll, %function;
717
- KeccakP1600times2_AddLanesAll:
718
- cmp r2, #0
719
- beq KeccakP1600times2_AddLanesAll_Exit
720
- add r3, r1, r3, LSL #3 @ r3: data + 8 * laneOffset
721
- push {r4 - r7}
722
- KeccakP1600times2_AddLanesAll_Loop:
723
- ldr r4, [r1], #4 @ index 0
724
- ldr r5, [r1], #4
725
- ldrd r6, r7, [r0]
726
- eor r6, r6, r4
727
- eor r7, r7, r5
728
- strd r6, r7, [r0], #8
729
- ldr r4, [r3], #4 @ index 1
730
- ldr r5, [r3], #4
731
- ldrd r6, r7, [r0]
732
- eor r6, r6, r4
733
- eor r7, r7, r5
734
- strd r6, r7, [r0], #8
735
- subs r2, r2, #1
736
- bne KeccakP1600times2_AddLanesAll_Loop
737
- pop {r4 - r7}
738
- KeccakP1600times2_AddLanesAll_Exit:
739
- bx lr
740
-
741
-
742
- @----------------------------------------------------------------------------
743
- @
744
- @ void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
745
- @ unsigned int offset, unsigned int length )
746
- @
747
- .align 8
748
- .global KeccakP1600times2_OverwriteBytes
749
- .type KeccakP1600times2_OverwriteBytes, %function;
750
- KeccakP1600times2_OverwriteBytes:
751
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
752
- ldr r1, [sp, #0*4] @ r1 = length
753
- cmp r1, #0
754
- beq KeccakP1600times2_OverwriteBytes_Exit
755
- push { r4-r5 }
756
- lsr r4, r3, #3 @ states += (offset & ~7) * 2
757
- add r0, r0, r4, LSL #4
758
- ands r3, r3, #7 @ .if (offset & 7) != 0
759
- beq KeccakP1600times2_OverwriteBytes_CheckLanes
760
- add r0, r0, r3 @ states += offset & 7
761
- rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
762
- KeccakP1600times2_OverwriteBytes_LoopBytesFirst:
763
- ldrb r4, [r2], #1
764
- strb r4, [r0], #1
765
- subs r1, r1, #1
766
- beq KeccakP1600times2_OverwriteBytes_Done
767
- subs r3, r3, #1
768
- bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
769
- add r0, r0, #8 @ states += 8 (next lane of current state part)
770
- KeccakP1600times2_OverwriteBytes_CheckLanes:
771
- lsrs r3, r1, #3
772
- beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
773
- KeccakP1600times2_OverwriteBytes_LoopLanes:
774
- ldr r4, [r2], #4
775
- ldr r5, [r2], #4
776
- str r4, [r0], #4
777
- str r5, [r0], #12 @ states += 8 (next lane of current state part)
778
- subs r3, r3, #1
779
- bne KeccakP1600times2_OverwriteBytes_LoopLanes
780
- KeccakP1600times2_OverwriteBytes_CheckBytesLast:
781
- ands r1, r1, #7
782
- beq KeccakP1600times2_OverwriteBytes_Done
783
- KeccakP1600times2_OverwriteBytes_LoopBytesLast:
784
- ldrb r4, [r2], #1
785
- subs r1, r1, #1
786
- strb r4, [r0], #1
787
- bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
788
- KeccakP1600times2_OverwriteBytes_Done:
789
- pop { r4- r5 }
790
- KeccakP1600times2_OverwriteBytes_Exit:
791
- bx lr
792
-
793
-
794
- @----------------------------------------------------------------------------
795
- @
796
- @ void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
797
- @
798
- .align 8
799
- .global KeccakP1600times2_OverwriteLanesAll
800
- .type KeccakP1600times2_OverwriteLanesAll, %function;
801
- KeccakP1600times2_OverwriteLanesAll:
802
- cmp r2, #0
803
- beq KeccakP1600times2_OverwriteLanesAll_Exit
804
- lsls r12, r1, #32-3
805
- bne KeccakP1600times2_OverwriteLanesAll_Unaligned
806
- add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
807
- lsrs r2, r2, #1
808
- bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned
809
- vldm r1!, { d0 }
810
- vldm r3!, { d1 }
811
- vstm r0!, { d0 - d1 }
812
- beq KeccakP1600times2_OverwriteLanesAll_Exit
813
- KeccakP1600times2_OverwriteLanesAll_LoopAligned:
814
- vldm r1!, { d0 }
815
- vldm r1!, { d2 }
816
- vldm r3!, { d1 }
817
- vldm r3!, { d3 }
818
- subs r2, r2, #1
819
- vstm r0!, { d0 - d3 }
820
- bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
821
- bx lr
822
- KeccakP1600times2_OverwriteLanesAll_Unaligned:
823
- add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
824
- push { r4, r5 }
825
- KeccakP1600times2_OverwriteLanesAll_LoopUnaligned:
826
- ldr r4, [r1], #4
827
- ldr r5, [r1], #4
828
- strd r4, r5, [r0], #8
829
- ldr r4, [r3], #4
830
- ldr r5, [r3], #4
831
- subs r2, r2, #1
832
- strd r4, r5, [r0], #8
833
- bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
834
- pop { r4, r5 }
835
- KeccakP1600times2_OverwriteLanesAll_Exit:
836
- bx lr
837
-
838
-
839
- @----------------------------------------------------------------------------
840
- @
841
- @ void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
842
- @
843
- .align 8
844
- .global KeccakP1600times2_OverwriteWithZeroes
845
- .type KeccakP1600times2_OverwriteWithZeroes, %function;
846
- KeccakP1600times2_OverwriteWithZeroes:
847
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
848
- lsrs r1, r2, #3 @ r1: laneCount
849
- beq KeccakP1600times2_OverwriteWithZeroes_Bytes
850
- vmov.i64 d0, #0
851
- KeccakP1600times2_OverwriteWithZeroes_LoopLanes:
852
- subs r1, r1, #1
853
- vstm r0!, { d0 }
854
- add r0, r0, #8
855
- bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
856
- KeccakP1600times2_OverwriteWithZeroes_Bytes:
857
- ands r2, r2, #7 @ r2: byteCount remaining
858
- beq KeccakP1600times2_OverwriteWithZeroes_Exit
859
- movs r3, #0
860
- KeccakP1600times2_OverwriteWithZeroes_LoopBytes:
861
- subs r2, r2, #1
862
- strb r3, [r0], #1
863
- bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
864
- KeccakP1600times2_OverwriteWithZeroes_Exit:
865
- bx lr
866
-
867
-
868
- @----------------------------------------------------------------------------
869
- @
870
- @ void KeccakP1600times2_ExtractBytes( const void *states, unsigned int instanceIndex, unsigned char *data,
871
- @ unsigned int offset, unsigned int length )
872
- @
873
- .align 8
874
- .global KeccakP1600times2_ExtractBytes
875
- .type KeccakP1600times2_ExtractBytes, %function;
876
- KeccakP1600times2_ExtractBytes:
877
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
878
- ldr r1, [sp, #0*4] @ r1 = length
879
- cmp r1, #0
880
- beq KeccakP1600times2_ExtractBytes_Exit
881
- push { r4-r5 }
882
- lsr r4, r3, #3 @ states += (offset & ~7) * 2
883
- add r0, r0, r4, LSL #4
884
- ands r3, r3, #7 @ .if (offset & 7) != 0
885
- beq KeccakP1600times2_ExtractBytes_CheckLanes
886
- add r0, r0, r3 @ states += offset & 7
887
- rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
888
- KeccakP1600times2_ExtractBytes_LoopBytesFirst:
889
- ldrb r4, [r0], #1
890
- strb r4, [r2], #1
891
- subs r1, r1, #1
892
- beq KeccakP1600times2_ExtractBytes_Done
893
- subs r3, r3, #1
894
- bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
895
- add r0, r0, #8 @ states += 8 (next lane of current state part)
896
- KeccakP1600times2_ExtractBytes_CheckLanes:
897
- lsrs r3, r1, #3
898
- beq KeccakP1600times2_ExtractBytes_CheckBytesLast
899
- KeccakP1600times2_ExtractBytes_LoopLanes:
900
- ldr r4, [r0], #4
901
- ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
902
- str r4, [r2], #4
903
- str r5, [r2], #4
904
- subs r3, r3, #1
905
- bne KeccakP1600times2_ExtractBytes_LoopLanes
906
- KeccakP1600times2_ExtractBytes_CheckBytesLast:
907
- ands r1, r1, #7
908
- beq KeccakP1600times2_ExtractBytes_Done
909
- KeccakP1600times2_ExtractBytes_LoopBytesLast:
910
- ldrb r4, [r0], #1
911
- subs r1, r1, #1
912
- strb r4, [r2], #1
913
- bne KeccakP1600times2_ExtractBytes_LoopBytesLast
914
- KeccakP1600times2_ExtractBytes_Done:
915
- pop { r4-r5 }
916
- KeccakP1600times2_ExtractBytes_Exit:
917
- bx lr
918
-
919
-
920
- @----------------------------------------------------------------------------
921
- @
922
- @ void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
923
- @
924
- .align 8
925
- .global KeccakP1600times2_ExtractLanesAll
926
- .type KeccakP1600times2_ExtractLanesAll, %function;
927
- KeccakP1600times2_ExtractLanesAll:
928
- cmp r2, #0
929
- beq KeccakP1600times2_ExtractLanesAll_Exit
930
- lsls r12, r1, #32-3
931
- bne KeccakP1600times2_ExtractLanesAll_Unaligned
932
- add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
933
- lsrs r2, r2, #1
934
- bcc KeccakP1600times2_ExtractLanesAll_LoopAligned
935
- vldm r0!, { d0 - d1 }
936
- vstm r1!, { d0 }
937
- vstm r3!, { d1 }
938
- beq KeccakP1600times2_ExtractLanesAll_Exit
939
- KeccakP1600times2_ExtractLanesAll_LoopAligned:
940
- vldm r0!, { d0 - d3 }
941
- subs r2, r2, #1
942
- vstm r1!, { d0 }
943
- vstm r1!, { d2 }
944
- vstm r3!, { d1 }
945
- vstm r3!, { d3 }
946
- bne KeccakP1600times2_ExtractLanesAll_LoopAligned
947
- bx lr
948
- KeccakP1600times2_ExtractLanesAll_Unaligned:
949
- add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
950
- push { r4, r5 }
951
- KeccakP1600times2_ExtractLanesAll_LoopUnaligned:
952
- ldrd r4, r5, [r0], #8
953
- str r4, [r1], #4
954
- str r5, [r1], #4
955
- ldrd r4, r5, [r0], #8
956
- subs r2, r2, #1
957
- str r4, [r3], #4
958
- str r5, [r3], #4
959
- bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
960
- pop { r4, r5 }
961
- KeccakP1600times2_ExtractLanesAll_Exit:
962
- bx lr
963
-
964
-
965
- @----------------------------------------------------------------------------
966
- @
967
- @ void KeccakP1600times2_ExtractAndAddBytes( const void *states, unsigned int instanceIndex,
968
- @ const unsigned char *input, unsigned char *output,
969
- @ unsigned int offset, unsigned int length )
970
- @
971
- .align 8
972
- .global KeccakP1600times2_ExtractAndAddBytes
973
- .type KeccakP1600times2_ExtractAndAddBytes, %function;
974
- KeccakP1600times2_ExtractAndAddBytes:
975
- add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
976
- ldr r1, [sp, #1*4] @ r1 = length
977
- cmp r1, #0
978
- beq KeccakP1600times2_ExtractAndAddBytes_Exit
979
- push { r4 - r9 }
980
- ldr r8, [sp, #6*4] @ r8 = offset
981
- lsr r4, r8, #3 @ states += (offset & ~7) * 2
982
- add r0, r0, r4, LSL #4
983
- ands r8, r8, #7 @ .if (offset & 7) != 0
984
- beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
985
- add r0, r0, r8 @ states += offset & 7
986
- rsb r8, r8, #8 @ lenInLane = 8 - (offset & 7)
987
- KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst:
988
- ldrb r4, [r0], #1
989
- ldrb r5, [r2], #1
990
- eor r4, r4, r5
991
- strb r4, [r3], #1
992
- subs r1, r1, #1
993
- beq KeccakP1600times2_ExtractAndAddBytes_Done
994
- subs r8, r8, #1
995
- bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
996
- add r0, r0, #8 @ states += 8 (next lane of current state part)
997
- KeccakP1600times2_ExtractAndAddBytes_CheckLanes:
998
- lsrs r8, r1, #3
999
- beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1000
- KeccakP1600times2_ExtractAndAddBytes_LoopLanes:
1001
- ldr r4, [r0], #4
1002
- ldr r5, [r0], #12
1003
- ldr r6, [r2], #4
1004
- ldr r7, [r2], #4
1005
- eor r4, r4, r6
1006
- eor r5, r5, r7
1007
- str r4, [r3], #4
1008
- str r5, [r3], #4 @ output += 4 (the states pointer already skipped to the next lane in the ldr with #12 above)
1009
- subs r8, r8, #1
1010
- bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1011
- KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast:
1012
- ands r1, r1, #7
1013
- beq KeccakP1600times2_ExtractAndAddBytes_Done
1014
- KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast:
1015
- ldrb r4, [r0], #1
1016
- ldrb r5, [r2], #1
1017
- eor r4, r4, r5
1018
- strb r4, [r3], #1
1019
- subs r1, r1, #1
1020
- bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1021
- KeccakP1600times2_ExtractAndAddBytes_Done:
1022
- pop { r4 - r9 }
1023
- KeccakP1600times2_ExtractAndAddBytes_Exit:
1024
- bx lr
1025
-
1026
-
1027
- @----------------------------------------------------------------------------
1028
- @
1029
- @ void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1030
- @ const unsigned char *input, unsigned char *output,
1031
- @ unsigned int laneCount, unsigned int laneOffset )
1032
- @
1033
- .align 8
1034
- .global KeccakP1600times2_ExtractAndAddLanesAll
1035
- .type KeccakP1600times2_ExtractAndAddLanesAll, %function;
1036
- KeccakP1600times2_ExtractAndAddLanesAll:
1037
- cmp r3, #0
1038
- beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1039
- orr r12, r1, r2
1040
- lsls r12, r12, #32-3 @ use the unaligned path if input or output is not 8-byte aligned
1041
- bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1042
- push {r4,r5}
1043
- ldr r12, [sp, #2*4] @ r12 = laneOffset
1044
- lsrs r3, r3, #1
1045
- add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1046
- add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1047
- bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1048
- vldm r0!, { d0 - d1 }
1049
- vldm r1!, { d2 }
1050
- vldm r4!, { d3 }
1051
- veor q0, q0, q1
1052
- vstm r2!, { d0 }
1053
- vstm r5!, { d1 }
1054
- beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone
1055
- KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned:
1056
- vldm r0!, { d0 - d3 }
1057
- vldm r1!, { d4 }
1058
- vldm r1!, { d6 }
1059
- vldm r4!, { d5 }
1060
- vldm r4!, { d7 }
1061
- subs r3, r3, #1
1062
- veor q0, q0, q2
1063
- veor q1, q1, q3
1064
- vstm r2!, { d0 }
1065
- vstm r2!, { d2 }
1066
- vstm r5!, { d1 }
1067
- vstm r5!, { d3 }
1068
- bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1069
- KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone:
1070
- pop {r4,r5}
1071
- bx lr
1072
- KeccakP1600times2_ExtractAndAddLanesAll_Unaligned:
1073
- push {r4-r9}
1074
- ldr r12, [sp, #6*4] @ r12 = laneOffset
1075
- add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1076
- add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1077
- KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned:
1078
- ldrd r8, r9, [r0], #8
1079
- ldr r6, [r1], #4
1080
- ldr r7, [r1], #4
1081
- eor r8, r8, r6
1082
- eor r9, r9, r7
1083
- str r8, [r2], #4
1084
- str r9, [r2], #4
1085
- ldrd r8, r9, [r0], #8
1086
- ldr r6, [r4], #4
1087
- ldr r7, [r4], #4
1088
- eor r8, r8, r6
1089
- eor r9, r9, r7
1090
- str r8, [r5], #4
1091
- subs r3, r3, #1
1092
- str r9, [r5], #4
1093
- bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1094
- pop { r4 - r9 }
1095
- KeccakP1600times2_ExtractAndAddLanesAll_Exit:
1096
- bx lr
1097
-
1098
-
1099
- @----------------------------------------------------------------------------
1100
- @
1101
- @ void KeccakP1600times2_PermuteAll_6rounds( void *states )
1102
- @
1103
- .align 8
1104
- .global KeccakP1600times2_PermuteAll_6rounds
1105
- .type KeccakP1600times2_PermuteAll_6rounds, %function;
1106
- KeccakP1600times2_PermuteAll_6rounds:
1107
- adr r1, KeccakP1600times2_Permute_RoundConstants6
1108
- movs r2, #6+2
1109
- vpush {q4-q7}
1110
- push {r4-r7}
1111
- sub sp, #4*2*8+8 @allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1112
- add r5, sp, #8
1113
-
1114
- @ ba
1115
- @ be = me, me = be
1116
- @ bi = gi, gi = bi
1117
- @ bo = so, so = bo
1118
- @ bu = ku, ku = bu
1119
-
1120
- @ ga = sa, sa = ga
1121
- @ ge = ke, ke = ge
1122
- @ go = mo, mo = go
1123
- @ gu
1124
-
1125
- @ ka = ma, ma = ka
1126
- @ ki = si, si = ki
1127
- @ ko
1128
-
1129
- @ mu = su, su = mu
1130
- @ mi
1131
- @ se
1132
-
1133
- @PrepareTheta
1134
- @ Ca = ba ^ ga ^ ka ^ ma ^ sa
1135
- @ Ce = be ^ ge ^ ke ^ me ^ se
1136
- @ Ci = bi ^ gi ^ ki ^ mi ^ si
1137
- @ Co = bo ^ go ^ ko ^ mo ^ so
1138
- @ Cu = bu ^ gu ^ ku ^ mu ^ su
1139
- vldm r0, { q0 - q4 } @ ba be bi bo bu
1140
- bic r5, #15
1141
- add r3, r0, #_me
1142
- vldm r3, { q6 } @ me
1143
- vstm r3, { q1 }
1144
- veor.64 q1, q1, q6
1145
- add r4, r0, #_be
1146
- vstm r4!, { q6 } @ be
1147
-
1148
- add r3, r0, #_ga
1149
- vldm r3, { q10 - q14 } @ ga ge gi go gu
1150
- add r3, r0, #_gi
1151
- vstm r3, { q2 }
1152
- veor.64 q2, q2, q12
1153
- vstm r4!, { q12 } @ bi
1154
-
1155
- add r3, r0, #_so
1156
- vldm r3, { q8 } @ so
1157
- vstm r3, { q3 }
1158
- veor.64 q3, q3, q8
1159
- vstm r4!, { q8 } @ bo
1160
-
1161
- add r3, r0, #_ku
1162
- vldm r3, { q9 } @ ku
1163
- vstm r3, { q4 }
1164
- veor.64 q4, q4, q9
1165
- vstm r4!, { q9 } @ bu
1166
-
1167
- add r3, r0, #_sa
1168
- vldm r3, { q5 } @ sa
1169
- vstm r3, { q10 }
1170
- add r4, r0, #_ga
1171
- veor.64 q0, q0, q5
1172
- veor.64 q0, q0, q10
1173
- vstm r4!, { q5 } @ ga
1174
-
1175
- add r3, r0, #_ke
1176
- vldm r3, { q6 } @ ke
1177
- vstm r3, { q11 }
1178
- veor.64 q1, q1, q6
1179
- veor.64 q1, q1, q11
1180
- vstm r4!, { q6 } @ ge
1181
-
1182
- add r3, r0, #_mo
1183
- vldm r3, { q8 } @ mo
1184
- vstm r3, { q13 }
1185
- add r4, r0, #_go
1186
- veor.64 q3, q3, q8
1187
- veor.64 q3, q3, q13
1188
- vstm r4!, { q8 } @ go
1189
- veor.64 q4, q4, q14 @ gu
1190
-
1191
- add r4, r0, #_ka @ ka
1192
- vldm r4, { q10 }
1193
- add r3, r0, #_ma
1194
- vldm r3, { q5 } @ ma
1195
- vstm r3, { q10 }
1196
- veor.64 q0, q0, q5
1197
- veor.64 q0, q0, q10
1198
- vstm r4!, { q5 } @ ka
1199
-
1200
- add r4, r0, #_ki @ ki ko
1201
- vldm r4, { q12, q13 }
1202
- add r3, r0, #_si
1203
- vldm r3, { q7 } @ si
1204
- vstm r3, { q12 }
1205
- veor.64 q2, q2, q7
1206
- veor.64 q2, q2, q12
1207
- vstm r4, { q7 } @ ki
1208
- veor.64 q3, q3, q13 @ ko
1209
-
1210
- add r4, r0, #_mu @ mu
1211
- vldm r4, { q14 }
1212
- add r3, r0, #_su
1213
- vldm r3, { q9 } @ su
1214
- vstm r3, { q14 }
1215
- veor.64 q4, q4, q9
1216
- veor.64 q4, q4, q14
1217
- vstm r4, { q9 } @ mu
1218
-
1219
- add r4, r0, #_mi @ mi
1220
- vldm r4, { q12 }
1221
- veor.64 q2, q2, q12
1222
- add r3, r0, #_se @ se
1223
- vldm r3, { q6 }
1224
- veor.64 q1, q1, q6
1225
-
1226
- mov r3, r0
1227
- b KeccakP1600times2_PermuteAll_Round2
1228
-
1229
-
1230
- .align 8
1231
- KeccakP1600times2_Permute_RoundConstants24:
1232
- .quad 0x0000000000000001
1233
- .quad 0x0000000000008082
1234
- .quad 0x800000000000808a
1235
- .quad 0x8000000080008000
1236
- .quad 0x000000000000808b
1237
- .quad 0x0000000080000001
1238
- .quad 0x8000000080008081
1239
- .quad 0x8000000000008009
1240
- .quad 0x000000000000008a
1241
- .quad 0x0000000000000088
1242
- .quad 0x0000000080008009
1243
- .quad 0x000000008000000a
1244
- KeccakP1600times2_Permute_RoundConstants12:
1245
- .quad 0x000000008000808b
1246
- .quad 0x800000000000008b
1247
- .quad 0x8000000000008089
1248
- .quad 0x8000000000008003
1249
- .quad 0x8000000000008002
1250
- .quad 0x8000000000000080
1251
- KeccakP1600times2_Permute_RoundConstants6:
1252
- .quad 0x000000000000800a
1253
- .quad 0x800000008000000a
1254
- KeccakP1600times2_Permute_RoundConstants4:
1255
- .quad 0x8000000080008081
1256
- .quad 0x8000000000008080
1257
- .quad 0x0000000080000001
1258
- .quad 0x8000000080008008
1259
-
1260
- @----------------------------------------------------------------------------
1261
- @
1262
- @ void KeccakP1600times2_PermuteAll_24rounds( void *states )
1263
- @
1264
- .align 8
1265
- .global KeccakP1600times2_PermuteAll_24rounds
1266
- .type KeccakP1600times2_PermuteAll_24rounds, %function;
1267
- KeccakP1600times2_PermuteAll_24rounds:
1268
- adr r1, KeccakP1600times2_Permute_RoundConstants24
1269
- movs r2, #24
1270
- b KeccakP1600times2_PermuteAll
1271
-
1272
-
1273
- @----------------------------------------------------------------------------
1274
- @
1275
- @ void KeccakP1600times2_PermuteAll_12rounds( void *states )
1276
- @
1277
- .align 8
1278
- .global KeccakP1600times2_PermuteAll_12rounds
1279
- .type KeccakP1600times2_PermuteAll_12rounds, %function;
1280
- KeccakP1600times2_PermuteAll_12rounds:
1281
- adr r1, KeccakP1600times2_Permute_RoundConstants12
1282
- movs r2, #12
1283
- b KeccakP1600times2_PermuteAll
1284
-
1285
-
1286
- @----------------------------------------------------------------------------
1287
- @
1288
- @ void KeccakP1600times2_PermuteAll_4rounds( void *states )
1289
- @
1290
- .align 8
1291
- .global KeccakP1600times2_PermuteAll_4rounds
1292
- .type KeccakP1600times2_PermuteAll_4rounds, %function;
1293
- KeccakP1600times2_PermuteAll_4rounds:
1294
- adr r1, KeccakP1600times2_Permute_RoundConstants4
1295
- movs r2, #4
1296
- b KeccakP1600times2_PermuteAll
1297
-
1298
-
1299
- @----------------------------------------------------------------------------
1300
- @
1301
- @ void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1302
- @ Common body reached from the _Nrounds stubs. In: r0 = states, r1 = rc, r2 = nr. nr must be a non-zero multiple of 4, because the loop runs four rounds per pass and exits only when r2 reaches exactly 0. No .global directive is given for this label in this block.
1303
- .align 8
1304
- .type KeccakP1600times2_PermuteAll, %function;
1305
- KeccakP1600times2_PermuteAll:
1306
- vpush {q4-q7} @ save q4-q7 (d8-d15), which are overwritten below
1307
- push {r4-r7} @ save r4-r7 (r5 is used below; the others are presumably used inside the round macros - TODO confirm)
1308
- sub sp, #4*2*8+8 @ allocate 72 bytes: 4 double lanes of 16 bytes, plus 8 bytes so that a 16-byte aligned window fits
1309
- mov r3, r0 @ r3 = read cursor over states (r0 is kept as the base)
1310
- add r5, sp, #8 @ r5 = sp + 8, rounded down to a 16-byte boundary by the bic below
1311
-
1312
- @PrepareTheta
1313
- @ Ca = ba ^ ga ^ ka ^ ma ^ sa
1314
- @ Ce = be ^ ge ^ ke ^ me ^ se
1315
- @ Ci = bi ^ gi ^ ki ^ mi ^ si
1316
- @ Co = bo ^ go ^ ko ^ mo ^ so
1317
- @ Cu = bu ^ gu ^ ku ^ mu ^ su
1318
- vld1.64 { d0, d1, d2, d3 }, [r3:256]! @ _ba _be (32 bytes; the :256 qualifier requires a 32-byte aligned address; r3 is post-incremented)
1319
- bic r5, #15 @ r5 = 16-byte aligned scratch base; equals sp or sp + 8 provided sp is 8-byte aligned here
1320
- vld1.64 { d4, d5, d6, d7 }, [r3:256]! @ _bi _bo
1321
- vld1.64 { d8, d9, d10, d11 }, [r3:256]! @ _bu _ga
1322
- vld1.64 { d12, d13 }, [r3:128]! @ _ge
1323
- veor.64 q0, q0, q5 @ q0 = ba ^ ga; q0..q4 accumulate the five column parities while q5..q9 receive the next loads
1324
- vld1.64 { d14, d15 }, [r3:128]! @ _gi
1325
- veor.64 q1, q1, q6
1326
- vld1.64 { d16, d17 }, [r3:128]! @ _go
1327
- veor.64 q2, q2, q7
1328
- vld1.64 { d18, d19 }, [r3:128]! @ _gu
1329
- veor.64 q3, q3, q8
1330
- vld1.64 { d10, d11 }, [r3:128]! @ _ka
1331
- veor.64 q4, q4, q9
1332
- vld1.64 { d12, d13 }, [r3:128]! @ _ke
1333
- veor.64 q0, q0, q5
1334
- vld1.64 { d14, d15 }, [r3:128]! @ _ki
1335
- veor.64 q1, q1, q6
1336
- vld1.64 { d16, d17 }, [r3:128]! @ _ko
1337
- veor.64 q2, q2, q7
1338
- vld1.64 { d18, d19 }, [r3:128]! @ _ku
1339
- veor.64 q3, q3, q8
1340
- vld1.64 { d10, d11 }, [r3:128]! @ _ma
1341
- veor.64 q4, q4, q9
1342
- vld1.64 { d12, d13 }, [r3:128]! @ _me
1343
- veor.64 q0, q0, q5
1344
- vld1.64 { d14, d15 }, [r3:128]! @ _mi
1345
- veor.64 q1, q1, q6
1346
- vld1.64 { d16, d17 }, [r3:128]! @ _mo
1347
- veor.64 q2, q2, q7
1348
- vld1.64 { d18, d19 }, [r3:128]! @ _mu
1349
- veor.64 q3, q3, q8
1350
- vld1.64 { d10, d11 }, [r3:128]! @ _sa
1351
- veor.64 q4, q4, q9
1352
- vld1.64 { d12, d13 }, [r3:128]! @ _se
1353
- veor.64 q0, q0, q5
1354
- vld1.64 { d14, d15 }, [r3:128]! @ _si
1355
- veor.64 q1, q1, q6
1356
- vld1.64 { d16, d17 }, [r3:128]! @ _so
1357
- veor.64 q2, q2, q7
1358
- vld1.64 { d18, d19 }, [r3:128]! @ _su
1359
- mov r3, r0 @ rewind r3 to the start of states
1360
- veor.64 q3, q3, q8
1361
- veor.64 q4, q4, q9 @ q0..q4 now hold Ca, Ce, Ci, Co, Cu
1362
-
1363
- KeccakP1600times2_PermuteAll_RoundLoop: @ each pass runs the four ...Iota groups below (four rounds) and subtracts 4 from r2
1364
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka @ _ba, _ge, _ki, _mo, _su
1365
- KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa @ _ka, _me, _si, _bo, _gu
1366
- KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga @ _sa, _be, _gi, _ko, _mu
1367
- KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma @ _ga, _ke, _mi, _so, _bu
1368
- KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba @ _ma, _se, _bi, _go, _ku
1369
-
1370
- KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa @ _ba, _me, _gi, _so, _ku
1371
- KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma @ _sa, _ke, _bi, _mo, _gu
1372
- KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka @ _ma, _ge, _si, _ko, _bu
1373
- KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga @ _ka, _be, _mi, _go, _su
1374
- KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba @ _ga, _se, _ki, _bo, _mu
1375
- KeccakP1600times2_PermuteAll_Round2:
1376
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma @ _ba, _ke, _si, _go, _mu
1377
- KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga @ _ma, _be, _ki, _so, _gu
1378
- KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa @ _ga, _me, _bi, _ko, _su
1379
- KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka @ _sa, _ge, _mi, _bo, _ku
1380
- KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba @ _ka, _se, _gi, _mo, _bu
1381
-
1382
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga @ _ba, _be, _bi, _bo, _bu
1383
- KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka @ _ga, _ge, _gi, _go, _gu
1384
- KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma @ _ka, _ke, _ki, _ko, _ku
1385
- KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa @ _ma, _me, _mi, _mo, _mu
1386
- subs r2, #4 @ four rounds done this pass; the flags set here are tested by the bne below, so the macro in between is expected to leave them unchanged - TODO confirm against the macro definition
1387
- KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba @ _sa, _se, _si, _so, _su
1388
- bne KeccakP1600times2_PermuteAll_RoundLoop @ repeat until r2 reaches exactly 0
1389
- add sp, #4*2*8+8 @ free the 72-byte scratch area allocated in the prologue (4 double lanes + 8 alignment bytes)
1390
- pop {r4-r7} @ restore the core registers saved on entry
1391
- vpop {q4-q7} @ restore q4-q7 saved on entry
1392
- bx lr @ return to the caller of the entry stub
1393
-
1394
-