sleeping_kangaroo12 0.0.1 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (296) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +27 -0
  3. data/README.md +48 -53
  4. data/ext/Rakefile +12 -37
  5. data/ext/binding/sleeping_kangaroo12.c +1 -16
  6. data/ext/{xkcp → k12}/Makefile +0 -0
  7. data/ext/k12/Makefile.build +118 -0
  8. data/ext/k12/README.markdown +86 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  11. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  12. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  13. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  14. data/ext/k12/lib/KangarooTwelve.c +332 -0
  15. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  16. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  19. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  20. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  24. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  25. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  26. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  27. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  28. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  33. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  34. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  35. data/lib/sleeping_kangaroo12/binding.rb +2 -1
  36. data/lib/sleeping_kangaroo12/build/loader.rb +1 -0
  37. data/lib/sleeping_kangaroo12/build/platform.rb +1 -0
  38. data/lib/sleeping_kangaroo12/digest.rb +38 -4
  39. data/lib/sleeping_kangaroo12/version.rb +1 -1
  40. metadata +48 -288
  41. data/ext/config/xkcp.build +0 -17
  42. data/ext/xkcp/LICENSE +0 -1
  43. data/ext/xkcp/Makefile.build +0 -200
  44. data/ext/xkcp/README.markdown +0 -296
  45. data/ext/xkcp/lib/HighLevel.build +0 -143
  46. data/ext/xkcp/lib/LowLevel.build +0 -757
  47. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  48. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  49. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  50. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  51. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  52. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  53. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  54. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  55. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  56. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  57. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  58. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  59. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  60. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  61. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  62. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  63. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  64. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  65. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  66. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  67. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  68. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  69. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  70. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  71. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  72. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  73. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  74. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  75. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  76. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  77. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  78. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  79. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  80. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  81. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  82. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  83. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  84. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  96. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  98. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  99. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  100. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  107. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  108. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  109. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  111. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  112. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  113. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  114. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  115. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  116. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  117. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  120. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  121. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  122. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  123. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  124. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  125. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  126. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  127. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  128. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  129. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  130. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  131. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  132. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  133. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  145. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  146. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  147. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  148. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  149. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  159. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  160. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  161. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  162. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  163. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  170. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  171. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  172. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  173. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  174. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  175. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  177. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  178. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  179. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  180. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  181. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  182. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  183. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  184. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  185. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  186. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  187. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  189. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  190. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  191. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  192. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  193. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  194. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  195. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  196. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  203. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  204. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  205. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  206. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  207. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  208. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  209. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  210. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  211. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  212. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  213. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  219. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  220. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  221. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  222. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  223. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  224. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  225. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  226. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  227. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  228. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  229. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  230. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  231. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  232. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  233. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  234. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  235. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  236. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  237. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  246. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  247. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  248. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  249. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  250. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  251. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  252. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  253. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  254. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  255. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  256. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  257. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  258. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  259. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  260. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  261. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  262. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  263. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  264. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  265. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  266. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  267. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  268. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  269. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  270. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  271. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  272. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  273. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  274. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  275. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  276. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  277. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  278. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  279. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  280. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  281. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  282. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  283. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  284. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  285. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  286. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  287. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  288. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  289. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  290. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  291. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  292. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  293. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  294. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  295. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  296. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,1392 +0,0 @@
1
- ;
2
- ; The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
- ;
4
- ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
- ;
6
- ; For more information, feedback or questions, please refer to the Keccak Team website:
7
- ; https://keccak.team/
8
- ;
9
- ; To the extent possible under law, the implementer has waived all copyright
10
- ; and related or neighboring rights to the source code in this file.
11
- ; http://creativecommons.org/publicdomain/zero/1.0/
12
- ;
13
- ; ---
14
- ;
15
- ; This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
- ; Please refer to PlSnP-documentation.h for more details.
17
- ;
18
- ; This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
- ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
- ;
21
-
22
- ; WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
23
- ; WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
24
-
25
- ; INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
26
- ; INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
27
-
28
- ; INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
29
-
30
-
31
- PRESERVE8
32
- AREA |.text|, CODE, READONLY
33
-
34
- ;----------------------------------------------------------------------------
35
-
36
- ; --- offsets in state: byte offset of each lane slot from the state base, 16 bytes per slot (presumably one 64-bit lane from each of the two interleaved states - confirm against the SnP header)
37
- _ba equ 0*16
38
- _be equ 1*16
39
- _bi equ 2*16
40
- _bo equ 3*16
41
- _bu equ 4*16
42
- _ga equ 5*16
43
- _ge equ 6*16
44
- _gi equ 7*16
45
- _go equ 8*16
46
- _gu equ 9*16
47
- _ka equ 10*16
48
- _ke equ 11*16
49
- _ki equ 12*16
50
- _ko equ 13*16
51
- _ku equ 14*16
52
- _ma equ 15*16
53
- _me equ 16*16
54
- _mi equ 17*16
55
- _mo equ 18*16
56
- _mu equ 19*16
57
- _sa equ 20*16
58
- _se equ 21*16
59
- _si equ 22*16
60
- _so equ 23*16
61
- _su equ 24*16
62
-
63
- ; --- macros for Single permutation
64
- ; KeccakS_ThetaRhoPiChiIota: builds the theta D values, then applies theta/rho/chi/iota to the five lanes named by the arguments (rotations 0/44/43/21/14).
65
- MACRO
66
- KeccakS_ThetaRhoPiChiIota $argA1, $argA2, $argA3, $argA4, $argA5
67
- ; $argA1 is a byte offset from r0 (that lane lives in memory); $argA2..$argA5 are D registers updated in place. d0-d4 and d6 are scratch.
68
- ;Prepare Theta: Ca arrives in d5; Ce, Ci, Co, Cu are folded from d12-d16, d17-d21, d22-d26 and d27-d31. Results stay in d7 (Da) and d8-d11 (De, Di, Do, Du).
69
- ; Ca = Aba^Aga^Aka^Ama^Asa
70
- ; Ce = Abe^Age^Ake^Ame^Ase
71
- ; Ci = Abi^Agi^Aki^Ami^Asi
72
- ; Co = Abo^Ago^Ako^Amo^Aso
73
- ; Cu = Abu^Agu^Aku^Amu^Asu
74
- ; De = Ca^ROL64(Ci, 1)
75
- ; Di = Ce^ROL64(Co, 1)
76
- ; Do = Ci^ROL64(Cu, 1)
77
- ; Du = Co^ROL64(Ca, 1)
78
- ; Da = Cu^ROL64(Ce, 1)
79
- veor.64 q4, q6, q7
80
- veor.64 q5, q9, q10
81
- veor.64 d8, d8, d9
82
- veor.64 d10, d10, d11
83
- veor.64 d1, d8, d16 ; d1 = Ce
84
- veor.64 d2, d10, d17 ; d2 = Ci
85
-
86
- veor.64 q4, q11, q12
87
- veor.64 q5, q14, q15
88
- veor.64 d8, d8, d9
89
- veor.64 d10, d10, d11
90
- veor.64 d3, d8, d26 ; d3 = Co
91
-
92
- vadd.u64 q4, q1, q1
93
- veor.64 d4, d10, d27 ; d4 = Cu
94
- vmov.64 d0, d5 ; d0 = Ca, so q0 = (Ca, Ce)
95
- vsri.64 q4, q1, #63 ; with the vadd above: q4 = (ROL64(Ci, 1), ROL64(Co, 1))
96
-
97
- vadd.u64 q5, q2, q2
98
- veor.64 q4, q4, q0 ; d8 = De, d9 = Di
99
- vsri.64 q5, q2, #63 ; q5 = (ROL64(Cu, 1), ROL64(Ca, 1))
100
- vadd.u64 d7, d1, d1
101
- veor.64 $argA2, $argA2, d8
102
- veor.64 q5, q5, q1 ; d10 = Do, d11 = Du
103
-
104
- vsri.64 d7, d1, #63
105
- vshl.u64 d1, $argA2, #44
106
- veor.64 $argA3, $argA3, d9
107
- veor.64 d7, d7, d4 ; d7 = Da
108
-
109
- ; Ba = argA1^Da
110
- ; Be = ROL64((argA2^De), 44)
111
- ; Bi = ROL64((argA3^Di), 43)
112
- ; Bo = ROL64((argA4^Do), 21)
113
- ; Bu = ROL64((argA5^Du), 14)
114
- ; argA2 = Be ^((~Bi)& Bo )
115
- ; argA3 = Bi ^((~Bo)& Bu )
116
- ; argA4 = Bo ^((~Bu)& Ba )
117
- ; argA5 = Bu ^((~Ba)& Be )
118
- ; argA1 = Ba ^((~Be)& Bi )
119
- ; argA1 ^= KeccakP1600RoundConstants[i+round]
120
- vsri.64 d1, $argA2, #64-44
121
- vshl.u64 d2, $argA3, #43
122
- vldr.64 d0, [r0, #$argA1]
123
- veor.64 $argA4, $argA4, d10
124
- vsri.64 d2, $argA3, #64-43
125
- vshl.u64 d3, $argA4, #21
126
- veor.64 $argA5, $argA5, d11
127
- veor.64 d0, d0, d7 ; d0 = Ba
128
- vsri.64 d3, $argA4, #64-21
129
- vbic.64 d5, d2, d1
130
- vshl.u64 d4, $argA5, #14
131
- vbic.64 $argA2, d3, d2
132
- vld1.64 d6, [r1]! ; fetch one 64-bit word through r1 (the round constant per the comment above) and advance r1
133
- veor.64 d5, d0
134
- vsri.64 d4, $argA5, #64-14
135
- veor.64 d5, d6
136
- vbic.64 $argA5, d1, d0
137
- vbic.64 $argA3, d4, d3
138
- vbic.64 $argA4, d0, d4
139
- veor.64 $argA2, d1
140
- vstr.64 d5, [r0, #$argA1] ; write back the new argA1; d5 restarts the Ca sum that KeccakS_ThetaRhoPiChi1..4 XOR into
141
- veor.64 $argA3, d2
142
- veor.64 $argA4, d3
143
- veor.64 $argA5, d4
144
- MEND
145
- ; KeccakS_ThetaRhoPiChi1: theta/rho/chi for five lanes (rotations 3/45/61/28/20), reusing Da in d7 and De, Di, Do, Du in d8-d11.
146
- MACRO
147
- KeccakS_ThetaRhoPiChi1 $argA1, $argA2, $argA3, $argA4, $argA5
148
- ; $argA1 is a byte offset from r0 (lane read and written in memory); $argA2..$argA5 are D registers updated in place. d0-d4 and d6 are scratch; d5 accumulates Ca.
149
- ; Bi = ROL64((argA1^Da), 3)
150
- ; Bo = ROL64((argA2^De), 45)
151
- ; Bu = ROL64((argA3^Di), 61)
152
- ; Ba = ROL64((argA4^Do), 28)
153
- ; Be = ROL64((argA5^Du), 20)
154
- ; argA1 = Ba ^((~Be)& Bi )
155
- ; Ca ^= argA1
156
- ; argA2 = Be ^((~Bi)& Bo )
157
- ; argA3 = Bi ^((~Bo)& Bu )
158
- ; argA4 = Bo ^((~Bu)& Ba )
159
- ; argA5 = Bu ^((~Ba)& Be )
160
- veor.64 $argA2, $argA2, d8
161
- veor.64 $argA3, $argA3, d9
162
- vshl.u64 d3, $argA2, #45
163
- vldr.64 d6, [r0, #$argA1]
164
- vshl.u64 d4, $argA3, #61
165
- veor.64 $argA4, $argA4, d10
166
- vsri.64 d3, $argA2, #64-45 ; d3 = Bo
167
- veor.64 $argA5, $argA5, d11
168
- vsri.64 d4, $argA3, #64-61 ; d4 = Bu
169
- vshl.u64 d0, $argA4, #28
170
- veor.64 d6, d6, d7
171
- vshl.u64 d1, $argA5, #20
172
- vbic.64 $argA3, d4, d3
173
- vsri.64 d0, $argA4, #64-28 ; d0 = Ba
174
- vbic.64 $argA4, d0, d4
175
- vshl.u64 d2, d6, #3
176
- vsri.64 d1, $argA5, #64-20 ; d1 = Be
177
- veor.64 $argA4, d3
178
- vsri.64 d2, d6, #64-3 ; d2 = Bi
179
- vbic.64 $argA5, d1, d0
180
- vbic.64 d6, d2, d1
181
- vbic.64 $argA2, d3, d2
182
- veor.64 d6, d0
183
- veor.64 $argA2, d1
184
- vstr.64 d6, [r0, #$argA1] ; store the new argA1
185
- veor.64 $argA3, d2
186
- veor.64 d5, d6 ; Ca ^= argA1
187
- veor.64 $argA5, d4
188
- MEND
189
- ; KeccakS_ThetaRhoPiChi2: theta/rho/chi for five lanes (rotations 18/1/6/25/8), reusing Da in d7 and De, Di, Do, Du in d8-d11.
190
- MACRO
191
- KeccakS_ThetaRhoPiChi2 $argA1, $argA2, $argA3, $argA4, $argA5
192
- ; $argA1 is a byte offset from r0 (lane read and written in memory); $argA2..$argA5 are D registers updated in place. d0-d4 and d6 are scratch; d5 accumulates Ca.
193
- ; Bu = ROL64((argA1^Da), 18)
194
- ; Ba = ROL64((argA2^De), 1)
195
- ; Be = ROL64((argA3^Di), 6)
196
- ; Bi = ROL64((argA4^Do), 25)
197
- ; Bo = ROL64((argA5^Du), 8)
198
- ; argA1 = Ba ^((~Be)& Bi )
199
- ; Ca ^= argA1;
200
- ; argA2 = Be ^((~Bi)& Bo )
201
- ; argA3 = Bi ^((~Bo)& Bu )
202
- ; argA4 = Bo ^((~Bu)& Ba )
203
- ; argA5 = Bu ^((~Ba)& Be )
204
- veor.64 $argA3, $argA3, d9
205
- veor.64 $argA4, $argA4, d10
206
- vshl.u64 d1, $argA3, #6
207
- vldr.64 d6, [r0, #$argA1]
208
- vshl.u64 d2, $argA4, #25
209
- veor.64 $argA5, $argA5, d11
210
- vsri.64 d1, $argA3, #64-6 ; d1 = Be
211
- veor.64 $argA2, $argA2, d8
212
- vsri.64 d2, $argA4, #64-25 ; d2 = Bi
213
- vext.8 d3, $argA5, $argA5, #7 ; rotation by a whole byte: d3 = ROL64(argA5, 8) = Bo
214
- veor.64 d6, d6, d7
215
- vbic.64 $argA3, d2, d1
216
- vadd.u64 d0, $argA2, $argA2
217
- vbic.64 $argA4, d3, d2
218
- vsri.64 d0, $argA2, #64-1 ; d0 = Ba
219
- vshl.u64 d4, d6, #18
220
- veor.64 $argA2, d1, $argA4 ; argA2 is final here: Be ^ (~Bi & Bo)
221
- veor.64 $argA3, d0 ; $argA3 temporarily holds the new argA1 value Ba ^ (~Be & Bi)
222
- vsri.64 d4, d6, #64-18 ; d4 = Bu
223
- vstr.64 $argA3, [r0, #$argA1] ; store the new argA1
224
- veor.64 d5, $argA3 ; Ca ^= argA1
225
- vbic.64 $argA5, d1, d0
226
- vbic.64 $argA3, d4, d3 ; from here $argA3 is rebuilt as its own result
227
- vbic.64 $argA4, d0, d4
228
- veor.64 $argA3, d2
229
- veor.64 $argA4, d3
230
- veor.64 $argA5, d4
231
- MEND
232
- ; KeccakS_ThetaRhoPiChi3: theta/rho/chi for five lanes (rotations 36/10/15/56/27), reusing Da in d7 and De, Di, Do, Du in d8-d11.
233
- MACRO
234
- KeccakS_ThetaRhoPiChi3 $argA1, $argA2, $argA3, $argA4, $argA5
235
- ; $argA1 is a byte offset from r0 (lane read and written in memory); $argA2..$argA5 are D registers updated in place. d0-d4 and d6 are scratch; d5 accumulates Ca.
236
- ; Be = ROL64((argA1^Da), 36)
237
- ; Bi = ROL64((argA2^De), 10)
238
- ; Bo = ROL64((argA3^Di), 15)
239
- ; Bu = ROL64((argA4^Do), 56)
240
- ; Ba = ROL64((argA5^Du), 27)
241
- ; argA1 = Ba ^((~Be)& Bi )
242
- ; Ca ^= argA1
243
- ; argA2 = Be ^((~Bi)& Bo )
244
- ; argA3 = Bi ^((~Bo)& Bu )
245
- ; argA4 = Bo ^((~Bu)& Ba )
246
- ; argA5 = Bu ^((~Ba)& Be )
247
- veor.64 $argA2, $argA2, d8
248
- veor.64 $argA3, $argA3, d9
249
- vshl.u64 d2, $argA2, #10
250
- vldr.64 d6, [r0, #$argA1]
251
- vshl.u64 d3, $argA3, #15
252
- veor.64 $argA4, $argA4, d10
253
- vsri.64 d2, $argA2, #64-10 ; d2 = Bi
254
- vsri.64 d3, $argA3, #64-15 ; d3 = Bo
255
- veor.64 $argA5, $argA5, d11
256
- vext.8 d4, $argA4, $argA4, #1 ; rotation by whole bytes: d4 = ROL64(argA4, 56) = Bu
257
- vbic.64 $argA2, d3, d2
258
- vshl.u64 d0, $argA5, #27
259
- veor.64 d6, d6, d7
260
- vbic.64 $argA3, d4, d3
261
- vsri.64 d0, $argA5, #64-27 ; d0 = Ba
262
- vshl.u64 d1, d6, #36
263
- veor.64 $argA3, d2
264
- vbic.64 $argA4, d0, d4
265
- vsri.64 d1, d6, #64-36 ; d1 = Be
266
- veor.64 $argA4, d3
267
- vbic.64 d6, d2, d1
268
- vbic.64 $argA5, d1, d0
269
- veor.64 d6, d0
270
- veor.64 $argA2, d1
271
- vstr.64 d6, [r0, #$argA1] ; store the new argA1
272
- veor.64 d5, d6 ; Ca ^= argA1
273
- veor.64 $argA5, d4
274
- MEND
275
- ; KeccakS_ThetaRhoPiChi4: theta/rho/chi for five lanes (rotations 41/2/62/55/39), reusing Da in d7 and De, Di, Do, Du in d8-d11.
276
- MACRO
277
- KeccakS_ThetaRhoPiChi4 $argA1, $argA2, $argA3, $argA4, $argA5
278
- ; $argA1 is a byte offset from r0 (lane read and written in memory); $argA2..$argA5 are D registers updated in place. d0-d4 and d6 are scratch; d5 accumulates Ca.
279
- ; Bo = ROL64((argA1^Da), 41)
280
- ; Bu = ROL64((argA2^De), 2)
281
- ; Ba = ROL64((argA3^Di), 62)
282
- ; Be = ROL64((argA4^Do), 55)
283
- ; Bi = ROL64((argA5^Du), 39)
284
- ; argA1 = Ba ^((~Be)& Bi )
285
- ; Ca ^= argA1
286
- ; argA2 = Be ^((~Bi)& Bo )
287
- ; argA3 = Bi ^((~Bo)& Bu )
288
- ; argA4 = Bo ^((~Bu)& Ba )
289
- ; argA5 = Bu ^((~Ba)& Be )
290
- veor.64 $argA2, $argA2, d8
291
- veor.64 $argA3, $argA3, d9
292
- vshl.u64 d4, $argA2, #2
293
- veor.64 $argA5, $argA5, d11
294
- vshl.u64 d0, $argA3, #62
295
- vldr.64 d6, [r0, #$argA1]
296
- vsri.64 d4, $argA2, #64-2 ; d4 = Bu
297
- veor.64 $argA4, $argA4, d10
298
- vsri.64 d0, $argA3, #64-62 ; d0 = Ba
299
- vshl.u64 d1, $argA4, #55
300
- veor.64 d6, d6, d7
301
- vshl.u64 d2, $argA5, #39
302
- vsri.64 d1, $argA4, #64-55 ; d1 = Be
303
- vbic.64 $argA4, d0, d4
304
- vsri.64 d2, $argA5, #64-39 ; d2 = Bi
305
- vbic.64 $argA2, d1, d0 ; $argA2 was already rotated into d4, so it serves as a temporary: ~Ba & Be
306
- vshl.u64 d3, d6, #41
307
- veor.64 $argA5, d4, $argA2 ; argA5 is final here: Bu ^ (~Ba & Be)
308
- vbic.64 $argA2, d2, d1 ; temporary again: ~Be & Bi
309
- vsri.64 d3, d6, #64-41 ; d3 = Bo
310
- veor.64 d6, d0, $argA2 ; d6 = new argA1 = Ba ^ (~Be & Bi)
311
- vbic.64 $argA2, d3, d2 ; from here $argA2 is rebuilt as its own result
312
- vbic.64 $argA3, d4, d3
313
- veor.64 $argA2, d1
314
- vstr.64 d6, [r0, #$argA1] ; store the new argA1
315
- veor.64 d5, d6 ; Ca ^= argA1
316
- veor.64 $argA3, d2
317
- veor.64 $argA4, d3
318
- MEND
319
-
320
- ; --- macros for Parallel permutation
321
- ; m_pls: point r3 at byte offset $start from r0; a $start of -1 emits nothing and leaves r3 as it is.
322
- MACRO
323
- m_pls $start
324
- if $start != -1
325
- add r3, r0, #$start ; r3 = r0 + $start
326
- endif
327
- MEND
328
- ; m_ld: load Q register $qreg from [r3] (128-bit alignment hint) and post-increment r3: by the 16 bytes transferred when $next == 16, otherwise by r4, which the caller must have loaded with the stride.
329
- MACRO
330
- m_ld $qreg, $next
331
- if $next == 16
332
- vld1.64 { $qreg }, [r3:128]! ; writeback advances r3 by the 16 bytes loaded
333
- else
334
- vld1.64 { $qreg }, [r3:128], r4 ; r3 += r4 after the load
335
- endif
336
- MEND
337
-
338
- MACRO
339
- m_st $qreg, $next ; store one 16-byte Q register to [r3], then advance r3 by $next bytes
340
- if $next == 16
341
- vst1.64 { $qreg }, [r3:128]! ; stride 16: plain writeback advances by the bytes transferred
342
- else
343
- vst1.64 { $qreg }, [r3:128], r4 ; other strides: r4 must already hold $next
344
- endif
345
- MEND
346
-
347
- MACRO
348
- KeccakP_ThetaRhoPiChiIota $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1 ; first plane of a round on Q registers: builds the D values, processes five lanes, XORs in the round constant
349
-
350
- ; De = Ca ^ ROL64(Ci, 1)
351
- ; Di = Ce ^ ROL64(Co, 1)
352
- ; Do = Ci ^ ROL64(Cu, 1)
353
- ; Du = Co ^ ROL64(Ca, 1)
354
- ; Da = Cu ^ ROL64(Ce, 1)
355
- vadd.u64 q6, q2, q2 ; x + x = x << 1; the vsri #63 below completes ROL64(x, 1)
356
- vadd.u64 q7, q3, q3
357
- vadd.u64 q8, q4, q4
358
- vadd.u64 q9, q0, q0
359
- vadd.u64 q5, q1, q1
360
-
361
- vsri.64 q6, q2, #63
362
- vsri.64 q7, q3, #63
363
- vsri.64 q8, q4, #63
364
- vsri.64 q9, q0, #63
365
- vsri.64 q5, q1, #63
366
-
367
- veor.64 q6, q6, q0 ; q6 = De
368
- veor.64 q7, q7, q1 ; q7 = Di
369
- veor.64 q8, q8, q2 ; q8 = Do
370
- if $next != 16
371
- mov r4, #$next ; r4 = post-increment stride used by m_ld / m_st
372
- endif
373
- veor.64 q9, q9, q3 ; q9 = Du
374
- veor.64 q5, q5, q4 ; q5 = Da
375
-
376
- ; Ba = argA1^Da
377
- ; Be = ROL64(argA2^De, 44)
378
- ; Bi = ROL64(argA3^Di, 43)
379
- ; Bo = ROL64(argA4^Do, 21)
380
- ; Bu = ROL64(argA5^Du, 14)
381
- m_ld q10, $next ; r3 must already point at the first lane on entry (no m_pls precedes this load)
382
- m_pls $ofs2
383
- m_ld q1, $next
384
- m_pls $ofs3
385
- veor.64 q10, q10, q5 ; q10 = Ba (no rotation for this lane)
386
- m_ld q2, $next
387
- m_pls $ofs4
388
- veor.64 q1, q1, q6
389
- m_ld q3, $next
390
- m_pls $ofs5
391
- veor.64 q2, q2, q7
392
- m_ld q4, $next
393
- veor.64 q3, q3, q8
394
- mov r6, r5 ; r6 walks the scratch area based at r5
395
- veor.64 q4, q4, q9
396
-
397
- vst1.64 { q6 }, [r6:128]! ; spill De, Di, Do, Du (q6..q9) to the scratch area
398
- vshl.u64 q11, q1, #44 ; q11 = Be
399
- vshl.u64 q12, q2, #43 ; q12 = Bi
400
- vst1.64 { q7 }, [r6:128]!
401
- vshl.u64 q13, q3, #21 ; q13 = Bo
402
- vshl.u64 q14, q4, #14 ; q14 = Bu
403
- vst1.64 { q8 }, [r6:128]!
404
- vsri.64 q11, q1, #64-44
405
- vsri.64 q12, q2, #64-43
406
- vst1.64 { q9 }, [r6:128]!
407
- vsri.64 q13, q3, #64-21
408
- vsri.64 q14, q4, #64-14
409
-
410
- ; argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
411
- ; argA2 = Be ^(~Bi & Bo)
412
- ; argA3 = Bi ^(~Bo & Bu)
413
- ; argA4 = Bo ^(~Bu & Ba)
414
- ; argA5 = Bu ^(~Ba & Be)
415
- vld1.64 { d30 }, [r1:64] ; q15 = the 64-bit round constant at [r1], copied into both halves
416
- vbic.64 q0, q12, q11
417
- vbic.64 q1, q13, q12
418
- vld1.64 { d31 }, [r1:64]! ; second copy; r1 then advances to the next constant
419
- veor.64 q0, q10
420
- vbic.64 q4, q11, q10
421
- veor.64 q0, q15 ; Iota: XOR the round constant into argA1
422
- vbic.64 q2, q14, q13
423
- vbic.64 q3, q10, q14
424
-
425
- m_pls $ofs1
426
- veor.64 q1, q11
427
- m_st q0, $next ; q0..q4 are stored and also left holding the five new lanes
428
- m_pls $ofs2
429
- veor.64 q2, q12
430
- m_st q1, $next
431
- m_pls $ofs3
432
- veor.64 q3, q13
433
- m_st q2, $next
434
- m_pls $ofs4
435
- veor.64 q4, q14
436
- m_st q3, $next
437
- m_pls $ofs5
438
- m_st q4, $next
439
- m_pls $ofsn1 ; leave r3 at the first lane the next macro will load
440
- MEND
441
-
442
- MACRO
443
- KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, $Bb1, $Bb2, $Bb3, $Bb4, $Bb5, $Rr1, $Rr2, $Rr3, $Rr4, $Rr5 ; generic plane step: $Bb1..$Bb5 are the Q registers receiving the rotated lanes, $Rr1..$Rr5 the rotation amounts
444
-
445
- ; Bb1 = ROL64((argA1^Da), Rr1)
446
- ; Bb2 = ROL64((argA2^De), Rr2)
447
- ; Bb3 = ROL64((argA3^Di), Rr3)
448
- ; Bb4 = ROL64((argA4^Do), Rr4)
449
- ; Bb5 = ROL64((argA5^Du), Rr5)
450
-
451
- if $next != 16
452
- mov r4, #$next ; r4 = post-increment stride used by m_ld / m_st
453
- endif
454
-
455
- m_ld $Bb1, $next ; r3 must already point at the first lane on entry (no m_pls precedes this load)
456
- m_pls $ofs2
457
- m_ld $Bb2, $next
458
- m_pls $ofs3
459
- veor.64 q15, q5, $Bb1 ; result goes to q15, so Da in q5 is left untouched
460
- m_ld $Bb3, $next
461
- m_pls $ofs4
462
- veor.64 q6, q6, $Bb2 ; overwrites q6 (De); q6..q9 are reloaded from the scratch area further down
463
- m_ld $Bb4, $next
464
- m_pls $ofs5
465
- veor.64 q7, q7, $Bb3
466
- m_ld $Bb5, $next
467
- veor.64 q8, q8, $Bb4
468
- veor.64 q9, q9, $Bb5
469
-
470
- vshl.u64 $Bb1, q15, #$Rr1 ; each rotation is a vshl followed by a matching vsri
471
- vshl.u64 $Bb2, q6, #$Rr2
472
- vshl.u64 $Bb3, q7, #$Rr3
473
- vshl.u64 $Bb4, q8, #$Rr4
474
- vshl.u64 $Bb5, q9, #$Rr5
475
-
476
- vsri.64 $Bb1, q15, #64-$Rr1
477
- vsri.64 $Bb2, q6, #64-$Rr2
478
- vsri.64 $Bb3, q7, #64-$Rr3
479
- vsri.64 $Bb4, q8, #64-$Rr4
480
- vsri.64 $Bb5, q9, #64-$Rr5
481
-
482
- ; argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
483
- ; argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
484
- ; argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
485
- ; argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
486
- ; argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
487
- vbic.64 q15, q12, q11 ; Chi always reads Ba..Bu from q10..q14, whatever order $Bb1..$Bb5 name them in
488
- mov r6, r5 ; r6 walks the scratch area based at r5
489
- vbic.64 q6, q13, q12
490
- m_pls $ofs1
491
- vbic.64 q7, q14, q13
492
- vbic.64 q8, q10, q14
493
- vbic.64 q9, q11, q10
494
-
495
- veor.64 q15, q15, q10 ; q15 = new argA1
496
- veor.64 q6, q6, q11 ; q6 = new argA2
497
-
498
- m_st q15, $next
499
- m_pls $ofs2
500
- veor.64 q7, q7, q12 ; q7 = new argA3
501
-
502
- m_st q6, $next
503
- m_pls $ofs3
504
- veor.64 q1, q1, q6 ; Ce ^= argA2
505
- vld1.64 { q6 }, [r6:128]! ; q6 is free again: reload it from the scratch area
506
- veor.64 q8, q8, q13 ; q8 = new argA4
507
-
508
- m_st q7, $next
509
- m_pls $ofs4
510
- veor.64 q2, q2, q7 ; Ci ^= argA3
511
- vld1.64 { q7 }, [r6:128]! ; reload q7 from the scratch area
512
- veor.64 q9, q9, q14 ; q9 = new argA5
513
-
514
- m_st q8, $next
515
- m_pls $ofs5
516
- veor.64 q3, q3, q8 ; Co ^= argA4
517
-
518
- m_st q9, $next
519
-
520
- vld1.64 { q8 }, [r6:128]! ; reload q8 from the scratch area
521
- veor.64 q4, q4, q9 ; Cu ^= argA5
522
- m_pls $ofsn1 ; leave r3 at the first lane the next macro will load
523
- vld1.64 { q9 }, [r6:128]! ; reload q9 from the scratch area
524
- veor.64 q0, q0, q15 ; Ca ^= argA1
525
- MEND
526
-
527
- MACRO
528
- KeccakP_ThetaRhoPiChi1 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1 ; generic plane step with rotations 3, 45, 61, 28, 20
529
- KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20 ; rotated lanes land in q12, q13, q14, q10, q11
530
- MEND
531
-
532
- MACRO
533
- KeccakP_ThetaRhoPiChi2 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1 ; generic plane step with rotations 18, 1, 6, 25, 8
534
- KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8 ; rotated lanes land in q14, q10, q11, q12, q13
535
- MEND
536
-
537
- MACRO
538
- KeccakP_ThetaRhoPiChi3 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1 ; generic plane step with rotations 36, 10, 15, 56, 27
539
- KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27 ; rotated lanes land in q11, q12, q13, q14, q10
540
- MEND
541
-
542
- MACRO
543
- KeccakP_ThetaRhoPiChi4 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1 ; plane step with rotations 41, 2, 62, 55, 39; unlike the generic macro it overwrites q5..q9 and does not reload them
544
-
545
- ; Bo = ROL64((argA1^Da), 41)
546
- ; Bu = ROL64((argA2^De), 2)
547
- ; Ba = ROL64((argA3^Di), 62)
548
- ; Be = ROL64((argA4^Do), 55)
549
- ; Bi = ROL64((argA5^Du), 39)
550
- ; KeccakChi
551
-
552
- if $next != 16
553
- mov r4, #$next ; r4 = post-increment stride used by m_ld / m_st
554
- endif
555
-
556
- m_ld q13, $next ; r3 must already point at the first lane on entry (no m_pls precedes this load)
557
- m_pls $ofs2
558
- m_ld q14, $next
559
- m_pls $ofs3
560
- veor.64 q5, q5, q13 ; argA1 ^ Da (q5 is overwritten)
561
- m_ld q10, $next
562
- m_pls $ofs4
563
- veor.64 q6, q6, q14 ; argA2 ^ De
564
- m_ld q11, $next
565
- m_pls $ofs5
566
- veor.64 q7, q7, q10 ; argA3 ^ Di
567
- m_ld q12, $next
568
- veor.64 q8, q8, q11 ; argA4 ^ Do
569
- veor.64 q9, q9, q12 ; argA5 ^ Du
570
-
571
- vshl.u64 q13, q5, #41 ; q13 = Bo
572
- vshl.u64 q14, q6, #2 ; q14 = Bu
573
- vshl.u64 q10, q7, #62 ; q10 = Ba
574
- vshl.u64 q11, q8, #55 ; q11 = Be
575
- vshl.u64 q12, q9, #39 ; q12 = Bi
576
-
577
- vsri.64 q13, q5, #64-41
578
- vsri.64 q14, q6, #64-2
579
- vsri.64 q11, q8, #64-55
580
- vsri.64 q12, q9, #64-39
581
- vsri.64 q10, q7, #64-62
582
-
583
- vbic.64 q5, q12, q11 ; Chi: q5..q9 become the five new lanes
584
- vbic.64 q6, q13, q12
585
- vbic.64 q7, q14, q13
586
- vbic.64 q8, q10, q14
587
- vbic.64 q9, q11, q10
588
- veor.64 q5, q5, q10
589
- veor.64 q6, q6, q11
590
- veor.64 q7, q7, q12
591
- veor.64 q8, q8, q13
592
- m_pls $ofs1
593
- veor.64 q9, q9, q14
594
- m_st q5, $next
595
- m_pls $ofs2
596
- veor.64 q0, q0, q5 ; q0..q4 each absorb the lane just stored
597
- m_st q6, $next
598
- m_pls $ofs3
599
- veor.64 q1, q1, q6
600
- m_st q7, $next
601
- m_pls $ofs4
602
- veor.64 q2, q2, q7
603
- m_st q8, $next
604
- m_pls $ofs5
605
- veor.64 q3, q3, q8
606
- m_st q9, $next
607
- m_pls $ofsn1 ; leave r3 at the first lane the next macro will load
608
- veor.64 q4, q4, q9
609
- MEND
610
-
611
- ;----------------------------------------------------------------------------
612
- ;
613
- ; void KeccakP1600times2_StaticInitialize( void )
614
- ; Does nothing: the routine returns to the caller immediately.
615
- ALIGN
616
- EXPORT KeccakP1600times2_StaticInitialize
617
- KeccakP1600times2_StaticInitialize PROC
618
- bx lr
619
- ENDP
620
-
621
- ;----------------------------------------------------------------------------
622
- ;
623
- ; void KeccakP1600times2_InitializeAll( void *states )
624
- ; Writes 50 zeroed 64-bit words (400 bytes) starting at states (r0).
625
- ALIGN
626
- EXPORT KeccakP1600times2_InitializeAll
627
- KeccakP1600times2_InitializeAll PROC
628
- vmov.i64 q0, #0 ; q0..q3 (d0..d7) = 0; these are caller-saved NEON registers
629
- vmov.i64 q1, #0
630
- vmov.i64 q2, #0
631
- vmov.i64 q3, #0
632
- vstm r0!, { d0 - d7 } ; 8 (clear 8 lanes at a time)
633
- vstm r0!, { d0 - d7 } ; 16
634
- vstm r0!, { d0 - d7 } ; 24
635
- vstm r0!, { d0 - d7 } ; 32
636
- vstm r0!, { d0 - d7 } ; 40
637
- vstm r0!, { d0 - d7 } ; 48
638
- vstm r0!, { d0 - d1} ; 50
639
- bx lr
640
- ENDP
641
-
642
-
643
- ;----------------------------------------------------------------------------
644
- ;
645
- ; void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
646
- ; XORs one byte into a state. Address = states + 8*instanceIndex + 16*(offset/8) + offset%8.
647
- ALIGN
648
- EXPORT KeccakP1600times2_AddByte
649
- KeccakP1600times2_AddByte PROC
650
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
651
- lsr r1, r3, #3 ; states += (offset & ~7) * 2
652
- add r0, r0, r1, LSL #4 ; 16 bytes per lane index: 8 for each of the two instances
653
- and r3, r3, #7
654
- add r0, r0, r3 ; states += offset & 7
655
- ldrb r1, [r0]
656
- eor r1, r1, r2 ; r2 = byte
657
- strb r1, [r0]
658
- bx lr
659
- ENDP
660
-
661
- ;----------------------------------------------------------------------------
662
- ;
663
- ; void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
664
- ; unsigned int offset, unsigned int length )
665
- ; XORs length bytes of data into one instance: leading partial lane, then whole lanes, then trailing bytes.
666
- ALIGN
667
- EXPORT KeccakP1600times2_AddBytes
668
- KeccakP1600times2_AddBytes PROC
669
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
670
- ldr r1, [sp, #0*4] ; r1 = length
671
- cmp r1, #0
672
- beq KeccakP1600times2_AddBytes_Exit ; nothing pushed yet, so skip the pop
673
- push { r4- r7 }
674
- lsr r4, r3, #3 ; states += (offset & ~7) * 2
675
- add r0, r0, r4, LSL #4
676
- ands r3, r3, #7 ; if (offset & 7) != 0
677
- beq KeccakP1600times2_AddBytes_CheckLanes
678
- add r0, r0, r3 ; states += offset & 7
679
- rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
680
- KeccakP1600times2_AddBytes_LoopBytesFirst
681
- ldrb r4, [r0]
682
- ldrb r5, [r2], #1
683
- eor r4, r4, r5
684
- subs r1, r1, #1 ; flags survive the strb below
685
- strb r4, [r0], #1
686
- beq KeccakP1600times2_AddBytes_Done ; length exhausted inside the first lane
687
- subs r3, r3, #1
688
- bne KeccakP1600times2_AddBytes_LoopBytesFirst
689
- add r0, r0, #8 ; states += 8 (next lane of current state part)
690
- KeccakP1600times2_AddBytes_CheckLanes
691
- lsrs r3, r1, #3 ; r3 = number of whole lanes left
692
- beq KeccakP1600times2_AddBytes_CheckBytesLast
693
- KeccakP1600times2_AddBytes_LoopLanes
694
- ldr r4, [r0]
695
- ldr r5, [r0, #4]
696
- ldr r6, [r2], #4 ; data is read one 32-bit word at a time
697
- ldr r7, [r2], #4
698
- eor r4, r4, r6
699
- eor r5, r5, r7
700
- subs r3, r3, #1 ; flags survive the two str below
701
- str r4, [r0], #4
702
- str r5, [r0], #12 ; states += 8 (next lane of current state part)
703
- bne KeccakP1600times2_AddBytes_LoopLanes
704
- KeccakP1600times2_AddBytes_CheckBytesLast
705
- ands r1, r1, #7 ; r1 = trailing bytes after the whole lanes
706
- beq KeccakP1600times2_AddBytes_Done
707
- KeccakP1600times2_AddBytes_LoopBytesLast
708
- ldrb r4, [r0]
709
- ldrb r5, [r2], #1
710
- eor r4, r4, r5
711
- subs r1, r1, #1
712
- strb r4, [r0], #1
713
- bne KeccakP1600times2_AddBytes_LoopBytesLast
714
- KeccakP1600times2_AddBytes_Done
715
- pop { r4- r7 }
716
- KeccakP1600times2_AddBytes_Exit
717
- bx lr
718
- ENDP
719
-
720
- ;----------------------------------------------------------------------------
721
- ;
722
- ; void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
723
- ; XORs laneCount lanes into both instances; instance 1 reads its data 8*laneOffset bytes after instance 0.
724
- ALIGN
725
- EXPORT KeccakP1600times2_AddLanesAll
726
- KeccakP1600times2_AddLanesAll PROC
727
- cmp r2, #0
728
- beq KeccakP1600times2_AddLanesAll_Exit ; laneCount == 0: nothing pushed, skip the pop
729
- add r3, r1, r3, LSL #3 ; r3: data + 8 * laneOffset
730
- push {r4 - r7}
731
- KeccakP1600times2_AddLanesAll_Loop
732
- ldr r4, [r1], #4 ; index 0
733
- ldr r5, [r1], #4
734
- ldrd r6, r7, [r0] ; state lane read as a 64-bit pair
735
- eor r6, r6, r4
736
- eor r7, r7, r5
737
- strd r6, r7, [r0], #8
738
- ldr r4, [r3], #4 ; index 1
739
- ldr r5, [r3], #4
740
- ldrd r6, r7, [r0]
741
- eor r6, r6, r4
742
- eor r7, r7, r5
743
- strd r6, r7, [r0], #8
744
- subs r2, r2, #1
745
- bne KeccakP1600times2_AddLanesAll_Loop
746
- pop {r4 - r7}
747
- KeccakP1600times2_AddLanesAll_Exit
748
- bx lr
749
- ENDP
750
-
751
- ;----------------------------------------------------------------------------
752
- ;
753
- ; void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
754
- ; unsigned int offset, unsigned int length )
755
- ; Copies length bytes of data into one instance: leading partial lane, then whole lanes, then trailing bytes.
756
- ALIGN
757
- EXPORT KeccakP1600times2_OverwriteBytes
758
- KeccakP1600times2_OverwriteBytes PROC
759
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
760
- ldr r1, [sp, #0*4] ; r1 = length
761
- cmp r1, #0
762
- beq KeccakP1600times2_OverwriteBytes_Exit ; nothing pushed yet, so skip the pop
763
- push { r4-r5 }
764
- lsr r4, r3, #3 ; states += (offset & ~7) * 2
765
- add r0, r0, r4, LSL #4
766
- ands r3, r3, #7 ; if (offset & 7) != 0
767
- beq KeccakP1600times2_OverwriteBytes_CheckLanes
768
- add r0, r0, r3 ; states += offset & 7
769
- rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
770
- KeccakP1600times2_OverwriteBytes_LoopBytesFirst
771
- ldrb r4, [r2], #1
772
- strb r4, [r0], #1
773
- subs r1, r1, #1
774
- beq KeccakP1600times2_OverwriteBytes_Done ; length exhausted inside the first lane
775
- subs r3, r3, #1
776
- bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
777
- add r0, r0, #8 ; states += 8 (next lane of current state part)
778
- KeccakP1600times2_OverwriteBytes_CheckLanes
779
- lsrs r3, r1, #3 ; r3 = number of whole lanes left
780
- beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
781
- KeccakP1600times2_OverwriteBytes_LoopLanes
782
- ldr r4, [r2], #4 ; data is read one 32-bit word at a time
783
- ldr r5, [r2], #4
784
- str r4, [r0], #4
785
- str r5, [r0], #12 ; states += 8 (next lane of current state part)
786
- subs r3, r3, #1
787
- bne KeccakP1600times2_OverwriteBytes_LoopLanes
788
- KeccakP1600times2_OverwriteBytes_CheckBytesLast
789
- ands r1, r1, #7 ; r1 = trailing bytes after the whole lanes
790
- beq KeccakP1600times2_OverwriteBytes_Done
791
- KeccakP1600times2_OverwriteBytes_LoopBytesLast
792
- ldrb r4, [r2], #1
793
- subs r1, r1, #1 ; flags survive the strb below
794
- strb r4, [r0], #1
795
- bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
796
- KeccakP1600times2_OverwriteBytes_Done
797
- pop { r4- r5 }
798
- KeccakP1600times2_OverwriteBytes_Exit
799
- bx lr
800
- ENDP
801
-
802
- ;----------------------------------------------------------------------------
803
- ;
804
- ; void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
805
- ; Copies laneCount lanes into both instances; instance 1 reads its data 8*laneOffset bytes after instance 0.
806
- ALIGN
807
- EXPORT KeccakP1600times2_OverwriteLanesAll
808
- KeccakP1600times2_OverwriteLanesAll PROC
809
- cmp r2, #0
810
- beq KeccakP1600times2_OverwriteLanesAll_Exit
811
- lsls r12, r1, #32-3 ; non-zero when any of the low 3 bits of data is set
812
- bne KeccakP1600times2_OverwriteLanesAll_Unaligned
813
- add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
814
- lsrs r2, r2, #1 ; r2 = lane pairs; carry = odd lane, Z = no pairs
815
- bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned
816
- vldm r1!, { d0 } ; odd laneCount: copy a single lane first
817
- vldm r3!, { d1 }
818
- vstm r0!, { d0 - d1 }
819
- beq KeccakP1600times2_OverwriteLanesAll_Exit ; still tests Z from the lsrs above (laneCount was 1)
820
- KeccakP1600times2_OverwriteLanesAll_LoopAligned
821
- vldm r1!, { d0 } ; two lanes per pass: d0,d2 from instance 0, d1,d3 from instance 1
822
- vldm r1!, { d2 }
823
- vldm r3!, { d1 }
824
- vldm r3!, { d3 }
825
- subs r2, r2, #1
826
- vstm r0!, { d0 - d3 }
827
- bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
828
- bx lr
829
- KeccakP1600times2_OverwriteLanesAll_Unaligned
830
- add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
831
- push { r4, r5 }
832
- KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
833
- ldr r4, [r1], #4 ; data is read one 32-bit word at a time on this path
834
- ldr r5, [r1], #4
835
- strd r4, r5, [r0], #8
836
- ldr r4, [r3], #4
837
- ldr r5, [r3], #4
838
- subs r2, r2, #1 ; flags survive the strd below
839
- strd r4, r5, [r0], #8
840
- bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
841
- pop { r4, r5 }
842
- KeccakP1600times2_OverwriteLanesAll_Exit
843
- bx lr
844
- ENDP
845
-
846
- ;----------------------------------------------------------------------------
847
- ;
848
- ; void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
849
- ; Zeroes the first byteCount bytes of one instance: whole lanes first, then the remaining bytes.
850
- ALIGN
851
- EXPORT KeccakP1600times2_OverwriteWithZeroes
852
- KeccakP1600times2_OverwriteWithZeroes PROC
853
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
854
- lsrs r1, r2, #3 ; r1: laneCount
855
- beq KeccakP1600times2_OverwriteWithZeroes_Bytes
856
- vmov.i64 d0, #0
857
- KeccakP1600times2_OverwriteWithZeroes_LoopLanes
858
- subs r1, r1, #1 ; flags survive the vstm and the non-flag-setting add
859
- vstm r0!, { d0 }
860
- add r0, r0, #8 ; step over the other instance's lane
861
- bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
862
- KeccakP1600times2_OverwriteWithZeroes_Bytes
863
- ands r2, r2, #7 ; r2: byteCount remaining
864
- beq KeccakP1600times2_OverwriteWithZeroes_Exit
865
- movs r3, #0
866
- KeccakP1600times2_OverwriteWithZeroes_LoopBytes
867
- subs r2, r2, #1
868
- strb r3, [r0], #1
869
- bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
870
- KeccakP1600times2_OverwriteWithZeroes_Exit
871
- bx lr
872
- ENDP
873
-
874
- ;----------------------------------------------------------------------------
875
- ;
876
- ; void KeccakP1600times2_ExtractBytes( const void *states, unsigned int instanceIndex, unsigned char *data,
877
- ; unsigned int offset, unsigned int length )
878
- ; Copies length bytes of one instance out to data (data is written, states is only read).
879
- ALIGN
880
- EXPORT KeccakP1600times2_ExtractBytes
881
- KeccakP1600times2_ExtractBytes PROC
882
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
883
- ldr r1, [sp, #0*4] ; r1 = length
884
- cmp r1, #0
885
- beq KeccakP1600times2_ExtractBytes_Exit ; nothing pushed yet, so skip the pop
886
- push { r4-r5 }
887
- lsr r4, r3, #3 ; states += (offset & ~7) * 2
888
- add r0, r0, r4, LSL #4
889
- ands r3, r3, #7 ; if (offset & 7) != 0
890
- beq KeccakP1600times2_ExtractBytes_CheckLanes
891
- add r0, r0, r3 ; states += offset & 7
892
- rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
893
- KeccakP1600times2_ExtractBytes_LoopBytesFirst
894
- ldrb r4, [r0], #1
895
- strb r4, [r2], #1
896
- subs r1, r1, #1
897
- beq KeccakP1600times2_ExtractBytes_Done ; length exhausted inside the first lane
898
- subs r3, r3, #1
899
- bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
900
- add r0, r0, #8 ; states += 8 (next lane of current state part)
901
- KeccakP1600times2_ExtractBytes_CheckLanes
902
- lsrs r3, r1, #3 ; r3 = number of whole lanes left
903
- beq KeccakP1600times2_ExtractBytes_CheckBytesLast
904
- KeccakP1600times2_ExtractBytes_LoopLanes
905
- ldr r4, [r0], #4
906
- ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
907
- str r4, [r2], #4 ; data is written one 32-bit word at a time
908
- str r5, [r2], #4
909
- subs r3, r3, #1
910
- bne KeccakP1600times2_ExtractBytes_LoopLanes
911
- KeccakP1600times2_ExtractBytes_CheckBytesLast
912
- ands r1, r1, #7 ; r1 = trailing bytes after the whole lanes
913
- beq KeccakP1600times2_ExtractBytes_Done
914
- KeccakP1600times2_ExtractBytes_LoopBytesLast
915
- ldrb r4, [r0], #1
916
- subs r1, r1, #1 ; flags survive the strb below
917
- strb r4, [r2], #1
918
- bne KeccakP1600times2_ExtractBytes_LoopBytesLast
919
- KeccakP1600times2_ExtractBytes_Done
920
- pop { r4-r5 }
921
- KeccakP1600times2_ExtractBytes_Exit
922
- bx lr
923
- ENDP
924
-
925
- ;----------------------------------------------------------------------------
926
- ;
927
- ; void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
928
- ; Copies laneCount lanes of both instances out; instance 1 is written 8*laneOffset bytes after instance 0.
929
- ALIGN
930
- EXPORT KeccakP1600times2_ExtractLanesAll
931
- KeccakP1600times2_ExtractLanesAll PROC
932
- cmp r2, #0
933
- beq KeccakP1600times2_ExtractLanesAll_Exit
934
- lsls r12, r1, #32-3 ; non-zero when any of the low 3 bits of data is set
935
- bne KeccakP1600times2_ExtractLanesAll_Unaligned
936
- add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
937
- lsrs r2, r2, #1 ; r2 = lane pairs; carry = odd lane, Z = no pairs
938
- bcc KeccakP1600times2_ExtractLanesAll_LoopAligned
939
- vldm r0!, { d0 - d1 } ; odd laneCount: copy a single lane first
940
- vstm r1!, { d0 }
941
- vstm r3!, { d1 }
942
- beq KeccakP1600times2_ExtractLanesAll_Exit ; still tests Z from the lsrs above (laneCount was 1)
943
- KeccakP1600times2_ExtractLanesAll_LoopAligned
944
- vldm r0!, { d0 - d3 } ; two lanes per pass: d0,d2 go to instance 0, d1,d3 to instance 1
945
- subs r2, r2, #1 ; flags survive the vstm below
946
- vstm r1!, { d0 }
947
- vstm r1!, { d2 }
948
- vstm r3!, { d1 }
949
- vstm r3!, { d3 }
950
- bne KeccakP1600times2_ExtractLanesAll_LoopAligned
951
- bx lr
952
- KeccakP1600times2_ExtractLanesAll_Unaligned
953
- add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
954
- push { r4, r5 }
955
- KeccakP1600times2_ExtractLanesAll_LoopUnaligned
956
- ldrd r4, r5, [r0], #8
957
- str r4, [r1], #4 ; data is written one 32-bit word at a time on this path
958
- str r5, [r1], #4
959
- ldrd r4, r5, [r0], #8
960
- subs r2, r2, #1 ; flags survive the two str below
961
- str r4, [r3], #4
962
- str r5, [r3], #4
963
- bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
964
- pop { r4, r5 }
965
- KeccakP1600times2_ExtractLanesAll_Exit
966
- bx lr
967
- ENDP
968
-
969
- ;----------------------------------------------------------------------------
970
- ;
971
- ; void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
972
- ; const unsigned char *input, unsigned char *output,
973
- ; unsigned int offset, unsigned int length )
974
- ; Writes output[i] = state byte ^ input[i] for length bytes of one instance; the state is only read.
975
- ALIGN
976
- EXPORT KeccakP1600times2_ExtractAndAddBytes
977
- KeccakP1600times2_ExtractAndAddBytes PROC
978
- add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
979
- ldr r1, [sp, #1*4] ; r1 = length
980
- cmp r1, #0
981
- beq KeccakP1600times2_ExtractAndAddBytes_Exit ; nothing pushed yet, so skip the pop
982
- push { r4 - r9 }
983
- ldr r8, [sp, #6*4] ; r8 = offset
984
- lsr r4, r8, #3 ; states += (offset & ~7) * 2
985
- add r0, r0, r4, LSL #4
986
- ands r8, r8, #7 ; if (offset & 7) != 0
987
- beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
988
- add r0, r0, r8 ; states += offset & 7
989
- rsb r8, r8, #8 ; lenInLane = 8 - (offset & 7)
990
- KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
991
- ldrb r4, [r0], #1
992
- ldrb r5, [r2], #1
993
- eor r4, r4, r5
994
- strb r4, [r3], #1
995
- subs r1, r1, #1
996
- beq KeccakP1600times2_ExtractAndAddBytes_Done ; length exhausted inside the first lane
997
- subs r8, r8, #1
998
- bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
999
- add r0, r0, #8 ; states += 8 (next lane of current state part)
1000
- KeccakP1600times2_ExtractAndAddBytes_CheckLanes
1001
- lsrs r8, r1, #3 ; r8 = number of whole lanes left
1002
- beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1003
- KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1004
- ldr r4, [r0], #4
1005
- ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
1006
- ldr r6, [r2], #4
1007
- ldr r7, [r2], #4
1008
- eor r4, r4, r6
1009
- eor r5, r5, r7
1010
- str r4, [r3], #4
1011
- str r5, [r3], #4 ; output advances by 8 bytes per lane
1012
- subs r8, r8, #1
1013
- bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1014
- KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1015
- ands r1, r1, #7 ; r1 = trailing bytes after the whole lanes
1016
- beq KeccakP1600times2_ExtractAndAddBytes_Done
1017
- KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1018
- ldrb r4, [r0], #1
1019
- ldrb r5, [r2], #1
1020
- eor r4, r4, r5
1021
- strb r4, [r3], #1
1022
- subs r1, r1, #1
1023
- bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1024
- KeccakP1600times2_ExtractAndAddBytes_Done
1025
- pop { r4 - r9 }
1026
- KeccakP1600times2_ExtractAndAddBytes_Exit
1027
- bx lr
1028
- ENDP
1029
-
1030
- ;----------------------------------------------------------------------------
1031
- ;
1032
- ; void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1033
- ; const unsigned char *input, unsigned char *output,
1034
- ; unsigned int laneCount, unsigned int laneOffset )
1035
- ; Writes output = state lanes ^ input for both instances; instance 1 uses input/output + 8*laneOffset.
1036
- ALIGN
1037
- EXPORT KeccakP1600times2_ExtractAndAddLanesAll
1038
- KeccakP1600times2_ExtractAndAddLanesAll PROC
1039
- cmp r3, #0
1040
- beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1041
- orr r12, r1, r2
1042
- lsls r12, r12, #32-3 ; unaligned access if input or output unaligned
1043
- bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1044
- push {r4,r5}
1045
- ldr r12, [sp, #2*4] ; r12 = laneOffset
1046
- lsrs r3, r3, #1 ; r3 = lane pairs; carry = odd lane, Z = no pairs
1047
- add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1048
- add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1049
- bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1050
- vldm r0!, { d0 - d1 } ; odd laneCount: handle a single lane first
1051
- vldm r1!, { d2 }
1052
- vldm r4!, { d3 }
1053
- veor q0, q0, q1 ; q0 = d0:d1 (state), q1 = d2:d3 (input)
1054
- vstm r2!, { d0 }
1055
- vstm r5!, { d1 }
1056
- beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone ; still tests Z from the lsrs above (laneCount was 1)
1057
- KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1058
- vldm r0!, { d0 - d3 } ; two lanes per pass
1059
- vldm r1!, { d4 }
1060
- vldm r1!, { d6 }
1061
- vldm r4!, { d5 }
1062
- vldm r4!, { d7 }
1063
- subs r3, r3, #1 ; flags survive the NEON instructions below
1064
- veor q0, q0, q2
1065
- veor q1, q1, q3
1066
- vstm r2!, { d0 }
1067
- vstm r2!, { d2 }
1068
- vstm r5!, { d1 }
1069
- vstm r5!, { d3 }
1070
- bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1071
- KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone
1072
- pop {r4,r5}
1073
- bx lr
1074
- KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1075
- push {r4-r9}
1076
- ldr r12, [sp, #6*4] ; r12 = laneOffset
1077
- add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1078
- add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1079
- KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1080
- ldrd r8, r9, [r0], #8 ; instance 0 lane
1081
- ldr r6, [r1], #4
1082
- ldr r7, [r1], #4
1083
- eor r8, r8, r6
1084
- eor r9, r9, r7
1085
- str r8, [r2], #4
1086
- str r9, [r2], #4
1087
- ldrd r8, r9, [r0], #8 ; instance 1 lane
1088
- ldr r6, [r4], #4
1089
- ldr r7, [r4], #4
1090
- eor r8, r8, r6
1091
- eor r9, r9, r7
1092
- str r8, [r5], #4
1093
- subs r3, r3, #1 ; flags survive the str below
1094
- str r9, [r5], #4
1095
- bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1096
- pop { r4 - r9 }
1097
- KeccakP1600times2_ExtractAndAddLanesAll_Exit
1098
- bx lr
1099
- ENDP
1100
-
1101
- ;----------------------------------------------------------------------------
1102
- ;
1103
- ; void KeccakP1600times2_PermuteAll_6rounds( void *states )
1104
- ; Swaps the lane pairs listed below while accumulating the five column parities in q0..q4, then jumps to KeccakP1600times2_PermuteAll_Round2.
1105
- ALIGN
1106
- EXPORT KeccakP1600times2_PermuteAll_6rounds
1107
- KeccakP1600times2_PermuteAll_6rounds PROC
1108
- adr r1, KeccakP1600times2_Permute_RoundConstants6
1109
- movs r2, #6+2 ; NOTE(review): presumably biased by 2 because entry is at Round2, part-way through the unrolled loop - confirm against the loop tail
1110
- vpush {q4-q7}
1111
- push {r4-r7}
1112
- sub sp, #4*2*8+8 ;allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1113
- add r5, sp, #8 ; r5 becomes the 16-byte aligned scratch base once the bic below has run
1114
-
1115
- ; ba
1116
- ; be = me, me = be
1117
- ; bi = gi, gi = bi
1118
- ; bo = so, so = bo
1119
- ; bu = ku, ku = bu
1120
-
1121
- ; ga = sa, sa = ga
1122
- ; ge = ke, ke = ge
1123
- ; go = mo, mo = go
1124
- ; gu
1125
-
1126
- ; ka = ma, ma = ka
1127
- ; ki = si, si = ki
1128
- ; ko
1129
-
1130
- ; mu = su, su = mu
1131
- ; mi
1132
- ; se
1133
-
1134
- ;PrepareTheta
1135
- ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1136
- ; Ce = be ^ ge ^ ke ^ me ^ se
1137
- ; Ci = bi ^ gi ^ ki ^ mi ^ si
1138
- ; Co = bo ^ go ^ ko ^ mo ^ so
1139
- ; Cu = bu ^ gu ^ ku ^ mu ^ su
1140
- vldm r0, { q0 - q4 } ; ba be bi bo bu
1141
- bic r5, #15 ; round the scratch base down to a 16-byte boundary
1142
- add r3, r0, #_me
1143
- vldm r3, { q6 } ; me
1144
- vstm r3, { q1 } ; me = old be
1145
- veor.64 q1, q1, q6
1146
- add r4, r0, #_be ; r4 walks be, bi, bo, bu through the writebacks below
1147
- vstm r4!, { q6 } ; be
1148
-
1149
- add r3, r0, #_ga
1150
- vldm r3, { q10 - q14 } ; ga ge gi go gu
1151
- add r3, r0, #_gi
1152
- vstm r3, { q2 } ; gi = old bi
1153
- veor.64 q2, q2, q12
1154
- vstm r4!, { q12 } ; bi
1155
-
1156
- add r3, r0, #_so
1157
- vldm r3, { q8 } ; so
1158
- vstm r3, { q3 } ; so = old bo
1159
- veor.64 q3, q3, q8
1160
- vstm r4!, { q8 } ; bo
1161
-
1162
- add r3, r0, #_ku
1163
- vldm r3, { q9 } ; ku
1164
- vstm r3, { q4 } ; ku = old bu
1165
- veor.64 q4, q4, q9
1166
- vstm r4!, { q9 } ; bu
1167
-
1168
- add r3, r0, #_sa
1169
- vldm r3, { q5 } ; sa
1170
- vstm r3, { q10 } ; sa = old ga
1171
- add r4, r0, #_ga
1172
- veor.64 q0, q0, q5
1173
- veor.64 q0, q0, q10
1174
- vstm r4!, { q5 } ; ga
1175
-
1176
- add r3, r0, #_ke
1177
- vldm r3, { q6 } ; ke
1178
- vstm r3, { q11 } ; ke = old ge
1179
- veor.64 q1, q1, q6
1180
- veor.64 q1, q1, q11
1181
- vstm r4!, { q6 } ; ge
1182
-
1183
- add r3, r0, #_mo
1184
- vldm r3, { q8 } ; mo
1185
- vstm r3, { q13 } ; mo = old go
1186
- add r4, r0, #_go
1187
- veor.64 q3, q3, q8
1188
- veor.64 q3, q3, q13
1189
- vstm r4!, { q8 } ; go
1190
- veor.64 q4, q4, q14 ; gu
1191
-
1192
- add r4, r0, #_ka ; ka
1193
- vldm r4, { q10 }
1194
- add r3, r0, #_ma
1195
- vldm r3, { q5 } ; ma
1196
- vstm r3, { q10 } ; ma = old ka
1197
- veor.64 q0, q0, q5
1198
- veor.64 q0, q0, q10
1199
- vstm r4!, { q5 } ; ka
1200
-
1201
- add r4, r0, #_ki ; ki ko
1202
- vldm r4, { q12, q13 }
1203
- add r3, r0, #_si
1204
- vldm r3, { q7 } ; si
1205
- vstm r3, { q12 } ; si = old ki
1206
- veor.64 q2, q2, q7
1207
- veor.64 q2, q2, q12
1208
- vstm r4, { q7 } ; ki
1209
- veor.64 q3, q3, q13 ; ko
1210
-
1211
- add r4, r0, #_mu ; mu
1212
- vldm r4, { q14 }
1213
- add r3, r0, #_su
1214
- vldm r3, { q9 } ; su
1215
- vstm r3, { q14 } ; su = old mu
1216
- veor.64 q4, q4, q9
1217
- veor.64 q4, q4, q14
1218
- vstm r4, { q9 } ; mu
1219
-
1220
- add r4, r0, #_mi ; mi
1221
- vldm r4, { q12 }
1222
- veor.64 q2, q2, q12
1223
- add r3, r0, #_se ; se
1224
- vldm r3, { q6 }
1225
- veor.64 q1, q1, q6
1226
-
1227
- mov r3, r0 ; r3 = start of the state for the first load of the round macros
1228
- b KeccakP1600times2_PermuteAll_Round2
1229
- ENDP
1230
-
1231
- ALIGN
1232
- KeccakP1600times2_Permute_RoundConstants24 ; 24 constants from here to the end of the table
1233
- dcq 0x0000000000000001
1234
- dcq 0x0000000000008082
1235
- dcq 0x800000000000808a
1236
- dcq 0x8000000080008000
1237
- dcq 0x000000000000808b
1238
- dcq 0x0000000080000001
1239
- dcq 0x8000000080008081
1240
- dcq 0x8000000000008009
1241
- dcq 0x000000000000008a
1242
- dcq 0x0000000000000088
1243
- dcq 0x0000000080008009
1244
- dcq 0x000000008000000a
1245
- KeccakP1600times2_Permute_RoundConstants12 ; the last 12 constants start here
1246
- dcq 0x000000008000808b
1247
- dcq 0x800000000000008b
1248
- dcq 0x8000000000008089
1249
- dcq 0x8000000000008003
1250
- dcq 0x8000000000008002
1251
- dcq 0x8000000000000080
1252
- KeccakP1600times2_Permute_RoundConstants6 ; the last 6 constants start here
1253
- dcq 0x000000000000800a
1254
- dcq 0x800000008000000a
1255
- KeccakP1600times2_Permute_RoundConstants4 ; the last 4 constants start here
1256
- dcq 0x8000000080008081
1257
- dcq 0x8000000000008080
1258
- dcq 0x0000000080000001
1259
- dcq 0x8000000080008008
1260
-
1261
- ;----------------------------------------------------------------------------
1262
- ;
1263
- ; void KeccakP1600times2_PermuteAll_24rounds( void *states )
1264
- ; Thin entry point: selects the round-constant pointer and round count, then branches to the shared routine.
1265
- ALIGN
1266
- EXPORT KeccakP1600times2_PermuteAll_24rounds
1267
- KeccakP1600times2_PermuteAll_24rounds PROC
1268
- adr r1, KeccakP1600times2_Permute_RoundConstants24 ; r1 = rc argument of KeccakP1600times2_PermuteAll
1269
- movs r2, #24 ; r2 = nr argument
1270
- b KeccakP1600times2_PermuteAll ; plain branch: the shared routine is entered with the caller's lr
1271
- ENDP
1272
-
1273
- ;----------------------------------------------------------------------------
1274
- ;
1275
- ; void KeccakP1600times2_PermuteAll_12rounds( void *states )
1276
- ; Thin entry point: selects the round-constant pointer and round count, then branches to the shared routine.
1277
- ALIGN
1278
- EXPORT KeccakP1600times2_PermuteAll_12rounds
1279
- KeccakP1600times2_PermuteAll_12rounds PROC
1280
- adr r1, KeccakP1600times2_Permute_RoundConstants12 ; r1 = rc argument of KeccakP1600times2_PermuteAll
1281
- movs r2, #12 ; r2 = nr argument
1282
- b KeccakP1600times2_PermuteAll ; plain branch: the shared routine is entered with the caller's lr
1283
- ENDP
1284
-
1285
- ;----------------------------------------------------------------------------
1286
- ;
1287
- ; void KeccakP1600times2_PermuteAll_4rounds( void *states )
1288
- ; Thin entry point: selects the round-constant pointer and round count, then branches to the shared routine.
1289
- ALIGN
1290
- EXPORT KeccakP1600times2_PermuteAll_4rounds
1291
- KeccakP1600times2_PermuteAll_4rounds PROC
1292
- adr r1, KeccakP1600times2_Permute_RoundConstants4 ; r1 = rc argument of KeccakP1600times2_PermuteAll
1293
- movs r2, #4 ; r2 = nr argument
1294
- b KeccakP1600times2_PermuteAll ; plain branch: the shared routine is entered with the caller's lr
1295
- ENDP
1296
-
1297
- ;----------------------------------------------------------------------------
1298
- ; Runs the KeccakP_ThetaRhoPiChi* round macros over the state at r0 until the round counter r2 reaches zero (presumably two interleaved Keccak-p[1600] states, one 64-bit lane of each per Q register - confirm against the macro definitions).
1299
- ; void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1300
- ; In: r0 = states (loaded below with 128/256-bit address alignment qualifiers), r2 = nr (decremented by 4 per loop pass; the loop exits only when it hits exactly 0). r1 (rc) is not referenced directly in this block - presumably consumed by the round macros.
1301
- ALIGN
1302
- KeccakP1600times2_PermuteAll PROC
1303
- vpush {q4-q7} ; preserve q4-q7 (d8-d15), overwritten by the loads below; restored by the vpop before return
1304
- push {r4-r7} ; preserve r4-r7; restored by the pop before return
1305
- sub sp, #4*2*8+8 ; reserve 72 bytes of stack: 4 double lanes of 16 bytes plus 8 bytes of slack so the work area can start on a 16-byte boundary
1306
- mov r3, r0 ; r3 = running read pointer over states (r0 stays at the base)
1307
- add r5, sp, #8 ; r5 = sp + 8, rounded down to a multiple of 16 by the bic below
1308
-
1309
- ; PrepareTheta: accumulate the five column parities Ca..Cu into q0..q4
1310
- ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1311
- ; Ce = be ^ ge ^ ke ^ me ^ se
1312
- ; Ci = bi ^ gi ^ ki ^ mi ^ si
1313
- ; Co = bo ^ go ^ ko ^ mo ^ so
1314
- ; Cu = bu ^ gu ^ ku ^ mu ^ su
1315
- vld1.64 { d0, d1, d2, d3 }, [r3:256]! ; q0 = _ba, q1 = _be
1316
- bic r5, #15 ; r5 &= ~15: 16-byte aligned address inside the reserved stack area (assumes sp is at least 8-byte aligned on entry)
1317
- vld1.64 { d4, d5, d6, d7 }, [r3:256]! ; q2 = _bi, q3 = _bo
1318
- vld1.64 { d8, d9, d10, d11 }, [r3:256]! ; q4 = _bu, q5 = _ga
1319
- vld1.64 { d12, d13 }, [r3:128]! ; q6 = _ge
1320
- veor.64 q0, q0, q5 ; Ca: q0 ^= _ga
1321
- vld1.64 { d14, d15 }, [r3:128]! ; q7 = _gi
1322
- veor.64 q1, q1, q6 ; Ce: q1 ^= _ge
1323
- vld1.64 { d16, d17 }, [r3:128]! ; q8 = _go
1324
- veor.64 q2, q2, q7 ; Ci: q2 ^= _gi
1325
- vld1.64 { d18, d19 }, [r3:128]! ; q9 = _gu
1326
- veor.64 q3, q3, q8 ; Co: q3 ^= _go
1327
- vld1.64 { d10, d11 }, [r3:128]! ; q5 = _ka
1328
- veor.64 q4, q4, q9 ; Cu: q4 ^= _gu
1329
- vld1.64 { d12, d13 }, [r3:128]! ; q6 = _ke
1330
- veor.64 q0, q0, q5 ; Ca: q0 ^= _ka
1331
- vld1.64 { d14, d15 }, [r3:128]! ; q7 = _ki
1332
- veor.64 q1, q1, q6 ; Ce: q1 ^= _ke
1333
- vld1.64 { d16, d17 }, [r3:128]! ; q8 = _ko
1334
- veor.64 q2, q2, q7 ; Ci: q2 ^= _ki
1335
- vld1.64 { d18, d19 }, [r3:128]! ; q9 = _ku
1336
- veor.64 q3, q3, q8 ; Co: q3 ^= _ko
1337
- vld1.64 { d10, d11 }, [r3:128]! ; q5 = _ma
1338
- veor.64 q4, q4, q9 ; Cu: q4 ^= _ku
1339
- vld1.64 { d12, d13 }, [r3:128]! ; q6 = _me
1340
- veor.64 q0, q0, q5 ; Ca: q0 ^= _ma
1341
- vld1.64 { d14, d15 }, [r3:128]! ; q7 = _mi
1342
- veor.64 q1, q1, q6 ; Ce: q1 ^= _me
1343
- vld1.64 { d16, d17 }, [r3:128]! ; q8 = _mo
1344
- veor.64 q2, q2, q7 ; Ci: q2 ^= _mi
1345
- vld1.64 { d18, d19 }, [r3:128]! ; q9 = _mu
1346
- veor.64 q3, q3, q8 ; Co: q3 ^= _mo
1347
- vld1.64 { d10, d11 }, [r3:128]! ; q5 = _sa
1348
- veor.64 q4, q4, q9 ; Cu: q4 ^= _mu
1349
- vld1.64 { d12, d13 }, [r3:128]! ; q6 = _se
1350
- veor.64 q0, q0, q5 ; Ca: q0 ^= _sa (Ca complete)
1351
- vld1.64 { d14, d15 }, [r3:128]! ; q7 = _si
1352
- veor.64 q1, q1, q6 ; Ce: q1 ^= _se (Ce complete)
1353
- vld1.64 { d16, d17 }, [r3:128]! ; q8 = _so
1354
- veor.64 q2, q2, q7 ; Ci: q2 ^= _si (Ci complete)
1355
- vld1.64 { d18, d19 }, [r3:128]! ; q9 = _su
1356
- mov r3, r0 ; rewind r3 to the start of states before entering the round loop
1357
- veor.64 q3, q3, q8 ; Co: q3 ^= _so (Co complete)
1358
- veor.64 q4, q4, q9 ; Cu: q4 ^= _su (Cu complete)
1359
-
1360
- KeccakP1600times2_PermuteAll_RoundLoop ; loop head: four groups of five macro calls per pass, matching the r2 -= 4 below (presumably one round per group)
1361
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka ; _ba, _ge, _ki, _mo, _su
1362
- KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa ; _ka, _me, _si, _bo, _gu
1363
- KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga ; _sa, _be, _gi, _ko, _mu
1364
- KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma ; _ga, _ke, _mi, _so, _bu
1365
- KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba ; _ma, _se, _bi, _go, _ku
1366
-
1367
- KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa ; _ba, _me, _gi, _so, _ku
1368
- KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma ; _sa, _ke, _bi, _mo, _gu
1369
- KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka ; _ma, _ge, _si, _ko, _bu
1370
- KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga ; _ka, _be, _mi, _go, _su
1371
- KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba ; _ga, _se, _ki, _bo, _mu
1372
- KeccakP1600times2_PermuteAll_Round2 ; mid-loop label; nothing in this chunk branches to it - presumably an entry point used from elsewhere in the file (verify)
1373
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma ; _ba, _ke, _si, _go, _mu
1374
- KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga ; _ma, _be, _ki, _so, _gu
1375
- KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa ; _ga, _me, _bi, _ko, _su
1376
- KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka ; _sa, _ge, _mi, _bo, _ku
1377
- KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba ; _ka, _se, _gi, _mo, _bu
1378
-
1379
- KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga ; _ba, _be, _bi, _bo, _bu
1380
- KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka ; _ga, _ge, _gi, _go, _gu
1381
- KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma ; _ka, _ke, _ki, _ko, _ku
1382
- KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa ; _ma, _me, _mi, _mo, _mu
1383
- subs r2, #4 ; r2 -= 4 and set flags for the bne below (assumes the macro in between leaves the flags untouched - TODO confirm); r2 must reach exactly 0 for the loop to end
1384
- KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba ; _sa, _se, _si, _so, _su
1385
- bne KeccakP1600times2_PermuteAll_RoundLoop ; another pass while r2 != 0
1386
- add sp, #4*2*8+8 ; release the 72 bytes (4.5 double lanes) reserved in the prologue
1387
- pop {r4-r7} ; restore r4-r7 saved on entry
1388
- vpop {q4-q7} ; restore q4-q7 saved on entry
1389
- bx lr ; return to caller
1390
- ENDP
1391
-
1392
- END