digest-kangarootwelve 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (305) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +51 -11
  3. data/Rakefile +2 -2
  4. data/digest-kangarootwelve.gemspec +322 -42
  5. data/ext/digest/kangarootwelve/ext.c +1 -1
  6. data/ext/digest/kangarootwelve/extconf.rb +13 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/lib/digest/kangarootwelve/version.rb +1 -1
  305. metadata +299 -21
@@ -0,0 +1,1245 @@
1
+ @
2
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+ @
4
+ @ For more information, feedback or questions, please refer to our website:
5
+ @ https://keccak.team/
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+ @
11
+ @ ---
12
+ @
13
+ @ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
14
+ @ Please refer to PlSnP-documentation.h for more details.
15
+ @
16
+ @ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
17
+ @ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ @
19
+
20
+ @ WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
21
+ @ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
22
+
23
+ @ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
24
+ @ INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
25
+
26
+ @ INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
27
+
28
+
29
+
30
+ .text
31
+
32
+ @----------------------------------------------------------------------------
33
+
34
+ @ --- byte offsets of the 25 lanes (_ba .. _su) in the state; slots are 16 bytes apart (the file header describes two lane-interleaved states)
35
+ .equ _ba , 0*16
36
+ .equ _be , 1*16
37
+ .equ _bi , 2*16
38
+ .equ _bo , 3*16
39
+ .equ _bu , 4*16
40
+ .equ _ga , 5*16
41
+ .equ _ge , 6*16
42
+ .equ _gi , 7*16
43
+ .equ _go , 8*16
44
+ .equ _gu , 9*16
45
+ .equ _ka , 10*16
46
+ .equ _ke , 11*16
47
+ .equ _ki , 12*16
48
+ .equ _ko , 13*16
49
+ .equ _ku , 14*16
50
+ .equ _ma , 15*16
51
+ .equ _me , 16*16
52
+ .equ _mi , 17*16
53
+ .equ _mo , 18*16
54
+ .equ _mu , 19*16
55
+ .equ _sa , 20*16
56
+ .equ _se , 21*16
57
+ .equ _si , 22*16
58
+ .equ _so , 23*16
59
+ .equ _su , 24*16
60
+
61
+ @ --- macros for Single permutation
62
+
63
+ .macro KeccakS_ThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
64
+ @ Computes Theta's D values, then Rho/Pi/Chi/Iota for one row. argA1 is a byte offset from r0 (lane loaded and stored in memory); argA2-argA5 are 64-bit NEON d registers updated in place. Scratch: d0-d11.
65
+ @ Prepare Theta (results: Da in d7, De in d8, Di in d9, Do in d10, Du in d11)
66
+ @ Ca = Aba^Aga^Aka^Ama^Asa (not recomputed here: read from d5 via d0)
67
+ @ Ce = Abe^Age^Ake^Ame^Ase
68
+ @ Ci = Abi^Agi^Aki^Ami^Asi
69
+ @ Co = Abo^Ago^Ako^Amo^Aso
70
+ @ Cu = Abu^Agu^Aku^Amu^Asu
71
+ @ De = Ca^ROL64(Ci, 1)
72
+ @ Di = Ce^ROL64(Co, 1)
73
+ @ Do = Ci^ROL64(Cu, 1)
74
+ @ Du = Co^ROL64(Ca, 1)
75
+ @ Da = Cu^ROL64(Ce, 1)
76
+ veor.64 q4, q6, q7
77
+ veor.64 q5, q9, q10
78
+ veor.64 d8, d8, d9
79
+ veor.64 d10, d10, d11
80
+ veor.64 d1, d8, d16
81
+ veor.64 d2, d10, d17
82
+
83
+ veor.64 q4, q11, q12
84
+ veor.64 q5, q14, q15
85
+ veor.64 d8, d8, d9
86
+ veor.64 d10, d10, d11
87
+ veor.64 d3, d8, d26
88
+
89
+ vadd.u64 q4, q1, q1
90
+ veor.64 d4, d10, d27
91
+ vmov.64 d0, d5
92
+ vsri.64 q4, q1, #63
93
+
94
+ vadd.u64 q5, q2, q2
95
+ veor.64 q4, q4, q0
96
+ vsri.64 q5, q2, #63
97
+ vadd.u64 d7, d1, d1
98
+ veor.64 \argA2, \argA2,d8
99
+ veor.64 q5, q5, q1
100
+
101
+ vsri.64 d7, d1, #63
102
+ vshl.u64 d1, \argA2,#44
103
+ veor.64 \argA3, \argA3,d9
104
+ veor.64 d7, d7, d4
105
+
106
+ @ Ba = argA1^Da (d0)
107
+ @ Be = ROL64((argA2^De), 44) (d1; each rotation is a vshl/vsri pair)
108
+ @ Bi = ROL64((argA3^Di), 43) (d2)
109
+ @ Bo = ROL64((argA4^Do), 21) (d3)
110
+ @ Bu = ROL64((argA5^Du), 14) (d4)
111
+ @ argA2 = Be ^((~Bi)& Bo )
112
+ @ argA3 = Bi ^((~Bo)& Bu )
113
+ @ argA4 = Bo ^((~Bu)& Ba )
114
+ @ argA5 = Bu ^((~Ba)& Be )
115
+ @ argA1 = Ba ^((~Be)& Bi ) (built in d5, then stored back at r0+argA1)
116
+ @ argA1 ^= KeccakP1600RoundConstants[i+round] (constant loaded into d6 from [r1]; r1 is post-incremented)
117
+ vsri.64 d1, \argA2, #64-44
118
+ vshl.u64 d2, \argA3, #43
119
+ vldr.64 d0, [r0, #\argA1]
120
+ veor.64 \argA4, \argA4, d10
121
+ vsri.64 d2, \argA3, #64-43
122
+ vshl.u64 d3, \argA4, #21
123
+ veor.64 \argA5, \argA5, d11
124
+ veor.64 d0, d0, d7
125
+ vsri.64 d3, \argA4, #64-21
126
+ vbic.64 d5, d2, d1
127
+ vshl.u64 d4, \argA5, #14
128
+ vbic.64 \argA2, d3, d2
129
+ vld1.64 d6, [r1]!
130
+ veor.64 d5, d0
131
+ vsri.64 d4, \argA5, #64-14
132
+ veor.64 d5, d6
133
+ vbic.64 \argA5, d1, d0
134
+ vbic.64 \argA3, d4, d3
135
+ vbic.64 \argA4, d0, d4
136
+ veor.64 \argA2, d1
137
+ vstr.64 d5, [r0, #\argA1]
138
+ veor.64 \argA3, d2
139
+ veor.64 \argA4, d3
140
+ veor.64 \argA5, d4
141
+ .endm
142
+
143
+ .macro KeccakS_ThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
143
+ @ Theta/Rho/Pi/Chi for one row. Expects Da in d7 and De/Di/Do/Du in d8-d11. argA1 is a byte offset from r0 (lane loaded and stored in memory); argA2-argA5 are 64-bit NEON d registers updated in place. Scratch: d0-d4, d6.
144
+ @ Bi = ROL64((argA1^Da), 3) (d2)
145
+ @ Bo = ROL64((argA2^De), 45) (d3)
146
+ @ Bu = ROL64((argA3^Di), 61) (d4)
147
+ @ Ba = ROL64((argA4^Do), 28) (d0)
148
+ @ Be = ROL64((argA5^Du), 20) (d1)
149
+ @ argA1 = Ba ^((~Be)& Bi ) (built in d6, then stored back at r0+argA1)
150
+ @ Ca ^= argA1 (running parity kept in d5)
151
+ @ argA2 = Be ^((~Bi)& Bo )
152
+ @ argA3 = Bi ^((~Bo)& Bu )
153
+ @ argA4 = Bo ^((~Bu)& Ba )
154
+ @ argA5 = Bu ^((~Ba)& Be )
155
+ veor.64 \argA2, \argA2, d8
156
+ veor.64 \argA3, \argA3, d9
157
+ vshl.u64 d3, \argA2, #45
158
+ vldr.64 d6, [r0, #\argA1]
159
+ vshl.u64 d4, \argA3, #61
160
+ veor.64 \argA4, \argA4, d10
161
+ vsri.64 d3, \argA2, #64-45
162
+ veor.64 \argA5, \argA5, d11
163
+ vsri.64 d4, \argA3, #64-61
164
+ vshl.u64 d0, \argA4, #28
165
+ veor.64 d6, d6, d7
166
+ vshl.u64 d1, \argA5, #20
167
+ vbic.64 \argA3, d4, d3
168
+ vsri.64 d0, \argA4, #64-28
169
+ vbic.64 \argA4, d0, d4
170
+ vshl.u64 d2, d6, #3
171
+ vsri.64 d1, \argA5, #64-20
172
+ veor.64 \argA4, d3
173
+ vsri.64 d2, d6, #64-3
174
+ vbic.64 \argA5, d1, d0
175
+ vbic.64 d6, d2, d1
176
+ vbic.64 \argA2, d3, d2
177
+ veor.64 d6, d0
178
+ veor.64 \argA2, d1
179
+ vstr.64 d6, [r0, #\argA1]
180
+ veor.64 \argA3, d2
181
+ veor.64 d5, d6
182
+ veor.64 \argA5, d4
183
+ .endm
185
+
186
+ .macro KeccakS_ThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
186
+ @ Theta/Rho/Pi/Chi for one row. Expects Da in d7 and De/Di/Do/Du in d8-d11. argA1 is a byte offset from r0 (lane loaded and stored in memory); argA2-argA5 are 64-bit NEON d registers updated in place. Scratch: d0-d4, d6.
187
+ @ Bu = ROL64((argA1^Da), 18) (d4)
188
+ @ Ba = ROL64((argA2^De), 1) (d0; vadd of the value with itself supplies the shift left by 1)
189
+ @ Be = ROL64((argA3^Di), 6) (d1)
190
+ @ Bi = ROL64((argA4^Do), 25) (d2)
191
+ @ Bo = ROL64((argA5^Du), 8) (d3; whole-byte rotation done with a single vext.8 #7)
192
+ @ argA1 = Ba ^((~Be)& Bi ) (built in argA3 as a temporary, then stored back at r0+argA1)
193
+ @ Ca ^= argA1 (running parity kept in d5)
194
+ @ argA2 = Be ^((~Bi)& Bo )
195
+ @ argA3 = Bi ^((~Bo)& Bu )
196
+ @ argA4 = Bo ^((~Bu)& Ba )
197
+ @ argA5 = Bu ^((~Ba)& Be )
198
+ veor.64 \argA3, \argA3, d9
199
+ veor.64 \argA4, \argA4, d10
200
+ vshl.u64 d1, \argA3, #6
201
+ vldr.64 d6, [r0, #\argA1]
202
+ vshl.u64 d2, \argA4, #25
203
+ veor.64 \argA5, \argA5, d11
204
+ vsri.64 d1, \argA3, #64-6
205
+ veor.64 \argA2, \argA2, d8
206
+ vsri.64 d2, \argA4, #64-25
207
+ vext.8 d3, \argA5, \argA5, #7
208
+ veor.64 d6, d6, d7
209
+ vbic.64 \argA3, d2, d1
210
+ vadd.u64 d0, \argA2, \argA2
211
+ vbic.64 \argA4, d3, d2
212
+ vsri.64 d0, \argA2, #64-1
213
+ vshl.u64 d4, d6, #18
214
+ veor.64 \argA2, d1, \argA4
215
+ veor.64 \argA3, d0
216
+ vsri.64 d4, d6, #64-18
217
+ vstr.64 \argA3, [r0, #\argA1]
218
+ veor.64 d5, \argA3
219
+ vbic.64 \argA5, d1, d0
220
+ vbic.64 \argA3, d4, d3
221
+ vbic.64 \argA4, d0, d4
222
+ veor.64 \argA3, d2
223
+ veor.64 \argA4, d3
224
+ veor.64 \argA5, d4
225
+ .endm
227
+
228
+ .macro KeccakS_ThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
229
+ @ argA1 is used as a byte offset from r0 (that lane is loaded from and stored back to memory); argA2..argA5 are used as D registers and updated in place. Scratch: d0-d4, d6.
230
+ @ Be = ROL64((argA1^Da), 36)
231
+ @ Bi = ROL64((argA2^De), 10)
232
+ @ Bo = ROL64((argA3^Di), 15)
233
+ @ Bu = ROL64((argA4^Do), 56)
234
+ @ Ba = ROL64((argA5^Du), 27)
235
+ @ argA1 = Ba ^((~Be)& Bi )
236
+ @ Ca ^= argA1
237
+ @ argA2 = Be ^((~Bi)& Bo )
238
+ @ argA3 = Bi ^((~Bo)& Bu )
239
+ @ argA4 = Bo ^((~Bu)& Ba )
240
+ @ argA5 = Bu ^((~Ba)& Be )
241
+ veor.64 \argA2, \argA2, d8 @ argA2 ^= De (d8)
242
+ veor.64 \argA3, \argA3, d9 @ argA3 ^= Di (d9)
243
+ vshl.u64 d2, \argA2, #10 @ d2 = Bi: left-shift half of the rotate
244
+ vldr.64 d6, [r0, #\argA1] @ d6 = lane argA1 from memory
245
+ vshl.u64 d3, \argA3, #15 @ d3 = Bo: left-shift half of the rotate
246
+ veor.64 \argA4, \argA4, d10 @ argA4 ^= Do (d10)
247
+ vsri.64 d2, \argA2, #64-10 @ d2 = ROL64(argA2, 10)
248
+ vsri.64 d3, \argA3, #64-15 @ d3 = ROL64(argA3, 15)
249
+ veor.64 \argA5, \argA5, d11 @ argA5 ^= Du (d11)
250
+ vext.8 d4, \argA4, \argA4, #1 @ d4 = Bu: rotate left by 56 bits done as a byte rotation
251
+ vbic.64 \argA2, d3, d2 @ argA2 = (~Bi) & Bo
252
+ vshl.u64 d0, \argA5, #27 @ d0 = Ba: left-shift half of the rotate
253
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da (d7)
254
+ vbic.64 \argA3, d4, d3 @ argA3 = (~Bo) & Bu
255
+ vsri.64 d0, \argA5, #64-27 @ d0 = ROL64(argA5, 27)
256
+ vshl.u64 d1, d6, #36 @ d1 = Be: left-shift half of the rotate
257
+ veor.64 \argA3, d2 @ argA3 = Bi ^ ((~Bo) & Bu)
258
+ vbic.64 \argA4, d0, d4 @ argA4 = (~Bu) & Ba
259
+ vsri.64 d1, d6, #64-36 @ d1 = ROL64(argA1 ^ Da, 36)
260
+ veor.64 \argA4, d3 @ argA4 = Bo ^ ((~Bu) & Ba)
261
+ vbic.64 d6, d2, d1 @ d6 = (~Be) & Bi
262
+ vbic.64 \argA5, d1, d0 @ argA5 = (~Ba) & Be
263
+ veor.64 d6, d0 @ d6 = new argA1 = Ba ^ ((~Be) & Bi)
264
+ veor.64 \argA2, d1 @ argA2 = Be ^ ((~Bi) & Bo)
265
+ vstr.64 d6, [r0, #\argA1] @ write the new argA1 lane back to memory
266
+ veor.64 d5, d6 @ Ca (d5) ^= argA1
267
+ veor.64 \argA5, d4 @ argA5 = Bu ^ ((~Ba) & Be)
268
+ .endm
269
+
270
+ .macro KeccakS_ThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
271
+ @ argA1 is used as a byte offset from r0 (that lane is loaded from and stored back to memory); argA2..argA5 are used as D registers and updated in place. Scratch: d0-d4, d6.
272
+ @ Bo = ROL64((argA1^Da), 41)
273
+ @ Bu = ROL64((argA2^De), 2)
274
+ @ Ba = ROL64((argA3^Di), 62)
275
+ @ Be = ROL64((argA4^Do), 55)
276
+ @ Bi = ROL64((argA5^Du), 39)
277
+ @ argA1 = Ba ^((~Be)& Bi )
278
+ @ Ca ^= argA1
279
+ @ argA2 = Be ^((~Bi)& Bo )
280
+ @ argA3 = Bi ^((~Bo)& Bu )
281
+ @ argA4 = Bo ^((~Bu)& Ba )
282
+ @ argA5 = Bu ^((~Ba)& Be )
283
+ veor.64 \argA2, \argA2, d8 @ argA2 ^= De (d8)
284
+ veor.64 \argA3, \argA3, d9 @ argA3 ^= Di (d9)
285
+ vshl.u64 d4, \argA2, #2 @ d4 = Bu: left-shift half of the rotate
286
+ veor.64 \argA5, \argA5, d11 @ argA5 ^= Du (d11)
287
+ vshl.u64 d0, \argA3, #62 @ d0 = Ba: left-shift half of the rotate
288
+ vldr.64 d6, [r0, #\argA1] @ d6 = lane argA1 from memory
289
+ vsri.64 d4, \argA2, #64-2 @ d4 = ROL64(argA2, 2)
290
+ veor.64 \argA4, \argA4, d10 @ argA4 ^= Do (d10)
291
+ vsri.64 d0, \argA3, #64-62 @ d0 = ROL64(argA3, 62)
292
+ vshl.u64 d1, \argA4, #55 @ d1 = Be: left-shift half of the rotate
293
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da (d7)
294
+ vshl.u64 d2, \argA5, #39 @ d2 = Bi: left-shift half of the rotate
295
+ vsri.64 d1, \argA4, #64-55 @ d1 = ROL64(argA4, 55)
296
+ vbic.64 \argA4, d0, d4 @ argA4 = (~Bu) & Ba
297
+ vsri.64 d2, \argA5, #64-39 @ d2 = ROL64(argA5, 39)
298
+ vbic.64 \argA2, d1, d0 @ argA2 (temporary) = (~Ba) & Be
299
+ vshl.u64 d3, d6, #41 @ d3 = Bo: left-shift half of the rotate
300
+ veor.64 \argA5, d4, \argA2 @ argA5 = Bu ^ ((~Ba) & Be)
301
+ vbic.64 \argA2, d2, d1 @ argA2 (temporary) = (~Be) & Bi
302
+ vsri.64 d3, d6, #64-41 @ d3 = ROL64(argA1 ^ Da, 41)
303
+ veor.64 d6, d0, \argA2 @ d6 = new argA1 = Ba ^ ((~Be) & Bi)
304
+ vbic.64 \argA2, d3, d2 @ argA2 = (~Bi) & Bo
305
+ vbic.64 \argA3, d4, d3 @ argA3 = (~Bo) & Bu
306
+ veor.64 \argA2, d1 @ argA2 = Be ^ ((~Bi) & Bo)
307
+ vstr.64 d6, [r0, #\argA1] @ write the new argA1 lane back to memory
308
+ veor.64 d5, d6 @ Ca (d5) ^= argA1
309
+ veor.64 \argA3, d2 @ argA3 = Bi ^ ((~Bo) & Bu)
310
+ veor.64 \argA4, d3 @ argA4 = Bo ^ ((~Bu) & Ba)
311
+ .endm
312
+
313
+ @ --- macros for Parallel permutation
314
+
315
+ .macro m_pls start @ point r3 at byte offset `start` of the state at r0; emits nothing when start is -1
316
+ .if \start != -1 @ -1 means: keep the current value of r3
317
+ add r3, r0, #\start @ r3 = r0 + start
318
+ .endif
319
+ .endm
320
+
321
+ .macro m_ld qreg, next @ load 16 bytes from [r3] (128-bit alignment hint) into qreg, then advance r3
322
+ .if \next == 16 @ stride of 16: use the post-increment form
323
+ vld1.64 { \qreg }, [r3:128]! @ r3 += 16
324
+ .else
325
+ vld1.64 { \qreg }, [r3:128], r4 @ r3 += r4; the invoking macros set r4 = next beforehand
326
+ .endif
327
+ .endm
328
+
329
+ .macro m_st qreg, next @ store qreg (16 bytes) to [r3] (128-bit alignment hint), then advance r3
330
+ .if \next == 16 @ stride of 16: use the post-increment form
331
+ vst1.64 { \qreg }, [r3:128]! @ r3 += 16
332
+ .else
333
+ vst1.64 { \qreg }, [r3:128], r4 @ r3 += r4; the invoking macros set r4 = next beforehand
334
+ .endif
335
+ .endm
336
+
337
+ .macro KeccakP_ThetaRhoPiChiIota ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
338
+ @ ofs1..ofs5 and ofsn1 are passed to m_pls (byte offset from r0, or -1 to keep r3); next is the r3 stride used by m_ld/m_st. On entry r3 must already address the first lane (no m_pls precedes the first load). Reads q0-q4 as Ca..Cu, r1 as round-constant pointer, r5 as scratch-area pointer.
339
+ @ De = Ca ^ ROL64(Ci, 1)
340
+ @ Di = Ce ^ ROL64(Co, 1)
341
+ @ Do = Ci ^ ROL64(Cu, 1)
342
+ @ Du = Co ^ ROL64(Ca, 1)
343
+ @ Da = Cu ^ ROL64(Ce, 1)
344
+ vadd.u64 q6, q2, q2 @ q6 = Ci << 1 (x + x)
345
+ vadd.u64 q7, q3, q3 @ q7 = Co << 1
346
+ vadd.u64 q8, q4, q4 @ q8 = Cu << 1
347
+ vadd.u64 q9, q0, q0 @ q9 = Ca << 1
348
+ vadd.u64 q5, q1, q1 @ q5 = Ce << 1
349
+
350
+ vsri.64 q6, q2, #63 @ insert the wrapped-around top bit: q6 = ROL64(Ci, 1)
351
+ vsri.64 q7, q3, #63 @ q7 = ROL64(Co, 1)
352
+ vsri.64 q8, q4, #63 @ q8 = ROL64(Cu, 1)
353
+ vsri.64 q9, q0, #63 @ q9 = ROL64(Ca, 1)
354
+ vsri.64 q5, q1, #63 @ q5 = ROL64(Ce, 1)
355
+
356
+ veor.64 q6, q6, q0 @ q6 = De
357
+ veor.64 q7, q7, q1 @ q7 = Di
358
+ veor.64 q8, q8, q2 @ q8 = Do
359
+ .if \next != 16
360
+ mov r4, #\next @ r4 = stride consumed by m_ld/m_st
361
+ .endif
362
+ veor.64 q9, q9, q3 @ q9 = Du
363
+ veor.64 q5, q5, q4 @ q5 = Da
364
+
365
+ @ Ba = argA1^Da
366
+ @ Be = ROL64(argA2^De, 44)
367
+ @ Bi = ROL64(argA3^Di, 43)
368
+ @ Bo = ROL64(argA4^Do, 21)
369
+ @ Bu = ROL64(argA5^Du, 14)
370
+ m_ld q10, \next
371
+ m_pls \ofs2
372
+ m_ld q1, \next
373
+ m_pls \ofs3
374
+ veor.64 q10, q10, q5 @ q10 = Ba
375
+ m_ld q2, \next
376
+ m_pls \ofs4
377
+ veor.64 q1, q1, q6 @ q1 = argA2 ^ De
378
+ m_ld q3, \next
379
+ m_pls \ofs5
380
+ veor.64 q2, q2, q7 @ q2 = argA3 ^ Di
381
+ m_ld q4, \next
382
+ veor.64 q3, q3, q8 @ q3 = argA4 ^ Do
383
+ mov r6, r5 @ r6 walks the scratch area addressed by r5
384
+ veor.64 q4, q4, q9 @ q4 = argA5 ^ Du
385
+
386
+ vst1.64 { q6 }, [r6:128]! @ save De to scratch
387
+ vshl.u64 q11, q1, #44 @ q11 = Be: left-shift half of the rotate
388
+ vshl.u64 q12, q2, #43 @ q12 = Bi: left-shift half of the rotate
389
+ vst1.64 { q7 }, [r6:128]! @ save Di to scratch
390
+ vshl.u64 q13, q3, #21 @ q13 = Bo: left-shift half of the rotate
391
+ vshl.u64 q14, q4, #14 @ q14 = Bu: left-shift half of the rotate
392
+ vst1.64 { q8 }, [r6:128]! @ save Do to scratch
393
+ vsri.64 q11, q1, #64-44 @ q11 = ROL64(argA2 ^ De, 44)
394
+ vsri.64 q12, q2, #64-43 @ q12 = ROL64(argA3 ^ Di, 43)
395
+ vst1.64 { q9 }, [r6:128]! @ save Du to scratch
396
+ vsri.64 q13, q3, #64-21 @ q13 = ROL64(argA4 ^ Do, 21)
397
+ vsri.64 q14, q4, #64-14 @ q14 = ROL64(argA5 ^ Du, 14)
398
+
399
+ @ argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
400
+ @ argA2 = Be ^(~Bi & Bo)
401
+ @ argA3 = Bi ^(~Bo & Bu)
402
+ @ argA4 = Bo ^(~Bu & Ba)
403
+ @ argA5 = Bu ^(~Ba & Be)
404
+ vld1.64 { d30 }, [r1:64] @ low half of q15 = round constant at [r1]
405
+ vbic.64 q0, q12, q11 @ q0 = ~Be & Bi
406
+ vbic.64 q1, q13, q12 @ q1 = ~Bi & Bo
407
+ vld1.64 { d31 }, [r1:64]! @ high half of q15 = same constant; r1 += 8
408
+ veor.64 q0, q10 @ q0 ^= Ba
409
+ vbic.64 q4, q11, q10 @ q4 = ~Ba & Be
410
+ veor.64 q0, q15 @ q0 = new argA1 (round constant applied to both 64-bit halves)
411
+ vbic.64 q2, q14, q13 @ q2 = ~Bo & Bu
412
+ vbic.64 q3, q10, q14 @ q3 = ~Bu & Ba
413
+
414
+ m_pls \ofs1
415
+ veor.64 q1, q11 @ q1 = new argA2
416
+ m_st q0, \next
417
+ m_pls \ofs2
418
+ veor.64 q2, q12 @ q2 = new argA3
419
+ m_st q1, \next
420
+ m_pls \ofs3
421
+ veor.64 q3, q13 @ q3 = new argA4
422
+ m_st q2, \next
423
+ m_pls \ofs4
424
+ veor.64 q4, q14 @ q4 = new argA5
425
+ m_st q3, \next
426
+ m_pls \ofs5
427
+ m_st q4, \next
428
+ m_pls \ofsn1 @ leave r3 at ofsn1 for whatever follows (unless ofsn1 is -1)
429
+ .endm
430
+
431
+ .macro KeccakP_ThetaRhoPiChi ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1, Bb1, Bb2, Bb3, Bb4, Bb5, Rr1, Rr2, Rr3, Rr4, Rr5
432
+ @ Bb1..Bb5 name the Q registers that receive the five rotated lanes and must be a permutation of q10..q14, which the Chi step below reads as Ba,Be,Bi,Bo,Bu; Rr1..Rr5 are the rotation amounts. On entry q5..q9 are read as Da,De,Di,Do,Du and r3 must already address the first lane.
433
+ @ Bb1 = ROL64((argA1^Da), Rr1)
434
+ @ Bb2 = ROL64((argA2^De), Rr2)
435
+ @ Bb3 = ROL64((argA3^Di), Rr3)
436
+ @ Bb4 = ROL64((argA4^Do), Rr4)
437
+ @ Bb5 = ROL64((argA5^Du), Rr5)
438
+
439
+ .if \next != 16
440
+ mov r4, #\next @ r4 = stride consumed by m_ld/m_st
441
+ .endif
442
+
443
+ m_ld \Bb1, \next
444
+ m_pls \ofs2
445
+ m_ld \Bb2, \next
446
+ m_pls \ofs3
447
+ veor.64 q15, q5, \Bb1 @ q15 = argA1 ^ Da (q5 itself is left intact)
448
+ m_ld \Bb3, \next
449
+ m_pls \ofs4
450
+ veor.64 q6, q6, \Bb2 @ q6 = argA2 ^ De (overwrites De; q6 is reloaded from scratch below)
451
+ m_ld \Bb4, \next
452
+ m_pls \ofs5
453
+ veor.64 q7, q7, \Bb3 @ q7 = argA3 ^ Di (q7 is reloaded from scratch below)
454
+ m_ld \Bb5, \next
455
+ veor.64 q8, q8, \Bb4 @ q8 = argA4 ^ Do (q8 is reloaded from scratch below)
456
+ veor.64 q9, q9, \Bb5 @ q9 = argA5 ^ Du (q9 is reloaded from scratch below)
457
+
458
+ vshl.u64 \Bb1, q15, #\Rr1 @ left-shift halves of the five rotates
459
+ vshl.u64 \Bb2, q6, #\Rr2
460
+ vshl.u64 \Bb3, q7, #\Rr3
461
+ vshl.u64 \Bb4, q8, #\Rr4
462
+ vshl.u64 \Bb5, q9, #\Rr5
463
+
464
+ vsri.64 \Bb1, q15, #64-\Rr1 @ insert the wrapped-around bits to complete each ROL64
465
+ vsri.64 \Bb2, q6, #64-\Rr2
466
+ vsri.64 \Bb3, q7, #64-\Rr3
467
+ vsri.64 \Bb4, q8, #64-\Rr4
468
+ vsri.64 \Bb5, q9, #64-\Rr5
469
+
470
+ @ argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
471
+ @ argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
472
+ @ argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
473
+ @ argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
474
+ @ argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
475
+ vbic.64 q15, q12, q11 @ q15 = (~Be) & Bi
476
+ mov r6, r5 @ r6 walks the scratch area at r5 (KeccakP_ThetaRhoPiChiIota stores q6-q9 there)
477
+ vbic.64 q6, q13, q12 @ q6 = (~Bi) & Bo
478
+ m_pls \ofs1
479
+ vbic.64 q7, q14, q13 @ q7 = (~Bo) & Bu
480
+ vbic.64 q8, q10, q14 @ q8 = (~Bu) & Ba
481
+ vbic.64 q9, q11, q10 @ q9 = (~Ba) & Be
482
+
483
+ veor.64 q15, q15, q10 @ q15 = new argA1
484
+ veor.64 q6, q6, q11 @ q6 = new argA2
485
+
486
+ m_st q15, \next
487
+ m_pls \ofs2
488
+ veor.64 q7, q7, q12 @ q7 = new argA3
489
+
490
+ m_st q6, \next
491
+ m_pls \ofs3
492
+ veor.64 q1, q1, q6 @ Ce (q1) ^= argA2
493
+ vld1.64 { q6 }, [r6:128]! @ reload q6 from scratch (first slot)
494
+ veor.64 q8, q8, q13 @ q8 = new argA4
495
+
496
+ m_st q7, \next
497
+ m_pls \ofs4
498
+ veor.64 q2, q2, q7 @ Ci (q2) ^= argA3
499
+ vld1.64 { q7 }, [r6:128]! @ reload q7 from scratch (second slot)
500
+ veor.64 q9, q9, q14 @ q9 = new argA5
501
+
502
+ m_st q8, \next
503
+ m_pls \ofs5
504
+ veor.64 q3, q3, q8 @ Co (q3) ^= argA4
505
+
506
+ m_st q9, \next
507
+
508
+ vld1.64 { q8 }, [r6:128]! @ reload q8 from scratch (third slot)
509
+ veor.64 q4, q4, q9 @ Cu (q4) ^= argA5
510
+ m_pls \ofsn1 @ leave r3 at ofsn1 for whatever follows (unless ofsn1 is -1)
511
+ vld1.64 { q9 }, [r6:128]! @ reload q9 from scratch (fourth slot)
512
+ veor.64 q0, q0, q15 @ Ca (q0) ^= argA1
513
+ .endm
514
+
515
+ .macro KeccakP_ThetaRhoPiChi1 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
516
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20 @ lanes land in Bi,Bo,Bu,Ba,Be (q12,q13,q14,q10,q11), rotated by 3,45,61,28,20
517
+ .endm
518
+
519
+ .macro KeccakP_ThetaRhoPiChi2 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
520
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8 @ lanes land in Bu,Ba,Be,Bi,Bo (q14,q10,q11,q12,q13), rotated by 18,1,6,25,8
521
+ .endm
522
+
523
+ .macro KeccakP_ThetaRhoPiChi3 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
524
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27 @ lanes land in Be,Bi,Bo,Bu,Ba (q11,q12,q13,q14,q10), rotated by 36,10,15,56,27
525
+ .endm
526
+
527
+ .macro KeccakP_ThetaRhoPiChi4 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
528
+ @ Same parameters as the other KeccakP_ThetaRhoPiChi* macros. Unlike the generic macro it overwrites q5..q9 (read as Da..Du on entry) with the results and does not reload them from scratch.
529
+ @ Bo = ROL64((argA1^Da), 41)
530
+ @ Bu = ROL64((argA2^De), 2)
531
+ @ Ba = ROL64((argA3^Di), 62)
532
+ @ Be = ROL64((argA4^Do), 55)
533
+ @ Bi = ROL64((argA5^Du), 39)
534
+ @ KeccakChi
535
+
536
+ .if \next != 16
537
+ mov r4, #\next @ r4 = stride consumed by m_ld/m_st
538
+ .endif
539
+
540
+ m_ld q13, \next
541
+ m_pls \ofs2
542
+ m_ld q14, \next
543
+ m_pls \ofs3
544
+ veor.64 q5, q5, q13 @ q5 = argA1 ^ Da
545
+ m_ld q10, \next
546
+ m_pls \ofs4
547
+ veor.64 q6, q6, q14 @ q6 = argA2 ^ De
548
+ m_ld q11, \next
549
+ m_pls \ofs5
550
+ veor.64 q7, q7, q10 @ q7 = argA3 ^ Di
551
+ m_ld q12, \next
552
+ veor.64 q8, q8, q11 @ q8 = argA4 ^ Do
553
+ veor.64 q9, q9, q12 @ q9 = argA5 ^ Du
554
+
555
+ vshl.u64 q13, q5, #41 @ left-shift halves of the five rotates
556
+ vshl.u64 q14, q6, #2
557
+ vshl.u64 q10, q7, #62
558
+ vshl.u64 q11, q8, #55
559
+ vshl.u64 q12, q9, #39
560
+
561
+ vsri.64 q13, q5, #64-41 @ q13 = Bo
562
+ vsri.64 q14, q6, #64-2 @ q14 = Bu
563
+ vsri.64 q11, q8, #64-55 @ q11 = Be
564
+ vsri.64 q12, q9, #64-39 @ q12 = Bi
565
+ vsri.64 q10, q7, #64-62 @ q10 = Ba
566
+
567
+ vbic.64 q5, q12, q11 @ q5 = (~Be) & Bi
568
+ vbic.64 q6, q13, q12 @ q6 = (~Bi) & Bo
569
+ vbic.64 q7, q14, q13 @ q7 = (~Bo) & Bu
570
+ vbic.64 q8, q10, q14 @ q8 = (~Bu) & Ba
571
+ vbic.64 q9, q11, q10 @ q9 = (~Ba) & Be
572
+ veor.64 q5, q5, q10 @ q5 = new argA1
573
+ veor.64 q6, q6, q11 @ q6 = new argA2
574
+ veor.64 q7, q7, q12 @ q7 = new argA3
575
+ veor.64 q8, q8, q13 @ q8 = new argA4
576
+ m_pls \ofs1
577
+ veor.64 q9, q9, q14 @ q9 = new argA5
578
+ m_st q5, \next
579
+ m_pls \ofs2
580
+ veor.64 q0, q0, q5 @ q0 ^= new argA1 (column-parity accumulation)
581
+ m_st q6, \next
582
+ m_pls \ofs3
583
+ veor.64 q1, q1, q6 @ q1 ^= new argA2
584
+ m_st q7, \next
585
+ m_pls \ofs4
586
+ veor.64 q2, q2, q7 @ q2 ^= new argA3
587
+ m_st q8, \next
588
+ m_pls \ofs5
589
+ veor.64 q3, q3, q8 @ q3 ^= new argA4
590
+ m_st q9, \next
591
+ m_pls \ofsn1 @ leave r3 at ofsn1 for whatever follows (unless ofsn1 is -1)
592
+ veor.64 q4, q4, q9 @ q4 ^= new argA5
593
+ .endm
594
+
595
+ @----------------------------------------------------------------------------
596
+ @
597
+ @ void KeccakP1600_Pl_StaticInitialize( void )
598
+ @ No-op: returns immediately without touching any register or memory.
599
+ .align 8
600
+ .global KeccakP1600_Pl_StaticInitialize
601
+ .type KeccakP1600_Pl_StaticInitialize, %function;
602
+ KeccakP1600_Pl_StaticInitialize:
603
+ bx lr @ nothing to initialize
604
+
605
+
606
+ @----------------------------------------------------------------------------
607
+ @
608
+ @ void KeccakP1600times2_InitializeAll( void *states )
609
+ @ Zeroes 400 bytes (50 eight-byte lanes) starting at states (r0). Clobbers r0 and d0-d7.
610
+ .align 8
611
+ .global KeccakP1600times2_InitializeAll
612
+ .type KeccakP1600times2_InitializeAll, %function;
613
+ KeccakP1600times2_InitializeAll:
614
+ vmov.i64 q0, #0 @ d0-d7 = 0
615
+ vmov.i64 q1, #0
616
+ vmov.i64 q2, #0
617
+ vmov.i64 q3, #0
618
+ vstm r0!, { d0 - d7 } @ 8 (clear 8 lanes at a time)
619
+ vstm r0!, { d0 - d7 } @ 16
620
+ vstm r0!, { d0 - d7 } @ 24
621
+ vstm r0!, { d0 - d7 } @ 32
622
+ vstm r0!, { d0 - d7 } @ 40
623
+ vstm r0!, { d0 - d7 } @ 48
624
+ vstm r0!, { d0 - d1} @ 50
625
+ bx lr
626
+
627
+
628
+
629
+ @----------------------------------------------------------------------------
630
+ @
631
+ @ void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
632
+ @ XORs byte (r2) into one state byte. Address = states + 8*instanceIndex + 16*(offset/8) + (offset%8), i.e. the lanes of the two instances are interleaved. Clobbers r0, r1, r3.
633
+ .align 8
634
+ .global KeccakP1600times2_AddByte
635
+ .type KeccakP1600times2_AddByte, %function;
636
+ KeccakP1600times2_AddByte:
637
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
638
+ lsr r1, r3, #3 @ states += (offset & ~7) * 2
639
+ add r0, r0, r1, LSL #4 @ r1 = lane index; each lane pair occupies 16 bytes
640
+ and r3, r3, #7
641
+ add r0, r0, r3 @ states += offset & 7
642
+ ldrb r1, [r0] @ read-modify-write of the single target byte
643
+ eor r1, r1, r2
644
+ strb r1, [r0]
645
+ bx lr
646
+
647
+
648
+ @----------------------------------------------------------------------------
649
+ @
650
+ @ void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
651
+ @ unsigned int offset, unsigned int length )
652
+ @ XORs length bytes of data into instance instanceIndex starting at byte offset. In: r0=states, r1=instanceIndex, r2=data, r3=offset, [sp]=length. Three phases: bytes up to the next lane boundary, whole lanes, trailing bytes.
653
+ .align 8
654
+ .global KeccakP1600times2_AddBytes
655
+ .type KeccakP1600times2_AddBytes, %function;
656
+ KeccakP1600times2_AddBytes:
657
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
658
+ ldr r1, [sp, #0*4] @ r1 = length
659
+ cmp r1, #0
660
+ beq KeccakP1600times2_AddBytes_Exit @ nothing to do; no registers pushed yet
661
+ push { r4- r7 }
662
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
663
+ add r0, r0, r4, LSL #4
664
+ ands r3, r3, #7 @ .if (offset & 7) != 0
665
+ beq KeccakP1600times2_AddBytes_CheckLanes
666
+ add r0, r0, r3 @ states += offset & 7
667
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
668
+ KeccakP1600times2_AddBytes_LoopBytesFirst:
669
+ ldrb r4, [r0]
670
+ ldrb r5, [r2], #1
671
+ eor r4, r4, r5
672
+ subs r1, r1, #1 @ length--; flags survive the strb below
673
+ strb r4, [r0], #1
674
+ beq KeccakP1600times2_AddBytes_Done @ input exhausted inside the first lane
675
+ subs r3, r3, #1
676
+ bne KeccakP1600times2_AddBytes_LoopBytesFirst
677
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
678
+ KeccakP1600times2_AddBytes_CheckLanes:
679
+ lsrs r3, r1, #3 @ r3 = number of whole lanes remaining
680
+ beq KeccakP1600times2_AddBytes_CheckBytesLast
681
+ KeccakP1600times2_AddBytes_LoopLanes:
682
+ ldr r4, [r0] @ state lane as two 32-bit words
683
+ ldr r5, [r0, #4]
684
+ ldr r6, [r2], #4 @ data read with word loads
685
+ ldr r7, [r2], #4
686
+ eor r4, r4, r6
687
+ eor r5, r5, r7
688
+ subs r3, r3, #1 @ flags survive the two stores below
689
+ str r4, [r0], #4
690
+ str r5, [r0], #12 @ states += 8 (next lane of current state part)
691
+ bne KeccakP1600times2_AddBytes_LoopLanes
692
+ KeccakP1600times2_AddBytes_CheckBytesLast:
693
+ ands r1, r1, #7 @ r1 = trailing byte count
694
+ beq KeccakP1600times2_AddBytes_Done
695
+ KeccakP1600times2_AddBytes_LoopBytesLast:
696
+ ldrb r4, [r0]
697
+ ldrb r5, [r2], #1
698
+ eor r4, r4, r5
699
+ subs r1, r1, #1
700
+ strb r4, [r0], #1
701
+ bne KeccakP1600times2_AddBytes_LoopBytesLast
702
+ KeccakP1600times2_AddBytes_Done:
703
+ pop { r4- r7 }
704
+ KeccakP1600times2_AddBytes_Exit:
705
+ bx lr
706
+
707
+
708
+ @----------------------------------------------------------------------------
709
+ @
710
+ @ void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
711
+ @ XORs laneCount lanes into both instances: instance 0 reads from data, instance 1 from data + 8*laneOffset. In: r0=states, r1=data, r2=laneCount, r3=laneOffset.
712
+ .global KeccakP1600times2_AddLanesAll
713
+ .type KeccakP1600times2_AddLanesAll, %function;
714
+ .align 8
715
+ KeccakP1600times2_AddLanesAll:
716
+ cmp r2, #0
717
+ beq KeccakP1600times2_AddLanesAll_Exit @ laneCount == 0: nothing pushed yet
718
+ add r3, r1, r3, LSL #3 @ r3: data + 8 * laneOffset
719
+ push {r4 - r7}
720
+ KeccakP1600times2_AddLanesAll_Loop:
721
+ ldr r4, [r1], #4 @ index 0
722
+ ldr r5, [r1], #4
723
+ ldrd r6, r7, [r0] @ state lane of instance 0
724
+ eor r6, r6, r4
725
+ eor r7, r7, r5
726
+ strd r6, r7, [r0], #8 @ advance to the lane of instance 1
727
+ ldr r4, [r3], #4 @ index 1
728
+ ldr r5, [r3], #4
729
+ ldrd r6, r7, [r0] @ state lane of instance 1
730
+ eor r6, r6, r4
731
+ eor r7, r7, r5
732
+ strd r6, r7, [r0], #8 @ advance to the next lane pair
733
+ subs r2, r2, #1
734
+ bne KeccakP1600times2_AddLanesAll_Loop
735
+ pop {r4 - r7}
736
+ KeccakP1600times2_AddLanesAll_Exit:
737
+ bx lr
738
+
739
+
740
+ @----------------------------------------------------------------------------
741
+ @
742
+ @ void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
743
+ @ unsigned int offset, unsigned int length )
744
+ @ Copies length bytes of data over instance instanceIndex starting at byte offset. In: r0=states, r1=instanceIndex, r2=data, r3=offset, [sp]=length. Three phases: bytes up to the next lane boundary, whole lanes, trailing bytes.
745
+ .align 8
746
+ .global KeccakP1600times2_OverwriteBytes
747
+ .type KeccakP1600times2_OverwriteBytes, %function;
748
+ KeccakP1600times2_OverwriteBytes:
749
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
750
+ ldr r1, [sp, #0*4] @ r1 = length
751
+ cmp r1, #0
752
+ beq KeccakP1600times2_OverwriteBytes_Exit @ nothing to do; no registers pushed yet
753
+ push { r4-r5 }
754
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
755
+ add r0, r0, r4, LSL #4
756
+ ands r3, r3, #7 @ .if (offset & 7) != 0
757
+ beq KeccakP1600times2_OverwriteBytes_CheckLanes
758
+ add r0, r0, r3 @ states += offset & 7
759
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
760
+ KeccakP1600times2_OverwriteBytes_LoopBytesFirst:
761
+ ldrb r4, [r2], #1
762
+ strb r4, [r0], #1
763
+ subs r1, r1, #1 @ length--
764
+ beq KeccakP1600times2_OverwriteBytes_Done @ input exhausted inside the first lane
765
+ subs r3, r3, #1
766
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
767
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
768
+ KeccakP1600times2_OverwriteBytes_CheckLanes:
769
+ lsrs r3, r1, #3 @ r3 = number of whole lanes remaining
770
+ beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
771
+ KeccakP1600times2_OverwriteBytes_LoopLanes:
772
+ ldr r4, [r2], #4 @ data read with word loads
773
+ ldr r5, [r2], #4
774
+ str r4, [r0], #4
775
+ str r5, [r0], #12 @ states += 8 (next lane of current state part)
776
+ subs r3, r3, #1
777
+ bne KeccakP1600times2_OverwriteBytes_LoopLanes
778
+ KeccakP1600times2_OverwriteBytes_CheckBytesLast:
779
+ ands r1, r1, #7 @ r1 = trailing byte count
780
+ beq KeccakP1600times2_OverwriteBytes_Done
781
+ KeccakP1600times2_OverwriteBytes_LoopBytesLast:
782
+ ldrb r4, [r2], #1
783
+ subs r1, r1, #1 @ flags survive the strb below
784
+ strb r4, [r0], #1
785
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
786
+ KeccakP1600times2_OverwriteBytes_Done:
787
+ pop { r4- r5 }
788
+ KeccakP1600times2_OverwriteBytes_Exit:
789
+ bx lr
790
+
791
+
792
+ @----------------------------------------------------------------------------
793
+ @
794
+ @ void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
795
+ @ Overwrites laneCount lanes of both instances: instance 0 from data, instance 1 from data + 8*laneOffset. Uses NEON loads when data is 8-byte aligned, word loads otherwise. In: r0=states, r1=data, r2=laneCount, r3=laneOffset.
796
+ .align 8
797
+ .global KeccakP1600times2_OverwriteLanesAll
798
+ .type KeccakP1600times2_OverwriteLanesAll, %function;
799
+ KeccakP1600times2_OverwriteLanesAll:
800
+ cmp r2, #0
801
+ beq KeccakP1600times2_OverwriteLanesAll_Exit
802
+ lsls r12, r1, #32-3 @ non-zero iff any of the low 3 bits of data is set
803
+ bne KeccakP1600times2_OverwriteLanesAll_Unaligned
804
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
805
+ lsrs r2, r2, #1 @ r2 = lane pairs; C = odd lane, Z = no pairs
806
+ bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned @ even count: straight to the 2-lane loop
807
+ vldm r1!, { d0 } @ odd count: copy one lane of each instance first
808
+ vldm r3!, { d1 }
809
+ vstm r0!, { d0 - d1 }
810
+ beq KeccakP1600times2_OverwriteLanesAll_Exit @ Z still from the lsrs above (VFP load/store leaves flags alone)
811
+ KeccakP1600times2_OverwriteLanesAll_LoopAligned:
812
+ vldm r1!, { d0 } @ instance 0, lanes n and n+1 -> d0, d2
813
+ vldm r1!, { d2 }
814
+ vldm r3!, { d1 } @ instance 1, lanes n and n+1 -> d1, d3
815
+ vldm r3!, { d3 }
816
+ subs r2, r2, #1
817
+ vstm r0!, { d0 - d3 } @ stored interleaved: i0, i1, i0, i1
818
+ bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
819
+ bx lr
820
+ KeccakP1600times2_OverwriteLanesAll_Unaligned:
821
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
822
+ push { r4, r5 }
823
+ KeccakP1600times2_OverwriteLanesAll_LoopUnaligned:
824
+ ldr r4, [r1], #4 @ instance 0 lane, read as two words
825
+ ldr r5, [r1], #4
826
+ strd r4, r5, [r0], #8
827
+ ldr r4, [r3], #4 @ instance 1 lane, read as two words
828
+ ldr r5, [r3], #4
829
+ subs r2, r2, #1 @ flags survive the strd below
830
+ strd r4, r5, [r0], #8
831
+ bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
832
+ pop { r4, r5 }
833
+ KeccakP1600times2_OverwriteLanesAll_Exit:
834
+ bx lr
835
+
836
+
837
+ @----------------------------------------------------------------------------
838
+ @
839
+ @ void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
840
+ @ Zeroes the first byteCount bytes of instance instanceIndex: byteCount/8 whole lanes, then byteCount%8 bytes of the following lane. In: r0=states, r1=instanceIndex, r2=byteCount.
841
+ .align 8
842
+ .global KeccakP1600times2_OverwriteWithZeroes
843
+ .type KeccakP1600times2_OverwriteWithZeroes, %function;
844
+ KeccakP1600times2_OverwriteWithZeroes:
845
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
846
+ lsrs r1, r2, #3 @ r1: laneCount
847
+ beq KeccakP1600times2_OverwriteWithZeroes_Bytes
848
+ vmov.i64 d0, #0
849
+ KeccakP1600times2_OverwriteWithZeroes_LoopLanes:
850
+ subs r1, r1, #1 @ flags survive the vstm and add below
851
+ vstm r0!, { d0 } @ clear this instance's lane (r0 += 8)
852
+ add r0, r0, #8 @ skip the other instance's lane
853
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
854
+ KeccakP1600times2_OverwriteWithZeroes_Bytes:
855
+ ands r2, r2, #7 @ r2: byteCount remaining
856
+ beq KeccakP1600times2_OverwriteWithZeroes_Exit
857
+ movs r3, #0
858
+ KeccakP1600times2_OverwriteWithZeroes_LoopBytes:
859
+ subs r2, r2, #1
860
+ strb r3, [r0], #1
861
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
862
+ KeccakP1600times2_OverwriteWithZeroes_Exit:
863
+ bx lr
864
+
865
+
866
+ @----------------------------------------------------------------------------
867
+ @
868
+ @ void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
869
+ @ unsigned int offset, unsigned int length )
870
+ @ Copies length bytes of instance instanceIndex, starting at byte offset, out to data (data is written, states is only read). In: r0=states, r1=instanceIndex, r2=data, r3=offset, [sp]=length.
871
+ .align 8
872
+ .global KeccakP1600times2_ExtractBytes
873
+ .type KeccakP1600times2_ExtractBytes, %function;
874
+ KeccakP1600times2_ExtractBytes:
875
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
876
+ ldr r1, [sp, #0*4] @ r1 = length
877
+ cmp r1, #0
878
+ beq KeccakP1600times2_ExtractBytes_Exit @ nothing to do; no registers pushed yet
879
+ push { r4-r5 }
880
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
881
+ add r0, r0, r4, LSL #4
882
+ ands r3, r3, #7 @ .if (offset & 7) != 0
883
+ beq KeccakP1600times2_ExtractBytes_CheckLanes
884
+ add r0, r0, r3 @ states += offset & 7
885
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
886
+ KeccakP1600times2_ExtractBytes_LoopBytesFirst:
887
+ ldrb r4, [r0], #1
888
+ strb r4, [r2], #1
889
+ subs r1, r1, #1 @ length--
890
+ beq KeccakP1600times2_ExtractBytes_Done @ request satisfied inside the first lane
891
+ subs r3, r3, #1
892
+ bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
893
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
894
+ KeccakP1600times2_ExtractBytes_CheckLanes:
895
+ lsrs r3, r1, #3 @ r3 = number of whole lanes remaining
896
+ beq KeccakP1600times2_ExtractBytes_CheckBytesLast
897
+ KeccakP1600times2_ExtractBytes_LoopLanes:
898
+ ldr r4, [r0], #4
899
+ ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
900
+ str r4, [r2], #4 @ data written with word stores
901
+ str r5, [r2], #4
902
+ subs r3, r3, #1
903
+ bne KeccakP1600times2_ExtractBytes_LoopLanes
904
+ KeccakP1600times2_ExtractBytes_CheckBytesLast:
905
+ ands r1, r1, #7 @ r1 = trailing byte count
906
+ beq KeccakP1600times2_ExtractBytes_Done
907
+ KeccakP1600times2_ExtractBytes_LoopBytesLast:
908
+ ldrb r4, [r0], #1
909
+ subs r1, r1, #1 @ flags survive the strb below
910
+ strb r4, [r2], #1
911
+ bne KeccakP1600times2_ExtractBytes_LoopBytesLast
912
+ KeccakP1600times2_ExtractBytes_Done:
913
+ pop { r4-r5 }
914
+ KeccakP1600times2_ExtractBytes_Exit:
915
+ bx lr
916
+
917
+
918
+ @----------------------------------------------------------------------------
919
+ @
920
+ @ void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
921
+ @ Copies laneCount lanes of both instances out: instance 0 to data, instance 1 to data + 8*laneOffset. Uses NEON stores when data is 8-byte aligned, word stores otherwise. In: r0=states, r1=data, r2=laneCount, r3=laneOffset.
922
+ .align 8
923
+ .global KeccakP1600times2_ExtractLanesAll
924
+ .type KeccakP1600times2_ExtractLanesAll, %function;
925
+ KeccakP1600times2_ExtractLanesAll:
926
+ cmp r2, #0
927
+ beq KeccakP1600times2_ExtractLanesAll_Exit
928
+ lsls r12, r1, #32-3 @ non-zero iff any of the low 3 bits of data is set
929
+ bne KeccakP1600times2_ExtractLanesAll_Unaligned
930
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
931
+ lsrs r2, r2, #1 @ r2 = lane pairs; C = odd lane, Z = no pairs
932
+ bcc KeccakP1600times2_ExtractLanesAll_LoopAligned @ even count: straight to the 2-lane loop
933
+ vldm r0!, { d0 - d1 } @ odd count: one lane of each instance first
934
+ vstm r1!, { d0 }
935
+ vstm r3!, { d1 }
936
+ beq KeccakP1600times2_ExtractLanesAll_Exit @ Z still from the lsrs above (VFP load/store leaves flags alone)
937
+ KeccakP1600times2_ExtractLanesAll_LoopAligned:
938
+ vldm r0!, { d0 - d3 } @ interleaved in the state: i0, i1, i0, i1
939
+ subs r2, r2, #1 @ flags survive the stores below
940
+ vstm r1!, { d0 } @ instance 0 lanes: d0, d2
941
+ vstm r1!, { d2 }
942
+ vstm r3!, { d1 } @ instance 1 lanes: d1, d3
943
+ vstm r3!, { d3 }
944
+ bne KeccakP1600times2_ExtractLanesAll_LoopAligned
945
+ bx lr
946
+ KeccakP1600times2_ExtractLanesAll_Unaligned:
947
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
948
+ push { r4, r5 }
949
+ KeccakP1600times2_ExtractLanesAll_LoopUnaligned:
950
+ ldrd r4, r5, [r0], #8 @ instance 0 lane
951
+ str r4, [r1], #4
952
+ str r5, [r1], #4
953
+ ldrd r4, r5, [r0], #8 @ instance 1 lane
954
+ subs r2, r2, #1 @ flags survive the stores below
955
+ str r4, [r3], #4
956
+ str r5, [r3], #4
957
+ bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
958
+ pop { r4, r5 }
959
+ KeccakP1600times2_ExtractLanesAll_Exit:
960
+ bx lr
961
+
962
+
963
+ @----------------------------------------------------------------------------
964
+ @
965
+ @ void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
966
+ @ const unsigned char *input, unsigned char *output,
967
+ @ unsigned int offset, unsigned int length )
968
+ @ Writes output[i] = state byte ^ input[i] for length bytes of instance instanceIndex starting at byte offset; states is only read. In: r0=states, r1=instanceIndex, r2=input, r3=output, [sp]=offset, [sp,#4]=length.
969
+ .align 8
970
+ .global KeccakP1600times2_ExtractAndAddBytes
971
+ .type KeccakP1600times2_ExtractAndAddBytes, %function;
972
+ KeccakP1600times2_ExtractAndAddBytes:
973
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
974
+ ldr r1, [sp, #1*4] @ r1 = length
975
+ cmp r1, #0
976
+ beq KeccakP1600times2_ExtractAndAddBytes_Exit @ nothing to do; no registers pushed yet
977
+ push { r4 - r9 }
978
+ ldr r8, [sp, #6*4] @ r8 = offset
979
+ lsr r4, r8, #3 @ states += (offset & ~7) * 2
980
+ add r0, r0, r4, LSL #4
981
+ ands r8, r8, #7 @ .if (offset & 7) != 0
982
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
983
+ add r0, r0, r8 @ states += offset & 7
984
+ rsb r8, r8, #8 @ lenInLane = 8 - (offset & 7)
985
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst:
986
+ ldrb r4, [r0], #1
987
+ ldrb r5, [r2], #1
988
+ eor r4, r4, r5
989
+ strb r4, [r3], #1
990
+ subs r1, r1, #1 @ length--
991
+ beq KeccakP1600times2_ExtractAndAddBytes_Done @ request satisfied inside the first lane
992
+ subs r8, r8, #1
993
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
994
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
995
+ KeccakP1600times2_ExtractAndAddBytes_CheckLanes:
996
+ lsrs r8, r1, #3 @ r8 = number of whole lanes remaining
997
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
998
+ KeccakP1600times2_ExtractAndAddBytes_LoopLanes:
999
+ ldr r4, [r0], #4
1000
+ ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
1001
+ ldr r6, [r2], #4 @ input read with word loads
1002
+ ldr r7, [r2], #4
1003
+ eor r4, r4, r6
1004
+ eor r5, r5, r7
1005
+ str r4, [r3], #4
1006
+ str r5, [r3], #4 @ output has advanced by one full lane (8 bytes)
1007
+ subs r8, r8, #1
1008
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1009
+ KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast:
1010
+ ands r1, r1, #7 @ r1 = trailing byte count
1011
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
1012
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast:
1013
+ ldrb r4, [r0], #1
1014
+ ldrb r5, [r2], #1
1015
+ eor r4, r4, r5
1016
+ strb r4, [r3], #1
1017
+ subs r1, r1, #1
1018
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1019
+ KeccakP1600times2_ExtractAndAddBytes_Done:
1020
+ pop { r4 - r9 }
1021
+ KeccakP1600times2_ExtractAndAddBytes_Exit:
1022
+ bx lr
1023
+
1024
+
1025
+ @----------------------------------------------------------------------------
1026
+ @
1027
+ @ void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1028
+ @ const unsigned char *input, unsigned char *output,
1029
+ @ unsigned int laneCount, unsigned int laneOffset )
1030
+ @ For both instances writes output lane = state lane ^ input lane, for laneCount lanes; instance 1 uses input/output + 8*laneOffset. In: r0=states, r1=input, r2=output, r3=laneCount, [sp]=laneOffset.
1031
+ .align 8
1032
+ .global KeccakP1600times2_ExtractAndAddLanesAll
1033
+ .type KeccakP1600times2_ExtractAndAddLanesAll, %function;
1034
+ KeccakP1600times2_ExtractAndAddLanesAll:
1035
+ cmp r3, #0
1036
+ beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1037
+ orr r12, r1, r2 @ combine the low bits of both pointers
1038
+ lsls r12, r12, #32-3 @ unaligned access .if input or output unaligned
1039
+ bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1040
+ push {r4,r5}
1041
+ ldr r12, [sp, #2*4] @ r12 = laneOffset
1042
+ lsrs r3, r3, #1 @ r3 = lane pairs; C = odd lane, Z = no pairs (the adds below leave flags alone)
1043
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1044
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1045
+ bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned @ even count: straight to the 2-lane loop
1046
+ vldm r0!, { d0 - d1 } @ odd count: one lane of each instance first
1047
+ vldm r1!, { d2 }
1048
+ vldm r4!, { d3 }
1049
+ veor q0, q0, q1 @ d0 ^= d2 (instance 0), d1 ^= d3 (instance 1)
1050
+ vstm r2!, { d0 }
1051
+ vstm r5!, { d1 }
1052
+ beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone @ Z still from the lsrs above
1053
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned:
1054
+ vldm r0!, { d0 - d3 } @ state, interleaved: i0, i1, i0, i1
1055
+ vldm r1!, { d4 } @ input instance 0 -> d4, d6
1056
+ vldm r1!, { d6 }
1057
+ vldm r4!, { d5 } @ input instance 1 -> d5, d7
1058
+ vldm r4!, { d7 }
1059
+ subs r3, r3, #1 @ flags survive the NEON ops below
1060
+ veor q0, q0, q2
1061
+ veor q1, q1, q3
1062
+ vstm r2!, { d0 } @ output instance 0: d0, d2
1063
+ vstm r2!, { d2 }
1064
+ vstm r5!, { d1 } @ output instance 1: d1, d3
1065
+ vstm r5!, { d3 }
1066
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1067
+ KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone:
1068
+ pop {r4,r5}
1069
+ bx lr
1070
+ KeccakP1600times2_ExtractAndAddLanesAll_Unaligned:
1071
+ push {r4-r9}
1072
+ ldr r12, [sp, #6*4] @ r12 = laneOffset
1073
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1074
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1075
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned:
1076
+ ldrd r8, r9, [r0], #8 @ state lane of instance 0
1077
+ ldr r6, [r1], #4
1078
+ ldr r7, [r1], #4
1079
+ eor r8, r8, r6
1080
+ eor r9, r9, r7
1081
+ str r8, [r2], #4
1082
+ str r9, [r2], #4
1083
+ ldrd r8, r9, [r0], #8 @ state lane of instance 1
1084
+ ldr r6, [r4], #4
1085
+ ldr r7, [r4], #4
1086
+ eor r8, r8, r6
1087
+ eor r9, r9, r7
1088
+ str r8, [r5], #4
1089
+ subs r3, r3, #1 @ flags survive the str below
1090
+ str r9, [r5], #4
1091
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1092
+ pop { r4 - r9 }
1093
+ KeccakP1600times2_ExtractAndAddLanesAll_Exit:
1094
+ bx lr
1095
+
1096
+
1097
+ @----------------------------------------------------------------------------
1098
+ @
1099
+ @ void KeccakP1600times2_PermuteAll_24rounds( void *states )
1100
+ @ Sets r1 = address of the 24-entry round-constant table and r2 = 24, then tail-branches to KeccakP1600times2_PermuteAll with states still in r0.
1101
+ .align 8
1102
+ .global KeccakP1600times2_PermuteAll_24rounds
1103
+ .type KeccakP1600times2_PermuteAll_24rounds, %function;
1104
+ KeccakP1600times2_PermuteAll_24rounds:
1105
+ adr r1, KeccakP1600times2_Permute_RoundConstants24 @ r1 = rc (PC-relative address)
1106
+ movs r2, #24 @ r2 = nr
1107
+ b KeccakP1600times2_PermuteAll @ plain branch: lr still holds the original return address
1108
+
1109
+
1110
+ @----------------------------------------------------------------------------
1111
+ @
1112
+ @ void KeccakP1600times2_PermuteAll_12rounds( void *states )
1113
+ @ Sets r1 = address of the last 12 round constants and r2 = 12, then tail-branches to KeccakP1600times2_PermuteAll with states still in r0.
1114
+ .align 8
1115
+ .global KeccakP1600times2_PermuteAll_12rounds
1116
+ .type KeccakP1600times2_PermuteAll_12rounds, %function;
1117
+ KeccakP1600times2_PermuteAll_12rounds:
1118
+ adr r1, KeccakP1600times2_Permute_RoundConstants12 @ r1 = rc (PC-relative address)
1119
+ movs r2, #12 @ r2 = nr
1120
+ b KeccakP1600times2_PermuteAll @ plain branch: lr still holds the original return address
1121
+
1122
+
1123
+ .align 8
1124
+ KeccakP1600times2_Permute_RoundConstants24: @ 24 consecutive 64-bit round constants; the 24-round entry point starts here
1125
+ .quad 0x0000000000000001
1126
+ .quad 0x0000000000008082
1127
+ .quad 0x800000000000808a
1128
+ .quad 0x8000000080008000
1129
+ .quad 0x000000000000808b
1130
+ .quad 0x0000000080000001
1131
+ .quad 0x8000000080008081
1132
+ .quad 0x8000000000008009
1133
+ .quad 0x000000000000008a
1134
+ .quad 0x0000000000000088
1135
+ .quad 0x0000000080008009
1136
+ .quad 0x000000008000000a
1137
+ KeccakP1600times2_Permute_RoundConstants12: @ label on the 13th entry: the 12-round entry point uses only the last 12 constants
1138
+ .quad 0x000000008000808b
1139
+ .quad 0x800000000000008b
1140
+ .quad 0x8000000000008089
1141
+ .quad 0x8000000000008003
1142
+ .quad 0x8000000000008002
1143
+ .quad 0x8000000000000080
1144
+ .quad 0x000000000000800a
1145
+ .quad 0x800000008000000a
1146
+ .quad 0x8000000080008081
1147
+ .quad 0x8000000000008080
1148
+ .quad 0x0000000080000001
1149
+ .quad 0x8000000080008008
1150
+
1151
+ @----------------------------------------------------------------------------
1152
+ @ Permutes the 400-byte state block at `states` in place: 25 named lanes of 16 bytes each (one q register per lane, presumably the same lane of two interleaved states).
1153
+ @ void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1154
+ @ In: r0 = states, r1 = rc (not referenced by the visible code; presumably consumed by the Iota macro), r2 = nr (reduced by 4 per loop pass and tested for zero, so it must be a non-zero multiple of 4).
1155
+ .align 8 @ NOTE(review): GNU as on ARM reads the operand as a power of two (2^8 = 256 bytes) - confirm this much alignment is intended
1156
+ KeccakP1600times2_PermuteAll:
1157
+ vpush {q4-q7} @ save q4-q7 (d8-d15); they are overwritten by the loads below
1158
+ push {r4-r7} @ save r4-r7; r5 is written below, r4/r6/r7 are presumably used inside the round macros
1159
+ sub sp, #4*2*8+8 @ allocate 4 double lanes (4*2*8 bytes) plus 8 bytes of slack so the scratch pointer can be aligned to 16 bytes
1160
+ mov r3, r0 @ r3 = read cursor over the state; advanced by the post-incrementing loads
1161
+ add r5, sp, #8 @ r5 = sp + 8; rounded down to a 16-byte boundary by the bic below
1162
+
1163
+ @PrepareTheta: fold the five lanes of each column into q0..q4, interleaving each load with an XOR on data loaded earlier
1164
+ @ Ca = ba ^ ga ^ ka ^ ma ^ sa (accumulated in q0)
1165
+ @ Ce = be ^ ge ^ ke ^ me ^ se (accumulated in q1)
1166
+ @ Ci = bi ^ gi ^ ki ^ mi ^ si (accumulated in q2)
1167
+ @ Co = bo ^ go ^ ko ^ mo ^ so (accumulated in q3)
1168
+ @ Cu = bu ^ gu ^ ku ^ mu ^ su (accumulated in q4)
1169
+ vld1.64 { d0, d1, d2, d3 }, [r3:256]! @ _ba _be
1170
+ bic r5, #15 @ r5 = 16-byte-aligned pointer inside the stack area reserved above
1171
+ vld1.64 { d4, d5, d6, d7 }, [r3:256]! @ _bi _bo
1172
+ vld1.64 { d8, d9, d10, d11 }, [r3:256]! @ _bu _ga
1173
+ vld1.64 { d12, d13 }, [r3:128]! @ _ge
1174
+ veor.64 q0, q0, q5 @ Ca = _ba ^ _ga
1175
+ vld1.64 { d14, d15 }, [r3:128]! @ _gi
1176
+ veor.64 q1, q1, q6 @ Ce = _be ^ _ge
1177
+ vld1.64 { d16, d17 }, [r3:128]! @ _go
1178
+ veor.64 q2, q2, q7 @ Ci = _bi ^ _gi
1179
+ vld1.64 { d18, d19 }, [r3:128]! @ _gu
1180
+ veor.64 q3, q3, q8 @ Co = _bo ^ _go
1181
+ vld1.64 { d10, d11 }, [r3:128]! @ _ka
1182
+ veor.64 q4, q4, q9 @ Cu = _bu ^ _gu
1183
+ vld1.64 { d12, d13 }, [r3:128]! @ _ke
1184
+ veor.64 q0, q0, q5 @ Ca ^= _ka
1185
+ vld1.64 { d14, d15 }, [r3:128]! @ _ki
1186
+ veor.64 q1, q1, q6 @ Ce ^= _ke
1187
+ vld1.64 { d16, d17 }, [r3:128]! @ _ko
1188
+ veor.64 q2, q2, q7 @ Ci ^= _ki
1189
+ vld1.64 { d18, d19 }, [r3:128]! @ _ku
1190
+ veor.64 q3, q3, q8 @ Co ^= _ko
1191
+ vld1.64 { d10, d11 }, [r3:128]! @ _ma
1192
+ veor.64 q4, q4, q9 @ Cu ^= _ku
1193
+ vld1.64 { d12, d13 }, [r3:128]! @ _me
1194
+ veor.64 q0, q0, q5 @ Ca ^= _ma
1195
+ vld1.64 { d14, d15 }, [r3:128]! @ _mi
1196
+ veor.64 q1, q1, q6 @ Ce ^= _me
1197
+ vld1.64 { d16, d17 }, [r3:128]! @ _mo
1198
+ veor.64 q2, q2, q7 @ Ci ^= _mi
1199
+ vld1.64 { d18, d19 }, [r3:128]! @ _mu
1200
+ veor.64 q3, q3, q8 @ Co ^= _mo
1201
+ vld1.64 { d10, d11 }, [r3:128]! @ _sa
1202
+ veor.64 q4, q4, q9 @ Cu ^= _mu
1203
+ vld1.64 { d12, d13 }, [r3:128]! @ _se
1204
+ veor.64 q0, q0, q5 @ Ca ^= _sa (Ca complete)
1205
+ vld1.64 { d14, d15 }, [r3:128]! @ _si
1206
+ veor.64 q1, q1, q6 @ Ce ^= _se (Ce complete)
1207
+ vld1.64 { d16, d17 }, [r3:128]! @ _so
1208
+ veor.64 q2, q2, q7 @ Ci ^= _si (Ci complete)
1209
+ vld1.64 { d18, d19 }, [r3:128]! @ _su
1210
+ mov r3, r0 @ rewind r3 to the start of the state before entering the round loop
1211
+ veor.64 q3, q3, q8 @ Co ^= _so (Co complete)
1212
+ veor.64 q4, q4, q9 @ Cu ^= _su (Cu complete)
1213
+
1214
+ KeccakP1600times2_PermuteAll_RoundLoop: @ one pass = four groups of five macro invocations (presumably one round per group, matching the subs r2, #4 below)
1215
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka @ _ba, _ge, _ki, _mo, _su
1216
+ KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa @ _ka, _me, _si, _bo, _gu
1217
+ KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga @ _sa, _be, _gi, _ko, _mu
1218
+ KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma @ _ga, _ke, _mi, _so, _bu
1219
+ KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba @ _ma, _se, _bi, _go, _ku
1220
+
1221
+ KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa @ _ba, _me, _gi, _so, _ku
1222
+ KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma @ _sa, _ke, _bi, _mo, _gu
1223
+ KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka @ _ma, _ge, _si, _ko, _bu
1224
+ KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga @ _ka, _be, _mi, _go, _su
1225
+ KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba @ _ga, _se, _ki, _bo, _mu
1226
+
1227
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma @ _ba, _ke, _si, _go, _mu
1228
+ KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga @ _ma, _be, _ki, _so, _gu
1229
+ KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa @ _ga, _me, _bi, _ko, _su
1230
+ KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka @ _sa, _ge, _mi, _bo, _ku
1231
+ KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba @ _ka, _se, _gi, _mo, _bu
1232
+
1233
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga @ _ba, _be, _bi, _bo, _bu
1234
+ KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka @ _ga, _ge, _gi, _go, _gu
1235
+ KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma @ _ka, _ke, _ki, _ko, _ku
1236
+ KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa @ _ma, _me, _mi, _mo, _mu
1237
+ subs r2, #4 @ nr -= 4; the flags set here are consumed by the bne after the next macro, so that macro must leave the condition flags intact
1238
+ KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba @ _sa, _se, _si, _so, _su
1239
+ bne KeccakP1600times2_PermuteAll_RoundLoop @ repeat until r2 has counted down to zero
1240
+ add sp, #4*2*8+8 @ free the 4*2*8+8 bytes reserved in the prologue
1241
+ pop {r4-r7} @ restore r4-r7
1242
+ vpop {q4-q7} @ restore q4-q7
1243
+ bx lr
1244
+
1245
+