digest-kangarootwelve 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +51 -11
  3. data/Rakefile +2 -2
  4. data/digest-kangarootwelve.gemspec +322 -42
  5. data/ext/digest/kangarootwelve/ext.c +1 -1
  6. data/ext/digest/kangarootwelve/extconf.rb +13 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/lib/digest/kangarootwelve/version.rb +1 -1
  305. metadata +299 -21
@@ -0,0 +1,1245 @@
1
+ @
2
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+ @
4
+ @ For more information, feedback or questions, please refer to our website:
5
+ @ https://keccak.team/
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+ @
11
+ @ ---
12
+ @
13
+ @ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
14
+ @ Please refer to PlSnP-documentation.h for more details.
15
+ @
16
+ @ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
17
+ @ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ @
19
+
20
+ @ WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
21
+ @ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
22
+
23
+ @ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
24
+ @ INFO: Parallel execution of the Keccak-p permutation on two lane-interleaved states.
25
+
26
+ @ INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
27
+
28
+
29
+
30
+ .text
31
+
32
+ @----------------------------------------------------------------------------
33
+
34
+ @ --- offsets in state (byte offsets; consecutive lane names are 16 bytes apart, i.e. room for two 64-bit words per lane)
35
+ .equ _ba , 0*16
36
+ .equ _be , 1*16
37
+ .equ _bi , 2*16
38
+ .equ _bo , 3*16
39
+ .equ _bu , 4*16
40
+ .equ _ga , 5*16
41
+ .equ _ge , 6*16
42
+ .equ _gi , 7*16
43
+ .equ _go , 8*16
44
+ .equ _gu , 9*16
45
+ .equ _ka , 10*16
46
+ .equ _ke , 11*16
47
+ .equ _ki , 12*16
48
+ .equ _ko , 13*16
49
+ .equ _ku , 14*16
50
+ .equ _ma , 15*16
51
+ .equ _me , 16*16
52
+ .equ _mi , 17*16
53
+ .equ _mo , 18*16
54
+ .equ _mu , 19*16
55
+ .equ _sa , 20*16
56
+ .equ _se , 21*16
57
+ .equ _si , 22*16
58
+ .equ _so , 23*16
59
+ .equ _su , 24*16
60
+
61
+ @ --- macros for Single permutation
62
+
63
+ .macro KeccakS_ThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
64
+ @ Theta preparation, then Rho/Pi/Chi/Iota for one plane. argA1 is a byte offset from r0 (that lane is loaded/stored in memory); argA2..argA5 are D registers updated in place; the round constant is read from [r1] with post-increment.
65
+ @Prepare Theta
66
+ @ Ca = Aba^Aga^Aka^Ama^Asa
67
+ @ Ce = Abe^Age^Ake^Ame^Ase
68
+ @ Ci = Abi^Agi^Aki^Ami^Asi
69
+ @ Co = Abo^Ago^Ako^Amo^Aso
70
+ @ Cu = Abu^Agu^Aku^Amu^Asu
71
+ @ De = Ca^ROL64(Ci, 1)
72
+ @ Di = Ce^ROL64(Co, 1)
73
+ @ Do = Ci^ROL64(Cu, 1)
74
+ @ Du = Co^ROL64(Ca, 1)
75
+ @ Da = Cu^ROL64(Ce, 1)
76
+ veor.64 q4, q6, q7
77
+ veor.64 q5, q9, q10
78
+ veor.64 d8, d8, d9
79
+ veor.64 d10, d10, d11
80
+ veor.64 d1, d8, d16
81
+ veor.64 d2, d10, d17
82
+ @ d1 and d2 now hold Ce and Ci (per the formulas above); the same folding below yields d3 = Co and d4 = Cu
83
+ veor.64 q4, q11, q12
84
+ veor.64 q5, q14, q15
85
+ veor.64 d8, d8, d9
86
+ veor.64 d10, d10, d11
87
+ veor.64 d3, d8, d26
88
+ @ ROL64(x, 1) is built as x + x, then vsri #63 inserts the old bit 63 into bit 0; d5 is read as Ca (not computed in this macro)
89
+ vadd.u64 q4, q1, q1
90
+ veor.64 d4, d10, d27
91
+ vmov.64 d0, d5
92
+ vsri.64 q4, q1, #63
93
+
94
+ vadd.u64 q5, q2, q2
95
+ veor.64 q4, q4, q0
96
+ vsri.64 q5, q2, #63
97
+ vadd.u64 d7, d1, d1
98
+ veor.64 \argA2, \argA2,d8
99
+ veor.64 q5, q5, q1
100
+
101
+ vsri.64 d7, d1, #63
102
+ vshl.u64 d1, \argA2,#44
103
+ veor.64 \argA3, \argA3,d9
104
+ veor.64 d7, d7, d4
105
+ @ From here on: d7 = Da, d8 = De, d9 = Di, d10 = Do, d11 = Du
106
+ @ Ba = argA1^Da
107
+ @ Be = ROL64((argA2^De), 44)
108
+ @ Bi = ROL64((argA3^Di), 43)
109
+ @ Bo = ROL64((argA4^Do), 21)
110
+ @ Bu = ROL64((argA5^Du), 14)
111
+ @ argA2 = Be ^((~Bi)& Bo )
112
+ @ argA3 = Bi ^((~Bo)& Bu )
113
+ @ argA4 = Bo ^((~Bu)& Ba )
114
+ @ argA5 = Bu ^((~Ba)& Be )
115
+ @ argA1 = Ba ^((~Be)& Bi )
116
+ @ argA1 ^= KeccakP1600RoundConstants[i+round]
117
+ vsri.64 d1, \argA2, #64-44
118
+ vshl.u64 d2, \argA3, #43
119
+ vldr.64 d0, [r0, #\argA1]
120
+ veor.64 \argA4, \argA4, d10
121
+ vsri.64 d2, \argA3, #64-43
122
+ vshl.u64 d3, \argA4, #21
123
+ veor.64 \argA5, \argA5, d11
124
+ veor.64 d0, d0, d7
125
+ vsri.64 d3, \argA4, #64-21
126
+ vbic.64 d5, d2, d1
127
+ vshl.u64 d4, \argA5, #14
128
+ vbic.64 \argA2, d3, d2
129
+ vld1.64 d6, [r1]!
130
+ veor.64 d5, d0
131
+ vsri.64 d4, \argA5, #64-14
132
+ veor.64 d5, d6
133
+ vbic.64 \argA5, d1, d0
134
+ vbic.64 \argA3, d4, d3
135
+ vbic.64 \argA4, d0, d4
136
+ veor.64 \argA2, d1
137
+ vstr.64 d5, [r0, #\argA1]
138
+ veor.64 \argA3, d2
139
+ veor.64 \argA4, d3
140
+ veor.64 \argA5, d4
141
+ .endm
142
+
143
+ .macro KeccakS_ThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
143
+ @ Rho/Pi/Chi for one plane (rotations 3/45/61/28/20). Reads Da from d7 and De/Di/Do/Du from d8..d11; argA1 is a byte offset from r0, argA2..argA5 are D registers updated in place; the new argA1 lane is stored back and also XORed into d5 (Ca).
144
+ @ Bi = ROL64((argA1^Da), 3)
145
+ @ Bo = ROL64((argA2^De), 45)
146
+ @ Bu = ROL64((argA3^Di), 61)
147
+ @ Ba = ROL64((argA4^Do), 28)
148
+ @ Be = ROL64((argA5^Du), 20)
149
+ @ argA1 = Ba ^((~Be)& Bi )
150
+ @ Ca ^= argA1
151
+ @ argA2 = Be ^((~Bi)& Bo )
152
+ @ argA3 = Bi ^((~Bo)& Bu )
153
+ @ argA4 = Bo ^((~Bu)& Ba )
154
+ @ argA5 = Bu ^((~Ba)& Be )
155
+ veor.64 \argA2, \argA2, d8
156
+ veor.64 \argA3, \argA3, d9
157
+ vshl.u64 d3, \argA2, #45
158
+ vldr.64 d6, [r0, #\argA1]
159
+ vshl.u64 d4, \argA3, #61
160
+ veor.64 \argA4, \argA4, d10
161
+ vsri.64 d3, \argA2, #64-45
162
+ veor.64 \argA5, \argA5, d11
163
+ vsri.64 d4, \argA3, #64-61
164
+ vshl.u64 d0, \argA4, #28
165
+ veor.64 d6, d6, d7
166
+ vshl.u64 d1, \argA5, #20
167
+ vbic.64 \argA3, d4, d3
168
+ vsri.64 d0, \argA4, #64-28
169
+ vbic.64 \argA4, d0, d4
170
+ vshl.u64 d2, d6, #3
171
+ vsri.64 d1, \argA5, #64-20
172
+ veor.64 \argA4, d3
173
+ vsri.64 d2, d6, #64-3
174
+ vbic.64 \argA5, d1, d0
175
+ vbic.64 d6, d2, d1
176
+ vbic.64 \argA2, d3, d2
177
+ veor.64 d6, d0
178
+ veor.64 \argA2, d1
179
+ vstr.64 d6, [r0, #\argA1]
180
+ veor.64 \argA3, d2
181
+ veor.64 d5, d6
182
+ veor.64 \argA5, d4
183
+ .endm
185
+
186
+ .macro KeccakS_ThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
186
+ @ Rho/Pi/Chi for one plane (rotations 18/1/6/25/8). Reads Da from d7 and De/Di/Do/Du from d8..d11; argA1 is a byte offset from r0, argA2..argA5 are D registers updated in place; the rotate by 8 is done as a byte rotation with vext.8, and the new argA1 lane is stored back and also XORed into d5 (Ca).
187
+ @ Bu = ROL64((argA1^Da), 18)
188
+ @ Ba = ROL64((argA2^De), 1)
189
+ @ Be = ROL64((argA3^Di), 6)
190
+ @ Bi = ROL64((argA4^Do), 25)
191
+ @ Bo = ROL64((argA5^Du), 8)
192
+ @ argA1 = Ba ^((~Be)& Bi )
193
+ @ Ca ^= argA1
194
+ @ argA2 = Be ^((~Bi)& Bo )
195
+ @ argA3 = Bi ^((~Bo)& Bu )
196
+ @ argA4 = Bo ^((~Bu)& Ba )
197
+ @ argA5 = Bu ^((~Ba)& Be )
198
+ veor.64 \argA3, \argA3, d9
199
+ veor.64 \argA4, \argA4, d10
200
+ vshl.u64 d1, \argA3, #6
201
+ vldr.64 d6, [r0, #\argA1]
202
+ vshl.u64 d2, \argA4, #25
203
+ veor.64 \argA5, \argA5, d11
204
+ vsri.64 d1, \argA3, #64-6
205
+ veor.64 \argA2, \argA2, d8
206
+ vsri.64 d2, \argA4, #64-25
207
+ vext.8 d3, \argA5, \argA5, #7
208
+ veor.64 d6, d6, d7
209
+ vbic.64 \argA3, d2, d1
210
+ vadd.u64 d0, \argA2, \argA2
211
+ vbic.64 \argA4, d3, d2
212
+ vsri.64 d0, \argA2, #64-1
213
+ vshl.u64 d4, d6, #18
214
+ veor.64 \argA2, d1, \argA4
215
+ veor.64 \argA3, d0
216
+ vsri.64 d4, d6, #64-18
217
+ vstr.64 \argA3, [r0, #\argA1]
218
+ veor.64 d5, \argA3
219
+ vbic.64 \argA5, d1, d0
220
+ vbic.64 \argA3, d4, d3
221
+ vbic.64 \argA4, d0, d4
222
+ veor.64 \argA3, d2
223
+ veor.64 \argA4, d3
224
+ veor.64 \argA5, d4
225
+ .endm
227
+
228
+ .macro KeccakS_ThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
229
+ @ Serial theta/rho/pi/chi on one plane. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. d7..d11 supply Da, De, Di, Do, Du; d0-d4 and d6 are scratch; d5 accumulates Ca.
230
+ @ Be = ROL64((argA1^Da), 36)
231
+ @ Bi = ROL64((argA2^De), 10)
232
+ @ Bo = ROL64((argA3^Di), 15)
233
+ @ Bu = ROL64((argA4^Do), 56)
234
+ @ Ba = ROL64((argA5^Du), 27)
235
+ @ argA1 = Ba ^((~Be)& Bi )
236
+ @ Ca ^= argA1
237
+ @ argA2 = Be ^((~Bi)& Bo )
238
+ @ argA3 = Bi ^((~Bo)& Bu )
239
+ @ argA4 = Bo ^((~Bu)& Ba )
240
+ @ argA5 = Bu ^((~Ba)& Be )
241
+ veor.64 \argA2, \argA2, d8
242
+ veor.64 \argA3, \argA3, d9
243
+ vshl.u64 d2, \argA2, #10 @ d2 = Bi (completed by the vsri below)
244
+ vldr.64 d6, [r0, #\argA1] @ fetch the memory-resident lane
245
+ vshl.u64 d3, \argA3, #15 @ d3 = Bo
246
+ veor.64 \argA4, \argA4, d10
247
+ vsri.64 d2, \argA2, #64-10
248
+ vsri.64 d3, \argA3, #64-15
249
+ veor.64 \argA5, \argA5, d11
250
+ vext.8 d4, \argA4, \argA4, #1 @ d4 = Bu: rotation by 56 bits done as a whole-byte rotate
251
+ vbic.64 \argA2, d3, d2 @ (~Bi) & Bo
252
+ vshl.u64 d0, \argA5, #27 @ d0 = Ba
253
+ veor.64 d6, d6, d7
254
+ vbic.64 \argA3, d4, d3 @ (~Bo) & Bu
255
+ vsri.64 d0, \argA5, #64-27
256
+ vshl.u64 d1, d6, #36 @ d1 = Be
257
+ veor.64 \argA3, d2
258
+ vbic.64 \argA4, d0, d4 @ (~Bu) & Ba
259
+ vsri.64 d1, d6, #64-36
260
+ veor.64 \argA4, d3
261
+ vbic.64 d6, d2, d1 @ (~Be) & Bi
262
+ vbic.64 \argA5, d1, d0 @ (~Ba) & Be
263
+ veor.64 d6, d0
264
+ veor.64 \argA2, d1
265
+ vstr.64 d6, [r0, #\argA1] @ write the new first lane back to memory
266
+ veor.64 d5, d6 @ Ca ^= new first lane
267
+ veor.64 \argA5, d4
268
+ .endm
269
+
270
+ .macro KeccakS_ThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
271
+ @ Serial theta/rho/pi/chi on one plane. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. d7..d11 supply Da, De, Di, Do, Du; d0-d4 and d6 are scratch; d5 accumulates Ca.
272
+ @ Bo = ROL64((argA1^Da), 41)
273
+ @ Bu = ROL64((argA2^De), 2)
274
+ @ Ba = ROL64((argA3^Di), 62)
275
+ @ Be = ROL64((argA4^Do), 55)
276
+ @ Bi = ROL64((argA5^Du), 39)
277
+ @ argA1 = Ba ^((~Be)& Bi )
278
+ @ Ca ^= argA1
279
+ @ argA2 = Be ^((~Bi)& Bo )
280
+ @ argA3 = Bi ^((~Bo)& Bu )
281
+ @ argA4 = Bo ^((~Bu)& Ba )
282
+ @ argA5 = Bu ^((~Ba)& Be )
283
+ veor.64 \argA2, \argA2, d8
284
+ veor.64 \argA3, \argA3, d9
285
+ vshl.u64 d4, \argA2, #2 @ d4 = Bu (completed by the vsri below)
286
+ veor.64 \argA5, \argA5, d11
287
+ vshl.u64 d0, \argA3, #62 @ d0 = Ba
288
+ vldr.64 d6, [r0, #\argA1] @ fetch the memory-resident lane
289
+ vsri.64 d4, \argA2, #64-2
290
+ veor.64 \argA4, \argA4, d10
291
+ vsri.64 d0, \argA3, #64-62
292
+ vshl.u64 d1, \argA4, #55 @ d1 = Be
293
+ veor.64 d6, d6, d7
294
+ vshl.u64 d2, \argA5, #39 @ d2 = Bi
295
+ vsri.64 d1, \argA4, #64-55
296
+ vbic.64 \argA4, d0, d4 @ (~Bu) & Ba
297
+ vsri.64 d2, \argA5, #64-39
298
+ vbic.64 \argA2, d1, d0 @ (~Ba) & Be, argA2 used as a temporary
299
+ vshl.u64 d3, d6, #41 @ d3 = Bo
300
+ veor.64 \argA5, d4, \argA2 @ final argA5
301
+ vbic.64 \argA2, d2, d1 @ (~Be) & Bi, argA2 still a temporary
302
+ vsri.64 d3, d6, #64-41
303
+ veor.64 d6, d0, \argA2 @ d6 = new first lane
304
+ vbic.64 \argA2, d3, d2 @ (~Bi) & Bo
305
+ vbic.64 \argA3, d4, d3 @ (~Bo) & Bu
306
+ veor.64 \argA2, d1
307
+ vstr.64 d6, [r0, #\argA1] @ write the new first lane back to memory
308
+ veor.64 d5, d6 @ Ca ^= new first lane
309
+ veor.64 \argA3, d2
310
+ veor.64 \argA4, d3
311
+ .endm
312
+
313
+ @ --- macros for Parallel permutation
314
+
315
+ .macro m_pls start
316
+ .if \start != -1 @ -1 means: leave r3 unchanged
317
+ add r3, r0, #\start @ r3 = r0 + start (lane pointer used by m_ld / m_st)
318
+ .endif
319
+ .endm
320
+
321
+ .macro m_ld qreg, next
322
+ .if \next == 16 @ load one Q register from [r3], then advance r3
323
+ vld1.64 { \qreg }, [r3:128]! @ writeback form: r3 advances by the 16 bytes transferred
324
+ .else
325
+ vld1.64 { \qreg }, [r3:128], r4 @ r3 advances by r4 (callers set r4 from their own next argument)
326
+ .endif
327
+ .endm
328
+
329
+ .macro m_st qreg, next
330
+ .if \next == 16 @ store one Q register to [r3], then advance r3
331
+ vst1.64 { \qreg }, [r3:128]! @ writeback form: r3 advances by the 16 bytes transferred
332
+ .else
333
+ vst1.64 { \qreg }, [r3:128], r4 @ r3 advances by r4 (callers set r4 from their own next argument)
334
+ .endif
335
+ .endm
336
+
337
+ .macro KeccakP_ThetaRhoPiChiIota ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
338
+ @ Two-way parallel step on the first plane: derives the theta D values from the parities in q0..q4 (Ca, Ce, Ci, Co, Cu), then applies theta/rho/pi/chi/iota to five lanes read through r3. On exit q0..q4 hold the five new lanes, q5 = Da, and De, Di, Do, Du are saved in the scratch area at r5.
339
+ @ De = Ca ^ ROL64(Ci, 1)
340
+ @ Di = Ce ^ ROL64(Co, 1)
341
+ @ Do = Ci ^ ROL64(Cu, 1)
342
+ @ Du = Co ^ ROL64(Ca, 1)
343
+ @ Da = Cu ^ ROL64(Ce, 1)
344
+ vadd.u64 q6, q2, q2 @ x + x = x << 1, first half of ROL64(x, 1)
345
+ vadd.u64 q7, q3, q3
346
+ vadd.u64 q8, q4, q4
347
+ vadd.u64 q9, q0, q0
348
+ vadd.u64 q5, q1, q1
349
+ @ insert the bit shifted out at the top to complete the rotate by 1
350
+ vsri.64 q6, q2, #63
351
+ vsri.64 q7, q3, #63
352
+ vsri.64 q8, q4, #63
353
+ vsri.64 q9, q0, #63
354
+ vsri.64 q5, q1, #63
355
+ @ q6 = De, q7 = Di, q8 = Do, q9 = Du, q5 = Da
356
+ veor.64 q6, q6, q0
357
+ veor.64 q7, q7, q1
358
+ veor.64 q8, q8, q2
359
+ .if \next != 16
360
+ mov r4, #\next @ post-increment used by m_ld / m_st when the stride is not 16
361
+ .endif
362
+ veor.64 q9, q9, q3
363
+ veor.64 q5, q5, q4
364
+ @ load the five lanes (first one from wherever r3 already points) and apply theta
365
+ @ Ba = argA1^Da
366
+ @ Be = ROL64(argA2^De, 44)
367
+ @ Bi = ROL64(argA3^Di, 43)
368
+ @ Bo = ROL64(argA4^Do, 21)
369
+ @ Bu = ROL64(argA5^Du, 14)
370
+ m_ld q10, \next
371
+ m_pls \ofs2
372
+ m_ld q1, \next
373
+ m_pls \ofs3
374
+ veor.64 q10, q10, q5 @ q10 = Ba
375
+ m_ld q2, \next
376
+ m_pls \ofs4
377
+ veor.64 q1, q1, q6
378
+ m_ld q3, \next
379
+ m_pls \ofs5
380
+ veor.64 q2, q2, q7
381
+ m_ld q4, \next
382
+ veor.64 q3, q3, q8
383
+ mov r6, r5 @ r6 walks the scratch area whose base is r5
384
+ veor.64 q4, q4, q9
385
+ @ save De, Di, Do, Du to scratch while forming the rotations in q11..q14
386
+ vst1.64 { q6 }, [r6:128]!
387
+ vshl.u64 q11, q1, #44
388
+ vshl.u64 q12, q2, #43
389
+ vst1.64 { q7 }, [r6:128]!
390
+ vshl.u64 q13, q3, #21
391
+ vshl.u64 q14, q4, #14
392
+ vst1.64 { q8 }, [r6:128]!
393
+ vsri.64 q11, q1, #64-44 @ q11 = Be
394
+ vsri.64 q12, q2, #64-43 @ q12 = Bi
395
+ vst1.64 { q9 }, [r6:128]!
396
+ vsri.64 q13, q3, #64-21 @ q13 = Bo
397
+ vsri.64 q14, q4, #64-14 @ q14 = Bu
398
+ @ chi, plus the round constant read through r1 (iota)
399
+ @ argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
400
+ @ argA2 = Be ^(~Bi & Bo)
401
+ @ argA3 = Bi ^(~Bo & Bu)
402
+ @ argA4 = Bo ^(~Bu & Ba)
403
+ @ argA5 = Bu ^(~Ba & Be)
404
+ vld1.64 { d30 }, [r1:64] @ low half of q15 = round constant (r1 not advanced yet)
405
+ vbic.64 q0, q12, q11
406
+ vbic.64 q1, q13, q12
407
+ vld1.64 { d31 }, [r1:64]! @ high half of q15 = same constant; r1 moves to the next one
408
+ veor.64 q0, q10
409
+ vbic.64 q4, q11, q10
410
+ veor.64 q0, q15 @ iota: the same constant is applied to both 64-bit halves
411
+ vbic.64 q2, q14, q13
412
+ vbic.64 q3, q10, q14
413
+ @ store the five new lanes; q0..q4 keep them as the restarted column parities
414
+ m_pls \ofs1
415
+ veor.64 q1, q11
416
+ m_st q0, \next
417
+ m_pls \ofs2
418
+ veor.64 q2, q12
419
+ m_st q1, \next
420
+ m_pls \ofs3
421
+ veor.64 q3, q13
422
+ m_st q2, \next
423
+ m_pls \ofs4
424
+ veor.64 q4, q14
425
+ m_st q3, \next
426
+ m_pls \ofs5
427
+ m_st q4, \next
428
+ m_pls \ofsn1 @ point r3 at the first lane of the following macro
429
+ .endm
430
+
431
+ .macro KeccakP_ThetaRhoPiChi ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1, Bb1, Bb2, Bb3, Bb4, Bb5, Rr1, Rr2, Rr3, Rr4, Rr5
432
+ @ Generic two-way parallel theta/rho/pi/chi on one plane. Bb1..Bb5 name which of q10..q14 (Ba, Be, Bi, Bo, Bu) receives each rotated lane; Rr1..Rr5 are the rotation amounts. Expects q5 = Da, q6..q9 = De, Di, Do, Du (also saved at r5). q0..q4 accumulate Ca..Cu.
433
+ @ Bb1 = ROL64((argA1^Da), Rr1)
434
+ @ Bb2 = ROL64((argA2^De), Rr2)
435
+ @ Bb3 = ROL64((argA3^Di), Rr3)
436
+ @ Bb4 = ROL64((argA4^Do), Rr4)
437
+ @ Bb5 = ROL64((argA5^Du), Rr5)
438
+
439
+ .if \next != 16
440
+ mov r4, #\next @ post-increment used by m_ld / m_st when the stride is not 16
441
+ .endif
442
+ @ load the five lanes (first one from wherever r3 already points) and apply theta
443
+ m_ld \Bb1, \next
444
+ m_pls \ofs2
445
+ m_ld \Bb2, \next
446
+ m_pls \ofs3
447
+ veor.64 q15, q5, \Bb1 @ result goes to q15 so that Da in q5 is preserved
448
+ m_ld \Bb3, \next
449
+ m_pls \ofs4
450
+ veor.64 q6, q6, \Bb2 @ q6..q9 (De..Du) are overwritten here and reloaded from scratch below
451
+ m_ld \Bb4, \next
452
+ m_pls \ofs5
453
+ veor.64 q7, q7, \Bb3
454
+ m_ld \Bb5, \next
455
+ veor.64 q8, q8, \Bb4
456
+ veor.64 q9, q9, \Bb5
457
+ @ rho: vshl + vsri pairs form the 64-bit rotations
458
+ vshl.u64 \Bb1, q15, #\Rr1
459
+ vshl.u64 \Bb2, q6, #\Rr2
460
+ vshl.u64 \Bb3, q7, #\Rr3
461
+ vshl.u64 \Bb4, q8, #\Rr4
462
+ vshl.u64 \Bb5, q9, #\Rr5
463
+
464
+ vsri.64 \Bb1, q15, #64-\Rr1
465
+ vsri.64 \Bb2, q6, #64-\Rr2
466
+ vsri.64 \Bb3, q7, #64-\Rr3
467
+ vsri.64 \Bb4, q8, #64-\Rr4
468
+ vsri.64 \Bb5, q9, #64-\Rr5
469
+ @ chi with q10 = Ba, q11 = Be, q12 = Bi, q13 = Bo, q14 = Bu
470
+ @ argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
471
+ @ argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
472
+ @ argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
473
+ @ argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
474
+ @ argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
475
+ vbic.64 q15, q12, q11
476
+ mov r6, r5 @ r6 walks the scratch area holding De, Di, Do, Du
477
+ vbic.64 q6, q13, q12
478
+ m_pls \ofs1
479
+ vbic.64 q7, q14, q13
480
+ vbic.64 q8, q10, q14
481
+ vbic.64 q9, q11, q10
482
+
483
+ veor.64 q15, q15, q10
484
+ veor.64 q6, q6, q11
485
+ @ store each new lane, fold it into its column parity, then restore the D value
486
+ m_st q15, \next
487
+ m_pls \ofs2
488
+ veor.64 q7, q7, q12
489
+
490
+ m_st q6, \next
491
+ m_pls \ofs3
492
+ veor.64 q1, q1, q6 @ Ce ^= new lane 2
493
+ vld1.64 { q6 }, [r6:128]! @ restore De
494
+ veor.64 q8, q8, q13
495
+
496
+ m_st q7, \next
497
+ m_pls \ofs4
498
+ veor.64 q2, q2, q7 @ Ci ^= new lane 3
499
+ vld1.64 { q7 }, [r6:128]! @ restore Di
500
+ veor.64 q9, q9, q14
501
+
502
+ m_st q8, \next
503
+ m_pls \ofs5
504
+ veor.64 q3, q3, q8 @ Co ^= new lane 4
505
+
506
+ m_st q9, \next
507
+
508
+ vld1.64 { q8 }, [r6:128]! @ restore Do
509
+ veor.64 q4, q4, q9 @ Cu ^= new lane 5
510
+ m_pls \ofsn1 @ point r3 at the first lane of the following macro
511
+ vld1.64 { q9 }, [r6:128]! @ restore Du
512
+ veor.64 q0, q0, q15 @ Ca ^= new lane 1
513
+ .endm
514
+
515
+ .macro KeccakP_ThetaRhoPiChi1 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
516
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20 @ lanes land in Bi, Bo, Bu, Ba, Be
517
+ .endm
518
+
519
+ .macro KeccakP_ThetaRhoPiChi2 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
520
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8 @ lanes land in Bu, Ba, Be, Bi, Bo
521
+ .endm
522
+
523
+ .macro KeccakP_ThetaRhoPiChi3 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
524
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27 @ lanes land in Be, Bi, Bo, Bu, Ba
525
+ .endm
526
+
527
+ .macro KeccakP_ThetaRhoPiChi4 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
528
+ @ Two-way parallel theta/rho/pi/chi with fixed register assignment. Unlike the generic macro it overwrites q5..q9 (Da..Du) with the chi results and does not reload them from the scratch area.
529
+ @ Bo = ROL64((argA1^Da), 41)
530
+ @ Bu = ROL64((argA2^De), 2)
531
+ @ Ba = ROL64((argA3^Di), 62)
532
+ @ Be = ROL64((argA4^Do), 55)
533
+ @ Bi = ROL64((argA5^Du), 39)
534
+ @ KeccakChi
535
+
536
+ .if \next != 16
537
+ mov r4, #\next @ post-increment used by m_ld / m_st when the stride is not 16
538
+ .endif
539
+ @ load the five lanes (first one from wherever r3 already points) and apply theta
540
+ m_ld q13, \next
541
+ m_pls \ofs2
542
+ m_ld q14, \next
543
+ m_pls \ofs3
544
+ veor.64 q5, q5, q13
545
+ m_ld q10, \next
546
+ m_pls \ofs4
547
+ veor.64 q6, q6, q14
548
+ m_ld q11, \next
549
+ m_pls \ofs5
550
+ veor.64 q7, q7, q10
551
+ m_ld q12, \next
552
+ veor.64 q8, q8, q11
553
+ veor.64 q9, q9, q12
554
+ @ rho: vshl + vsri pairs form the 64-bit rotations
555
+ vshl.u64 q13, q5, #41
556
+ vshl.u64 q14, q6, #2
557
+ vshl.u64 q10, q7, #62
558
+ vshl.u64 q11, q8, #55
559
+ vshl.u64 q12, q9, #39
560
+
561
+ vsri.64 q13, q5, #64-41 @ q13 = Bo
562
+ vsri.64 q14, q6, #64-2 @ q14 = Bu
563
+ vsri.64 q11, q8, #64-55 @ q11 = Be
564
+ vsri.64 q12, q9, #64-39 @ q12 = Bi
565
+ vsri.64 q10, q7, #64-62 @ q10 = Ba
566
+ @ chi: q5..q9 receive the five new lanes
567
+ vbic.64 q5, q12, q11
568
+ vbic.64 q6, q13, q12
569
+ vbic.64 q7, q14, q13
570
+ vbic.64 q8, q10, q14
571
+ vbic.64 q9, q11, q10
572
+ veor.64 q5, q5, q10
573
+ veor.64 q6, q6, q11
574
+ veor.64 q7, q7, q12
575
+ veor.64 q8, q8, q13
576
+ m_pls \ofs1
577
+ veor.64 q9, q9, q14
578
+ m_st q5, \next
579
+ m_pls \ofs2
580
+ veor.64 q0, q0, q5 @ Ca ^= new lane 1
581
+ m_st q6, \next
582
+ m_pls \ofs3
583
+ veor.64 q1, q1, q6 @ Ce ^= new lane 2
584
+ m_st q7, \next
585
+ m_pls \ofs4
586
+ veor.64 q2, q2, q7 @ Ci ^= new lane 3
587
+ m_st q8, \next
588
+ m_pls \ofs5
589
+ veor.64 q3, q3, q8 @ Co ^= new lane 4
590
+ m_st q9, \next
591
+ m_pls \ofsn1 @ point r3 at the first lane of the following macro
592
+ veor.64 q4, q4, q9 @ Cu ^= new lane 5
593
+ .endm
594
+
595
+ @----------------------------------------------------------------------------
596
+ @ No-op in this implementation: returns immediately.
597
+ @ void KeccakP1600_Pl_StaticInitialize( void )
598
+ @
599
+ .align 8 @ NOTE(review): GNU as on ARM treats the operand as a power of two, i.e. 256-byte alignment - confirm this is intended
600
+ .global KeccakP1600_Pl_StaticInitialize
601
+ .type KeccakP1600_Pl_StaticInitialize, %function;
602
+ KeccakP1600_Pl_StaticInitialize:
603
+ bx lr
604
+
605
+
606
+ @----------------------------------------------------------------------------
607
+ @ Zeroes 50 consecutive 64-bit lanes (400 bytes) starting at states.
608
+ @ void KeccakP1600times2_InitializeAll( void *states )
609
+ @ In: r0 = states. Clobbers r0 and d0-d7.
610
+ .align 8
611
+ .global KeccakP1600times2_InitializeAll
612
+ .type KeccakP1600times2_InitializeAll, %function;
613
+ KeccakP1600times2_InitializeAll:
614
+ vmov.i64 q0, #0
615
+ vmov.i64 q1, #0
616
+ vmov.i64 q2, #0
617
+ vmov.i64 q3, #0
618
+ vstm r0!, { d0 - d7 } @ 8 (clear 8 lanes at a time)
619
+ vstm r0!, { d0 - d7 } @ 16
620
+ vstm r0!, { d0 - d7 } @ 24
621
+ vstm r0!, { d0 - d7 } @ 32
622
+ vstm r0!, { d0 - d7 } @ 40
623
+ vstm r0!, { d0 - d7 } @ 48
624
+ vstm r0!, { d0 - d1} @ 50
625
+ bx lr
626
+
627
+
628
+
629
+ @----------------------------------------------------------------------------
630
+ @ XORs one byte into the state of one instance. Lanes of the two instances are interleaved, so each lane of an instance is 16 bytes after the previous one.
631
+ @ void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
632
+ @ In: r0 = states, r1 = instanceIndex, r2 = byte, r3 = offset. Clobbers r0, r1, r3.
633
+ .align 8
634
+ .global KeccakP1600times2_AddByte
635
+ .type KeccakP1600times2_AddByte, %function;
636
+ KeccakP1600times2_AddByte:
637
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
638
+ lsr r1, r3, #3 @ states += (offset & ~7) * 2
639
+ add r0, r0, r1, LSL #4
640
+ and r3, r3, #7
641
+ add r0, r0, r3 @ states += offset & 7
642
+ ldrb r1, [r0]
643
+ eor r1, r1, r2
644
+ strb r1, [r0]
645
+ bx lr
646
+
647
+
648
+ @----------------------------------------------------------------------------
649
+ @ XORs length bytes from data into the state of one instance, starting at byte offset. Works in three phases: bytes up to the next lane boundary, whole lanes, trailing bytes.
650
+ @ void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
651
+ @ unsigned int offset, unsigned int length )
652
+ @ In: r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length. r4-r7 are saved and restored.
653
+ .align 8
654
+ .global KeccakP1600times2_AddBytes
655
+ .type KeccakP1600times2_AddBytes, %function;
656
+ KeccakP1600times2_AddBytes:
657
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
658
+ ldr r1, [sp, #0*4] @ r1 = length
659
+ cmp r1, #0
660
+ beq KeccakP1600times2_AddBytes_Exit
661
+ push { r4- r7 }
662
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
663
+ add r0, r0, r4, LSL #4
664
+ ands r3, r3, #7 @ if (offset & 7) != 0
665
+ beq KeccakP1600times2_AddBytes_CheckLanes
666
+ add r0, r0, r3 @ states += offset & 7
667
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
668
+ KeccakP1600times2_AddBytes_LoopBytesFirst:
669
+ ldrb r4, [r0]
670
+ ldrb r5, [r2], #1
671
+ eor r4, r4, r5
672
+ subs r1, r1, #1
673
+ strb r4, [r0], #1 @ strb leaves the flags from subs intact for the beq below
674
+ beq KeccakP1600times2_AddBytes_Done
675
+ subs r3, r3, #1
676
+ bne KeccakP1600times2_AddBytes_LoopBytesFirst
677
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
678
+ KeccakP1600times2_AddBytes_CheckLanes:
679
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
680
+ beq KeccakP1600times2_AddBytes_CheckBytesLast
681
+ KeccakP1600times2_AddBytes_LoopLanes:
682
+ ldr r4, [r0]
683
+ ldr r5, [r0, #4]
684
+ ldr r6, [r2], #4
685
+ ldr r7, [r2], #4
686
+ eor r4, r4, r6
687
+ eor r5, r5, r7
688
+ subs r3, r3, #1
689
+ str r4, [r0], #4
690
+ str r5, [r0], #12 @ 4 + 12 = 16: skips the other instance's lane (next lane of current state part)
691
+ bne KeccakP1600times2_AddBytes_LoopLanes
692
+ KeccakP1600times2_AddBytes_CheckBytesLast:
693
+ ands r1, r1, #7 @ r1 = trailing byte count
694
+ beq KeccakP1600times2_AddBytes_Done
695
+ KeccakP1600times2_AddBytes_LoopBytesLast:
696
+ ldrb r4, [r0]
697
+ ldrb r5, [r2], #1
698
+ eor r4, r4, r5
699
+ subs r1, r1, #1
700
+ strb r4, [r0], #1
701
+ bne KeccakP1600times2_AddBytes_LoopBytesLast
702
+ KeccakP1600times2_AddBytes_Done:
703
+ pop { r4- r7 }
704
+ KeccakP1600times2_AddBytes_Exit:
705
+ bx lr
706
+
707
+
708
+ @----------------------------------------------------------------------------
709
+ @ XORs laneCount lanes into both instances: instance 0 reads from data, instance 1 from data + 8 * laneOffset.
710
+ @ void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
711
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. r4-r7 are saved and restored.
712
+ .global KeccakP1600times2_AddLanesAll
713
+ .type KeccakP1600times2_AddLanesAll, %function;
714
+ .align 8
715
+ KeccakP1600times2_AddLanesAll:
716
+ cmp r2, #0
717
+ beq KeccakP1600times2_AddLanesAll_Exit
718
+ add r3, r1, r3, LSL #3 @ r3: data + 8 * laneOffset
719
+ push {r4 - r7}
720
+ KeccakP1600times2_AddLanesAll_Loop:
721
+ ldr r4, [r1], #4 @ index 0
722
+ ldr r5, [r1], #4
723
+ ldrd r6, r7, [r0]
724
+ eor r6, r6, r4
725
+ eor r7, r7, r5
726
+ strd r6, r7, [r0], #8
727
+ ldr r4, [r3], #4 @ index 1
728
+ ldr r5, [r3], #4
729
+ ldrd r6, r7, [r0]
730
+ eor r6, r6, r4
731
+ eor r7, r7, r5
732
+ strd r6, r7, [r0], #8
733
+ subs r2, r2, #1
734
+ bne KeccakP1600times2_AddLanesAll_Loop
735
+ pop {r4 - r7}
736
+ KeccakP1600times2_AddLanesAll_Exit:
737
+ bx lr
738
+
739
+
740
+ @----------------------------------------------------------------------------
741
+ @ Copies length bytes from data over the state of one instance, starting at byte offset. Works in three phases: bytes up to the next lane boundary, whole lanes, trailing bytes.
742
+ @ void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
743
+ @ unsigned int offset, unsigned int length )
744
+ @ In: r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length. r4-r5 are saved and restored.
745
+ .align 8
746
+ .global KeccakP1600times2_OverwriteBytes
747
+ .type KeccakP1600times2_OverwriteBytes, %function;
748
+ KeccakP1600times2_OverwriteBytes:
749
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
750
+ ldr r1, [sp, #0*4] @ r1 = length
751
+ cmp r1, #0
752
+ beq KeccakP1600times2_OverwriteBytes_Exit
753
+ push { r4-r5 }
754
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
755
+ add r0, r0, r4, LSL #4
756
+ ands r3, r3, #7 @ if (offset & 7) != 0
757
+ beq KeccakP1600times2_OverwriteBytes_CheckLanes
758
+ add r0, r0, r3 @ states += offset & 7
759
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
760
+ KeccakP1600times2_OverwriteBytes_LoopBytesFirst:
761
+ ldrb r4, [r2], #1
762
+ strb r4, [r0], #1
763
+ subs r1, r1, #1
764
+ beq KeccakP1600times2_OverwriteBytes_Done
765
+ subs r3, r3, #1
766
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
767
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
768
+ KeccakP1600times2_OverwriteBytes_CheckLanes:
769
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
770
+ beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
771
+ KeccakP1600times2_OverwriteBytes_LoopLanes:
772
+ ldr r4, [r2], #4
773
+ ldr r5, [r2], #4
774
+ str r4, [r0], #4
775
+ str r5, [r0], #12 @ 4 + 12 = 16: skips the other instance's lane (next lane of current state part)
776
+ subs r3, r3, #1
777
+ bne KeccakP1600times2_OverwriteBytes_LoopLanes
778
+ KeccakP1600times2_OverwriteBytes_CheckBytesLast:
779
+ ands r1, r1, #7 @ r1 = trailing byte count
780
+ beq KeccakP1600times2_OverwriteBytes_Done
781
+ KeccakP1600times2_OverwriteBytes_LoopBytesLast:
782
+ ldrb r4, [r2], #1
783
+ subs r1, r1, #1
784
+ strb r4, [r0], #1
785
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
786
+ KeccakP1600times2_OverwriteBytes_Done:
787
+ pop { r4- r5 }
788
+ KeccakP1600times2_OverwriteBytes_Exit:
789
+ bx lr
790
+
791
+
792
+ @----------------------------------------------------------------------------
793
+ @ Overwrites laneCount lanes of both instances: instance 0 from data, instance 1 from data + 8 * laneOffset. Uses NEON transfers when data is 8-byte aligned, word loads otherwise.
794
+ @ void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
795
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. Clobbers r12 and d0-d3.
796
+ .align 8
797
+ .global KeccakP1600times2_OverwriteLanesAll
798
+ .type KeccakP1600times2_OverwriteLanesAll, %function;
799
+ KeccakP1600times2_OverwriteLanesAll:
800
+ cmp r2, #0
801
+ beq KeccakP1600times2_OverwriteLanesAll_Exit
802
+ lsls r12, r1, #32-3 @ non-zero when any of the low 3 bits of data is set
803
+ bne KeccakP1600times2_OverwriteLanesAll_Unaligned
804
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
805
+ lsrs r2, r2, #1 @ r2 = lane pairs; carry = odd lane pending; Z = no pairs
806
+ bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned
807
+ vldm r1!, { d0 } @ odd count: handle one lane of each instance first
808
+ vldm r3!, { d1 }
809
+ vstm r0!, { d0 - d1 }
810
+ beq KeccakP1600times2_OverwriteLanesAll_Exit @ flags still come from the lsrs above
811
+ KeccakP1600times2_OverwriteLanesAll_LoopAligned:
812
+ vldm r1!, { d0 }
813
+ vldm r1!, { d2 }
814
+ vldm r3!, { d1 }
815
+ vldm r3!, { d3 }
816
+ subs r2, r2, #1
817
+ vstm r0!, { d0 - d3 } @ interleaved order: inst0 lane, inst1 lane, inst0 next, inst1 next
818
+ bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
819
+ bx lr
820
+ KeccakP1600times2_OverwriteLanesAll_Unaligned:
821
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
822
+ push { r4, r5 }
823
+ KeccakP1600times2_OverwriteLanesAll_LoopUnaligned:
824
+ ldr r4, [r1], #4
825
+ ldr r5, [r1], #4
826
+ strd r4, r5, [r0], #8
827
+ ldr r4, [r3], #4
828
+ ldr r5, [r3], #4
829
+ subs r2, r2, #1
830
+ strd r4, r5, [r0], #8
831
+ bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
832
+ pop { r4, r5 }
833
+ KeccakP1600times2_OverwriteLanesAll_Exit:
834
+ bx lr
835
+
836
+
837
+ @----------------------------------------------------------------------------
838
+ @ Zeroes the first byteCount bytes of one instance: whole lanes first, then the remaining bytes of the next lane.
839
+ @ void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
840
+ @ In: r0 = states, r1 = instanceIndex, r2 = byteCount. Clobbers r0-r3 and d0.
841
+ .align 8
842
+ .global KeccakP1600times2_OverwriteWithZeroes
843
+ .type KeccakP1600times2_OverwriteWithZeroes, %function;
844
+ KeccakP1600times2_OverwriteWithZeroes:
845
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
846
+ lsrs r1, r2, #3 @ r1: laneCount
847
+ beq KeccakP1600times2_OverwriteWithZeroes_Bytes
848
+ vmov.i64 d0, #0
849
+ KeccakP1600times2_OverwriteWithZeroes_LoopLanes:
850
+ subs r1, r1, #1
851
+ vstm r0!, { d0 }
852
+ add r0, r0, #8 @ skip the other instance's lane; add does not disturb the flags from subs
853
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
854
+ KeccakP1600times2_OverwriteWithZeroes_Bytes:
855
+ ands r2, r2, #7 @ r2: byteCount remaining
856
+ beq KeccakP1600times2_OverwriteWithZeroes_Exit
857
+ movs r3, #0
858
+ KeccakP1600times2_OverwriteWithZeroes_LoopBytes:
859
+ subs r2, r2, #1
860
+ strb r3, [r0], #1
861
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
862
+ KeccakP1600times2_OverwriteWithZeroes_Exit:
863
+ bx lr
864
+
865
+
866
+ @----------------------------------------------------------------------------
867
+ @ Copies length bytes of one instance's state, starting at byte offset, into data. Works in three phases: bytes up to the next lane boundary, whole lanes, trailing bytes.
868
+ @ void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
869
+ @ unsigned int offset, unsigned int length )
870
+ @ In: r0 = states, r1 = instanceIndex, r2 = data (written), r3 = offset, [sp] = length. r4-r5 are saved and restored.
871
+ .align 8
872
+ .global KeccakP1600times2_ExtractBytes
873
+ .type KeccakP1600times2_ExtractBytes, %function;
874
+ KeccakP1600times2_ExtractBytes:
875
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
876
+ ldr r1, [sp, #0*4] @ r1 = length
877
+ cmp r1, #0
878
+ beq KeccakP1600times2_ExtractBytes_Exit
879
+ push { r4-r5 }
880
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
881
+ add r0, r0, r4, LSL #4
882
+ ands r3, r3, #7 @ if (offset & 7) != 0
883
+ beq KeccakP1600times2_ExtractBytes_CheckLanes
884
+ add r0, r0, r3 @ states += offset & 7
885
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
886
+ KeccakP1600times2_ExtractBytes_LoopBytesFirst:
887
+ ldrb r4, [r0], #1
888
+ strb r4, [r2], #1
889
+ subs r1, r1, #1
890
+ beq KeccakP1600times2_ExtractBytes_Done
891
+ subs r3, r3, #1
892
+ bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
893
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
894
+ KeccakP1600times2_ExtractBytes_CheckLanes:
895
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
896
+ beq KeccakP1600times2_ExtractBytes_CheckBytesLast
897
+ KeccakP1600times2_ExtractBytes_LoopLanes:
898
+ ldr r4, [r0], #4
899
+ ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
900
+ str r4, [r2], #4
901
+ str r5, [r2], #4
902
+ subs r3, r3, #1
903
+ bne KeccakP1600times2_ExtractBytes_LoopLanes
904
+ KeccakP1600times2_ExtractBytes_CheckBytesLast:
905
+ ands r1, r1, #7 @ r1 = trailing byte count
906
+ beq KeccakP1600times2_ExtractBytes_Done
907
+ KeccakP1600times2_ExtractBytes_LoopBytesLast:
908
+ ldrb r4, [r0], #1
909
+ subs r1, r1, #1
910
+ strb r4, [r2], #1
911
+ bne KeccakP1600times2_ExtractBytes_LoopBytesLast
912
+ KeccakP1600times2_ExtractBytes_Done:
913
+ pop { r4-r5 }
914
+ KeccakP1600times2_ExtractBytes_Exit:
915
+ bx lr
916
+
917
+
918
+ @----------------------------------------------------------------------------
919
+ @ Copies laneCount lanes of both instances out of the state: instance 0 to data, instance 1 to data + 8 * laneOffset. Uses NEON transfers when data is 8-byte aligned, word stores otherwise.
920
+ @ void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
921
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. Clobbers r12 and d0-d3.
922
+ .align 8
923
+ .global KeccakP1600times2_ExtractLanesAll
924
+ .type KeccakP1600times2_ExtractLanesAll, %function;
925
+ KeccakP1600times2_ExtractLanesAll:
926
+ cmp r2, #0
927
+ beq KeccakP1600times2_ExtractLanesAll_Exit
928
+ lsls r12, r1, #32-3 @ non-zero when any of the low 3 bits of data is set
929
+ bne KeccakP1600times2_ExtractLanesAll_Unaligned
930
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
931
+ lsrs r2, r2, #1 @ r2 = lane pairs; carry = odd lane pending; Z = no pairs
932
+ bcc KeccakP1600times2_ExtractLanesAll_LoopAligned
933
+ vldm r0!, { d0 - d1 } @ odd count: handle one lane of each instance first
934
+ vstm r1!, { d0 }
935
+ vstm r3!, { d1 }
936
+ beq KeccakP1600times2_ExtractLanesAll_Exit @ flags still come from the lsrs above
937
+ KeccakP1600times2_ExtractLanesAll_LoopAligned:
938
+ vldm r0!, { d0 - d3 } @ interleaved order: inst0 lane, inst1 lane, inst0 next, inst1 next
939
+ subs r2, r2, #1
940
+ vstm r1!, { d0 }
941
+ vstm r1!, { d2 }
942
+ vstm r3!, { d1 }
943
+ vstm r3!, { d3 }
944
+ bne KeccakP1600times2_ExtractLanesAll_LoopAligned
945
+ bx lr
946
+ KeccakP1600times2_ExtractLanesAll_Unaligned:
947
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
948
+ push { r4, r5 }
949
+ KeccakP1600times2_ExtractLanesAll_LoopUnaligned:
950
+ ldrd r4, r5, [r0], #8
951
+ str r4, [r1], #4
952
+ str r5, [r1], #4
953
+ ldrd r4, r5, [r0], #8
954
+ subs r2, r2, #1
955
+ str r4, [r3], #4
956
+ str r5, [r3], #4
957
+ bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
958
+ pop { r4, r5 }
959
+ KeccakP1600times2_ExtractLanesAll_Exit:
960
+ bx lr
961
+
962
+
963
+ @----------------------------------------------------------------------------
964
+ @ Writes output[i] = state byte ^ input[i] for length bytes of one instance, starting at byte offset. The state itself is only read.
965
+ @ void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
966
+ @ const unsigned char *input, unsigned char *output,
967
+ @ unsigned int offset, unsigned int length )
968
+ @ In: r0 = states, r1 = instanceIndex, r2 = input, r3 = output, [sp] = offset, [sp, #4] = length. r4-r9 are saved and restored.
969
+ .align 8
970
+ .global KeccakP1600times2_ExtractAndAddBytes
971
+ .type KeccakP1600times2_ExtractAndAddBytes, %function;
972
+ KeccakP1600times2_ExtractAndAddBytes:
973
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
974
+ ldr r1, [sp, #1*4] @ r1 = length
975
+ cmp r1, #0
976
+ beq KeccakP1600times2_ExtractAndAddBytes_Exit
977
+ push { r4 - r9 }
978
+ ldr r8, [sp, #6*4] @ r8 = offset
979
+ lsr r4, r8, #3 @ states += (offset & ~7) * 2
980
+ add r0, r0, r4, LSL #4
981
+ ands r8, r8, #7 @ if (offset & 7) != 0
982
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
983
+ add r0, r0, r8 @ states += offset & 7
984
+ rsb r8, r8, #8 @ lenInLane = 8 - (offset & 7)
985
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst:
986
+ ldrb r4, [r0], #1
987
+ ldrb r5, [r2], #1
988
+ eor r4, r4, r5
989
+ strb r4, [r3], #1
990
+ subs r1, r1, #1
991
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
992
+ subs r8, r8, #1
993
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
994
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
995
+ KeccakP1600times2_ExtractAndAddBytes_CheckLanes:
996
+ lsrs r8, r1, #3 @ r8 = number of whole lanes left
997
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
998
+ KeccakP1600times2_ExtractAndAddBytes_LoopLanes:
999
+ ldr r4, [r0], #4
1000
+ ldr r5, [r0], #12 @ 4 + 12 = 16: skips the other instance's lane (next lane of current state part)
1001
+ ldr r6, [r2], #4
1002
+ ldr r7, [r2], #4
1003
+ eor r4, r4, r6
1004
+ eor r5, r5, r7
1005
+ str r4, [r3], #4
1006
+ str r5, [r3], #4 @ output is a plain byte stream, so it advances by 8 per lane
1007
+ subs r8, r8, #1
1008
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1009
+ KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast:
1010
+ ands r1, r1, #7 @ r1 = trailing byte count
1011
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
1012
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast:
1013
+ ldrb r4, [r0], #1
1014
+ ldrb r5, [r2], #1
1015
+ eor r4, r4, r5
1016
+ strb r4, [r3], #1
1017
+ subs r1, r1, #1
1018
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1019
+ KeccakP1600times2_ExtractAndAddBytes_Done:
1020
+ pop { r4 - r9 }
1021
+ KeccakP1600times2_ExtractAndAddBytes_Exit:
1022
+ bx lr
1023
+
1024
+
1025
+ @----------------------------------------------------------------------------
1026
+ @ For both instances writes output lane = state lane ^ input lane, for laneCount lanes. Instance 1 uses input/output + 8 * laneOffset. The state itself is only read.
1027
+ @ void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1028
+ @ const unsigned char *input, unsigned char *output,
1029
+ @ unsigned int laneCount, unsigned int laneOffset )
1030
+ @ In: r0 = states, r1 = input, r2 = output, r3 = laneCount, [sp] = laneOffset. Clobbers r12 and d0-d7; r4-r9 are saved when used.
1031
+ .align 8
1032
+ .global KeccakP1600times2_ExtractAndAddLanesAll
1033
+ .type KeccakP1600times2_ExtractAndAddLanesAll, %function;
1034
+ KeccakP1600times2_ExtractAndAddLanesAll:
1035
+ cmp r3, #0
1036
+ beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1037
+ orr r12, r1, r2
1038
+ lsls r12, r12, #32-3 @ unaligned access if input or output unaligned
1039
+ bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1040
+ push {r4,r5}
1041
+ ldr r12, [sp, #2*4] @ r12 = laneOffset
1042
+ lsrs r3, r3, #1 @ r3 = lane pairs; carry = odd lane pending; Z = no pairs
1043
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1044
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1045
+ bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1046
+ vldm r0!, { d0 - d1 } @ odd count: handle one lane of each instance first
1047
+ vldm r1!, { d2 }
1048
+ vldm r4!, { d3 }
1049
+ veor q0, q0, q1
1050
+ vstm r2!, { d0 }
1051
+ vstm r5!, { d1 }
1052
+ beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone @ flags still come from the lsrs above
1053
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned:
1054
+ vldm r0!, { d0 - d3 } @ interleaved order: inst0 lane, inst1 lane, inst0 next, inst1 next
1055
+ vldm r1!, { d4 }
1056
+ vldm r1!, { d6 }
1057
+ vldm r4!, { d5 }
1058
+ vldm r4!, { d7 }
1059
+ subs r3, r3, #1
1060
+ veor q0, q0, q2
1061
+ veor q1, q1, q3
1062
+ vstm r2!, { d0 }
1063
+ vstm r2!, { d2 }
1064
+ vstm r5!, { d1 }
1065
+ vstm r5!, { d3 }
1066
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1067
+ KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone:
1068
+ pop {r4,r5}
1069
+ bx lr
1070
+ KeccakP1600times2_ExtractAndAddLanesAll_Unaligned:
1071
+ push {r4-r9}
1072
+ ldr r12, [sp, #6*4] @ r12 = laneOffset
1073
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1074
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1075
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned:
1076
+ ldrd r8, r9, [r0], #8
1077
+ ldr r6, [r1], #4
1078
+ ldr r7, [r1], #4
1079
+ eor r8, r8, r6
1080
+ eor r9, r9, r7
1081
+ str r8, [r2], #4
1082
+ str r9, [r2], #4
1083
+ ldrd r8, r9, [r0], #8
1084
+ ldr r6, [r4], #4
1085
+ ldr r7, [r4], #4
1086
+ eor r8, r8, r6
1087
+ eor r9, r9, r7
1088
+ str r8, [r5], #4
1089
+ subs r3, r3, #1
1090
+ str r9, [r5], #4
1091
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1092
+ pop { r4 - r9 }
1093
+ KeccakP1600times2_ExtractAndAddLanesAll_Exit:
1094
+ bx lr
1095
+
1096
+
1097
+ @----------------------------------------------------------------------------
1098
+ @ Entry point: sets r1 to the start of the round-constant table and r2 to 24, then branches to the shared permutation routine.
1099
+ @ void KeccakP1600times2_PermuteAll_24rounds( void *states )
1100
+ @
1101
+ .align 8
1102
+ .global KeccakP1600times2_PermuteAll_24rounds
1103
+ .type KeccakP1600times2_PermuteAll_24rounds, %function;
1104
+ KeccakP1600times2_PermuteAll_24rounds:
1105
+ adr r1, KeccakP1600times2_Permute_RoundConstants24
1106
+ movs r2, #24
1107
+ b KeccakP1600times2_PermuteAll @ plain branch: the shared routine returns to this function's caller
1108
+
1109
+
1110
+ @----------------------------------------------------------------------------
1111
+ @ Entry point: sets r1 to the constant that follows the first 12 table entries and r2 to 12, then branches to the shared permutation routine.
1112
+ @ void KeccakP1600times2_PermuteAll_12rounds( void *states )
1113
+ @
1114
+ .align 8
1115
+ .global KeccakP1600times2_PermuteAll_12rounds
1116
+ .type KeccakP1600times2_PermuteAll_12rounds, %function;
1117
+ KeccakP1600times2_PermuteAll_12rounds:
1118
+ adr r1, KeccakP1600times2_Permute_RoundConstants12
1119
+ movs r2, #12
1120
+ b KeccakP1600times2_PermuteAll @ plain branch: the shared routine returns to this function's caller
1121
+
1122
+
1123
+ .align 8
1124
+ KeccakP1600times2_Permute_RoundConstants24: @ 24 round constants, one 64-bit value per round
1125
+ .quad 0x0000000000000001
1126
+ .quad 0x0000000000008082
1127
+ .quad 0x800000000000808a
1128
+ .quad 0x8000000080008000
1129
+ .quad 0x000000000000808b
1130
+ .quad 0x0000000080000001
1131
+ .quad 0x8000000080008081
1132
+ .quad 0x8000000000008009
1133
+ .quad 0x000000000000008a
1134
+ .quad 0x0000000000000088
1135
+ .quad 0x0000000080008009
1136
+ .quad 0x000000008000000a
1137
+ KeccakP1600times2_Permute_RoundConstants12: @ second label into the same table: the last 12 constants
1138
+ .quad 0x000000008000808b
1139
+ .quad 0x800000000000008b
1140
+ .quad 0x8000000000008089
1141
+ .quad 0x8000000000008003
1142
+ .quad 0x8000000000008002
1143
+ .quad 0x8000000000000080
1144
+ .quad 0x000000000000800a
1145
+ .quad 0x800000008000000a
1146
+ .quad 0x8000000080008081
1147
+ .quad 0x8000000000008080
1148
+ .quad 0x0000000080000001
1149
+ .quad 0x8000000080008008
1150
+
1151
+ @----------------------------------------------------------------------------
1152
+ @ Entry point for the permutation loop. r0 = states and r2 = nr as used below; r1 (rc) is not touched in the code visible here - presumably consumed by the round macros, TODO confirm.
1153
+ @ void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1154
+ @ nr is decremented by 4 per loop pass and the loop exits only when it reaches 0, so callers must pass a non-zero multiple of 4.
1155
+ .align 8 @ GNU as on ARM takes the operand as a power of two: 2^8 = 256-byte boundary
1156
+ KeccakP1600times2_PermuteAll:
1157
+ vpush {q4-q7} @ save q4-q7 (d8-d15); restored by the vpop before return
1158
+ push {r4-r7} @ save r4-r7; of these only r5 is written in the code visible here
1159
+ sub sp, #4*2*8+8 @ reserve 4 double lanes (4*2*8 bytes) plus 8 spare bytes so r5 can be rounded to a 16-byte boundary
1160
+ mov r3, r0 @ r3 = running load pointer into states; r0 keeps the base
1161
+ add r5, sp, #8 @ r5 = sp + 8; the bic below rounds it down to 16 bytes, which keeps it inside the reserved area
1162
+
1163
+ @PrepareTheta: XOR the five lanes of each column into q0..q4 (loads and veors are interleaved)
1164
+ @ Ca = ba ^ ga ^ ka ^ ma ^ sa
1165
+ @ Ce = be ^ ge ^ ke ^ me ^ se
1166
+ @ Ci = bi ^ gi ^ ki ^ mi ^ si
1167
+ @ Co = bo ^ go ^ ko ^ mo ^ so
1168
+ @ Cu = bu ^ gu ^ ku ^ mu ^ su
1169
+ vld1.64 { d0, d1, d2, d3 }, [r3:256]! @ _ba _be (the :256 qualifier asserts a 32-byte aligned address, so states must be 32-byte aligned)
1170
+ bic r5, #15 @ clear the low 4 bits: r5 is now 16-byte aligned
1171
+ vld1.64 { d4, d5, d6, d7 }, [r3:256]! @ _bi _bo
1172
+ vld1.64 { d8, d9, d10, d11 }, [r3:256]! @ _bu _ga
1173
+ vld1.64 { d12, d13 }, [r3:128]! @ _ge
1174
+ veor.64 q0, q0, q5 @ q0 = ba ^ ga (start of the Ca accumulator)
1175
+ vld1.64 { d14, d15 }, [r3:128]! @ _gi
1176
+ veor.64 q1, q1, q6
1177
+ vld1.64 { d16, d17 }, [r3:128]! @ _go
1178
+ veor.64 q2, q2, q7
1179
+ vld1.64 { d18, d19 }, [r3:128]! @ _gu
1180
+ veor.64 q3, q3, q8
1181
+ vld1.64 { d10, d11 }, [r3:128]! @ _ka (q5..q9 are reused as load temporaries for each following plane)
1182
+ veor.64 q4, q4, q9
1183
+ vld1.64 { d12, d13 }, [r3:128]! @ _ke
1184
+ veor.64 q0, q0, q5
1185
+ vld1.64 { d14, d15 }, [r3:128]! @ _ki
1186
+ veor.64 q1, q1, q6
1187
+ vld1.64 { d16, d17 }, [r3:128]! @ _ko
1188
+ veor.64 q2, q2, q7
1189
+ vld1.64 { d18, d19 }, [r3:128]! @ _ku
1190
+ veor.64 q3, q3, q8
1191
+ vld1.64 { d10, d11 }, [r3:128]! @ _ma
1192
+ veor.64 q4, q4, q9
1193
+ vld1.64 { d12, d13 }, [r3:128]! @ _me
1194
+ veor.64 q0, q0, q5
1195
+ vld1.64 { d14, d15 }, [r3:128]! @ _mi
1196
+ veor.64 q1, q1, q6
1197
+ vld1.64 { d16, d17 }, [r3:128]! @ _mo
1198
+ veor.64 q2, q2, q7
1199
+ vld1.64 { d18, d19 }, [r3:128]! @ _mu
1200
+ veor.64 q3, q3, q8
1201
+ vld1.64 { d10, d11 }, [r3:128]! @ _sa
1202
+ veor.64 q4, q4, q9
1203
+ vld1.64 { d12, d13 }, [r3:128]! @ _se
1204
+ veor.64 q0, q0, q5
1205
+ vld1.64 { d14, d15 }, [r3:128]! @ _si
1206
+ veor.64 q1, q1, q6
1207
+ vld1.64 { d16, d17 }, [r3:128]! @ _so
1208
+ veor.64 q2, q2, q7
1209
+ vld1.64 { d18, d19 }, [r3:128]! @ _su
1210
+ mov r3, r0 @ reset r3 to the base of states
1211
+ veor.64 q3, q3, q8
1212
+ veor.64 q4, q4, q9 @ q0..q4 now hold Ca, Ce, Ci, Co, Cu
1213
+
1214
+ KeccakP1600times2_PermuteAll_RoundLoop: @ one pass = four groups of five macro invocations (macros are defined outside this excerpt); r2 drops by 4 per pass
1215
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka @ _ba, _ge, _ki, _mo, _su
1216
+ KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa @ _ka, _me, _si, _bo, _gu
1217
+ KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga @ _sa, _be, _gi, _ko, _mu
1218
+ KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma @ _ga, _ke, _mi, _so, _bu
1219
+ KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba @ _ma, _se, _bi, _go, _ku
1220
+
1221
+ KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa @ _ba, _me, _gi, _so, _ku
1222
+ KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma @ _sa, _ke, _bi, _mo, _gu
1223
+ KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka @ _ma, _ge, _si, _ko, _bu
1224
+ KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga @ _ka, _be, _mi, _go, _su
1225
+ KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba @ _ga, _se, _ki, _bo, _mu
1226
+
1227
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma @ _ba, _ke, _si, _go, _mu
1228
+ KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga @ _ma, _be, _ki, _so, _gu
1229
+ KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa @ _ga, _me, _bi, _ko, _su
1230
+ KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka @ _sa, _ge, _mi, _bo, _ku
1231
+ KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba @ _ka, _se, _gi, _mo, _bu
1232
+
1233
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga @ _ba, _be, _bi, _bo, _bu
1234
+ KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka @ _ga, _ge, _gi, _go, _gu
1235
+ KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma @ _ka, _ke, _ki, _ko, _ku
1236
+ KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa @ _ma, _me, _mi, _mo, _mu
1237
+ subs r2, #4 @ remaining-round counter -= 4 and set flags; the flags must survive the macro below to reach the bne (presumably it leaves the condition flags untouched - confirm against the macro definition)
1238
+ KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba @ _sa, _se, _si, _so, _su
1239
+ bne KeccakP1600times2_PermuteAll_RoundLoop @ repeat until r2 reaches zero
1240
+ add sp, #4*2*8+8 @ release the scratch area reserved in the prologue (4 double lanes + 8 alignment bytes)
1241
+ pop {r4-r7}
1242
+ vpop {q4-q7}
1243
+ bx lr
1244
+
1245
+