sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1394 @@
1
+ @
2
+ @ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
+ @
4
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
+ @
6
+ @ For more information, feedback or questions, please refer to the Keccak Team website:
7
+ @ https://keccak.team/
8
+ @
9
+ @ To the extent possible under law, the implementer has waived all copyright
10
+ @ and related or neighboring rights to the source code in this file.
11
+ @ http://creativecommons.org/publicdomain/zero/1.0/
12
+ @
13
+ @ ---
14
+ @
15
+ @ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
+ @ Please refer to PlSnP-documentation.h for more details.
17
+ @
18
+ @ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
+ @ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ @
21
+
22
+ @ WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
23
+ @ WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
24
+
25
+ @ INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
26
+ @ INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
27
+
28
+ @ INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
29
+
30
+
31
+
32
+ .text
33
+
34
+ @----------------------------------------------------------------------------
35
+
36
+ @ --- offsets in state: byte offset of each lane position (ba..su), 16 bytes apart (presumably one 64-bit lane from each of the two interleaved states)
37
+ .equ _ba , 0*16
38
+ .equ _be , 1*16
39
+ .equ _bi , 2*16
40
+ .equ _bo , 3*16
41
+ .equ _bu , 4*16
42
+ .equ _ga , 5*16
43
+ .equ _ge , 6*16
44
+ .equ _gi , 7*16
45
+ .equ _go , 8*16
46
+ .equ _gu , 9*16
47
+ .equ _ka , 10*16
48
+ .equ _ke , 11*16
49
+ .equ _ki , 12*16
50
+ .equ _ko , 13*16
51
+ .equ _ku , 14*16
52
+ .equ _ma , 15*16
53
+ .equ _me , 16*16
54
+ .equ _mi , 17*16
55
+ .equ _mo , 18*16
56
+ .equ _mu , 19*16
57
+ .equ _sa , 20*16
58
+ .equ _se , 21*16
59
+ .equ _si , 22*16
60
+ .equ _so , 23*16
61
+ .equ _su , 24*16
62
+
63
+ @ --- macros for Single permutation
64
+
65
+ .macro KeccakS_ThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
66
+ @ Single-state round step for one row, including Iota: builds the Theta D values, then applies Theta/Rho/Pi/Chi/Iota. argA1 is a byte offset from r0 (that lane is kept in memory); argA2..argA5 are D registers updated in place. Uses d0..d11 as scratch and advances r1 by one 64-bit round constant.
67
+ @Prepare Theta (column parities are XORed from d12..d31 plus Ca taken from d5; results: Da in d7, De/Di/Do/Du in d8/d9/d10/d11)
68
+ @ Ca = Aba^Aga^Aka^Ama^Asa
69
+ @ Ce = Abe^Age^Ake^Ame^Ase
70
+ @ Ci = Abi^Agi^Aki^Ami^Asi
71
+ @ Co = Abo^Ago^Ako^Amo^Aso
72
+ @ Cu = Abu^Agu^Aku^Amu^Asu
73
+ @ De = Ca^ROL64(Ci, 1)
74
+ @ Di = Ce^ROL64(Co, 1)
75
+ @ Do = Ci^ROL64(Cu, 1)
76
+ @ Du = Co^ROL64(Ca, 1)
77
+ @ Da = Cu^ROL64(Ce, 1)
78
+ veor.64 q4, q6, q7
79
+ veor.64 q5, q9, q10
80
+ veor.64 d8, d8, d9
81
+ veor.64 d10, d10, d11
82
+ veor.64 d1, d8, d16 @ d1 = Ce (d12^d13^d14^d15^d16)
83
+ veor.64 d2, d10, d17 @ d2 = Ci (d17^d18^d19^d20^d21)
84
+
85
+ veor.64 q4, q11, q12
86
+ veor.64 q5, q14, q15
87
+ veor.64 d8, d8, d9
88
+ veor.64 d10, d10, d11
89
+ veor.64 d3, d8, d26 @ d3 = Co (d22^d23^d24^d25^d26)
90
+
91
+ vadd.u64 q4, q1, q1 @ (Ci,Co) << 1; the vsri #63 below completes ROL64(., 1)
92
+ veor.64 d4, d10, d27 @ d4 = Cu (d27^d28^d29^d30^d31)
93
+ vmov.64 d0, d5 @ d0 = Ca, taken from d5
94
+ vsri.64 q4, q1, #63
95
+
96
+ vadd.u64 q5, q2, q2 @ (Cu,Ca) << 1; completed to ROL64(., 1) by the vsri below
97
+ veor.64 q4, q4, q0 @ d8 = De, d9 = Di
98
+ vsri.64 q5, q2, #63
99
+ vadd.u64 d7, d1, d1 @ Ce << 1; must read d1 before it is reused for Be
100
+ veor.64 \argA2, \argA2, d8 @ argA2 ^= De
101
+ veor.64 q5, q5, q1 @ d10 = Do, d11 = Du
102
+
103
+ vsri.64 d7, d1, #63
104
+ vshl.u64 d1, \argA2, #44
105
+ veor.64 \argA3, \argA3, d9 @ argA3 ^= Di
106
+ veor.64 d7, d7, d4 @ d7 = Da
107
+
108
+ @ Ba = argA1^Da
109
+ @ Be = ROL64((argA2^De), 44)
110
+ @ Bi = ROL64((argA3^Di), 43)
111
+ @ Bo = ROL64((argA4^Do), 21)
112
+ @ Bu = ROL64((argA5^Du), 14)
113
+ @ argA2 = Be ^((~Bi)& Bo )
114
+ @ argA3 = Bi ^((~Bo)& Bu )
115
+ @ argA4 = Bo ^((~Bu)& Ba )
116
+ @ argA5 = Bu ^((~Ba)& Be )
117
+ @ argA1 = Ba ^((~Be)& Bi )
118
+ @ argA1 ^= KeccakP1600RoundConstants[i+round]
119
+ vsri.64 d1, \argA2, #64-44 @ d1 = Be (each vshl #n / vsri #64-n pair forms ROL64 by n)
120
+ vshl.u64 d2, \argA3, #43
121
+ vldr.64 d0, [r0, #\argA1] @ load the argA1 lane from memory
122
+ veor.64 \argA4, \argA4, d10 @ argA4 ^= Do
123
+ vsri.64 d2, \argA3, #64-43 @ d2 = Bi
124
+ vshl.u64 d3, \argA4, #21
125
+ veor.64 \argA5, \argA5, d11 @ argA5 ^= Du
126
+ veor.64 d0, d0, d7 @ d0 = Ba
127
+ vsri.64 d3, \argA4, #64-21 @ d3 = Bo
128
+ vbic.64 d5, d2, d1 @ d5 = Bi & ~Be
129
+ vshl.u64 d4, \argA5, #14
130
+ vbic.64 \argA2, d3, d2
131
+ vld1.64 d6, [r1]! @ d6 = round constant; r1 post-incremented to the next one
132
+ veor.64 d5, d0
133
+ vsri.64 d4, \argA5, #64-14 @ d4 = Bu
134
+ veor.64 d5, d6 @ d5 = new argA1 (Iota applied)
135
+ vbic.64 \argA5, d1, d0
136
+ vbic.64 \argA3, d4, d3
137
+ vbic.64 \argA4, d0, d4
138
+ veor.64 \argA2, d1
139
+ vstr.64 d5, [r0, #\argA1] @ write new argA1 back; d5 keeps the value (KeccakS_ThetaRhoPiChi1..4 XOR their argA1 results into d5)
140
+ veor.64 \argA3, d2
141
+ veor.64 \argA4, d3
142
+ veor.64 \argA5, d4
143
+ .endm
144
+
145
+ .macro KeccakS_ThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
146
+ @ Theta/Rho/Pi/Chi for one row, no Iota. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. Reads Da from d7 and De/Di/Do/Du from d8..d11; uses d0..d4 and d6 as scratch; XORs the new argA1 into d5.
147
+ @ Bi = ROL64((argA1^Da), 3)
148
+ @ Bo = ROL64((argA2^De), 45)
149
+ @ Bu = ROL64((argA3^Di), 61)
150
+ @ Ba = ROL64((argA4^Do), 28)
151
+ @ Be = ROL64((argA5^Du), 20)
152
+ @ argA1 = Ba ^((~Be)& Bi )
153
+ @ Ca ^= argA1
154
+ @ argA2 = Be ^((~Bi)& Bo )
155
+ @ argA3 = Bi ^((~Bo)& Bu )
156
+ @ argA4 = Bo ^((~Bu)& Ba )
157
+ @ argA5 = Bu ^((~Ba)& Be )
158
+ veor.64 \argA2, \argA2, d8
159
+ veor.64 \argA3, \argA3, d9
160
+ vshl.u64 d3, \argA2, #45
161
+ vldr.64 d6, [r0, #\argA1] @ load the argA1 lane from memory
162
+ vshl.u64 d4, \argA3, #61
163
+ veor.64 \argA4, \argA4, d10
164
+ vsri.64 d3, \argA2, #64-45 @ d3 = Bo
165
+ veor.64 \argA5, \argA5, d11
166
+ vsri.64 d4, \argA3, #64-61 @ d4 = Bu
167
+ vshl.u64 d0, \argA4, #28
168
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da
169
+ vshl.u64 d1, \argA5, #20
170
+ vbic.64 \argA3, d4, d3
171
+ vsri.64 d0, \argA4, #64-28 @ d0 = Ba
172
+ vbic.64 \argA4, d0, d4
173
+ vshl.u64 d2, d6, #3
174
+ vsri.64 d1, \argA5, #64-20 @ d1 = Be
175
+ veor.64 \argA4, d3
176
+ vsri.64 d2, d6, #64-3 @ d2 = Bi
177
+ vbic.64 \argA5, d1, d0
178
+ vbic.64 d6, d2, d1
179
+ vbic.64 \argA2, d3, d2
180
+ veor.64 d6, d0 @ d6 = new argA1
181
+ veor.64 \argA2, d1
182
+ vstr.64 d6, [r0, #\argA1] @ write new argA1 back to memory
183
+ veor.64 \argA3, d2
184
+ veor.64 d5, d6 @ Ca ^= new argA1
185
+ veor.64 \argA5, d4
186
+ .endm
187
+
188
+ .macro KeccakS_ThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
189
+ @ Theta/Rho/Pi/Chi for one row, no Iota. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. Reads Da from d7 and De/Di/Do/Du from d8..d11; uses d0..d4 and d6 as scratch; XORs the new argA1 into d5.
190
+ @ Bu = ROL64((argA1^Da), 18)
191
+ @ Ba = ROL64((argA2^De), 1)
192
+ @ Be = ROL64((argA3^Di), 6)
193
+ @ Bi = ROL64((argA4^Do), 25)
194
+ @ Bo = ROL64((argA5^Du), 8)
195
+ @ argA1 = Ba ^((~Be)& Bi )
196
+ @ Ca ^= argA1
197
+ @ argA2 = Be ^((~Bi)& Bo )
198
+ @ argA3 = Bi ^((~Bo)& Bu )
199
+ @ argA4 = Bo ^((~Bu)& Ba )
200
+ @ argA5 = Bu ^((~Ba)& Be )
201
+ veor.64 \argA3, \argA3, d9
202
+ veor.64 \argA4, \argA4, d10
203
+ vshl.u64 d1, \argA3, #6
204
+ vldr.64 d6, [r0, #\argA1] @ load the argA1 lane from memory
205
+ vshl.u64 d2, \argA4, #25
206
+ veor.64 \argA5, \argA5, d11
207
+ vsri.64 d1, \argA3, #64-6 @ d1 = Be
208
+ veor.64 \argA2, \argA2, d8
209
+ vsri.64 d2, \argA4, #64-25 @ d2 = Bi
210
+ vext.8 d3, \argA5, \argA5, #7 @ d3 = Bo: rotate left by 8 bits done as a one-byte rotation
211
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da
212
+ vbic.64 \argA3, d2, d1 @ argA3 used as a temporary for the new argA1
213
+ vadd.u64 d0, \argA2, \argA2
214
+ vbic.64 \argA4, d3, d2
215
+ vsri.64 d0, \argA2, #64-1 @ d0 = Ba
216
+ vshl.u64 d4, d6, #18
217
+ veor.64 \argA2, d1, \argA4 @ new argA2 = Be ^ (~Bi & Bo)
218
+ veor.64 \argA3, d0 @ temporary = new argA1
219
+ vsri.64 d4, d6, #64-18 @ d4 = Bu
220
+ vstr.64 \argA3, [r0, #\argA1] @ write new argA1 back to memory
221
+ veor.64 d5, \argA3 @ Ca ^= new argA1
222
+ vbic.64 \argA5, d1, d0
223
+ vbic.64 \argA3, d4, d3 @ argA3 now rebuilt with its own result
224
+ vbic.64 \argA4, d0, d4
225
+ veor.64 \argA3, d2
226
+ veor.64 \argA4, d3
227
+ veor.64 \argA5, d4
228
+ .endm
229
+
230
+ .macro KeccakS_ThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
231
+ @ Theta/Rho/Pi/Chi for one row, no Iota. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. Reads Da from d7 and De/Di/Do/Du from d8..d11; uses d0..d4 and d6 as scratch; XORs the new argA1 into d5.
232
+ @ Be = ROL64((argA1^Da), 36)
233
+ @ Bi = ROL64((argA2^De), 10)
234
+ @ Bo = ROL64((argA3^Di), 15)
235
+ @ Bu = ROL64((argA4^Do), 56)
236
+ @ Ba = ROL64((argA5^Du), 27)
237
+ @ argA1 = Ba ^((~Be)& Bi )
238
+ @ Ca ^= argA1
239
+ @ argA2 = Be ^((~Bi)& Bo )
240
+ @ argA3 = Bi ^((~Bo)& Bu )
241
+ @ argA4 = Bo ^((~Bu)& Ba )
242
+ @ argA5 = Bu ^((~Ba)& Be )
243
+ veor.64 \argA2, \argA2, d8
244
+ veor.64 \argA3, \argA3, d9
245
+ vshl.u64 d2, \argA2, #10
246
+ vldr.64 d6, [r0, #\argA1] @ load the argA1 lane from memory
247
+ vshl.u64 d3, \argA3, #15
248
+ veor.64 \argA4, \argA4, d10
249
+ vsri.64 d2, \argA2, #64-10 @ d2 = Bi
250
+ vsri.64 d3, \argA3, #64-15 @ d3 = Bo
251
+ veor.64 \argA5, \argA5, d11
252
+ vext.8 d4, \argA4, \argA4, #1 @ d4 = Bu: rotate left by 56 bits done as a one-byte rotation the other way
253
+ vbic.64 \argA2, d3, d2
254
+ vshl.u64 d0, \argA5, #27
255
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da
256
+ vbic.64 \argA3, d4, d3
257
+ vsri.64 d0, \argA5, #64-27 @ d0 = Ba
258
+ vshl.u64 d1, d6, #36
259
+ veor.64 \argA3, d2
260
+ vbic.64 \argA4, d0, d4
261
+ vsri.64 d1, d6, #64-36 @ d1 = Be
262
+ veor.64 \argA4, d3
263
+ vbic.64 d6, d2, d1
264
+ vbic.64 \argA5, d1, d0
265
+ veor.64 d6, d0 @ d6 = new argA1
266
+ veor.64 \argA2, d1
267
+ vstr.64 d6, [r0, #\argA1] @ write new argA1 back to memory
268
+ veor.64 d5, d6 @ Ca ^= new argA1
269
+ veor.64 \argA5, d4
270
+ .endm
271
+
272
+ .macro KeccakS_ThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
273
+ @ Theta/Rho/Pi/Chi for one row, no Iota. argA1 is a byte offset from r0 (lane kept in memory); argA2..argA5 are D registers updated in place. Reads Da from d7 and De/Di/Do/Du from d8..d11; uses d0..d4 and d6 as scratch; XORs the new argA1 into d5.
274
+ @ Bo = ROL64((argA1^Da), 41)
275
+ @ Bu = ROL64((argA2^De), 2)
276
+ @ Ba = ROL64((argA3^Di), 62)
277
+ @ Be = ROL64((argA4^Do), 55)
278
+ @ Bi = ROL64((argA5^Du), 39)
279
+ @ argA1 = Ba ^((~Be)& Bi )
280
+ @ Ca ^= argA1
281
+ @ argA2 = Be ^((~Bi)& Bo )
282
+ @ argA3 = Bi ^((~Bo)& Bu )
283
+ @ argA4 = Bo ^((~Bu)& Ba )
284
+ @ argA5 = Bu ^((~Ba)& Be )
285
+ veor.64 \argA2, \argA2, d8
286
+ veor.64 \argA3, \argA3, d9
287
+ vshl.u64 d4, \argA2, #2
288
+ veor.64 \argA5, \argA5, d11
289
+ vshl.u64 d0, \argA3, #62
290
+ vldr.64 d6, [r0, #\argA1] @ load the argA1 lane from memory
291
+ vsri.64 d4, \argA2, #64-2 @ d4 = Bu
292
+ veor.64 \argA4, \argA4, d10
293
+ vsri.64 d0, \argA3, #64-62 @ d0 = Ba
294
+ vshl.u64 d1, \argA4, #55
295
+ veor.64 d6, d6, d7 @ d6 = argA1 ^ Da
296
+ vshl.u64 d2, \argA5, #39
297
+ vsri.64 d1, \argA4, #64-55 @ d1 = Be
298
+ vbic.64 \argA4, d0, d4
299
+ vsri.64 d2, \argA5, #64-39 @ d2 = Bi
300
+ vbic.64 \argA2, d1, d0 @ argA2 used as a temporary: Be & ~Ba
301
+ vshl.u64 d3, d6, #41
302
+ veor.64 \argA5, d4, \argA2 @ new argA5 = Bu ^ (~Ba & Be)
303
+ vbic.64 \argA2, d2, d1 @ temporary: Bi & ~Be
304
+ vsri.64 d3, d6, #64-41 @ d3 = Bo
305
+ veor.64 d6, d0, \argA2 @ d6 = new argA1
306
+ vbic.64 \argA2, d3, d2 @ argA2 now rebuilt with its own result
307
+ vbic.64 \argA3, d4, d3
308
+ veor.64 \argA2, d1
309
+ vstr.64 d6, [r0, #\argA1] @ write new argA1 back to memory
310
+ veor.64 d5, d6 @ Ca ^= new argA1
311
+ veor.64 \argA3, d2
312
+ veor.64 \argA4, d3
313
+ .endm
314
+
315
+ @ --- macros for Parallel permutation
316
+
317
+ .macro m_pls start
318
+ .if \start != -1
319
+ add r3, r0, #\start @ r3 = r0 + start; nothing is emitted when start is -1, so r3 keeps its current value
320
+ .endif
321
+ .endm
322
+
323
+ .macro m_ld qreg, next
324
+ .if \next == 16
325
+ vld1.64 { \qreg }, [r3:128]! @ load qreg from the 128-bit-aligned address in r3, then advance r3 by the 16 bytes loaded
326
+ .else
327
+ vld1.64 { \qreg }, [r3:128], r4 @ same load, but advance r3 by r4 (the caller must have put the stride in r4)
328
+ .endif
329
+ .endm
330
+
331
+ .macro m_st qreg, next
332
+ .if \next == 16
333
+ vst1.64 { \qreg }, [r3:128]! @ store qreg to the 128-bit-aligned address in r3, then advance r3 by the 16 bytes stored
334
+ .else
335
+ vst1.64 { \qreg }, [r3:128], r4 @ same store, but advance r3 by r4 (the caller must have put the stride in r4)
336
+ .endif
337
+ .endm
338
+
339
+ .macro KeccakP_ThetaRhoPiChiIota ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
340
+ @ Round step for one row on Q registers, including Iota. On entry q0..q4 hold Ca, Ce, Ci, Co, Cu and r3 must already address the first lane (no m_pls precedes the first load). ofs1..ofs5 and ofsn1 are byte offsets from r0 (-1 keeps r3 as is); next is the r3 step in bytes after each access. On exit q0..q4 hold the new argA1..argA5 and r3 = r0 + ofsn1 unless ofsn1 is -1.
341
+ @ De = Ca ^ ROL64(Ci, 1)
342
+ @ Di = Ce ^ ROL64(Co, 1)
343
+ @ Do = Ci ^ ROL64(Cu, 1)
344
+ @ Du = Co ^ ROL64(Ca, 1)
345
+ @ Da = Cu ^ ROL64(Ce, 1)
346
+ vadd.u64 q6, q2, q2 @ x << 1; the matching vsri #63 below completes ROL64(x, 1)
347
+ vadd.u64 q7, q3, q3
348
+ vadd.u64 q8, q4, q4
349
+ vadd.u64 q9, q0, q0
350
+ vadd.u64 q5, q1, q1
351
+
352
+ vsri.64 q6, q2, #63
353
+ vsri.64 q7, q3, #63
354
+ vsri.64 q8, q4, #63
355
+ vsri.64 q9, q0, #63
356
+ vsri.64 q5, q1, #63
357
+
358
+ veor.64 q6, q6, q0 @ q6 = De
359
+ veor.64 q7, q7, q1 @ q7 = Di
360
+ veor.64 q8, q8, q2 @ q8 = Do
361
+ .if \next != 16
362
+ mov r4, #\next @ r4 = step used by m_ld/m_st when next is not 16
363
+ .endif
364
+ veor.64 q9, q9, q3 @ q9 = Du
365
+ veor.64 q5, q5, q4 @ q5 = Da
366
+
367
+ @ Ba = argA1^Da
368
+ @ Be = ROL64(argA2^De, 44)
369
+ @ Bi = ROL64(argA3^Di, 43)
370
+ @ Bo = ROL64(argA4^Do, 21)
371
+ @ Bu = ROL64(argA5^Du, 14)
372
+ m_ld q10, \next
373
+ m_pls \ofs2
374
+ m_ld q1, \next
375
+ m_pls \ofs3
376
+ veor.64 q10, q10, q5 @ q10 = Ba
377
+ m_ld q2, \next
378
+ m_pls \ofs4
379
+ veor.64 q1, q1, q6
380
+ m_ld q3, \next
381
+ m_pls \ofs5
382
+ veor.64 q2, q2, q7
383
+ m_ld q4, \next
384
+ veor.64 q3, q3, q8
385
+ mov r6, r5 @ r6 = write cursor starting at r5 (NOTE(review): r5 presumably addresses a 64-byte scratch area set up by the caller - confirm)
386
+ veor.64 q4, q4, q9
387
+
388
+ vst1.64 { q6 }, [r6:128]! @ save De at the r6 cursor (Da stays in q5 only; it is not stored here)
389
+ vshl.u64 q11, q1, #44
390
+ vshl.u64 q12, q2, #43
391
+ vst1.64 { q7 }, [r6:128]! @ save Di
392
+ vshl.u64 q13, q3, #21
393
+ vshl.u64 q14, q4, #14
394
+ vst1.64 { q8 }, [r6:128]! @ save Do
395
+ vsri.64 q11, q1, #64-44 @ q11 = Be
396
+ vsri.64 q12, q2, #64-43 @ q12 = Bi
397
+ vst1.64 { q9 }, [r6:128]! @ save Du
398
+ vsri.64 q13, q3, #64-21 @ q13 = Bo
399
+ vsri.64 q14, q4, #64-14 @ q14 = Bu
400
+
401
+ @ argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
402
+ @ argA2 = Be ^(~Bi & Bo)
403
+ @ argA3 = Bi ^(~Bo & Bu)
404
+ @ argA4 = Bo ^(~Bu & Ba)
405
+ @ argA5 = Bu ^(~Ba & Be)
406
+ vld1.64 { d30 }, [r1:64] @ low half of q15 = round constant (r1 not advanced yet)
407
+ vbic.64 q0, q12, q11
408
+ vbic.64 q1, q13, q12
409
+ vld1.64 { d31 }, [r1:64]! @ high half of q15 = the same constant; r1 now advances to the next one
410
+ veor.64 q0, q10
411
+ vbic.64 q4, q11, q10
412
+ veor.64 q0, q15 @ Iota: the same constant is applied to both 64-bit halves
413
+ vbic.64 q2, q14, q13
414
+ vbic.64 q3, q10, q14
415
+
416
+ m_pls \ofs1
417
+ veor.64 q1, q11
418
+ m_st q0, \next
419
+ m_pls \ofs2
420
+ veor.64 q2, q12
421
+ m_st q1, \next
422
+ m_pls \ofs3
423
+ veor.64 q3, q13
424
+ m_st q2, \next
425
+ m_pls \ofs4
426
+ veor.64 q4, q14
427
+ m_st q3, \next
428
+ m_pls \ofs5
429
+ m_st q4, \next
430
+ m_pls \ofsn1
431
+ .endm
432
+
433
+ .macro KeccakP_ThetaRhoPiChi ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1, Bb1, Bb2, Bb3, Bb4, Bb5, Rr1, Rr2, Rr3, Rr4, Rr5
434
+ @ Generic group of five lanes. Bb1..Bb5 name the q registers receiving the rotated lanes, Rr1..Rr5 the rotation counts. The chi step reads q10..q14 as Ba, Be, Bi, Bo, Bu. Results are stored and XORed into q0..q4; q6..q9 are reloaded from the scratch area at r5.
435
+ @ Bb1 = ROL64((argA1^Da), Rr1)
436
+ @ Bb2 = ROL64((argA2^De), Rr2)
437
+ @ Bb3 = ROL64((argA3^Di), Rr3)
438
+ @ Bb4 = ROL64((argA4^Do), Rr4)
439
+ @ Bb5 = ROL64((argA5^Du), Rr5)
440
+
441
+ .if \next != 16
442
+ mov r4, #\next @ r4 = pointer step used by m_ld / m_st
443
+ .endif
444
+
445
+ m_ld \Bb1, \next
446
+ m_pls \ofs2
447
+ m_ld \Bb2, \next
448
+ m_pls \ofs3
449
+ veor.64 q15, q5, \Bb1 @ result goes to q15 so that Da stays intact in q5
450
+ m_ld \Bb3, \next
451
+ m_pls \ofs4
452
+ veor.64 q6, q6, \Bb2 @ q6..q9 (De..Du) are overwritten here and restored from scratch further down
453
+ m_ld \Bb4, \next
454
+ m_pls \ofs5
455
+ veor.64 q7, q7, \Bb3
456
+ m_ld \Bb5, \next
457
+ veor.64 q8, q8, \Bb4
458
+ veor.64 q9, q9, \Bb5
459
+
460
+ vshl.u64 \Bb1, q15, #\Rr1
461
+ vshl.u64 \Bb2, q6, #\Rr2
462
+ vshl.u64 \Bb3, q7, #\Rr3
463
+ vshl.u64 \Bb4, q8, #\Rr4
464
+ vshl.u64 \Bb5, q9, #\Rr5
465
+
466
+ vsri.64 \Bb1, q15, #64-\Rr1
467
+ vsri.64 \Bb2, q6, #64-\Rr2
468
+ vsri.64 \Bb3, q7, #64-\Rr3
469
+ vsri.64 \Bb4, q8, #64-\Rr4
470
+ vsri.64 \Bb5, q9, #64-\Rr5
471
+
472
+ @ argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
473
+ @ argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
474
+ @ argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
475
+ @ argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
476
+ @ argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
477
+ vbic.64 q15, q12, q11 @ vbic computes first & ~second: q15 = Bi & ~Be
478
+ mov r6, r5 @ r6 = read cursor into the scratch area holding De, Di, Do, Du
479
+ vbic.64 q6, q13, q12
480
+ m_pls \ofs1
481
+ vbic.64 q7, q14, q13
482
+ vbic.64 q8, q10, q14
483
+ vbic.64 q9, q11, q10
484
+
485
+ veor.64 q15, q15, q10
486
+ veor.64 q6, q6, q11
487
+
488
+ m_st q15, \next
489
+ m_pls \ofs2
490
+ veor.64 q7, q7, q12
491
+
492
+ m_st q6, \next
493
+ m_pls \ofs3
494
+ veor.64 q1, q1, q6 @ Ce ^= argA2
495
+ vld1.64 { q6 }, [r6:128]! @ restore De
496
+ veor.64 q8, q8, q13
497
+
498
+ m_st q7, \next
499
+ m_pls \ofs4
500
+ veor.64 q2, q2, q7 @ Ci ^= argA3
501
+ vld1.64 { q7 }, [r6:128]! @ restore Di
502
+ veor.64 q9, q9, q14
503
+
504
+ m_st q8, \next
505
+ m_pls \ofs5
506
+ veor.64 q3, q3, q8 @ Co ^= argA4
507
+
508
+ m_st q9, \next
509
+
510
+ vld1.64 { q8 }, [r6:128]! @ restore Do
511
+ veor.64 q4, q4, q9 @ Cu ^= argA5
512
+ m_pls \ofsn1
513
+ vld1.64 { q9 }, [r6:128]! @ restore Du
514
+ veor.64 q0, q0, q15 @ Ca ^= argA1
515
+ .endm
516
+
517
+ .macro KeccakP_ThetaRhoPiChi1 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1 @ second lane group: lanes land in Bi, Bo, Bu, Ba, Be (q12, q13, q14, q10, q11)
518
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20 @ rotation counts 3, 45, 61, 28, 20
519
+ .endm
520
+
521
+ .macro KeccakP_ThetaRhoPiChi2 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1 @ third lane group: lanes land in Bu, Ba, Be, Bi, Bo (q14, q10, q11, q12, q13)
522
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8 @ rotation counts 18, 1, 6, 25, 8
523
+ .endm
524
+
525
+ .macro KeccakP_ThetaRhoPiChi3 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1 @ fourth lane group: lanes land in Be, Bi, Bo, Bu, Ba (q11, q12, q13, q14, q10)
526
+ KeccakP_ThetaRhoPiChi \ofs1, \ofs2, \ofs3, \ofs4, \ofs5, \next, \ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27 @ rotation counts 36, 10, 15, 56, 27
527
+ .endm
528
+
529
+ .macro KeccakP_ThetaRhoPiChi4 ofs1, ofs2, ofs3, ofs4, ofs5, next, ofsn1
530
+ @ Last group of five lanes in a round. Da..Du in q5..q9 are consumed in place and not restored; the results are stored and XORed into q0..q4.
531
+ @ Bo = ROL64((argA1^Da), 41)
532
+ @ Bu = ROL64((argA2^De), 2)
533
+ @ Ba = ROL64((argA3^Di), 62)
534
+ @ Be = ROL64((argA4^Do), 55)
535
+ @ Bi = ROL64((argA5^Du), 39)
536
+ @ KeccakChi
537
+
538
+ .if \next != 16
539
+ mov r4, #\next @ r4 = pointer step used by m_ld / m_st
540
+ .endif
541
+
542
+ m_ld q13, \next
543
+ m_pls \ofs2
544
+ m_ld q14, \next
545
+ m_pls \ofs3
546
+ veor.64 q5, q5, q13 @ q5..q9 now hold lane ^ D; the D values themselves are gone
547
+ m_ld q10, \next
548
+ m_pls \ofs4
549
+ veor.64 q6, q6, q14
550
+ m_ld q11, \next
551
+ m_pls \ofs5
552
+ veor.64 q7, q7, q10
553
+ m_ld q12, \next
554
+ veor.64 q8, q8, q11
555
+ veor.64 q9, q9, q12
556
+
557
+ vshl.u64 q13, q5, #41
558
+ vshl.u64 q14, q6, #2
559
+ vshl.u64 q10, q7, #62
560
+ vshl.u64 q11, q8, #55
561
+ vshl.u64 q12, q9, #39
562
+
563
+ vsri.64 q13, q5, #64-41 @ q13 = Bo
564
+ vsri.64 q14, q6, #64-2 @ q14 = Bu
565
+ vsri.64 q11, q8, #64-55 @ q11 = Be
566
+ vsri.64 q12, q9, #64-39 @ q12 = Bi
567
+ vsri.64 q10, q7, #64-62 @ q10 = Ba
568
+
569
+ vbic.64 q5, q12, q11 @ vbic computes first & ~second: q5 = Bi & ~Be
570
+ vbic.64 q6, q13, q12
571
+ vbic.64 q7, q14, q13
572
+ vbic.64 q8, q10, q14
573
+ vbic.64 q9, q11, q10
574
+ veor.64 q5, q5, q10
575
+ veor.64 q6, q6, q11
576
+ veor.64 q7, q7, q12
577
+ veor.64 q8, q8, q13
578
+ m_pls \ofs1
579
+ veor.64 q9, q9, q14
580
+ m_st q5, \next
581
+ m_pls \ofs2
582
+ veor.64 q0, q0, q5 @ fold the five results into q0..q4
583
+ m_st q6, \next
584
+ m_pls \ofs3
585
+ veor.64 q1, q1, q6
586
+ m_st q7, \next
587
+ m_pls \ofs4
588
+ veor.64 q2, q2, q7
589
+ m_st q8, \next
590
+ m_pls \ofs5
591
+ veor.64 q3, q3, q8
592
+ m_st q9, \next
593
+ m_pls \ofsn1
594
+ veor.64 q4, q4, q9
595
+ .endm
596
+
597
+ @----------------------------------------------------------------------------
598
+ @
599
+ @ void KeccakP1600times2_StaticInitialize( void )
600
+ @ Does nothing: the routine returns immediately.
601
+ .align 8
602
+ .global KeccakP1600times2_StaticInitialize
603
+ .type KeccakP1600times2_StaticInitialize, %function;
604
+ KeccakP1600times2_StaticInitialize:
605
+ bx lr
606
+
607
+
608
+ @----------------------------------------------------------------------------
609
+ @
610
+ @ void KeccakP1600times2_InitializeAll( void *states )
611
+ @ Writes 50 zeroed 64-bit lanes (400 bytes) starting at states (r0).
612
+ .align 8
613
+ .global KeccakP1600times2_InitializeAll
614
+ .type KeccakP1600times2_InitializeAll, %function;
615
+ KeccakP1600times2_InitializeAll:
616
+ vmov.i64 q0, #0 @ q0..q3 alias d0..d7: eight zero lanes to store
617
+ vmov.i64 q1, #0
618
+ vmov.i64 q2, #0
619
+ vmov.i64 q3, #0
620
+ vstm r0!, { d0 - d7 } @ 8 (clear 8 lanes at a time)
621
+ vstm r0!, { d0 - d7 } @ 16
622
+ vstm r0!, { d0 - d7 } @ 24
623
+ vstm r0!, { d0 - d7 } @ 32
624
+ vstm r0!, { d0 - d7 } @ 40
625
+ vstm r0!, { d0 - d7 } @ 48
626
+ vstm r0!, { d0 - d1} @ 50
627
+ bx lr
628
+
629
+
630
+
631
+ @----------------------------------------------------------------------------
632
+ @ XORs one byte into the state of one instance. Lanes of the two instances alternate in memory: a lane pair is 16 bytes and instance 1 sits 8 bytes after instance 0.
633
+ @ void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
634
+ @ In: r0 = states, r1 = instanceIndex, r2 = byte, r3 = offset. Clobbers r0, r1, r3.
635
+ .align 8
636
+ .global KeccakP1600times2_AddByte
637
+ .type KeccakP1600times2_AddByte, %function;
638
+ KeccakP1600times2_AddByte:
639
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
640
+ lsr r1, r3, #3 @ states += (offset & ~7) * 2
641
+ add r0, r0, r1, LSL #4
642
+ and r3, r3, #7
643
+ add r0, r0, r3 @ states += offset & 7
644
+ ldrb r1, [r0]
645
+ eor r1, r1, r2 @ state byte ^= byte
646
+ strb r1, [r0]
647
+ bx lr
648
+
649
+
650
+ @----------------------------------------------------------------------------
651
+ @ XORs length bytes from data into the state of one instance, starting at byte offset. Runs in three phases: bytes up to the next lane boundary, whole 8-byte lanes, trailing bytes.
652
+ @ void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
653
+ @ unsigned int offset, unsigned int length )
654
+ @ In: r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length. r4-r7 are saved and restored.
655
+ .align 8
656
+ .global KeccakP1600times2_AddBytes
657
+ .type KeccakP1600times2_AddBytes, %function;
658
+ KeccakP1600times2_AddBytes:
659
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
660
+ ldr r1, [sp, #0*4] @ r1 = length
661
+ cmp r1, #0
662
+ beq KeccakP1600times2_AddBytes_Exit @ nothing to add; r4-r7 have not been pushed yet
663
+ push { r4- r7 }
664
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
665
+ add r0, r0, r4, LSL #4
666
+ ands r3, r3, #7 @ .if (offset & 7) != 0
667
+ beq KeccakP1600times2_AddBytes_CheckLanes
668
+ add r0, r0, r3 @ states += offset & 7
669
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
670
+ KeccakP1600times2_AddBytes_LoopBytesFirst:
671
+ ldrb r4, [r0]
672
+ ldrb r5, [r2], #1
673
+ eor r4, r4, r5
674
+ subs r1, r1, #1 @ length--; strb below leaves the flags untouched
675
+ strb r4, [r0], #1
676
+ beq KeccakP1600times2_AddBytes_Done @ length exhausted inside the first lane
677
+ subs r3, r3, #1
678
+ bne KeccakP1600times2_AddBytes_LoopBytesFirst
679
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
680
+ KeccakP1600times2_AddBytes_CheckLanes:
681
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
682
+ beq KeccakP1600times2_AddBytes_CheckBytesLast
683
+ KeccakP1600times2_AddBytes_LoopLanes:
684
+ ldr r4, [r0] @ r5:r4 = state lane
685
+ ldr r5, [r0, #4]
686
+ ldr r6, [r2], #4 @ r7:r6 = next 8 data bytes, read as two words
687
+ ldr r7, [r2], #4
688
+ eor r4, r4, r6
689
+ eor r5, r5, r7
690
+ subs r3, r3, #1
691
+ str r4, [r0], #4
692
+ str r5, [r0], #12 @ states += 8 (next lane of current state part)
693
+ bne KeccakP1600times2_AddBytes_LoopLanes
694
+ KeccakP1600times2_AddBytes_CheckBytesLast:
695
+ ands r1, r1, #7 @ r1 = bytes left after the whole lanes
696
+ beq KeccakP1600times2_AddBytes_Done
697
+ KeccakP1600times2_AddBytes_LoopBytesLast:
698
+ ldrb r4, [r0]
699
+ ldrb r5, [r2], #1
700
+ eor r4, r4, r5
701
+ subs r1, r1, #1
702
+ strb r4, [r0], #1
703
+ bne KeccakP1600times2_AddBytes_LoopBytesLast
704
+ KeccakP1600times2_AddBytes_Done:
705
+ pop { r4- r7 }
706
+ KeccakP1600times2_AddBytes_Exit:
707
+ bx lr
708
+
709
+
710
+ @----------------------------------------------------------------------------
711
+ @ XORs laneCount lanes into both instances. Instance 0 reads from data, instance 1 from data + 8 * laneOffset.
712
+ @ void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
713
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. r4-r7 are saved and restored.
714
+ .align 8
715
+ .global KeccakP1600times2_AddLanesAll
716
+ .type KeccakP1600times2_AddLanesAll, %function;
717
+ KeccakP1600times2_AddLanesAll:
718
+ cmp r2, #0
719
+ beq KeccakP1600times2_AddLanesAll_Exit
720
+ add r3, r1, r3, LSL #3 @ r3: data + 8 * laneOffset
721
+ push {r4 - r7}
722
+ KeccakP1600times2_AddLanesAll_Loop:
723
+ ldr r4, [r1], #4 @ index 0
724
+ ldr r5, [r1], #4
725
+ ldrd r6, r7, [r0] @ r7:r6 = state lane of instance 0
726
+ eor r6, r6, r4
727
+ eor r7, r7, r5
728
+ strd r6, r7, [r0], #8
729
+ ldr r4, [r3], #4 @ index 1
730
+ ldr r5, [r3], #4
731
+ ldrd r6, r7, [r0] @ r7:r6 = state lane of instance 1
732
+ eor r6, r6, r4
733
+ eor r7, r7, r5
734
+ strd r6, r7, [r0], #8
735
+ subs r2, r2, #1
736
+ bne KeccakP1600times2_AddLanesAll_Loop
737
+ pop {r4 - r7}
738
+ KeccakP1600times2_AddLanesAll_Exit:
739
+ bx lr
740
+
741
+
742
+ @----------------------------------------------------------------------------
743
+ @ Copies length bytes from data over the state of one instance, starting at byte offset. Runs in three phases: bytes up to the next lane boundary, whole 8-byte lanes, trailing bytes.
744
+ @ void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
745
+ @ unsigned int offset, unsigned int length )
746
+ @ In: r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length. r4-r5 are saved and restored.
747
+ .align 8
748
+ .global KeccakP1600times2_OverwriteBytes
749
+ .type KeccakP1600times2_OverwriteBytes, %function;
750
+ KeccakP1600times2_OverwriteBytes:
751
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
752
+ ldr r1, [sp, #0*4] @ r1 = length
753
+ cmp r1, #0
754
+ beq KeccakP1600times2_OverwriteBytes_Exit @ nothing to write; r4-r5 have not been pushed yet
755
+ push { r4-r5 }
756
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
757
+ add r0, r0, r4, LSL #4
758
+ ands r3, r3, #7 @ .if (offset & 7) != 0
759
+ beq KeccakP1600times2_OverwriteBytes_CheckLanes
760
+ add r0, r0, r3 @ states += offset & 7
761
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
762
+ KeccakP1600times2_OverwriteBytes_LoopBytesFirst:
763
+ ldrb r4, [r2], #1
764
+ strb r4, [r0], #1
765
+ subs r1, r1, #1
766
+ beq KeccakP1600times2_OverwriteBytes_Done @ length exhausted inside the first lane
767
+ subs r3, r3, #1
768
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
769
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
770
+ KeccakP1600times2_OverwriteBytes_CheckLanes:
771
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
772
+ beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
773
+ KeccakP1600times2_OverwriteBytes_LoopLanes:
774
+ ldr r4, [r2], #4 @ r5:r4 = next 8 data bytes, read as two words
775
+ ldr r5, [r2], #4
776
+ str r4, [r0], #4
777
+ str r5, [r0], #12 @ states += 8 (next lane of current state part)
778
+ subs r3, r3, #1
779
+ bne KeccakP1600times2_OverwriteBytes_LoopLanes
780
+ KeccakP1600times2_OverwriteBytes_CheckBytesLast:
781
+ ands r1, r1, #7 @ r1 = bytes left after the whole lanes
782
+ beq KeccakP1600times2_OverwriteBytes_Done
783
+ KeccakP1600times2_OverwriteBytes_LoopBytesLast:
784
+ ldrb r4, [r2], #1
785
+ subs r1, r1, #1
786
+ strb r4, [r0], #1
787
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
788
+ KeccakP1600times2_OverwriteBytes_Done:
789
+ pop { r4- r5 }
790
+ KeccakP1600times2_OverwriteBytes_Exit:
791
+ bx lr
792
+
793
+
794
+ @----------------------------------------------------------------------------
795
+ @ Copies laneCount lanes over both instances. Instance 0 reads from data, instance 1 from data + 8 * laneOffset. Uses NEON transfers when data is 8-byte aligned and word loads otherwise.
796
+ @ void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
797
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. Clobbers r12 and d0-d3; r4-r5 are saved and restored on the unaligned path.
798
+ .align 8
799
+ .global KeccakP1600times2_OverwriteLanesAll
800
+ .type KeccakP1600times2_OverwriteLanesAll, %function;
801
+ KeccakP1600times2_OverwriteLanesAll:
802
+ cmp r2, #0
803
+ beq KeccakP1600times2_OverwriteLanesAll_Exit
804
+ lsls r12, r1, #32-3 @ keeps only the low 3 bits of data: non-zero means not 8-byte aligned
805
+ bne KeccakP1600times2_OverwriteLanesAll_Unaligned
806
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
807
+ lsrs r2, r2, #1 @ r2 = number of lane pairs; carry set when laneCount is odd
808
+ bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned @ even count: only pairs to copy
809
+ vldm r1!, { d0 } @ odd count: copy one lane per instance first
810
+ vldm r3!, { d1 }
811
+ vstm r0!, { d0 - d1 }
812
+ beq KeccakP1600times2_OverwriteLanesAll_Exit @ Z still comes from lsrs: no pairs remain
813
+ KeccakP1600times2_OverwriteLanesAll_LoopAligned:
814
+ vldm r1!, { d0 } @ d0, d2 = two lanes for instance 0
815
+ vldm r1!, { d2 }
816
+ vldm r3!, { d1 } @ d1, d3 = two lanes for instance 1
817
+ vldm r3!, { d3 }
818
+ subs r2, r2, #1
819
+ vstm r0!, { d0 - d3 } @ stored interleaved: inst0, inst1, inst0, inst1
820
+ bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
821
+ bx lr
822
+ KeccakP1600times2_OverwriteLanesAll_Unaligned:
823
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
824
+ push { r4, r5 }
825
+ KeccakP1600times2_OverwriteLanesAll_LoopUnaligned:
826
+ ldr r4, [r1], #4 @ lane for instance 0, read as two words
827
+ ldr r5, [r1], #4
828
+ strd r4, r5, [r0], #8
829
+ ldr r4, [r3], #4 @ lane for instance 1
830
+ ldr r5, [r3], #4
831
+ subs r2, r2, #1
832
+ strd r4, r5, [r0], #8
833
+ bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
834
+ pop { r4, r5 }
835
+ KeccakP1600times2_OverwriteLanesAll_Exit:
836
+ bx lr
837
+
838
+
839
+ @----------------------------------------------------------------------------
840
+ @ Zeroes the first byteCount bytes of one instance: whole lanes first, then the remaining bytes of the following lane.
841
+ @ void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
842
+ @ In: r0 = states, r1 = instanceIndex, r2 = byteCount. Clobbers r0-r3 and d0.
843
+ .align 8
844
+ .global KeccakP1600times2_OverwriteWithZeroes
845
+ .type KeccakP1600times2_OverwriteWithZeroes, %function;
846
+ KeccakP1600times2_OverwriteWithZeroes:
847
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
848
+ lsrs r1, r2, #3 @ r1: laneCount
849
+ beq KeccakP1600times2_OverwriteWithZeroes_Bytes
850
+ vmov.i64 d0, #0
851
+ KeccakP1600times2_OverwriteWithZeroes_LoopLanes:
852
+ subs r1, r1, #1 @ flags are consumed by the bne below; vstm and add leave them untouched
853
+ vstm r0!, { d0 }
854
+ add r0, r0, #8 @ skip the other instance's lane
855
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
856
+ KeccakP1600times2_OverwriteWithZeroes_Bytes:
857
+ ands r2, r2, #7 @ r2: byteCount remaining
858
+ beq KeccakP1600times2_OverwriteWithZeroes_Exit
859
+ movs r3, #0
860
+ KeccakP1600times2_OverwriteWithZeroes_LoopBytes:
861
+ subs r2, r2, #1
862
+ strb r3, [r0], #1
863
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
864
+ KeccakP1600times2_OverwriteWithZeroes_Exit:
865
+ bx lr
866
+
867
+
868
+ @----------------------------------------------------------------------------
869
+ @ Copies length bytes of one instance's state, starting at byte offset, into data. The code writes through data, so the pointer is documented here without const.
870
+ @ void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
871
+ @ unsigned int offset, unsigned int length )
872
+ @ In: r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length. r4-r5 are saved and restored.
873
+ .align 8
874
+ .global KeccakP1600times2_ExtractBytes
875
+ .type KeccakP1600times2_ExtractBytes, %function;
876
+ KeccakP1600times2_ExtractBytes:
877
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
878
+ ldr r1, [sp, #0*4] @ r1 = length
879
+ cmp r1, #0
880
+ beq KeccakP1600times2_ExtractBytes_Exit @ nothing to copy; r4-r5 have not been pushed yet
881
+ push { r4-r5 }
882
+ lsr r4, r3, #3 @ states += (offset & ~7) * 2
883
+ add r0, r0, r4, LSL #4
884
+ ands r3, r3, #7 @ .if (offset & 7) != 0
885
+ beq KeccakP1600times2_ExtractBytes_CheckLanes
886
+ add r0, r0, r3 @ states += offset & 7
887
+ rsb r3, r3, #8 @ lenInLane = 8 - (offset & 7)
888
+ KeccakP1600times2_ExtractBytes_LoopBytesFirst:
889
+ ldrb r4, [r0], #1
890
+ strb r4, [r2], #1
891
+ subs r1, r1, #1
892
+ beq KeccakP1600times2_ExtractBytes_Done @ length exhausted inside the first lane
893
+ subs r3, r3, #1
894
+ bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
895
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
896
+ KeccakP1600times2_ExtractBytes_CheckLanes:
897
+ lsrs r3, r1, #3 @ r3 = number of whole lanes left
898
+ beq KeccakP1600times2_ExtractBytes_CheckBytesLast
899
+ KeccakP1600times2_ExtractBytes_LoopLanes:
900
+ ldr r4, [r0], #4
901
+ ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
902
+ str r4, [r2], #4 @ written to data as two words
903
+ str r5, [r2], #4
904
+ subs r3, r3, #1
905
+ bne KeccakP1600times2_ExtractBytes_LoopLanes
906
+ KeccakP1600times2_ExtractBytes_CheckBytesLast:
907
+ ands r1, r1, #7 @ r1 = bytes left after the whole lanes
908
+ beq KeccakP1600times2_ExtractBytes_Done
909
+ KeccakP1600times2_ExtractBytes_LoopBytesLast:
910
+ ldrb r4, [r0], #1
911
+ subs r1, r1, #1
912
+ strb r4, [r2], #1
913
+ bne KeccakP1600times2_ExtractBytes_LoopBytesLast
914
+ KeccakP1600times2_ExtractBytes_Done:
915
+ pop { r4-r5 }
916
+ KeccakP1600times2_ExtractBytes_Exit:
917
+ bx lr
918
+
919
+
920
+ @----------------------------------------------------------------------------
921
+ @ Copies laneCount lanes out of both instances. Instance 0 goes to data, instance 1 to data + 8 * laneOffset. Uses NEON transfers when data is 8-byte aligned and word stores otherwise.
922
+ @ void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
923
+ @ In: r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset. Clobbers r12 and d0-d3; r4-r5 are saved and restored on the unaligned path.
924
+ .align 8
925
+ .global KeccakP1600times2_ExtractLanesAll
926
+ .type KeccakP1600times2_ExtractLanesAll, %function;
927
+ KeccakP1600times2_ExtractLanesAll:
928
+ cmp r2, #0
929
+ beq KeccakP1600times2_ExtractLanesAll_Exit
930
+ lsls r12, r1, #32-3 @ keeps only the low 3 bits of data: non-zero means not 8-byte aligned
931
+ bne KeccakP1600times2_ExtractLanesAll_Unaligned
932
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
933
+ lsrs r2, r2, #1 @ r2 = number of lane pairs; carry set when laneCount is odd
934
+ bcc KeccakP1600times2_ExtractLanesAll_LoopAligned @ even count: only pairs to copy
935
+ vldm r0!, { d0 - d1 } @ odd count: copy one lane per instance first
936
+ vstm r1!, { d0 }
937
+ vstm r3!, { d1 }
938
+ beq KeccakP1600times2_ExtractLanesAll_Exit @ Z still comes from lsrs: no pairs remain
939
+ KeccakP1600times2_ExtractLanesAll_LoopAligned:
940
+ vldm r0!, { d0 - d3 } @ interleaved in memory: inst0, inst1, inst0, inst1
941
+ subs r2, r2, #1
942
+ vstm r1!, { d0 } @ d0, d2 go to instance 0's output
943
+ vstm r1!, { d2 }
944
+ vstm r3!, { d1 } @ d1, d3 go to instance 1's output
945
+ vstm r3!, { d3 }
946
+ bne KeccakP1600times2_ExtractLanesAll_LoopAligned
947
+ bx lr
948
+ KeccakP1600times2_ExtractLanesAll_Unaligned:
949
+ add r3, r1, r3, LSL #3 @ r3(pointer instance 1): data + 8 * laneOffset
950
+ push { r4, r5 }
951
+ KeccakP1600times2_ExtractLanesAll_LoopUnaligned:
952
+ ldrd r4, r5, [r0], #8 @ lane of instance 0
953
+ str r4, [r1], #4
954
+ str r5, [r1], #4
955
+ ldrd r4, r5, [r0], #8 @ lane of instance 1
956
+ subs r2, r2, #1
957
+ str r4, [r3], #4
958
+ str r5, [r3], #4
959
+ bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
960
+ pop { r4, r5 }
961
+ KeccakP1600times2_ExtractLanesAll_Exit:
962
+ bx lr
963
+
964
+
965
+ @----------------------------------------------------------------------------
966
+ @ Writes output[i] = state byte ^ input[i] for length bytes of one instance, starting at byte offset. The state itself is only read.
967
+ @ void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
968
+ @ const unsigned char *input, unsigned char *output,
969
+ @ unsigned int offset, unsigned int length )
970
+ @ In: r0 = states, r1 = instanceIndex, r2 = input, r3 = output, [sp] = offset, [sp, #4] = length. r4-r9 are saved and restored.
971
+ .align 8
972
+ .global KeccakP1600times2_ExtractAndAddBytes
973
+ .type KeccakP1600times2_ExtractAndAddBytes, %function;
974
+ KeccakP1600times2_ExtractAndAddBytes:
975
+ add r0, r0, r1, LSL #3 @ states += 8 * instanceIndex
976
+ ldr r1, [sp, #1*4] @ r1 = length
977
+ cmp r1, #0
978
+ beq KeccakP1600times2_ExtractAndAddBytes_Exit @ nothing to do; r4-r9 have not been pushed yet
979
+ push { r4 - r9 }
980
+ ldr r8, [sp, #6*4] @ r8 = offset
981
+ lsr r4, r8, #3 @ states += (offset & ~7) * 2
982
+ add r0, r0, r4, LSL #4
983
+ ands r8, r8, #7 @ .if (offset & 7) != 0
984
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
985
+ add r0, r0, r8 @ states += offset & 7
986
+ rsb r8, r8, #8 @ lenInLane = 8 - (offset & 7)
987
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst:
988
+ ldrb r4, [r0], #1
989
+ ldrb r5, [r2], #1
990
+ eor r4, r4, r5
991
+ strb r4, [r3], #1
992
+ subs r1, r1, #1
993
+ beq KeccakP1600times2_ExtractAndAddBytes_Done @ length exhausted inside the first lane
994
+ subs r8, r8, #1
995
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
996
+ add r0, r0, #8 @ states += 8 (next lane of current state part)
997
+ KeccakP1600times2_ExtractAndAddBytes_CheckLanes:
998
+ lsrs r8, r1, #3 @ r8 = number of whole lanes left
999
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1000
+ KeccakP1600times2_ExtractAndAddBytes_LoopLanes:
1001
+ ldr r4, [r0], #4
1002
+ ldr r5, [r0], #12 @ states += 8 (next lane of current state part)
1003
+ ldr r6, [r2], #4
1004
+ ldr r7, [r2], #4
1005
+ eor r4, r4, r6
1006
+ eor r5, r5, r7
1007
+ str r4, [r3], #4
1008
+ str r5, [r3], #4 @ output advances by one lane (8 bytes) per iteration
1009
+ subs r8, r8, #1
1010
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1011
+ KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast:
1012
+ ands r1, r1, #7 @ r1 = bytes left after the whole lanes
1013
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
1014
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast:
1015
+ ldrb r4, [r0], #1
1016
+ ldrb r5, [r2], #1
1017
+ eor r4, r4, r5
1018
+ strb r4, [r3], #1
1019
+ subs r1, r1, #1
1020
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1021
+ KeccakP1600times2_ExtractAndAddBytes_Done:
1022
+ pop { r4 - r9 }
1023
+ KeccakP1600times2_ExtractAndAddBytes_Exit:
1024
+ bx lr
1025
+
1026
+
1027
+ @----------------------------------------------------------------------------
1028
+ @ For both instances, writes output lane = state lane ^ input lane for laneCount lanes. Instance 1 uses input and output advanced by 8 * laneOffset bytes.
1029
+ @ void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1030
+ @ const unsigned char *input, unsigned char *output,
1031
+ @ unsigned int laneCount, unsigned int laneOffset )
1032
+ @ In: r0 = states, r1 = input, r2 = output, r3 = laneCount, [sp] = laneOffset. Clobbers r12 and d0-d7; r4-r5 or r4-r9 are saved and restored.
1033
+ .align 8
1034
+ .global KeccakP1600times2_ExtractAndAddLanesAll
1035
+ .type KeccakP1600times2_ExtractAndAddLanesAll, %function;
1036
+ KeccakP1600times2_ExtractAndAddLanesAll:
1037
+ cmp r3, #0
1038
+ beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1039
+ orr r12, r1, r2 @ combine the low bits of both pointers for a single alignment test
1040
+ lsls r12, r12, #32-3 @ unaligned access .if input or output unaligned
1041
+ bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1042
+ push {r4,r5}
1043
+ ldr r12, [sp, #2*4] @ r12 = laneOffset
1044
+ lsrs r3, r3, #1 @ r3 = number of lane pairs; carry set when laneCount is odd (the adds below keep the flags)
1045
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1046
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1047
+ bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned @ even count: only pairs to process
1048
+ vldm r0!, { d0 - d1 } @ odd count: handle one lane per instance first
1049
+ vldm r1!, { d2 }
1050
+ vldm r4!, { d3 }
1051
+ veor q0, q0, q1 @ d0 ^= d2 (instance 0), d1 ^= d3 (instance 1)
1052
+ vstm r2!, { d0 }
1053
+ vstm r5!, { d1 }
1054
+ beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone @ Z still comes from lsrs: no pairs remain
1055
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned:
1056
+ vldm r0!, { d0 - d3 } @ state lanes, interleaved: inst0, inst1, inst0, inst1
1057
+ vldm r1!, { d4 } @ input lanes loaded into the matching positions of q2, q3
1058
+ vldm r1!, { d6 }
1059
+ vldm r4!, { d5 }
1060
+ vldm r4!, { d7 }
1061
+ subs r3, r3, #1
1062
+ veor q0, q0, q2
1063
+ veor q1, q1, q3
1064
+ vstm r2!, { d0 } @ d0, d2 go to instance 0's output
1065
+ vstm r2!, { d2 }
1066
+ vstm r5!, { d1 } @ d1, d3 go to instance 1's output
1067
+ vstm r5!, { d3 }
1068
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1069
+ KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone:
1070
+ pop {r4,r5}
1071
+ bx lr
1072
+ KeccakP1600times2_ExtractAndAddLanesAll_Unaligned:
1073
+ push {r4-r9}
1074
+ ldr r12, [sp, #6*4] @ r12 = laneOffset
1075
+ add r4, r1, r12, LSL #3 @ r4(input instance 1): input + 8 * laneOffset
1076
+ add r5, r2, r12, LSL #3 @ r5(output instance 1): output + 8 * laneOffset
1077
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned:
1078
+ ldrd r8, r9, [r0], #8 @ state lane of instance 0
1079
+ ldr r6, [r1], #4
1080
+ ldr r7, [r1], #4
1081
+ eor r8, r8, r6
1082
+ eor r9, r9, r7
1083
+ str r8, [r2], #4
1084
+ str r9, [r2], #4
1085
+ ldrd r8, r9, [r0], #8 @ state lane of instance 1
1086
+ ldr r6, [r4], #4
1087
+ ldr r7, [r4], #4
1088
+ eor r8, r8, r6
1089
+ eor r9, r9, r7
1090
+ str r8, [r5], #4
1091
+ subs r3, r3, #1
1092
+ str r9, [r5], #4
1093
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1094
+ pop { r4 - r9 }
1095
+ KeccakP1600times2_ExtractAndAddLanesAll_Exit:
1096
+ bx lr
1097
+
1098
+
1099
+ @----------------------------------------------------------------------------
1100
+ @ Entry point for 6 rounds. Swaps the lane pairs listed below in memory while accumulating the column parities in q0..q4, then jumps into the shared round code at KeccakP1600times2_PermuteAll_Round2.
1101
+ @ void KeccakP1600times2_PermuteAll_6rounds( void *states )
1102
+ @ In: r0 = states. The _xx symbols are lane byte offsets defined outside this excerpt.
1103
+ .align 8
1104
+ .global KeccakP1600times2_PermuteAll_6rounds
1105
+ .type KeccakP1600times2_PermuteAll_6rounds, %function;
1106
+ KeccakP1600times2_PermuteAll_6rounds:
1107
+ adr r1, KeccakP1600times2_Permute_RoundConstants6 @ r1 = first round constant to use
1108
+ movs r2, #6+2 @ NOTE(review): r2 is consumed by loop code outside this excerpt; presumably the +2 compensates for entering mid-loop - confirm
1109
+ vpush {q4-q7}
1110
+ push {r4-r7}
1111
+ sub sp, #4*2*8+8 @allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1112
+ add r5, sp, #8 @ r5 is rounded down to a 16-byte boundary by the bic further down
1113
+
1114
+ @ ba
1115
+ @ be = me, me = be
1116
+ @ bi = gi, gi = bi
1117
+ @ bo = so, so = bo
1118
+ @ bu = ku, ku = bu
1119
+
1120
+ @ ga = sa, sa = ga
1121
+ @ ge = ke, ke = ge
1122
+ @ go = mo, mo = go
1123
+ @ gu
1124
+
1125
+ @ ka = ma, ma = ka
1126
+ @ ki = si, si = ki
1127
+ @ ko
1128
+
1129
+ @ mu = su, su = mu
1130
+ @ mi
1131
+ @ se
1132
+
1133
+ @PrepareTheta
1134
+ @ Ca = ba ^ ga ^ ka ^ ma ^ sa
1135
+ @ Ce = be ^ ge ^ ke ^ me ^ se
1136
+ @ Ci = bi ^ gi ^ ki ^ mi ^ si
1137
+ @ Co = bo ^ go ^ ko ^ mo ^ so
1138
+ @ Cu = bu ^ gu ^ ku ^ mu ^ su
1139
+ vldm r0, { q0 - q4 } @ ba be bi bo bu
1140
+ bic r5, #15 @ r5 = 16-byte aligned scratch pointer inside the stack area reserved above
1141
+ add r3, r0, #_me
1142
+ vldm r3, { q6 } @ me
1143
+ vstm r3, { q1 } @ old be goes to the me slot
1144
+ veor.64 q1, q1, q6 @ Ce = be ^ me
1145
+ add r4, r0, #_be
1146
+ vstm r4!, { q6 } @ be
1147
+
1148
+ add r3, r0, #_ga
1149
+ vldm r3, { q10 - q14 } @ ga ge gi go gu
1150
+ add r3, r0, #_gi
1151
+ vstm r3, { q2 } @ old bi goes to the gi slot
1152
+ veor.64 q2, q2, q12
1153
+ vstm r4!, { q12 } @ bi
1154
+
1155
+ add r3, r0, #_so
1156
+ vldm r3, { q8 } @ so
1157
+ vstm r3, { q3 } @ old bo goes to the so slot
1158
+ veor.64 q3, q3, q8
1159
+ vstm r4!, { q8 } @ bo
1160
+
1161
+ add r3, r0, #_ku
1162
+ vldm r3, { q9 } @ ku
1163
+ vstm r3, { q4 } @ old bu goes to the ku slot
1164
+ veor.64 q4, q4, q9
1165
+ vstm r4!, { q9 } @ bu
1166
+
1167
+ add r3, r0, #_sa
1168
+ vldm r3, { q5 } @ sa
1169
+ vstm r3, { q10 } @ old ga goes to the sa slot
1170
+ add r4, r0, #_ga
1171
+ veor.64 q0, q0, q5
1172
+ veor.64 q0, q0, q10
1173
+ vstm r4!, { q5 } @ ga
1174
+
1175
+ add r3, r0, #_ke
1176
+ vldm r3, { q6 } @ ke
1177
+ vstm r3, { q11 } @ old ge goes to the ke slot
1178
+ veor.64 q1, q1, q6
1179
+ veor.64 q1, q1, q11
1180
+ vstm r4!, { q6 } @ ge
1181
+
1182
+ add r3, r0, #_mo
1183
+ vldm r3, { q8 } @ mo
1184
+ vstm r3, { q13 } @ old go goes to the mo slot
1185
+ add r4, r0, #_go
1186
+ veor.64 q3, q3, q8
1187
+ veor.64 q3, q3, q13
1188
+ vstm r4!, { q8 } @ go
1189
+ veor.64 q4, q4, q14 @ gu
1190
+
1191
+ add r4, r0, #_ka @ ka
1192
+ vldm r4, { q10 }
1193
+ add r3, r0, #_ma
1194
+ vldm r3, { q5 } @ ma
1195
+ vstm r3, { q10 } @ old ka goes to the ma slot
1196
+ veor.64 q0, q0, q5
1197
+ veor.64 q0, q0, q10
1198
+ vstm r4!, { q5 } @ ka
1199
+
1200
+ add r4, r0, #_ki @ ki ko
1201
+ vldm r4, { q12, q13 }
1202
+ add r3, r0, #_si
1203
+ vldm r3, { q7 } @ si
1204
+ vstm r3, { q12 } @ old ki goes to the si slot
1205
+ veor.64 q2, q2, q7
1206
+ veor.64 q2, q2, q12
1207
+ vstm r4, { q7 } @ ki
1208
+ veor.64 q3, q3, q13 @ ko
1209
+
1210
+ add r4, r0, #_mu @ mu
1211
+ vldm r4, { q14 }
1212
+ add r3, r0, #_su
1213
+ vldm r3, { q9 } @ su
1214
+ vstm r3, { q14 } @ old mu goes to the su slot
1215
+ veor.64 q4, q4, q9
1216
+ veor.64 q4, q4, q14
1217
+ vstm r4, { q9 } @ mu
1218
+
1219
+ add r4, r0, #_mi @ mi
1220
+ vldm r4, { q12 }
1221
+ veor.64 q2, q2, q12
1222
+ add r3, r0, #_se @ se
1223
+ vldm r3, { q6 }
1224
+ veor.64 q1, q1, q6
1225
+
1226
+ mov r3, r0 @ r3 = lane pointer expected by the round macros (start of the state)
1227
+ b KeccakP1600times2_PermuteAll_Round2
1228
+
1229
+
1230
+ .align 8
1231
+ KeccakP1600times2_Permute_RoundConstants24: @ start of the 24-entry round-constant table (one 64-bit constant per round)
1232
+ .quad 0x0000000000000001
1233
+ .quad 0x0000000000008082
1234
+ .quad 0x800000000000808a
1235
+ .quad 0x8000000080008000
1236
+ .quad 0x000000000000808b
1237
+ .quad 0x0000000080000001
1238
+ .quad 0x8000000080008081
1239
+ .quad 0x8000000000008009
1240
+ .quad 0x000000000000008a
1241
+ .quad 0x0000000000000088
1242
+ .quad 0x0000000080008009
1243
+ .quad 0x000000008000000a
1244
+ KeccakP1600times2_Permute_RoundConstants12: @ entry for 12 rounds: the last 12 constants of the table
1245
+ .quad 0x000000008000808b
1246
+ .quad 0x800000000000008b
1247
+ .quad 0x8000000000008089
1248
+ .quad 0x8000000000008003
1249
+ .quad 0x8000000000008002
1250
+ .quad 0x8000000000000080
1251
+ KeccakP1600times2_Permute_RoundConstants6: @ entry for 6 rounds: the last 6 constants of the table
1252
+ .quad 0x000000000000800a
1253
+ .quad 0x800000008000000a
1254
+ KeccakP1600times2_Permute_RoundConstants4: @ entry for 4 rounds: the last 4 constants of the table
1255
+ .quad 0x8000000080008081
1256
+ .quad 0x8000000000008080
1257
+ .quad 0x0000000080000001
1258
+ .quad 0x8000000080008008
1259
+
1260
+ @----------------------------------------------------------------------------
1261
+ @ Entry point for 24 rounds: sets up r1 and r2, then branches to the shared KeccakP1600times2_PermuteAll routine.
1262
+ @ void KeccakP1600times2_PermuteAll_24rounds( void *states )
1263
+ @ In: r0 = states (passed through unchanged).
1264
+ .align 8
1265
+ .global KeccakP1600times2_PermuteAll_24rounds
1266
+ .type KeccakP1600times2_PermuteAll_24rounds, %function;
1267
+ KeccakP1600times2_PermuteAll_24rounds:
1268
+ adr r1, KeccakP1600times2_Permute_RoundConstants24 @ r1 = rc argument: first round constant to use
1269
+ movs r2, #24 @ r2 = nr argument: number of rounds
1270
+ b KeccakP1600times2_PermuteAll @ plain branch, lr untouched: the shared routine returns to our caller
1271
+
1272
+
1273
+ @----------------------------------------------------------------------------
1274
+ @ Entry point for 12 rounds: sets up r1 and r2, then branches to the shared KeccakP1600times2_PermuteAll routine.
1275
+ @ void KeccakP1600times2_PermuteAll_12rounds( void *states )
1276
+ @ In: r0 = states (passed through unchanged).
1277
+ .align 8
1278
+ .global KeccakP1600times2_PermuteAll_12rounds
1279
+ .type KeccakP1600times2_PermuteAll_12rounds, %function;
1280
+ KeccakP1600times2_PermuteAll_12rounds:
1281
+ adr r1, KeccakP1600times2_Permute_RoundConstants12 @ r1 = rc argument: first round constant to use
1282
+ movs r2, #12 @ r2 = nr argument: number of rounds
1283
+ b KeccakP1600times2_PermuteAll @ plain branch, lr untouched: the shared routine returns to our caller
1284
+
1285
+
1286
+ @----------------------------------------------------------------------------
1287
+ @ Entry point for 4 rounds: sets up r1 and r2, then branches to the shared KeccakP1600times2_PermuteAll routine.
1288
+ @ void KeccakP1600times2_PermuteAll_4rounds( void *states )
1289
+ @ In: r0 = states (passed through unchanged).
1290
+ .align 8
1291
+ .global KeccakP1600times2_PermuteAll_4rounds
1292
+ .type KeccakP1600times2_PermuteAll_4rounds, %function;
1293
+ KeccakP1600times2_PermuteAll_4rounds:
1294
+ adr r1, KeccakP1600times2_Permute_RoundConstants4 @ r1 = rc argument: first round constant to use
1295
+ movs r2, #4 @ r2 = nr argument: number of rounds
1296
+ b KeccakP1600times2_PermuteAll @ plain branch, lr untouched: the shared routine returns to our caller
1297
+
1298
+
1299
+ @----------------------------------------------------------------------------
1300
+ @ Computes the five column XORs of the 400-byte block at 'states' (25 lanes x 16 bytes), then runs the round macros, four round groups per loop pass, until nr reaches zero.
1301
+ @ void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1302
+ @ In: r0 = states (first loads use :256, so 32-byte alignment is required), r1 = rc, r2 = nr (decremented by 4 per pass; must reach exactly 0). r1 is not referenced in this listing - presumably consumed by the round macros (confirm).
1303
+ .align 8 @ GNU as on ARM takes a power of two here: 2^8 = 256-byte alignment
1304
+ .type KeccakP1600times2_PermuteAll, %function;
1305
+ KeccakP1600times2_PermuteAll:
1306
+ vpush {q4-q7} @ save q4-q7 (d8-d15): they are overwritten by the loads below
1307
+ push {r4-r7} @ only r5 is written in this listing; r4, r6, r7 are presumably used by the round macros (confirm)
1308
+ sub sp, #4*2*8+8 @ reserve 72 bytes: four 16-byte double-lane slots plus 8 bytes of slack so a 16-byte aligned base fits
1309
+ mov r3, r0 @ r3 = walking pointer over the state; r0 keeps the base address
1310
+ add r5, sp, #8 @ r5 = sp + 8; rounded down to a 16-byte boundary by the bic below
1311
+
1312
+ @ PrepareTheta: XOR the five lanes of each column into q0-q4; loads and XORs are interleaved
1313
+ @ Ca = ba ^ ga ^ ka ^ ma ^ sa -> q0
1314
+ @ Ce = be ^ ge ^ ke ^ me ^ se -> q1
1315
+ @ Ci = bi ^ gi ^ ki ^ mi ^ si -> q2
1316
+ @ Co = bo ^ go ^ ko ^ mo ^ so -> q3
1317
+ @ Cu = bu ^ gu ^ ku ^ mu ^ su -> q4
1318
+ vld1.64 { d0, d1, d2, d3 }, [r3:256]! @ q0 = _ba, q1 = _be
1319
+ bic r5, #15 @ r5 = (sp + 8) & ~15: equals sp or sp+8 provided sp is 8-byte aligned (assumed - confirm against the ABI in use)
1320
+ vld1.64 { d4, d5, d6, d7 }, [r3:256]! @ q2 = _bi, q3 = _bo
1321
+ vld1.64 { d8, d9, d10, d11 }, [r3:256]! @ q4 = _bu, q5 = _ga
1322
+ vld1.64 { d12, d13 }, [r3:128]! @ q6 = _ge
1323
+ veor.64 q0, q0, q5 @ Ca = ba ^ ga
1324
+ vld1.64 { d14, d15 }, [r3:128]! @ q7 = _gi
1325
+ veor.64 q1, q1, q6 @ Ce = be ^ ge
1326
+ vld1.64 { d16, d17 }, [r3:128]! @ q8 = _go
1327
+ veor.64 q2, q2, q7 @ Ci = bi ^ gi
1328
+ vld1.64 { d18, d19 }, [r3:128]! @ q9 = _gu
1329
+ veor.64 q3, q3, q8 @ Co = bo ^ go
1330
+ vld1.64 { d10, d11 }, [r3:128]! @ q5 = _ka (q5's previous value was already folded into q0)
1331
+ veor.64 q4, q4, q9 @ Cu = bu ^ gu
1332
+ vld1.64 { d12, d13 }, [r3:128]! @ q6 = _ke
1333
+ veor.64 q0, q0, q5 @ Ca ^= ka
1334
+ vld1.64 { d14, d15 }, [r3:128]! @ q7 = _ki
1335
+ veor.64 q1, q1, q6 @ Ce ^= ke
1336
+ vld1.64 { d16, d17 }, [r3:128]! @ q8 = _ko
1337
+ veor.64 q2, q2, q7 @ Ci ^= ki
1338
+ vld1.64 { d18, d19 }, [r3:128]! @ q9 = _ku
1339
+ veor.64 q3, q3, q8 @ Co ^= ko
1340
+ vld1.64 { d10, d11 }, [r3:128]! @ q5 = _ma
1341
+ veor.64 q4, q4, q9 @ Cu ^= ku
1342
+ vld1.64 { d12, d13 }, [r3:128]! @ q6 = _me
1343
+ veor.64 q0, q0, q5 @ Ca ^= ma
1344
+ vld1.64 { d14, d15 }, [r3:128]! @ q7 = _mi
1345
+ veor.64 q1, q1, q6 @ Ce ^= me
1346
+ vld1.64 { d16, d17 }, [r3:128]! @ q8 = _mo
1347
+ veor.64 q2, q2, q7 @ Ci ^= mi
1348
+ vld1.64 { d18, d19 }, [r3:128]! @ q9 = _mu
1349
+ veor.64 q3, q3, q8 @ Co ^= mo
1350
+ vld1.64 { d10, d11 }, [r3:128]! @ q5 = _sa
1351
+ veor.64 q4, q4, q9 @ Cu ^= mu
1352
+ vld1.64 { d12, d13 }, [r3:128]! @ q6 = _se
1353
+ veor.64 q0, q0, q5 @ Ca ^= sa (q0 now holds Ca)
1354
+ vld1.64 { d14, d15 }, [r3:128]! @ q7 = _si
1355
+ veor.64 q1, q1, q6 @ Ce ^= se (q1 now holds Ce)
1356
+ vld1.64 { d16, d17 }, [r3:128]! @ q8 = _so
1357
+ veor.64 q2, q2, q7 @ Ci ^= si (q2 now holds Ci)
1358
+ vld1.64 { d18, d19 }, [r3:128]! @ q9 = _su; r3 has now advanced 400 bytes past the base
1359
+ mov r3, r0 @ rewind r3 to the start of the state
1360
+ veor.64 q3, q3, q8 @ Co ^= so (q3 now holds Co)
1361
+ veor.64 q4, q4, q9 @ Cu ^= su (q4 now holds Cu)
1362
+
1363
+ KeccakP1600times2_PermuteAll_RoundLoop: @ one pass = four groups of five macro invocations; r2 drops by 4 per pass
1364
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka @ _ba, _ge, _ki, _mo, _su
1365
+ KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa @ _ka, _me, _si, _bo, _gu
1366
+ KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga @ _sa, _be, _gi, _ko, _mu
1367
+ KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma @ _ga, _ke, _mi, _so, _bu
1368
+ KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba @ _ma, _se, _bi, _go, _ku
1369
+
1370
+ KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa @ _ba, _me, _gi, _so, _ku
1371
+ KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma @ _sa, _ke, _bi, _mo, _gu
1372
+ KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka @ _ma, _ge, _si, _ko, _bu
1373
+ KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga @ _ka, _be, _mi, _go, _su
1374
+ KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba @ _ga, _se, _ki, _bo, _mu
1375
+ KeccakP1600times2_PermuteAll_Round2: @ no branch to this label appears in this listing; presumably an alternate entry point used elsewhere (confirm)
1376
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma @ _ba, _ke, _si, _go, _mu
1377
+ KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga @ _ma, _be, _ki, _so, _gu
1378
+ KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa @ _ga, _me, _bi, _ko, _su
1379
+ KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka @ _sa, _ge, _mi, _bo, _ku
1380
+ KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba @ _ka, _se, _gi, _mo, _bu
1381
+
1382
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga @ _ba, _be, _bi, _bo, _bu
1383
+ KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka @ _ga, _ge, _gi, _go, _gu
1384
+ KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma @ _ka, _ke, _ki, _ko, _ku
1385
+ KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa @ _ma, _me, _mi, _mo, _mu
1386
+ subs r2, #4 @ sets the flags tested by the bne below; issued one macro early, so that macro must leave the condition flags untouched (macro body not visible here)
1387
+ KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba @ _sa, _se, _si, _so, _su
1388
+ bne KeccakP1600times2_PermuteAll_RoundLoop @ repeat until r2 == 0
1389
+ add sp, #4*2*8+8 @ release the 72-byte scratch area reserved in the prologue
1390
+ pop {r4-r7} @ restore in reverse order of the prologue
1391
+ vpop {q4-q7}
1392
+ bx lr
1393
+
1394
+