sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1392 @@
1
+ ;
2
+ ; The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
+ ;
4
+ ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
+ ;
6
+ ; For more information, feedback or questions, please refer to the Keccak Team website:
7
+ ; https://keccak.team/
8
+ ;
9
+ ; To the extent possible under law, the implementer has waived all copyright
10
+ ; and related or neighboring rights to the source code in this file.
11
+ ; http://creativecommons.org/publicdomain/zero/1.0/
12
+ ;
13
+ ; ---
14
+ ;
15
+ ; This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
+ ; Please refer to PlSnP-documentation.h for more details.
17
+ ;
18
+ ; This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
+ ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ ;
21
+
22
+ ; WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
23
+ ; WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
24
+
25
+ ; INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
26
+ ; INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
27
+
28
+ ; INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
29
+
30
+
31
+ PRESERVE8 ; declare that this code preserves 8-byte stack alignment
32
+ AREA |.text|, CODE, READONLY ; everything below is assembled into a read-only code area named .text
33
+
34
+ ;----------------------------------------------------------------------------
35
+
36
+ ; --- offsets in state: byte offset of each of the 25 lane slots = lane index * 16 (slots are 16 bytes apart)
37
+ _ba equ 0*16
38
+ _be equ 1*16
39
+ _bi equ 2*16
40
+ _bo equ 3*16
41
+ _bu equ 4*16
42
+ _ga equ 5*16
43
+ _ge equ 6*16
44
+ _gi equ 7*16
45
+ _go equ 8*16
46
+ _gu equ 9*16
47
+ _ka equ 10*16
48
+ _ke equ 11*16
49
+ _ki equ 12*16
50
+ _ko equ 13*16
51
+ _ku equ 14*16
52
+ _ma equ 15*16
53
+ _me equ 16*16
54
+ _mi equ 17*16
55
+ _mo equ 18*16
56
+ _mu equ 19*16
57
+ _sa equ 20*16
58
+ _se equ 21*16
59
+ _si equ 22*16
60
+ _so equ 23*16
61
+ _su equ 24*16
62
+
63
+ ; --- macros for Single permutation
64
+
65
+ MACRO
66
+ KeccakS_ThetaRhoPiChiIota $argA1, $argA2, $argA3, $argA4, $argA5
67
+ ; Theta preparation plus Rho/Pi/Chi/Iota for one plane. $argA1 is a byte offset from r0 (that lane is kept in memory), $argA2-$argA5 are D registers. Reads d5 as Ca and d12-d31 as the other column inputs; r1 points at the round constant. Uses d0-d11 as scratch.
68
+ ;Prepare Theta
69
+ ; Ca = Aba^Aga^Aka^Ama^Asa
70
+ ; Ce = Abe^Age^Ake^Ame^Ase
71
+ ; Ci = Abi^Agi^Aki^Ami^Asi
72
+ ; Co = Abo^Ago^Ako^Amo^Aso
73
+ ; Cu = Abu^Agu^Aku^Amu^Asu
74
+ ; De = Ca^ROL64(Ci, 1)
75
+ ; Di = Ce^ROL64(Co, 1)
76
+ ; Do = Ci^ROL64(Cu, 1)
77
+ ; Du = Co^ROL64(Ca, 1)
78
+ ; Da = Cu^ROL64(Ce, 1)
79
+ veor.64 q4, q6, q7
80
+ veor.64 q5, q9, q10
81
+ veor.64 d8, d8, d9
82
+ veor.64 d10, d10, d11
83
+ veor.64 d1, d8, d16
84
+ veor.64 d2, d10, d17
85
+ ; d1 = XOR of d12-d16 and d2 = XOR of d17-d21 at this point
86
+ veor.64 q4, q11, q12
87
+ veor.64 q5, q14, q15
88
+ veor.64 d8, d8, d9
89
+ veor.64 d10, d10, d11
90
+ veor.64 d3, d8, d26
91
+ ; d3 = XOR of d22-d26; d4 = XOR of d27-d31 is completed two instructions below
92
+ vadd.u64 q4, q1, q1
93
+ veor.64 d4, d10, d27
94
+ vmov.64 d0, d5
95
+ vsri.64 q4, q1, #63
96
+ ; q4 = ROL64(d2:d3, 1) ^ d0:d1 yields d8 = De, d9 = Di; q5 = ROL64(d4:d5, 1) ^ d2:d3 yields d10 = Do, d11 = Du
97
+ vadd.u64 q5, q2, q2
98
+ veor.64 q4, q4, q0
99
+ vsri.64 q5, q2, #63
100
+ vadd.u64 d7, d1, d1
101
+ veor.64 $argA2, $argA2, d8
102
+ veor.64 q5, q5, q1
103
+ ; d7 = ROL64(d1, 1) ^ d4 = Da; d1 must be read by the vsri below before it is overwritten with Be
104
+ vsri.64 d7, d1, #63
105
+ vshl.u64 d1, $argA2, #44
106
+ veor.64 $argA3, $argA3, d9
107
+ veor.64 d7, d7, d4
108
+ ; Rotated values are built in d0-d4 (Ba, Be, Bi, Bo, Bu); Chi results go back into the argument registers, argA1 goes back to memory
109
+ ; Ba = argA1^Da
110
+ ; Be = ROL64((argA2^De), 44)
111
+ ; Bi = ROL64((argA3^Di), 43)
112
+ ; Bo = ROL64((argA4^Do), 21)
113
+ ; Bu = ROL64((argA5^Du), 14)
114
+ ; argA2 = Be ^((~Bi)& Bo )
115
+ ; argA3 = Bi ^((~Bo)& Bu )
116
+ ; argA4 = Bo ^((~Bu)& Ba )
117
+ ; argA5 = Bu ^((~Ba)& Be )
118
+ ; argA1 = Ba ^((~Be)& Bi )
119
+ ; argA1 ^= KeccakP1600RoundConstants[i+round]
120
+ vsri.64 d1, $argA2, #64-44
121
+ vshl.u64 d2, $argA3, #43
122
+ vldr.64 d0, [r0, #$argA1] ; load the in-memory lane argA1
123
+ veor.64 $argA4, $argA4, d10
124
+ vsri.64 d2, $argA3, #64-43
125
+ vshl.u64 d3, $argA4, #21
126
+ veor.64 $argA5, $argA5, d11
127
+ veor.64 d0, d0, d7
128
+ vsri.64 d3, $argA4, #64-21
129
+ vbic.64 d5, d2, d1 ; d5 = (~Be) & Bi
130
+ vshl.u64 d4, $argA5, #14
131
+ vbic.64 $argA2, d3, d2
132
+ vld1.64 d6, [r1]! ; fetch the round constant; r1 advances by 8 bytes
133
+ veor.64 d5, d0
134
+ vsri.64 d4, $argA5, #64-14
135
+ veor.64 d5, d6 ; Iota: XOR the round constant into the new argA1
136
+ vbic.64 $argA5, d1, d0
137
+ vbic.64 $argA3, d4, d3
138
+ vbic.64 $argA4, d0, d4
139
+ veor.64 $argA2, d1
140
+ vstr.64 d5, [r0, #$argA1] ; store the new argA1; d5 is left holding the same value
141
+ veor.64 $argA3, d2
142
+ veor.64 $argA4, d3
143
+ veor.64 $argA5, d4
144
+ MEND
145
+
146
+ MACRO
146
+ KeccakS_ThetaRhoPiChi1 $argA1, $argA2, $argA3, $argA4, $argA5
147
+ ; Theta/Rho/Pi/Chi for one plane. $argA1 is a byte offset from r0 (lane kept in memory), $argA2-$argA5 are D registers. Reads Da from d7 and De, Di, Do, Du from d8-d11; XORs the new argA1 into d5. Uses d0-d4 and d6 as scratch.
148
+ ; Bi = ROL64((argA1^Da), 3)
149
+ ; Bo = ROL64((argA2^De), 45)
150
+ ; Bu = ROL64((argA3^Di), 61)
151
+ ; Ba = ROL64((argA4^Do), 28)
152
+ ; Be = ROL64((argA5^Du), 20)
153
+ ; argA1 = Ba ^((~Be)& Bi )
154
+ ; Ca ^= argA1
155
+ ; argA2 = Be ^((~Bi)& Bo )
156
+ ; argA3 = Bi ^((~Bo)& Bu )
157
+ ; argA4 = Bo ^((~Bu)& Ba )
158
+ ; argA5 = Bu ^((~Ba)& Be )
159
+ veor.64 $argA2, $argA2, d8
160
+ veor.64 $argA3, $argA3, d9
161
+ vshl.u64 d3, $argA2, #45
162
+ vldr.64 d6, [r0, #$argA1] ; load the in-memory lane argA1
163
+ vshl.u64 d4, $argA3, #61
164
+ veor.64 $argA4, $argA4, d10
165
+ vsri.64 d3, $argA2, #64-45
166
+ veor.64 $argA5, $argA5, d11
167
+ vsri.64 d4, $argA3, #64-61
168
+ vshl.u64 d0, $argA4, #28
169
+ veor.64 d6, d6, d7
170
+ vshl.u64 d1, $argA5, #20
171
+ vbic.64 $argA3, d4, d3
172
+ vsri.64 d0, $argA4, #64-28
173
+ vbic.64 $argA4, d0, d4
174
+ vshl.u64 d2, d6, #3
175
+ vsri.64 d1, $argA5, #64-20
176
+ veor.64 $argA4, d3
177
+ vsri.64 d2, d6, #64-3
178
+ vbic.64 $argA5, d1, d0
179
+ vbic.64 d6, d2, d1 ; d6 = (~Be) & Bi
180
+ vbic.64 $argA2, d3, d2
181
+ veor.64 d6, d0 ; d6 = new argA1
182
+ veor.64 $argA2, d1
183
+ vstr.64 d6, [r0, #$argA1] ; write the new argA1 back to the state
184
+ veor.64 $argA3, d2
185
+ veor.64 d5, d6 ; Ca ^= argA1 (running parity kept in d5)
186
+ veor.64 $argA5, d4
187
+ MEND
189
+
190
+ MACRO
191
+ KeccakS_ThetaRhoPiChi2 $argA1, $argA2, $argA3, $argA4, $argA5
192
+ ; Theta/Rho/Pi/Chi for one plane. $argA1 is a byte offset from r0 (lane kept in memory), $argA2-$argA5 are D registers. Reads Da from d7 and De, Di, Do, Du from d8-d11; XORs the new argA1 into d5. Uses d0-d4 and d6 as scratch.
193
+ ; Bu = ROL64((argA1^Da), 18)
194
+ ; Ba = ROL64((argA2^De), 1)
195
+ ; Be = ROL64((argA3^Di), 6)
196
+ ; Bi = ROL64((argA4^Do), 25)
197
+ ; Bo = ROL64((argA5^Du), 8)
198
+ ; argA1 = Ba ^((~Be)& Bi )
199
+ ; Ca ^= argA1;
200
+ ; argA2 = Be ^((~Bi)& Bo )
201
+ ; argA3 = Bi ^((~Bo)& Bu )
202
+ ; argA4 = Bo ^((~Bu)& Ba )
203
+ ; argA5 = Bu ^((~Ba)& Be )
204
+ veor.64 $argA3, $argA3, d9
205
+ veor.64 $argA4, $argA4, d10
206
+ vshl.u64 d1, $argA3, #6
207
+ vldr.64 d6, [r0, #$argA1] ; load the in-memory lane argA1
208
+ vshl.u64 d2, $argA4, #25
209
+ veor.64 $argA5, $argA5, d11
210
+ vsri.64 d1, $argA3, #64-6
211
+ veor.64 $argA2, $argA2, d8
212
+ vsri.64 d2, $argA4, #64-25
213
+ vext.8 d3, $argA5, $argA5, #7 ; byte-wise rotate, equal to ROL64 by 8 (Bo)
214
+ veor.64 d6, d6, d7
215
+ vbic.64 $argA3, d2, d1 ; $argA3 is reused as a temporary: (~Be) & Bi
216
+ vadd.u64 d0, $argA2, $argA2 ; x + x = x << 1; the vsri below completes ROL64 by 1 (Ba)
217
+ vbic.64 $argA4, d3, d2 ; $argA4 is reused as a temporary: (~Bi) & Bo
218
+ vsri.64 d0, $argA2, #64-1
219
+ vshl.u64 d4, d6, #18
220
+ veor.64 $argA2, d1, $argA4
221
+ veor.64 $argA3, d0 ; $argA3 temporarily holds the new argA1 = Ba ^ ((~Be) & Bi)
222
+ vsri.64 d4, d6, #64-18
223
+ vstr.64 $argA3, [r0, #$argA1] ; write the new argA1 back to the state
224
+ veor.64 d5, $argA3 ; Ca ^= argA1 (running parity kept in d5)
225
+ vbic.64 $argA5, d1, d0
226
+ vbic.64 $argA3, d4, d3
227
+ vbic.64 $argA4, d0, d4
228
+ veor.64 $argA3, d2
229
+ veor.64 $argA4, d3
230
+ veor.64 $argA5, d4
231
+ MEND
232
+
233
+ MACRO
234
+ KeccakS_ThetaRhoPiChi3 $argA1, $argA2, $argA3, $argA4, $argA5
235
+ ; Theta/Rho/Pi/Chi for one plane. $argA1 is a byte offset from r0 (lane kept in memory), $argA2-$argA5 are D registers. Reads Da from d7 and De, Di, Do, Du from d8-d11; XORs the new argA1 into d5. Uses d0-d4 and d6 as scratch.
236
+ ; Be = ROL64((argA1^Da), 36)
237
+ ; Bi = ROL64((argA2^De), 10)
238
+ ; Bo = ROL64((argA3^Di), 15)
239
+ ; Bu = ROL64((argA4^Do), 56)
240
+ ; Ba = ROL64((argA5^Du), 27)
241
+ ; argA1 = Ba ^((~Be)& Bi )
242
+ ; Ca ^= argA1
243
+ ; argA2 = Be ^((~Bi)& Bo )
244
+ ; argA3 = Bi ^((~Bo)& Bu )
245
+ ; argA4 = Bo ^((~Bu)& Ba )
246
+ ; argA5 = Bu ^((~Ba)& Be )
247
+ veor.64 $argA2, $argA2, d8
248
+ veor.64 $argA3, $argA3, d9
249
+ vshl.u64 d2, $argA2, #10
250
+ vldr.64 d6, [r0, #$argA1] ; load the in-memory lane argA1
251
+ vshl.u64 d3, $argA3, #15
252
+ veor.64 $argA4, $argA4, d10
253
+ vsri.64 d2, $argA2, #64-10
254
+ vsri.64 d3, $argA3, #64-15
255
+ veor.64 $argA5, $argA5, d11
256
+ vext.8 d4, $argA4, $argA4, #1 ; byte-wise rotate, equal to ROL64 by 56 (Bu)
257
+ vbic.64 $argA2, d3, d2
258
+ vshl.u64 d0, $argA5, #27
259
+ veor.64 d6, d6, d7
260
+ vbic.64 $argA3, d4, d3
261
+ vsri.64 d0, $argA5, #64-27
262
+ vshl.u64 d1, d6, #36
263
+ veor.64 $argA3, d2
264
+ vbic.64 $argA4, d0, d4
265
+ vsri.64 d1, d6, #64-36
266
+ veor.64 $argA4, d3
267
+ vbic.64 d6, d2, d1 ; d6 = (~Be) & Bi
268
+ vbic.64 $argA5, d1, d0
269
+ veor.64 d6, d0 ; d6 = new argA1
270
+ veor.64 $argA2, d1
271
+ vstr.64 d6, [r0, #$argA1] ; write the new argA1 back to the state
272
+ veor.64 d5, d6 ; Ca ^= argA1 (running parity kept in d5)
273
+ veor.64 $argA5, d4
274
+ MEND
275
+
276
+ MACRO
277
+ KeccakS_ThetaRhoPiChi4 $argA1, $argA2, $argA3, $argA4, $argA5
278
+ ; Theta/Rho/Pi/Chi for one plane. $argA1 is a byte offset from r0 (lane kept in memory), $argA2-$argA5 are D registers. Reads Da from d7 and De, Di, Do, Du from d8-d11; XORs the new argA1 into d5. Uses d0-d4 and d6 as scratch.
279
+ ; Bo = ROL64((argA1^Da), 41)
280
+ ; Bu = ROL64((argA2^De), 2)
281
+ ; Ba = ROL64((argA3^Di), 62)
282
+ ; Be = ROL64((argA4^Do), 55)
283
+ ; Bi = ROL64((argA5^Du), 39)
284
+ ; argA1 = Ba ^((~Be)& Bi )
285
+ ; Ca ^= argA1
286
+ ; argA2 = Be ^((~Bi)& Bo )
287
+ ; argA3 = Bi ^((~Bo)& Bu )
288
+ ; argA4 = Bo ^((~Bu)& Ba )
289
+ ; argA5 = Bu ^((~Ba)& Be )
290
+ veor.64 $argA2, $argA2, d8
291
+ veor.64 $argA3, $argA3, d9
292
+ vshl.u64 d4, $argA2, #2
293
+ veor.64 $argA5, $argA5, d11
294
+ vshl.u64 d0, $argA3, #62
295
+ vldr.64 d6, [r0, #$argA1] ; load the in-memory lane argA1
296
+ vsri.64 d4, $argA2, #64-2
297
+ veor.64 $argA4, $argA4, d10
298
+ vsri.64 d0, $argA3, #64-62
299
+ vshl.u64 d1, $argA4, #55
300
+ veor.64 d6, d6, d7
301
+ vshl.u64 d2, $argA5, #39
302
+ vsri.64 d1, $argA4, #64-55
303
+ vbic.64 $argA4, d0, d4
304
+ vsri.64 d2, $argA5, #64-39
305
+ vbic.64 $argA2, d1, d0 ; $argA2 is reused as a temporary: (~Ba) & Be
306
+ vshl.u64 d3, d6, #41
307
+ veor.64 $argA5, d4, $argA2
308
+ vbic.64 $argA2, d2, d1 ; temporary again: (~Be) & Bi
309
+ vsri.64 d3, d6, #64-41
310
+ veor.64 d6, d0, $argA2 ; d6 = new argA1
311
+ vbic.64 $argA2, d3, d2
312
+ vbic.64 $argA3, d4, d3
313
+ veor.64 $argA2, d1
314
+ vstr.64 d6, [r0, #$argA1] ; write the new argA1 back to the state
315
+ veor.64 d5, d6 ; Ca ^= argA1 (running parity kept in d5)
316
+ veor.64 $argA3, d2
317
+ veor.64 $argA4, d3
318
+ MEND
319
+
320
+ ; --- macros for Parallel permutation
321
+
322
+ MACRO
323
+ m_pls $start
324
+ if $start != -1 ; -1 means: emit nothing and leave r3 unchanged
325
+ add r3, r0, #$start ; r3 = r0 + $start
326
+ endif
327
+ MEND
328
+
329
+ MACRO
330
+ m_ld $qreg, $next
331
+ if $next == 16 ; stride equals the 16 bytes just transferred, so plain writeback is enough
332
+ vld1.64 { $qreg }, [r3:128]! ; load 16 bytes from the 128-bit aligned address in r3, then r3 += 16
333
+ else
334
+ vld1.64 { $qreg }, [r3:128], r4 ; load 16 bytes from the 128-bit aligned address in r3, then r3 += r4
335
+ endif
336
+ MEND
337
+
338
+ MACRO
339
+ m_st $qreg, $next
340
+ if $next == 16 ; stride equals the 16 bytes just transferred, so plain writeback is enough
341
+ vst1.64 { $qreg }, [r3:128]! ; store 16 bytes to the 128-bit aligned address in r3, then r3 += 16
342
+ else
343
+ vst1.64 { $qreg }, [r3:128], r4 ; store 16 bytes to the 128-bit aligned address in r3, then r3 += r4
344
+ endif
345
+ MEND
346
+
347
+ MACRO
348
+ KeccakP_ThetaRhoPiChiIota $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
349
+ ; Q-register round step including Iota. Reads q0-q4 as Ca, Ce, Ci, Co, Cu; r3 must already address the first lane to load (no m_pls precedes the first m_ld); r5 addresses a 128-bit aligned area that receives De, Di, Do, Du; r1 points at the round constant. $next is the post-index stride (16 selects plain writeback); an offset of -1 skips the r3 reload.
350
+ ; De = Ca ^ ROL64(Ci, 1)
351
+ ; Di = Ce ^ ROL64(Co, 1)
352
+ ; Do = Ci ^ ROL64(Cu, 1)
353
+ ; Du = Co ^ ROL64(Ca, 1)
354
+ ; Da = Cu ^ ROL64(Ce, 1)
355
+ vadd.u64 q6, q2, q2
356
+ vadd.u64 q7, q3, q3
357
+ vadd.u64 q8, q4, q4
358
+ vadd.u64 q9, q0, q0
359
+ vadd.u64 q5, q1, q1
360
+ ; x + x = x << 1; the vsri #63 instructions below insert the old top bit, completing ROL64(x, 1)
361
+ vsri.64 q6, q2, #63
362
+ vsri.64 q7, q3, #63
363
+ vsri.64 q8, q4, #63
364
+ vsri.64 q9, q0, #63
365
+ vsri.64 q5, q1, #63
366
+ ; after the XORs below: q6 = De, q7 = Di, q8 = Do, q9 = Du, q5 = Da
367
+ veor.64 q6, q6, q0
368
+ veor.64 q7, q7, q1
369
+ veor.64 q8, q8, q2
370
+ if $next != 16
371
+ mov r4, #$next ; r4 = post-index stride used by m_ld / m_st
372
+ endif
373
+ veor.64 q9, q9, q3
374
+ veor.64 q5, q5, q4
375
+ ; Load the five lanes (q10, q1-q4) and XOR each with its D value
376
+ ; Ba = argA1^Da
377
+ ; Be = ROL64(argA2^De, 44)
378
+ ; Bi = ROL64(argA3^Di, 43)
379
+ ; Bo = ROL64(argA4^Do, 21)
380
+ ; Bu = ROL64(argA5^Du, 14)
381
+ m_ld q10, $next
382
+ m_pls $ofs2
383
+ m_ld q1, $next
384
+ m_pls $ofs3
385
+ veor.64 q10, q10, q5
386
+ m_ld q2, $next
387
+ m_pls $ofs4
388
+ veor.64 q1, q1, q6
389
+ m_ld q3, $next
390
+ m_pls $ofs5
391
+ veor.64 q2, q2, q7
392
+ m_ld q4, $next
393
+ veor.64 q3, q3, q8
394
+ mov r6, r5 ; r6 = moving copy of r5, so r5 itself is not modified
395
+ veor.64 q4, q4, q9
396
+ ; Save De, Di, Do, Du (q6-q9) to consecutive 16-byte slots at [r5]; Da (q5) is not saved. The stores are interleaved with the rotations that build q11-q14.
397
+ vst1.64 { q6 }, [r6:128]!
398
+ vshl.u64 q11, q1, #44
399
+ vshl.u64 q12, q2, #43
400
+ vst1.64 { q7 }, [r6:128]!
401
+ vshl.u64 q13, q3, #21
402
+ vshl.u64 q14, q4, #14
403
+ vst1.64 { q8 }, [r6:128]!
404
+ vsri.64 q11, q1, #64-44
405
+ vsri.64 q12, q2, #64-43
406
+ vst1.64 { q9 }, [r6:128]!
407
+ vsri.64 q13, q3, #64-21
408
+ vsri.64 q14, q4, #64-14
409
+ ; q10-q14 now hold Ba, Be, Bi, Bo, Bu
410
+ ; argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
411
+ ; argA2 = Be ^(~Bi & Bo)
412
+ ; argA3 = Bi ^(~Bo & Bu)
413
+ ; argA4 = Bo ^(~Bu & Ba)
414
+ ; argA5 = Bu ^(~Ba & Be)
415
+ vld1.64 { d30 }, [r1:64] ; round constant into the low half of q15 (r1 not advanced yet)
416
+ vbic.64 q0, q12, q11
417
+ vbic.64 q1, q13, q12
418
+ vld1.64 { d31 }, [r1:64]! ; same constant into the high half of q15, then r1 += 8
419
+ veor.64 q0, q10
420
+ vbic.64 q4, q11, q10
421
+ veor.64 q0, q15 ; Iota: the constant is applied to both 64-bit halves
422
+ vbic.64 q2, q14, q13
423
+ vbic.64 q3, q10, q14
424
+ ; Write the five new lanes back at $ofs1-$ofs5; the final m_pls loads r3 with r0 + $ofsn1 (presumably the first lane of the next macro invocation)
425
+ m_pls $ofs1
426
+ veor.64 q1, q11
427
+ m_st q0, $next
428
+ m_pls $ofs2
429
+ veor.64 q2, q12
430
+ m_st q1, $next
431
+ m_pls $ofs3
432
+ veor.64 q3, q13
433
+ m_st q2, $next
434
+ m_pls $ofs4
435
+ veor.64 q4, q14
436
+ m_st q3, $next
437
+ m_pls $ofs5
438
+ m_st q4, $next
439
+ m_pls $ofsn1
440
+ MEND
441
+
442
+ MACRO
443
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, $Bb1, $Bb2, $Bb3, $Bb4, $Bb5, $Rr1, $Rr2, $Rr3, $Rr4, $Rr5
444
+ ; Generic middle-plane step. In: q5 = Da, q6-q9 = De..Du, q0-q4 = running column parities, r5 -> saved De..Du. $Bb1-$Bb5 must be a permutation of q10-q14 because the chi code below hard-codes Ba..Bu = q10..q14.
445
+ ; Bb1 = ROL64((argA1^Da), Rr1)
446
+ ; Bb2 = ROL64((argA2^De), Rr2)
447
+ ; Bb3 = ROL64((argA3^Di), Rr3)
448
+ ; Bb4 = ROL64((argA4^Do), Rr4)
449
+ ; Bb5 = ROL64((argA5^Du), Rr5)
450
+
451
+ if $next != 16
452
+ mov r4, #$next ; stride consumed by m_ld / m_st
453
+ endif
454
+
455
+ m_ld $Bb1, $next
456
+ m_pls $ofs2
457
+ m_ld $Bb2, $next
458
+ m_pls $ofs3
459
+ veor.64 q15, q5, $Bb1 ; result goes to q15 so that Da in q5 survives for the next macro
460
+ m_ld $Bb3, $next
461
+ m_pls $ofs4
462
+ veor.64 q6, q6, $Bb2
463
+ m_ld $Bb4, $next
464
+ m_pls $ofs5
465
+ veor.64 q7, q7, $Bb3
466
+ m_ld $Bb5, $next
467
+ veor.64 q8, q8, $Bb4
468
+ veor.64 q9, q9, $Bb5
469
+
470
+ vshl.u64 $Bb1, q15, #$Rr1
471
+ vshl.u64 $Bb2, q6, #$Rr2
472
+ vshl.u64 $Bb3, q7, #$Rr3
473
+ vshl.u64 $Bb4, q8, #$Rr4
474
+ vshl.u64 $Bb5, q9, #$Rr5
475
+
476
+ vsri.64 $Bb1, q15, #64-$Rr1 ; vshl + vsri pair == 64-bit rotate left
477
+ vsri.64 $Bb2, q6, #64-$Rr2
478
+ vsri.64 $Bb3, q7, #64-$Rr3
479
+ vsri.64 $Bb4, q8, #64-$Rr4
480
+ vsri.64 $Bb5, q9, #64-$Rr5
481
+
482
+ ; argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
483
+ ; argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
484
+ ; argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
485
+ ; argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
486
+ ; argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
487
+ vbic.64 q15, q12, q11
488
+ mov r6, r5 ; r6 = read cursor over the De..Du values saved at r5
489
+ vbic.64 q6, q13, q12
490
+ m_pls $ofs1
491
+ vbic.64 q7, q14, q13
492
+ vbic.64 q8, q10, q14
493
+ vbic.64 q9, q11, q10
494
+
495
+ veor.64 q15, q15, q10
496
+ veor.64 q6, q6, q11
497
+
498
+ m_st q15, $next
499
+ m_pls $ofs2
500
+ veor.64 q7, q7, q12
501
+
502
+ m_st q6, $next
503
+ m_pls $ofs3
504
+ veor.64 q1, q1, q6 ; Ce ^= argA2
505
+ vld1.64 { q6 }, [r6:128]! ; restore De
506
+ veor.64 q8, q8, q13
507
+
508
+ m_st q7, $next
509
+ m_pls $ofs4
510
+ veor.64 q2, q2, q7 ; Ci ^= argA3
511
+ vld1.64 { q7 }, [r6:128]! ; restore Di
512
+ veor.64 q9, q9, q14
513
+
514
+ m_st q8, $next
515
+ m_pls $ofs5
516
+ veor.64 q3, q3, q8 ; Co ^= argA4
517
+
518
+ m_st q9, $next
519
+
520
+ vld1.64 { q8 }, [r6:128]! ; restore Do
521
+ veor.64 q4, q4, q9 ; Cu ^= argA5
522
+ m_pls $ofsn1 ; leave r3 on the first lane handled by the next macro
523
+ vld1.64 { q9 }, [r6:128]! ; restore Du
524
+ veor.64 q0, q0, q15 ; Ca ^= argA1
525
+ MEND
526
+
527
+ MACRO ; KeccakP_ThetaRhoPiChi1: generic step with (Bi,Bo,Bu,Ba,Be) = (q12,q13,q14,q10,q11), rotations 3,45,61,28,20
528
+ KeccakP_ThetaRhoPiChi1 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
529
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20
530
+ MEND
531
+
532
+ MACRO ; KeccakP_ThetaRhoPiChi2: generic step with (Bu,Ba,Be,Bi,Bo) = (q14,q10,q11,q12,q13), rotations 18,1,6,25,8
533
+ KeccakP_ThetaRhoPiChi2 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
534
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8
535
+ MEND
536
+
537
+ MACRO ; KeccakP_ThetaRhoPiChi3: generic step with (Be,Bi,Bo,Bu,Ba) = (q11,q12,q13,q14,q10), rotations 36,10,15,56,27
538
+ KeccakP_ThetaRhoPiChi3 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
539
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27
540
+ MEND
541
+
542
+ MACRO
543
+ KeccakP_ThetaRhoPiChi4 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
544
+ ; Last of the five per-round macros. Unlike the generic step it consumes q5-q9 (Da..Du) in place and never reloads them; on exit q0-q4 hold the accumulated column parities.
545
+ ; Bo = ROL64((argA1^Da), 41)
546
+ ; Bu = ROL64((argA2^De), 2)
547
+ ; Ba = ROL64((argA3^Di), 62)
548
+ ; Be = ROL64((argA4^Do), 55)
549
+ ; Bi = ROL64((argA5^Du), 39)
550
+ ; KeccakChi
551
+
552
+ if $next != 16
553
+ mov r4, #$next ; stride consumed by m_ld / m_st
554
+ endif
555
+
556
+ m_ld q13, $next
557
+ m_pls $ofs2
558
+ m_ld q14, $next
559
+ m_pls $ofs3
560
+ veor.64 q5, q5, q13
561
+ m_ld q10, $next
562
+ m_pls $ofs4
563
+ veor.64 q6, q6, q14
564
+ m_ld q11, $next
565
+ m_pls $ofs5
566
+ veor.64 q7, q7, q10
567
+ m_ld q12, $next
568
+ veor.64 q8, q8, q11
569
+ veor.64 q9, q9, q12
570
+
571
+ vshl.u64 q13, q5, #41
572
+ vshl.u64 q14, q6, #2
573
+ vshl.u64 q10, q7, #62
574
+ vshl.u64 q11, q8, #55
575
+ vshl.u64 q12, q9, #39
576
+
577
+ vsri.64 q13, q5, #64-41 ; q13 = Bo
578
+ vsri.64 q14, q6, #64-2 ; q14 = Bu
579
+ vsri.64 q11, q8, #64-55 ; q11 = Be
580
+ vsri.64 q12, q9, #64-39 ; q12 = Bi
581
+ vsri.64 q10, q7, #64-62 ; q10 = Ba
582
+
583
+ vbic.64 q5, q12, q11 ; q5 = ~Be & Bi
584
+ vbic.64 q6, q13, q12 ; q6 = ~Bi & Bo
585
+ vbic.64 q7, q14, q13 ; q7 = ~Bo & Bu
586
+ vbic.64 q8, q10, q14 ; q8 = ~Bu & Ba
587
+ vbic.64 q9, q11, q10 ; q9 = ~Ba & Be
588
+ veor.64 q5, q5, q10
589
+ veor.64 q6, q6, q11
590
+ veor.64 q7, q7, q12
591
+ veor.64 q8, q8, q13
592
+ m_pls $ofs1
593
+ veor.64 q9, q9, q14
594
+ m_st q5, $next
595
+ m_pls $ofs2
596
+ veor.64 q0, q0, q5 ; Ca ^= new lane
597
+ m_st q6, $next
598
+ m_pls $ofs3
599
+ veor.64 q1, q1, q6 ; Ce ^= new lane
600
+ m_st q7, $next
601
+ m_pls $ofs4
602
+ veor.64 q2, q2, q7 ; Ci ^= new lane
603
+ m_st q8, $next
604
+ m_pls $ofs5
605
+ veor.64 q3, q3, q8 ; Co ^= new lane
606
+ m_st q9, $next
607
+ m_pls $ofsn1 ; leave r3 on the first lane handled by the next macro
608
+ veor.64 q4, q4, q9 ; Cu ^= new lane
609
+ MEND
610
+
611
+ ;----------------------------------------------------------------------------
612
+ ;
613
+ ; void KeccakP1600times2_StaticInitialize( void )
614
+ ;
615
+ ALIGN
616
+ EXPORT KeccakP1600times2_StaticInitialize
617
+ KeccakP1600times2_StaticInitialize PROC ; no static setup is performed: returns immediately
618
+ bx lr
619
+ ENDP
620
+
621
+ ;----------------------------------------------------------------------------
622
+ ;
623
+ ; void KeccakP1600times2_InitializeAll( void *states )
624
+ ;
625
+ ALIGN
626
+ EXPORT KeccakP1600times2_InitializeAll
627
+ KeccakP1600times2_InitializeAll PROC ; r0 = states; writes 50 zeroed 64-bit lanes (400 bytes) starting at r0
628
+ vmov.i64 q0, #0
629
+ vmov.i64 q1, #0
630
+ vmov.i64 q2, #0
631
+ vmov.i64 q3, #0 ; d0-d7 are now all zero
632
+ vstm r0!, { d0 - d7 } ; 8 (clear 8 lanes at a time)
633
+ vstm r0!, { d0 - d7 } ; 16
634
+ vstm r0!, { d0 - d7 } ; 24
635
+ vstm r0!, { d0 - d7 } ; 32
636
+ vstm r0!, { d0 - d7 } ; 40
637
+ vstm r0!, { d0 - d7 } ; 48
638
+ vstm r0!, { d0 - d1} ; 50
639
+ bx lr
640
+ ENDP
641
+
642
+
643
+ ;----------------------------------------------------------------------------
644
+ ;
645
+ ; void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
646
+ ;
647
+ ALIGN
648
+ EXPORT KeccakP1600times2_AddByte
649
+ KeccakP1600times2_AddByte PROC ; r0 = states, r1 = instanceIndex, r2 = byte, r3 = offset; XORs one byte into the selected instance
650
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
651
+ lsr r1, r3, #3 ; states += (offset & ~7) * 2
652
+ add r0, r0, r1, LSL #4 ; 16 bytes per lane index: the two instances' lanes are interleaved
653
+ and r3, r3, #7
654
+ add r0, r0, r3 ; states += offset & 7
655
+ ldrb r1, [r0]
656
+ eor r1, r1, r2
657
+ strb r1, [r0]
658
+ bx lr
659
+ ENDP
660
+
661
+ ;----------------------------------------------------------------------------
662
+ ;
663
+ ; void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
664
+ ; unsigned int offset, unsigned int length )
665
+ ;
666
+ ALIGN
667
+ EXPORT KeccakP1600times2_AddBytes
668
+ KeccakP1600times2_AddBytes PROC ; r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length; XORs data into one instance
669
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
670
+ ldr r1, [sp, #0*4] ; r1 = length
671
+ cmp r1, #0
672
+ beq KeccakP1600times2_AddBytes_Exit ; nothing pushed yet, so skip the pop
673
+ push { r4- r7 }
674
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
675
+ add r0, r0, r4, LSL #4
676
+ ands r3, r3, #7 ; if (offset & 7) != 0
677
+ beq KeccakP1600times2_AddBytes_CheckLanes
678
+ add r0, r0, r3 ; states += offset & 7
679
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
680
+ KeccakP1600times2_AddBytes_LoopBytesFirst
681
+ ldrb r4, [r0]
682
+ ldrb r5, [r2], #1
683
+ eor r4, r4, r5
684
+ subs r1, r1, #1 ; length--; strb below leaves the flags intact for the beq
685
+ strb r4, [r0], #1
686
+ beq KeccakP1600times2_AddBytes_Done
687
+ subs r3, r3, #1
688
+ bne KeccakP1600times2_AddBytes_LoopBytesFirst
689
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
690
+ KeccakP1600times2_AddBytes_CheckLanes
691
+ lsrs r3, r1, #3 ; r3 = number of whole lanes left
692
+ beq KeccakP1600times2_AddBytes_CheckBytesLast
693
+ KeccakP1600times2_AddBytes_LoopLanes
694
+ ldr r4, [r0]
695
+ ldr r5, [r0, #4]
696
+ ldr r6, [r2], #4
697
+ ldr r7, [r2], #4
698
+ eor r4, r4, r6
699
+ eor r5, r5, r7
700
+ subs r3, r3, #1 ; str does not alter flags, so bne below tests this result
701
+ str r4, [r0], #4
702
+ str r5, [r0], #12 ; states += 8 (next lane of current state part)
703
+ bne KeccakP1600times2_AddBytes_LoopLanes
704
+ KeccakP1600times2_AddBytes_CheckBytesLast
705
+ ands r1, r1, #7 ; r1 = trailing bytes (length mod 8)
706
+ beq KeccakP1600times2_AddBytes_Done
707
+ KeccakP1600times2_AddBytes_LoopBytesLast
708
+ ldrb r4, [r0]
709
+ ldrb r5, [r2], #1
710
+ eor r4, r4, r5
711
+ subs r1, r1, #1
712
+ strb r4, [r0], #1
713
+ bne KeccakP1600times2_AddBytes_LoopBytesLast
714
+ KeccakP1600times2_AddBytes_Done
715
+ pop { r4- r7 }
716
+ KeccakP1600times2_AddBytes_Exit
717
+ bx lr
718
+ ENDP
719
+
720
+ ;----------------------------------------------------------------------------
721
+ ;
722
+ ; void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
723
+ ;
724
+ ALIGN
725
+ EXPORT KeccakP1600times2_AddLanesAll
726
+ KeccakP1600times2_AddLanesAll PROC ; r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset; XORs laneCount lanes into both instances
727
+ cmp r2, #0
728
+ beq KeccakP1600times2_AddLanesAll_Exit
729
+ add r3, r1, r3, LSL #3 ; r3: data + 8 * laneOffset
730
+ push {r4 - r7}
731
+ KeccakP1600times2_AddLanesAll_Loop
732
+ ldr r4, [r1], #4 ; index 0
733
+ ldr r5, [r1], #4
734
+ ldrd r6, r7, [r0]
735
+ eor r6, r6, r4
736
+ eor r7, r7, r5
737
+ strd r6, r7, [r0], #8 ; r0 now points at the instance-1 copy of this lane
738
+ ldr r4, [r3], #4 ; index 1
739
+ ldr r5, [r3], #4
740
+ ldrd r6, r7, [r0]
741
+ eor r6, r6, r4
742
+ eor r7, r7, r5
743
+ strd r6, r7, [r0], #8
744
+ subs r2, r2, #1
745
+ bne KeccakP1600times2_AddLanesAll_Loop
746
+ pop {r4 - r7}
747
+ KeccakP1600times2_AddLanesAll_Exit
748
+ bx lr
749
+ ENDP
750
+
751
+ ;----------------------------------------------------------------------------
752
+ ;
753
+ ; void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
754
+ ; unsigned int offset, unsigned int length )
755
+ ;
756
+ ALIGN
757
+ EXPORT KeccakP1600times2_OverwriteBytes
758
+ KeccakP1600times2_OverwriteBytes PROC ; r0 = states, r1 = instanceIndex, r2 = data, r3 = offset, [sp] = length; copies data into one instance
759
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
760
+ ldr r1, [sp, #0*4] ; r1 = length
761
+ cmp r1, #0
762
+ beq KeccakP1600times2_OverwriteBytes_Exit ; nothing pushed yet, so skip the pop
763
+ push { r4-r5 }
764
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
765
+ add r0, r0, r4, LSL #4
766
+ ands r3, r3, #7 ; if (offset & 7) != 0
767
+ beq KeccakP1600times2_OverwriteBytes_CheckLanes
768
+ add r0, r0, r3 ; states += offset & 7
769
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
770
+ KeccakP1600times2_OverwriteBytes_LoopBytesFirst
771
+ ldrb r4, [r2], #1
772
+ strb r4, [r0], #1
773
+ subs r1, r1, #1
774
+ beq KeccakP1600times2_OverwriteBytes_Done
775
+ subs r3, r3, #1
776
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
777
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
778
+ KeccakP1600times2_OverwriteBytes_CheckLanes
779
+ lsrs r3, r1, #3 ; r3 = number of whole lanes left
780
+ beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
781
+ KeccakP1600times2_OverwriteBytes_LoopLanes
782
+ ldr r4, [r2], #4
783
+ ldr r5, [r2], #4
784
+ str r4, [r0], #4
785
+ str r5, [r0], #12 ; states += 8 (next lane of current state part)
786
+ subs r3, r3, #1
787
+ bne KeccakP1600times2_OverwriteBytes_LoopLanes
788
+ KeccakP1600times2_OverwriteBytes_CheckBytesLast
789
+ ands r1, r1, #7 ; r1 = trailing bytes (length mod 8)
790
+ beq KeccakP1600times2_OverwriteBytes_Done
791
+ KeccakP1600times2_OverwriteBytes_LoopBytesLast
792
+ ldrb r4, [r2], #1
793
+ subs r1, r1, #1 ; strb below leaves the flags intact for the bne
794
+ strb r4, [r0], #1
795
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
796
+ KeccakP1600times2_OverwriteBytes_Done
797
+ pop { r4- r5 }
798
+ KeccakP1600times2_OverwriteBytes_Exit
799
+ bx lr
800
+ ENDP
801
+
802
+ ;----------------------------------------------------------------------------
803
+ ;
804
+ ; void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
805
+ ;
806
+ ALIGN
807
+ EXPORT KeccakP1600times2_OverwriteLanesAll
808
+ KeccakP1600times2_OverwriteLanesAll PROC ; r0 = states, r1 = data, r2 = laneCount, r3 = laneOffset; copies laneCount lanes into both instances
809
+ cmp r2, #0
810
+ beq KeccakP1600times2_OverwriteLanesAll_Exit
811
+ lsls r12, r1, #32-3 ; Z set only when the low 3 bits of data are zero (8-byte aligned)
812
+ bne KeccakP1600times2_OverwriteLanesAll_Unaligned
813
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
814
+ lsrs r2, r2, #1 ; r2 = lane pairs; C = odd lane pending, Z = no pairs
815
+ bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned
816
+ vldm r1!, { d0 } ; odd count: handle a single lane first
817
+ vldm r3!, { d1 }
818
+ vstm r0!, { d0 - d1 }
819
+ beq KeccakP1600times2_OverwriteLanesAll_Exit ; still tests the lsrs result (vldm/vstm leave flags alone)
820
+ KeccakP1600times2_OverwriteLanesAll_LoopAligned
821
+ vldm r1!, { d0 } ; two lanes per iteration, interleaved as inst0, inst1, inst0, inst1
822
+ vldm r1!, { d2 }
823
+ vldm r3!, { d1 }
824
+ vldm r3!, { d3 }
825
+ subs r2, r2, #1
826
+ vstm r0!, { d0 - d3 }
827
+ bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
828
+ bx lr
829
+ KeccakP1600times2_OverwriteLanesAll_Unaligned
830
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
831
+ push { r4, r5 }
832
+ KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
833
+ ldr r4, [r1], #4 ; word loads are used on the data side in this path
834
+ ldr r5, [r1], #4
835
+ strd r4, r5, [r0], #8
836
+ ldr r4, [r3], #4
837
+ ldr r5, [r3], #4
838
+ subs r2, r2, #1
839
+ strd r4, r5, [r0], #8
840
+ bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
841
+ pop { r4, r5 }
842
+ KeccakP1600times2_OverwriteLanesAll_Exit
843
+ bx lr
844
+ ENDP
845
+
846
+ ;----------------------------------------------------------------------------
847
+ ;
848
+ ; void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
849
+ ;
850
+ ALIGN
851
+ EXPORT KeccakP1600times2_OverwriteWithZeroes
852
+ KeccakP1600times2_OverwriteWithZeroes PROC ; r0 = states, r1 = instanceIndex, r2 = byteCount; zeroes the first byteCount bytes of one instance
853
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
854
+ lsrs r1, r2, #3 ; r1: laneCount
855
+ beq KeccakP1600times2_OverwriteWithZeroes_Bytes
856
+ vmov.i64 d0, #0
857
+ KeccakP1600times2_OverwriteWithZeroes_LoopLanes
858
+ subs r1, r1, #1 ; flags consumed by the bne below (vstm/add do not set flags)
859
+ vstm r0!, { d0 }
860
+ add r0, r0, #8 ; skip the other instance's copy of this lane
861
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
862
+ KeccakP1600times2_OverwriteWithZeroes_Bytes
863
+ ands r2, r2, #7 ; r2: byteCount remaining
864
+ beq KeccakP1600times2_OverwriteWithZeroes_Exit
865
+ movs r3, #0
866
+ KeccakP1600times2_OverwriteWithZeroes_LoopBytes
867
+ subs r2, r2, #1
868
+ strb r3, [r0], #1
869
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
870
+ KeccakP1600times2_OverwriteWithZeroes_Exit
871
+ bx lr
872
+ ENDP
873
+
874
+ ;----------------------------------------------------------------------------
875
+ ;
876
+ ; void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
877
+ ; unsigned int offset, unsigned int length )
878
+ ;
879
+ ALIGN
880
+ EXPORT KeccakP1600times2_ExtractBytes
881
+ KeccakP1600times2_ExtractBytes PROC ; r0 = states, r1 = instanceIndex, r2 = data (written), r3 = offset, [sp] = length
882
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
883
+ ldr r1, [sp, #0*4] ; r1 = length
884
+ cmp r1, #0
885
+ beq KeccakP1600times2_ExtractBytes_Exit ; nothing pushed yet, so skip the pop
886
+ push { r4-r5 }
887
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
888
+ add r0, r0, r4, LSL #4
889
+ ands r3, r3, #7 ; if (offset & 7) != 0
890
+ beq KeccakP1600times2_ExtractBytes_CheckLanes
891
+ add r0, r0, r3 ; states += offset & 7
892
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
893
+ KeccakP1600times2_ExtractBytes_LoopBytesFirst
894
+ ldrb r4, [r0], #1
895
+ strb r4, [r2], #1
896
+ subs r1, r1, #1
897
+ beq KeccakP1600times2_ExtractBytes_Done
898
+ subs r3, r3, #1
899
+ bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
900
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
901
+ KeccakP1600times2_ExtractBytes_CheckLanes
902
+ lsrs r3, r1, #3 ; r3 = number of whole lanes left
903
+ beq KeccakP1600times2_ExtractBytes_CheckBytesLast
904
+ KeccakP1600times2_ExtractBytes_LoopLanes
905
+ ldr r4, [r0], #4
906
+ ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
907
+ str r4, [r2], #4
908
+ str r5, [r2], #4
909
+ subs r3, r3, #1
910
+ bne KeccakP1600times2_ExtractBytes_LoopLanes
911
+ KeccakP1600times2_ExtractBytes_CheckBytesLast
912
+ ands r1, r1, #7 ; r1 = trailing bytes (length mod 8)
913
+ beq KeccakP1600times2_ExtractBytes_Done
914
+ KeccakP1600times2_ExtractBytes_LoopBytesLast
915
+ ldrb r4, [r0], #1
916
+ subs r1, r1, #1 ; strb below leaves the flags intact for the bne
917
+ strb r4, [r2], #1
918
+ bne KeccakP1600times2_ExtractBytes_LoopBytesLast
919
+ KeccakP1600times2_ExtractBytes_Done
920
+ pop { r4-r5 }
921
+ KeccakP1600times2_ExtractBytes_Exit
922
+ bx lr
923
+ ENDP
924
+
925
+ ;----------------------------------------------------------------------------
926
+ ;
927
+ ; void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
928
+ ;
929
+ ALIGN
930
+ EXPORT KeccakP1600times2_ExtractLanesAll
931
+ KeccakP1600times2_ExtractLanesAll PROC ; r0 = states, r1 = data (written), r2 = laneCount, r3 = laneOffset; copies lanes of both instances out
932
+ cmp r2, #0
933
+ beq KeccakP1600times2_ExtractLanesAll_Exit
934
+ lsls r12, r1, #32-3 ; Z set only when the low 3 bits of data are zero (8-byte aligned)
935
+ bne KeccakP1600times2_ExtractLanesAll_Unaligned
936
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
937
+ lsrs r2, r2, #1 ; r2 = lane pairs; C = odd lane pending, Z = no pairs
938
+ bcc KeccakP1600times2_ExtractLanesAll_LoopAligned
939
+ vldm r0!, { d0 - d1 } ; odd count: handle a single lane first
940
+ vstm r1!, { d0 }
941
+ vstm r3!, { d1 }
942
+ beq KeccakP1600times2_ExtractLanesAll_Exit ; still tests the lsrs result (vldm/vstm leave flags alone)
943
+ KeccakP1600times2_ExtractLanesAll_LoopAligned
944
+ vldm r0!, { d0 - d3 } ; two interleaved lanes: d0/d2 -> instance 0, d1/d3 -> instance 1
945
+ subs r2, r2, #1
946
+ vstm r1!, { d0 }
947
+ vstm r1!, { d2 }
948
+ vstm r3!, { d1 }
949
+ vstm r3!, { d3 }
950
+ bne KeccakP1600times2_ExtractLanesAll_LoopAligned
951
+ bx lr
952
+ KeccakP1600times2_ExtractLanesAll_Unaligned
953
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
954
+ push { r4, r5 }
955
+ KeccakP1600times2_ExtractLanesAll_LoopUnaligned
956
+ ldrd r4, r5, [r0], #8
957
+ str r4, [r1], #4 ; word stores are used on the data side in this path
958
+ str r5, [r1], #4
959
+ ldrd r4, r5, [r0], #8
960
+ subs r2, r2, #1
961
+ str r4, [r3], #4
962
+ str r5, [r3], #4
963
+ bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
964
+ pop { r4, r5 }
965
+ KeccakP1600times2_ExtractLanesAll_Exit
966
+ bx lr
967
+ ENDP
968
+
969
+ ;----------------------------------------------------------------------------
970
+ ;
971
+ ; void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
972
+ ; const unsigned char *input, unsigned char *output,
973
+ ; unsigned int offset, unsigned int length )
974
+ ;
975
+ ALIGN
976
+ EXPORT KeccakP1600times2_ExtractAndAddBytes
977
+ KeccakP1600times2_ExtractAndAddBytes PROC ; r0 = states, r1 = instanceIndex, r2 = input, r3 = output, [sp] = offset, [sp+4] = length; output = state ^ input
978
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
979
+ ldr r1, [sp, #1*4] ; r1 = length
980
+ cmp r1, #0
981
+ beq KeccakP1600times2_ExtractAndAddBytes_Exit ; nothing pushed yet, so skip the pop
982
+ push { r4 - r9 }
983
+ ldr r8, [sp, #6*4] ; r8 = offset
984
+ lsr r4, r8, #3 ; states += (offset & ~7) * 2
985
+ add r0, r0, r4, LSL #4
986
+ ands r8, r8, #7 ; if (offset & 7) != 0
987
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
988
+ add r0, r0, r8 ; states += offset & 7
989
+ rsb r8, r8, #8 ; lenInLane = 8 - (offset & 7)
990
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
991
+ ldrb r4, [r0], #1
992
+ ldrb r5, [r2], #1
993
+ eor r4, r4, r5
994
+ strb r4, [r3], #1
995
+ subs r1, r1, #1
996
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
997
+ subs r8, r8, #1
998
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
999
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
1000
+ KeccakP1600times2_ExtractAndAddBytes_CheckLanes
1001
+ lsrs r8, r1, #3 ; r8 = number of whole lanes left
1002
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1003
+ KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1004
+ ldr r4, [r0], #4
1005
+ ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
1006
+ ldr r6, [r2], #4
1007
+ ldr r7, [r2], #4
1008
+ eor r4, r4, r6
1009
+ eor r5, r5, r7
1010
+ str r4, [r3], #4
1011
+ str r5, [r3], #4 ; output is contiguous: advance by one word only
1012
+ subs r8, r8, #1
1013
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1014
+ KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1015
+ ands r1, r1, #7 ; r1 = trailing bytes (length mod 8)
1016
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
1017
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1018
+ ldrb r4, [r0], #1
1019
+ ldrb r5, [r2], #1
1020
+ eor r4, r4, r5
1021
+ strb r4, [r3], #1
1022
+ subs r1, r1, #1
1023
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1024
+ KeccakP1600times2_ExtractAndAddBytes_Done
1025
+ pop { r4 - r9 }
1026
+ KeccakP1600times2_ExtractAndAddBytes_Exit
1027
+ bx lr
1028
+ ENDP
1029
+
1030
+ ;----------------------------------------------------------------------------
1031
+ ;
1032
+ ; void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1033
+ ; const unsigned char *input, unsigned char *output,
1034
+ ; unsigned int laneCount, unsigned int laneOffset )
1035
+ ;
1036
+ ALIGN
1037
+ EXPORT KeccakP1600times2_ExtractAndAddLanesAll
1038
+ KeccakP1600times2_ExtractAndAddLanesAll PROC ; r0 = states, r1 = input, r2 = output, r3 = laneCount, [sp] = laneOffset; output = state ^ input for both instances
1039
+ cmp r3, #0
1040
+ beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1041
+ orr r12, r1, r2
1042
+ lsls r12, r12, #32-3 ; unaligned access if input or output unaligned
1043
+ bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1044
+ push {r4,r5}
1045
+ ldr r12, [sp, #2*4] ; r12 = laneOffset
1046
+ lsrs r3, r3, #1 ; r3 = lane pairs; C = odd lane pending, Z = no pairs (the adds below keep the flags)
1047
+ add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1048
+ add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1049
+ bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1050
+ vldm r0!, { d0 - d1 } ; odd count: handle a single lane first
1051
+ vldm r1!, { d2 }
1052
+ vldm r4!, { d3 }
1053
+ veor q0, q0, q1
1054
+ vstm r2!, { d0 }
1055
+ vstm r5!, { d1 }
1056
+ beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone ; still tests the lsrs result
1057
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1058
+ vldm r0!, { d0 - d3 } ; two interleaved state lanes
1059
+ vldm r1!, { d4 } ; input arranged in q2/q3 in the same interleaved order
1060
+ vldm r1!, { d6 }
1061
+ vldm r4!, { d5 }
1062
+ vldm r4!, { d7 }
1063
+ subs r3, r3, #1
1064
+ veor q0, q0, q2
1065
+ veor q1, q1, q3
1066
+ vstm r2!, { d0 }
1067
+ vstm r2!, { d2 }
1068
+ vstm r5!, { d1 }
1069
+ vstm r5!, { d3 }
1070
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1071
+ KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone
1072
+ pop {r4,r5}
1073
+ bx lr
1074
+ KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1075
+ push {r4-r9}
1076
+ ldr r12, [sp, #6*4] ; r12 = laneOffset
1077
+ add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1078
+ add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1079
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1080
+ ldrd r8, r9, [r0], #8 ; instance 0 lane
1081
+ ldr r6, [r1], #4
1082
+ ldr r7, [r1], #4
1083
+ eor r8, r8, r6
1084
+ eor r9, r9, r7
1085
+ str r8, [r2], #4
1086
+ str r9, [r2], #4
1087
+ ldrd r8, r9, [r0], #8 ; instance 1 lane
1088
+ ldr r6, [r4], #4
1089
+ ldr r7, [r4], #4
1090
+ eor r8, r8, r6
1091
+ eor r9, r9, r7
1092
+ str r8, [r5], #4
1093
+ subs r3, r3, #1 ; str below leaves the flags intact for the bne
1094
+ str r9, [r5], #4
1095
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1096
+ pop { r4 - r9 }
1097
+ KeccakP1600times2_ExtractAndAddLanesAll_Exit
1098
+ bx lr
1099
+ ENDP
1100
+
1101
+ ;----------------------------------------------------------------------------
1102
+ ;
1103
+ ; void KeccakP1600times2_PermuteAll_6rounds( void *states )
1104
+ ;
1105
+ ALIGN
1106
+ EXPORT KeccakP1600times2_PermuteAll_6rounds
1107
+ KeccakP1600times2_PermuteAll_6rounds PROC ; r0 = states; swaps lanes as listed below while building the column parities, then enters the shared loop at Round2
1108
+ adr r1, KeccakP1600times2_Permute_RoundConstants6
1109
+ movs r2, #6+2 ; loop counter drops by 4 per pass: 8 -> 4 after the partial pass from Round2, 4 -> 0 after one full pass
1110
+ vpush {q4-q7}
1111
+ push {r4-r7}
1112
+ sub sp, #4*2*8+8 ;allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1113
+ add r5, sp, #8
1114
+
1115
+ ; ba
1116
+ ; be = me, me = be
1117
+ ; bi = gi, gi = bi
1118
+ ; bo = so, so = bo
1119
+ ; bu = ku, ku = bu
1120
+
1121
+ ; ga = sa, sa = ga
1122
+ ; ge = ke, ke = ge
1123
+ ; go = mo, mo = go
1124
+ ; gu
1125
+
1126
+ ; ka = ma, ma = ka
1127
+ ; ki = si, si = ki
1128
+ ; ko
1129
+
1130
+ ; mu = su, su = mu
1131
+ ; mi
1132
+ ; se
1133
+
1134
+ ;PrepareTheta
1135
+ ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1136
+ ; Ce = be ^ ge ^ ke ^ me ^ se
1137
+ ; Ci = bi ^ gi ^ ki ^ mi ^ si
1138
+ ; Co = bo ^ go ^ ko ^ mo ^ so
1139
+ ; Cu = bu ^ gu ^ ku ^ mu ^ su
1140
+ vldm r0, { q0 - q4 } ; ba be bi bo bu
1141
+ bic r5, #15 ; r5 = 16-byte aligned scratch pointer inside the area just allocated
1142
+ add r3, r0, #_me
1143
+ vldm r3, { q6 } ; me
1144
+ vstm r3, { q1 }
1145
+ veor.64 q1, q1, q6
1146
+ add r4, r0, #_be
1147
+ vstm r4!, { q6 } ; be
1148
+
1149
+ add r3, r0, #_ga
1150
+ vldm r3, { q10 - q14 } ; ga ge gi go gu
1151
+ add r3, r0, #_gi
1152
+ vstm r3, { q2 }
1153
+ veor.64 q2, q2, q12
1154
+ vstm r4!, { q12 } ; bi
1155
+
1156
+ add r3, r0, #_so
1157
+ vldm r3, { q8 } ; so
1158
+ vstm r3, { q3 }
1159
+ veor.64 q3, q3, q8
1160
+ vstm r4!, { q8 } ; bo
1161
+
1162
+ add r3, r0, #_ku
1163
+ vldm r3, { q9 } ; ku
1164
+ vstm r3, { q4 }
1165
+ veor.64 q4, q4, q9
1166
+ vstm r4!, { q9 } ; bu
1167
+
1168
+ add r3, r0, #_sa
1169
+ vldm r3, { q5 } ; sa
1170
+ vstm r3, { q10 }
1171
+ add r4, r0, #_ga
1172
+ veor.64 q0, q0, q5
1173
+ veor.64 q0, q0, q10
1174
+ vstm r4!, { q5 } ; ga
1175
+
1176
+ add r3, r0, #_ke
1177
+ vldm r3, { q6 } ; ke
1178
+ vstm r3, { q11 }
1179
+ veor.64 q1, q1, q6
1180
+ veor.64 q1, q1, q11
1181
+ vstm r4!, { q6 } ; ge
1182
+
1183
+ add r3, r0, #_mo
1184
+ vldm r3, { q8 } ; mo
1185
+ vstm r3, { q13 }
1186
+ add r4, r0, #_go
1187
+ veor.64 q3, q3, q8
1188
+ veor.64 q3, q3, q13
1189
+ vstm r4!, { q8 } ; go
1190
+ veor.64 q4, q4, q14 ; gu
1191
+
1192
+ add r4, r0, #_ka ; ka
1193
+ vldm r4, { q10 }
1194
+ add r3, r0, #_ma
1195
+ vldm r3, { q5 } ; ma
1196
+ vstm r3, { q10 }
1197
+ veor.64 q0, q0, q5
1198
+ veor.64 q0, q0, q10
1199
+ vstm r4!, { q5 } ; ka
1200
+
1201
+ add r4, r0, #_ki ; ki ko
1202
+ vldm r4, { q12, q13 }
1203
+ add r3, r0, #_si
1204
+ vldm r3, { q7 } ; si
1205
+ vstm r3, { q12 }
1206
+ veor.64 q2, q2, q7
1207
+ veor.64 q2, q2, q12
1208
+ vstm r4, { q7 } ; ki
1209
+ veor.64 q3, q3, q13 ; ko
1210
+
1211
+ add r4, r0, #_mu ; mu
1212
+ vldm r4, { q14 }
1213
+ add r3, r0, #_su
1214
+ vldm r3, { q9 } ; su
1215
+ vstm r3, { q14 }
1216
+ veor.64 q4, q4, q9
1217
+ veor.64 q4, q4, q14
1218
+ vstm r4, { q9 } ; mu
1219
+
1220
+ add r4, r0, #_mi ; mi
1221
+ vldm r4, { q12 }
1222
+ veor.64 q2, q2, q12
1223
+ add r3, r0, #_se ; se
1224
+ vldm r3, { q6 }
1225
+ veor.64 q1, q1, q6
1226
+
1227
+ mov r3, r0 ; r3 -> first lane, as the first round macro expects
1228
+ b KeccakP1600times2_PermuteAll_Round2 ; the shared epilogue there undoes the pushes and stack allocation made here
1229
+ ENDP
1230
+
1231
+ ALIGN
1232
+ KeccakP1600times2_Permute_RoundConstants24 ; one contiguous table of 24 constants; the labels below mark where the shorter variants start reading
1233
+ dcq 0x0000000000000001
1234
+ dcq 0x0000000000008082
1235
+ dcq 0x800000000000808a
1236
+ dcq 0x8000000080008000
1237
+ dcq 0x000000000000808b
1238
+ dcq 0x0000000080000001
1239
+ dcq 0x8000000080008081
1240
+ dcq 0x8000000000008009
1241
+ dcq 0x000000000000008a
1242
+ dcq 0x0000000000000088
1243
+ dcq 0x0000000080008009
1244
+ dcq 0x000000008000000a
1245
+ KeccakP1600times2_Permute_RoundConstants12 ; last 12 entries
1246
+ dcq 0x000000008000808b
1247
+ dcq 0x800000000000008b
1248
+ dcq 0x8000000000008089
1249
+ dcq 0x8000000000008003
1250
+ dcq 0x8000000000008002
1251
+ dcq 0x8000000000000080
1252
+ KeccakP1600times2_Permute_RoundConstants6 ; last 6 entries
1253
+ dcq 0x000000000000800a
1254
+ dcq 0x800000008000000a
1255
+ KeccakP1600times2_Permute_RoundConstants4 ; last 4 entries
1256
+ dcq 0x8000000080008081
1257
+ dcq 0x8000000000008080
1258
+ dcq 0x0000000080000001
1259
+ dcq 0x8000000080008008
1260
+
1261
+ ;----------------------------------------------------------------------------
1262
+ ;
1263
+ ; void KeccakP1600times2_PermuteAll_24rounds( void *states )
1264
+ ;
1265
+ ALIGN
1266
+ EXPORT KeccakP1600times2_PermuteAll_24rounds
1267
+ KeccakP1600times2_PermuteAll_24rounds PROC ; r0 = states; tail-branches into the shared permutation
1268
+ adr r1, KeccakP1600times2_Permute_RoundConstants24 ; r1 = first round constant to use
1269
+ movs r2, #24 ; r2 = number of rounds
1270
+ b KeccakP1600times2_PermuteAll
1271
+ ENDP
1272
+
1273
+ ;----------------------------------------------------------------------------
1274
+ ;
1275
+ ; void KeccakP1600times2_PermuteAll_12rounds( void *states )
1276
+ ;
1277
+ ALIGN
1278
+ EXPORT KeccakP1600times2_PermuteAll_12rounds
1279
+ KeccakP1600times2_PermuteAll_12rounds PROC ; r0 = states; tail-branches into the shared permutation
1280
+ adr r1, KeccakP1600times2_Permute_RoundConstants12 ; r1 = first round constant to use
1281
+ movs r2, #12 ; r2 = number of rounds
1282
+ b KeccakP1600times2_PermuteAll
1283
+ ENDP
1284
+
1285
+ ;----------------------------------------------------------------------------
1286
+ ;
1287
+ ; void KeccakP1600times2_PermuteAll_4rounds( void *states )
1288
+ ;
1289
+ ALIGN
1290
+ EXPORT KeccakP1600times2_PermuteAll_4rounds
1291
+ KeccakP1600times2_PermuteAll_4rounds PROC ; r0 = states; tail-branches into the shared permutation
1292
+ adr r1, KeccakP1600times2_Permute_RoundConstants4 ; r1 = first round constant to use
1293
+ movs r2, #4 ; r2 = number of rounds
1294
+ b KeccakP1600times2_PermuteAll
1295
+ ENDP
1296
+
1297
+ ;----------------------------------------------------------------------------
1298
+ ;
1299
+ ; void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1300
+ ;
1301
+ ALIGN
1302
+ KeccakP1600times2_PermuteAll PROC ; r0 = states, r1 = round constants, r2 = round count (decremented by 4 per loop pass); not EXPORTed
1303
+ vpush {q4-q7}
1304
+ push {r4-r7}
1305
+ sub sp, #4*2*8+8 ;allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1306
+ mov r3, r0
1307
+ add r5, sp, #8
1308
+
1309
+ ;PrepareTheta
1310
+ ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1311
+ ; Ce = be ^ ge ^ ke ^ me ^ se
1312
+ ; Ci = bi ^ gi ^ ki ^ mi ^ si
1313
+ ; Co = bo ^ go ^ ko ^ mo ^ so
1314
+ ; Cu = bu ^ gu ^ ku ^ mu ^ su
1315
+ vld1.64 { d0, d1, d2, d3 }, [r3:256]! ; _ba _be
1316
+ bic r5, #15 ; r5 = 16-byte aligned scratch pointer inside the area just allocated
1317
+ vld1.64 { d4, d5, d6, d7 }, [r3:256]! ; _bi _bo
1318
+ vld1.64 { d8, d9, d10, d11 }, [r3:256]! ; _bu _ga
1319
+ vld1.64 { d12, d13 }, [r3:128]! ; _ge
1320
+ veor.64 q0, q0, q5
1321
+ vld1.64 { d14, d15 }, [r3:128]! ; _gi
1322
+ veor.64 q1, q1, q6
1323
+ vld1.64 { d16, d17 }, [r3:128]! ; _go
1324
+ veor.64 q2, q2, q7
1325
+ vld1.64 { d18, d19 }, [r3:128]! ; _gu
1326
+ veor.64 q3, q3, q8
1327
+ vld1.64 { d10, d11 }, [r3:128]! ; _ka
1328
+ veor.64 q4, q4, q9
1329
+ vld1.64 { d12, d13 }, [r3:128]! ; _ke
1330
+ veor.64 q0, q0, q5
1331
+ vld1.64 { d14, d15 }, [r3:128]! ; _ki
1332
+ veor.64 q1, q1, q6
1333
+ vld1.64 { d16, d17 }, [r3:128]! ; _ko
1334
+ veor.64 q2, q2, q7
1335
+ vld1.64 { d18, d19 }, [r3:128]! ; _ku
1336
+ veor.64 q3, q3, q8
1337
+ vld1.64 { d10, d11 }, [r3:128]! ; _ma
1338
+ veor.64 q4, q4, q9
1339
+ vld1.64 { d12, d13 }, [r3:128]! ; _me
1340
+ veor.64 q0, q0, q5
1341
+ vld1.64 { d14, d15 }, [r3:128]! ; _mi
1342
+ veor.64 q1, q1, q6
1343
+ vld1.64 { d16, d17 }, [r3:128]! ; _mo
1344
+ veor.64 q2, q2, q7
1345
+ vld1.64 { d18, d19 }, [r3:128]! ; _mu
1346
+ veor.64 q3, q3, q8
1347
+ vld1.64 { d10, d11 }, [r3:128]! ; _sa
1348
+ veor.64 q4, q4, q9
1349
+ vld1.64 { d12, d13 }, [r3:128]! ; _se
1350
+ veor.64 q0, q0, q5
1351
+ vld1.64 { d14, d15 }, [r3:128]! ; _si
1352
+ veor.64 q1, q1, q6
1353
+ vld1.64 { d16, d17 }, [r3:128]! ; _so
1354
+ veor.64 q2, q2, q7
1355
+ vld1.64 { d18, d19 }, [r3:128]! ; _su
1356
+ mov r3, r0 ; rewind r3 to the first lane for the first round macro
1357
+ veor.64 q3, q3, q8
1358
+ veor.64 q4, q4, q9
1359
+
1360
+ KeccakP1600times2_PermuteAll_RoundLoop
1361
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka ; _ba, _ge, _ki, _mo, _su
1362
+ KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa ; _ka, _me, _si, _bo, _gu
1363
+ KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga ; _sa, _be, _gi, _ko, _mu
1364
+ KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma ; _ga, _ke, _mi, _so, _bu
1365
+ KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba ; _ma, _se, _bi, _go, _ku
1366
+
1367
+ KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa ; _ba, _me, _gi, _so, _ku
1368
+ KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma ; _sa, _ke, _bi, _mo, _gu
1369
+ KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka ; _ma, _ge, _si, _ko, _bu
1370
+ KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga ; _ka, _be, _mi, _go, _su
1371
+ KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba ; _ga, _se, _ki, _bo, _mu
1372
+ KeccakP1600times2_PermuteAll_Round2
1373
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma ; _ba, _ke, _si, _go, _mu
1374
+ KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga ; _ma, _be, _ki, _so, _gu
1375
+ KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa ; _ga, _me, _bi, _ko, _su
1376
+ KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka ; _sa, _ge, _mi, _bo, _ku
1377
+ KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba ; _ka, _se, _gi, _mo, _bu
1378
+
1379
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga ; _ba, _be, _bi, _bo, _bu
1380
+ KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka ; _ga, _ge, _gi, _go, _gu
1381
+ KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma ; _ka, _ke, _ki, _ko, _ku
1382
+ KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa ; _ma, _me, _mi, _mo, _mu
1383
+ subs r2, #4 ; sets the flags for the bne below; the macro in between contains no flag-setting instruction
1384
+ KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba ; _sa, _se, _si, _so, _su
1385
+ bne KeccakP1600times2_PermuteAll_RoundLoop
1386
+ add sp, #4*2*8+8 ; free 4.5 D lanes
1387
+ pop {r4-r7}
1388
+ vpop {q4-q7}
1389
+ bx lr
1390
+ ENDP
1391
+
1392
+ END