sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1392 @@
1
+ ;
2
+ ; The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
+ ;
4
+ ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
+ ;
6
+ ; For more information, feedback or questions, please refer to the Keccak Team website:
7
+ ; https://keccak.team/
8
+ ;
9
+ ; To the extent possible under law, the implementer has waived all copyright
10
+ ; and related or neighboring rights to the source code in this file.
11
+ ; http://creativecommons.org/publicdomain/zero/1.0/
12
+ ;
13
+ ; ---
14
+ ;
15
+ ; This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
+ ; Please refer to PlSnP-documentation.h for more details.
17
+ ;
18
+ ; This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
+ ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ ;
21
+
22
+ ; WARNING: These functions work only on little endian CPU with ARMv7A + NEON architecture
23
+ ; WARNING: State must be 256 bit (32 bytes) aligned, best is 64-byte (cache alignment).
24
+
25
+ ; INFO: Tested on Cortex-A8 (BeagleBone Black), using gcc.
26
+ ; INFO: Parallel execution of Keccak-P permutation on 2 lane interleaved states.
27
+
28
+ ; INFO: KeccakP1600times2_PermuteAll_12rounds() execution time is 7690 cycles on a Cortex-A8 (BeagleBone Black)
29
+
30
+
31
+ PRESERVE8
32
+ AREA |.text|, CODE, READONLY
33
+
34
+ ;----------------------------------------------------------------------------
35
+
36
+ ; --- offsets in state: byte offset of each of the 25 lane slots, slots are 16 bytes apart (slot index * 16); names are row letter (b,g,k,m,s) followed by column letter (a,e,i,o,u)
37
+ _ba equ 0*16 ; first slot, offset 0
38
+ _be equ 1*16
39
+ _bi equ 2*16
40
+ _bo equ 3*16
41
+ _bu equ 4*16
42
+ _ga equ 5*16
43
+ _ge equ 6*16
44
+ _gi equ 7*16
45
+ _go equ 8*16
46
+ _gu equ 9*16
47
+ _ka equ 10*16
48
+ _ke equ 11*16
49
+ _ki equ 12*16
50
+ _ko equ 13*16
51
+ _ku equ 14*16
52
+ _ma equ 15*16
53
+ _me equ 16*16
54
+ _mi equ 17*16
55
+ _mo equ 18*16
56
+ _mu equ 19*16
57
+ _sa equ 20*16
58
+ _se equ 21*16
59
+ _si equ 22*16
60
+ _so equ 23*16
61
+ _su equ 24*16 ; last slot, offset 384
62
+
63
+ ; --- macros for Single permutation
64
+
65
+ MACRO ; KeccakS_ThetaRhoPiChiIota: single-state Theta preparation plus Rho/Pi/Chi/Iota on one plane. $argA1 is a byte offset from r0 (that lane is loaded and stored in memory); $argA2..$argA5 are D registers updated in place; one 64-bit round constant is read from [r1] and r1 is advanced. Writes d0-d11.
66
+ KeccakS_ThetaRhoPiChiIota $argA1, $argA2, $argA3, $argA4, $argA5
67
+
68
+ ; Prepare Theta: build the column parities Ca..Cu in d0..d4, then the Theta offsets Da in d7 and De,Di,Do,Du in d8,d9,d10,d11
69
+ ; Ca = Aba^Aga^Aka^Ama^Asa
70
+ ; Ce = Abe^Age^Ake^Ame^Ase
71
+ ; Ci = Abi^Agi^Aki^Ami^Asi
72
+ ; Co = Abo^Ago^Ako^Amo^Aso
73
+ ; Cu = Abu^Agu^Aku^Amu^Asu
74
+ ; De = Ca^ROL64(Ci, 1)
75
+ ; Di = Ce^ROL64(Co, 1)
76
+ ; Do = Ci^ROL64(Cu, 1)
77
+ ; Du = Co^ROL64(Ca, 1)
78
+ ; Da = Cu^ROL64(Ce, 1)
79
+ veor.64 q4, q6, q7
80
+ veor.64 q5, q9, q10
81
+ veor.64 d8, d8, d9
82
+ veor.64 d10, d10, d11
83
+ veor.64 d1, d8, d16 ; d1 = Ce (used as Ce by the De/Di/Da computations below)
84
+ veor.64 d2, d10, d17 ; d2 = Ci
85
+
86
+ veor.64 q4, q11, q12
87
+ veor.64 q5, q14, q15
88
+ veor.64 d8, d8, d9
89
+ veor.64 d10, d10, d11
90
+ veor.64 d3, d8, d26 ; d3 = Co
91
+
92
+ vadd.u64 q4, q1, q1 ; x+x is x shifted left by 1; paired with the vsri #63 below this forms ROL64(x, 1)
93
+ veor.64 d4, d10, d27 ; d4 = Cu
94
+ vmov.64 d0, d5 ; d0 = Ca; d5 is the running Ca that the KeccakS_ThetaRhoPiChi1..4 macros accumulate
95
+ vsri.64 q4, q1, #63 ; q4 = ROL64(Ci, 1) : ROL64(Co, 1)
96
+
97
+ vadd.u64 q5, q2, q2
98
+ veor.64 q4, q4, q0 ; d8 = De, d9 = Di
99
+ vsri.64 q5, q2, #63 ; q5 = ROL64(Cu, 1) : ROL64(Ca, 1)
100
+ vadd.u64 d7, d1, d1
101
+ veor.64 $argA2, $argA2, d8 ; argA2 ^= De
102
+ veor.64 q5, q5, q1 ; d10 = Do, d11 = Du
103
+
104
+ vsri.64 d7, d1, #63 ; d7 = ROL64(Ce, 1); must read d1 before the next line overwrites it
105
+ vshl.u64 d1, $argA2, #44
106
+ veor.64 $argA3, $argA3, d9 ; argA3 ^= Di
107
+ veor.64 d7, d7, d4 ; d7 = Da
108
+
109
+ ; Ba = argA1^Da
110
+ ; Be = ROL64((argA2^De), 44)
111
+ ; Bi = ROL64((argA3^Di), 43)
112
+ ; Bo = ROL64((argA4^Do), 21)
113
+ ; Bu = ROL64((argA5^Du), 14)
114
+ ; argA2 = Be ^((~Bi)& Bo )
115
+ ; argA3 = Bi ^((~Bo)& Bu )
116
+ ; argA4 = Bo ^((~Bu)& Ba )
117
+ ; argA5 = Bu ^((~Ba)& Be )
118
+ ; argA1 = Ba ^((~Be)& Bi )
119
+ ; argA1 ^= KeccakP1600RoundConstants[i+round]
120
+ vsri.64 d1, $argA2, #64-44 ; d1 = Be
121
+ vshl.u64 d2, $argA3, #43
122
+ vldr.64 d0, [r0, #$argA1] ; load the argA1 lane from memory at r0 + $argA1
123
+ veor.64 $argA4, $argA4, d10 ; argA4 ^= Do
124
+ vsri.64 d2, $argA3, #64-43 ; d2 = Bi
125
+ vshl.u64 d3, $argA4, #21
126
+ veor.64 $argA5, $argA5, d11 ; argA5 ^= Du
127
+ veor.64 d0, d0, d7 ; d0 = Ba
128
+ vsri.64 d3, $argA4, #64-21 ; d3 = Bo
129
+ vbic.64 d5, d2, d1 ; d5 = Bi AND NOT Be
130
+ vshl.u64 d4, $argA5, #14
131
+ vbic.64 $argA2, d3, d2 ; argA2 = Bo AND NOT Bi
132
+ vld1.64 d6, [r1]! ; d6 = round constant; r1 post-incremented to the next constant
133
+ veor.64 d5, d0
134
+ vsri.64 d4, $argA5, #64-14 ; d4 = Bu
135
+ veor.64 d5, d6 ; d5 = new argA1 including the round constant
136
+ vbic.64 $argA5, d1, d0 ; argA5 = Be AND NOT Ba
137
+ vbic.64 $argA3, d4, d3 ; argA3 = Bu AND NOT Bo
138
+ vbic.64 $argA4, d0, d4 ; argA4 = Ba AND NOT Bu
139
+ veor.64 $argA2, d1
140
+ vstr.64 d5, [r0, #$argA1] ; store new argA1; d5 keeps the value as the start of the next Ca
141
+ veor.64 $argA3, d2
142
+ veor.64 $argA4, d3
143
+ veor.64 $argA5, d4
144
+ MEND
145
+
146
+ MACRO ; KeccakS_ThetaRhoPiChi1: single-state Theta/Rho/Pi/Chi for one plane with rotations 3,45,61,28,20. $argA1 is a byte offset from r0; $argA2..$argA5 are D registers updated in place. Reads Da from d7 and De,Di,Do,Du from d8..d11 (presumably left there by KeccakS_ThetaRhoPiChiIota - confirm at the call site). XORs the new argA1 into d5. Writes d0-d6.
147
+ KeccakS_ThetaRhoPiChi1 $argA1, $argA2, $argA3, $argA4, $argA5
148
+
149
+ ; Bi = ROL64((argA1^Da), 3)
150
+ ; Bo = ROL64((argA2^De), 45)
151
+ ; Bu = ROL64((argA3^Di), 61)
152
+ ; Ba = ROL64((argA4^Do), 28)
153
+ ; Be = ROL64((argA5^Du), 20)
154
+ ; argA1 = Ba ^((~Be)& Bi )
155
+ ; Ca ^= argA1
156
+ ; argA2 = Be ^((~Bi)& Bo )
157
+ ; argA3 = Bi ^((~Bo)& Bu )
158
+ ; argA4 = Bo ^((~Bu)& Ba )
159
+ ; argA5 = Bu ^((~Ba)& Be )
160
+ veor.64 $argA2, $argA2, d8 ; argA2 ^= De
161
+ veor.64 $argA3, $argA3, d9 ; argA3 ^= Di
162
+ vshl.u64 d3, $argA2, #45
163
+ vldr.64 d6, [r0, #$argA1] ; load the argA1 lane from memory at r0 + $argA1
164
+ vshl.u64 d4, $argA3, #61
165
+ veor.64 $argA4, $argA4, d10 ; argA4 ^= Do
166
+ vsri.64 d3, $argA2, #64-45 ; d3 = Bo
167
+ veor.64 $argA5, $argA5, d11 ; argA5 ^= Du
168
+ vsri.64 d4, $argA3, #64-61 ; d4 = Bu
169
+ vshl.u64 d0, $argA4, #28
170
+ veor.64 d6, d6, d7 ; d6 = argA1 ^ Da
171
+ vshl.u64 d1, $argA5, #20
172
+ vbic.64 $argA3, d4, d3 ; argA3 = Bu AND NOT Bo
173
+ vsri.64 d0, $argA4, #64-28 ; d0 = Ba
174
+ vbic.64 $argA4, d0, d4 ; argA4 = Ba AND NOT Bu
175
+ vshl.u64 d2, d6, #3
176
+ vsri.64 d1, $argA5, #64-20 ; d1 = Be
177
+ veor.64 $argA4, d3
178
+ vsri.64 d2, d6, #64-3 ; d2 = Bi
179
+ vbic.64 $argA5, d1, d0 ; argA5 = Be AND NOT Ba
180
+ vbic.64 d6, d2, d1 ; d6 = Bi AND NOT Be
181
+ vbic.64 $argA2, d3, d2 ; argA2 = Bo AND NOT Bi
182
+ veor.64 d6, d0 ; d6 = new argA1
183
+ veor.64 $argA2, d1
184
+ vstr.64 d6, [r0, #$argA1] ; store new argA1 back to memory
185
+ veor.64 $argA3, d2
186
+ veor.64 d5, d6 ; Ca ^= argA1 (d5 is the running Ca)
187
+ veor.64 $argA5, d4
188
+ MEND
189
+
190
+ MACRO ; KeccakS_ThetaRhoPiChi2: single-state Theta/Rho/Pi/Chi for one plane with rotations 18,1,6,25,8. $argA1 is a byte offset from r0; $argA2..$argA5 are D registers updated in place. Reads Da from d7 and De,Di,Do,Du from d8..d11; XORs the new argA1 into d5. Writes d0-d4 and d6.
191
+ KeccakS_ThetaRhoPiChi2 $argA1, $argA2, $argA3, $argA4, $argA5
192
+
193
+ ; Bu = ROL64((argA1^Da), 18)
194
+ ; Ba = ROL64((argA2^De), 1)
195
+ ; Be = ROL64((argA3^Di), 6)
196
+ ; Bi = ROL64((argA4^Do), 25)
197
+ ; Bo = ROL64((argA5^Du), 8)
198
+ ; argA1 = Ba ^((~Be)& Bi )
199
+ ; Ca ^= argA1;
200
+ ; argA2 = Be ^((~Bi)& Bo )
201
+ ; argA3 = Bi ^((~Bo)& Bu )
202
+ ; argA4 = Bo ^((~Bu)& Ba )
203
+ ; argA5 = Bu ^((~Ba)& Be )
204
+ veor.64 $argA3, $argA3, d9 ; argA3 ^= Di
205
+ veor.64 $argA4, $argA4, d10 ; argA4 ^= Do
206
+ vshl.u64 d1, $argA3, #6
207
+ vldr.64 d6, [r0, #$argA1] ; load the argA1 lane from memory at r0 + $argA1
208
+ vshl.u64 d2, $argA4, #25
209
+ veor.64 $argA5, $argA5, d11 ; argA5 ^= Du
210
+ vsri.64 d1, $argA3, #64-6 ; d1 = Be
211
+ veor.64 $argA2, $argA2, d8 ; argA2 ^= De
212
+ vsri.64 d2, $argA4, #64-25 ; d2 = Bi
213
+ vext.8 d3, $argA5, $argA5, #7 ; d3 = Bo: rotate left by 8 bits done as a one-byte rotate
214
+ veor.64 d6, d6, d7 ; d6 = argA1 ^ Da
215
+ vbic.64 $argA3, d2, d1 ; argA3 = Bi AND NOT Be (temporary, becomes new argA1 below)
216
+ vadd.u64 d0, $argA2, $argA2 ; x+x is x shifted left by 1
217
+ vbic.64 $argA4, d3, d2 ; argA4 = Bo AND NOT Bi (temporary for new argA2)
218
+ vsri.64 d0, $argA2, #64-1 ; d0 = Ba
219
+ vshl.u64 d4, d6, #18
220
+ veor.64 $argA2, d1, $argA4 ; new argA2 = Be ^ (Bo AND NOT Bi)
221
+ veor.64 $argA3, d0 ; $argA3 temporarily holds the new argA1 value
222
+ vsri.64 d4, d6, #64-18 ; d4 = Bu
223
+ vstr.64 $argA3, [r0, #$argA1] ; store new argA1 back to memory
224
+ veor.64 d5, $argA3 ; Ca ^= argA1 (d5 is the running Ca)
225
+ vbic.64 $argA5, d1, d0 ; argA5 = Be AND NOT Ba
226
+ vbic.64 $argA3, d4, d3 ; argA3 = Bu AND NOT Bo
227
+ vbic.64 $argA4, d0, d4 ; argA4 = Ba AND NOT Bu
228
+ veor.64 $argA3, d2
229
+ veor.64 $argA4, d3
230
+ veor.64 $argA5, d4
231
+ MEND
232
+
233
+ MACRO ; KeccakS_ThetaRhoPiChi3: single-state Theta/Rho/Pi/Chi for one plane with rotations 36,10,15,56,27. $argA1 is a byte offset from r0; $argA2..$argA5 are D registers updated in place. Reads Da from d7 and De,Di,Do,Du from d8..d11; XORs the new argA1 into d5. Writes d0-d6.
234
+ KeccakS_ThetaRhoPiChi3 $argA1, $argA2, $argA3, $argA4, $argA5
235
+
236
+ ; Be = ROL64((argA1^Da), 36)
237
+ ; Bi = ROL64((argA2^De), 10)
238
+ ; Bo = ROL64((argA3^Di), 15)
239
+ ; Bu = ROL64((argA4^Do), 56)
240
+ ; Ba = ROL64((argA5^Du), 27)
241
+ ; argA1 = Ba ^((~Be)& Bi )
242
+ ; Ca ^= argA1
243
+ ; argA2 = Be ^((~Bi)& Bo )
244
+ ; argA3 = Bi ^((~Bo)& Bu )
245
+ ; argA4 = Bo ^((~Bu)& Ba )
246
+ ; argA5 = Bu ^((~Ba)& Be )
247
+ veor.64 $argA2, $argA2, d8 ; argA2 ^= De
248
+ veor.64 $argA3, $argA3, d9 ; argA3 ^= Di
249
+ vshl.u64 d2, $argA2, #10
250
+ vldr.64 d6, [r0, #$argA1] ; load the argA1 lane from memory at r0 + $argA1
251
+ vshl.u64 d3, $argA3, #15
252
+ veor.64 $argA4, $argA4, d10 ; argA4 ^= Do
253
+ vsri.64 d2, $argA2, #64-10 ; d2 = Bi
254
+ vsri.64 d3, $argA3, #64-15 ; d3 = Bo
255
+ veor.64 $argA5, $argA5, d11 ; argA5 ^= Du
256
+ vext.8 d4, $argA4, $argA4, #1 ; d4 = Bu: rotate left by 56 bits done as a one-byte rotate right
257
+ vbic.64 $argA2, d3, d2 ; argA2 = Bo AND NOT Bi
258
+ vshl.u64 d0, $argA5, #27
259
+ veor.64 d6, d6, d7 ; d6 = argA1 ^ Da
260
+ vbic.64 $argA3, d4, d3 ; argA3 = Bu AND NOT Bo
261
+ vsri.64 d0, $argA5, #64-27 ; d0 = Ba
262
+ vshl.u64 d1, d6, #36
263
+ veor.64 $argA3, d2
264
+ vbic.64 $argA4, d0, d4 ; argA4 = Ba AND NOT Bu
265
+ vsri.64 d1, d6, #64-36 ; d1 = Be
266
+ veor.64 $argA4, d3
267
+ vbic.64 d6, d2, d1 ; d6 = Bi AND NOT Be
268
+ vbic.64 $argA5, d1, d0 ; argA5 = Be AND NOT Ba
269
+ veor.64 d6, d0 ; d6 = new argA1
270
+ veor.64 $argA2, d1
271
+ vstr.64 d6, [r0, #$argA1] ; store new argA1 back to memory
272
+ veor.64 d5, d6 ; Ca ^= argA1 (d5 is the running Ca)
273
+ veor.64 $argA5, d4
274
+ MEND
275
+
276
+ MACRO ; KeccakS_ThetaRhoPiChi4: single-state Theta/Rho/Pi/Chi for one plane with rotations 41,2,62,55,39. $argA1 is a byte offset from r0; $argA2..$argA5 are D registers updated in place. Reads Da from d7 and De,Di,Do,Du from d8..d11; XORs the new argA1 into d5. Writes d0-d4 and d6.
277
+ KeccakS_ThetaRhoPiChi4 $argA1, $argA2, $argA3, $argA4, $argA5
278
+
279
+ ; Bo = ROL64((argA1^Da), 41)
280
+ ; Bu = ROL64((argA2^De), 2)
281
+ ; Ba = ROL64((argA3^Di), 62)
282
+ ; Be = ROL64((argA4^Do), 55)
283
+ ; Bi = ROL64((argA5^Du), 39)
284
+ ; argA1 = Ba ^((~Be)& Bi )
285
+ ; Ca ^= argA1
286
+ ; argA2 = Be ^((~Bi)& Bo )
287
+ ; argA3 = Bi ^((~Bo)& Bu )
288
+ ; argA4 = Bo ^((~Bu)& Ba )
289
+ ; argA5 = Bu ^((~Ba)& Be )
290
+ veor.64 $argA2, $argA2, d8 ; argA2 ^= De
291
+ veor.64 $argA3, $argA3, d9 ; argA3 ^= Di
292
+ vshl.u64 d4, $argA2, #2
293
+ veor.64 $argA5, $argA5, d11 ; argA5 ^= Du
294
+ vshl.u64 d0, $argA3, #62
295
+ vldr.64 d6, [r0, #$argA1] ; load the argA1 lane from memory at r0 + $argA1
296
+ vsri.64 d4, $argA2, #64-2 ; d4 = Bu
297
+ veor.64 $argA4, $argA4, d10 ; argA4 ^= Do
298
+ vsri.64 d0, $argA3, #64-62 ; d0 = Ba
299
+ vshl.u64 d1, $argA4, #55
300
+ veor.64 d6, d6, d7 ; d6 = argA1 ^ Da
301
+ vshl.u64 d2, $argA5, #39
302
+ vsri.64 d1, $argA4, #64-55 ; d1 = Be
303
+ vbic.64 $argA4, d0, d4 ; argA4 = Ba AND NOT Bu
304
+ vsri.64 d2, $argA5, #64-39 ; d2 = Bi
305
+ vbic.64 $argA2, d1, d0 ; $argA2 used as a temporary: Be AND NOT Ba
306
+ vshl.u64 d3, d6, #41
307
+ veor.64 $argA5, d4, $argA2 ; new argA5 = Bu ^ (Be AND NOT Ba)
308
+ vbic.64 $argA2, d2, d1 ; $argA2 used as a temporary: Bi AND NOT Be
309
+ vsri.64 d3, d6, #64-41 ; d3 = Bo
310
+ veor.64 d6, d0, $argA2 ; d6 = new argA1
311
+ vbic.64 $argA2, d3, d2 ; argA2 = Bo AND NOT Bi
312
+ vbic.64 $argA3, d4, d3 ; argA3 = Bu AND NOT Bo
313
+ veor.64 $argA2, d1
314
+ vstr.64 d6, [r0, #$argA1] ; store new argA1 back to memory
315
+ veor.64 d5, d6 ; Ca ^= argA1 (d5 is the running Ca)
316
+ veor.64 $argA3, d2
317
+ veor.64 $argA4, d3
318
+ MEND
319
+
320
+ ; --- macros for Parallel permutation
321
+
322
+ MACRO ; m_pls: point r3 at r0 + $start; emits no instruction when $start is -1, leaving r3 as it was
323
+ m_pls $start
324
+ if $start != -1 ; -1 means the caller wants r3 left unchanged
325
+ add r3, r0, #$start ; r3 = r0 + $start
326
+ endif
327
+ MEND
328
+
329
+ MACRO ; m_ld: load Q register $qreg from the 128-bit aligned address in r3, then post-increment r3
330
+ m_ld $qreg, $next
331
+ if $next == 16 ; a stride of 16 equals the transfer size, so plain writeback is enough
332
+ vld1.64 { $qreg }, [r3:128]! ; r3 advances by the 16 bytes loaded
333
+ else
334
+ vld1.64 { $qreg }, [r3:128], r4 ; r3 advances by r4, which the caller must have loaded with the stride
335
+ endif
336
+ MEND
337
+
338
+ MACRO ; m_st: store Q register $qreg to the 128-bit aligned address in r3, then post-increment r3
339
+ m_st $qreg, $next
340
+ if $next == 16 ; a stride of 16 equals the transfer size, so plain writeback is enough
341
+ vst1.64 { $qreg }, [r3:128]! ; r3 advances by the 16 bytes stored
342
+ else
343
+ vst1.64 { $qreg }, [r3:128], r4 ; r3 advances by r4, which the caller must have loaded with the stride
344
+ endif
345
+ MEND
346
+
347
+ MACRO ; KeccakP_ThetaRhoPiChiIota: two-state parallel Theta/Rho/Pi/Chi/Iota on one plane using Q registers. On entry q0..q4 hold Ca,Ce,Ci,Co,Cu and r3 already addresses the first lane to load. Results are stored at r0 + $ofs1..$ofs5; De,Di,Do,Du are written to the buffer at r5; one round constant is read from [r1] and r1 is advanced; r3 is finally set from $ofsn1. Writes q0-q15, r3, r6 and (when $next != 16) r4.
348
+ KeccakP_ThetaRhoPiChiIota $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
349
+
350
+ ; De = Ca ^ ROL64(Ci, 1)
351
+ ; Di = Ce ^ ROL64(Co, 1)
352
+ ; Do = Ci ^ ROL64(Cu, 1)
353
+ ; Du = Co ^ ROL64(Ca, 1)
354
+ ; Da = Cu ^ ROL64(Ce, 1)
355
+ vadd.u64 q6, q2, q2 ; x+x is x shifted left by 1; paired with the vsri #63 below this forms ROL64(x, 1)
356
+ vadd.u64 q7, q3, q3
357
+ vadd.u64 q8, q4, q4
358
+ vadd.u64 q9, q0, q0
359
+ vadd.u64 q5, q1, q1
360
+
361
+ vsri.64 q6, q2, #63
362
+ vsri.64 q7, q3, #63
363
+ vsri.64 q8, q4, #63
364
+ vsri.64 q9, q0, #63
365
+ vsri.64 q5, q1, #63
366
+
367
+ veor.64 q6, q6, q0 ; q6 = De
368
+ veor.64 q7, q7, q1 ; q7 = Di
369
+ veor.64 q8, q8, q2 ; q8 = Do
370
+ if $next != 16
371
+ mov r4, #$next ; r4 = post-index stride consumed by m_ld and m_st
372
+ endif
373
+ veor.64 q9, q9, q3 ; q9 = Du
374
+ veor.64 q5, q5, q4 ; q5 = Da
375
+
376
+ ; Ba = argA1^Da (argA1..argA5 are the five lanes loaded through r3 below)
377
+ ; Be = ROL64(argA2^De, 44)
378
+ ; Bi = ROL64(argA3^Di, 43)
379
+ ; Bo = ROL64(argA4^Do, 21)
380
+ ; Bu = ROL64(argA5^Du, 14)
381
+ m_ld q10, $next ; first lane: no m_pls before it, so r3 must be valid on entry
382
+ m_pls $ofs2
383
+ m_ld q1, $next
384
+ m_pls $ofs3
385
+ veor.64 q10, q10, q5 ; q10 = Ba
386
+ m_ld q2, $next
387
+ m_pls $ofs4
388
+ veor.64 q1, q1, q6
389
+ m_ld q3, $next
390
+ m_pls $ofs5
391
+ veor.64 q2, q2, q7
392
+ m_ld q4, $next
393
+ veor.64 q3, q3, q8
394
+ mov r6, r5 ; r6 = working copy of r5 so the post-incrementing stores below leave r5 unchanged
395
+ veor.64 q4, q4, q9
396
+
397
+ vst1.64 { q6 }, [r6:128]! ; save De at [r5]
398
+ vshl.u64 q11, q1, #44
399
+ vshl.u64 q12, q2, #43
400
+ vst1.64 { q7 }, [r6:128]! ; save Di at [r5 + 16]
401
+ vshl.u64 q13, q3, #21
402
+ vshl.u64 q14, q4, #14
403
+ vst1.64 { q8 }, [r6:128]! ; save Do at [r5 + 32]
404
+ vsri.64 q11, q1, #64-44 ; q11 = Be
405
+ vsri.64 q12, q2, #64-43 ; q12 = Bi
406
+ vst1.64 { q9 }, [r6:128]! ; save Du at [r5 + 48]
407
+ vsri.64 q13, q3, #64-21 ; q13 = Bo
408
+ vsri.64 q14, q4, #64-14 ; q14 = Bu
409
+
410
+ ; argA1 = Ba ^(~Be & Bi) ^ KeccakP1600RoundConstants[round]
411
+ ; argA2 = Be ^(~Bi & Bo)
412
+ ; argA3 = Bi ^(~Bo & Bu)
413
+ ; argA4 = Bo ^(~Bu & Ba)
414
+ ; argA5 = Bu ^(~Ba & Be)
415
+ vld1.64 { d30 }, [r1:64] ; low half of q15 = round constant (no writeback yet)
416
+ vbic.64 q0, q12, q11 ; q0 = Bi AND NOT Be
417
+ vbic.64 q1, q13, q12 ; q1 = Bo AND NOT Bi
418
+ vld1.64 { d31 }, [r1:64]! ; high half of q15 = same round constant; r1 advances to the next one
419
+ veor.64 q0, q10
420
+ vbic.64 q4, q11, q10 ; q4 = Be AND NOT Ba
421
+ veor.64 q0, q15 ; apply the round constant to both 64-bit halves
422
+ vbic.64 q2, q14, q13 ; q2 = Bu AND NOT Bo
423
+ vbic.64 q3, q10, q14 ; q3 = Ba AND NOT Bu
424
+
425
+ m_pls $ofs1
426
+ veor.64 q1, q11
427
+ m_st q0, $next
428
+ m_pls $ofs2
429
+ veor.64 q2, q12
430
+ m_st q1, $next
431
+ m_pls $ofs3
432
+ veor.64 q3, q13
433
+ m_st q2, $next
434
+ m_pls $ofs4
435
+ veor.64 q4, q14
436
+ m_st q3, $next
437
+ m_pls $ofs5
438
+ m_st q4, $next
439
+ m_pls $ofsn1 ; leave r3 pointing at the first lane of the next macro (unless $ofsn1 is -1)
440
+ MEND
441
+
442
+ MACRO ; generic middle plane: $Bb1..$Bb5 name the q registers (out of q10-q14) receiving the rotated lanes, $Rr1..$Rr5 the rotation counts
443
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, $Bb1, $Bb2, $Bb3, $Bb4, $Bb5, $Rr1, $Rr2, $Rr3, $Rr4, $Rr5
444
+
445
+ ; Bb1 = ROL64((argA1^Da), Rr1)
446
+ ; Bb2 = ROL64((argA2^De), Rr2)
447
+ ; Bb3 = ROL64((argA3^Di), Rr3)
448
+ ; Bb4 = ROL64((argA4^Do), Rr4)
449
+ ; Bb5 = ROL64((argA5^Du), Rr5)
450
+
451
+ if $next != 16
452
+ mov r4, #$next ; r4 = post-increment stride used by m_ld / m_st
453
+ endif
454
+
455
+ m_ld $Bb1, $next ; first lane is read from wherever r3 points on entry
456
+ m_pls $ofs2
457
+ m_ld $Bb2, $next
458
+ m_pls $ofs3
459
+ veor.64 q15, q5, $Bb1 ; result goes to q15 so that q5 (Da) is left intact
460
+ m_ld $Bb3, $next
461
+ m_pls $ofs4
462
+ veor.64 q6, q6, $Bb2 ; q6-q9 (De, Di, Do, Du) are consumed here and reloaded from scratch below
463
+ m_ld $Bb4, $next
464
+ m_pls $ofs5
465
+ veor.64 q7, q7, $Bb3
466
+ m_ld $Bb5, $next
467
+ veor.64 q8, q8, $Bb4
468
+ veor.64 q9, q9, $Bb5
469
+
470
+ vshl.u64 $Bb1, q15, #$Rr1 ; vshl + vsri pair = 64-bit rotate left
471
+ vshl.u64 $Bb2, q6, #$Rr2
472
+ vshl.u64 $Bb3, q7, #$Rr3
473
+ vshl.u64 $Bb4, q8, #$Rr4
474
+ vshl.u64 $Bb5, q9, #$Rr5
475
+
476
+ vsri.64 $Bb1, q15, #64-$Rr1
477
+ vsri.64 $Bb2, q6, #64-$Rr2
478
+ vsri.64 $Bb3, q7, #64-$Rr3
479
+ vsri.64 $Bb4, q8, #64-$Rr4
480
+ vsri.64 $Bb5, q9, #64-$Rr5
481
+
482
+ ; argA1 = Ba ^((~Be)& Bi ), Ca ^= argA1
483
+ ; argA2 = Be ^((~Bi)& Bo ), Ce ^= argA2
484
+ ; argA3 = Bi ^((~Bo)& Bu ), Ci ^= argA3
485
+ ; argA4 = Bo ^((~Bu)& Ba ), Co ^= argA4
486
+ ; argA5 = Bu ^((~Ba)& Be ), Cu ^= argA5
487
+ vbic.64 q15, q12, q11 ; Chi reads fixed registers: q10=Ba, q11=Be, q12=Bi, q13=Bo, q14=Bu
488
+ mov r6, r5 ; r6 = cursor into the scratch buffer addressed by r5
489
+ vbic.64 q6, q13, q12
490
+ m_pls $ofs1
491
+ vbic.64 q7, q14, q13
492
+ vbic.64 q8, q10, q14
493
+ vbic.64 q9, q11, q10
494
+
495
+ veor.64 q15, q15, q10
496
+ veor.64 q6, q6, q11
497
+
498
+ m_st q15, $next
499
+ m_pls $ofs2
500
+ veor.64 q7, q7, q12
501
+
502
+ m_st q6, $next
503
+ m_pls $ofs3
504
+ veor.64 q1, q1, q6 ; Ce ^= argA2
505
+ vld1.64 { q6 }, [r6:128]! ; reload q6 from scratch (De as saved by KeccakP_ThetaRhoPiChiIota)
506
+ veor.64 q8, q8, q13
507
+
508
+ m_st q7, $next
509
+ m_pls $ofs4
510
+ veor.64 q2, q2, q7 ; Ci ^= argA3
511
+ vld1.64 { q7 }, [r6:128]! ; reload q7 from scratch (Di)
512
+ veor.64 q9, q9, q14
513
+
514
+ m_st q8, $next
515
+ m_pls $ofs5
516
+ veor.64 q3, q3, q8 ; Co ^= argA4
517
+
518
+ m_st q9, $next
519
+
520
+ vld1.64 { q8 }, [r6:128]! ; reload q8 from scratch (Do)
521
+ veor.64 q4, q4, q9 ; Cu ^= argA5
522
+ m_pls $ofsn1 ; leave r3 pointing at the first lane of the next macro invocation
523
+ vld1.64 { q9 }, [r6:128]! ; reload q9 from scratch (Du)
524
+ veor.64 q0, q0, q15 ; Ca ^= argA1
525
+ MEND
526
+
527
+ MACRO ; KeccakP_ThetaRhoPiChi with lanes rotated into q12, q13, q14, q10, q11 by 3, 45, 61, 28, 20
528
+ KeccakP_ThetaRhoPiChi1 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
529
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q12, q13, q14, q10, q11, 3, 45, 61, 28, 20
530
+ MEND
531
+
532
+ MACRO ; KeccakP_ThetaRhoPiChi with lanes rotated into q14, q10, q11, q12, q13 by 18, 1, 6, 25, 8
533
+ KeccakP_ThetaRhoPiChi2 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
534
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q14, q10, q11, q12, q13, 18, 1, 6, 25, 8
535
+ MEND
536
+
537
+ MACRO ; KeccakP_ThetaRhoPiChi with lanes rotated into q11, q12, q13, q14, q10 by 36, 10, 15, 56, 27
538
+ KeccakP_ThetaRhoPiChi3 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
539
+ KeccakP_ThetaRhoPiChi $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1, q11, q12, q13, q14, q10, 36, 10, 15, 56, 27
540
+ MEND
541
+
542
+ MACRO ; last plane of a round: unlike KeccakP_ThetaRhoPiChi it overwrites q5-q9 (Da..Du) and does not reload them from scratch
543
+ KeccakP_ThetaRhoPiChi4 $ofs1, $ofs2, $ofs3, $ofs4, $ofs5, $next, $ofsn1
544
+
545
+ ; Bo = ROL64((argA1^Da), 41)
546
+ ; Bu = ROL64((argA2^De), 2)
547
+ ; Ba = ROL64((argA3^Di), 62)
548
+ ; Be = ROL64((argA4^Do), 55)
549
+ ; Bi = ROL64((argA5^Du), 39)
550
+ ; KeccakChi
551
+
552
+ if $next != 16
553
+ mov r4, #$next ; r4 = post-increment stride used by m_ld / m_st
554
+ endif
555
+
556
+ m_ld q13, $next ; first lane is read from wherever r3 points on entry
557
+ m_pls $ofs2
558
+ m_ld q14, $next
559
+ m_pls $ofs3
560
+ veor.64 q5, q5, q13 ; q5 (Da) is overwritten in place here
561
+ m_ld q10, $next
562
+ m_pls $ofs4
563
+ veor.64 q6, q6, q14
564
+ m_ld q11, $next
565
+ m_pls $ofs5
566
+ veor.64 q7, q7, q10
567
+ m_ld q12, $next
568
+ veor.64 q8, q8, q11
569
+ veor.64 q9, q9, q12
570
+
571
+ vshl.u64 q13, q5, #41 ; vshl + vsri pair = 64-bit rotate left
572
+ vshl.u64 q14, q6, #2
573
+ vshl.u64 q10, q7, #62
574
+ vshl.u64 q11, q8, #55
575
+ vshl.u64 q12, q9, #39
576
+
577
+ vsri.64 q13, q5, #64-41
578
+ vsri.64 q14, q6, #64-2
579
+ vsri.64 q11, q8, #64-55
580
+ vsri.64 q12, q9, #64-39
581
+ vsri.64 q10, q7, #64-62
582
+
583
+ vbic.64 q5, q12, q11 ; Chi with q10=Ba, q11=Be, q12=Bi, q13=Bo, q14=Bu; results go to q5-q9
584
+ vbic.64 q6, q13, q12
585
+ vbic.64 q7, q14, q13
586
+ vbic.64 q8, q10, q14
587
+ vbic.64 q9, q11, q10
588
+ veor.64 q5, q5, q10
589
+ veor.64 q6, q6, q11
590
+ veor.64 q7, q7, q12
591
+ veor.64 q8, q8, q13
592
+ m_pls $ofs1
593
+ veor.64 q9, q9, q14
594
+ m_st q5, $next
595
+ m_pls $ofs2
596
+ veor.64 q0, q0, q5 ; fold each new lane into the column parities q0-q4
597
+ m_st q6, $next
598
+ m_pls $ofs3
599
+ veor.64 q1, q1, q6
600
+ m_st q7, $next
601
+ m_pls $ofs4
602
+ veor.64 q2, q2, q7
603
+ m_st q8, $next
604
+ m_pls $ofs5
605
+ veor.64 q3, q3, q8
606
+ m_st q9, $next
607
+ m_pls $ofsn1 ; leave r3 pointing at the first lane of the next macro invocation
608
+ veor.64 q4, q4, q9
609
+ MEND
610
+
611
+ ;----------------------------------------------------------------------------
612
+ ;
613
+ ; void KeccakP1600times2_StaticInitialize( void )
614
+ ; No-op in this implementation: the routine returns immediately.
615
+ ALIGN
616
+ EXPORT KeccakP1600times2_StaticInitialize
617
+ KeccakP1600times2_StaticInitialize PROC
618
+ bx lr ; nothing to set up
619
+ ENDP
620
+
621
+ ;----------------------------------------------------------------------------
622
+ ;
623
+ ; void KeccakP1600times2_InitializeAll( void *states )
624
+ ; Writes 50 zeroed 64-bit lanes (400 bytes) starting at states (r0). Clobbers r0 and q0-q3.
625
+ ALIGN
626
+ EXPORT KeccakP1600times2_InitializeAll
627
+ KeccakP1600times2_InitializeAll PROC
628
+ vmov.i64 q0, #0 ; d0-d7 = 0
629
+ vmov.i64 q1, #0
630
+ vmov.i64 q2, #0
631
+ vmov.i64 q3, #0
632
+ vstm r0!, { d0 - d7 } ; 8 (clear 8 lanes at a time)
633
+ vstm r0!, { d0 - d7 } ; 16
634
+ vstm r0!, { d0 - d7 } ; 24
635
+ vstm r0!, { d0 - d7 } ; 32
636
+ vstm r0!, { d0 - d7 } ; 40
637
+ vstm r0!, { d0 - d7 } ; 48
638
+ vstm r0!, { d0 - d1} ; 50
639
+ bx lr
640
+ ENDP
641
+
642
+
643
+ ;----------------------------------------------------------------------------
644
+ ;
645
+ ; void KeccakP1600times2_AddByte( void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset )
646
+ ; XORs byte (r2) into the byte at states + 8*instanceIndex + 16*(offset/8) + (offset%8); lanes of the two instances are interleaved.
647
+ ALIGN
648
+ EXPORT KeccakP1600times2_AddByte
649
+ KeccakP1600times2_AddByte PROC
650
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
651
+ lsr r1, r3, #3 ; states += (offset & ~7) * 2
652
+ add r0, r0, r1, LSL #4
653
+ and r3, r3, #7
654
+ add r0, r0, r3 ; states += offset & 7
655
+ ldrb r1, [r0] ; read-modify-write of the single target byte
656
+ eor r1, r1, r2
657
+ strb r1, [r0]
658
+ bx lr
659
+ ENDP
660
+
661
+ ;----------------------------------------------------------------------------
662
+ ;
663
+ ; void KeccakP1600times2_AddBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
664
+ ; unsigned int offset, unsigned int length )
665
+ ; XORs length bytes of data into one instance: leading partial lane bytewise, whole lanes as two words, trailing bytes bytewise.
666
+ ALIGN
667
+ EXPORT KeccakP1600times2_AddBytes
668
+ KeccakP1600times2_AddBytes PROC
669
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
670
+ ldr r1, [sp, #0*4] ; r1 = length
671
+ cmp r1, #0
672
+ beq KeccakP1600times2_AddBytes_Exit ; nothing to do; taken before anything is pushed
673
+ push { r4- r7 }
674
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
675
+ add r0, r0, r4, LSL #4
676
+ ands r3, r3, #7 ; if (offset & 7) != 0
677
+ beq KeccakP1600times2_AddBytes_CheckLanes
678
+ add r0, r0, r3 ; states += offset & 7
679
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
680
+ KeccakP1600times2_AddBytes_LoopBytesFirst
681
+ ldrb r4, [r0]
682
+ ldrb r5, [r2], #1
683
+ eor r4, r4, r5
684
+ subs r1, r1, #1 ; flags survive the strb and are tested by the beq below
685
+ strb r4, [r0], #1
686
+ beq KeccakP1600times2_AddBytes_Done ; length exhausted inside the first lane
687
+ subs r3, r3, #1
688
+ bne KeccakP1600times2_AddBytes_LoopBytesFirst
689
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
690
+ KeccakP1600times2_AddBytes_CheckLanes
691
+ lsrs r3, r1, #3 ; r3 = number of whole lanes remaining
692
+ beq KeccakP1600times2_AddBytes_CheckBytesLast
693
+ KeccakP1600times2_AddBytes_LoopLanes
694
+ ldr r4, [r0]
695
+ ldr r5, [r0, #4]
696
+ ldr r6, [r2], #4
697
+ ldr r7, [r2], #4
698
+ eor r4, r4, r6
699
+ eor r5, r5, r7
700
+ subs r3, r3, #1
701
+ str r4, [r0], #4
702
+ str r5, [r0], #12 ; states += 8 (next lane of current state part)
703
+ bne KeccakP1600times2_AddBytes_LoopLanes
704
+ KeccakP1600times2_AddBytes_CheckBytesLast
705
+ ands r1, r1, #7 ; r1 = trailing byte count
706
+ beq KeccakP1600times2_AddBytes_Done
707
+ KeccakP1600times2_AddBytes_LoopBytesLast
708
+ ldrb r4, [r0]
709
+ ldrb r5, [r2], #1
710
+ eor r4, r4, r5
711
+ subs r1, r1, #1
712
+ strb r4, [r0], #1
713
+ bne KeccakP1600times2_AddBytes_LoopBytesLast
714
+ KeccakP1600times2_AddBytes_Done
715
+ pop { r4- r7 }
716
+ KeccakP1600times2_AddBytes_Exit
717
+ bx lr
718
+ ENDP
719
+
720
+ ;----------------------------------------------------------------------------
721
+ ;
722
+ ; void KeccakP1600times2_AddLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
723
+ ; XORs laneCount lanes into both instances: instance 0 reads from data, instance 1 from data + 8*laneOffset.
724
+ ALIGN
725
+ EXPORT KeccakP1600times2_AddLanesAll
726
+ KeccakP1600times2_AddLanesAll PROC
727
+ cmp r2, #0
728
+ beq KeccakP1600times2_AddLanesAll_Exit ; laneCount == 0: return before anything is pushed
729
+ add r3, r1, r3, LSL #3 ; r3: data + 8 * laneOffset
730
+ push {r4 - r7}
731
+ KeccakP1600times2_AddLanesAll_Loop
732
+ ldr r4, [r1], #4 ; index 0
733
+ ldr r5, [r1], #4
734
+ ldrd r6, r7, [r0]
735
+ eor r6, r6, r4
736
+ eor r7, r7, r5
737
+ strd r6, r7, [r0], #8 ; advance to the same lane of instance 1
738
+ ldr r4, [r3], #4 ; index 1
739
+ ldr r5, [r3], #4
740
+ ldrd r6, r7, [r0]
741
+ eor r6, r6, r4
742
+ eor r7, r7, r5
743
+ strd r6, r7, [r0], #8 ; advance to the next lane of instance 0
744
+ subs r2, r2, #1
745
+ bne KeccakP1600times2_AddLanesAll_Loop
746
+ pop {r4 - r7}
747
+ KeccakP1600times2_AddLanesAll_Exit
748
+ bx lr
749
+ ENDP
750
+
751
+ ;----------------------------------------------------------------------------
752
+ ;
753
+ ; void KeccakP1600times2_OverwriteBytes( void *states, unsigned int instanceIndex, const unsigned char *data,
754
+ ; unsigned int offset, unsigned int length )
755
+ ; Copies length bytes of data over one instance: leading partial lane bytewise, whole lanes as two words, trailing bytes bytewise.
756
+ ALIGN
757
+ EXPORT KeccakP1600times2_OverwriteBytes
758
+ KeccakP1600times2_OverwriteBytes PROC
759
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
760
+ ldr r1, [sp, #0*4] ; r1 = length
761
+ cmp r1, #0
762
+ beq KeccakP1600times2_OverwriteBytes_Exit ; nothing to do; taken before anything is pushed
763
+ push { r4-r5 }
764
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
765
+ add r0, r0, r4, LSL #4
766
+ ands r3, r3, #7 ; if (offset & 7) != 0
767
+ beq KeccakP1600times2_OverwriteBytes_CheckLanes
768
+ add r0, r0, r3 ; states += offset & 7
769
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
770
+ KeccakP1600times2_OverwriteBytes_LoopBytesFirst
771
+ ldrb r4, [r2], #1
772
+ strb r4, [r0], #1
773
+ subs r1, r1, #1
774
+ beq KeccakP1600times2_OverwriteBytes_Done ; length exhausted inside the first lane
775
+ subs r3, r3, #1
776
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesFirst
777
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
778
+ KeccakP1600times2_OverwriteBytes_CheckLanes
779
+ lsrs r3, r1, #3 ; r3 = number of whole lanes remaining
780
+ beq KeccakP1600times2_OverwriteBytes_CheckBytesLast
781
+ KeccakP1600times2_OverwriteBytes_LoopLanes
782
+ ldr r4, [r2], #4
783
+ ldr r5, [r2], #4
784
+ str r4, [r0], #4
785
+ str r5, [r0], #12 ; states += 8 (next lane of current state part)
786
+ subs r3, r3, #1
787
+ bne KeccakP1600times2_OverwriteBytes_LoopLanes
788
+ KeccakP1600times2_OverwriteBytes_CheckBytesLast
789
+ ands r1, r1, #7 ; r1 = trailing byte count
790
+ beq KeccakP1600times2_OverwriteBytes_Done
791
+ KeccakP1600times2_OverwriteBytes_LoopBytesLast
792
+ ldrb r4, [r2], #1
793
+ subs r1, r1, #1
794
+ strb r4, [r0], #1
795
+ bne KeccakP1600times2_OverwriteBytes_LoopBytesLast
796
+ KeccakP1600times2_OverwriteBytes_Done
797
+ pop { r4- r5 }
798
+ KeccakP1600times2_OverwriteBytes_Exit
799
+ bx lr
800
+ ENDP
801
+
802
+ ;----------------------------------------------------------------------------
803
+ ;
804
+ ; void KeccakP1600times2_OverwriteLanesAll( void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
805
+ ; Copies laneCount lanes into both instances (instance 1 from data + 8*laneOffset); NEON path when data is 8-byte aligned, word path otherwise.
806
+ ALIGN
807
+ EXPORT KeccakP1600times2_OverwriteLanesAll
808
+ KeccakP1600times2_OverwriteLanesAll PROC
809
+ cmp r2, #0
810
+ beq KeccakP1600times2_OverwriteLanesAll_Exit
811
+ lsls r12, r1, #32-3 ; non-zero iff any of the low 3 bits of data is set
812
+ bne KeccakP1600times2_OverwriteLanesAll_Unaligned
813
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
814
+ lsrs r2, r2, #1 ; r2 = lane pairs; C = odd lane pending, Z = no pairs
815
+ bcc KeccakP1600times2_OverwriteLanesAll_LoopAligned ; even count (non-zero here), go straight to the pair loop
816
+ vldm r1!, { d0 }
817
+ vldm r3!, { d1 }
818
+ vstm r0!, { d0 - d1 }
819
+ beq KeccakP1600times2_OverwriteLanesAll_Exit ; Z still from the lsrs above: the single lane was the only one
820
+ KeccakP1600times2_OverwriteLanesAll_LoopAligned
821
+ vldm r1!, { d0 } ; d0/d2 = two lanes for instance 0, d1/d3 = two lanes for instance 1
822
+ vldm r1!, { d2 }
823
+ vldm r3!, { d1 }
824
+ vldm r3!, { d3 }
825
+ subs r2, r2, #1
826
+ vstm r0!, { d0 - d3 } ; stored interleaved: inst0, inst1, inst0, inst1
827
+ bne KeccakP1600times2_OverwriteLanesAll_LoopAligned
828
+ bx lr
829
+ KeccakP1600times2_OverwriteLanesAll_Unaligned
830
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
831
+ push { r4, r5 }
832
+ KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
833
+ ldr r4, [r1], #4
834
+ ldr r5, [r1], #4
835
+ strd r4, r5, [r0], #8
836
+ ldr r4, [r3], #4
837
+ ldr r5, [r3], #4
838
+ subs r2, r2, #1
839
+ strd r4, r5, [r0], #8
840
+ bne KeccakP1600times2_OverwriteLanesAll_LoopUnaligned
841
+ pop { r4, r5 }
842
+ KeccakP1600times2_OverwriteLanesAll_Exit
843
+ bx lr
844
+ ENDP
845
+
846
+ ;----------------------------------------------------------------------------
847
+ ;
848
+ ; void KeccakP1600times2_OverwriteWithZeroes( void *states, unsigned int instanceIndex, unsigned int byteCount )
849
+ ; Zeroes the first byteCount bytes of one instance: whole lanes via NEON stores, the remaining (byteCount & 7) bytes bytewise.
850
+ ALIGN
851
+ EXPORT KeccakP1600times2_OverwriteWithZeroes
852
+ KeccakP1600times2_OverwriteWithZeroes PROC
853
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
854
+ lsrs r1, r2, #3 ; r1: laneCount
855
+ beq KeccakP1600times2_OverwriteWithZeroes_Bytes
856
+ vmov.i64 d0, #0
857
+ KeccakP1600times2_OverwriteWithZeroes_LoopLanes
858
+ subs r1, r1, #1 ; flags are consumed by the bne below; vstm and add leave them untouched
859
+ vstm r0!, { d0 }
860
+ add r0, r0, #8 ; skip the other instance's lane
861
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopLanes
862
+ KeccakP1600times2_OverwriteWithZeroes_Bytes
863
+ ands r2, r2, #7 ; r2: byteCount remaining
864
+ beq KeccakP1600times2_OverwriteWithZeroes_Exit
865
+ movs r3, #0
866
+ KeccakP1600times2_OverwriteWithZeroes_LoopBytes
867
+ subs r2, r2, #1
868
+ strb r3, [r0], #1
869
+ bne KeccakP1600times2_OverwriteWithZeroes_LoopBytes
870
+ KeccakP1600times2_OverwriteWithZeroes_Exit
871
+ bx lr
872
+ ENDP
873
+
874
+ ;----------------------------------------------------------------------------
875
+ ;
876
+ ; void KeccakP1600times2_ExtractBytes( void *states, unsigned int instanceIndex, unsigned char *data,
877
+ ; unsigned int offset, unsigned int length )
878
+ ; Copies length bytes of one instance, starting at byte offset, out to data (data is written, states is only read).
879
+ ALIGN
880
+ EXPORT KeccakP1600times2_ExtractBytes
881
+ KeccakP1600times2_ExtractBytes PROC
882
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
883
+ ldr r1, [sp, #0*4] ; r1 = length
884
+ cmp r1, #0
885
+ beq KeccakP1600times2_ExtractBytes_Exit ; nothing to do; taken before anything is pushed
886
+ push { r4-r5 }
887
+ lsr r4, r3, #3 ; states += (offset & ~7) * 2
888
+ add r0, r0, r4, LSL #4
889
+ ands r3, r3, #7 ; if (offset & 7) != 0
890
+ beq KeccakP1600times2_ExtractBytes_CheckLanes
891
+ add r0, r0, r3 ; states += offset & 7
892
+ rsb r3, r3, #8 ; lenInLane = 8 - (offset & 7)
893
+ KeccakP1600times2_ExtractBytes_LoopBytesFirst
894
+ ldrb r4, [r0], #1
895
+ strb r4, [r2], #1
896
+ subs r1, r1, #1
897
+ beq KeccakP1600times2_ExtractBytes_Done ; length exhausted inside the first lane
898
+ subs r3, r3, #1
899
+ bne KeccakP1600times2_ExtractBytes_LoopBytesFirst
900
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
901
+ KeccakP1600times2_ExtractBytes_CheckLanes
902
+ lsrs r3, r1, #3 ; r3 = number of whole lanes remaining
903
+ beq KeccakP1600times2_ExtractBytes_CheckBytesLast
904
+ KeccakP1600times2_ExtractBytes_LoopLanes
905
+ ldr r4, [r0], #4
906
+ ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
907
+ str r4, [r2], #4
908
+ str r5, [r2], #4
909
+ subs r3, r3, #1
910
+ bne KeccakP1600times2_ExtractBytes_LoopLanes
911
+ KeccakP1600times2_ExtractBytes_CheckBytesLast
912
+ ands r1, r1, #7 ; r1 = trailing byte count
913
+ beq KeccakP1600times2_ExtractBytes_Done
914
+ KeccakP1600times2_ExtractBytes_LoopBytesLast
915
+ ldrb r4, [r0], #1
916
+ subs r1, r1, #1
917
+ strb r4, [r2], #1
918
+ bne KeccakP1600times2_ExtractBytes_LoopBytesLast
919
+ KeccakP1600times2_ExtractBytes_Done
920
+ pop { r4-r5 }
921
+ KeccakP1600times2_ExtractBytes_Exit
922
+ bx lr
923
+ ENDP
924
+
925
+ ;----------------------------------------------------------------------------
926
+ ;
927
+ ; void KeccakP1600times2_ExtractLanesAll( const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset )
928
+ ; Copies laneCount lanes of both instances out (instance 1 to data + 8*laneOffset); NEON path when data is 8-byte aligned, word path otherwise.
929
+ ALIGN
930
+ EXPORT KeccakP1600times2_ExtractLanesAll
931
+ KeccakP1600times2_ExtractLanesAll PROC
932
+ cmp r2, #0
933
+ beq KeccakP1600times2_ExtractLanesAll_Exit
934
+ lsls r12, r1, #32-3 ; non-zero iff any of the low 3 bits of data is set
935
+ bne KeccakP1600times2_ExtractLanesAll_Unaligned
936
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
937
+ lsrs r2, r2, #1 ; r2 = lane pairs; C = odd lane pending, Z = no pairs
938
+ bcc KeccakP1600times2_ExtractLanesAll_LoopAligned ; even count (non-zero here), go straight to the pair loop
939
+ vldm r0!, { d0 - d1 }
940
+ vstm r1!, { d0 }
941
+ vstm r3!, { d1 }
942
+ beq KeccakP1600times2_ExtractLanesAll_Exit ; Z still from the lsrs above: the single lane was the only one
943
+ KeccakP1600times2_ExtractLanesAll_LoopAligned
944
+ vldm r0!, { d0 - d3 } ; interleaved in the state: inst0, inst1, inst0, inst1
945
+ subs r2, r2, #1
946
+ vstm r1!, { d0 }
947
+ vstm r1!, { d2 }
948
+ vstm r3!, { d1 }
949
+ vstm r3!, { d3 }
950
+ bne KeccakP1600times2_ExtractLanesAll_LoopAligned
951
+ bx lr
952
+ KeccakP1600times2_ExtractLanesAll_Unaligned
953
+ add r3, r1, r3, LSL #3 ; r3(pointer instance 1): data + 8 * laneOffset
954
+ push { r4, r5 }
955
+ KeccakP1600times2_ExtractLanesAll_LoopUnaligned
956
+ ldrd r4, r5, [r0], #8
957
+ str r4, [r1], #4
958
+ str r5, [r1], #4
959
+ ldrd r4, r5, [r0], #8
960
+ subs r2, r2, #1
961
+ str r4, [r3], #4
962
+ str r5, [r3], #4
963
+ bne KeccakP1600times2_ExtractLanesAll_LoopUnaligned
964
+ pop { r4, r5 }
965
+ KeccakP1600times2_ExtractLanesAll_Exit
966
+ bx lr
967
+ ENDP
968
+
969
+ ;----------------------------------------------------------------------------
970
+ ;
971
+ ; void KeccakP1600times2_ExtractAndAddBytes( void *states, unsigned int instanceIndex,
972
+ ; const unsigned char *input, unsigned char *output,
973
+ ; unsigned int offset, unsigned int length )
974
+ ; Writes output[i] = state byte XOR input[i] for length bytes of one instance starting at byte offset; the state is only read.
975
+ ALIGN
976
+ EXPORT KeccakP1600times2_ExtractAndAddBytes
977
+ KeccakP1600times2_ExtractAndAddBytes PROC
978
+ add r0, r0, r1, LSL #3 ; states += 8 * instanceIndex
979
+ ldr r1, [sp, #1*4] ; r1 = length
980
+ cmp r1, #0
981
+ beq KeccakP1600times2_ExtractAndAddBytes_Exit ; nothing to do; taken before anything is pushed
982
+ push { r4 - r9 }
983
+ ldr r8, [sp, #6*4] ; r8 = offset
984
+ lsr r4, r8, #3 ; states += (offset & ~7) * 2
985
+ add r0, r0, r4, LSL #4
986
+ ands r8, r8, #7 ; if (offset & 7) != 0
987
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckLanes
988
+ add r0, r0, r8 ; states += offset & 7
989
+ rsb r8, r8, #8 ; lenInLane = 8 - (offset & 7)
990
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
991
+ ldrb r4, [r0], #1
992
+ ldrb r5, [r2], #1
993
+ eor r4, r4, r5
994
+ strb r4, [r3], #1
995
+ subs r1, r1, #1
996
+ beq KeccakP1600times2_ExtractAndAddBytes_Done ; length exhausted inside the first lane
997
+ subs r8, r8, #1
998
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesFirst
999
+ add r0, r0, #8 ; states += 8 (next lane of current state part)
1000
+ KeccakP1600times2_ExtractAndAddBytes_CheckLanes
1001
+ lsrs r8, r1, #3 ; r8 = number of whole lanes remaining
1002
+ beq KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1003
+ KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1004
+ ldr r4, [r0], #4
1005
+ ldr r5, [r0], #12 ; states += 8 (next lane of current state part)
1006
+ ldr r6, [r2], #4
1007
+ ldr r7, [r2], #4
1008
+ eor r4, r4, r6
1009
+ eor r5, r5, r7
1010
+ str r4, [r3], #4
1011
+ str r5, [r3], #4 ; output has advanced by one full lane (8 bytes)
1012
+ subs r8, r8, #1
1013
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopLanes
1014
+ KeccakP1600times2_ExtractAndAddBytes_CheckBytesLast
1015
+ ands r1, r1, #7 ; r1 = trailing byte count
1016
+ beq KeccakP1600times2_ExtractAndAddBytes_Done
1017
+ KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1018
+ ldrb r4, [r0], #1
1019
+ ldrb r5, [r2], #1
1020
+ eor r4, r4, r5
1021
+ strb r4, [r3], #1
1022
+ subs r1, r1, #1
1023
+ bne KeccakP1600times2_ExtractAndAddBytes_LoopBytesLast
1024
+ KeccakP1600times2_ExtractAndAddBytes_Done
1025
+ pop { r4 - r9 }
1026
+ KeccakP1600times2_ExtractAndAddBytes_Exit
1027
+ bx lr
1028
+ ENDP
1029
+
1030
+ ;----------------------------------------------------------------------------
1031
+ ;
1032
+ ; void KeccakP1600times2_ExtractAndAddLanesAll( const void *states,
1033
+ ; const unsigned char *input, unsigned char *output,
1034
+ ; unsigned int laneCount, unsigned int laneOffset )
1035
+ ; For both instances writes output lane = state lane XOR input lane; instance 1 uses input/output + 8*laneOffset. laneOffset is read from the stack.
1036
+ ALIGN
1037
+ EXPORT KeccakP1600times2_ExtractAndAddLanesAll
1038
+ KeccakP1600times2_ExtractAndAddLanesAll PROC
1039
+ cmp r3, #0
1040
+ beq KeccakP1600times2_ExtractAndAddLanesAll_Exit
1041
+ orr r12, r1, r2
1042
+ lsls r12, r12, #32-3 ; unaligned access if input or output unaligned
1043
+ bne KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1044
+ push {r4,r5}
1045
+ ldr r12, [sp, #2*4] ; r12 = laneOffset
1046
+ lsrs r3, r3, #1 ; r3 = lane pairs; C = odd lane pending, Z = no pairs (the adds below keep the flags)
1047
+ add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1048
+ add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1049
+ bcc KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1050
+ vldm r0!, { d0 - d1 }
1051
+ vldm r1!, { d2 }
1052
+ vldm r4!, { d3 }
1053
+ veor q0, q0, q1
1054
+ vstm r2!, { d0 }
1055
+ vstm r5!, { d1 }
1056
+ beq KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone ; Z still from the lsrs above: the single lane was the only one
1057
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1058
+ vldm r0!, { d0 - d3 } ; state lanes interleaved: inst0, inst1, inst0, inst1
1059
+ vldm r1!, { d4 } ; input is loaded into d4-d7 in the same interleaved order
1060
+ vldm r1!, { d6 }
1061
+ vldm r4!, { d5 }
1062
+ vldm r4!, { d7 }
1063
+ subs r3, r3, #1
1064
+ veor q0, q0, q2
1065
+ veor q1, q1, q3
1066
+ vstm r2!, { d0 }
1067
+ vstm r2!, { d2 }
1068
+ vstm r5!, { d1 }
1069
+ vstm r5!, { d3 }
1070
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopAligned
1071
+ KeccakP1600times2_ExtractAndAddLanesAll_AlignedDone
1072
+ pop {r4,r5}
1073
+ bx lr
1074
+ KeccakP1600times2_ExtractAndAddLanesAll_Unaligned
1075
+ push {r4-r9}
1076
+ ldr r12, [sp, #6*4] ; r12 = laneOffset
1077
+ add r4, r1, r12, LSL #3 ; r4(input instance 1): input + 8 * laneOffset
1078
+ add r5, r2, r12, LSL #3 ; r5(output instance 1): output + 8 * laneOffset
1079
+ KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1080
+ ldrd r8, r9, [r0], #8 ; instance 0 lane
1081
+ ldr r6, [r1], #4
1082
+ ldr r7, [r1], #4
1083
+ eor r8, r8, r6
1084
+ eor r9, r9, r7
1085
+ str r8, [r2], #4
1086
+ str r9, [r2], #4
1087
+ ldrd r8, r9, [r0], #8 ; instance 1 lane
1088
+ ldr r6, [r4], #4
1089
+ ldr r7, [r4], #4
1090
+ eor r8, r8, r6
1091
+ eor r9, r9, r7
1092
+ str r8, [r5], #4
1093
+ subs r3, r3, #1
1094
+ str r9, [r5], #4
1095
+ bne KeccakP1600times2_ExtractAndAddLanesAll_LoopUnaligned
1096
+ pop { r4 - r9 }
1097
+ KeccakP1600times2_ExtractAndAddLanesAll_Exit
1098
+ bx lr
1099
+ ENDP
1100
+
1101
+ ;----------------------------------------------------------------------------
1102
+ ;
1103
+ ; void KeccakP1600times2_PermuteAll_6rounds( void *states )
1104
+ ; Swaps the lane pairs listed below in memory, accumulates the column parities in q0-q4, then enters the shared round loop at KeccakP1600times2_PermuteAll_Round2.
1105
+ ALIGN
1106
+ EXPORT KeccakP1600times2_PermuteAll_6rounds
1107
+ KeccakP1600times2_PermuteAll_6rounds PROC
1108
+ adr r1, KeccakP1600times2_Permute_RoundConstants6
1109
+ movs r2, #6+2 ; Round2 sits 2 rounds before the loop's "subs r2, #4", so 8 yields 2 + 4 = 6 rounds
1110
+ vpush {q4-q7}
1111
+ push {r4-r7}
1112
+ sub sp, #4*2*8+8 ;allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1113
+ add r5, sp, #8 ; r5 is rounded down to a 16-byte boundary by the "bic r5, #15" below
1114
+
1115
+ ; ba
1116
+ ; be = me, me = be
1117
+ ; bi = gi, gi = bi
1118
+ ; bo = so, so = bo
1119
+ ; bu = ku, ku = bu
1120
+
1121
+ ; ga = sa, sa = ga
1122
+ ; ge = ke, ke = ge
1123
+ ; go = mo, mo = go
1124
+ ; gu
1125
+
1126
+ ; ka = ma, ma = ka
1127
+ ; ki = si, si = ki
1128
+ ; ko
1129
+
1130
+ ; mu = su, su = mu
1131
+ ; mi
1132
+ ; se
1133
+
1134
+ ;PrepareTheta
1135
+ ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1136
+ ; Ce = be ^ ge ^ ke ^ me ^ se
1137
+ ; Ci = bi ^ gi ^ ki ^ mi ^ si
1138
+ ; Co = bo ^ go ^ ko ^ mo ^ so
1139
+ ; Cu = bu ^ gu ^ ku ^ mu ^ su
1140
+ vldm r0, { q0 - q4 } ; ba be bi bo bu
1141
+ bic r5, #15 ; r5 = 16-byte aligned scratch buffer used by the round macros
1142
+ add r3, r0, #_me
1143
+ vldm r3, { q6 } ; me
1144
+ vstm r3, { q1 } ; old be goes to the me slot
1145
+ veor.64 q1, q1, q6
1146
+ add r4, r0, #_be
1147
+ vstm r4!, { q6 } ; be
1148
+
1149
+ add r3, r0, #_ga
1150
+ vldm r3, { q10 - q14 } ; ga ge gi go gu
1151
+ add r3, r0, #_gi
1152
+ vstm r3, { q2 } ; old bi goes to the gi slot
1153
+ veor.64 q2, q2, q12
1154
+ vstm r4!, { q12 } ; bi
1155
+
1156
+ add r3, r0, #_so
1157
+ vldm r3, { q8 } ; so
1158
+ vstm r3, { q3 } ; old bo goes to the so slot
1159
+ veor.64 q3, q3, q8
1160
+ vstm r4!, { q8 } ; bo
1161
+
1162
+ add r3, r0, #_ku
1163
+ vldm r3, { q9 } ; ku
1164
+ vstm r3, { q4 } ; old bu goes to the ku slot
1165
+ veor.64 q4, q4, q9
1166
+ vstm r4!, { q9 } ; bu
1167
+
1168
+ add r3, r0, #_sa
1169
+ vldm r3, { q5 } ; sa
1170
+ vstm r3, { q10 } ; old ga goes to the sa slot
1171
+ add r4, r0, #_ga
1172
+ veor.64 q0, q0, q5
1173
+ veor.64 q0, q0, q10
1174
+ vstm r4!, { q5 } ; ga
1175
+
1176
+ add r3, r0, #_ke
1177
+ vldm r3, { q6 } ; ke
1178
+ vstm r3, { q11 } ; old ge goes to the ke slot
1179
+ veor.64 q1, q1, q6
1180
+ veor.64 q1, q1, q11
1181
+ vstm r4!, { q6 } ; ge
1182
+
1183
+ add r3, r0, #_mo
1184
+ vldm r3, { q8 } ; mo
1185
+ vstm r3, { q13 } ; old go goes to the mo slot
1186
+ add r4, r0, #_go
1187
+ veor.64 q3, q3, q8
1188
+ veor.64 q3, q3, q13
1189
+ vstm r4!, { q8 } ; go
1190
+ veor.64 q4, q4, q14 ; gu
1191
+
1192
+ add r4, r0, #_ka ; ka
1193
+ vldm r4, { q10 }
1194
+ add r3, r0, #_ma
1195
+ vldm r3, { q5 } ; ma
1196
+ vstm r3, { q10 } ; old ka goes to the ma slot
1197
+ veor.64 q0, q0, q5
1198
+ veor.64 q0, q0, q10
1199
+ vstm r4!, { q5 } ; ka
1200
+
1201
+ add r4, r0, #_ki ; ki ko
1202
+ vldm r4, { q12, q13 }
1203
+ add r3, r0, #_si
1204
+ vldm r3, { q7 } ; si
1205
+ vstm r3, { q12 } ; old ki goes to the si slot
1206
+ veor.64 q2, q2, q7
1207
+ veor.64 q2, q2, q12
1208
+ vstm r4, { q7 } ; ki
1209
+ veor.64 q3, q3, q13 ; ko
1210
+
1211
+ add r4, r0, #_mu ; mu
1212
+ vldm r4, { q14 }
1213
+ add r3, r0, #_su
1214
+ vldm r3, { q9 } ; su
1215
+ vstm r3, { q14 } ; old mu goes to the su slot
1216
+ veor.64 q4, q4, q9
1217
+ veor.64 q4, q4, q14
1218
+ vstm r4, { q9 } ; mu
1219
+
1220
+ add r4, r0, #_mi ; mi
1221
+ vldm r4, { q12 }
1222
+ veor.64 q2, q2, q12
1223
+ add r3, r0, #_se ; se
1224
+ vldm r3, { q6 }
1225
+ veor.64 q1, q1, q6
1226
+
1227
+ mov r3, r0 ; the round macros expect r3 at the first lane they load
1228
+ b KeccakP1600times2_PermuteAll_Round2 ; the epilogue in KeccakP1600times2_PermuteAll undoes this prologue
1229
+ ENDP
1230
+
1231
+ ALIGN ; round-constant table: one 64-bit constant per round; the inner labels mark where shorter round counts start
1232
+ KeccakP1600times2_Permute_RoundConstants24
1233
+ dcq 0x0000000000000001
1234
+ dcq 0x0000000000008082
1235
+ dcq 0x800000000000808a
1236
+ dcq 0x8000000080008000
1237
+ dcq 0x000000000000808b
1238
+ dcq 0x0000000080000001
1239
+ dcq 0x8000000080008081
1240
+ dcq 0x8000000000008009
1241
+ dcq 0x000000000000008a
1242
+ dcq 0x0000000000000088
1243
+ dcq 0x0000000080008009
1244
+ dcq 0x000000008000000a
1245
+ KeccakP1600times2_Permute_RoundConstants12
1246
+ dcq 0x000000008000808b ; last 12 constants start here
1247
+ dcq 0x800000000000008b
1248
+ dcq 0x8000000000008089
1249
+ dcq 0x8000000000008003
1250
+ dcq 0x8000000000008002
1251
+ dcq 0x8000000000000080
1252
+ KeccakP1600times2_Permute_RoundConstants6
1253
+ dcq 0x000000000000800a ; last 6 constants start here
1254
+ dcq 0x800000008000000a
1255
+ KeccakP1600times2_Permute_RoundConstants4
1256
+ dcq 0x8000000080008081 ; last 4 constants start here
1257
+ dcq 0x8000000000008080
1258
+ dcq 0x0000000080000001
1259
+ dcq 0x8000000080008008
1260
+
1261
+ ;----------------------------------------------------------------------------
1262
+ ;
1263
+ ; void KeccakP1600times2_PermuteAll_24rounds( void *states )
1264
+ ; Sets r1 to the start of the round-constant table and r2 to 24, then branches to KeccakP1600times2_PermuteAll.
1265
+ ALIGN
1266
+ EXPORT KeccakP1600times2_PermuteAll_24rounds
1267
+ KeccakP1600times2_PermuteAll_24rounds PROC
1268
+ adr r1, KeccakP1600times2_Permute_RoundConstants24
1269
+ movs r2, #24
1270
+ b KeccakP1600times2_PermuteAll ; plain branch, not a call: PermuteAll returns to our caller via lr
1271
+ ENDP
1272
+
1273
+ ;----------------------------------------------------------------------------
1274
+ ;
1275
+ ; void KeccakP1600times2_PermuteAll_12rounds( void *states )
1276
+ ; Sets r1 to the last 12 round constants and r2 to 12, then branches to KeccakP1600times2_PermuteAll.
1277
+ ALIGN
1278
+ EXPORT KeccakP1600times2_PermuteAll_12rounds
1279
+ KeccakP1600times2_PermuteAll_12rounds PROC
1280
+ adr r1, KeccakP1600times2_Permute_RoundConstants12
1281
+ movs r2, #12
1282
+ b KeccakP1600times2_PermuteAll ; plain branch, not a call: PermuteAll returns to our caller via lr
1283
+ ENDP
1284
+
1285
+ ;----------------------------------------------------------------------------
1286
+ ;
1287
+ ; void KeccakP1600times2_PermuteAll_4rounds( void *states )
1288
+ ; Sets r1 to the last 4 round constants and r2 to 4, then branches to KeccakP1600times2_PermuteAll.
1289
+ ALIGN
1290
+ EXPORT KeccakP1600times2_PermuteAll_4rounds
1291
+ KeccakP1600times2_PermuteAll_4rounds PROC
1292
+ adr r1, KeccakP1600times2_Permute_RoundConstants4
1293
+ movs r2, #4
1294
+ b KeccakP1600times2_PermuteAll ; plain branch, not a call: PermuteAll returns to our caller via lr
1295
+ ENDP
1296
+
1297
+ ;----------------------------------------------------------------------------
1298
+ ;
1299
+ ; void KeccakP1600times2_PermuteAll( void *states, void *rc, unsigned int nr )
1300
+ ; Internal entry (not exported): r0 = states, r1 = round-constant pointer, r2 = round count. Each loop pass runs 4 rounds and subtracts 4 from r2.
1301
+ ALIGN
1302
+ KeccakP1600times2_PermuteAll PROC
1303
+ vpush {q4-q7}
1304
+ push {r4-r7}
1305
+ sub sp, #4*2*8+8 ;allocate 4 D double lanes (plus 8bytes to allow alignment on 16 bytes)
1306
+ mov r3, r0
1307
+ add r5, sp, #8 ; r5 is rounded down to a 16-byte boundary by the "bic r5, #15" below
1308
+
1309
+ ;PrepareTheta
1310
+ ; Ca = ba ^ ga ^ ka ^ ma ^ sa
1311
+ ; Ce = be ^ ge ^ ke ^ me ^ se
1312
+ ; Ci = bi ^ gi ^ ki ^ mi ^ si
1313
+ ; Co = bo ^ go ^ ko ^ mo ^ so
1314
+ ; Cu = bu ^ gu ^ ku ^ mu ^ su
1315
+ vld1.64 { d0, d1, d2, d3 }, [r3:256]! ; _ba _be
1316
+ bic r5, #15 ; r5 = 16-byte aligned scratch buffer used by the round macros
1317
+ vld1.64 { d4, d5, d6, d7 }, [r3:256]! ; _bi _bo
1318
+ vld1.64 { d8, d9, d10, d11 }, [r3:256]! ; _bu _ga
1319
+ vld1.64 { d12, d13 }, [r3:128]! ; _ge
1320
+ veor.64 q0, q0, q5 ; q0-q4 accumulate Ca, Ce, Ci, Co, Cu; q5-q9 are reused as load buffers
1321
+ vld1.64 { d14, d15 }, [r3:128]! ; _gi
1322
+ veor.64 q1, q1, q6
1323
+ vld1.64 { d16, d17 }, [r3:128]! ; _go
1324
+ veor.64 q2, q2, q7
1325
+ vld1.64 { d18, d19 }, [r3:128]! ; _gu
1326
+ veor.64 q3, q3, q8
1327
+ vld1.64 { d10, d11 }, [r3:128]! ; _ka
1328
+ veor.64 q4, q4, q9
1329
+ vld1.64 { d12, d13 }, [r3:128]! ; _ke
1330
+ veor.64 q0, q0, q5
1331
+ vld1.64 { d14, d15 }, [r3:128]! ; _ki
1332
+ veor.64 q1, q1, q6
1333
+ vld1.64 { d16, d17 }, [r3:128]! ; _ko
1334
+ veor.64 q2, q2, q7
1335
+ vld1.64 { d18, d19 }, [r3:128]! ; _ku
1336
+ veor.64 q3, q3, q8
1337
+ vld1.64 { d10, d11 }, [r3:128]! ; _ma
1338
+ veor.64 q4, q4, q9
1339
+ vld1.64 { d12, d13 }, [r3:128]! ; _me
1340
+ veor.64 q0, q0, q5
1341
+ vld1.64 { d14, d15 }, [r3:128]! ; _mi
1342
+ veor.64 q1, q1, q6
1343
+ vld1.64 { d16, d17 }, [r3:128]! ; _mo
1344
+ veor.64 q2, q2, q7
1345
+ vld1.64 { d18, d19 }, [r3:128]! ; _mu
1346
+ veor.64 q3, q3, q8
1347
+ vld1.64 { d10, d11 }, [r3:128]! ; _sa
1348
+ veor.64 q4, q4, q9
1349
+ vld1.64 { d12, d13 }, [r3:128]! ; _se
1350
+ veor.64 q0, q0, q5
1351
+ vld1.64 { d14, d15 }, [r3:128]! ; _si
1352
+ veor.64 q1, q1, q6
1353
+ vld1.64 { d16, d17 }, [r3:128]! ; _so
1354
+ veor.64 q2, q2, q7
1355
+ vld1.64 { d18, d19 }, [r3:128]! ; _su
1356
+ mov r3, r0 ; rewind the cursor: the first round macro loads starting at _ba
1357
+ veor.64 q3, q3, q8
1358
+ veor.64 q4, q4, q9
1359
+
1360
+ KeccakP1600times2_PermuteAll_RoundLoop
1361
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _ge-_ba, _ka ; _ba, _ge, _ki, _mo, _su
1362
+ KeccakP_ThetaRhoPiChi1 _ka, -1, -1, _bo, -1, _me-_ka, _sa ; _ka, _me, _si, _bo, _gu
1363
+ KeccakP_ThetaRhoPiChi2 _sa, _be, -1, -1, -1, _gi-_be, _ga ; _sa, _be, _gi, _ko, _mu
1364
+ KeccakP_ThetaRhoPiChi3 _ga, -1, -1, -1, _bu, _ke-_ga, _ma ; _ga, _ke, _mi, _so, _bu
1365
+ KeccakP_ThetaRhoPiChi4 _ma, -1, _bi, -1, -1, _se-_ma, _ba ; _ma, _se, _bi, _go, _ku
1366
+
1367
+ KeccakP_ThetaRhoPiChiIota _ba, -1, _gi, -1, _ku, _me-_ba, _sa ; _ba, _me, _gi, _so, _ku
1368
+ KeccakP_ThetaRhoPiChi1 _sa, _ke, _bi, -1, _gu, _mo-_bi, _ma ; _sa, _ke, _bi, _mo, _gu
1369
+ KeccakP_ThetaRhoPiChi2 _ma, _ge, -1, _ko, _bu, _si-_ge, _ka ; _ma, _ge, _si, _ko, _bu
1370
+ KeccakP_ThetaRhoPiChi3 _ka, _be, -1, _go, -1, _mi-_be, _ga ; _ka, _be, _mi, _go, _su
1371
+ KeccakP_ThetaRhoPiChi4 _ga, -1, _ki, _bo, -1, _se-_ga, _ba ; _ga, _se, _ki, _bo, _mu
1372
+ KeccakP1600times2_PermuteAll_Round2
1373
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, _go, -1, _ke-_ba, _ma ; _ba, _ke, _si, _go, _mu
1374
+ KeccakP_ThetaRhoPiChi1 _ma, _be, -1, -1, _gu, _ki-_be, _ga ; _ma, _be, _ki, _so, _gu
1375
+ KeccakP_ThetaRhoPiChi2 _ga, -1, _bi, -1, -1, _me-_ga, _sa ; _ga, _me, _bi, _ko, _su
1376
+ KeccakP_ThetaRhoPiChi3 _sa, _ge, -1, _bo, -1, _mi-_ge, _ka ; _sa, _ge, _mi, _bo, _ku
1377
+ KeccakP_ThetaRhoPiChi4 _ka, -1, _gi, -1, _bu, _se-_ka, _ba ; _ka, _se, _gi, _mo, _bu
1378
+
1379
+ KeccakP_ThetaRhoPiChiIota _ba, -1, -1, -1, -1, _be-_ba, _ga ; _ba, _be, _bi, _bo, _bu
1380
+ KeccakP_ThetaRhoPiChi1 _ga, -1, -1, -1, -1, _ge-_ga, _ka ; _ga, _ge, _gi, _go, _gu
1381
+ KeccakP_ThetaRhoPiChi2 _ka, -1, -1, -1, -1, _ke-_ka, _ma ; _ka, _ke, _ki, _ko, _ku
1382
+ KeccakP_ThetaRhoPiChi3 _ma, -1, -1, -1, -1, _me-_ma, _sa ; _ma, _me, _mi, _mo, _mu
1383
+ subs r2, #4 ; flags for the bne below; the macro in between contains no flag-setting instruction
1384
+ KeccakP_ThetaRhoPiChi4 _sa, -1, -1, -1, -1, _se-_sa, _ba ; _sa, _se, _si, _so, _su
1385
+ bne KeccakP1600times2_PermuteAll_RoundLoop
1386
+ add sp, #4*2*8+8 ; free 4.5 D lanes
1387
+ pop {r4-r7}
1388
+ vpop {q4-q7}
1389
+ bx lr
1390
+ ENDP
1391
+
1392
+ END