sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1196 @@
1
+ #
2
+ # The eXtended Keccak Code Package (XKCP)
3
+ # https://github.com/XKCP/XKCP
4
+ #
5
+ # The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
6
+ #
7
+ # Implementation by Ronny Van Keer, hereby denoted as "the implementer".
8
+ #
9
+ # For more information, feedback or questions, please refer to the Keccak Team website:
10
+ # https://keccak.team/
11
+ #
12
+ # To the extent possible under law, the implementer has waived all copyright
13
+ # and related or neighboring rights to the source code in this file.
14
+ # http://creativecommons.org/publicdomain/zero/1.0/
15
+ #
16
+ # ---
17
+ #
18
+ # This file implements Keccak-p[1600] in a SnP-compatible way.
19
+ # Please refer to SnP-documentation.h for more details.
20
+ #
21
+ # This implementation comes with KeccakP-1600-SnP.h in the same folder.
22
+ # Please refer to LowLevel.build for the exact list of other files it must be combined with.
23
+ #
24
+
25
+ # WARNING: The state must be 256-bit (32-byte) aligned; 64-byte (cache-line) alignment is better.
26
+
27
+ .text
28
+
29
+ # conditional assembly settings (tested with .if in the macros of this file)
30
+ .equ UseSIMD, 0 # 0: mXor128 uses the movq/xorq path, otherwise its movdqu/pxor path
31
+ .equ InlinePerm, 1 # 1: mKeccakPermutationInlinable24 expands the rounds inline, otherwise it emits a call
32
+
33
+ # byte offsets of the 25 lanes inside the state, 8 bytes per lane (lane index * 8)
34
+ .equ _ba, 0*8
35
+ .equ _be, 1*8
36
+ .equ _bi, 2*8
37
+ .equ _bo, 3*8
38
+ .equ _bu, 4*8
39
+ .equ _ga, 5*8
40
+ .equ _ge, 6*8
41
+ .equ _gi, 7*8
42
+ .equ _go, 8*8
43
+ .equ _gu, 9*8
44
+ .equ _ka, 10*8
45
+ .equ _ke, 11*8
46
+ .equ _ki, 12*8
47
+ .equ _ko, 13*8
48
+ .equ _ku, 14*8
49
+ .equ _ma, 15*8
50
+ .equ _me, 16*8
51
+ .equ _mi, 17*8
52
+ .equ _mo, 18*8
53
+ .equ _mu, 19*8
54
+ .equ _sa, 20*8
55
+ .equ _se, 21*8
56
+ .equ _si, 22*8
57
+ .equ _so, 23*8
58
+ .equ _su, 24*8
59
+
60
+ # arguments passed in registers (rdi, rsi, rdx, rcx, r8, r9: the System V AMD64 integer argument order)
61
+ .equ arg1, %rdi
62
+ .equ arg2, %rsi
63
+ .equ arg3, %rdx
64
+ .equ arg4, %rcx
65
+ .equ arg5, %r8
66
+ .equ arg6, %r9
67
+
68
+ # temporary registers (several names below are aliases of the same physical register)
69
+ .equ rT1, %rax
70
+ .equ rT1a, rT1 # alias of rT1 (%rax)
71
+ .equ rT1e, %rbx # same register as rDa
72
+ .equ rT1i, %r14 # same register as rBu
73
+ .equ rT1o, %r15 # same register as rCu
74
+ .equ rT1u, arg6 # %r9, same register as rDu
75
+ .equ rT2a, %r10 # same register as rBa
76
+ .equ rT2e, %r11 # same register as rBe
77
+ .equ rT2i, %r12 # same register as rBi
78
+ .equ rT2o, %r13 # same register as rBo
79
+ .equ rT2u, arg5 # %r8, same register as rDo
80
+
81
+ # round vars: register roles used by mKeccakRound and the permutation macros
82
+ .equ rpState, arg1 # %rdi, base address of the state
83
+ .equ rpStack, %rsp # base address of the scratch state the permutation macros reserve on the stack
84
+
85
+ .equ rDa, %rbx # rDa..rDu: the D values computed in the "prepare Theta" part of a round
86
+ .equ rDe, %rcx # same register as arg4
87
+ .equ rDi, %rdx # same register as arg3
88
+ .equ rDo, %r8 # same register as arg5
89
+ .equ rDu, %r9 # same register as arg6
90
+
91
+ .equ rBa, %r10 # rBa..rBu: the five rotated lanes combined for one output row
92
+ .equ rBe, %r11
93
+ .equ rBi, %r12
94
+ .equ rBo, %r13
95
+ .equ rBu, %r14
96
+
97
+ .equ rCa, %rsi # rCa..rCu: column parities, same register as arg2
98
+ .equ rCe, %rbp
99
+ .equ rCi, rBi # shares %r12 with rBi
100
+ .equ rCo, rBo # shares %r13 with rBo
101
+ .equ rCu, %r15
102
+
103
+ .macro mKeccakRound iState, oState, rc, lastRound
104
+ # One round: reads the 25 lanes at iState, writes the 25 new lanes at oState and XORs the constant rc into lane ba. On entry rCa/rCe/rCu must hold the XOR of the a/e/u columns of the input and rDi/rDo must hold input lanes si/so. When lastRound is 0 the same values are produced for the output so the next round can follow directly. Writes rT1, rDa..rDu, rBa..rBu, rCa, rCe and rCu.
105
+ # prepare Theta bis: build Ci and Co from memory, then the five D values
106
+ movq rCe, rDa
107
+ rolq rDa # single-operand form: rotate left by 1
108
+
109
+ movq _bi(\iState), rCi
110
+ xorq _gi(\iState), rDi
111
+ xorq rCu, rDa # Da = Cu ^ rol(Ce, 1)
112
+ xorq _ki(\iState), rCi
113
+ xorq _mi(\iState), rDi
114
+ xorq rDi, rCi # Ci = bi ^ gi ^ ki ^ mi ^ si (rDi held si on entry)
115
+
116
+ movq rCi, rDe
117
+ rolq rDe
118
+
119
+ movq _bo(\iState), rCo
120
+ xorq _go(\iState), rDo
121
+ xorq rCa, rDe # De = Ca ^ rol(Ci, 1)
122
+ xorq _ko(\iState), rCo
123
+ xorq _mo(\iState), rDo
124
+ xorq rDo, rCo # Co = bo ^ go ^ ko ^ mo ^ so (rDo held so on entry)
125
+
126
+ movq rCo, rDi
127
+ rolq rDi
128
+
129
+ movq rCu, rDo
130
+ xorq rCe, rDi # Di = Ce ^ rol(Co, 1)
131
+ rolq rDo
132
+
133
+ movq rCa, rDu
134
+ xorq rCi, rDo # Do = Ci ^ rol(Cu, 1)
135
+ rolq rDu
136
+
137
+ # Theta Rho Pi Chi Iota, result b (output lanes ba, be, bi, bo, bu)
138
+ movq _ba(\iState), rBa
139
+ movq _ge(\iState), rBe
140
+ xorq rCo, rDu # Du = Co ^ rol(Ca, 1), last read of rCo before rBo (same register) is reloaded
141
+ movq _ki(\iState), rBi
142
+ movq _mo(\iState), rBo
143
+ movq _su(\iState), rBu
144
+ xorq rDe, rBe
145
+ rolq $44, rBe # Be = rol(ge ^ De, 44)
146
+ xorq rDi, rBi
147
+ xorq rDa, rBa # Ba = ba ^ Da (no rotation)
148
+ rolq $43, rBi # Bi = rol(ki ^ Di, 43)
149
+
150
+ movq rBe, rCa
151
+ movq $\rc, rT1
152
+ orq rBi, rCa
153
+ xorq rBa, rT1
154
+ xorq rT1, rCa
155
+ movq rCa, _ba(\oState) # ba = Ba ^ (Be | Bi) ^ rc, rCa now starts the next Ca
156
+
157
+ xorq rDu, rBu
158
+ rolq $14, rBu # Bu = rol(su ^ Du, 14)
159
+ movq rBa, rCu
160
+ andq rBe, rCu
161
+ xorq rBu, rCu
162
+ movq rCu, _bu(\oState) # bu = Bu ^ (Ba & Be), rCu now starts the next Cu
163
+
164
+ xorq rDo, rBo
165
+ rolq $21, rBo # Bo = rol(mo ^ Do, 21)
166
+ movq rBo, rT1
167
+ andq rBu, rT1
168
+ xorq rBi, rT1
169
+ movq rT1, _bi(\oState) # bi = Bi ^ (Bo & Bu)
170
+
171
+ notq rBi
172
+ orq rBa, rBu
173
+ orq rBo, rBi
174
+ xorq rBo, rBu
175
+ xorq rBe, rBi
176
+ movq rBu, _bo(\oState) # bo = Bo ^ (Bu | Ba)
177
+ movq rBi, _be(\oState) # be = Be ^ (~Bi | Bo)
178
+ .if \lastRound == 0
179
+ movq rBi, rCe # start the next Ce with the new be
180
+ .endif
181
+
182
+ # Theta Rho Pi Chi, result g (output lanes ga, ge, gi, go, gu)
183
+ movq _gu(\iState), rBe
184
+ xorq rDu, rBe
185
+ movq _ka(\iState), rBi
186
+ rolq $20, rBe # Be = rol(gu ^ Du, 20)
187
+ xorq rDa, rBi
188
+ rolq $3, rBi # Bi = rol(ka ^ Da, 3)
189
+ movq _bo(\iState), rBa
190
+ movq rBe, rT1
191
+ orq rBi, rT1
192
+ xorq rDo, rBa
193
+ movq _me(\iState), rBo
194
+ movq _si(\iState), rBu
195
+ rolq $28, rBa # Ba = rol(bo ^ Do, 28)
196
+ xorq rBa, rT1
197
+ movq rT1, _ga(\oState) # ga = Ba ^ (Be | Bi)
198
+ .if \lastRound == 0
199
+ xorq rT1, rCa # fold new ga into the next Ca
200
+ .endif
201
+
202
+ xorq rDe, rBo
203
+ rolq $45, rBo # Bo = rol(me ^ De, 45)
204
+ movq rBi, rT1
205
+ andq rBo, rT1
206
+ xorq rBe, rT1
207
+ movq rT1, _ge(\oState) # ge = Be ^ (Bi & Bo)
208
+ .if \lastRound == 0
209
+ xorq rT1, rCe # fold new ge into the next Ce
210
+ .endif
211
+
212
+ xorq rDi, rBu
213
+ rolq $61, rBu # Bu = rol(si ^ Di, 61)
214
+ movq rBu, rT1
215
+ orq rBa, rT1
216
+ xorq rBo, rT1
217
+ movq rT1, _go(\oState) # go = Bo ^ (Bu | Ba)
218
+
219
+ andq rBe, rBa
220
+ xorq rBu, rBa
221
+ movq rBa, _gu(\oState) # gu = Bu ^ (Ba & Be)
222
+ notq rBu
223
+ .if \lastRound == 0
224
+ xorq rBa, rCu # fold new gu into the next Cu
225
+ .endif
226
+
227
+ orq rBu, rBo
228
+ xorq rBi, rBo
229
+ movq rBo, _gi(\oState) # gi = Bi ^ (Bo | ~Bu)
230
+
231
+ # Theta Rho Pi Chi, result k (output lanes ka, ke, ki, ko, ku)
232
+ movq _be(\iState), rBa
233
+ movq _gi(\iState), rBe
234
+ movq _ko(\iState), rBi
235
+ movq _mu(\iState), rBo
236
+ movq _sa(\iState), rBu
237
+ xorq rDi, rBe
238
+ rolq $6, rBe # Be = rol(gi ^ Di, 6)
239
+ xorq rDo, rBi
240
+ rolq $25, rBi # Bi = rol(ko ^ Do, 25)
241
+ movq rBe, rT1
242
+ orq rBi, rT1
243
+ xorq rDe, rBa
244
+ rolq $1, rBa # Ba = rol(be ^ De, 1)
245
+ xorq rBa, rT1
246
+ movq rT1, _ka(\oState) # ka = Ba ^ (Be | Bi)
247
+ .if \lastRound == 0
248
+ xorq rT1, rCa # fold new ka into the next Ca
249
+ .endif
250
+
251
+ xorq rDu, rBo
252
+ rolq $8, rBo # Bo = rol(mu ^ Du, 8)
253
+ movq rBi, rT1
254
+ andq rBo, rT1
255
+ xorq rBe, rT1
256
+ movq rT1, _ke(\oState) # ke = Be ^ (Bi & Bo)
257
+ .if \lastRound == 0
258
+ xorq rT1, rCe # fold new ke into the next Ce
259
+ .endif
260
+
261
+ xorq rDa, rBu
262
+ rolq $18, rBu # Bu = rol(sa ^ Da, 18)
263
+ notq rBo # rBo holds ~Bo from here on
264
+ movq rBo, rT1
265
+ andq rBu, rT1
266
+ xorq rBi, rT1
267
+ movq rT1, _ki(\oState) # ki = Bi ^ (~Bo & Bu)
268
+
269
+ movq rBu, rT1
270
+ orq rBa, rT1
271
+ xorq rBo, rT1
272
+ movq rT1, _ko(\oState) # ko = ~Bo ^ (Bu | Ba)
273
+
274
+ andq rBe, rBa
275
+ xorq rBu, rBa
276
+ movq rBa, _ku(\oState) # ku = Bu ^ (Ba & Be)
277
+ .if \lastRound == 0
278
+ xorq rBa, rCu # fold new ku into the next Cu
279
+ .endif
280
+
281
+ # Theta Rho Pi Chi, result m (output lanes ma, me, mi, mo, mu)
282
+ movq _ga(\iState), rBe
283
+ xorq rDa, rBe
284
+ movq _ke(\iState), rBi
285
+ rolq $36, rBe # Be = rol(ga ^ Da, 36)
286
+ xorq rDe, rBi
287
+ movq _bu(\iState), rBa
288
+ rolq $10, rBi # Bi = rol(ke ^ De, 10)
289
+ movq rBe, rT1
290
+ movq _mi(\iState), rBo
291
+ andq rBi, rT1
292
+ xorq rDu, rBa
293
+ movq _so(\iState), rBu
294
+ rolq $27, rBa # Ba = rol(bu ^ Du, 27)
295
+ xorq rBa, rT1
296
+ movq rT1, _ma(\oState) # ma = Ba ^ (Be & Bi)
297
+ .if \lastRound == 0
298
+ xorq rT1, rCa # fold new ma into the next Ca
299
+ .endif
300
+
301
+ xorq rDi, rBo
302
+ rolq $15, rBo # Bo = rol(mi ^ Di, 15)
303
+ movq rBi, rT1
304
+ orq rBo, rT1
305
+ xorq rBe, rT1
306
+ movq rT1, _me(\oState) # me = Be ^ (Bi | Bo)
307
+ .if \lastRound == 0
308
+ xorq rT1, rCe # fold new me into the next Ce
309
+ .endif
310
+
311
+ xorq rDo, rBu
312
+ rolq $56, rBu # Bu = rol(so ^ Do, 56)
313
+ notq rBo # rBo holds ~Bo from here on
314
+ movq rBo, rT1
315
+ orq rBu, rT1
316
+ xorq rBi, rT1
317
+ movq rT1, _mi(\oState) # mi = Bi ^ (~Bo | Bu)
318
+
319
+ orq rBa, rBe
320
+ xorq rBu, rBe
321
+ movq rBe, _mu(\oState) # mu = Bu ^ (Be | Ba)
322
+
323
+ andq rBa, rBu
324
+ xorq rBo, rBu
325
+ movq rBu, _mo(\oState) # mo = ~Bo ^ (Bu & Ba)
326
+ .if \lastRound == 0
327
+ xorq rBe, rCu # fold new mu (still in rBe) into the next Cu
328
+ .endif
329
+
330
+ # Theta Rho Pi Chi, result s (output lanes sa, se, si, so, su); the D registers are reused as temporaries once each has been read for the last time
331
+ movq _bi(\iState), rBa
332
+ movq _go(\iState), rBe
333
+ movq _ku(\iState), rBi
334
+ xorq rDi, rBa
335
+ movq _ma(\iState), rBo
336
+ rolq $62, rBa # Ba = rol(bi ^ Di, 62)
337
+ xorq rDo, rBe
338
+ movq _se(\iState), rBu
339
+ rolq $55, rBe # Be = rol(go ^ Do, 55)
340
+
341
+ xorq rDu, rBi # last read of Du, rDu becomes a temporary below
342
+ movq rBa, rDu
343
+ xorq rDe, rBu
344
+ rolq $2, rBu # Bu = rol(se ^ De, 2)
345
+ andq rBe, rDu
346
+ xorq rBu, rDu
347
+ movq rDu, _su(\oState) # su = Bu ^ (Ba & Be)
348
+
349
+ rolq $39, rBi # Bi = rol(ku ^ Du, 39)
350
+ .if \lastRound == 0
351
+ xorq rDu, rCu # fold new su into the next Cu
352
+ .endif
353
+ notq rBe # rBe holds ~Be from here on
354
+ xorq rDa, rBo # last read of Da, rDa becomes a temporary below
355
+ movq rBe, rDa
356
+ andq rBi, rDa
357
+ xorq rBa, rDa
358
+ movq rDa, _sa(\oState) # sa = Ba ^ (~Be & Bi)
359
+ .if \lastRound == 0
360
+ xorq rDa, rCa # fold new sa into the next Ca
361
+ .endif
362
+
363
+ rolq $41, rBo # Bo = rol(ma ^ Da, 41)
364
+ movq rBi, rDe
365
+ orq rBo, rDe
366
+ xorq rBe, rDe
367
+ movq rDe, _se(\oState) # se = ~Be ^ (Bi | Bo)
368
+ .if \lastRound == 0
369
+ xorq rDe, rCe # fold new se into the next Ce
370
+ .endif
371
+
372
+ movq rBo, rDi
373
+ movq rBu, rDo
374
+ andq rBu, rDi
375
+ orq rBa, rDo
376
+ xorq rBi, rDi
377
+ xorq rBo, rDo
378
+ movq rDi, _si(\oState) # si = Bi ^ (Bo & Bu), left in rDi for the next round
379
+ movq rDo, _so(\oState) # so = Bo ^ (Bu | Ba), left in rDo for the next round
380
+
381
+ .endm
382
+
383
+ .macro mKeccakPermutation12
384
+ # Applies 12 rounds to the state at rpState. Rounds alternate between the state and a 25-lane scratch copy on the stack, and the last round writes back to rpState. The 12 round constants are the same as the last 12 used by mKeccakPermutation24. Every register written by mKeccakRound is clobbered.
385
+ subq $8*25, %rsp # reserve 25 lanes (200 bytes) of scratch state on the stack
386
+
387
+ movq _ba(rpState), rCa # build the entry values mKeccakRound expects: Ca, Ce, Cu column parities
388
+ movq _be(rpState), rCe
389
+ movq _bu(rpState), rCu
390
+
391
+ xorq _ga(rpState), rCa
392
+ xorq _ge(rpState), rCe
393
+ xorq _gu(rpState), rCu
394
+
395
+ xorq _ka(rpState), rCa
396
+ xorq _ke(rpState), rCe
397
+ xorq _ku(rpState), rCu
398
+
399
+ xorq _ma(rpState), rCa
400
+ xorq _me(rpState), rCe
401
+ xorq _mu(rpState), rCu
402
+
403
+ xorq _sa(rpState), rCa
404
+ xorq _se(rpState), rCe
405
+ movq _si(rpState), rDi # the round macro expects lane si in rDi
406
+ movq _so(rpState), rDo # the round macro expects lane so in rDo
407
+ xorq _su(rpState), rCu
408
+
409
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
410
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
411
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
412
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
413
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
414
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
415
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
416
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
417
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
418
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
419
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
420
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
421
+ addq $8*25, %rsp # release the scratch state
422
+ .endm
423
+
424
+ .macro mKeccakPermutation24
425
+ # Applies 24 rounds to the state at rpState. Rounds alternate between the state and a 25-lane scratch copy on the stack, and the last round writes back to rpState. Every register written by mKeccakRound is clobbered.
426
+ subq $8*25, %rsp # reserve 25 lanes (200 bytes) of scratch state on the stack
427
+
428
+ movq _ba(rpState), rCa # build the entry values mKeccakRound expects: Ca, Ce, Cu column parities
429
+ movq _be(rpState), rCe
430
+ movq _bu(rpState), rCu
431
+
432
+ xorq _ga(rpState), rCa
433
+ xorq _ge(rpState), rCe
434
+ xorq _gu(rpState), rCu
435
+
436
+ xorq _ka(rpState), rCa
437
+ xorq _ke(rpState), rCe
438
+ xorq _ku(rpState), rCu
439
+
440
+ xorq _ma(rpState), rCa
441
+ xorq _me(rpState), rCe
442
+ xorq _mu(rpState), rCu
443
+
444
+ xorq _sa(rpState), rCa
445
+ xorq _se(rpState), rCe
446
+ movq _si(rpState), rDi # the round macro expects lane si in rDi
447
+ movq _so(rpState), rDo # the round macro expects lane so in rDo
448
+ xorq _su(rpState), rCu
449
+
450
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
451
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
452
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
453
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
454
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
455
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
456
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
457
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
458
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
459
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
460
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
461
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
462
+ # the 12 rounds below use the same constants as mKeccakPermutation12
463
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
464
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
465
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
466
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
467
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
468
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
469
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
470
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
471
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
472
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
473
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
474
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
475
+ addq $8*25, %rsp # release the scratch state
476
+ .endm
477
+
478
+ .macro mKeccakPermutationInlinable24 # 24-round permutation, either expanded in place or reached through a call
479
+ .if InlinePerm != 1 # any value other than 1 selects the out-of-line routine
480
+ callq KeccakP1600_Permute_24rounds@PLT
481
+ .else
482
+ mKeccakPermutation24
483
+ .endif
484
+ .endm
485
+
486
+ .macro mPushRegs # saves rbx, rbp and r12-r15 (the callee-saved set under System V AMD64); undone by mPopRegs
487
+ pushq %rbx
488
+ pushq %rbp
489
+ pushq %r12
490
+ pushq %r13
491
+ pushq %r14
492
+ pushq %r15 # last pushed, so mPopRegs pops it first
493
+ .endm
494
+
495
+ .macro mPopRegs # restores the six registers saved by mPushRegs, in exact reverse push order
496
+ popq %r15
497
+ popq %r14
498
+ popq %r13
499
+ popq %r12
500
+ popq %rbp
501
+ popq %rbx
502
+ .endm
503
+
504
+ .macro mXor128 input, output, offset # XORs 16 bytes at offset(input) into the 16 bytes at offset(output)
505
+ .if UseSIMD == 0 # scalar form: two 64-bit loads, two read-modify-write XORs
506
+ movq \offset(\input), rT1a
507
+ movq \offset+8(\input), rT1e
508
+ xorq rT1a, \offset(\output)
509
+ xorq rT1e, \offset+8(\output)
510
+ .else # SSE form: clobbers xmm0 and xmm1 instead of rT1a/rT1e
511
+ movdqu \offset(\input), %xmm0 # unaligned loads and stores, so no alignment is required of either pointer
512
+ movdqu \offset(\output), %xmm1
513
+ pxor %xmm1, %xmm0
514
+ movdqu %xmm0, \offset(\output)
515
+ .endif
516
+ .endm
517
+
518
+ .macro mXor256 input, output, offset # XORs 32 bytes at offset(input) into the 32 bytes at offset(output)
519
+ .if UseSIMD == 0
520
+ movq \offset(\input), rT1a # scalar form: all four source quadwords are fetched before any store
521
+ movq \offset+8(\input), rT1e
522
+ movq \offset+16(\input), rT1i
523
+ movq \offset+24(\input), rT1o
524
+ xorq rT1a, \offset(\output)
525
+ xorq rT1e, \offset+8(\output)
526
+ xorq rT1i, \offset+16(\output)
527
+ xorq rT1o, \offset+24(\output)
528
+ .else
529
+ mXor128 \input, \output, \offset # SSE form: two consecutive 16-byte halves through xmm0/xmm1
530
+ mXor128 \input, \output, \offset+16
531
+ .endif
532
+ .endm
539
+
540
+ .macro mXor512 input, output, offset # XORs 64 bytes at offset(input) into the 64 bytes at offset(output)
541
+ mXor256 \input, \output, \offset # low 32 bytes; mXor256 itself selects the scalar or SSE form from UseSIMD
542
+ mXor256 \input, \output, \offset+32 # high 32 bytes
543
+ .endm
563
+
564
+ #----------------------------------------------------------------------------
565
+ # Initialization hook that does nothing: the body is a single return.
566
+ # void KeccakP1600_StaticInitialize( void )
567
+ #
568
+ .align 8
569
+ .global KeccakP1600_StaticInitialize
570
+ .type KeccakP1600_StaticInitialize, %function
571
+ KeccakP1600_StaticInitialize:
572
+ retq
573
+ .size KeccakP1600_StaticInitialize, .-KeccakP1600_StaticInitialize # after the body, so the recorded size spans the function
574
+
575
+ #----------------------------------------------------------------------------
576
+ # Writes the initial state: all-ones in lanes be, bi, go, ki, mi and sa, zero in the other 19 lanes.
577
+ # void KeccakP1600_Initialize(void *state)
578
+ # NOTE(review): rax and rcx are used as literal scratch registers; assumes arg1 is neither of them - confirm against the arg1 definition.
579
+ .align 8
580
+ .global KeccakP1600_Initialize
581
+ .type KeccakP1600_Initialize, %function
582
+ KeccakP1600_Initialize:
583
+ xorq %rax, %rax # rax = 0
584
+ xorq %rcx, %rcx
585
+ notq %rcx # rcx = all ones
586
+ .if UseSIMD == 0 # scalar form: one 8-byte store per lane
587
+ movq %rax, _ba(arg1)
588
+ movq %rcx, _be(arg1)
589
+ movq %rcx, _bi(arg1)
590
+ movq %rax, _bo(arg1)
591
+ movq %rax, _bu(arg1)
592
+ movq %rax, _ga(arg1)
593
+ movq %rax, _ge(arg1)
594
+ movq %rax, _gi(arg1)
595
+ movq %rcx, _go(arg1)
596
+ movq %rax, _gu(arg1)
597
+ movq %rax, _ka(arg1)
598
+ movq %rax, _ke(arg1)
599
+ movq %rcx, _ki(arg1)
600
+ movq %rax, _ko(arg1)
601
+ movq %rax, _ku(arg1)
602
+ movq %rax, _ma(arg1)
603
+ movq %rax, _me(arg1)
604
+ movq %rcx, _mi(arg1)
605
+ movq %rax, _mo(arg1)
606
+ movq %rax, _mu(arg1)
607
+ movq %rcx, _sa(arg1)
608
+ movq %rax, _se(arg1)
609
+ movq %rax, _si(arg1)
610
+ movq %rax, _so(arg1)
611
+ movq %rax, _su(arg1)
612
+ .else # SSE form: 16-byte zero stores stand in for pairs of 8-byte stores
613
+ pxor %xmm0, %xmm0 # xmm0 = 0
614
+ movq %rax, _ba(arg1)
615
+ movq %rcx, _be(arg1)
616
+ movq %rcx, _bi(arg1)
617
+ movq %rax, _bo(arg1)
618
+ movdqu %xmm0, _bu(arg1) # 16 bytes from bu; presumably also covers ga (lane offsets are defined outside this chunk)
619
+ movdqu %xmm0, _ge(arg1) # presumably ge and gi
620
+ movq %rcx, _go(arg1)
621
+ movq %rax, _gu(arg1)
622
+ movdqu %xmm0, _ka(arg1) # presumably ka and ke
623
+ movq %rcx, _ki(arg1)
624
+ movq %rax, _ko(arg1)
625
+ movdqu %xmm0, _ku(arg1) # presumably ku and ma
626
+ movq %rax, _me(arg1)
627
+ movq %rcx, _mi(arg1)
628
+ movdqu %xmm0, _mo(arg1) # presumably mo and mu
629
+ movq %rcx, _sa(arg1)
630
+ movq %rax, _se(arg1)
631
+ movdqu %xmm0, _si(arg1) # presumably si and so
632
+ movq %rax, _su(arg1)
633
+ .endif
634
+ retq
635
+ .size KeccakP1600_Initialize, .-KeccakP1600_Initialize # after the body, so the recorded size spans the function
636
+
637
+ #----------------------------------------------------------------------------
638
+ # XORs the single byte data into the state byte at the given byte offset.
639
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset)
640
+ # NOTE(review): offset is declared unsigned int but the whole arg3 register is added; assumes its upper bits are clear - confirm.
641
+ .align 8
642
+ .global KeccakP1600_AddByte
643
+ .type KeccakP1600_AddByte, %function
644
+ KeccakP1600_AddByte:
645
+ addq arg3, arg1 # arg1 = state + offset
646
+ mov arg2, %rax # only the low byte (al) is used below
647
+ xorb %al, (arg1)
648
+ retq
649
+ .size KeccakP1600_AddByte, .-KeccakP1600_AddByte # after the body, so the recorded size spans the function
650
+
651
+ #----------------------------------------------------------------------------
652
+ # XORs length bytes from data into the state starting at byte offset. On return arg1 and arg2 point just past the processed bytes; KeccakF1600_FastLoop_Absorb reads arg2 after calling this.
653
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
654
+ # NOTE(review): whole lanes are selected from length bits 3..7 and lane-count bits 0..4 only, so this assumes length < 256 - confirm callers stay within the state size.
655
+ .align 8
656
+ .global KeccakP1600_AddBytes
657
+ .type KeccakP1600_AddBytes, %function
658
+ KeccakP1600_AddBytes:
659
+ pushq rT1e # rT1e, rT1i and rT1o are scratch for the scalar mXor forms; restored at the exit label
660
+ pushq rT1i
661
+ pushq rT1o
662
+ addq arg3, arg1 # arg1 = state + offset
663
+ testq $0xF8, arg4 # any of length bits 3..7 set, i.e. at least one whole 8-byte lane?
664
+ jz KeccakP1600_AddBytes_Bytes
665
+ movq arg4, arg6
666
+ shrq $3, arg6 # arg6 = number of whole lanes
667
+ testq $16, arg6 # lane-count bit 4: 16 lanes = 128 bytes
668
+ jz KeccakP1600_AddBytes_8Lanes
669
+ mXor512 arg2, arg1, 0
670
+ mXor512 arg2, arg1, 64
671
+ addq $128, arg2
672
+ addq $128, arg1
673
+ KeccakP1600_AddBytes_8Lanes:
674
+ testq $8, arg6 # lane-count bit 3: 8 lanes = 64 bytes
675
+ jz KeccakP1600_AddBytes_4Lanes
676
+ mXor512 arg2, arg1, 0
677
+ addq $64, arg2
678
+ addq $64, arg1
679
+ KeccakP1600_AddBytes_4Lanes:
680
+ testq $4, arg6 # lane-count bit 2: 4 lanes = 32 bytes
681
+ jz KeccakP1600_AddBytes_2Lanes
682
+ mXor256 arg2, arg1, 0
683
+ addq $32, arg2
684
+ addq $32, arg1
685
+ KeccakP1600_AddBytes_2Lanes:
686
+ testq $2, arg6 # lane-count bit 1: 2 lanes = 16 bytes
687
+ jz KeccakP1600_AddBytes_1Lane
688
+ mXor128 arg2, arg1, 0
689
+ addq $16, arg2
690
+ addq $16, arg1
691
+ KeccakP1600_AddBytes_1Lane:
692
+ testq $1, arg6 # lane-count bit 0: one remaining lane
693
+ jz KeccakP1600_AddBytes_Bytes
694
+ movq (arg2), rT1
695
+ xorq rT1, (arg1)
696
+ addq $8, arg2
697
+ addq $8, arg1
698
+ KeccakP1600_AddBytes_Bytes:
699
+ andq $7, arg4 # arg4 = trailing byte count (0..7)
700
+ jz KeccakP1600_AddBytes_Exit
701
+ KeccakP1600_AddBytes_BytesLoop:
702
+ movb (arg2), %al
703
+ xorb %al, (arg1)
704
+ addq $1, arg2
705
+ addq $1, arg1
706
+ subq $1, arg4
707
+ jnz KeccakP1600_AddBytes_BytesLoop
708
+ KeccakP1600_AddBytes_Exit:
709
+ popq rT1o # reverse of the three pushes at entry
710
+ popq rT1i
711
+ popq rT1e
712
+ retq
713
+ .size KeccakP1600_AddBytes, .-KeccakP1600_AddBytes # after the body, so the recorded size spans the function
714
+
715
+
716
+ KeccakLaneComplementTable: # 25 quadwords, one per lane index; the Overwrite and Extract routines XOR state bytes with the bytes at the same offset here
717
+ .quad 0
718
+ .quad 0xFFFFFFFFFFFFFFFF # 1 be
719
+ .quad 0xFFFFFFFFFFFFFFFF # 2 bi
720
+ .quad 0
721
+ .quad 0
722
+
723
+ .quad 0 # lane indices 5-9
724
+ .quad 0
725
+ .quad 0
726
+ .quad 0xFFFFFFFFFFFFFFFF # 8 go
727
+ .quad 0
728
+
729
+ .quad 0 # lane indices 10-14
730
+ .quad 0
731
+ .quad 0xFFFFFFFFFFFFFFFF # 12 ki
732
+ .quad 0
733
+ .quad 0
734
+
735
+ .quad 0 # lane indices 15-19
736
+ .quad 0
737
+ .quad 0xFFFFFFFFFFFFFFFF # 17 mi
738
+ .quad 0
739
+ .quad 0
740
+
741
+ .quad 0xFFFFFFFFFFFFFFFF # 20 sa
742
+ .quad 0
743
+ .quad 0
744
+ .quad 0
745
+ .quad 0
746
+
747
+ #----------------------------------------------------------------------------
748
+ # Stores length bytes into the state at byte offset; each stored byte is the data byte XORed with the byte at the same offset in KeccakLaneComplementTable.
749
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
750
+ # NOTE(review): offset and length are declared unsigned int but used as full 64-bit registers; assumes their upper bits are clear - confirm.
751
+ .align 8
752
+ .global KeccakP1600_OverwriteBytes
753
+ .type KeccakP1600_OverwriteBytes, %function
754
+ KeccakP1600_OverwriteBytes:
755
+ addq arg3, arg1 # arg1 = state + offset
756
+ leaq KeccakLaneComplementTable(%rip), arg5
757
+ addq arg3, arg5 # arg5 = table + offset, advanced in step with arg1
758
+ subq $8, arg4 # borrow means fewer than 8 bytes remain
759
+ jc KeccakP1600_OverwriteBytes_Bytes
760
+ KeccakP1600_OverwriteBytes_LanesLoop:
761
+ movq (arg2), rT1
762
+ xorq (arg5), rT1
763
+ movq rT1, (arg1)
764
+ addq $8, arg2
765
+ addq $8, arg5
766
+ addq $8, arg1
767
+ subq $8, arg4
768
+ jnc KeccakP1600_OverwriteBytes_LanesLoop
769
+ KeccakP1600_OverwriteBytes_Bytes:
770
+ addq $8, arg4 # undo the last subtraction: arg4 = trailing byte count (0..7)
771
+ jz KeccakP1600_OverwriteBytes_Exit
772
+ KeccakP1600_OverwriteBytes_BytesLoop:
773
+ movb (arg2), %al
774
+ xorb (arg5), %al
775
+ movb %al, (arg1)
776
+ addq $1, arg2
777
+ addq $1, arg5
778
+ addq $1, arg1
779
+ subq $1, arg4
780
+ jnz KeccakP1600_OverwriteBytes_BytesLoop
781
+ KeccakP1600_OverwriteBytes_Exit:
782
+ retq
783
+ .size KeccakP1600_OverwriteBytes, .-KeccakP1600_OverwriteBytes # after the body, so the recorded size spans the function
784
+
785
+ #----------------------------------------------------------------------------
786
+ # Sets the first byteCount bytes of the state to zero XOR the matching KeccakLaneComplementTable byte, i.e. to the table bytes themselves.
787
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
788
+ # NOTE(review): byteCount is declared unsigned int but used as a full 64-bit register; assumes its upper bits are clear - confirm.
789
+ .align 8
790
+ .global KeccakP1600_OverwriteWithZeroes
791
+ .type KeccakP1600_OverwriteWithZeroes, %function
792
+ KeccakP1600_OverwriteWithZeroes:
793
+ leaq KeccakLaneComplementTable(%rip), arg5 # no offset argument: table and state are both walked from their start
794
+ subq $8, arg2 # borrow means fewer than 8 bytes remain
795
+ jc KeccakP1600_OverwriteWithZeroes_Bytes
796
+ KeccakP1600_OverwriteWithZeroes_LanesLoop:
797
+ movq $0, rT1
798
+ xorq (arg5), rT1 # rT1 = table quadword
799
+ movq rT1, (arg1)
800
+ addq $8, arg5
801
+ addq $8, arg1
802
+ subq $8, arg2
803
+ jnc KeccakP1600_OverwriteWithZeroes_LanesLoop
804
+ KeccakP1600_OverwriteWithZeroes_Bytes:
805
+ addq $8, arg2 # undo the last subtraction: arg2 = trailing byte count (0..7)
806
+ jz KeccakP1600_OverwriteWithZeroes_Exit
807
+ KeccakP1600_OverwriteWithZeroes_BytesLoop:
808
+ movb $0, %al
809
+ xorb (arg5), %al # al = table byte
810
+ movb %al, (arg1)
811
+ addq $1, arg5
812
+ addq $1, arg1
813
+ subq $1, arg2
814
+ jnz KeccakP1600_OverwriteWithZeroes_BytesLoop
815
+ KeccakP1600_OverwriteWithZeroes_Exit:
816
+ retq
817
+ .size KeccakP1600_OverwriteWithZeroes, .-KeccakP1600_OverwriteWithZeroes # after the body, so the recorded size spans the function
818
+
819
+ #----------------------------------------------------------------------------
820
+ # Copies length bytes from the state at byte offset into data; each byte is XORed with the byte at the same offset in KeccakLaneComplementTable. The state is only read.
821
+ # void KeccakP1600_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
822
+ # NOTE(review): offset and length are declared unsigned int but used as full 64-bit registers; assumes their upper bits are clear - confirm.
823
+ .align 8
824
+ .global KeccakP1600_ExtractBytes
825
+ .type KeccakP1600_ExtractBytes, %function
826
+ KeccakP1600_ExtractBytes:
827
+ addq arg3, arg1 # arg1 = state + offset
828
+ leaq KeccakLaneComplementTable(%rip), arg5
829
+ addq arg3, arg5 # arg5 = table + offset, advanced in step with arg1
830
+ subq $8, arg4 # borrow means fewer than 8 bytes remain
831
+ jc KeccakP1600_ExtractBytes_Bytes
832
+ KeccakP1600_ExtractBytes_LanesLoop:
833
+ movq (arg1), rT1
834
+ xorq (arg5), rT1
835
+ movq rT1, (arg2) # data is the destination here
836
+ addq $8, arg2
837
+ addq $8, arg5
838
+ addq $8, arg1
839
+ subq $8, arg4
840
+ jnc KeccakP1600_ExtractBytes_LanesLoop
841
+ KeccakP1600_ExtractBytes_Bytes:
842
+ addq $8, arg4 # undo the last subtraction: arg4 = trailing byte count (0..7)
843
+ jz KeccakP1600_ExtractBytes_Exit
844
+ KeccakP1600_ExtractBytes_BytesLoop:
845
+ movb (arg1), %al
846
+ xorb (arg5), %al
847
+ movb %al, (arg2)
848
+ addq $1, arg2
849
+ addq $1, arg5
850
+ addq $1, arg1
851
+ subq $1, arg4
852
+ jnz KeccakP1600_ExtractBytes_BytesLoop
853
+ KeccakP1600_ExtractBytes_Exit:
854
+ retq
855
+ .size KeccakP1600_ExtractBytes, .-KeccakP1600_ExtractBytes # after the body, so the recorded size spans the function
856
+
857
+ #----------------------------------------------------------------------------
858
+ # For length bytes starting at byte offset of the state: output byte = state byte XOR KeccakLaneComplementTable byte XOR input byte. The state is only read.
859
+ # void KeccakP1600_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
860
+ # NOTE(review): offset and length are declared unsigned int but used as full 64-bit registers; assumes their upper bits are clear - confirm.
861
+ .align 8
862
+ .global KeccakP1600_ExtractAndAddBytes
863
+ .type KeccakP1600_ExtractAndAddBytes, %function
864
+ KeccakP1600_ExtractAndAddBytes:
865
+ addq arg4, arg1 # arg1 = state + offset
866
+ leaq KeccakLaneComplementTable(%rip), arg6
867
+ addq arg4, arg6 # arg6 = table + offset, advanced in step with arg1
868
+ subq $8, arg5 # borrow means fewer than 8 bytes remain
869
+ jc KeccakP1600_ExtractAndAddBytes_Bytes
870
+ KeccakP1600_ExtractAndAddBytes_LanesLoop:
871
+ movq (arg1), rT1
872
+ xorq (arg6), rT1
873
+ xorq (arg2), rT1
874
+ movq rT1, (arg3)
875
+ addq $8, arg2
876
+ addq $8, arg3
877
+ addq $8, arg6
878
+ addq $8, arg1
879
+ subq $8, arg5
880
+ jnc KeccakP1600_ExtractAndAddBytes_LanesLoop
881
+ KeccakP1600_ExtractAndAddBytes_Bytes:
882
+ addq $8, arg5 # undo the last subtraction: arg5 = trailing byte count (0..7)
883
+ jz KeccakP1600_ExtractAndAddBytes_Exit
884
+ KeccakP1600_ExtractAndAddBytes_BytesLoop:
885
+ movb (arg1), %al
886
+ xorb (arg6), %al
887
+ xorb (arg2), %al
888
+ movb %al, (arg3)
889
+ addq $1, arg2
890
+ addq $1, arg3
891
+ addq $1, arg6
892
+ addq $1, arg1
893
+ subq $1, arg5
894
+ jnz KeccakP1600_ExtractAndAddBytes_BytesLoop
895
+ KeccakP1600_ExtractAndAddBytes_Exit:
896
+ retq
897
+ .size KeccakP1600_ExtractAndAddBytes, .-KeccakP1600_ExtractAndAddBytes # after the body, so the recorded size spans the function
898
+
899
+ #----------------------------------------------------------------------------
900
+ # Runs the last nrounds invocations of the 24-round sequence on the state. A count outside 1..24 returns at once and leaves the state untouched.
901
+ # void KeccakP1600_Permute_Nrounds( void *state, unsigned int nrounds )
902
+ # NOTE(review): nrounds is declared unsigned int but compared and scaled as a full 64-bit register; assumes its upper bits are clear - confirm.
903
+ .align 8
904
+ .global KeccakP1600_Permute_Nrounds
905
+ .type KeccakP1600_Permute_Nrounds, %function
906
+ KeccakP1600_Permute_Nrounds:
+ testq arg2, arg2 # nrounds == 0: no round to run
+ jz KeccakP1600_Permute_Nrounds_OutOfRange
+ cmpq $24, arg2 # the dispatch table has 24 entries; a larger count would index past it
+ ja KeccakP1600_Permute_Nrounds_OutOfRange
907
+ mPushRegs
908
+ subq $8*25, %rsp # 25 quadwords of stack, as in mKeccakPermutation24
909
+ movq arg2, rT1 # rT1 = nrounds, kept for the parity test and the dispatch
910
+
911
+ movq _ba(rpState), rCa
912
+ movq _be(rpState), rCe
913
+ movq _bu(rpState), rCu
914
+
915
+ xorq _ga(rpState), rCa
916
+ xorq _ge(rpState), rCe
917
+ xorq _gu(rpState), rCu
918
+
919
+ xorq _ka(rpState), rCa
920
+ xorq _ke(rpState), rCe
921
+ xorq _ku(rpState), rCu
922
+
923
+ xorq _ma(rpState), rCa
924
+ xorq _me(rpState), rCe
925
+ xorq _mu(rpState), rCu
926
+
927
+ xorq _sa(rpState), rCa
928
+ xorq _se(rpState), rCe
929
+ movq _si(rpState), rDi
930
+ movq _so(rpState), rDo
931
+ xorq _su(rpState), rCu
932
+
933
+ testq $1, rT1 # odd counts enter at a label whose round names rpStack as first operand,
934
+ jz KeccakP1600_Permute_Nrounds_Dispatch # so only then is the state mirrored into the stack area first
935
+ movq _ba(rpState), rT2a # copy to stack
936
+ movq rT2a, _ba(rpStack)
937
+ movq _be(rpState), rT2a
938
+ movq rT2a, _be(rpStack)
939
+ movq _bi(rpState), rT2a
940
+ movq rT2a, _bi(rpStack)
941
+ movq _bo(rpState), rT2a
942
+ movq rT2a, _bo(rpStack)
943
+ movq _bu(rpState), rT2a
944
+ movq rT2a, _bu(rpStack)
945
+ movq _ga(rpState), rT2a
946
+ movq rT2a, _ga(rpStack)
947
+ movq _ge(rpState), rT2a
948
+ movq rT2a, _ge(rpStack)
949
+ movq _gi(rpState), rT2a
950
+ movq rT2a, _gi(rpStack)
951
+ movq _go(rpState), rT2a
952
+ movq rT2a, _go(rpStack)
953
+ movq _gu(rpState), rT2a
954
+ movq rT2a, _gu(rpStack)
955
+ movq _ka(rpState), rT2a
956
+ movq rT2a, _ka(rpStack)
957
+ movq _ke(rpState), rT2a
958
+ movq rT2a, _ke(rpStack)
959
+ movq _ki(rpState), rT2a
960
+ movq rT2a, _ki(rpStack)
961
+ movq _ko(rpState), rT2a
962
+ movq rT2a, _ko(rpStack)
963
+ movq _ku(rpState), rT2a
964
+ movq rT2a, _ku(rpStack)
965
+ movq _ma(rpState), rT2a
966
+ movq rT2a, _ma(rpStack)
967
+ movq _me(rpState), rT2a
968
+ movq rT2a, _me(rpStack)
969
+ movq _mi(rpState), rT2a
970
+ movq rT2a, _mi(rpStack)
971
+ movq _mo(rpState), rT2a
972
+ movq rT2a, _mo(rpStack)
973
+ movq _mu(rpState), rT2a
974
+ movq rT2a, _mu(rpStack)
975
+ movq _sa(rpState), rT2a
976
+ movq rT2a, _sa(rpStack)
977
+ movq _se(rpState), rT2a
978
+ movq rT2a, _se(rpStack)
979
+ movq _si(rpState), rT2a
980
+ movq rT2a, _si(rpStack)
981
+ movq _so(rpState), rT2a
982
+ movq rT2a, _so(rpStack)
983
+ movq _su(rpState), rT2a
984
+ movq rT2a, _su(rpStack)
985
+ KeccakP1600_Permute_Nrounds_Dispatch:
986
+ shlq $3, rT1 # rT1 = 8 * nrounds
987
+ leaq KeccakP1600_Permute_NroundsTable-8(%rip), rT2a # biased by one entry: table slot 0 belongs to nrounds == 1
988
+ jmp *(rT1, rT2a) # indirect jump to the entry label for this round count
989
+ # Entry labels: execution falls through from the chosen label to the end of the sequence.
990
+ KeccakP1600_Permute_Nrounds24:
991
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
992
+ KeccakP1600_Permute_Nrounds23:
993
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
994
+ KeccakP1600_Permute_Nrounds22:
995
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
996
+ KeccakP1600_Permute_Nrounds21:
997
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
998
+ KeccakP1600_Permute_Nrounds20:
999
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
1000
+ KeccakP1600_Permute_Nrounds19:
1001
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
1002
+ KeccakP1600_Permute_Nrounds18:
1003
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
1004
+ KeccakP1600_Permute_Nrounds17:
1005
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
1006
+ KeccakP1600_Permute_Nrounds16:
1007
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
1008
+ KeccakP1600_Permute_Nrounds15:
1009
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
1010
+ KeccakP1600_Permute_Nrounds14:
1011
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
1012
+ KeccakP1600_Permute_Nrounds13:
1013
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
1014
+ KeccakP1600_Permute_Nrounds12:
1015
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
1016
+ KeccakP1600_Permute_Nrounds11:
1017
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
1018
+ KeccakP1600_Permute_Nrounds10:
1019
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
1020
+ KeccakP1600_Permute_Nrounds9:
1021
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
1022
+ KeccakP1600_Permute_Nrounds8:
1023
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
1024
+ KeccakP1600_Permute_Nrounds7:
1025
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
1026
+ KeccakP1600_Permute_Nrounds6:
1027
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
1028
+ KeccakP1600_Permute_Nrounds5:
1029
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
1030
+ KeccakP1600_Permute_Nrounds4:
1031
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
1032
+ KeccakP1600_Permute_Nrounds3:
1033
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
1034
+ KeccakP1600_Permute_Nrounds2:
1035
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
1036
+ KeccakP1600_Permute_Nrounds1:
1037
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
1038
+ addq $8*25, %rsp # release the stack area
1039
+ mPopRegs
1040
+ retq
1041
+ KeccakP1600_Permute_Nrounds_OutOfRange: # reached before anything was pushed, so a bare return is enough
+ retq
+ .size KeccakP1600_Permute_Nrounds, .-KeccakP1600_Permute_Nrounds # after the body, so the recorded size spans the function
1042
+
1043
+ KeccakP1600_Permute_NroundsTable: # slot k-1 holds the entry label for k rounds; KeccakP1600_Permute_Nrounds indexes it as (table - 8) + 8 * nrounds
1044
+ .quad KeccakP1600_Permute_Nrounds1 # NOTE(review): absolute label addresses; if this sits in .text of a shared object they need load-time text relocations - confirm section
1045
+ .quad KeccakP1600_Permute_Nrounds2
1046
+ .quad KeccakP1600_Permute_Nrounds3
1047
+ .quad KeccakP1600_Permute_Nrounds4
1048
+ .quad KeccakP1600_Permute_Nrounds5
1049
+ .quad KeccakP1600_Permute_Nrounds6
1050
+ .quad KeccakP1600_Permute_Nrounds7
1051
+ .quad KeccakP1600_Permute_Nrounds8
1052
+ .quad KeccakP1600_Permute_Nrounds9
1053
+ .quad KeccakP1600_Permute_Nrounds10
1054
+ .quad KeccakP1600_Permute_Nrounds11
1055
+ .quad KeccakP1600_Permute_Nrounds12
1056
+ .quad KeccakP1600_Permute_Nrounds13
1057
+ .quad KeccakP1600_Permute_Nrounds14
1058
+ .quad KeccakP1600_Permute_Nrounds15
1059
+ .quad KeccakP1600_Permute_Nrounds16
1060
+ .quad KeccakP1600_Permute_Nrounds17
1061
+ .quad KeccakP1600_Permute_Nrounds18
1062
+ .quad KeccakP1600_Permute_Nrounds19
1063
+ .quad KeccakP1600_Permute_Nrounds20
1064
+ .quad KeccakP1600_Permute_Nrounds21
1065
+ .quad KeccakP1600_Permute_Nrounds22
1066
+ .quad KeccakP1600_Permute_Nrounds23
1067
+ .quad KeccakP1600_Permute_Nrounds24
1068
+
1069
+ #----------------------------------------------------------------------------
1070
+ # Callable wrapper around the mKeccakPermutation12 macro: saves rbx, rbp and r12-r15, expands the macro, restores them.
1071
+ # void KeccakP1600_Permute_12rounds( void *state )
1072
+ #
1073
+ .align 8
1074
+ .global KeccakP1600_Permute_12rounds
1075
+ .type KeccakP1600_Permute_12rounds, %function
1076
+ KeccakP1600_Permute_12rounds:
1077
+ mPushRegs
1078
+ mKeccakPermutation12
1079
+ mPopRegs
1080
+ retq
1081
+ .size KeccakP1600_Permute_12rounds, .-KeccakP1600_Permute_12rounds # after the body, so the recorded size spans the function
1082
+
1083
+ #----------------------------------------------------------------------------
1084
+ # Callable wrapper around the mKeccakPermutation24 macro: saves rbx, rbp and r12-r15, expands the macro, restores them.
1085
+ # void KeccakP1600_Permute_24rounds( void *state )
1086
+ #
1087
+ .align 8
1088
+ .global KeccakP1600_Permute_24rounds
1089
+ .type KeccakP1600_Permute_24rounds, %function
1090
+ KeccakP1600_Permute_24rounds:
1091
+ mPushRegs
1092
+ mKeccakPermutation24
1093
+ mPopRegs
1094
+ retq
1095
+ .size KeccakP1600_Permute_24rounds, .-KeccakP1600_Permute_24rounds # after the body, so the recorded size spans the function
1096
+
1097
+ #----------------------------------------------------------------------------
1098
+ # While at least laneCount whole lanes of input remain: XOR laneCount lanes of data into the state, XOR trailingBits into the lane after them, run the 24-round permutation. Returns the number of bytes consumed.
1099
+ # size_t KeccakF1600_FastLoop_Absorb( void *state, unsigned int laneCount, unsigned char *data,
1100
+ # size_t dataByteLen, unsigned char trailingBits )
1101
+ # NOTE(review): laneCount and trailingBits are narrower than 64 bits but used or saved as full registers; assumes their upper bits are clear - confirm.
1102
+ .align 8
1103
+ .global KeccakF1600_FastLoop_Absorb
1104
+ .type KeccakF1600_FastLoop_Absorb, %function
1105
+ KeccakF1600_FastLoop_Absorb:
1106
+ mPushRegs
1107
+ pushq arg3 # save initial data pointer
1108
+ pushq arg5 # save trailingBits
1109
+ shrq $3, arg4 # nbrLanes = dataByteLen / SnP_laneLengthInBytes
1110
+ subq arg2, arg4 # if (nbrLanes >= laneCount)
1111
+ jc KeccakF1600_FastLoop_Absorb_Exit
1112
+ cmpq $21, arg2 # only laneCount == 21 has an unrolled loop; every other value takes the generic loop
1113
+ jnz KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop
1114
+ KeccakF1600_FastLoop_Absorb_Loop21: # Fixed laneCount = 21 (rate = 1344, capacity = 256)
1115
+ movq _ba(arg3), rT1a # the state lane offsets double as offsets into the input block
1116
+ movq _be(arg3), rT1e
1117
+ movq _bi(arg3), rT1i
1118
+ movq _bo(arg3), rT1o
1119
+ movq _bu(arg3), rT1u
1120
+ movq _ga(arg3), rT2a
1121
+ movq _ge(arg3), rT2e
1122
+ movq _gi(arg3), rT2i
1123
+ movq _go(arg3), rT2o
1124
+ movq _gu(arg3), rT2u
1125
+ xorq rT1a, _ba(arg1)
1126
+ xorq rT1e, _be(arg1)
1127
+ xorq rT1i, _bi(arg1)
1128
+ xorq rT1o, _bo(arg1)
1129
+ xorq rT1u, _bu(arg1)
1130
+ xorq rT2a, _ga(arg1)
1131
+ xorq rT2e, _ge(arg1)
1132
+ xorq rT2i, _gi(arg1)
1133
+ xorq rT2o, _go(arg1)
1134
+ xorq rT2u, _gu(arg1)
1135
+ movq _ka(arg3), rT1a
1136
+ movq _ke(arg3), rT1e
1137
+ movq _ki(arg3), rT1i
1138
+ movq _ko(arg3), rT1o
1139
+ movq _ku(arg3), rT1u
1140
+ movq _ma(arg3), rT2a
1141
+ movq _me(arg3), rT2e
1142
+ movq _mi(arg3), rT2i
1143
+ movq _mo(arg3), rT2o
1144
+ movq _mu(arg3), rT2u
1145
+ xorq rT1a, _ka(arg1)
1146
+ xorq rT1e, _ke(arg1)
1147
+ xorq rT1i, _ki(arg1)
1148
+ xorq rT1o, _ko(arg1)
1149
+ xorq rT1u, _ku(arg1)
1150
+ movq _sa(arg3), rT1a # last input lane of the block
1151
+ movq (%rsp), rT1e # xor trailingBits
1152
+ xorq rT2a, _ma(arg1)
1153
+ xorq rT2e, _me(arg1)
1154
+ xorq rT2i, _mi(arg1)
1155
+ addq $_se, arg3 # advance the data pointer by the byte offset of lane se, i.e. past the lanes just read
1156
+ xorq rT2o, _mo(arg1)
1157
+ xorq rT2u, _mu(arg1)
1158
+ xorq rT1a, _sa(arg1)
1159
+ xorq rT1e, _se(arg1) # trailingBits goes into lane se, the lane after the absorbed ones
1160
+ pushq arg3 # data pointer and lane counter are saved around the permutation
1161
+ pushq arg4
1162
+ mKeccakPermutationInlinable24
1163
+ popq arg4
1164
+ popq arg3
1165
+ subq $21, arg4 # while (nbrLanes >= 21)
1166
+ jnc KeccakF1600_FastLoop_Absorb_Loop21
1167
+ KeccakF1600_FastLoop_Absorb_Exit:
1168
+ addq $8, %rsp # free trailingBits
1169
+ popq rT1a # restore initial data pointer
1170
+ subq rT1a, arg3 # processed = data pointer - initial data pointer
1171
+ movq arg3, rT1a # result is left in rT1a (presumably the return register; defined outside this chunk)
1172
+ mPopRegs
1173
+ retq
1174
+ KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop:
1175
+ pushq arg4 # stack from the top after these pushes: state, laneCount, nbrLanes, trailingBits
1176
+ pushq arg2
1177
+ pushq arg1
1178
+ movq arg2, arg4 # prepare xor call: length (in bytes)
1179
+ shlq $3, arg4
1180
+ movq arg3, arg2 # data pointer
1181
+ xorq arg3, arg3 # offset = 0
1182
+ callq KeccakP1600_AddBytes@PLT # (void *state, const unsigned char *data, unsigned int offset, unsigned int length)
1183
+ movq arg2, arg3 # updated data pointer
1184
+ movq 24(%rsp), rT1a # xor trailingBits
1185
+ xorq rT1a, (arg1) # arg1 was advanced by KeccakP1600_AddBytes, so this is the lane after the absorbed ones
1186
+ popq arg1 # back to the start of the state
1187
+ pushq arg3 # keep the data pointer across the call
1188
+ callq KeccakP1600_Permute_24rounds@PLT
1189
+ popq arg3
1190
+ popq arg2
1191
+ popq arg4
1192
+ subq arg2, arg4 # while (nbrLanes >= laneCount)
1193
+ jnc KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop
1194
+ jmp KeccakF1600_FastLoop_Absorb_Exit
1195
+ .size KeccakF1600_FastLoop_Absorb, .-KeccakF1600_FastLoop_Absorb # after the last instruction, so the recorded size spans the function
1196
+