sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1100 @@
1
+ # The eXtended Keccak Code Package (XKCP)
2
+ # https://github.com/XKCP/XKCP
3
+ #
4
+ # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
5
+ # Copyright (c) 2017 Ronny Van Keer
6
+ # All rights reserved.
7
+ #
8
+ # The source code in this file is licensed under the CRYPTOGAMS license.
9
+ # For further details see http://www.openssl.org/~appro/cryptogams/.
10
+ #
11
+ # Notes:
12
+ # The code for the permutation (__KeccakF1600) was generated with
13
+ # Andy Polyakov's keccak1600-avx2.pl from the CRYPTOGAMS project
14
+ # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
15
+ # The rest of the code was written by Ronny Van Keer.
16
+ # Adaptations for macOS by Stéphane Léon.
17
+ # Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
18
+
19
+ .text
20
+
21
+ # -----------------------------------------------------------------------------
22
+ # Clears the state: writes 200 zero bytes (6 x 32-byte vector stores + one 8-byte store) at *state.
23
+ # void KeccakP1600_Initialize(void *state);
24
+ # In: %rdi = state. Leaf routine; overwrites %ymm0. The stores are unaligned, so state needs no alignment.
25
+ .globl KeccakP1600_Initialize
26
+ .globl _KeccakP1600_Initialize
27
+ .ifndef old_gas_syntax
28
+ .type KeccakP1600_Initialize,@function
29
+ .endif
30
+ KeccakP1600_Initialize:
31
+ _KeccakP1600_Initialize:
32
+ .balign 32 # labels are defined before this directive, so any alignment padding is executed on entry (presumably NOP fill - confirm)
33
+ vpxor %ymm0,%ymm0,%ymm0 # ymm0 = 0
34
+ vmovdqu %ymm0,0*32(%rdi) # bytes 0..31
35
+ vmovdqu %ymm0,1*32(%rdi) # bytes 32..63
36
+ vmovdqu %ymm0,2*32(%rdi) # bytes 64..95
37
+ vmovdqu %ymm0,3*32(%rdi) # bytes 96..127
38
+ vmovdqu %ymm0,4*32(%rdi) # bytes 128..159
39
+ vmovdqu %ymm0,5*32(%rdi) # bytes 160..191
40
+ movq $0,6*32(%rdi) # bytes 192..199
41
+ ret # NOTE(review): no vzeroupper before returning after 256-bit AVX use - confirm whether callers need it
42
+ .ifndef old_gas_syntax
43
+ .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
44
+ .endif
45
+
46
+ # -----------------------------------------------------------------------------
47
+ # XORs one byte into the state at logical byte position `offset`, locating the lane through the mapState table.
48
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
49
+ # %rdi %rsi %rdx
50
+ # Leaf routine; overwrites %rax, %rdx, %rdi, %r9 and flags. mapState is defined outside this excerpt.
51
+ .globl KeccakP1600_AddByte
52
+ .globl _KeccakP1600_AddByte
53
+ .ifndef old_gas_syntax
54
+ .type KeccakP1600_AddByte,@function
55
+ .endif
56
+ KeccakP1600_AddByte:
57
+ _KeccakP1600_AddByte:
58
+ .balign 32
59
+ mov %rdx, %rax # rax = offset (any bits above bit 2 are discarded by the mask below)
60
+ and $7, %rax # rax = byte position inside the 8-byte lane
61
+ and $0xFFFFFFF8, %edx # edx = offset rounded down to a lane boundary; the 32-bit write also clears the upper half of rdx
62
+ lea mapState(%rip), %r9 # r9 = base of the lane mapping table (8-byte entries)
63
+ mov (%r9, %rdx), %rdx # rdx = table entry for this lane; used below as a byte displacement from state
64
+ add %rdx, %rdi # rdi = state + lane displacement
65
+ add %rax, %rdi # rdi = address of the target byte
66
+ xorb %sil, (%rdi) # state byte ^= data
67
+ ret
68
+ .ifndef old_gas_syntax
69
+ .size KeccakP1600_AddByte,.-KeccakP1600_AddByte
70
+ .endif
71
+
72
+ # -----------------------------------------------------------------------------
73
+ # XORs `length` bytes from `data` into the state, starting at logical byte position `offset`; each lane is located through mapState.
74
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
75
+ # %rdi %rsi %rdx %rcx
76
+ # Leaf routine; overwrites %rax, %rcx, %rdx, %rsi, %r8, %r9 and flags. Only the low 32 bits of offset and length are used.
77
+ .globl KeccakP1600_AddBytes
78
+ .globl _KeccakP1600_AddBytes
79
+ .ifndef old_gas_syntax
80
+ .type KeccakP1600_AddBytes,@function
81
+ .endif
82
+ KeccakP1600_AddBytes:
83
+ _KeccakP1600_AddBytes:
84
+ .balign 32
85
+ and %ecx, %ecx # length is a 32-bit argument whose upper register half is unspecified: this zero-extends it into rcx and sets ZF when it is 0
86
+ jz KeccakP1600_AddBytes_Exit # nothing to do for length == 0
87
+ mov %rdx, %rax # rax = offset (reduced to the byte position inside the lane below)
88
+ and $0xFFFFFFF8, %edx # edx = offset rounded down to a lane boundary = byte index into mapState (8-byte entries)
89
+ lea mapState(%rip), %r9
90
+ add %r9, %rdx # rdx = address of the mapState entry of the first lane touched
91
+ and $7, %rax # rax = byte position inside the first lane
92
+ jz KeccakP1600_AddBytes_LaneAlignedCheck # offset is lane aligned: skip the leading partial lane
93
+ mov $8, %r9 # r9 is (max) length of incomplete lane
94
+ sub %rax, %r9 # r9 = bytes left in the first lane
95
+ cmp %rcx, %r9
96
+ cmovae %rcx, %r9 # r9 = min(bytes left in lane, length), unsigned
97
+ sub %r9, %rcx # length -= length of incomplete lane
98
+ add (%rdx), %rax # rax = lane displacement + byte position in lane
99
+ add $8, %rdx # advance to the next mapState entry
100
+ add %rdi, %rax # rax = address of the first state byte to update
101
+ KeccakP1600_AddBytes_NotAlignedLoop:
102
+ mov (%rsi), %r8b
103
+ inc %rsi
104
+ xorb %r8b, (%rax) # state byte ^= data byte
105
+ inc %rax
106
+ dec %r9
107
+ jnz KeccakP1600_AddBytes_NotAlignedLoop
108
+ jmp KeccakP1600_AddBytes_LaneAlignedCheck
109
+ KeccakP1600_AddBytes_LaneAlignedLoop:
110
+ mov (%rsi), %r8 # load 8 data bytes
111
+ add $8, %rsi
112
+ mov (%rdx), %rax # rax = displacement of the current lane
113
+ add $8, %rdx
114
+ add %rdi, %rax
115
+ xor %r8, (%rax) # whole lane ^= data
116
+ KeccakP1600_AddBytes_LaneAlignedCheck:
117
+ sub $8, %rcx
118
+ jnc KeccakP1600_AddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were still pending
119
+ KeccakP1600_AddBytes_LastIncompleteLane:
120
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 trailing bytes
121
+ jz KeccakP1600_AddBytes_Exit
122
+ mov (%rdx), %rax
123
+ add %rdi, %rax # rax = start of the last, partially covered lane
124
+ KeccakP1600_AddBytes_LastIncompleteLaneLoop:
125
+ mov (%rsi), %r8b
126
+ inc %rsi
127
+ xor %r8b, (%rax)
128
+ inc %rax
129
+ dec %rcx
130
+ jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
131
+ KeccakP1600_AddBytes_Exit:
132
+ ret
133
+ .ifndef old_gas_syntax
134
+ .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
135
+ .endif
136
+
137
+ # -----------------------------------------------------------------------------
138
+ # Copies `length` bytes from `data` over the state, starting at logical byte position `offset`; each lane is located through mapState.
139
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
140
+ # %rdi %rsi %rdx %rcx
141
+ # Leaf routine; overwrites %rax, %rcx, %rdx, %rsi, %r8, %r9 and flags. Only the low 32 bits of offset and length are used.
142
+ .globl KeccakP1600_OverwriteBytes
143
+ .globl _KeccakP1600_OverwriteBytes
144
+ .ifndef old_gas_syntax
145
+ .type KeccakP1600_OverwriteBytes,@function
146
+ .endif
147
+ KeccakP1600_OverwriteBytes:
148
+ _KeccakP1600_OverwriteBytes:
149
+ .balign 32
150
+ and %ecx, %ecx # length is a 32-bit argument whose upper register half is unspecified: this zero-extends it into rcx and sets ZF when it is 0
151
+ jz KeccakP1600_OverwriteBytes_Exit # nothing to do for length == 0
152
+ mov %rdx, %rax # rax = offset (reduced to the byte position inside the lane below)
153
+ and $0xFFFFFFF8, %edx # edx = offset rounded down to a lane boundary = byte index into mapState (8-byte entries)
154
+ lea mapState(%rip), %r9
155
+ add %r9, %rdx # rdx = address of the mapState entry of the first lane touched
156
+ and $7, %rax # rax = byte position inside the first lane
157
+ jz KeccakP1600_OverwriteBytes_LaneAlignedCheck # offset is lane aligned: skip the leading partial lane
158
+ mov $8, %r9 # r9 is (max) length of incomplete lane
159
+ sub %rax, %r9 # r9 = bytes left in the first lane
160
+ cmp %rcx, %r9
161
+ cmovae %rcx, %r9 # r9 = min(bytes left in lane, length), unsigned
162
+ sub %r9, %rcx # length -= length of incomplete lane
163
+ add (%rdx), %rax # rax = lane displacement + byte position in lane
164
+ add $8, %rdx # advance to the next mapState entry
165
+ add %rdi, %rax # rax = address of the first state byte to overwrite
166
+ KeccakP1600_OverwriteBytes_NotAlignedLoop:
167
+ mov (%rsi), %r8b
168
+ inc %rsi
169
+ mov %r8b, (%rax) # state byte = data byte
170
+ inc %rax
171
+ dec %r9
172
+ jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
173
+ jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
174
+ KeccakP1600_OverwriteBytes_LaneAlignedLoop:
175
+ mov (%rsi), %r8 # load 8 data bytes
176
+ add $8, %rsi
177
+ mov (%rdx), %rax # rax = displacement of the current lane
178
+ add $8, %rdx
179
+ add %rdi, %rax
180
+ mov %r8, (%rax) # whole lane = data
181
+ KeccakP1600_OverwriteBytes_LaneAlignedCheck:
182
+ sub $8, %rcx
183
+ jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop # no borrow: at least 8 bytes were still pending
184
+ KeccakP1600_OverwriteBytes_LastIncompleteLane:
185
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 trailing bytes
186
+ jz KeccakP1600_OverwriteBytes_Exit
187
+ mov (%rdx), %rax
188
+ add %rdi, %rax # rax = start of the last, partially covered lane
189
+ KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
190
+ mov (%rsi), %r8b
191
+ inc %rsi
192
+ mov %r8b, (%rax)
193
+ inc %rax
194
+ dec %rcx
195
+ jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
196
+ KeccakP1600_OverwriteBytes_Exit:
197
+ ret
198
+ .ifndef old_gas_syntax
199
+ .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
200
+ .endif
201
+
202
+ # -----------------------------------------------------------------------------
203
+ # Zeroes the first `byteCount` logical bytes of the state, walking the lanes in mapState order from its first entry.
204
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
205
+ # %rdi %rsi
206
+ # Leaf routine; overwrites %rax, %rdx, %rsi and flags. Only the low 32 bits of byteCount are used.
207
+ .globl KeccakP1600_OverwriteWithZeroes
208
+ .globl _KeccakP1600_OverwriteWithZeroes
209
+ .ifndef old_gas_syntax
210
+ .type KeccakP1600_OverwriteWithZeroes,@function
211
+ .endif
212
+ KeccakP1600_OverwriteWithZeroes:
213
+ _KeccakP1600_OverwriteWithZeroes:
214
+ .balign 32
215
+ and %esi, %esi # byteCount is a 32-bit argument whose upper register half is unspecified: this zero-extends it into rsi and sets ZF when it is 0
216
+ jz KeccakP1600_OverwriteWithZeroes_Exit # nothing to do for byteCount == 0
217
+ lea mapState(%rip), %rdx # rdx pointer into state index mapper
218
+ jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
219
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
220
+ mov (%rdx), %rax # rax = displacement of the current lane
221
+ add $8, %rdx # advance to the next mapState entry
222
+ add %rdi, %rax
223
+ movq $0, (%rax) # clear the whole lane
224
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
225
+ sub $8, %rsi
226
+ jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop # no borrow: at least 8 bytes were still pending
227
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
228
+ add $8, %rsi # undo the last subtraction: rsi = 0..7 trailing bytes
229
+ jz KeccakP1600_OverwriteWithZeroes_Exit
230
+ mov (%rdx), %rax
231
+ add %rdi, %rax # rax = start of the last, partially covered lane
232
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
233
+ movb $0, (%rax)
234
+ inc %rax
235
+ dec %rsi
236
+ jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
237
+ KeccakP1600_OverwriteWithZeroes_Exit:
238
+ ret
239
+ .ifndef old_gas_syntax
240
+ .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
241
+ .endif
242
+
243
+ # -----------------------------------------------------------------------------
244
+ # Copies `length` bytes of the state, starting at logical byte position `offset`, to `data`; each lane is located through mapState.
245
+ # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
246
+ # %rdi %rsi %rdx %rcx
247
+ # Leaf routine; %rbx is saved and restored; overwrites %rax, %rcx, %rdx, %rsi, %r8, %r9 and flags. Only the low 32 bits of offset and length are used.
248
+ .globl KeccakP1600_ExtractBytes
249
+ .globl _KeccakP1600_ExtractBytes
250
+ .ifndef old_gas_syntax
251
+ .type KeccakP1600_ExtractBytes,@function
252
+ .endif
253
+ KeccakP1600_ExtractBytes:
254
+ _KeccakP1600_ExtractBytes:
255
+ .balign 32
256
+ push %rbx # rbx is used as a counter below and restored at the exit label
257
+ and %ecx, %ecx # length is a 32-bit argument whose upper register half is unspecified: this zero-extends it into rcx and sets ZF when it is 0
258
+ jz KeccakP1600_ExtractBytes_Exit # nothing to do for length == 0
259
+ mov %rdx, %rax # rax = offset (reduced to the byte position inside the lane below)
260
+ and $0xFFFFFFF8, %edx # edx = offset rounded down to a lane boundary = byte index into mapState (8-byte entries)
261
+ lea mapState(%rip), %r9
262
+ add %r9, %rdx # rdx = address of the mapState entry of the first lane read
263
+ and $7, %rax # rax = byte position inside the first lane
264
+ jz KeccakP1600_ExtractBytes_LaneAlignedCheck # offset is lane aligned: skip the leading partial lane
265
+ mov $8, %rbx # rbx is (max) length of incomplete lane
266
+ sub %rax, %rbx # rbx = bytes left in the first lane
267
+ cmp %rcx, %rbx
268
+ cmovae %rcx, %rbx # rbx = min(bytes left in lane, length), unsigned
269
+ sub %rbx, %rcx # length -= length of incomplete lane
270
+ mov (%rdx), %r9 # r9 = displacement of the first lane
271
+ add $8, %rdx # advance to the next mapState entry
272
+ add %rdi, %r9
273
+ add %rax, %r9 # r9 = address of the first state byte to read
274
+ KeccakP1600_ExtractBytes_NotAlignedLoop:
275
+ mov (%r9), %r8b
276
+ inc %r9
277
+ mov %r8b, (%rsi) # data byte = state byte
278
+ inc %rsi
279
+ dec %rbx
280
+ jnz KeccakP1600_ExtractBytes_NotAlignedLoop
281
+ jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
282
+ KeccakP1600_ExtractBytes_LaneAlignedLoop:
283
+ mov (%rdx), %rax # rax = displacement of the current lane
284
+ add $8, %rdx
285
+ add %rdi, %rax
286
+ mov (%rax), %r8 # load the whole lane
287
+ mov %r8, (%rsi) # store 8 output bytes
288
+ add $8, %rsi
289
+ KeccakP1600_ExtractBytes_LaneAlignedCheck:
290
+ sub $8, %rcx
291
+ jnc KeccakP1600_ExtractBytes_LaneAlignedLoop # no borrow: at least 8 bytes were still pending
292
+ KeccakP1600_ExtractBytes_LastIncompleteLane:
293
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 trailing bytes
294
+ jz KeccakP1600_ExtractBytes_Exit
295
+ mov (%rdx), %rax
296
+ add %rdi, %rax
297
+ mov (%rax), %r8 # r8 = the whole last lane; its low bytes are emitted one at a time
298
+ KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
299
+ mov %r8b, (%rsi) # emit the current lowest byte
300
+ shr $8, %r8 # bring the next byte into the low position
301
+ inc %rsi
302
+ dec %rcx
303
+ jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
304
+ KeccakP1600_ExtractBytes_Exit:
305
+ pop %rbx
306
+ ret
307
+ .ifndef old_gas_syntax
308
+ .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
309
+ .endif
310
+
311
+ # -----------------------------------------------------------------------------
312
+ # Writes output[i] = input[i] XOR (state byte at logical position offset + i) for i in 0..length-1; each lane is located through mapState.
313
+ # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
314
+ # %rdi %rsi %rdx %rcx %r8
315
+ # Leaf routine; %rbx and %r10 are saved and restored; overwrites %rax, %rcx, %rdx, %rsi, %r8, %r9 and flags. Only the low 32 bits of offset and length are used.
316
+ .globl KeccakP1600_ExtractAndAddBytes
317
+ .globl _KeccakP1600_ExtractAndAddBytes
318
+ .ifndef old_gas_syntax
319
+ .type KeccakP1600_ExtractAndAddBytes,@function
320
+ .endif
321
+ KeccakP1600_ExtractAndAddBytes:
322
+ _KeccakP1600_ExtractAndAddBytes:
323
+ .balign 32
324
+ push %rbx # rbx is used as a counter below and restored at the exit label
325
+ push %r10 # r10 holds the working byte/lane; it is preserved for the caller as well
326
+ and %r8d, %r8d # length is a 32-bit argument whose upper register half is unspecified: this zero-extends it into r8 and sets ZF when it is 0
327
+ jz KeccakP1600_ExtractAndAddBytes_Exit # nothing to do for length == 0
328
+ mov %rcx, %rax # rax = offset (reduced to the byte position inside the lane below)
329
+ and $0xFFFFFFF8, %ecx # ecx = offset rounded down to a lane boundary = byte index into mapState (8-byte entries)
330
+ lea mapState(%rip), %r9
331
+ add %r9, %rcx # rcx = address of the mapState entry of the first lane read
332
+ and $7, %rax # rax = byte position inside the first lane
333
+ jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck # offset is lane aligned: skip the leading partial lane
334
+ mov $8, %rbx # rbx is (max) length of incomplete lane
335
+ sub %rax, %rbx # rbx = bytes left in the first lane
336
+ cmp %r8, %rbx
337
+ cmovae %r8, %rbx # rbx = min(bytes left in lane, length), unsigned
338
+ sub %rbx, %r8 # length -= length of incomplete lane
339
+ mov (%rcx), %r9 # r9 = displacement of the first lane
340
+ add $8, %rcx # advance to the next mapState entry
341
+ add %rdi, %r9
342
+ add %rax, %r9 # r9 = address of the first state byte to read
343
+ KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
344
+ mov (%r9), %r10b # state byte
345
+ inc %r9
346
+ xor (%rsi), %r10b # ^= input byte
347
+ inc %rsi
348
+ mov %r10b, (%rdx) # store to output
349
+ inc %rdx
350
+ dec %rbx
351
+ jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
352
+ jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
353
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
354
+ mov (%rcx), %rax # rax = displacement of the current lane
355
+ add $8, %rcx
356
+ add %rdi, %rax
357
+ mov (%rax), %r10 # load the whole lane
358
+ xor (%rsi), %r10 # ^= 8 input bytes
359
+ add $8, %rsi
360
+ mov %r10, (%rdx) # store 8 output bytes
361
+ add $8, %rdx
362
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
363
+ sub $8, %r8
364
+ jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were still pending
365
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
366
+ add $8, %r8 # undo the last subtraction: r8 = 0..7 trailing bytes
367
+ jz KeccakP1600_ExtractAndAddBytes_Exit
368
+ mov (%rcx), %rax
369
+ add %rdi, %rax
370
+ mov (%rax), %r10 # r10 = the whole last lane; its low bytes are consumed one at a time
371
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
372
+ xor (%rsi), %r10b # lowest lane byte ^= input byte
373
+ inc %rsi
374
+ mov %r10b, (%rdx)
375
+ inc %rdx
376
+ shr $8, %r10 # bring the next lane byte into the low position
377
+ dec %r8
378
+ jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
379
+ KeccakP1600_ExtractAndAddBytes_Exit:
380
+ pop %r10
381
+ pop %rbx
382
+ ret
383
+ .ifndef old_gas_syntax
384
+ .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
385
+ .endif
386
+
387
+ # -----------------------------------------------------------------------------
388
+ # __KeccakF1600: round loop shared by the Permute_* and FastLoop_Absorb entry points in this file.
389
+ # internal -- in: %ymm0-%ymm6 = state, %r8 / %r9 = rhotates_left+96 / rhotates_right+96, %r10 = first 32-byte iotas row to apply, %eax = number of rounds
390
+ # out: %ymm0-%ymm6 = permuted state; %r10 is advanced by 32 bytes per round, %eax ends at 0, %ymm7-%ymm15 are overwritten as scratch
391
+ .ifndef old_gas_syntax
392
+ .type __KeccakF1600,@function
393
+ .endif
394
+ .balign 32
395
+ __KeccakF1600:
396
+ .Loop_avx2: # one pass through this loop body = one round
397
+ ######################################### Theta
398
+ vpshufd $0b01001110,%ymm2,%ymm13
399
+ vpxor %ymm3,%ymm5,%ymm12
400
+ vpxor %ymm6,%ymm4,%ymm9
401
+ vpxor %ymm1,%ymm12,%ymm12
402
+ vpxor %ymm9,%ymm12,%ymm12 # C[1..4]
403
+
404
+ vpermq $0b10010011,%ymm12,%ymm11
405
+ vpxor %ymm2,%ymm13,%ymm13
406
+ vpermq $0b01001110,%ymm13,%ymm7
407
+
408
+ vpsrlq $63,%ymm12,%ymm8
409
+ vpaddq %ymm12,%ymm12,%ymm9
410
+ vpor %ymm9,%ymm8,%ymm8 # ROL64(C[1..4],1)
411
+
412
+ vpermq $0b00111001,%ymm8,%ymm15
413
+ vpxor %ymm11,%ymm8,%ymm14
414
+ vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4]
415
+
416
+ vpxor %ymm0,%ymm13,%ymm13
417
+ vpxor %ymm7,%ymm13,%ymm13 # C[0..0]
418
+
419
+ vpsrlq $63,%ymm13,%ymm7
420
+ vpaddq %ymm13,%ymm13,%ymm8
421
+ vpor %ymm7,%ymm8,%ymm8 # ROL64(C[0..0],1)
422
+
423
+ vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0]
424
+ vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0]
425
+
426
+ vpblendd $0b11000000,%ymm8,%ymm15,%ymm15
427
+ vpblendd $0b00000011,%ymm13,%ymm11,%ymm11
428
+ vpxor %ymm11,%ymm15,%ymm15 # D[1..4] = ROL64(C[2..4,0),1) ^ C[0..3]
429
+
430
+ ######################################### Rho + Pi + pre-Chi shuffle
431
+ vpsllvq 0*32-96(%r8),%ymm2,%ymm10 # row 0 of rhotates_left (callers pass %r8 biased by +96, hence the -96)
432
+ vpsrlvq 0*32-96(%r9),%ymm2,%ymm2 # row 0 of rhotates_right (64 - left count)
433
+ vpor %ymm10,%ymm2,%ymm2 # left-shifted | right-shifted = per-lane rotate
434
+
435
+ vpxor %ymm15,%ymm3,%ymm3 # ^= D[1..4] from Theta
436
+ vpsllvq 2*32-96(%r8),%ymm3,%ymm11
437
+ vpsrlvq 2*32-96(%r9),%ymm3,%ymm3
438
+ vpor %ymm11,%ymm3,%ymm3
439
+
440
+ vpxor %ymm15,%ymm4,%ymm4 # ^= D[1..4] from Theta
441
+ vpsllvq 3*32-96(%r8),%ymm4,%ymm12
442
+ vpsrlvq 3*32-96(%r9),%ymm4,%ymm4
443
+ vpor %ymm12,%ymm4,%ymm4
444
+
445
+ vpxor %ymm15,%ymm5,%ymm5 # ^= D[1..4] from Theta
446
+ vpsllvq 4*32-96(%r8),%ymm5,%ymm13
447
+ vpsrlvq 4*32-96(%r9),%ymm5,%ymm5
448
+ vpor %ymm13,%ymm5,%ymm5
449
+
450
+ vpxor %ymm15,%ymm6,%ymm6 # ^= D[1..4] from Theta
451
+ vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3
452
+ vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4
453
+ vpsllvq 5*32-96(%r8),%ymm6,%ymm14
454
+ vpsrlvq 5*32-96(%r9),%ymm6,%ymm8
455
+ vpor %ymm14,%ymm8,%ymm8 # %ymm6 -> future %ymm1
456
+
457
+ vpxor %ymm15,%ymm1,%ymm1 # ^= D[1..4] from Theta
458
+ vpermq $0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5
459
+ vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6
460
+ vpsllvq 1*32-96(%r8),%ymm1,%ymm15
461
+ vpsrlvq 1*32-96(%r9),%ymm1,%ymm9
462
+ vpor %ymm15,%ymm9,%ymm9 # %ymm1 -> future %ymm2
463
+
464
+ ######################################### Chi
465
+ vpsrldq $8,%ymm8,%ymm14
466
+ vpandn %ymm14,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0]
467
+
468
+ vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0]
469
+ vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1]
470
+ vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4]
471
+ vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0]
472
+ vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0]
473
+ vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1]
474
+ vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4]
475
+ vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0]
476
+ vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0]
477
+ vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1]
478
+ vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4]
479
+ vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0]
480
+ vpandn %ymm15,%ymm3,%ymm3 # tgting [3][1] [1][2] [4][3] [2][4]
481
+ vpandn %ymm14,%ymm5,%ymm5 # tgting [3][2] [1][4] [4][1] [2][3]
482
+
483
+ vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3]
484
+ vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4]
485
+ vpxor %ymm10,%ymm3,%ymm3
486
+ vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3]
487
+ vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4]
488
+ vpxor %ymm12,%ymm5,%ymm5
489
+ vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3]
490
+ vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4]
491
+ vpandn %ymm15,%ymm6,%ymm6 # tgting [3][3] [1][1] [4][4] [2][2]
492
+ vpxor %ymm13,%ymm6,%ymm6
493
+
494
+ vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3]
495
+ vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3]
496
+ vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2]
497
+ vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2]
498
+ vpandn %ymm15,%ymm1,%ymm1 # tgting [0][4] [0][3] [0][2] [0][1]
499
+
500
+ vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1]
501
+ vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2]
502
+ vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1]
503
+ vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2]
504
+ vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1]
505
+ vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2]
506
+ vpandn %ymm14,%ymm2,%ymm2 # tgting [3][0] [1][0] [4][0] [2][0]
507
+ vpxor %ymm9,%ymm2,%ymm2
508
+
509
+ vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0]
510
+ vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle
511
+ vpermq $0b10001101,%ymm5,%ymm5
512
+ vpermq $0b01110010,%ymm6,%ymm6
513
+
514
+ vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2]
515
+ vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3]
516
+ vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2]
517
+ vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3]
518
+ vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2]
519
+ vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3]
520
+ vpandn %ymm14,%ymm4,%ymm4 # tgting [3][4] [1][3] [4][2] [2][1]
521
+
522
+ vpxor %ymm7,%ymm0,%ymm0
523
+ vpxor %ymm8,%ymm1,%ymm1
524
+ vpxor %ymm11,%ymm4,%ymm4
525
+
526
+ ######################################### Iota
527
+ vpxor (%r10),%ymm0,%ymm0 # XOR the current 32-byte iotas row (one constant replicated 4x) into %ymm0
528
+ lea 32(%r10),%r10 # step to the next row; lea leaves the flags untouched
529
+
530
+ dec %eax # one round done
531
+ jnz .Loop_avx2 # loop until the round counter reaches 0 (a counter of 0 on entry would wrap)
532
+ ret
533
+ .ifndef old_gas_syntax
534
+ .size __KeccakF1600,.-__KeccakF1600
535
+ .endif
536
+
537
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_24rounds(void *state);
#                                   %rdi
#
# Loads the 25-lane state at %rdi into %ymm0-%ymm6, runs __KeccakF1600 for 24
# rounds starting at the first iotas row, and writes the result back in place.
# %rdi is left pointing 96 bytes past the start of the state.
#
    .globl  KeccakP1600_Permute_24rounds
    .globl _KeccakP1600_Permute_24rounds
.ifndef old_gas_syntax
    .type   KeccakP1600_Permute_24rounds,@function
.endif
KeccakP1600_Permute_24rounds:
_KeccakP1600_Permute_24rounds:
.balign 32
    # ---- arguments for the round loop ----
    mov     $24, %eax                           # round count
    lea     iotas(%rip), %r10                   # first constant row
    lea     rhotates_right+96(%rip), %r9        # both rotate tables are addressed
    lea     rhotates_left+96(%rip), %r8         #   with a +96 bias
    lea     96(%rdi), %rdi                      # state pointer gets the same +96 bias

    # ---- state -> registers ----
    vzeroupper
    vpbroadcastq    -96(%rdi), %ymm0            # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi), %ymm1      # lanes  1.. 4
    vmovdqu         8+32*1-96(%rdi), %ymm2      # lanes  5.. 8
    vmovdqu         8+32*2-96(%rdi), %ymm3      # lanes  9..12
    vmovdqu         8+32*3-96(%rdi), %ymm4      # lanes 13..16
    vmovdqu         8+32*4-96(%rdi), %ymm5      # lanes 17..20
    vmovdqu         8+32*5-96(%rdi), %ymm6      # lanes 21..24

    call    __KeccakF1600

    # ---- registers -> state ----
    vmovq           %xmm0, -96(%rdi)            # only the low quadword of %ymm0 is lane 0
    vmovdqu         %ymm1, 8+32*0-96(%rdi)
    vmovdqu         %ymm2, 8+32*1-96(%rdi)
    vmovdqu         %ymm3, 8+32*2-96(%rdi)
    vmovdqu         %ymm4, 8+32*3-96(%rdi)
    vmovdqu         %ymm5, 8+32*4-96(%rdi)
    vmovdqu         %ymm6, 8+32*5-96(%rdi)
    vzeroupper
    ret
.ifndef old_gas_syntax
    .size   KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
.endif
576
+
577
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_12rounds(void *state);
#                                   %rdi
#
# Loads the 25-lane state at %rdi into %ymm0-%ymm6, runs __KeccakF1600 for 12
# rounds starting at iotas row 12 (i.e. the last 12 rows of the table), and
# writes the result back in place.  %rdi is left pointing 96 bytes past the
# start of the state.
#
    .globl  KeccakP1600_Permute_12rounds
    .globl _KeccakP1600_Permute_12rounds
.ifndef old_gas_syntax
    .type   KeccakP1600_Permute_12rounds,@function
.endif
KeccakP1600_Permute_12rounds:
_KeccakP1600_Permute_12rounds:
.balign 32
    # ---- arguments for the round loop ----
    mov     $12, %eax                           # round count
    lea     iotas+12*4*8(%rip), %r10            # skip 12 rows of 4 quadwords each
    lea     rhotates_right+96(%rip), %r9        # both rotate tables are addressed
    lea     rhotates_left+96(%rip), %r8         #   with a +96 bias
    lea     96(%rdi), %rdi                      # state pointer gets the same +96 bias

    # ---- state -> registers ----
    vzeroupper
    vpbroadcastq    -96(%rdi), %ymm0            # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi), %ymm1      # lanes  1.. 4
    vmovdqu         8+32*1-96(%rdi), %ymm2      # lanes  5.. 8
    vmovdqu         8+32*2-96(%rdi), %ymm3      # lanes  9..12
    vmovdqu         8+32*3-96(%rdi), %ymm4      # lanes 13..16
    vmovdqu         8+32*4-96(%rdi), %ymm5      # lanes 17..20
    vmovdqu         8+32*5-96(%rdi), %ymm6      # lanes 21..24

    call    __KeccakF1600

    # ---- registers -> state ----
    vmovq           %xmm0, -96(%rdi)            # only the low quadword of %ymm0 is lane 0
    vmovdqu         %ymm1, 8+32*0-96(%rdi)
    vmovdqu         %ymm2, 8+32*1-96(%rdi)
    vmovdqu         %ymm3, 8+32*2-96(%rdi)
    vmovdqu         %ymm4, 8+32*3-96(%rdi)
    vmovdqu         %ymm5, 8+32*4-96(%rdi)
    vmovdqu         %ymm6, 8+32*5-96(%rdi)
    vzeroupper
    ret
.ifndef old_gas_syntax
    .size   KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
.endif
616
+
617
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
#                                  %rdi         %rsi
#
# Applies the last `nrounds` rounds of the 24-round permutation to the state at
# %rdi: the iota pointer starts nrounds rows before the end of the iotas table
# and __KeccakF1600 is run nrounds times.
#
# nrounds == 0 returns immediately and leaves the state untouched.
# nrounds > 24 is not checked: the iota pointer would then start before the
# iotas table, so callers must keep nrounds <= 24.
#
    .globl  KeccakP1600_Permute_Nrounds
    .globl _KeccakP1600_Permute_Nrounds
.ifndef old_gas_syntax
    .type   KeccakP1600_Permute_Nrounds,@function
.endif
KeccakP1600_Permute_Nrounds:
_KeccakP1600_Permute_Nrounds:
.balign 32
    mov     %esi, %eax                  # eax = nrounds; the 32-bit move zero-extends into rax,
                                        # discarding whatever the caller left in bits 63..32 of %rsi
    test    %eax, %eax
    jz      KeccakP1600_Permute_Nrounds_Exit    # the round loop decrements before testing, so a
                                                # count of 0 would wrap; handle it here
    lea     rhotates_left+96(%rip),%r8
    lea     rhotates_right+96(%rip),%r9
    lea     iotas+24*4*8(%rip),%r10     # one past the last constant row
    mov     %rax, %rsi                  # zero-extended round count
    shl     $2+3,%rsi                   # * 32 bytes per row (4 quadwords)
    sub     %rsi, %r10                  # r10 = first of the last nrounds rows
    lea     96(%rdi),%rdi
    vzeroupper
    vpbroadcastq    -96(%rdi),%ymm0     # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi),%ymm1
    vmovdqu         8+32*1-96(%rdi),%ymm2
    vmovdqu         8+32*2-96(%rdi),%ymm3
    vmovdqu         8+32*3-96(%rdi),%ymm4
    vmovdqu         8+32*4-96(%rdi),%ymm5
    vmovdqu         8+32*5-96(%rdi),%ymm6
    call    __KeccakF1600
    vmovq           %xmm0,-96(%rdi)
    vmovdqu         %ymm1,8+32*0-96(%rdi)
    vmovdqu         %ymm2,8+32*1-96(%rdi)
    vmovdqu         %ymm3,8+32*2-96(%rdi)
    vmovdqu         %ymm4,8+32*3-96(%rdi)
    vmovdqu         %ymm5,8+32*4-96(%rdi)
    vmovdqu         %ymm6,8+32*5-96(%rdi)
    vzeroupper
KeccakP1600_Permute_Nrounds_Exit:
    ret
.ifndef old_gas_syntax
    .size   KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
.endif
658
+
659
# -----------------------------------------------------------------------------
#
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                    %rdi         %rsi                    %rdx                       %rcx
#
# Repeatedly XORs one block of laneCount 8-byte lanes from data into the state
# and applies 24 rounds, for as long as at least laneCount whole lanes of input
# remain.  Returns the number of bytes consumed (final data pointer minus the
# initial one).
#
# Three code paths:
#   laneCount == 21, laneCount == 17 : the state stays in %ymm0-%ymm6 across
#       blocks; input lanes are fetched with gathers (map2..map6) and the round
#       loop __KeccakF1600 is called directly.
#   any other laneCount : lanes are XORed into memory through mapState and
#       KeccakP1600_Permute_24rounds is called per block.
#
# laneCount must be non-zero: the generic path counts lanes down with
# `sub $1 / jnz`, which would wrap for 0.
#
# Register roles: %rbx = initial data pointer, %rdx = running data pointer,
# %rcx = remaining input length in lanes.  %rbx and %r10 are saved/restored.
#
    .globl  KeccakF1600_FastLoop_Absorb
    .globl _KeccakF1600_FastLoop_Absorb
.ifndef old_gas_syntax
    .type   KeccakF1600_FastLoop_Absorb,@function
.endif
KeccakF1600_FastLoop_Absorb:
_KeccakF1600_FastLoop_Absorb:
.balign 32
    push    %rbx
    push    %r10
    mov     %esi, %esi                  # laneCount is a 32-bit argument: clear bits 63..32
                                        # of %rsi before the 64-bit compares/subtractions below
    shr     $3, %rcx                    # rcx = data length in lanes
    mov     %rdx, %rbx                  # rbx = initial data pointer
    cmp     %rsi, %rcx
    jb      KeccakF1600_FastLoop_Absorb_Exit    # less than one block: consume nothing
    vzeroupper
    cmp     $21, %rsi
    jnz     KeccakF1600_FastLoop_Absorb_Not21Lanes

    # ---- laneCount == 21 ----
    sub     $21, %rcx                   # pre-charge for the first block
    lea     rhotates_left+96(%rip),%r8
    lea     rhotates_right+96(%rip),%r9
    lea     96(%rdi),%rdi
    vpbroadcastq    -96(%rdi),%ymm0     # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi),%ymm1
    vmovdqu         8+32*1-96(%rdi),%ymm2
    vmovdqu         8+32*2-96(%rdi),%ymm3
    vmovdqu         8+32*3-96(%rdi),%ymm4
    vmovdqu         8+32*4-96(%rdi),%ymm5
    vmovdqu         8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
    vpbroadcastq    (%rdx),%ymm7        # input lane 0
    vmovdqu         8(%rdx),%ymm8       # input lanes 1..4

    vmovdqa         map2(%rip), %xmm15
    vpcmpeqd        %ymm14, %ymm14, %ymm14      # all-ones mask: gather all four lanes
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm9

    # For the remaining registers one lane index is >= 21; it is masked off and
    # the pre-zeroed destination supplies a 0 there.  Each gather clears its
    # mask register, so the mask is reloaded every time.
    vmovdqa         mask3_21(%rip), %ymm14
    vpxor           %ymm10, %ymm10, %ymm10
    vmovdqa         map3(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm10

    vmovdqa         mask4_21(%rip), %ymm14
    vpxor           %ymm11, %ymm11, %ymm11
    vmovdqa         map4(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm11

    vmovdqa         mask5_21(%rip), %ymm14
    vpxor           %ymm12, %ymm12, %ymm12
    vmovdqa         map5(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm12

    vmovdqa         mask6_21(%rip), %ymm14
    vpxor           %ymm13, %ymm13, %ymm13
    vmovdqa         map6(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm13

    vpxor   %ymm7,%ymm0,%ymm0           # state ^= input block
    vpxor   %ymm8,%ymm1,%ymm1
    vpxor   %ymm9,%ymm2,%ymm2
    vpxor   %ymm10,%ymm3,%ymm3
    vpxor   %ymm11,%ymm4,%ymm4
    vpxor   %ymm12,%ymm5,%ymm5
    vpxor   %ymm13,%ymm6,%ymm6
    add     $21*8, %rdx
    lea     iotas(%rip),%r10            # restart at the first constant row for every block
    mov     $24,%eax
    call    __KeccakF1600
    sub     $21, %rcx
    jnc     KeccakF1600_FastLoop_Absorb_Loop21Lanes     # no borrow: another full block is available
KeccakF1600_FastLoop_Absorb_SaveAndExit:
    vmovq           %xmm0,-96(%rdi)
    vmovdqu         %ymm1,8+32*0-96(%rdi)
    vmovdqu         %ymm2,8+32*1-96(%rdi)
    vmovdqu         %ymm3,8+32*2-96(%rdi)
    vmovdqu         %ymm4,8+32*3-96(%rdi)
    vmovdqu         %ymm5,8+32*4-96(%rdi)
    vmovdqu         %ymm6,8+32*5-96(%rdi)
KeccakF1600_FastLoop_Absorb_Exit:
    vzeroupper
    mov     %rdx, %rax                  # return number of bytes processed
    sub     %rbx, %rax
    pop     %r10
    pop     %rbx
    ret

    # ---- laneCount == 17 ----
KeccakF1600_FastLoop_Absorb_Not21Lanes:
    cmp     $17, %rsi
    jnz     KeccakF1600_FastLoop_Absorb_Not17Lanes
    sub     $17, %rcx                   # pre-charge for the first block
    lea     rhotates_left+96(%rip),%r8
    lea     rhotates_right+96(%rip),%r9
    lea     96(%rdi),%rdi
    vpbroadcastq    -96(%rdi),%ymm0     # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi),%ymm1
    vmovdqu         8+32*1-96(%rdi),%ymm2
    vmovdqu         8+32*2-96(%rdi),%ymm3
    vmovdqu         8+32*3-96(%rdi),%ymm4
    vmovdqu         8+32*4-96(%rdi),%ymm5
    vmovdqu         8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
    vpbroadcastq    (%rdx),%ymm7        # input lane 0
    vmovdqu         8(%rdx),%ymm8       # input lanes 1..4

    # Same scheme as the 21-lane path, with the *_17 masks hiding every lane
    # index >= 17.
    vmovdqa         mask2_17(%rip), %ymm14
    vpxor           %ymm9, %ymm9, %ymm9
    vmovdqa         map2(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm9

    vmovdqa         mask3_17(%rip), %ymm14
    vpxor           %ymm10, %ymm10, %ymm10
    vmovdqa         map3(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm10

    vmovdqa         mask4_17(%rip), %ymm14
    vpxor           %ymm11, %ymm11, %ymm11
    vmovdqa         map4(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm11

    vmovdqa         mask5_17(%rip), %ymm14
    vpxor           %ymm12, %ymm12, %ymm12
    vmovdqa         map5(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm12

    vmovdqa         mask6_17(%rip), %ymm14
    vpxor           %ymm13, %ymm13, %ymm13
    vmovdqa         map6(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm13

    vpxor   %ymm7,%ymm0,%ymm0           # state ^= input block
    vpxor   %ymm8,%ymm1,%ymm1
    vpxor   %ymm9,%ymm2,%ymm2
    vpxor   %ymm10,%ymm3,%ymm3
    vpxor   %ymm11,%ymm4,%ymm4
    vpxor   %ymm12,%ymm5,%ymm5
    vpxor   %ymm13,%ymm6,%ymm6
    add     $17*8, %rdx
    lea     iotas(%rip),%r10
    mov     $24,%eax
    call    __KeccakF1600
    sub     $17, %rcx
    jnc     KeccakF1600_FastLoop_Absorb_Loop17Lanes
    jmp     KeccakF1600_FastLoop_Absorb_SaveAndExit

    # ---- any other laneCount: state stays in memory ----
KeccakF1600_FastLoop_Absorb_Not17Lanes:
    lea     mapState(%rip), %r9
    mov     %rsi, %rax                  # rax = lanes left in this block
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
    mov     (%rdx), %r8                 # next input lane
    add     $8, %rdx
    mov     (%r9), %r10                 # byte offset of the matching state lane
    add     $8, %r9
    add     %rdi, %r10
    xor     %r8, (%r10)
    sub     $1, %rax
    jnz     KeccakF1600_FastLoop_Absorb_LanesAddLoop
    sub     %rsi, %rcx                  # one block consumed
    push    %rdi                        # these four are live across the call and are
    push    %rsi                        # not callee-saved
    push    %rdx
    push    %rcx
    sub     $8, %rsp                    # six pushes since entry leave %rsp at 8 mod 16;
                                        # pad so the call site is 16-byte aligned
.ifdef no_plt
    call    KeccakP1600_Permute_24rounds
.else
    call    KeccakP1600_Permute_24rounds@PLT
.endif
    add     $8, %rsp
    pop     %rcx
    pop     %rdx
    pop     %rsi
    pop     %rdi
    cmp     %rsi, %rcx
    jae     KeccakF1600_FastLoop_Absorb_Not17Lanes      # another full block is available
    jmp     KeccakF1600_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
    .size   KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
.endif
837
+
838
# -----------------------------------------------------------------------------
#
# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#                                             %rdi         %rsi                    %rdx                       %rcx
#
# Repeatedly XORs one block of laneCount 8-byte lanes from data into the state
# and applies 12 rounds (iotas rows 12..23), for as long as at least laneCount
# whole lanes of input remain.  Returns the number of bytes consumed (final
# data pointer minus the initial one).
#
# Three code paths:
#   laneCount == 21, laneCount == 17 : the state stays in %ymm0-%ymm6 across
#       blocks; input lanes are fetched with gathers (map2..map6) and the round
#       loop __KeccakF1600 is called directly.
#   any other laneCount : lanes are XORed into memory through mapState and
#       KeccakP1600_Permute_12rounds is called per block.
#
# laneCount must be non-zero: the generic path counts lanes down with
# `sub $1 / jnz`, which would wrap for 0.
#
# Register roles: %rbx = initial data pointer, %rdx = running data pointer,
# %rcx = remaining input length in lanes.  %rbx and %r10 are saved/restored.
#
    .globl  KeccakP1600_12rounds_FastLoop_Absorb
    .globl _KeccakP1600_12rounds_FastLoop_Absorb
.ifndef old_gas_syntax
    .type   KeccakP1600_12rounds_FastLoop_Absorb,@function
.endif
KeccakP1600_12rounds_FastLoop_Absorb:
_KeccakP1600_12rounds_FastLoop_Absorb:
.balign 32
    push    %rbx
    push    %r10
    mov     %esi, %esi                  # laneCount is a 32-bit argument: clear bits 63..32
                                        # of %rsi before the 64-bit compares/subtractions below
    shr     $3, %rcx                    # rcx = data length in lanes
    mov     %rdx, %rbx                  # rbx = initial data pointer
    cmp     %rsi, %rcx
    jb      KeccakP1600_12rounds_FastLoop_Absorb_Exit   # less than one block: consume nothing
    vzeroupper
    cmp     $21, %rsi
    jnz     KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes

    # ---- laneCount == 21 ----
    sub     $21, %rcx                   # pre-charge for the first block
    lea     rhotates_left+96(%rip),%r8
    lea     rhotates_right+96(%rip),%r9
    lea     96(%rdi),%rdi
    vpbroadcastq    -96(%rdi),%ymm0     # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi),%ymm1
    vmovdqu         8+32*1-96(%rdi),%ymm2
    vmovdqu         8+32*2-96(%rdi),%ymm3
    vmovdqu         8+32*3-96(%rdi),%ymm4
    vmovdqu         8+32*4-96(%rdi),%ymm5
    vmovdqu         8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
    vpbroadcastq    (%rdx),%ymm7        # input lane 0
    vmovdqu         8(%rdx),%ymm8       # input lanes 1..4

    vmovdqa         map2(%rip), %xmm15
    vpcmpeqd        %ymm14, %ymm14, %ymm14      # all-ones mask: gather all four lanes
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm9

    # For the remaining registers one lane index is >= 21; it is masked off and
    # the pre-zeroed destination supplies a 0 there.  Each gather clears its
    # mask register, so the mask is reloaded every time.
    vmovdqa         mask3_21(%rip), %ymm14
    vpxor           %ymm10, %ymm10, %ymm10
    vmovdqa         map3(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm10

    vmovdqa         mask4_21(%rip), %ymm14
    vpxor           %ymm11, %ymm11, %ymm11
    vmovdqa         map4(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm11

    vmovdqa         mask5_21(%rip), %ymm14
    vpxor           %ymm12, %ymm12, %ymm12
    vmovdqa         map5(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm12

    vmovdqa         mask6_21(%rip), %ymm14
    vpxor           %ymm13, %ymm13, %ymm13
    vmovdqa         map6(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm13

    vpxor   %ymm7,%ymm0,%ymm0           # state ^= input block
    vpxor   %ymm8,%ymm1,%ymm1
    vpxor   %ymm9,%ymm2,%ymm2
    vpxor   %ymm10,%ymm3,%ymm3
    vpxor   %ymm11,%ymm4,%ymm4
    vpxor   %ymm12,%ymm5,%ymm5
    vpxor   %ymm13,%ymm6,%ymm6
    add     $21*8, %rdx
    lea     iotas+12*4*8(%rip),%r10     # restart at constant row 12 for every block
    mov     $12,%eax
    call    __KeccakF1600
    sub     $21, %rcx
    jnc     KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes    # no borrow: another full block is available
KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
    vmovq           %xmm0,-96(%rdi)
    vmovdqu         %ymm1,8+32*0-96(%rdi)
    vmovdqu         %ymm2,8+32*1-96(%rdi)
    vmovdqu         %ymm3,8+32*2-96(%rdi)
    vmovdqu         %ymm4,8+32*3-96(%rdi)
    vmovdqu         %ymm5,8+32*4-96(%rdi)
    vmovdqu         %ymm6,8+32*5-96(%rdi)
KeccakP1600_12rounds_FastLoop_Absorb_Exit:
    vzeroupper
    mov     %rdx, %rax                  # return number of bytes processed
    sub     %rbx, %rax
    pop     %r10
    pop     %rbx
    ret

    # ---- laneCount == 17 ----
KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
    cmp     $17, %rsi
    jnz     KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
    sub     $17, %rcx                   # pre-charge for the first block
    lea     rhotates_left+96(%rip),%r8
    lea     rhotates_right+96(%rip),%r9
    lea     96(%rdi),%rdi
    vpbroadcastq    -96(%rdi),%ymm0     # lane 0, copied to all four quadwords
    vmovdqu         8+32*0-96(%rdi),%ymm1
    vmovdqu         8+32*1-96(%rdi),%ymm2
    vmovdqu         8+32*2-96(%rdi),%ymm3
    vmovdqu         8+32*3-96(%rdi),%ymm4
    vmovdqu         8+32*4-96(%rdi),%ymm5
    vmovdqu         8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
    vpbroadcastq    (%rdx),%ymm7        # input lane 0
    vmovdqu         8(%rdx),%ymm8       # input lanes 1..4

    # Same scheme as the 21-lane path, with the *_17 masks hiding every lane
    # index >= 17.
    vmovdqa         mask2_17(%rip), %ymm14
    vpxor           %ymm9, %ymm9, %ymm9
    vmovdqa         map2(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm9

    vmovdqa         mask3_17(%rip), %ymm14
    vpxor           %ymm10, %ymm10, %ymm10
    vmovdqa         map3(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm10

    vmovdqa         mask4_17(%rip), %ymm14
    vpxor           %ymm11, %ymm11, %ymm11
    vmovdqa         map4(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm11

    vmovdqa         mask5_17(%rip), %ymm14
    vpxor           %ymm12, %ymm12, %ymm12
    vmovdqa         map5(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm12

    vmovdqa         mask6_17(%rip), %ymm14
    vpxor           %ymm13, %ymm13, %ymm13
    vmovdqa         map6(%rip), %xmm15
    vpgatherdq      %ymm14, (%rdx, %xmm15, 1), %ymm13

    vpxor   %ymm7,%ymm0,%ymm0           # state ^= input block
    vpxor   %ymm8,%ymm1,%ymm1
    vpxor   %ymm9,%ymm2,%ymm2
    vpxor   %ymm10,%ymm3,%ymm3
    vpxor   %ymm11,%ymm4,%ymm4
    vpxor   %ymm12,%ymm5,%ymm5
    vpxor   %ymm13,%ymm6,%ymm6
    add     $17*8, %rdx
    lea     iotas+12*4*8(%rip),%r10
    mov     $12,%eax
    call    __KeccakF1600
    sub     $17, %rcx
    jnc     KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes
    jmp     KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit

    # ---- any other laneCount: state stays in memory ----
KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
    lea     mapState(%rip), %r9
    mov     %rsi, %rax                  # rax = lanes left in this block
KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
    mov     (%rdx), %r8                 # next input lane
    add     $8, %rdx
    mov     (%r9), %r10                 # byte offset of the matching state lane
    add     $8, %r9
    add     %rdi, %r10
    xor     %r8, (%r10)
    sub     $1, %rax
    jnz     KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
    sub     %rsi, %rcx                  # one block consumed
    push    %rdi                        # these four are live across the call and are
    push    %rsi                        # not callee-saved
    push    %rdx
    push    %rcx
    sub     $8, %rsp                    # six pushes since entry leave %rsp at 8 mod 16;
                                        # pad so the call site is 16-byte aligned
.ifdef no_plt
    call    KeccakP1600_Permute_12rounds
.else
    call    KeccakP1600_Permute_12rounds@PLT
.endif
    add     $8, %rsp
    pop     %rcx
    pop     %rdx
    pop     %rsi
    pop     %rdi
    cmp     %rsi, %rcx
    jae     KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes     # another full block is available
    jmp     KeccakP1600_12rounds_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
    .size   KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
.endif
1016
+
1017
+ .equ ALLON, 0xFFFFFFFFFFFFFFFF # gather-mask element with every bit set (element is loaded)
1018
+
1019
+ .balign 64
1020
+ rhotates_left: # per-lane left-shift counts; row N is read as N*32-96(%r8) with %r8 = rhotates_left+96
1021
+ .quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0]
1022
+ .quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4]
1023
+ .quad 45, 6, 56, 39 # [3][1] [1][2] [4][3] [2][4]
1024
+ .quad 10, 61, 55, 8 # [2][1] [4][2] [1][3] [3][4]
1025
+ .quad 2, 15, 25, 20 # [4][1] [3][2] [2][3] [1][4]
1026
+ .quad 44, 43, 21, 14 # [1][1] [2][2] [3][3] [4][4]
1027
+ rhotates_right: # matching right-shift counts (64 - left count); row N is read as N*32-96(%r9)
1028
+ .quad 64-3, 64-18, 64-36, 64-41
1029
+ .quad 64-1, 64-62, 64-28, 64-27
1030
+ .quad 64-45, 64-6, 64-56, 64-39
1031
+ .quad 64-10, 64-61, 64-55, 64-8
1032
+ .quad 64-2, 64-15, 64-25, 64-20
1033
+ .quad 64-44, 64-43, 64-21, 64-14
1034
+ iotas: # 24 rows of 32 bytes, each one constant replicated 4x; the Iota step XORs one row into %ymm0 per round
1035
+ .quad 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001
1036
+ .quad 0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082
1037
+ .quad 0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a
1038
+ .quad 0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000
1039
+ .quad 0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b
1040
+ .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
1041
+ .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
1042
+ .quad 0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009
1043
+ .quad 0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a
1044
+ .quad 0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088
1045
+ .quad 0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009
1046
+ .quad 0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a
1047
+ .quad 0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b
1048
+ .quad 0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b
1049
+ .quad 0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089
1050
+ .quad 0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003
1051
+ .quad 0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002
1052
+ .quad 0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080
1053
+ .quad 0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a
1054
+ .quad 0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a
1055
+ .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
1056
+ .quad 0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080
1057
+ .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
1058
+ .quad 0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008
1059
+
1060
+ mapState: # 25 quadwords: entry i is the byte offset, inside the state buffer, of lane i as seen by the byte-oriented routines
1061
+ .quad 0*8, 1*8, 2*8, 3*8, 4*8
1062
+ .quad 7*8, 21*8, 10*8, 15*8, 20*8
1063
+ .quad 5*8, 13*8, 22*8, 19*8, 12*8
1064
+ .quad 8*8, 9*8, 18*8, 23*8, 16*8
1065
+ .quad 6*8, 17*8, 14*8, 11*8, 24*8
1066
+
1067
+ .balign 16 # map2..map6 are loaded with vmovdqa into %xmm15, which needs 16-byte alignment
1068
+ map2: # 32-bit byte offsets into the input block, used as vpgatherdq indices for the lanes XORed into %ymm2
1069
+ .long 10*8, 20*8, 5*8, 15*8
1070
+ map3: # gather indices for the lanes XORed into %ymm3
1071
+ .long 16*8, 7*8, 23*8, 14*8
1072
+ map4: # gather indices for the lanes XORed into %ymm4
1073
+ .long 11*8, 22*8, 8*8, 19*8
1074
+ map5: # gather indices for the lanes XORed into %ymm5
1075
+ .long 21*8, 17*8, 13*8, 9*8
1076
+ map6: # gather indices for the lanes XORed into %ymm6
1077
+ .long 6*8, 12*8, 18*8, 24*8
1078
+
1079
+ .balign 32 # the masks below are loaded with vmovdqa into %ymm14, which needs 32-byte alignment
1080
+ mask3_21: # gather masks for 21-lane blocks: 0 sits where the matching mapN index is >= 21 (map2 needs no mask there)
1081
+ .quad ALLON, ALLON, 0, ALLON
1082
+ mask4_21:
1083
+ .quad ALLON, 0, ALLON, ALLON
1084
+ mask5_21:
1085
+ .quad 0, ALLON, ALLON, ALLON
1086
+ mask6_21:
1087
+ .quad ALLON, ALLON, ALLON, 0
1088
+
1089
+ mask2_17: # gather masks for 17-lane blocks: 0 sits where the matching mapN index is >= 17
1090
+ .quad ALLON, 0, ALLON, ALLON
1091
+ mask3_17:
1092
+ .quad ALLON, ALLON, 0, ALLON
1093
+ mask4_17:
1094
+ .quad ALLON, 0, ALLON, 0
1095
+ mask5_17:
1096
+ .quad 0, 0, ALLON, ALLON
1097
+ mask6_17:
1098
+ .quad ALLON, ALLON, 0, 0
1099
+
1100
+ .asciz "Keccak-1600 for AVX2, CRYPTOGAMS by <appro@openssl.org>"