sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1031 @@
1
+ # The eXtended Keccak Code Package (XKCP)
2
+ # https://github.com/XKCP/XKCP
3
+ #
4
+ # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
5
+ # Copyright (c) 2018 Ronny Van Keer
6
+ # All rights reserved.
7
+ #
8
+ # The source code in this file is licensed under the CRYPTOGAMS license.
9
+ # For further details see http://www.openssl.org/~appro/cryptogams/.
10
+ #
11
+ # Notes:
12
+ # The code for the permutation (__KeccakF1600) was generated with
13
+ # Andy Polyakov's keccak1600-avx512.pl from the CRYPTOGAMS project
14
+ # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx512.pl).
15
+ # The rest of the code was written by Ronny Van Keer.
16
+ # Adaptations for macOS by Stéphane Léon.
17
+ # Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
18
+
19
+ .text
20
+
21
+ # -----------------------------------------------------------------------------
22
+ # Zeroes the 200-byte state at %rdi: three 64-byte stores (offsets 0, 64, 128) plus one 8-byte store at offset 192.
23
+ # void KeccakP1600_Initialize(void *state);
24
+ # Only register written is %zmm0; stores use the unaligned form. NOTE(review): no vzeroupper before ret - confirm whether callers need one.
25
+ .globl KeccakP1600_Initialize
26
+ .globl _KeccakP1600_Initialize # underscore-prefixed alias for the same entry point
27
+ .ifndef old_gas_syntax # .type is emitted only when old_gas_syntax is not defined
28
+ .type KeccakP1600_Initialize,@function
29
+ .endif
30
+ KeccakP1600_Initialize:
31
+ _KeccakP1600_Initialize:
32
+ .balign 32 # padding is inserted after both entry labels, so the labels themselves may be unaligned
33
+ vpxorq %zmm0,%zmm0,%zmm0 # zmm0 = 0 (64 zero bytes)
34
+ vmovdqu64 %zmm0,0*64(%rdi) # state bytes 0..63 = 0
35
+ vmovdqu64 %zmm0,1*64(%rdi) # state bytes 64..127 = 0
36
+ vmovdqu64 %zmm0,2*64(%rdi) # state bytes 128..191 = 0
37
+ movq $0,3*64(%rdi) # state bytes 192..199 = 0
38
+ ret
39
+ .ifndef old_gas_syntax
40
+ .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
41
+ .endif
42
+
43
+ # -----------------------------------------------------------------------------
44
+ #
45
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
46
+ # %rdi %rsi %rdx
47
+ #!!
48
+ #.globl KeccakP1600_AddByte
49
+ #.type KeccakP1600_AddByte,@function
50
+ #.align 32
51
+ #KeccakP1600_AddByte:
52
+ # mov %rdx, %rax
53
+ # and $7, %rax
54
+ # and $0xFFFFFFF8, %edx
55
+ # mov mapState(%rdx), %rdx
56
+ # add %rdx, %rdi
57
+ # add %rax, %rdi
58
+ # xorb %sil, (%rdi)
59
+ # ret
60
+ #.size KeccakP1600_AddByte,.-KeccakP1600_AddByte
61
+
62
+ # -----------------------------------------------------------------------------
63
+ # XORs length bytes from data into the state starting at byte offset: leading partial lane bytewise, whole 8-byte lanes, then trailing bytes bytewise.
64
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
65
+ # %rdi %rsi %rdx %rcx
66
+ # Modifies %rdi, %rsi, %rdx, %rcx, %r8, %r9 and flags; no stack use. NOTE(review): offset/length are unsigned int but read as 64-bit %rdx/%rcx - assumes callers zero-extend; confirm.
67
+ .globl KeccakP1600_AddBytes
68
+ .globl _KeccakP1600_AddBytes # underscore-prefixed alias for the same entry point
69
+ .ifndef old_gas_syntax
70
+ .type KeccakP1600_AddBytes,@function
71
+ .endif
72
+ KeccakP1600_AddBytes:
73
+ _KeccakP1600_AddBytes:
74
+ .balign 32 # padding is inserted after both entry labels
75
+ cmp $0, %rcx # length == 0 ?
76
+ jz KeccakP1600_AddBytes_Exit # nothing to do
77
+ add %rdx, %rdi # state += offset
78
+ and $7, %rdx # rdx = offset modulo 8 (position inside the lane)
79
+ jz KeccakP1600_AddBytes_LaneAlignedCheck # offset is a multiple of 8: skip the head loop
80
+ mov $8, %r9 # r9 is (max) length of incomplete lane
81
+ sub %rdx, %r9 # r9 = 8 - (offset modulo 8), i.e. 1..7
82
+ cmp %rcx, %r9
83
+ cmovae %rcx, %r9 # r9 = min(r9, length), unsigned
84
+ sub %r9, %rcx # length -= length of incomplete lane
85
+ KeccakP1600_AddBytes_NotAlignedLoop: # head: r9 is at least 1 on entry, one byte per iteration
86
+ mov (%rsi), %r8b
87
+ inc %rsi
88
+ xorb %r8b, (%rdi) # state byte ^= data byte
89
+ inc %rdi
90
+ dec %r9
91
+ jnz KeccakP1600_AddBytes_NotAlignedLoop
92
+ jmp KeccakP1600_AddBytes_LaneAlignedCheck
93
+ KeccakP1600_AddBytes_LaneAlignedLoop: # body: one 8-byte lane per iteration
94
+ mov (%rsi), %r8
95
+ add $8, %rsi
96
+ xor %r8, (%rdi) # state lane ^= 8 data bytes
97
+ add $8, %rdi
98
+ KeccakP1600_AddBytes_LaneAlignedCheck:
99
+ sub $8, %rcx # borrow (CF) is set once fewer than 8 bytes remain
100
+ jnc KeccakP1600_AddBytes_LaneAlignedLoop
101
+ KeccakP1600_AddBytes_LastIncompleteLane:
102
+ add $8, %rcx # undo the last subtraction: rcx = remaining bytes (0..7)
103
+ jz KeccakP1600_AddBytes_Exit
104
+ KeccakP1600_AddBytes_LastIncompleteLaneLoop: # tail: one byte per iteration
105
+ mov (%rsi), %r8b
106
+ inc %rsi
107
+ xor %r8b, (%rdi)
108
+ inc %rdi
109
+ dec %rcx
110
+ jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
111
+ KeccakP1600_AddBytes_Exit:
112
+ ret
113
+ .ifndef old_gas_syntax
114
+ .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
115
+ .endif
116
+
117
+ # -----------------------------------------------------------------------------
118
+ # Copies length bytes from data over the state starting at byte offset: leading partial lane bytewise, whole 8-byte lanes, then trailing bytes bytewise.
119
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
120
+ # %rdi %rsi %rdx %rcx
121
+ # Modifies %rdi, %rsi, %rdx, %rcx, %r8, %r9 and flags; no stack use. NOTE(review): offset/length are unsigned int but read as 64-bit %rdx/%rcx - assumes callers zero-extend; confirm.
122
+ .globl KeccakP1600_OverwriteBytes
123
+ .globl _KeccakP1600_OverwriteBytes # underscore-prefixed alias for the same entry point
124
+ .ifndef old_gas_syntax
125
+ .type KeccakP1600_OverwriteBytes,@function
126
+ .endif
127
+ KeccakP1600_OverwriteBytes:
128
+ _KeccakP1600_OverwriteBytes:
129
+ .balign 32 # padding is inserted after both entry labels
130
+ cmp $0, %rcx # length == 0 ?
131
+ jz KeccakP1600_OverwriteBytes_Exit # nothing to do
132
+ add %rdx, %rdi # state += offset
133
+ and $7, %rdx # rdx = offset modulo 8 (position inside the lane)
134
+ jz KeccakP1600_OverwriteBytes_LaneAlignedCheck # offset is a multiple of 8: skip the head loop
135
+ mov $8, %r9 # r9 is (max) length of incomplete lane
136
+ sub %rdx, %r9 # r9 = 8 - (offset modulo 8), i.e. 1..7
137
+ cmp %rcx, %r9
138
+ cmovae %rcx, %r9 # r9 = min(r9, length), unsigned
139
+ sub %r9, %rcx # length -= length of incomplete lane
140
+ KeccakP1600_OverwriteBytes_NotAlignedLoop: # head: r9 is at least 1 on entry, one byte per iteration
141
+ mov (%rsi), %r8b
142
+ inc %rsi
143
+ mov %r8b, (%rdi) # state byte = data byte
144
+ inc %rdi
145
+ dec %r9
146
+ jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
147
+ jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
148
+ KeccakP1600_OverwriteBytes_LaneAlignedLoop: # body: one 8-byte lane per iteration
149
+ mov (%rsi), %r8
150
+ add $8, %rsi
151
+ mov %r8, (%rdi) # state lane = 8 data bytes
152
+ add $8, %rdi
153
+ KeccakP1600_OverwriteBytes_LaneAlignedCheck:
154
+ sub $8, %rcx # borrow (CF) is set once fewer than 8 bytes remain
155
+ jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop
156
+ KeccakP1600_OverwriteBytes_LastIncompleteLane:
157
+ add $8, %rcx # undo the last subtraction: rcx = remaining bytes (0..7)
158
+ jz KeccakP1600_OverwriteBytes_Exit
159
+ KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop: # tail: one byte per iteration
160
+ mov (%rsi), %r8b
161
+ inc %rsi
162
+ mov %r8b, (%rdi)
163
+ inc %rdi
164
+ dec %rcx
165
+ jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
166
+ KeccakP1600_OverwriteBytes_Exit:
167
+ ret
168
+ .ifndef old_gas_syntax
169
+ .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
170
+ .endif
171
+
172
+ # -----------------------------------------------------------------------------
173
+ # Zeroes the first byteCount bytes of the state: 8 bytes at a time, then the remaining 0..7 bytes one at a time.
174
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
175
+ # %rdi %rsi
176
+ # Modifies %rdi, %rsi and flags; no stack use. NOTE(review): byteCount is unsigned int but read as 64-bit %rsi - assumes callers zero-extend; confirm.
177
+ .globl KeccakP1600_OverwriteWithZeroes
178
+ .globl _KeccakP1600_OverwriteWithZeroes # underscore-prefixed alias for the same entry point
179
+ .ifndef old_gas_syntax
180
+ .type KeccakP1600_OverwriteWithZeroes,@function
181
+ .endif
182
+ KeccakP1600_OverwriteWithZeroes:
183
+ _KeccakP1600_OverwriteWithZeroes:
184
+ .balign 32 # padding is inserted after both entry labels
185
+ cmp $0, %rsi # byteCount == 0 ?
186
+ jz KeccakP1600_OverwriteWithZeroes_Exit # nothing to do
187
+ jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck # enter the loop at its test
188
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop: # body: one 8-byte lane per iteration
189
+ movq $0, (%rdi)
190
+ add $8, %rdi
191
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
192
+ sub $8, %rsi # borrow (CF) is set once fewer than 8 bytes remain
193
+ jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
194
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
195
+ add $8, %rsi # undo the last subtraction: rsi = remaining bytes (0..7)
196
+ jz KeccakP1600_OverwriteWithZeroes_Exit
197
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop: # tail: one byte per iteration
198
+ movb $0, (%rdi)
199
+ inc %rdi
200
+ dec %rsi
201
+ jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
202
+ KeccakP1600_OverwriteWithZeroes_Exit:
203
+ ret
204
+ .ifndef old_gas_syntax
205
+ .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
206
+ .endif
207
+
208
+ # -----------------------------------------------------------------------------
209
+ # Copies length bytes of the state, starting at byte offset, into data: leading partial lane bytewise, whole 8-byte lanes, then a trailing partial lane.
210
+ # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
211
+ # %rdi %rsi %rdx %rcx
212
+ # Modifies %rax, %rcx, %rdx, %rdi, %rsi, %r8 and flags; no stack use. NOTE(review): offset/length are unsigned int but read as 64-bit %rdx/%rcx - assumes callers zero-extend; confirm.
213
+ .globl KeccakP1600_ExtractBytes
214
+ .globl _KeccakP1600_ExtractBytes # underscore-prefixed alias for the same entry point
215
+ .ifndef old_gas_syntax
216
+ .type KeccakP1600_ExtractBytes,@function
217
+ .endif
218
+ KeccakP1600_ExtractBytes:
219
+ _KeccakP1600_ExtractBytes:
220
+ .balign 32 # padding is inserted after both entry labels
221
+ cmp $0, %rcx # length == 0 ?
222
+ jz KeccakP1600_ExtractBytes_Exit # nothing to do
223
+ add %rdx, %rdi # state += offset
224
+ and $7, %rdx # rdx = offset modulo 8 (position inside the lane)
225
+ jz KeccakP1600_ExtractBytes_LaneAlignedCheck # offset is a multiple of 8: skip the head loop
226
+ mov $8, %rax # rax is (max) length of incomplete lane
227
+ sub %rdx, %rax # rax = 8 - (offset modulo 8), i.e. 1..7
228
+ cmp %rcx, %rax
229
+ cmovae %rcx, %rax # rax = min(rax, length), unsigned
230
+ sub %rax, %rcx # length -= length of incomplete lane
231
+ KeccakP1600_ExtractBytes_NotAlignedLoop: # head: rax is at least 1 on entry, one byte per iteration
232
+ mov (%rdi), %r8b
233
+ inc %rdi
234
+ mov %r8b, (%rsi) # data byte = state byte
235
+ inc %rsi
236
+ dec %rax
237
+ jnz KeccakP1600_ExtractBytes_NotAlignedLoop
238
+ jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
239
+ KeccakP1600_ExtractBytes_LaneAlignedLoop: # body: one 8-byte lane per iteration
240
+ mov (%rdi), %r8
241
+ add $8, %rdi
242
+ mov %r8, (%rsi) # 8 data bytes = state lane
243
+ add $8, %rsi
244
+ KeccakP1600_ExtractBytes_LaneAlignedCheck:
245
+ sub $8, %rcx # borrow (CF) is set once fewer than 8 bytes remain
246
+ jnc KeccakP1600_ExtractBytes_LaneAlignedLoop
247
+ KeccakP1600_ExtractBytes_LastIncompleteLane:
248
+ add $8, %rcx # undo the last subtraction: rcx = remaining bytes (0..7)
249
+ jz KeccakP1600_ExtractBytes_Exit
250
+ mov (%rdi), %r8 # loads all 8 bytes at (%rdi) although only rcx of them are stored; NOTE(review): confirm callers keep offset+length inside the state
251
+ KeccakP1600_ExtractBytes_LastIncompleteLaneLoop: # tail: emit the low byte of r8, then shift the next one down
252
+ mov %r8b, (%rsi)
253
+ shr $8, %r8
254
+ inc %rsi
255
+ dec %rcx
256
+ jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
257
+ KeccakP1600_ExtractBytes_Exit:
258
+ ret
259
+ .ifndef old_gas_syntax
260
+ .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
261
+ .endif
262
+
263
+ # -----------------------------------------------------------------------------
264
+ # Writes (state bytes XOR input bytes) to output for length bytes starting at state byte offset: leading partial lane bytewise, whole 8-byte lanes, then a trailing partial lane.
265
+ # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
266
+ # %rdi %rsi %rdx %rcx %r8
267
+ # Modifies %rdi, %rsi, %rdx, %rcx, %r8, %r9 and flags; %rbx is saved and restored on the stack. NOTE(review): offset/length are unsigned int but read as 64-bit %rcx/%r8 - assumes callers zero-extend; confirm.
268
+ .globl KeccakP1600_ExtractAndAddBytes
269
+ .globl _KeccakP1600_ExtractAndAddBytes # underscore-prefixed alias for the same entry point
270
+ .ifndef old_gas_syntax
271
+ .type KeccakP1600_ExtractAndAddBytes,@function
272
+ .endif
273
+ KeccakP1600_ExtractAndAddBytes:
274
+ _KeccakP1600_ExtractAndAddBytes:
275
+ .balign 32 # padding is inserted after both entry labels
276
+ push %rbx # saved before the length test; every path leaves through Exit, which pops it
277
+ cmp $0, %r8 # length == 0 ?
278
+ jz KeccakP1600_ExtractAndAddBytes_Exit # nothing to do
279
+ add %rcx, %rdi # state += offset
280
+ and $7, %rcx # rcx = offset modulo 8 (position inside the lane)
281
+ jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck # offset is a multiple of 8: skip the head loop
282
+ mov $8, %rbx # rbx is (max) length of incomplete lane
283
+ sub %rcx, %rbx # rbx = 8 - (offset modulo 8), i.e. 1..7
284
+ cmp %r8, %rbx
285
+ cmovae %r8, %rbx # rbx = min(rbx, length), unsigned
286
+ sub %rbx, %r8 # length -= length of incomplete lane
287
+ KeccakP1600_ExtractAndAddBytes_NotAlignedLoop: # head: rbx is at least 1 on entry, one byte per iteration
288
+ mov (%rdi), %r9b
289
+ inc %rdi
290
+ xor (%rsi), %r9b # r9b = state byte ^ input byte
291
+ inc %rsi
292
+ mov %r9b, (%rdx) # output byte = r9b
293
+ inc %rdx
294
+ dec %rbx
295
+ jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
296
+ jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
297
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop: # body: one 8-byte lane per iteration
298
+ mov (%rdi), %r9
299
+ add $8, %rdi
300
+ xor (%rsi), %r9 # r9 = state lane ^ 8 input bytes
301
+ add $8, %rsi
302
+ mov %r9, (%rdx) # 8 output bytes = r9
303
+ add $8, %rdx
304
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
305
+ sub $8, %r8 # borrow (CF) is set once fewer than 8 bytes remain
306
+ jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
307
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
308
+ add $8, %r8 # undo the last subtraction: r8 = remaining bytes (0..7)
309
+ jz KeccakP1600_ExtractAndAddBytes_Exit
310
+ mov (%rdi), %r9 # loads all 8 bytes at (%rdi) although only r8 of them are used; NOTE(review): confirm callers keep offset+length inside the state
311
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop: # tail: XOR and emit the low byte of r9, then shift the next one down
312
+ xor (%rsi), %r9b
313
+ inc %rsi
314
+ mov %r9b, (%rdx)
315
+ inc %rdx
316
+ shr $8, %r9
317
+ dec %r8
318
+ jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
319
+ KeccakP1600_ExtractAndAddBytes_Exit:
320
+ pop %rbx # restore the caller's rbx
321
+ ret
322
+ .ifndef old_gas_syntax
323
+ .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
324
+ .endif
325
+
326
+ # -----------------------------------------------------------------------------
327
+ # __KeccakF1600 (local symbol, reached with `call` from the routines in this file)
328
+ # internal: runs %eax passes of the loop below, each pass applying two rounds to the state held in %zmm0-%zmm4
329
+ # expects %r10 -> round constants, %k1-%k5 write masks, %zmm13-%zmm31 index/rotate tables; scratches %zmm5-%zmm12, %eax, %r10
330
+ .text
331
+ .ifndef old_gas_syntax
332
+ .type __KeccakF1600,@function
333
+ .endif
334
+ .balign 32
335
+ __KeccakF1600:
336
+ .Loop_avx512: # one pass = an "even" round followed by an "odd" round
337
+ ######################################### Theta, even round
338
+ vmovdqa64 %zmm0,%zmm5 # put aside original A00
339
+ vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00"
340
+ vpternlogq $0x96,%zmm4,%zmm3,%zmm0 # 0x96 is a three-way XOR: zmm0 = XOR of all five rows
341
+ vprolq $1,%zmm0,%zmm6 # every 64-bit lane rotated left by 1
342
+ vpermq %zmm0,%zmm13,%zmm0 # lanes reordered by the index vector in zmm13
343
+ vpermq %zmm6,%zmm16,%zmm6 # lanes reordered by the index vector in zmm16
344
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00
345
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm1
346
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm2
347
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm3
348
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm4
349
+ ######################################### Rho
350
+ vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00
351
+ vprolvq %zmm23,%zmm1,%zmm1 # per-lane rotate counts come from zmm22-zmm26
352
+ vprolvq %zmm24,%zmm2,%zmm2
353
+ vprolvq %zmm25,%zmm3,%zmm3
354
+ vprolvq %zmm26,%zmm4,%zmm4
355
+ ######################################### Pi
356
+ vpermq %zmm0,%zmm17,%zmm0 # index vectors zmm17-zmm21, one per row
357
+ vpermq %zmm1,%zmm18,%zmm1
358
+ vpermq %zmm2,%zmm19,%zmm2
359
+ vpermq %zmm3,%zmm20,%zmm3
360
+ vpermq %zmm4,%zmm21,%zmm4
361
+ ######################################### Chi
362
+ vmovdqa64 %zmm0,%zmm5 # copies of rows 0 and 1: both are overwritten before rows 3 and 4 read them
363
+ vmovdqa64 %zmm1,%zmm6
364
+ vpternlogq $0xD2,%zmm2,%zmm1,%zmm0 # 0xD2: zmm0 ^= ~zmm1 & zmm2
365
+ vpternlogq $0xD2,%zmm3,%zmm2,%zmm1
366
+ vpternlogq $0xD2,%zmm4,%zmm3,%zmm2
367
+ vpternlogq $0xD2,%zmm5,%zmm4,%zmm3
368
+ vpternlogq $0xD2,%zmm6,%zmm5,%zmm4
369
+ ######################################### Iota
370
+ vpxorq (%r10),%zmm0,%zmm0{%k1} # only the lanes enabled in k1 are modified
371
+ lea 16(%r10),%r10 # step over both constants of this pass; the second is read below at -8(%r10)
372
+ ######################################### Harmonize rounds
373
+ vpblendmq %zmm2,%zmm1,%zmm6{%k2} # NOTE(review): the blends/permutes below appear to regroup lanes across rows into the layout the odd-round code expects
374
+ vpblendmq %zmm3,%zmm2,%zmm7{%k2}
375
+ vpblendmq %zmm4,%zmm3,%zmm8{%k2}
376
+ vpblendmq %zmm1,%zmm0,%zmm5{%k2}
377
+ vpblendmq %zmm0,%zmm4,%zmm9{%k2}
378
+ vpblendmq %zmm3,%zmm6,%zmm6{%k3}
379
+ vpblendmq %zmm4,%zmm7,%zmm7{%k3}
380
+ vpblendmq %zmm2,%zmm5,%zmm5{%k3}
381
+ vpblendmq %zmm0,%zmm8,%zmm8{%k3}
382
+ vpblendmq %zmm1,%zmm9,%zmm9{%k3}
383
+ vpblendmq %zmm4,%zmm6,%zmm6{%k4}
384
+ vpblendmq %zmm3,%zmm5,%zmm5{%k4}
385
+ vpblendmq %zmm0,%zmm7,%zmm7{%k4}
386
+ vpblendmq %zmm1,%zmm8,%zmm8{%k4}
387
+ vpblendmq %zmm2,%zmm9,%zmm9{%k4}
388
+ vpblendmq %zmm4,%zmm5,%zmm5{%k5}
389
+ vpblendmq %zmm0,%zmm6,%zmm6{%k5}
390
+ vpblendmq %zmm1,%zmm7,%zmm7{%k5}
391
+ vpblendmq %zmm2,%zmm8,%zmm8{%k5}
392
+ vpblendmq %zmm3,%zmm9,%zmm9{%k5}
393
+ #vpermq %zmm5,%zmm33,%zmm0 # doesn't actually change order
394
+ vpermq %zmm6,%zmm13,%zmm1
395
+ vpermq %zmm7,%zmm14,%zmm2
396
+ vpermq %zmm8,%zmm15,%zmm3
397
+ vpermq %zmm9,%zmm16,%zmm4
398
+ ######################################### Theta, odd round
399
+ vmovdqa64 %zmm5,%zmm0 # real A00
400
+ vpternlogq $0x96,%zmm2,%zmm1,%zmm5 # C00 is %zmm5's alias
401
+ vpternlogq $0x96,%zmm4,%zmm3,%zmm5
402
+ vprolq $1,%zmm5,%zmm6
403
+ vpermq %zmm5,%zmm13,%zmm5
404
+ vpermq %zmm6,%zmm16,%zmm6
405
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm0
406
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm3
407
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm1
408
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm4
409
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm2
410
+ ######################################### Rho
411
+ vprolvq %zmm27,%zmm0,%zmm0 # odd round takes its rotate counts from zmm27-zmm31
412
+ vprolvq %zmm30,%zmm3,%zmm6
413
+ vprolvq %zmm28,%zmm1,%zmm7
414
+ vprolvq %zmm31,%zmm4,%zmm8
415
+ vprolvq %zmm29,%zmm2,%zmm9
416
+ vpermq %zmm0,%zmm16,%zmm10 # two reordered copies of row 0, consumed by the row-0 Chi below
417
+ vpermq %zmm0,%zmm15,%zmm11
418
+ ######################################### Iota
419
+ vpxorq -8(%r10),%zmm0,%zmm0{%k1} # second constant of the pair (r10 was already advanced by 16)
420
+ ######################################### Pi
421
+ vpermq %zmm6,%zmm14,%zmm1
422
+ vpermq %zmm7,%zmm16,%zmm2
423
+ vpermq %zmm8,%zmm13,%zmm3
424
+ vpermq %zmm9,%zmm15,%zmm4
425
+ ######################################### Chi
426
+ vpternlogq $0xD2,%zmm11,%zmm10,%zmm0 # zmm0 ^= ~zmm10 & zmm11
427
+ vpermq %zmm6,%zmm13,%zmm12
428
+ #vpermq %zmm6,%zmm33,%zmm6
429
+ vpternlogq $0xD2,%zmm6,%zmm12,%zmm1
430
+ vpermq %zmm7,%zmm15,%zmm5
431
+ vpermq %zmm7,%zmm14,%zmm7
432
+ vpternlogq $0xD2,%zmm7,%zmm5,%zmm2
433
+ #vpermq %zmm8,%zmm33,%zmm8
434
+ vpermq %zmm8,%zmm16,%zmm6
435
+ vpternlogq $0xD2,%zmm6,%zmm8,%zmm3
436
+ vpermq %zmm9,%zmm14,%zmm5
437
+ vpermq %zmm9,%zmm13,%zmm9
438
+ vpternlogq $0xD2,%zmm9,%zmm5,%zmm4
439
+ dec %eax # one more pass (two rounds) completed
440
+ jnz .Loop_avx512
441
+ ret
442
+ .ifndef old_gas_syntax
443
+ .size __KeccakF1600,.-__KeccakF1600
444
+ .endif
445
+
446
+ # -----------------------------------------------------------------------------
447
+ # Loads the 25-lane state into %zmm0-%zmm4, runs 12 passes of __KeccakF1600 (24 rounds) and stores it back.
448
+ # void KeccakP1600_Permute_24rounds(void *state);
449
+ # %rdi
450
+ # Modifies: %rdi (left at state+96), %r8, %r10, %eax, %k1-%k6, %zmm0-%zmm31 (upper halves cleared by vzeroupper on exit).
451
+ .globl KeccakP1600_Permute_24rounds
452
+ .globl _KeccakP1600_Permute_24rounds
453
+ .ifndef old_gas_syntax
454
+ .type KeccakP1600_Permute_24rounds,@function
455
+ .endif
456
+ KeccakP1600_Permute_24rounds:
457
+ _KeccakP1600_Permute_24rounds:
458
+ .balign 32
459
+ lea 96(%rdi),%rdi # bias the pointer by +96; every state access below compensates with a -96 displacement
460
+ lea theta_perm(%rip),%r8 # r8 = base of the constant tables
461
+ kxnorw %k6,%k6,%k6 # k6 = all ones
462
+ kshiftrw $15,%k6,%k1 # k1 = 0x0001 (lane 0 only)
463
+ kshiftrw $11,%k6,%k6 # k6 = 0x001f (lanes 0-4 = one 40-byte row)
464
+ kshiftlw $1,%k1,%k2 # k2..k5 = single-bit masks for lanes 1..4
465
+ kshiftlw $2,%k1,%k3
466
+ kshiftlw $3,%k1,%k4
467
+ kshiftlw $4,%k1,%k5
468
+ #vmovdqa64 64*0(%r8),%zmm33
469
+ vmovdqa64 64*1(%r8),%zmm13 # table rows 1-4 (theta_perm) -> zmm13-zmm16
470
+ vmovdqa64 64*2(%r8),%zmm14
471
+ vmovdqa64 64*3(%r8),%zmm15
472
+ vmovdqa64 64*4(%r8),%zmm16
473
+ vmovdqa64 64*5(%r8),%zmm27 # table rows 5-9 (rhotates1) -> zmm27-zmm31
474
+ vmovdqa64 64*6(%r8),%zmm28
475
+ vmovdqa64 64*7(%r8),%zmm29
476
+ vmovdqa64 64*8(%r8),%zmm30
477
+ vmovdqa64 64*9(%r8),%zmm31
478
+ vmovdqa64 64*10(%r8),%zmm22 # table rows 10-14 (rhotates0) -> zmm22-zmm26
479
+ vmovdqa64 64*11(%r8),%zmm23
480
+ vmovdqa64 64*12(%r8),%zmm24
481
+ vmovdqa64 64*13(%r8),%zmm25
482
+ vmovdqa64 64*14(%r8),%zmm26
483
+ vmovdqa64 64*15(%r8),%zmm17 # table rows 15-19 (pi0_perm) -> zmm17-zmm21
484
+ vmovdqa64 64*16(%r8),%zmm18
485
+ vmovdqa64 64*17(%r8),%zmm19
486
+ vmovdqa64 64*18(%r8),%zmm20
487
+ vmovdqa64 64*19(%r8),%zmm21
488
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # five lanes per row; lanes 5-7 of each register are zeroed
489
+ # vpxorq %zmm5,%zmm5,%zmm5
490
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
491
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
492
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
493
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
494
+ lea iotas(%rip), %r10 # start at the first round constant
495
+ mov $24/2, %eax # __KeccakF1600 does two rounds per pass
496
+ call __KeccakF1600
497
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # masked stores write back exactly 40 bytes per row
498
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
499
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
500
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
501
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
502
+ vzeroupper
503
+ ret
504
+ .ifndef old_gas_syntax
505
+ .size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
506
+ .endif
507
+
508
+ # -----------------------------------------------------------------------------
509
+ # Same setup as the 24-round entry point, but runs 6 passes of __KeccakF1600 starting at the 13th round constant.
510
+ # void KeccakP1600_Permute_12rounds(void *state);
511
+ # %rdi
512
+ # Modifies: %rdi (left at state+96), %r8, %r10, %eax, %k1-%k6, %zmm0-%zmm31 (upper halves cleared by vzeroupper on exit).
513
+ .globl KeccakP1600_Permute_12rounds
514
+ .globl _KeccakP1600_Permute_12rounds
515
+ .ifndef old_gas_syntax
516
+ .type KeccakP1600_Permute_12rounds,@function
517
+ .endif
518
+ KeccakP1600_Permute_12rounds:
519
+ _KeccakP1600_Permute_12rounds:
520
+ .balign 32
521
+ lea 96(%rdi),%rdi # bias the pointer by +96; every state access below compensates with a -96 displacement
522
+ lea theta_perm(%rip),%r8 # r8 = base of the constant tables
523
+ kxnorw %k6,%k6,%k6 # k6 = all ones
524
+ kshiftrw $15,%k6,%k1 # k1 = 0x0001 (lane 0 only)
525
+ kshiftrw $11,%k6,%k6 # k6 = 0x001f (lanes 0-4 = one 40-byte row)
526
+ kshiftlw $1,%k1,%k2 # k2..k5 = single-bit masks for lanes 1..4
527
+ kshiftlw $2,%k1,%k3
528
+ kshiftlw $3,%k1,%k4
529
+ kshiftlw $4,%k1,%k5
530
+ #vmovdqa64 64*0(%r8),%zmm33
531
+ vmovdqa64 64*1(%r8),%zmm13 # table rows 1-4 (theta_perm) -> zmm13-zmm16
532
+ vmovdqa64 64*2(%r8),%zmm14
533
+ vmovdqa64 64*3(%r8),%zmm15
534
+ vmovdqa64 64*4(%r8),%zmm16
535
+ vmovdqa64 64*5(%r8),%zmm27 # table rows 5-9 (rhotates1) -> zmm27-zmm31
536
+ vmovdqa64 64*6(%r8),%zmm28
537
+ vmovdqa64 64*7(%r8),%zmm29
538
+ vmovdqa64 64*8(%r8),%zmm30
539
+ vmovdqa64 64*9(%r8),%zmm31
540
+ vmovdqa64 64*10(%r8),%zmm22 # table rows 10-14 (rhotates0) -> zmm22-zmm26
541
+ vmovdqa64 64*11(%r8),%zmm23
542
+ vmovdqa64 64*12(%r8),%zmm24
543
+ vmovdqa64 64*13(%r8),%zmm25
544
+ vmovdqa64 64*14(%r8),%zmm26
545
+ vmovdqa64 64*15(%r8),%zmm17 # table rows 15-19 (pi0_perm) -> zmm17-zmm21
546
+ vmovdqa64 64*16(%r8),%zmm18
547
+ vmovdqa64 64*17(%r8),%zmm19
548
+ vmovdqa64 64*18(%r8),%zmm20
549
+ vmovdqa64 64*19(%r8),%zmm21
550
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # five lanes per row; lanes 5-7 of each register are zeroed
551
+ # vpxorq %zmm5,%zmm5,%zmm5
552
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
553
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
554
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
555
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
556
+ lea iotas+12*8(%rip), %r10 # skip the first 12 constants: only the last 12 entries of the table are used
557
+ mov $12/2, %eax # __KeccakF1600 does two rounds per pass
558
+ call __KeccakF1600
559
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # masked stores write back exactly 40 bytes per row
560
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
561
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
562
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
563
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
564
+ vzeroupper
565
+ ret
566
+ .ifndef old_gas_syntax
567
+ .size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
568
+ .endif
569
+
570
+ # -----------------------------------------------------------------------------
571
+ # Applies the last `nrounds` rounds: one single round first when nrounds is odd, then nrounds/2 passes of __KeccakF1600.
572
+ # void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
573
+ # %rdi %rsi
574
+ # nrounds is a 32-bit argument and is zero-extended before use. Modifies %rdi, %rsi, %r8, %r10, %rax, %k1-%k6, %zmm0-%zmm31.
575
+ .globl KeccakP1600_Permute_Nrounds
576
+ .globl _KeccakP1600_Permute_Nrounds
577
+ .ifndef old_gas_syntax
578
+ .type KeccakP1600_Permute_Nrounds,@function
579
+ .endif
580
+ KeccakP1600_Permute_Nrounds:
581
+ _KeccakP1600_Permute_Nrounds:
582
+ .balign 32
583
+ lea 96(%rdi),%rdi # bias the pointer by +96; every state access below compensates with a -96 displacement
584
+ lea theta_perm(%rip),%r8 # r8 = base of the constant tables
585
+ kxnorw %k6,%k6,%k6 # k6 = all ones
586
+ kshiftrw $15,%k6,%k1 # k1 = 0x0001 (lane 0 only)
587
+ kshiftrw $11,%k6,%k6 # k6 = 0x001f (lanes 0-4 = one 40-byte row)
588
+ kshiftlw $1,%k1,%k2 # k2..k5 = single-bit masks for lanes 1..4
589
+ kshiftlw $2,%k1,%k3
590
+ kshiftlw $3,%k1,%k4
591
+ kshiftlw $4,%k1,%k5
592
+ vmovdqa64 64*1(%r8),%zmm13
593
+ vmovdqa64 64*2(%r8),%zmm14
594
+ vmovdqa64 64*3(%r8),%zmm15
595
+ vmovdqa64 64*4(%r8),%zmm16
596
+ vmovdqa64 64*5(%r8),%zmm27
597
+ vmovdqa64 64*6(%r8),%zmm28
598
+ vmovdqa64 64*7(%r8),%zmm29
599
+ vmovdqa64 64*8(%r8),%zmm30
600
+ vmovdqa64 64*9(%r8),%zmm31
601
+ vmovdqa64 64*10(%r8),%zmm22
602
+ vmovdqa64 64*11(%r8),%zmm23
603
+ vmovdqa64 64*12(%r8),%zmm24
604
+ vmovdqa64 64*13(%r8),%zmm25
605
+ vmovdqa64 64*14(%r8),%zmm26
606
+ vmovdqa64 64*15(%r8),%zmm17
607
+ vmovdqa64 64*16(%r8),%zmm18
608
+ vmovdqa64 64*17(%r8),%zmm19
609
+ vmovdqa64 64*18(%r8),%zmm20
610
+ vmovdqa64 64*19(%r8),%zmm21
611
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z}
612
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
613
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
614
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
615
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
616
+ mov %esi, %eax # nrounds is unsigned int: the 32-bit move zero-extends it into %rax
617
+ lea iotas_end(%rip), %r10
618
+ lea (,%rax,8), %rsi # rsi = 8*nrounds, computed from the zero-extended copy
619
+ sub %rsi, %r10 # r10 = iotas_end - 8*nrounds: first constant of the last nrounds rounds
620
+ test $1, %eax
621
+ jz .KeccakP1600_Permute_Nrounds_DoubleRound # even count: no single round needed
622
+ # do odd round
623
+ ######################################### Theta
624
+ vmovdqa64 %zmm0,%zmm5 # put aside original A00
625
+ vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00"
626
+ vpternlogq $0x96,%zmm4,%zmm3,%zmm0
627
+ vprolq $1,%zmm0,%zmm6
628
+ vpermq %zmm0,%zmm13,%zmm0
629
+ vpermq %zmm6,%zmm16,%zmm6
630
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00
631
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm1
632
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm2
633
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm3
634
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm4
635
+ ######################################### Rho
636
+ vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00
637
+ vprolvq %zmm23,%zmm1,%zmm1
638
+ vprolvq %zmm24,%zmm2,%zmm2
639
+ vprolvq %zmm25,%zmm3,%zmm3
640
+ vprolvq %zmm26,%zmm4,%zmm4
641
+ ######################################### Pi
642
+ vpermq %zmm0,%zmm17,%zmm0
643
+ vpermq %zmm1,%zmm18,%zmm1
644
+ vpermq %zmm2,%zmm19,%zmm2
645
+ vpermq %zmm3,%zmm20,%zmm3
646
+ vpermq %zmm4,%zmm21,%zmm4
647
+ ######################################### Chi
648
+ vmovdqa64 %zmm0,%zmm5
649
+ vmovdqa64 %zmm1,%zmm6
650
+ vpternlogq $0xD2,%zmm2,%zmm1,%zmm0
651
+ vpternlogq $0xD2,%zmm3,%zmm2,%zmm1
652
+ vpternlogq $0xD2,%zmm4,%zmm3,%zmm2
653
+ vpternlogq $0xD2,%zmm5,%zmm4,%zmm3
654
+ vpternlogq $0xD2,%zmm6,%zmm5,%zmm4
655
+ ######################################### Iota
656
+ vpxorq (%r10),%zmm0,%zmm0{%k1}
657
+ lea 8(%r10),%r10 # a single round consumes one constant
658
+ ######################################### Harmonize single round
659
+ vpermq %zmm1,%zmm13,%zmm1
660
+ vpermq %zmm2,%zmm14,%zmm2
661
+ vpermq %zmm3,%zmm15,%zmm3
662
+ vpermq %zmm4,%zmm16,%zmm4
663
+ vpblendmq %zmm1,%zmm0,%zmm5{%k2}
664
+ vpblendmq %zmm2,%zmm1,%zmm6{%k2}
665
+ vpblendmq %zmm3,%zmm2,%zmm7{%k2}
666
+ vpblendmq %zmm4,%zmm3,%zmm8{%k2}
667
+ vpblendmq %zmm0,%zmm4,%zmm9{%k2}
668
+ vpblendmq %zmm2,%zmm5,%zmm5{%k3}
669
+ vpblendmq %zmm3,%zmm6,%zmm6{%k3}
670
+ vpblendmq %zmm4,%zmm7,%zmm7{%k3}
671
+ vpblendmq %zmm0,%zmm8,%zmm8{%k3}
672
+ vpblendmq %zmm1,%zmm9,%zmm9{%k3}
673
+ vpblendmq %zmm3,%zmm5,%zmm5{%k4}
674
+ vpblendmq %zmm4,%zmm6,%zmm6{%k4}
675
+ vpblendmq %zmm0,%zmm7,%zmm7{%k4}
676
+ vpblendmq %zmm1,%zmm8,%zmm8{%k4}
677
+ vpblendmq %zmm2,%zmm9,%zmm9{%k4}
678
+ vpblendmq %zmm0,%zmm6,%zmm6{%k5}
679
+ vpblendmq %zmm4,%zmm5,%zmm0{%k5}
680
+ vpblendmq %zmm1,%zmm7,%zmm7{%k5}
681
+ vpblendmq %zmm2,%zmm8,%zmm8{%k5}
682
+ vpblendmq %zmm3,%zmm9,%zmm9{%k5}
683
+ vpermq %zmm6,%zmm13,%zmm4
684
+ vpermq %zmm7,%zmm14,%zmm3
685
+ vpermq %zmm8,%zmm15,%zmm2
686
+ vpermq %zmm9,%zmm16,%zmm1
687
+ .KeccakP1600_Permute_Nrounds_DoubleRound:
688
+ shr $1, %eax # eax = number of two-round passes still to run
689
+ jz .KeccakP1600_Permute_Nrounds_End # nrounds was 0 or 1
690
+ call __KeccakF1600
691
+ .KeccakP1600_Permute_Nrounds_End:
692
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6}
693
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
694
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
695
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
696
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
697
+ vzeroupper
698
+ ret
699
+ .ifndef old_gas_syntax
700
+ .size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
701
+ .endif
702
+
703
+ # -----------------------------------------------------------------------------
704
+ # Absorbs whole blocks of laneCount lanes (XOR into the state, then 24 rounds) while enough data remains; returns the bytes consumed.
705
+ # size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
706
+ # %rdi %rsi %rdx %rcx
707
+ # laneCount is a 32-bit argument and is zero-extended on entry. laneCount 21 and 17 use the in-register path; other values go through memory.
708
+ .globl KeccakF1600_FastLoop_Absorb
709
+ .globl _KeccakF1600_FastLoop_Absorb
710
+ .ifndef old_gas_syntax
711
+ .type KeccakF1600_FastLoop_Absorb,@function
712
+ .endif
713
+ KeccakF1600_FastLoop_Absorb:
714
+ _KeccakF1600_FastLoop_Absorb:
715
+ .balign 32
716
+ push %rbx # callee-saved; this single push also leaves %rsp 16-byte aligned for the calls below
717
+ mov %esi, %esi # laneCount is unsigned int: clear the undefined upper half of %rsi
718
+ shr $3, %rcx # rcx = data length in lanes
719
+ mov %rdx, %rbx # rbx = initial data pointer
720
+ cmp %rsi, %rcx
721
+ jb KeccakF1600_FastLoop_Absorb_Exit # less than one block available: consume nothing
722
+ lea 96(%rdi),%rdi
723
+ lea theta_perm(%rip),%r8
724
+ kxnorw %k6,%k6,%k6
725
+ kshiftrw $15,%k6,%k1
726
+ kshiftrw $11,%k6,%k6
727
+ kshiftlw $1,%k1,%k2
728
+ kshiftlw $2,%k1,%k3
729
+ kshiftlw $3,%k1,%k4
730
+ kshiftlw $4,%k1,%k5
731
+ vmovdqa64 64*1(%r8),%zmm13
732
+ vmovdqa64 64*2(%r8),%zmm14
733
+ vmovdqa64 64*3(%r8),%zmm15
734
+ vmovdqa64 64*4(%r8),%zmm16
735
+ vmovdqa64 64*5(%r8),%zmm27
736
+ vmovdqa64 64*6(%r8),%zmm28
737
+ vmovdqa64 64*7(%r8),%zmm29
738
+ vmovdqa64 64*8(%r8),%zmm30
739
+ vmovdqa64 64*9(%r8),%zmm31
740
+ vmovdqa64 64*10(%r8),%zmm22
741
+ vmovdqa64 64*11(%r8),%zmm23
742
+ vmovdqa64 64*12(%r8),%zmm24
743
+ vmovdqa64 64*13(%r8),%zmm25
744
+ vmovdqa64 64*14(%r8),%zmm26
745
+ vmovdqa64 64*15(%r8),%zmm17
746
+ vmovdqa64 64*16(%r8),%zmm18
747
+ vmovdqa64 64*17(%r8),%zmm19
748
+ vmovdqa64 64*18(%r8),%zmm20
749
+ vmovdqa64 64*19(%r8),%zmm21
750
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z}
751
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
752
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
753
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
754
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
755
+ cmp $21, %rsi
756
+ jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
757
+ sub $21, %rcx # pre-charge the first block; the borrow test at the loop bottom ends the loop
758
+ KeccakF1600_FastLoop_Absorb_Loop21Lanes:
759
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z}
760
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z}
761
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z}
762
+ vmovdqu64 8*15(%rdx),%zmm8{%k6}{z}
763
+ vmovdqu64 8*20(%rdx),%zmm9{%k1}{z} # 21st lane only
764
+ vpxorq %zmm5,%zmm0,%zmm0
765
+ vpxorq %zmm6,%zmm1,%zmm1
766
+ vpxorq %zmm7,%zmm2,%zmm2
767
+ vpxorq %zmm8,%zmm3,%zmm3
768
+ vpxorq %zmm9,%zmm4,%zmm4
769
+ add $21*8, %rdx
770
+ lea iotas(%rip), %r10
771
+ mov $12, %eax # 12 two-round passes = 24 rounds
772
+ call __KeccakF1600
773
+ sub $21, %rcx
774
+ jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
775
+ KeccakF1600_FastLoop_Absorb_SaveAndExit:
776
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6}
777
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
778
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
779
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
780
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
781
+ KeccakF1600_FastLoop_Absorb_Exit:
782
+ vzeroupper
783
+ mov %rdx, %rax # return number of bytes processed
784
+ sub %rbx, %rax
785
+ # %r10 is a volatile register in the SysV ABI, so only %rbx needs restoring
786
+ pop %rbx
787
+ ret
788
+ KeccakF1600_FastLoop_Absorb_Not21Lanes:
789
+ cmp $17, %rsi
790
+ jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
791
+ sub $17, %rcx
792
+ KeccakF1600_FastLoop_Absorb_Loop17Lanes:
793
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z}
794
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z}
795
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z}
796
+ vmovdqu64 8*15(%rdx),%zmm8{%k1}{z} # lane 15, other lanes zeroed
797
+ vmovdqu64 8*15(%rdx),%zmm8{%k2} # merge in lane 16
798
+ vpxorq %zmm5,%zmm0,%zmm0
799
+ vpxorq %zmm6,%zmm1,%zmm1
800
+ vpxorq %zmm7,%zmm2,%zmm2
801
+ vpxorq %zmm8,%zmm3,%zmm3
802
+ add $17*8, %rdx
803
+ lea iotas(%rip), %r10
804
+ mov $12, %eax # 12 two-round passes = 24 rounds
805
+ call __KeccakF1600
806
+ sub $17, %rcx
807
+ jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes
808
+ jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
809
+ KeccakF1600_FastLoop_Absorb_Not17Lanes:
810
+ lea -96(%rdi), %rdi # undo the +96 bias: this path works on the state in memory
811
+ KeccakF1600_FastLoop_Absorb_LanesLoop:
812
+ mov %rsi, %rax # rax = lanes left in this block
813
+ mov %rdi, %r10 # r10 = write cursor into the state
814
+ KeccakF1600_FastLoop_Absorb_LanesAddLoop:
815
+ mov (%rdx), %r8
816
+ add $8, %rdx
817
+ xor %r8, (%r10)
818
+ add $8, %r10
819
+ sub $1, %rax
820
+ jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
821
+ sub %rsi, %rcx
822
+ push %rdi # four pushes (32 bytes) keep %rsp 16-byte aligned at the call
823
+ push %rsi
824
+ push %rdx
825
+ push %rcx
826
+ .ifdef no_plt
827
+ call KeccakP1600_Permute_24rounds
828
+ .else
829
+ call KeccakP1600_Permute_24rounds@PLT
830
+ .endif
831
+ pop %rcx
832
+ pop %rdx
833
+ pop %rsi
834
+ pop %rdi
835
+ cmp %rsi, %rcx
836
+ jae KeccakF1600_FastLoop_Absorb_LanesLoop # at least one more full block left
837
+ jmp KeccakF1600_FastLoop_Absorb_Exit
838
+ .ifndef old_gas_syntax
839
+ .size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
840
+ .endif
841
+
842
+ # -----------------------------------------------------------------------------
843
+ # Absorbs whole blocks of laneCount lanes (XOR into the state, then 12 rounds) while enough data remains; returns the bytes consumed.
844
+ # size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
845
+ # %rdi %rsi %rdx %rcx
846
+ # laneCount is a 32-bit argument and is zero-extended on entry. laneCount 21 and 17 use the in-register path; other values go through memory.
847
+ .globl KeccakP1600_12rounds_FastLoop_Absorb
848
+ .globl _KeccakP1600_12rounds_FastLoop_Absorb
849
+ .ifndef old_gas_syntax
850
+ .type KeccakP1600_12rounds_FastLoop_Absorb,@function
851
+ .endif
852
+ KeccakP1600_12rounds_FastLoop_Absorb:
853
+ _KeccakP1600_12rounds_FastLoop_Absorb:
854
+ .balign 32
855
+ push %rbx # callee-saved; this single push also leaves %rsp 16-byte aligned for the calls below
856
+ mov %esi, %esi # laneCount is unsigned int: clear the undefined upper half of %rsi
857
+ shr $3, %rcx # rcx = data length in lanes
858
+ mov %rdx, %rbx # rbx = initial data pointer
859
+ cmp %rsi, %rcx
860
+ jb KeccakP1600_FastLoop_Absorb_Exit # less than one block available: consume nothing
861
+ lea 96(%rdi),%rdi
862
+ lea theta_perm(%rip),%r8
863
+ kxnorw %k6,%k6,%k6
864
+ kshiftrw $15,%k6,%k1
865
+ kshiftrw $11,%k6,%k6
866
+ kshiftlw $1,%k1,%k2
867
+ kshiftlw $2,%k1,%k3
868
+ kshiftlw $3,%k1,%k4
869
+ kshiftlw $4,%k1,%k5
870
+ vmovdqa64 64*1(%r8),%zmm13
871
+ vmovdqa64 64*2(%r8),%zmm14
872
+ vmovdqa64 64*3(%r8),%zmm15
873
+ vmovdqa64 64*4(%r8),%zmm16
874
+ vmovdqa64 64*5(%r8),%zmm27
875
+ vmovdqa64 64*6(%r8),%zmm28
876
+ vmovdqa64 64*7(%r8),%zmm29
877
+ vmovdqa64 64*8(%r8),%zmm30
878
+ vmovdqa64 64*9(%r8),%zmm31
879
+ vmovdqa64 64*10(%r8),%zmm22
880
+ vmovdqa64 64*11(%r8),%zmm23
881
+ vmovdqa64 64*12(%r8),%zmm24
882
+ vmovdqa64 64*13(%r8),%zmm25
883
+ vmovdqa64 64*14(%r8),%zmm26
884
+ vmovdqa64 64*15(%r8),%zmm17
885
+ vmovdqa64 64*16(%r8),%zmm18
886
+ vmovdqa64 64*17(%r8),%zmm19
887
+ vmovdqa64 64*18(%r8),%zmm20
888
+ vmovdqa64 64*19(%r8),%zmm21
889
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z}
890
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
891
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
892
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
893
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
894
+ cmp $21, %rsi
895
+ jnz KeccakP1600_FastLoop_Absorb_Not21Lanes
896
+ sub $21, %rcx # pre-charge the first block; the borrow test at the loop bottom ends the loop
897
+ KeccakP1600_FastLoop_Absorb_Loop21Lanes:
898
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z}
899
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z}
900
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z}
901
+ vmovdqu64 8*15(%rdx),%zmm8{%k6}{z}
902
+ vmovdqu64 8*20(%rdx),%zmm9{%k1}{z} # 21st lane only
903
+ vpxorq %zmm5,%zmm0,%zmm0
904
+ vpxorq %zmm6,%zmm1,%zmm1
905
+ vpxorq %zmm7,%zmm2,%zmm2
906
+ vpxorq %zmm8,%zmm3,%zmm3
907
+ vpxorq %zmm9,%zmm4,%zmm4
908
+ add $21*8, %rdx
909
+ lea iotas+12*8(%rip), %r10 # skip the first 12 constants: only the last 12 are used
910
+ mov $12/2, %eax # 6 two-round passes = 12 rounds
911
+ call __KeccakF1600
912
+ sub $21, %rcx
913
+ jnc KeccakP1600_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
914
+ KeccakP1600_FastLoop_Absorb_SaveAndExit:
915
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6}
916
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
917
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
918
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
919
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
920
+ KeccakP1600_FastLoop_Absorb_Exit:
921
+ vzeroupper
922
+ mov %rdx, %rax # return number of bytes processed
923
+ sub %rbx, %rax
924
+ # %r10 is a volatile register in the SysV ABI, so only %rbx needs restoring
925
+ pop %rbx
926
+ ret
927
+ KeccakP1600_FastLoop_Absorb_Not21Lanes:
928
+ cmp $17, %rsi
929
+ jnz KeccakP1600_FastLoop_Absorb_Not17Lanes
930
+ sub $17, %rcx
931
+ KeccakP1600_FastLoop_Absorb_Loop17Lanes:
932
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z}
933
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z}
934
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z}
935
+ vmovdqu64 8*15(%rdx),%zmm8{%k1}{z} # lane 15, other lanes zeroed
936
+ vmovdqu64 8*15(%rdx),%zmm8{%k2} # merge in lane 16
937
+ vpxorq %zmm5,%zmm0,%zmm0
938
+ vpxorq %zmm6,%zmm1,%zmm1
939
+ vpxorq %zmm7,%zmm2,%zmm2
940
+ vpxorq %zmm8,%zmm3,%zmm3
941
+ add $17*8, %rdx
942
+ lea iotas+12*8(%rip), %r10 # skip the first 12 constants: only the last 12 are used
943
+ mov $12/2, %eax # 6 two-round passes = 12 rounds
944
+ call __KeccakF1600
945
+ sub $17, %rcx
946
+ jnc KeccakP1600_FastLoop_Absorb_Loop17Lanes
947
+ jmp KeccakP1600_FastLoop_Absorb_SaveAndExit
948
+ KeccakP1600_FastLoop_Absorb_Not17Lanes:
949
+ lea -96(%rdi), %rdi # undo the +96 bias: this path works on the state in memory
950
+ KeccakP1600_FastLoop_Absorb_LanesLoop:
951
+ mov %rsi, %rax # rax = lanes left in this block
952
+ mov %rdi, %r10 # r10 = write cursor into the state
953
+ KeccakP1600_FastLoop_Absorb_LanesAddLoop:
954
+ mov (%rdx), %r8
955
+ add $8, %rdx
956
+ xor %r8, (%r10)
957
+ add $8, %r10
958
+ sub $1, %rax
959
+ jnz KeccakP1600_FastLoop_Absorb_LanesAddLoop
960
+ sub %rsi, %rcx
961
+ push %rdi # four pushes (32 bytes) keep %rsp 16-byte aligned at the call
962
+ push %rsi
963
+ push %rdx
964
+ push %rcx
965
+ .ifdef no_plt
966
+ call KeccakP1600_Permute_12rounds
967
+ .else
968
+ call KeccakP1600_Permute_12rounds@PLT
969
+ .endif
970
+ pop %rcx
971
+ pop %rdx
972
+ pop %rsi
973
+ pop %rdi
974
+ cmp %rsi, %rcx
975
+ jae KeccakP1600_FastLoop_Absorb_LanesLoop # at least one more full block left
976
+ jmp KeccakP1600_FastLoop_Absorb_Exit
977
+ .ifndef old_gas_syntax
978
+ .size KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
979
+ .endif
980
+ .balign 64 # the rows below are fetched with aligned 64-byte loads (vmovdqa64)
981
+ theta_perm: # vpermq index rows; rows 1-4 are loaded into zmm13-zmm16
982
+ .quad 0, 1, 2, 3, 4, 5, 6, 7 # [not used]
983
+ .quad 4, 0, 1, 2, 3, 5, 6, 7
984
+ .quad 3, 4, 0, 1, 2, 5, 6, 7
985
+ .quad 2, 3, 4, 0, 1, 5, 6, 7
986
+ .quad 1, 2, 3, 4, 0, 5, 6, 7
987
+ rhotates1: # per-lane rotate counts, loaded into zmm27-zmm31 (odd-round Rho)
988
+ .quad 0, 44, 43, 21, 14, 0, 0, 0 # [0][0] [1][1] [2][2] [3][3] [4][4]
989
+ .quad 18, 1, 6, 25, 8, 0, 0, 0 # [4][0] [0][1] [1][2] [2][3] [3][4]
990
+ .quad 41, 2, 62, 55, 39, 0, 0, 0 # [3][0] [4][1] [0][2] [1][3] [2][4]
991
+ .quad 3, 45, 61, 28, 20, 0, 0, 0 # [2][0] [3][1] [4][2] [0][3] [1][4]
992
+ .quad 36, 10, 15, 56, 27, 0, 0, 0 # [1][0] [2][1] [3][2] [4][3] [0][4]
993
+ rhotates0: # per-lane rotate counts, loaded into zmm22-zmm26 (even-round Rho)
994
+ .quad 0, 1, 62, 28, 27, 0, 0, 0
995
+ .quad 36, 44, 6, 55, 20, 0, 0, 0
996
+ .quad 3, 10, 43, 25, 39, 0, 0, 0
997
+ .quad 41, 45, 15, 21, 8, 0, 0, 0
998
+ .quad 18, 2, 61, 56, 14, 0, 0, 0
999
+ pi0_perm: # vpermq index rows, loaded into zmm17-zmm21 (even-round Pi)
1000
+ .quad 0, 3, 1, 4, 2, 5, 6, 7
1001
+ .quad 1, 4, 2, 0, 3, 5, 6, 7
1002
+ .quad 2, 0, 3, 1, 4, 5, 6, 7
1003
+ .quad 3, 1, 4, 2, 0, 5, 6, 7
1004
+ .quad 4, 2, 0, 3, 1, 5, 6, 7
1005
+ iotas: # 24 round constants, one 64-bit word per round, in round order
1006
+ .quad 0x0000000000000001
1007
+ .quad 0x0000000000008082
1008
+ .quad 0x800000000000808a
1009
+ .quad 0x8000000080008000
1010
+ .quad 0x000000000000808b
1011
+ .quad 0x0000000080000001
1012
+ .quad 0x8000000080008081
1013
+ .quad 0x8000000000008009
1014
+ .quad 0x000000000000008a
1015
+ .quad 0x0000000000000088
1016
+ .quad 0x0000000080008009
1017
+ .quad 0x000000008000000a
1018
+ .quad 0x000000008000808b # iotas+12*8: first constant used by the 12-round entry points
1019
+ .quad 0x800000000000008b
1020
+ .quad 0x8000000000008089
1021
+ .quad 0x8000000000008003
1022
+ .quad 0x8000000000008002
1023
+ .quad 0x8000000000000080
1024
+ .quad 0x000000000000800a
1025
+ .quad 0x800000008000000a
1026
+ .quad 0x8000000080008081
1027
+ .quad 0x8000000000008080
1028
+ .quad 0x0000000080000001
1029
+ .quad 0x8000000080008008
1030
+ iotas_end: # one past the last constant; KeccakP1600_Permute_Nrounds indexes backwards from here
1031
+ .asciz "Keccak-1600 for AVX-512F, CRYPTOGAMS by <appro@openssl.org>"