sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1100 @@
1
+ # The eXtended Keccak Code Package (XKCP)
2
+ # https://github.com/XKCP/XKCP
3
+ #
4
+ # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
5
+ # Copyright (c) 2017 Ronny Van Keer
6
+ # All rights reserved.
7
+ #
8
+ # The source code in this file is licensed under the CRYPTOGAMS license.
9
+ # For further details see http://www.openssl.org/~appro/cryptogams/.
10
+ #
11
+ # Notes:
12
+ # The code for the permutation (__KeccakF1600) was generated with
13
+ # Andy Polyakov's keccak1600-avx2.pl from the CRYPTOGAMS project
14
+ # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
15
+ # The rest of the code was written by Ronny Van Keer.
16
+ # Adaptations for macOS by Stéphane Léon.
17
+ # Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
18
+
19
+ .text
20
+
21
+ # -----------------------------------------------------------------------------
22
+ # Zero the 200 bytes at `state`: six 32-byte vector stores followed by one 8-byte store.
23
+ # void KeccakP1600_Initialize(void *state);
24
+ # In: %rdi = state. Writes ymm0 and memory only. The stores are unaligned-capable (vmovdqu).
25
+ .globl KeccakP1600_Initialize
26
+ .globl _KeccakP1600_Initialize # same entry point exported under an underscore-prefixed name
27
+ .ifndef old_gas_syntax
28
+ .type KeccakP1600_Initialize,@function
29
+ .endif
30
+ KeccakP1600_Initialize:
31
+ _KeccakP1600_Initialize:
32
+ .balign 32 # NOTE(review): placed after the entry labels, so any alignment padding sits inside the function and is executed on entry - confirm this is intended
33
+ vpxor %ymm0,%ymm0,%ymm0 # ymm0 = 0
34
+ vmovdqu %ymm0,0*32(%rdi) # state[0..31] = 0
35
+ vmovdqu %ymm0,1*32(%rdi) # state[32..63] = 0
36
+ vmovdqu %ymm0,2*32(%rdi) # state[64..95] = 0
37
+ vmovdqu %ymm0,3*32(%rdi) # state[96..127] = 0
38
+ vmovdqu %ymm0,4*32(%rdi) # state[128..159] = 0
39
+ vmovdqu %ymm0,5*32(%rdi) # state[160..191] = 0
40
+ movq $0,6*32(%rdi) # state[192..199] = 0 (the last 8 bytes)
41
+ ret # NOTE(review): returns without vzeroupper after a 256-bit register write - confirm callers do not mix in legacy SSE code
42
+ .ifndef old_gas_syntax
43
+ .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
44
+ .endif
45
+
46
+ # -----------------------------------------------------------------------------
47
+ # XOR the single byte `data` into the state byte addressed by `offset`.
48
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
49
+ # %rdi %rsi %rdx
50
+ # The lane address is state + the 8-byte mapState entry found at byte index (offset rounded down to a multiple of 8); mapState is defined outside this view. Clobbers rax, rdx, rdi, r9 and flags.
51
+ .globl KeccakP1600_AddByte
52
+ .globl _KeccakP1600_AddByte
53
+ .ifndef old_gas_syntax
54
+ .type KeccakP1600_AddByte,@function
55
+ .endif
56
+ KeccakP1600_AddByte:
57
+ _KeccakP1600_AddByte:
58
+ .balign 32
59
+ mov %rdx, %rax # rax = offset (only the low 3 bits survive the next instruction)
60
+ and $7, %rax # rax = byte position inside the lane (offset mod 8)
61
+ and $0xFFFFFFF8, %edx # edx = offset rounded down to a multiple of 8; the 32-bit write also clears the upper half of rdx
62
+ lea mapState(%rip), %r9 # r9 = address of the mapState table
63
+ mov (%r9, %rdx), %rdx # rdx = 8-byte table entry for this lane
64
+ add %rdx, %rdi # rdi = state + table entry
65
+ add %rax, %rdi # rdi += byte position inside the lane
66
+ xorb %sil, (%rdi) # target byte ^= data
67
+ ret
68
+ .ifndef old_gas_syntax
69
+ .size KeccakP1600_AddByte,.-KeccakP1600_AddByte
70
+ .endif
71
+
72
+ # -----------------------------------------------------------------------------
73
+ #
74
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
75
+ # %rdi %rsi %rdx %rcx
76
+ #
+ # XORs `length` bytes read from `data` into the state, starting at state byte `offset`.
+ # Each lane is located through the mapState table (defined elsewhere in this file): the
+ # 8-byte entry for a lane is added to `state` to form that lane's address.
+ # The work is done in three phases: an optional unaligned head (up to the next lane
+ # boundary), whole 8-byte lanes, and an optional tail of 1..7 bytes.
+ # `length` is zero-extended on entry because the upper 32 bits of a register that carries
+ # an unsigned int argument are unspecified by the System V x86-64 calling convention.
+ # Clobbers: rax, rcx, rdx, rsi, r8, r9, flags.
+ #
77
+ .globl KeccakP1600_AddBytes
78
+ .globl _KeccakP1600_AddBytes
79
+ .ifndef old_gas_syntax
80
+ .type KeccakP1600_AddBytes,@function
81
+ .endif
82
+ KeccakP1600_AddBytes:
83
+ _KeccakP1600_AddBytes:
84
+ .balign 32
85
+ mov %ecx, %ecx # zero-extend length: bits 63..32 of rcx are not guaranteed to be 0
+ test %rcx, %rcx # nothing to do for length == 0
86
+ jz KeccakP1600_AddBytes_Exit
87
+ mov %rdx, %rax # rax offset in lane
88
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
89
+ lea mapState(%rip), %r9
90
+ add %r9, %rdx
91
+ and $7, %rax
92
+ jz KeccakP1600_AddBytes_LaneAlignedCheck
93
+ mov $8, %r9 # r9 is (max) length of incomplete lane
94
+ sub %rax, %r9
95
+ cmp %rcx, %r9
96
+ cmovae %rcx, %r9 # r9 = min(bytes left in this lane, length)
97
+ sub %r9, %rcx # length -= length of incomplete lane
98
+ add (%rdx), %rax # rax = pointer to state lane
99
+ add $8, %rdx # advance to the next mapper entry
100
+ add %rdi, %rax
101
+ KeccakP1600_AddBytes_NotAlignedLoop:
102
+ mov (%rsi), %r8b
103
+ inc %rsi
104
+ xorb %r8b, (%rax)
105
+ inc %rax
106
+ dec %r9
107
+ jnz KeccakP1600_AddBytes_NotAlignedLoop
108
+ jmp KeccakP1600_AddBytes_LaneAlignedCheck
109
+ KeccakP1600_AddBytes_LaneAlignedLoop:
110
+ mov (%rsi), %r8 # next 8 input bytes
111
+ add $8, %rsi
112
+ mov (%rdx), %rax
113
+ add $8, %rdx
114
+ add %rdi, %rax # rax = address of the current lane
115
+ xor %r8, (%rax)
116
+ KeccakP1600_AddBytes_LaneAlignedCheck:
117
+ sub $8, %rcx
118
+ jnc KeccakP1600_AddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
119
+ KeccakP1600_AddBytes_LastIncompleteLane:
120
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 remaining bytes
121
+ jz KeccakP1600_AddBytes_Exit
122
+ mov (%rdx), %rax
123
+ add %rdi, %rax
124
+ KeccakP1600_AddBytes_LastIncompleteLaneLoop:
125
+ mov (%rsi), %r8b
126
+ inc %rsi
127
+ xor %r8b, (%rax)
128
+ inc %rax
129
+ dec %rcx
130
+ jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
131
+ KeccakP1600_AddBytes_Exit:
132
+ ret
133
+ .ifndef old_gas_syntax
134
+ .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
135
+ .endif
136
+
137
+ # -----------------------------------------------------------------------------
138
+ #
139
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
140
+ # %rdi %rsi %rdx %rcx
141
+ #
+ # Copies `length` bytes from `data` over the state bytes starting at state byte `offset`.
+ # Each lane is located through the mapState table (defined elsewhere in this file): the
+ # 8-byte entry for a lane is added to `state` to form that lane's address.
+ # The work is done in three phases: an optional unaligned head (up to the next lane
+ # boundary), whole 8-byte lanes, and an optional tail of 1..7 bytes.
+ # `length` is zero-extended on entry because the upper 32 bits of a register that carries
+ # an unsigned int argument are unspecified by the System V x86-64 calling convention.
+ # Clobbers: rax, rcx, rdx, rsi, r8, r9, flags.
+ #
142
+ .globl KeccakP1600_OverwriteBytes
143
+ .globl _KeccakP1600_OverwriteBytes
144
+ .ifndef old_gas_syntax
145
+ .type KeccakP1600_OverwriteBytes,@function
146
+ .endif
147
+ KeccakP1600_OverwriteBytes:
148
+ _KeccakP1600_OverwriteBytes:
149
+ .balign 32
150
+ mov %ecx, %ecx # zero-extend length: bits 63..32 of rcx are not guaranteed to be 0
+ test %rcx, %rcx # nothing to do for length == 0
151
+ jz KeccakP1600_OverwriteBytes_Exit
152
+ mov %rdx, %rax # rax offset in lane
153
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
154
+ lea mapState(%rip), %r9
155
+ add %r9, %rdx
156
+ and $7, %rax
157
+ jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
158
+ mov $8, %r9 # r9 is (max) length of incomplete lane
159
+ sub %rax, %r9
160
+ cmp %rcx, %r9
161
+ cmovae %rcx, %r9 # r9 = min(bytes left in this lane, length)
162
+ sub %r9, %rcx # length -= length of incomplete lane
163
+ add (%rdx), %rax # rax = pointer to state lane
164
+ add $8, %rdx # advance to the next mapper entry
165
+ add %rdi, %rax
166
+ KeccakP1600_OverwriteBytes_NotAlignedLoop:
167
+ mov (%rsi), %r8b
168
+ inc %rsi
169
+ mov %r8b, (%rax)
170
+ inc %rax
171
+ dec %r9
172
+ jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
173
+ jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
174
+ KeccakP1600_OverwriteBytes_LaneAlignedLoop:
175
+ mov (%rsi), %r8 # next 8 input bytes
176
+ add $8, %rsi
177
+ mov (%rdx), %rax
178
+ add $8, %rdx
179
+ add %rdi, %rax # rax = address of the current lane
180
+ mov %r8, (%rax)
181
+ KeccakP1600_OverwriteBytes_LaneAlignedCheck:
182
+ sub $8, %rcx
183
+ jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
184
+ KeccakP1600_OverwriteBytes_LastIncompleteLane:
185
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 remaining bytes
186
+ jz KeccakP1600_OverwriteBytes_Exit
187
+ mov (%rdx), %rax
188
+ add %rdi, %rax
189
+ KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
190
+ mov (%rsi), %r8b
191
+ inc %rsi
192
+ mov %r8b, (%rax)
193
+ inc %rax
194
+ dec %rcx
195
+ jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
196
+ KeccakP1600_OverwriteBytes_Exit:
197
+ ret
198
+ .ifndef old_gas_syntax
199
+ .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
200
+ .endif
201
+
202
+ # -----------------------------------------------------------------------------
203
+ #
204
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
205
+ # %rdi %rsi
206
+ #
+ # Zeroes the first `byteCount` state bytes, starting with the lane described by the
+ # first mapState entry. Whole 8-byte lanes are cleared first, then a tail of 1..7 bytes
+ # in the following lane. Each lane address is state + the lane's 8-byte mapState entry
+ # (mapState is defined elsewhere in this file).
+ # `byteCount` is zero-extended on entry because the upper 32 bits of a register that
+ # carries an unsigned int argument are unspecified by the System V x86-64 calling convention.
+ # Clobbers: rax, rdx, rsi, flags.
+ #
207
+ .globl KeccakP1600_OverwriteWithZeroes
208
+ .globl _KeccakP1600_OverwriteWithZeroes
209
+ .ifndef old_gas_syntax
210
+ .type KeccakP1600_OverwriteWithZeroes,@function
211
+ .endif
212
+ KeccakP1600_OverwriteWithZeroes:
213
+ _KeccakP1600_OverwriteWithZeroes:
214
+ .balign 32
215
+ mov %esi, %esi # zero-extend byteCount: bits 63..32 of rsi are not guaranteed to be 0
+ test %rsi, %rsi # nothing to do for byteCount == 0
216
+ jz KeccakP1600_OverwriteWithZeroes_Exit
217
+ lea mapState(%rip), %rdx # rdx pointer into state index mapper
218
+ jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
219
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
220
+ mov (%rdx), %rax
221
+ add $8, %rdx # advance to the next mapper entry
222
+ add %rdi, %rax # rax = address of the current lane
223
+ movq $0, (%rax)
224
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
225
+ sub $8, %rsi
226
+ jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop # no borrow: at least 8 bytes were left
227
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
228
+ add $8, %rsi # undo the last subtraction: rsi = 0..7 remaining bytes
229
+ jz KeccakP1600_OverwriteWithZeroes_Exit
230
+ mov (%rdx), %rax
231
+ add %rdi, %rax
232
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
233
+ movb $0, (%rax)
234
+ inc %rax
235
+ dec %rsi
236
+ jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
237
+ KeccakP1600_OverwriteWithZeroes_Exit:
238
+ ret
239
+ .ifndef old_gas_syntax
240
+ .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
241
+ .endif
242
+
243
+ # -----------------------------------------------------------------------------
244
+ #
245
+ # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
246
+ # %rdi %rsi %rdx %rcx
247
+ #
+ # Copies `length` state bytes, starting at state byte `offset`, into `data`.
+ # Each lane is located through the mapState table (defined elsewhere in this file): the
+ # 8-byte entry for a lane is added to `state` to form that lane's address.
+ # The work is done in three phases: an optional unaligned head (up to the next lane
+ # boundary), whole 8-byte lanes, and an optional tail of 1..7 bytes. The tail loads the
+ # whole lane once and shifts the bytes out one at a time.
+ # `length` is zero-extended on entry because the upper 32 bits of a register that carries
+ # an unsigned int argument are unspecified by the System V x86-64 calling convention.
+ # rbx is saved and restored. Clobbers: rax, rcx, rdx, rsi, r8, r9, flags.
+ #
248
+ .globl KeccakP1600_ExtractBytes
249
+ .globl _KeccakP1600_ExtractBytes
250
+ .ifndef old_gas_syntax
251
+ .type KeccakP1600_ExtractBytes,@function
252
+ .endif
253
+ KeccakP1600_ExtractBytes:
254
+ _KeccakP1600_ExtractBytes:
255
+ .balign 32
256
+ push %rbx # rbx is callee-saved and used as the head-phase counter
257
+ mov %ecx, %ecx # zero-extend length: bits 63..32 of rcx are not guaranteed to be 0
+ test %rcx, %rcx # nothing to do for length == 0
258
+ jz KeccakP1600_ExtractBytes_Exit
259
+ mov %rdx, %rax # rax offset in lane
260
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
261
+ lea mapState(%rip), %r9
262
+ add %r9, %rdx
263
+ and $7, %rax
264
+ jz KeccakP1600_ExtractBytes_LaneAlignedCheck
265
+ mov $8, %rbx # rbx is (max) length of incomplete lane
266
+ sub %rax, %rbx
267
+ cmp %rcx, %rbx
268
+ cmovae %rcx, %rbx # rbx = min(bytes left in this lane, length)
269
+ sub %rbx, %rcx # length -= length of incomplete lane
270
+ mov (%rdx), %r9
271
+ add $8, %rdx # advance to the next mapper entry
272
+ add %rdi, %r9
273
+ add %rax, %r9 # r9 = address of the first byte to read
274
+ KeccakP1600_ExtractBytes_NotAlignedLoop:
275
+ mov (%r9), %r8b
276
+ inc %r9
277
+ mov %r8b, (%rsi)
278
+ inc %rsi
279
+ dec %rbx
280
+ jnz KeccakP1600_ExtractBytes_NotAlignedLoop
281
+ jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
282
+ KeccakP1600_ExtractBytes_LaneAlignedLoop:
283
+ mov (%rdx), %rax
284
+ add $8, %rdx
285
+ add %rdi, %rax # rax = address of the current lane
286
+ mov (%rax), %r8
287
+ mov %r8, (%rsi)
288
+ add $8, %rsi
289
+ KeccakP1600_ExtractBytes_LaneAlignedCheck:
290
+ sub $8, %rcx
291
+ jnc KeccakP1600_ExtractBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
292
+ KeccakP1600_ExtractBytes_LastIncompleteLane:
293
+ add $8, %rcx # undo the last subtraction: rcx = 0..7 remaining bytes
294
+ jz KeccakP1600_ExtractBytes_Exit
295
+ mov (%rdx), %rax
296
+ add %rdi, %rax
297
+ mov (%rax), %r8 # load the whole lane once
298
+ KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
299
+ mov %r8b, (%rsi)
300
+ shr $8, %r8 # bring the next lane byte into r8b
301
+ inc %rsi
302
+ dec %rcx
303
+ jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
304
+ KeccakP1600_ExtractBytes_Exit:
305
+ pop %rbx
306
+ ret
307
+ .ifndef old_gas_syntax
308
+ .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
309
+ .endif
310
+
311
+ # -----------------------------------------------------------------------------
312
+ #
313
+ # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
314
+ # %rdi %rsi %rdx %rcx %r8
315
+ #
+ # Writes to `output` the XOR of `length` state bytes (starting at state byte `offset`)
+ # with the corresponding bytes of `input`. The state itself is only read.
+ # Each lane is located through the mapState table (defined elsewhere in this file): the
+ # 8-byte entry for a lane is added to `state` to form that lane's address.
+ # The work is done in three phases: an optional unaligned head (up to the next lane
+ # boundary), whole 8-byte lanes, and an optional tail of 1..7 bytes. The tail loads the
+ # whole lane once and shifts the bytes out one at a time.
+ # `length` is zero-extended on entry because the upper 32 bits of a register that carries
+ # an unsigned int argument are unspecified by the System V x86-64 calling convention.
+ # rbx and r10 are saved and restored. Clobbers: rax, rcx, rdx, rsi, r8, r9, flags.
+ #
316
+ .globl KeccakP1600_ExtractAndAddBytes
317
+ .globl _KeccakP1600_ExtractAndAddBytes
318
+ .ifndef old_gas_syntax
319
+ .type KeccakP1600_ExtractAndAddBytes,@function
320
+ .endif
321
+ KeccakP1600_ExtractAndAddBytes:
322
+ _KeccakP1600_ExtractAndAddBytes:
323
+ .balign 32
324
+ push %rbx
325
+ push %r10
326
+ mov %r8d, %r8d # zero-extend length: bits 63..32 of r8 are not guaranteed to be 0
+ test %r8, %r8 # nothing to do for length == 0
327
+ jz KeccakP1600_ExtractAndAddBytes_Exit
328
+ mov %rcx, %rax # rax offset in lane
329
+ and $0xFFFFFFF8, %ecx # rcx pointer into state index mapper
330
+ lea mapState(%rip), %r9
331
+ add %r9, %rcx
332
+ and $7, %rax
333
+ jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
334
+ mov $8, %rbx # rbx is (max) length of incomplete lane
335
+ sub %rax, %rbx
336
+ cmp %r8, %rbx
337
+ cmovae %r8, %rbx # rbx = min(bytes left in this lane, length)
338
+ sub %rbx, %r8 # length -= length of incomplete lane
339
+ mov (%rcx), %r9
340
+ add $8, %rcx # advance to the next mapper entry
341
+ add %rdi, %r9
342
+ add %rax, %r9 # r9 = address of the first state byte to read
343
+ KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
344
+ mov (%r9), %r10b
345
+ inc %r9
346
+ xor (%rsi), %r10b
347
+ inc %rsi
348
+ mov %r10b, (%rdx)
349
+ inc %rdx
350
+ dec %rbx
351
+ jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
352
+ jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
353
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
354
+ mov (%rcx), %rax
355
+ add $8, %rcx
356
+ add %rdi, %rax # rax = address of the current lane
357
+ mov (%rax), %r10
358
+ xor (%rsi), %r10
359
+ add $8, %rsi
360
+ mov %r10, (%rdx)
361
+ add $8, %rdx
362
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
363
+ sub $8, %r8
364
+ jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
365
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
366
+ add $8, %r8 # undo the last subtraction: r8 = 0..7 remaining bytes
367
+ jz KeccakP1600_ExtractAndAddBytes_Exit
368
+ mov (%rcx), %rax
369
+ add %rdi, %rax
370
+ mov (%rax), %r10 # load the whole lane once
371
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
372
+ xor (%rsi), %r10b
373
+ inc %rsi
374
+ mov %r10b, (%rdx)
375
+ inc %rdx
376
+ shr $8, %r10 # drop the byte just written and bring the next lane byte into r10b
377
+ dec %r8
378
+ jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
379
+ KeccakP1600_ExtractAndAddBytes_Exit:
380
+ pop %r10
381
+ pop %rbx
382
+ ret
383
+ .ifndef old_gas_syntax
384
+ .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
385
+ .endif
386
+
# -----------------------------------------------------------------------------
#
# __KeccakF1600 -- internal round loop (not a C-callable function)
#
# Register interface, as used by the code below:
#   %ymm0..%ymm6  state held in registers (in/out)
#   %r8           base for the left-rotation table, addressed as  k*32-96(%r8)
#   %r9           base for the right-rotation table, addressed as k*32-96(%r9)
#   %r10          pointer to the first round constant (32 bytes per round);
#                 advanced by 32 after every round
#   %eax          number of rounds; the loop tests at the bottom, so the body
#                 always runs at least once
# Overwrites %ymm7..%ymm15, %eax, %r10 and the flags.
#
.ifndef old_gas_syntax
        .type   __KeccakF1600,@function
.endif
        .balign 32
__KeccakF1600:
.Loop_avx2:
        ######################################### Theta
        vpshufd     $0b01001110, %ymm2, %ymm13
        vpxor       %ymm3, %ymm5, %ymm12
        vpxor       %ymm6, %ymm4, %ymm9
        vpxor       %ymm1, %ymm12, %ymm12
        vpxor       %ymm9, %ymm12, %ymm12               # C[1..4]

        vpermq      $0b10010011, %ymm12, %ymm11
        vpxor       %ymm2, %ymm13, %ymm13
        vpermq      $0b01001110, %ymm13, %ymm7

        vpsrlq      $63, %ymm12, %ymm8
        vpaddq      %ymm12, %ymm12, %ymm9
        vpor        %ymm9, %ymm8, %ymm8                 # ROL64(C[1..4],1)

        vpermq      $0b00111001, %ymm8, %ymm15
        vpxor       %ymm11, %ymm8, %ymm14
        vpermq      $0b00000000, %ymm14, %ymm14         # D[0..0] = ROL64(C[1],1) ^ C[4]

        vpxor       %ymm0, %ymm13, %ymm13
        vpxor       %ymm7, %ymm13, %ymm13               # C[0..0]

        vpsrlq      $63, %ymm13, %ymm7
        vpaddq      %ymm13, %ymm13, %ymm8
        vpor        %ymm7, %ymm8, %ymm8                 # ROL64(C[0..0],1)

        vpxor       %ymm14, %ymm2, %ymm2                # ^= D[0..0]
        vpxor       %ymm14, %ymm0, %ymm0                # ^= D[0..0]

        vpblendd    $0b11000000, %ymm8, %ymm15, %ymm15
        vpblendd    $0b00000011, %ymm13, %ymm11, %ymm11
        vpxor       %ymm11, %ymm15, %ymm15              # D[1..4] = ROL64(C[2..4,0],1) ^ C[0..3]

        ######################################### Rho + Pi + pre-Chi shuffle
        # Each rotation is a variable left shift OR'ed with the matching
        # variable right shift, with per-qword counts read from the two tables.
        vpsllvq     0*32-96(%r8), %ymm2, %ymm10
        vpsrlvq     0*32-96(%r9), %ymm2, %ymm2
        vpor        %ymm10, %ymm2, %ymm2

        vpxor       %ymm15, %ymm3, %ymm3                # ^= D[1..4] from Theta
        vpsllvq     2*32-96(%r8), %ymm3, %ymm11
        vpsrlvq     2*32-96(%r9), %ymm3, %ymm3
        vpor        %ymm11, %ymm3, %ymm3

        vpxor       %ymm15, %ymm4, %ymm4                # ^= D[1..4] from Theta
        vpsllvq     3*32-96(%r8), %ymm4, %ymm12
        vpsrlvq     3*32-96(%r9), %ymm4, %ymm4
        vpor        %ymm12, %ymm4, %ymm4

        vpxor       %ymm15, %ymm5, %ymm5                # ^= D[1..4] from Theta
        vpsllvq     4*32-96(%r8), %ymm5, %ymm13
        vpsrlvq     4*32-96(%r9), %ymm5, %ymm5
        vpor        %ymm13, %ymm5, %ymm5

        vpxor       %ymm15, %ymm6, %ymm6                # ^= D[1..4] from Theta
        vpermq      $0b10001101, %ymm2, %ymm10          # %ymm2 -> future %ymm3
        vpermq      $0b10001101, %ymm3, %ymm11          # %ymm3 -> future %ymm4
        vpsllvq     5*32-96(%r8), %ymm6, %ymm14
        vpsrlvq     5*32-96(%r9), %ymm6, %ymm8
        vpor        %ymm14, %ymm8, %ymm8                # %ymm6 -> future %ymm1

        vpxor       %ymm15, %ymm1, %ymm1                # ^= D[1..4] from Theta
        vpermq      $0b00011011, %ymm4, %ymm12          # %ymm4 -> future %ymm5
        vpermq      $0b01110010, %ymm5, %ymm13          # %ymm5 -> future %ymm6
        vpsllvq     1*32-96(%r8), %ymm1, %ymm15
        vpsrlvq     1*32-96(%r9), %ymm1, %ymm9
        vpor        %ymm15, %ymm9, %ymm9                # %ymm1 -> future %ymm2

        ######################################### Chi
        vpsrldq     $8, %ymm8, %ymm14
        vpandn      %ymm14, %ymm8, %ymm7                # targeting [0][0] [0][0] [0][0] [0][0]

        vpblendd    $0b00001100, %ymm13, %ymm9, %ymm3   #                   [4][4] [2][0]
        vpblendd    $0b00001100, %ymm9, %ymm11, %ymm15  #                   [4][0] [2][1]
        vpblendd    $0b00001100, %ymm11, %ymm10, %ymm5  #                   [4][2] [2][4]
        vpblendd    $0b00001100, %ymm10, %ymm9, %ymm14  #                   [4][3] [2][0]
        vpblendd    $0b00110000, %ymm11, %ymm3, %ymm3   #            [1][3] [4][4] [2][0]
        vpblendd    $0b00110000, %ymm12, %ymm15, %ymm15 #            [1][4] [4][0] [2][1]
        vpblendd    $0b00110000, %ymm9, %ymm5, %ymm5    #            [1][0] [4][2] [2][4]
        vpblendd    $0b00110000, %ymm13, %ymm14, %ymm14 #            [1][1] [4][3] [2][0]
        vpblendd    $0b11000000, %ymm12, %ymm3, %ymm3   #     [3][2] [1][3] [4][4] [2][0]
        vpblendd    $0b11000000, %ymm13, %ymm15, %ymm15 #     [3][3] [1][4] [4][0] [2][1]
        vpblendd    $0b11000000, %ymm13, %ymm5, %ymm5   #     [3][3] [1][0] [4][2] [2][4]
        vpblendd    $0b11000000, %ymm11, %ymm14, %ymm14 #     [3][4] [1][1] [4][3] [2][0]
        vpandn      %ymm15, %ymm3, %ymm3                # targeting [3][1] [1][2] [4][3] [2][4]
        vpandn      %ymm14, %ymm5, %ymm5                # targeting [3][2] [1][4] [4][1] [2][3]

        vpblendd    $0b00001100, %ymm9, %ymm12, %ymm6   #                   [4][0] [2][3]
        vpblendd    $0b00001100, %ymm12, %ymm10, %ymm15 #                   [4][1] [2][4]
        vpxor       %ymm10, %ymm3, %ymm3
        vpblendd    $0b00110000, %ymm10, %ymm6, %ymm6   #            [1][2] [4][0] [2][3]
        vpblendd    $0b00110000, %ymm11, %ymm15, %ymm15 #            [1][3] [4][1] [2][4]
        vpxor       %ymm12, %ymm5, %ymm5
        vpblendd    $0b11000000, %ymm11, %ymm6, %ymm6   #     [3][4] [1][2] [4][0] [2][3]
        vpblendd    $0b11000000, %ymm9, %ymm15, %ymm15  #     [3][0] [1][3] [4][1] [2][4]
        vpandn      %ymm15, %ymm6, %ymm6                # targeting [3][3] [1][1] [4][4] [2][2]
        vpxor       %ymm13, %ymm6, %ymm6

        vpermq      $0b00011110, %ymm8, %ymm4           # [0][1] [0][2] [0][4] [0][3]
        vpblendd    $0b00110000, %ymm0, %ymm4, %ymm15   # [0][1] [0][0] [0][4] [0][3]
        vpermq      $0b00111001, %ymm8, %ymm1           # [0][1] [0][4] [0][3] [0][2]
        vpblendd    $0b11000000, %ymm0, %ymm1, %ymm1    # [0][0] [0][4] [0][3] [0][2]
        vpandn      %ymm15, %ymm1, %ymm1                # targeting [0][4] [0][3] [0][2] [0][1]

        vpblendd    $0b00001100, %ymm12, %ymm11, %ymm2  #                   [4][1] [2][1]
        vpblendd    $0b00001100, %ymm11, %ymm13, %ymm14 #                   [4][2] [2][2]
        vpblendd    $0b00110000, %ymm13, %ymm2, %ymm2   #            [1][1] [4][1] [2][1]
        vpblendd    $0b00110000, %ymm10, %ymm14, %ymm14 #            [1][2] [4][2] [2][2]
        vpblendd    $0b11000000, %ymm10, %ymm2, %ymm2   #     [3][1] [1][1] [4][1] [2][1]
        vpblendd    $0b11000000, %ymm12, %ymm14, %ymm14 #     [3][2] [1][2] [4][2] [2][2]
        vpandn      %ymm14, %ymm2, %ymm2                # targeting [3][0] [1][0] [4][0] [2][0]
        vpxor       %ymm9, %ymm2, %ymm2

        vpermq      $0b00000000, %ymm7, %ymm7           # [0][0] [0][0] [0][0] [0][0]
        vpermq      $0b00011011, %ymm3, %ymm3           # post-Chi shuffle
        vpermq      $0b10001101, %ymm5, %ymm5
        vpermq      $0b01110010, %ymm6, %ymm6

        vpblendd    $0b00001100, %ymm10, %ymm13, %ymm4  #                   [4][3] [2][2]
        vpblendd    $0b00001100, %ymm13, %ymm12, %ymm14 #                   [4][4] [2][3]
        vpblendd    $0b00110000, %ymm12, %ymm4, %ymm4   #            [1][4] [4][3] [2][2]
        vpblendd    $0b00110000, %ymm9, %ymm14, %ymm14  #            [1][0] [4][4] [2][3]
        vpblendd    $0b11000000, %ymm9, %ymm4, %ymm4    #     [3][0] [1][4] [4][3] [2][2]
        vpblendd    $0b11000000, %ymm10, %ymm14, %ymm14 #     [3][1] [1][0] [4][4] [2][3]
        vpandn      %ymm14, %ymm4, %ymm4                # targeting [3][4] [1][3] [4][2] [2][1]

        vpxor       %ymm7, %ymm0, %ymm0
        vpxor       %ymm8, %ymm1, %ymm1
        vpxor       %ymm11, %ymm4, %ymm4

        ######################################### Iota
        vpxor       (%r10), %ymm0, %ymm0                # fold in this round's constant
        lea         32(%r10), %r10                      # step to the next round's constant

        dec         %eax
        jnz         .Loop_avx2
        ret
.ifndef old_gas_syntax
        .size   __KeccakF1600,.-__KeccakF1600
.endif
536
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_24rounds(void *state);
#
# In:     %rdi = state
# Effect: loads the 200-byte state into %ymm0..%ymm6, runs __KeccakF1600 for
#         24 rounds starting at the first entry of iotas, and stores the
#         registers back to the same locations.
# Uses:   %rax, %rdi, %r8, %r9, %r10 and the vector registers touched by
#         __KeccakF1600; nothing is saved on the stack.
#
        .globl  KeccakP1600_Permute_24rounds
        .globl  _KeccakP1600_Permute_24rounds
.ifndef old_gas_syntax
        .type   KeccakP1600_Permute_24rounds,@function
.endif
KeccakP1600_Permute_24rounds:
_KeccakP1600_Permute_24rounds:
        .balign 32
        # Set up the register interface expected by __KeccakF1600.
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         iotas(%rip), %r10               # round constants, from the first row
        mov         $24, %eax                       # round count
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96

        # State -> registers.
        vzeroupper
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6

        call        __KeccakF1600

        # Registers -> state.
        vmovq       %xmm0, -96(%rdi)                # only the low qword of ymm0 is written back
        vmovdqu     %ymm1, 8+32*0-96(%rdi)
        vmovdqu     %ymm2, 8+32*1-96(%rdi)
        vmovdqu     %ymm3, 8+32*2-96(%rdi)
        vmovdqu     %ymm4, 8+32*3-96(%rdi)
        vmovdqu     %ymm5, 8+32*4-96(%rdi)
        vmovdqu     %ymm6, 8+32*5-96(%rdi)
        vzeroupper
        ret
.ifndef old_gas_syntax
        .size   KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
.endif
576
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_12rounds(void *state);
#
# In:     %rdi = state
# Effect: loads the 200-byte state into %ymm0..%ymm6, runs __KeccakF1600 for
#         12 rounds starting at row 12 of iotas (i.e. the last 12 of the 24
#         round constants), and stores the registers back.
# Uses:   %rax, %rdi, %r8, %r9, %r10 and the vector registers touched by
#         __KeccakF1600; nothing is saved on the stack.
#
        .globl  KeccakP1600_Permute_12rounds
        .globl  _KeccakP1600_Permute_12rounds
.ifndef old_gas_syntax
        .type   KeccakP1600_Permute_12rounds,@function
.endif
KeccakP1600_Permute_12rounds:
_KeccakP1600_Permute_12rounds:
        .balign 32
        # Set up the register interface expected by __KeccakF1600.
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         iotas+12*4*8(%rip), %r10        # skip 12 rows of 4 quads
        mov         $12, %eax                       # round count
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96

        # State -> registers.
        vzeroupper
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6

        call        __KeccakF1600

        # Registers -> state.
        vmovq       %xmm0, -96(%rdi)                # only the low qword of ymm0 is written back
        vmovdqu     %ymm1, 8+32*0-96(%rdi)
        vmovdqu     %ymm2, 8+32*1-96(%rdi)
        vmovdqu     %ymm3, 8+32*2-96(%rdi)
        vmovdqu     %ymm4, 8+32*3-96(%rdi)
        vmovdqu     %ymm5, 8+32*4-96(%rdi)
        vmovdqu     %ymm6, 8+32*5-96(%rdi)
        vzeroupper
        ret
.ifndef old_gas_syntax
        .size   KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
.endif
616
+
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
#
# In:     %rdi = state, %esi = nrounds
# Effect: runs the last `nrounds` rounds of the permutation on the state: the
#         round-constant pointer is set to (end of iotas) - nrounds*32 and
#         __KeccakF1600 is run for nrounds iterations.
#         nrounds == 0 returns immediately and leaves the state untouched.
# Pre:    nrounds <= 24; larger values would make the constant pointer start
#         before the iotas table (not checked here).
# Uses:   %rax, %rsi, %rdi, %r8, %r9, %r10 and the vector registers touched by
#         __KeccakF1600; nothing is saved on the stack.
#
# nrounds is a 32-bit argument, so it is zero-extended before it is scaled
# and subtracted from a 64-bit pointer. The zero check is needed because the
# round loop decrements before testing: a count of 0 would wrap around.
#
        .globl  KeccakP1600_Permute_Nrounds
        .globl  _KeccakP1600_Permute_Nrounds
.ifndef old_gas_syntax
        .type   KeccakP1600_Permute_Nrounds,@function
.endif
KeccakP1600_Permute_Nrounds:
_KeccakP1600_Permute_Nrounds:
        .balign 32
        mov         %esi, %eax                      # rax = (uint64_t)nrounds = round count
        test        %eax, %eax
        jz          .LKeccakP1600_Permute_Nrounds_Done  # nothing to do for zero rounds
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         iotas+24*4*8(%rip), %r10        # one past the last round constant
        mov         %rax, %rsi
        shl         $2+3, %rsi                      # nrounds * 32 bytes per constant row
        sub         %rsi, %r10                      # r10 -> constant of the first round to run
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96

        # State -> registers.
        vzeroupper
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6

        call        __KeccakF1600

        # Registers -> state.
        vmovq       %xmm0, -96(%rdi)                # only the low qword of ymm0 is written back
        vmovdqu     %ymm1, 8+32*0-96(%rdi)
        vmovdqu     %ymm2, 8+32*1-96(%rdi)
        vmovdqu     %ymm3, 8+32*2-96(%rdi)
        vmovdqu     %ymm4, 8+32*3-96(%rdi)
        vmovdqu     %ymm5, 8+32*4-96(%rdi)
        vmovdqu     %ymm6, 8+32*5-96(%rdi)
        vzeroupper
.LKeccakP1600_Permute_Nrounds_Done:
        ret
.ifndef old_gas_syntax
        .size   KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
.endif
658
+
# -----------------------------------------------------------------------------
#
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#
# In:     %rdi = state, %esi = laneCount, %rdx = data, %rcx = dataByteLen
# Out:    %rax = number of input bytes consumed (final data pointer - initial)
# Effect: while at least laneCount whole 8-byte lanes of input remain, XORs
#         laneCount lanes into the state and applies 24 rounds.
#         laneCount 21 and 17 keep the state in %ymm0..%ymm6 across blocks and
#         call __KeccakF1600 directly; any other laneCount XORs lane by lane
#         into memory through mapState and calls KeccakP1600_Permute_24rounds.
# Pre:    laneCount != 0 (the lane-add loop decrements before testing);
#         the generic path indexes mapState with lane numbers 0..laneCount-1.
# Saves:  %rbx and %r10 are pushed on entry and restored on exit.
#
# laneCount is a 32-bit argument, so it is zero-extended before the 64-bit
# compares and subtraction. The call in the generic path is made with %rsp
# 16-byte aligned: entry (8 mod 16) + 6 pushes leaves 8 mod 16, so 8 bytes of
# padding are added around the call.
#
        .globl  KeccakF1600_FastLoop_Absorb
        .globl  _KeccakF1600_FastLoop_Absorb
.ifndef old_gas_syntax
        .type   KeccakF1600_FastLoop_Absorb,@function
.endif
KeccakF1600_FastLoop_Absorb:
_KeccakF1600_FastLoop_Absorb:
        .balign 32
        push        %rbx
        push        %r10
        mov         %esi, %esi                      # rsi = (uint64_t)laneCount, drop undefined upper bits
        shr         $3, %rcx                        # rcx = data length in lanes
        mov         %rdx, %rbx                      # rbx = initial data pointer
        cmp         %rsi, %rcx
        jb          KeccakF1600_FastLoop_Absorb_Exit    # less than one block: consume nothing
        vzeroupper
        cmp         $21, %rsi
        jnz         KeccakF1600_FastLoop_Absorb_Not21Lanes

        # ---- laneCount == 21 -------------------------------------------------
        sub         $21, %rcx                       # rcx = lanes left after the block about to be absorbed
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
        vpbroadcastq (%rdx), %ymm7                  # input lane 0
        vmovdqu     8(%rdx), %ymm8                  # input lanes 1..4

        # Input for ymm2: all four map2 lanes are below 21, so gather with an all-ones mask.
        vmovdqa     map2(%rip), %xmm15
        vpcmpeqd    %ymm14, %ymm14, %ymm14
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm9

        # Input for ymm3..ymm6: masked gathers; the destination is cleared
        # first so that masked-off elements contribute zero to the XOR.
        vmovdqa     mask3_21(%rip), %ymm14
        vpxor       %ymm10, %ymm10, %ymm10
        vmovdqa     map3(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm10

        vmovdqa     mask4_21(%rip), %ymm14
        vpxor       %ymm11, %ymm11, %ymm11
        vmovdqa     map4(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm11

        vmovdqa     mask5_21(%rip), %ymm14
        vpxor       %ymm12, %ymm12, %ymm12
        vmovdqa     map5(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm12

        vmovdqa     mask6_21(%rip), %ymm14
        vpxor       %ymm13, %ymm13, %ymm13
        vmovdqa     map6(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm13

        # XOR the block into the state.
        vpxor       %ymm7, %ymm0, %ymm0
        vpxor       %ymm8, %ymm1, %ymm1
        vpxor       %ymm9, %ymm2, %ymm2
        vpxor       %ymm10, %ymm3, %ymm3
        vpxor       %ymm11, %ymm4, %ymm4
        vpxor       %ymm12, %ymm5, %ymm5
        vpxor       %ymm13, %ymm6, %ymm6
        add         $21*8, %rdx                     # advance the data pointer by one block
        lea         iotas(%rip), %r10
        mov         $24, %eax
        call        __KeccakF1600
        sub         $21, %rcx
        jnc         KeccakF1600_FastLoop_Absorb_Loop21Lanes     # no borrow: another full block remains

KeccakF1600_FastLoop_Absorb_SaveAndExit:
        vmovq       %xmm0, -96(%rdi)
        vmovdqu     %ymm1, 8+32*0-96(%rdi)
        vmovdqu     %ymm2, 8+32*1-96(%rdi)
        vmovdqu     %ymm3, 8+32*2-96(%rdi)
        vmovdqu     %ymm4, 8+32*3-96(%rdi)
        vmovdqu     %ymm5, 8+32*4-96(%rdi)
        vmovdqu     %ymm6, 8+32*5-96(%rdi)
KeccakF1600_FastLoop_Absorb_Exit:
        vzeroupper
        mov         %rdx, %rax                      # return number of bytes processed
        sub         %rbx, %rax
        pop         %r10
        pop         %rbx
        ret

        # ---- laneCount == 17 -------------------------------------------------
KeccakF1600_FastLoop_Absorb_Not21Lanes:
        cmp         $17, %rsi
        jnz         KeccakF1600_FastLoop_Absorb_Not17Lanes
        sub         $17, %rcx                       # rcx = lanes left after the block about to be absorbed
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
        vpbroadcastq (%rdx), %ymm7                  # input lane 0
        vmovdqu     8(%rdx), %ymm8                  # input lanes 1..4

        # Input for ymm2..ymm6: masked gathers into cleared destinations.
        vmovdqa     mask2_17(%rip), %ymm14
        vpxor       %ymm9, %ymm9, %ymm9
        vmovdqa     map2(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm9

        vmovdqa     mask3_17(%rip), %ymm14
        vpxor       %ymm10, %ymm10, %ymm10
        vmovdqa     map3(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm10

        vmovdqa     mask4_17(%rip), %ymm14
        vpxor       %ymm11, %ymm11, %ymm11
        vmovdqa     map4(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm11

        vmovdqa     mask5_17(%rip), %ymm14
        vpxor       %ymm12, %ymm12, %ymm12
        vmovdqa     map5(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm12

        vmovdqa     mask6_17(%rip), %ymm14
        vpxor       %ymm13, %ymm13, %ymm13
        vmovdqa     map6(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm13

        # XOR the block into the state.
        vpxor       %ymm7, %ymm0, %ymm0
        vpxor       %ymm8, %ymm1, %ymm1
        vpxor       %ymm9, %ymm2, %ymm2
        vpxor       %ymm10, %ymm3, %ymm3
        vpxor       %ymm11, %ymm4, %ymm4
        vpxor       %ymm12, %ymm5, %ymm5
        vpxor       %ymm13, %ymm6, %ymm6
        add         $17*8, %rdx                     # advance the data pointer by one block
        lea         iotas(%rip), %r10
        mov         $24, %eax
        call        __KeccakF1600
        sub         $17, %rcx
        jnc         KeccakF1600_FastLoop_Absorb_Loop17Lanes     # no borrow: another full block remains
        jmp         KeccakF1600_FastLoop_Absorb_SaveAndExit

        # ---- any other laneCount: state stays in memory ----------------------
KeccakF1600_FastLoop_Absorb_Not17Lanes:
        lea         mapState(%rip), %r9
        mov         %rsi, %rax                      # rax = lanes still to add for this block
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
        mov         (%rdx), %r8                     # next input lane
        add         $8, %rdx
        mov         (%r9), %r10                     # byte offset of the matching lane in the state
        add         $8, %r9
        add         %rdi, %r10
        xor         %r8, (%r10)
        sub         $1, %rax
        jnz         KeccakF1600_FastLoop_Absorb_LanesAddLoop
        sub         %rsi, %rcx                      # one block consumed
        push        %rdi                            # the callee overwrites these; keep them across the call
        push        %rsi
        push        %rdx
        push        %rcx
        sub         $8, %rsp                        # pad so %rsp is 16-byte aligned at the call
.ifdef no_plt
        call        KeccakP1600_Permute_24rounds
.else
        call        KeccakP1600_Permute_24rounds@PLT
.endif
        add         $8, %rsp                        # drop the alignment padding
        pop         %rcx
        pop         %rdx
        pop         %rsi
        pop         %rdi
        cmp         %rsi, %rcx
        jae         KeccakF1600_FastLoop_Absorb_Not17Lanes      # another full block remains
        jmp         KeccakF1600_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
        .size   KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
.endif
837
+
# -----------------------------------------------------------------------------
#
# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#
# In:     %rdi = state, %esi = laneCount, %rdx = data, %rcx = dataByteLen
# Out:    %rax = number of input bytes consumed (final data pointer - initial)
# Effect: while at least laneCount whole 8-byte lanes of input remain, XORs
#         laneCount lanes into the state and applies 12 rounds (round
#         constants start at row 12 of iotas).
#         laneCount 21 and 17 keep the state in %ymm0..%ymm6 across blocks and
#         call __KeccakF1600 directly; any other laneCount XORs lane by lane
#         into memory through mapState and calls KeccakP1600_Permute_12rounds.
# Pre:    laneCount != 0 (the lane-add loop decrements before testing);
#         the generic path indexes mapState with lane numbers 0..laneCount-1.
# Saves:  %rbx and %r10 are pushed on entry and restored on exit.
#
# laneCount is a 32-bit argument, so it is zero-extended before the 64-bit
# compares and subtraction. The call in the generic path is made with %rsp
# 16-byte aligned: entry (8 mod 16) + 6 pushes leaves 8 mod 16, so 8 bytes of
# padding are added around the call.
#
        .globl  KeccakP1600_12rounds_FastLoop_Absorb
        .globl  _KeccakP1600_12rounds_FastLoop_Absorb
.ifndef old_gas_syntax
        .type   KeccakP1600_12rounds_FastLoop_Absorb,@function
.endif
KeccakP1600_12rounds_FastLoop_Absorb:
_KeccakP1600_12rounds_FastLoop_Absorb:
        .balign 32
        push        %rbx
        push        %r10
        mov         %esi, %esi                      # rsi = (uint64_t)laneCount, drop undefined upper bits
        shr         $3, %rcx                        # rcx = data length in lanes
        mov         %rdx, %rbx                      # rbx = initial data pointer
        cmp         %rsi, %rcx
        jb          KeccakP1600_12rounds_FastLoop_Absorb_Exit   # less than one block: consume nothing
        vzeroupper
        cmp         $21, %rsi
        jnz         KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes

        # ---- laneCount == 21 -------------------------------------------------
        sub         $21, %rcx                       # rcx = lanes left after the block about to be absorbed
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
        vpbroadcastq (%rdx), %ymm7                  # input lane 0
        vmovdqu     8(%rdx), %ymm8                  # input lanes 1..4

        # Input for ymm2: all four map2 lanes are below 21, so gather with an all-ones mask.
        vmovdqa     map2(%rip), %xmm15
        vpcmpeqd    %ymm14, %ymm14, %ymm14
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm9

        # Input for ymm3..ymm6: masked gathers; the destination is cleared
        # first so that masked-off elements contribute zero to the XOR.
        vmovdqa     mask3_21(%rip), %ymm14
        vpxor       %ymm10, %ymm10, %ymm10
        vmovdqa     map3(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm10

        vmovdqa     mask4_21(%rip), %ymm14
        vpxor       %ymm11, %ymm11, %ymm11
        vmovdqa     map4(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm11

        vmovdqa     mask5_21(%rip), %ymm14
        vpxor       %ymm12, %ymm12, %ymm12
        vmovdqa     map5(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm12

        vmovdqa     mask6_21(%rip), %ymm14
        vpxor       %ymm13, %ymm13, %ymm13
        vmovdqa     map6(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm13

        # XOR the block into the state.
        vpxor       %ymm7, %ymm0, %ymm0
        vpxor       %ymm8, %ymm1, %ymm1
        vpxor       %ymm9, %ymm2, %ymm2
        vpxor       %ymm10, %ymm3, %ymm3
        vpxor       %ymm11, %ymm4, %ymm4
        vpxor       %ymm12, %ymm5, %ymm5
        vpxor       %ymm13, %ymm6, %ymm6
        add         $21*8, %rdx                     # advance the data pointer by one block
        lea         iotas+12*4*8(%rip), %r10        # constants for the last 12 rounds
        mov         $12, %eax
        call        __KeccakF1600
        sub         $21, %rcx
        jnc         KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes    # no borrow: another full block remains

KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
        vmovq       %xmm0, -96(%rdi)
        vmovdqu     %ymm1, 8+32*0-96(%rdi)
        vmovdqu     %ymm2, 8+32*1-96(%rdi)
        vmovdqu     %ymm3, 8+32*2-96(%rdi)
        vmovdqu     %ymm4, 8+32*3-96(%rdi)
        vmovdqu     %ymm5, 8+32*4-96(%rdi)
        vmovdqu     %ymm6, 8+32*5-96(%rdi)
KeccakP1600_12rounds_FastLoop_Absorb_Exit:
        vzeroupper
        mov         %rdx, %rax                      # return number of bytes processed
        sub         %rbx, %rax
        pop         %r10
        pop         %rbx
        ret

        # ---- laneCount == 17 -------------------------------------------------
KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
        cmp         $17, %rsi
        jnz         KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
        sub         $17, %rcx                       # rcx = lanes left after the block about to be absorbed
        lea         rhotates_left+96(%rip), %r8
        lea         rhotates_right+96(%rip), %r9
        lea         96(%rdi), %rdi                  # bias the state pointer; accesses below use -96
        vpbroadcastq -96(%rdi), %ymm0               # first lane, replicated into all four qwords
        vmovdqu     8+32*0-96(%rdi), %ymm1
        vmovdqu     8+32*1-96(%rdi), %ymm2
        vmovdqu     8+32*2-96(%rdi), %ymm3
        vmovdqu     8+32*3-96(%rdi), %ymm4
        vmovdqu     8+32*4-96(%rdi), %ymm5
        vmovdqu     8+32*5-96(%rdi), %ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
        vpbroadcastq (%rdx), %ymm7                  # input lane 0
        vmovdqu     8(%rdx), %ymm8                  # input lanes 1..4

        # Input for ymm2..ymm6: masked gathers into cleared destinations.
        vmovdqa     mask2_17(%rip), %ymm14
        vpxor       %ymm9, %ymm9, %ymm9
        vmovdqa     map2(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm9

        vmovdqa     mask3_17(%rip), %ymm14
        vpxor       %ymm10, %ymm10, %ymm10
        vmovdqa     map3(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm10

        vmovdqa     mask4_17(%rip), %ymm14
        vpxor       %ymm11, %ymm11, %ymm11
        vmovdqa     map4(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm11

        vmovdqa     mask5_17(%rip), %ymm14
        vpxor       %ymm12, %ymm12, %ymm12
        vmovdqa     map5(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm12

        vmovdqa     mask6_17(%rip), %ymm14
        vpxor       %ymm13, %ymm13, %ymm13
        vmovdqa     map6(%rip), %xmm15
        vpgatherdq  %ymm14, (%rdx, %xmm15, 1), %ymm13

        # XOR the block into the state.
        vpxor       %ymm7, %ymm0, %ymm0
        vpxor       %ymm8, %ymm1, %ymm1
        vpxor       %ymm9, %ymm2, %ymm2
        vpxor       %ymm10, %ymm3, %ymm3
        vpxor       %ymm11, %ymm4, %ymm4
        vpxor       %ymm12, %ymm5, %ymm5
        vpxor       %ymm13, %ymm6, %ymm6
        add         $17*8, %rdx                     # advance the data pointer by one block
        lea         iotas+12*4*8(%rip), %r10        # constants for the last 12 rounds
        mov         $12, %eax
        call        __KeccakF1600
        sub         $17, %rcx
        jnc         KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes    # no borrow: another full block remains
        jmp         KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit

        # ---- any other laneCount: state stays in memory ----------------------
KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
        lea         mapState(%rip), %r9
        mov         %rsi, %rax                      # rax = lanes still to add for this block
KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
        mov         (%rdx), %r8                     # next input lane
        add         $8, %rdx
        mov         (%r9), %r10                     # byte offset of the matching lane in the state
        add         $8, %r9
        add         %rdi, %r10
        xor         %r8, (%r10)
        sub         $1, %rax
        jnz         KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
        sub         %rsi, %rcx                      # one block consumed
        push        %rdi                            # the callee overwrites these; keep them across the call
        push        %rsi
        push        %rdx
        push        %rcx
        sub         $8, %rsp                        # pad so %rsp is 16-byte aligned at the call
.ifdef no_plt
        call        KeccakP1600_Permute_12rounds
.else
        call        KeccakP1600_Permute_12rounds@PLT
.endif
        add         $8, %rsp                        # drop the alignment padding
        pop         %rcx
        pop         %rdx
        pop         %rsi
        pop         %rdi
        cmp         %rsi, %rcx
        jae         KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes     # another full block remains
        jmp         KeccakP1600_12rounds_FastLoop_Absorb_Exit
.ifndef old_gas_syntax
        .size   KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
.endif
1016
+
# -----------------------------------------------------------------------------
# Constant tables shared by the routines above.
# -----------------------------------------------------------------------------

        .equ    ALLON, 0xFFFFFFFFFFFFFFFF       # gather-mask element: load this qword

        .balign 64

# Per-qword left-rotation counts, one 32-byte row per state register.
# Code addresses the rows as k*32-96(%r8) with %r8 = rhotates_left+96.
rhotates_left:
        .quad        3,     18,     36,     41      # [2][0] [4][0] [1][0] [3][0]
        .quad        1,     62,     28,     27      # [0][1] [0][2] [0][3] [0][4]
        .quad       45,      6,     56,     39      # [3][1] [1][2] [4][3] [2][4]
        .quad       10,     61,     55,      8      # [2][1] [4][2] [1][3] [3][4]
        .quad        2,     15,     25,     20      # [4][1] [3][2] [2][3] [1][4]
        .quad       44,     43,     21,     14      # [1][1] [2][2] [3][3] [4][4]

# Complementary right-shift counts (64 - left count), same layout.
rhotates_right:
        .quad     64-3,  64-18,  64-36,  64-41
        .quad     64-1,  64-62,  64-28,  64-27
        .quad    64-45,   64-6,  64-56,  64-39
        .quad    64-10,  64-61,  64-55,   64-8
        .quad     64-2,  64-15,  64-25,  64-20
        .quad    64-44,  64-43,  64-21,  64-14

# Round constants: 24 rows, each constant repeated four times so that one
# 32-byte row can be XORed into a whole ymm register.
iotas:
        .quad   0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001
        .quad   0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082
        .quad   0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a
        .quad   0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000
        .quad   0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b
        .quad   0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
        .quad   0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
        .quad   0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009
        .quad   0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a
        .quad   0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088
        .quad   0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009
        .quad   0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a
        .quad   0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b
        .quad   0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b
        .quad   0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089
        .quad   0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003
        .quad   0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002
        .quad   0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080
        .quad   0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a
        .quad   0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a
        .quad   0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
        .quad   0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080
        .quad   0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
        .quad   0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008

# Lane index -> byte offset of that lane inside the stored state
# (25 entries, read by the byte-wise and lane-wise memory routines).
mapState:
        .quad    0*8,  1*8,  2*8,  3*8,  4*8
        .quad    7*8, 21*8, 10*8, 15*8, 20*8
        .quad    5*8, 13*8, 22*8, 19*8, 12*8
        .quad    8*8,  9*8, 18*8, 23*8, 16*8
        .quad    6*8, 17*8, 14*8, 11*8, 24*8

# Gather indices (byte offsets into the input block) used to assemble the
# input words that are XORed into %ymm2..%ymm6. Loaded with vmovdqa into an
# xmm register, hence the 16-byte alignment.
        .balign 16
map2:
        .long   10*8, 20*8,  5*8, 15*8
map3:
        .long   16*8,  7*8, 23*8, 14*8
map4:
        .long   11*8, 22*8,  8*8, 19*8
map5:
        .long   21*8, 17*8, 13*8,  9*8
map6:
        .long    6*8, 12*8, 18*8, 24*8

# Gather masks, loaded with vmovdqa into a ymm register (32-byte alignment).
# An element is 0 where the matching mapN entry names an input lane at or
# beyond the block size, so that lane is not read.

# Block of 21 lanes (map2 needs no mask: all of its lanes are below 21).
        .balign 32
mask3_21:
        .quad   ALLON, ALLON,     0, ALLON
mask4_21:
        .quad   ALLON,     0, ALLON, ALLON
mask5_21:
        .quad       0, ALLON, ALLON, ALLON
mask6_21:
        .quad   ALLON, ALLON, ALLON,     0

# Block of 17 lanes.
mask2_17:
        .quad   ALLON,     0, ALLON, ALLON
mask3_17:
        .quad   ALLON, ALLON,     0, ALLON
mask4_17:
        .quad   ALLON,     0, ALLON,     0
mask5_17:
        .quad       0,     0, ALLON, ALLON
mask6_17:
        .quad   ALLON, ALLON,     0,     0

# Identification string embedded in the object file.
        .asciz "Keccak-1600 for AVX2, CRYPTOGAMS by <appro@openssl.org>"