sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1196 @@
1
+ #
2
+ # The eXtended Keccak Code Package (XKCP)
3
+ # https://github.com/XKCP/XKCP
4
+ #
5
+ # The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
6
+ #
7
+ # Implementation by Ronny Van Keer, hereby denoted as "the implementer".
8
+ #
9
+ # For more information, feedback or questions, please refer to the Keccak Team website:
10
+ # https://keccak.team/
11
+ #
12
+ # To the extent possible under law, the implementer has waived all copyright
13
+ # and related or neighboring rights to the source code in this file.
14
+ # http://creativecommons.org/publicdomain/zero/1.0/
15
+ #
16
+ # ---
17
+ #
18
+ # This file implements Keccak-p[1600] in a SnP-compatible way.
19
+ # Please refer to SnP-documentation.h for more details.
20
+ #
21
+ # This implementation comes with KeccakP-1600-SnP.h in the same folder.
22
+ # Please refer to LowLevel.build for the exact list of other files it must be combined with.
23
+ #
24
+
25
+ # WARNING: State must be 256 bit (32 bytes) aligned.
26
+
27
+ .text
28
+
29
+ # conditional assembly settings (tested with .if by macros later in this file)
30
+ .equ UseSIMD, 0 # 0 selects the movq/xorq path of mXor128
31
+ .equ InlinePerm, 1 # 1: mKeccakPermutationInlinable24 expands mKeccakPermutation24 in place; any other value makes it call KeccakP1600_Permute_24rounds instead
32
+
33
+ # offsets in state: byte offset of each of the 25 lanes (8 bytes per lane); the first letter (b, g, k, m, s) advances in steps of 5 lanes, the second (a, e, i, o, u) in steps of 1 lane
34
+ .equ _ba, 0*8
35
+ .equ _be, 1*8
36
+ .equ _bi, 2*8
37
+ .equ _bo, 3*8
38
+ .equ _bu, 4*8
39
+ .equ _ga, 5*8
40
+ .equ _ge, 6*8
41
+ .equ _gi, 7*8
42
+ .equ _go, 8*8
43
+ .equ _gu, 9*8
44
+ .equ _ka, 10*8
45
+ .equ _ke, 11*8
46
+ .equ _ki, 12*8
47
+ .equ _ko, 13*8
48
+ .equ _ku, 14*8
49
+ .equ _ma, 15*8
50
+ .equ _me, 16*8
51
+ .equ _mi, 17*8
52
+ .equ _mo, 18*8
53
+ .equ _mu, 19*8
54
+ .equ _sa, 20*8
55
+ .equ _se, 21*8
56
+ .equ _si, 22*8
57
+ .equ _so, 23*8
58
+ .equ _su, 24*8
59
+
60
+ # arguments passed in registers (the System V AMD64 integer argument registers, in order)
61
+ .equ arg1, %rdi
62
+ .equ arg2, %rsi
63
+ .equ arg3, %rdx
64
+ .equ arg4, %rcx
65
+ .equ arg5, %r8
66
+ .equ arg6, %r9
67
+
68
+ # temporary registers; these are additional names for registers that are also defined elsewhere in this list: rT1a is rT1, rT1u is arg6, rT2u is arg5, and rT1e, rT1i, rT1o, rT2a..rT2o are the same registers as rDa, rBu, rCu, rBa..rBo under round vars
69
+ .equ rT1, %rax
70
+ .equ rT1a, rT1
71
+ .equ rT1e, %rbx
72
+ .equ rT1i, %r14
73
+ .equ rT1o, %r15
74
+ .equ rT1u, arg6
75
+ .equ rT2a, %r10
76
+ .equ rT2e, %r11
77
+ .equ rT2i, %r12
78
+ .equ rT2o, %r13
79
+ .equ rT2u, arg5
80
+
81
+ # round vars: registers used by mKeccakRound and by the permutation macros that expand it
82
+ .equ rpState, arg1 # base address of the 25 state lanes
83
+ .equ rpStack, %rsp # base address of the 25-lane scratch area the permutation macros reserve with subq
84
+ + # rDa..rDu: per-column values that mKeccakRound XORs into the input lanes of columns a..u; rDe, rDi, rDo, rDu are the same registers as arg4, arg3, arg5, arg6
85
+ .equ rDa, %rbx
86
+ .equ rDe, %rcx
87
+ .equ rDi, %rdx
88
+ .equ rDo, %r8
89
+ .equ rDu, %r9
90
+ + # rBa..rBu: the five lanes currently being combined into one group of five output lanes
91
+ .equ rBa, %r10
92
+ .equ rBe, %r11
93
+ .equ rBi, %r12
94
+ .equ rBo, %r13
95
+ .equ rBu, %r14
96
+ + # rCa..rCu: XOR of the lanes of one column; rCa is the same register as arg2, and rCi, rCo share registers with rBi, rBo
97
+ .equ rCa, %rsi
98
+ .equ rCe, %rbp
99
+ .equ rCi, rBi
100
+ .equ rCo, rBo
101
+ .equ rCu, %r15
102
+
103
+ .macro mKeccakRound iState, oState, rc, lastRound
104
+ # One round: reads the 25 lanes at iState and writes the 25 result lanes to oState, XORing the immediate rc into output lane ba. When lastRound is 0 it also leaves in rCa, rCe, rCu the XOR of the output lanes of columns a, e, u for the next round. rDi and rDo end up holding output lanes si and so in both cases. Overwrites rT1, rDa..rDu, rBa..rBu, rCa, rCu (and rCe when lastRound is 0).
105
+ # prepare Theta bis: on entry rCa, rCe, rCu must hold the XOR of the five input lanes of columns a, e, u, and rDi, rDo must hold input lanes si, so; this part completes rCi and rCo and derives rDa..rDu
106
+ movq rCe, rDa
107
+ shld $1, rDa, rDa # shld with the same register as source and destination rotates it left by the count
108
+
109
+ movq _bi(\iState), rCi
110
+ xorq _gi(\iState), rDi
111
+ xorq _ki(\iState), rCi
112
+ xorq rCu, rDa # rDa = rol(rCe, 1) ^ rCu
113
+ xorq _mi(\iState), rDi
114
+ xorq rDi, rCi # rCi = bi ^ gi ^ ki ^ mi ^ si
115
+
116
+ movq rCi, rDe
117
+ shld $1, rDe, rDe
118
+
119
+ movq _bo(\iState), rCo
120
+ xorq _go(\iState), rDo
121
+ xorq _ko(\iState), rCo
122
+ xorq rCa, rDe # rDe = rol(rCi, 1) ^ rCa
123
+ xorq _mo(\iState), rDo
124
+ xorq rDo, rCo # rCo = bo ^ go ^ ko ^ mo ^ so
125
+
126
+ movq rCo, rDi
127
+ shld $1, rDi, rDi
128
+
129
+ movq rCu, rDo
130
+ xorq rCe, rDi # rDi = rol(rCo, 1) ^ rCe
131
+ shld $1, rDo, rDo
132
+
133
+ movq rCa, rDu
134
+ xorq rCi, rDo # rDo = rol(rCu, 1) ^ rCi
135
+ shld $1, rDu, rDu
136
+
137
+ # Theta Rho Pi Chi Iota, result b: builds output lanes ba, be, bi, bo, bu from input lanes ba, ge, ki, mo, su
138
+ movq _ba(\iState), rBa
139
+ movq _ge(\iState), rBe
140
+ xorq rCo, rDu # rDu = rol(rCa, 1) ^ rCo; must happen before rBo (same register as rCo) is loaded below
141
+ movq _ki(\iState), rBi
142
+ movq _mo(\iState), rBo
143
+ movq _su(\iState), rBu
144
+ xorq rDe, rBe
145
+ shld $44, rBe, rBe
146
+ xorq rDi, rBi
147
+ xorq rDa, rBa
148
+ shld $43, rBi, rBi
149
+
150
+ movq rBe, rCa # rCa is no longer needed as input; it is rebuilt from here as output lane ba
151
+ movq $\rc, rT1 # the round constant enters lane ba through rT1
152
+ orq rBi, rCa
153
+ xorq rBa, rT1
154
+ xorq rT1, rCa
155
+ movq rCa, _ba(\oState)
156
+
157
+ xorq rDu, rBu
158
+ shld $14, rBu, rBu
159
+ movq rBa, rCu # rCu is rebuilt from here as output lane bu
160
+ andq rBe, rCu
161
+ xorq rBu, rCu
162
+ movq rCu, _bu(\oState)
163
+
164
+ xorq rDo, rBo
165
+ shld $21, rBo, rBo
166
+ movq rBo, rT1
167
+ andq rBu, rT1
168
+ xorq rBi, rT1
169
+ movq rT1, _bi(\oState)
170
+
171
+ notq rBi
172
+ orq rBa, rBu
173
+ orq rBo, rBi
174
+ xorq rBo, rBu
175
+ xorq rBe, rBi
176
+ movq rBu, _bo(\oState)
177
+ movq rBi, _be(\oState)
178
+ .if \lastRound == 0
179
+ movq rBi, rCe # restart rCe with output lane be
180
+ .endif
181
+
182
+ # Theta Rho Pi Chi, result g: builds output lanes ga, ge, gi, go, gu from input lanes bo, gu, ka, me, si
183
+ movq _gu(\iState), rBe
184
+ xorq rDu, rBe
185
+ movq _ka(\iState), rBi
186
+ shld $20, rBe, rBe
187
+ xorq rDa, rBi
188
+ shld $3, rBi, rBi
189
+ movq _bo(\iState), rBa
190
+ movq rBe, rT1
191
+ orq rBi, rT1
192
+ xorq rDo, rBa
193
+ movq _me(\iState), rBo
194
+ movq _si(\iState), rBu
195
+ shld $28, rBa, rBa
196
+ xorq rBa, rT1
197
+ movq rT1, _ga(\oState)
198
+ .if \lastRound == 0
199
+ xorq rT1, rCa # fold output lane ga into rCa
200
+ .endif
201
+
202
+ xorq rDe, rBo
203
+ shld $45, rBo, rBo
204
+ movq rBi, rT1
205
+ andq rBo, rT1
206
+ xorq rBe, rT1
207
+ movq rT1, _ge(\oState)
208
+ .if \lastRound == 0
209
+ xorq rT1, rCe # fold output lane ge into rCe
210
+ .endif
211
+
212
+ xorq rDi, rBu
213
+ shld $61, rBu, rBu
214
+ movq rBu, rT1
215
+ orq rBa, rT1
216
+ xorq rBo, rT1
217
+ movq rT1, _go(\oState)
218
+
219
+ andq rBe, rBa
220
+ xorq rBu, rBa
221
+ movq rBa, _gu(\oState)
222
+ notq rBu
223
+ .if \lastRound == 0
224
+ xorq rBa, rCu # fold output lane gu into rCu
225
+ .endif
226
+
227
+ orq rBu, rBo
228
+ xorq rBi, rBo
229
+ movq rBo, _gi(\oState)
230
+
231
+ # Theta Rho Pi Chi, result k: builds output lanes ka, ke, ki, ko, ku from input lanes be, gi, ko, mu, sa
232
+ movq _be(\iState), rBa
233
+ movq _gi(\iState), rBe
234
+ movq _ko(\iState), rBi
235
+ movq _mu(\iState), rBo
236
+ movq _sa(\iState), rBu
237
+ xorq rDi, rBe
238
+ shld $6, rBe, rBe
239
+ xorq rDo, rBi
240
+ shld $25, rBi, rBi
241
+ movq rBe, rT1
242
+ orq rBi, rT1
243
+ xorq rDe, rBa
244
+ shld $1, rBa, rBa
245
+ xorq rBa, rT1
246
+ movq rT1, _ka(\oState)
247
+ .if \lastRound == 0
248
+ xorq rT1, rCa # fold output lane ka into rCa
249
+ .endif
250
+
251
+ xorq rDu, rBo
252
+ shld $8, rBo, rBo
253
+ movq rBi, rT1
254
+ andq rBo, rT1
255
+ xorq rBe, rT1
256
+ movq rT1, _ke(\oState)
257
+ .if \lastRound == 0
258
+ xorq rT1, rCe # fold output lane ke into rCe
259
+ .endif
260
+
261
+ xorq rDa, rBu
262
+ shld $18, rBu, rBu
263
+ notq rBo
264
+ movq rBo, rT1
265
+ andq rBu, rT1
266
+ xorq rBi, rT1
267
+ movq rT1, _ki(\oState)
268
+
269
+ movq rBu, rT1
270
+ orq rBa, rT1
271
+ xorq rBo, rT1
272
+ movq rT1, _ko(\oState)
273
+
274
+ andq rBe, rBa
275
+ xorq rBu, rBa
276
+ movq rBa, _ku(\oState)
277
+ .if \lastRound == 0
278
+ xorq rBa, rCu # fold output lane ku into rCu
279
+ .endif
280
+
281
+ # Theta Rho Pi Chi, result m: builds output lanes ma, me, mi, mo, mu from input lanes bu, ga, ke, mi, so
282
+ movq _ga(\iState), rBe
283
+ xorq rDa, rBe
284
+ movq _ke(\iState), rBi
285
+ shld $36, rBe, rBe
286
+ xorq rDe, rBi
287
+ movq _bu(\iState), rBa
288
+ shld $10, rBi, rBi
289
+ movq rBe, rT1
290
+ movq _mi(\iState), rBo
291
+ andq rBi, rT1
292
+ xorq rDu, rBa
293
+ movq _so(\iState), rBu
294
+ shld $27, rBa, rBa
295
+ xorq rBa, rT1
296
+ movq rT1, _ma(\oState)
297
+ .if \lastRound == 0
298
+ xorq rT1, rCa # fold output lane ma into rCa
299
+ .endif
300
+
301
+ xorq rDi, rBo
302
+ shld $15, rBo, rBo
303
+ movq rBi, rT1
304
+ orq rBo, rT1
305
+ xorq rBe, rT1
306
+ movq rT1, _me(\oState)
307
+ .if \lastRound == 0
308
+ xorq rT1, rCe # fold output lane me into rCe
309
+ .endif
310
+
311
+ xorq rDo, rBu
312
+ shld $56, rBu, rBu
313
+ notq rBo
314
+ movq rBo, rT1
315
+ orq rBu, rT1
316
+ xorq rBi, rT1
317
+ movq rT1, _mi(\oState)
318
+
319
+ orq rBa, rBe
320
+ xorq rBu, rBe
321
+ movq rBe, _mu(\oState)
322
+
323
+ andq rBa, rBu
324
+ xorq rBo, rBu
325
+ movq rBu, _mo(\oState)
326
+ .if \lastRound == 0
327
+ xorq rBe, rCu # rBe still holds output lane mu; fold it into rCu
328
+ .endif
329
+
330
+ # Theta Rho Pi Chi, result s: builds output lanes sa, se, si, so, su from input lanes bi, go, ku, ma, se; each of rDa..rDu is reused as a temporary after its last use as an XOR operand
331
+ movq _bi(\iState), rBa
332
+ movq _go(\iState), rBe
333
+ movq _ku(\iState), rBi
334
+ xorq rDi, rBa
335
+ movq _ma(\iState), rBo
336
+ shld $62, rBa, rBa
337
+ xorq rDo, rBe
338
+ movq _se(\iState), rBu
339
+ shld $55, rBe, rBe
340
+
341
+ xorq rDu, rBi
342
+ movq rBa, rDu # rDu becomes a temporary for output lane su
343
+ xorq rDe, rBu
344
+ shld $2, rBu, rBu
345
+ andq rBe, rDu
346
+ xorq rBu, rDu
347
+ movq rDu, _su(\oState)
348
+
349
+ shld $39, rBi, rBi
350
+ .if \lastRound == 0
351
+ xorq rDu, rCu # fold output lane su into rCu
352
+ .endif
353
+ notq rBe
354
+ xorq rDa, rBo
355
+ movq rBe, rDa # rDa becomes a temporary for output lane sa
356
+ andq rBi, rDa
357
+ xorq rBa, rDa
358
+ movq rDa, _sa(\oState)
359
+ .if \lastRound == 0
360
+ xorq rDa, rCa # fold output lane sa into rCa
361
+ .endif
362
+
363
+ shld $41, rBo, rBo
364
+ movq rBi, rDe # rDe becomes a temporary for output lane se
365
+ orq rBo, rDe
366
+ xorq rBe, rDe
367
+ movq rDe, _se(\oState)
368
+ .if \lastRound == 0
369
+ xorq rDe, rCe # fold output lane se into rCe
370
+ .endif
371
+
372
+ movq rBo, rDi
373
+ movq rBu, rDo
374
+ andq rBu, rDi
375
+ orq rBa, rDo
376
+ xorq rBi, rDi
377
+ xorq rBo, rDo
378
+ movq rDi, _si(\oState) # rDi keeps output lane si, as the next round expects on entry
379
+ movq rDo, _so(\oState) # rDo keeps output lane so, as the next round expects on entry
380
+
381
+ .endm
382
+
383
+ .macro mKeccakPermutation12
384
+ # Expands mKeccakRound 12 times on the state at rpState, alternating between the state and a 25-lane scratch area on the stack; the twelfth round writes its result to rpState. Overwrites rT1 and every register listed under round vars except rpState and rpStack (this includes %rbx, %rbp, %r12-%r15).
385
+ subq $8*25, %rsp # reserve the scratch area addressed through rpStack
386
+ + # entry values for the first round: rCa, rCe, rCu = XOR of the five lanes of columns a, e, u; rDi, rDo = lanes si, so
387
+ movq _ba(rpState), rCa
388
+ movq _be(rpState), rCe
389
+ movq _bu(rpState), rCu
390
+
391
+ xorq _ga(rpState), rCa
392
+ xorq _ge(rpState), rCe
393
+ xorq _gu(rpState), rCu
394
+
395
+ xorq _ka(rpState), rCa
396
+ xorq _ke(rpState), rCe
397
+ xorq _ku(rpState), rCu
398
+
399
+ xorq _ma(rpState), rCa
400
+ xorq _me(rpState), rCe
401
+ xorq _mu(rpState), rCu
402
+
403
+ xorq _sa(rpState), rCa
404
+ xorq _se(rpState), rCe
405
+ movq _si(rpState), rDi
406
+ movq _so(rpState), rDo
407
+ xorq _su(rpState), rCu
408
+ + # the twelve constants below are the same as the last twelve used by mKeccakPermutation24
409
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
410
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
411
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
412
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
413
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
414
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
415
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
416
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
417
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
418
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
419
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
420
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1 # lastRound = 1: no values are prepared for a following round
421
+ addq $8*25, %rsp # release the scratch area
422
+ .endm
423
+
424
+ .macro mKeccakPermutation24
425
+ # Expands mKeccakRound 24 times on the state at rpState, alternating between the state and a 25-lane scratch area on the stack; the twenty-fourth round writes its result to rpState. Overwrites rT1 and every register listed under round vars except rpState and rpStack (this includes %rbx, %rbp, %r12-%r15).
426
+ subq $8*25, %rsp # reserve the scratch area addressed through rpStack
427
+ + # entry values for the first round: rCa, rCe, rCu = XOR of the five lanes of columns a, e, u; rDi, rDo = lanes si, so
428
+ movq _ba(rpState), rCa
429
+ movq _be(rpState), rCe
430
+ movq _bu(rpState), rCu
431
+
432
+ xorq _ga(rpState), rCa
433
+ xorq _ge(rpState), rCe
434
+ xorq _gu(rpState), rCu
435
+
436
+ xorq _ka(rpState), rCa
437
+ xorq _ke(rpState), rCe
438
+ xorq _ku(rpState), rCu
439
+
440
+ xorq _ma(rpState), rCa
441
+ xorq _me(rpState), rCe
442
+ xorq _mu(rpState), rCu
443
+
444
+ xorq _sa(rpState), rCa
445
+ xorq _se(rpState), rCe
446
+ movq _si(rpState), rDi
447
+ movq _so(rpState), rDo
448
+ xorq _su(rpState), rCu
449
+ + # rounds 1-12
450
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
451
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
452
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
453
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
454
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
455
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
456
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
457
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
458
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
459
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
460
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
461
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
462
+ + # rounds 13-24: the same constants mKeccakPermutation12 uses
463
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
464
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
465
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
466
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
467
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
468
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
469
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
470
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
471
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
472
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
473
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
474
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1 # lastRound = 1: no values are prepared for a following round
475
+ addq $8*25, %rsp # release the scratch area
476
+ .endm
477
+
478
+ .macro mKeccakPermutationInlinable24
479
+ .if InlinePerm != 1 # out-of-line build: call the exported 24-round routine
480
+ callq KeccakP1600_Permute_24rounds@PLT
481
+ .else # InlinePerm == 1: expand the whole permutation at the use site
482
+ mKeccakPermutation24
483
+ .endif
484
+ .endm
485
+
486
+ .macro mPushRegs
487
+ pushq %rbx # save the six callee-saved general-purpose registers (48 bytes); mPopRegs restores them in exactly the reverse order
488
+ pushq %rbp
489
+ pushq %r12
490
+ pushq %r13
491
+ pushq %r14
492
+ pushq %r15
493
+ .endm
494
+
495
+ .macro mPopRegs
496
+ popq %r15 # restore the registers saved by mPushRegs; the order here is the exact reverse of the push order
497
+ popq %r14
498
+ popq %r13
499
+ popq %r12
500
+ popq %rbp
501
+ popq %rbx
502
+ .endm
503
+
504
+ .macro mXor128 input, output, offset
505
+ .if UseSIMD != 0 # SSE2 path: output[offset..offset+15] ^= input[offset..offset+15], unaligned, via xmm0/xmm1
506
+ movdqu \offset(\input), %xmm0
507
+ movdqu \offset(\output), %xmm1
508
+ pxor %xmm1, %xmm0
509
+ movdqu %xmm0, \offset(\output)
510
+ .else # scalar path: the same 16 bytes as two 64-bit lanes, via rT1a/rT1e
511
+ movq \offset(\input), rT1a
512
+ movq \offset+8(\input), rT1e
513
+ xorq rT1a, \offset(\output)
514
+ xorq rT1e, \offset+8(\output)
515
+ .endif
516
+ .endm
517
+
518
+ .macro mXor256 input, output, offset
519
+ .if UseSIMD != 0 # SSE2 path: 32 bytes = two consecutive 16-byte xors (mXor128 takes its own SSE2 branch here)
520
+ mXor128 \input, \output, \offset
521
+ mXor128 \input, \output, \offset+16
522
+ .else # scalar path: load all four input lanes first, then xor them into the output
523
+ movq \offset(\input), rT1a
524
+ movq \offset+8(\input), rT1e
525
+ movq \offset+16(\input), rT1i
526
+ movq \offset+24(\input), rT1o
527
+ xorq rT1a, \offset(\output)
528
+ xorq rT1e, \offset+8(\output)
529
+ xorq rT1i, \offset+16(\output)
530
+ xorq rT1o, \offset+24(\output)
531
+ .endif
532
+ .endm
539
+
540
+ .macro mXor512 input, output, offset
541
+ # output[offset..offset+63] ^= input[offset..offset+63], done as two 32-byte halves; mXor256 selects the scalar or SSE2 form from UseSIMD.
542
+ mXor256 \input, \output, \offset
543
+ mXor256 \input, \output, \offset+32
544
+ .endm
563
+
564
+ #----------------------------------------------------------------------------
565
+ # No global set-up is performed by this implementation; the routine returns immediately.
566
+ # void KeccakP1600_StaticInitialize( void )
567
+ #
568
+ .align 8
569
+ .global KeccakP1600_StaticInitialize
570
+ .type KeccakP1600_StaticInitialize, %function
571
+ KeccakP1600_StaticInitialize:
572
+ retq
573
+ .size KeccakP1600_StaticInitialize, .-KeccakP1600_StaticInitialize # placed after the body so the symbol size is the real code length
574
+
575
+ #----------------------------------------------------------------------------
576
+ # Writes the initial state: lanes be, bi, go, ki, mi and sa are stored as all-ones and every other lane as zero (the same lanes that KeccakLaneComplementTable marks as complemented).
577
+ # void KeccakP1600_Initialize(void *state)
578
+ # arg1 = state. Uses %rax (zero) and %rcx (all-ones) as the two store values, plus %xmm0 when UseSIMD is set.
579
+ .align 8
580
+ .global KeccakP1600_Initialize
581
+ .type KeccakP1600_Initialize, %function
582
+ KeccakP1600_Initialize:
583
+ xorq %rax, %rax
584
+ xorq %rcx, %rcx
585
+ notq %rcx # rcx = 0xFFFFFFFFFFFFFFFF
586
+ .if UseSIMD == 0
587
+ movq %rax, _ba(arg1)
588
+ movq %rcx, _be(arg1)
589
+ movq %rcx, _bi(arg1)
590
+ movq %rax, _bo(arg1)
591
+ movq %rax, _bu(arg1)
592
+ movq %rax, _ga(arg1)
593
+ movq %rax, _ge(arg1)
594
+ movq %rax, _gi(arg1)
595
+ movq %rcx, _go(arg1)
596
+ movq %rax, _gu(arg1)
597
+ movq %rax, _ka(arg1)
598
+ movq %rax, _ke(arg1)
599
+ movq %rcx, _ki(arg1)
600
+ movq %rax, _ko(arg1)
601
+ movq %rax, _ku(arg1)
602
+ movq %rax, _ma(arg1)
603
+ movq %rax, _me(arg1)
604
+ movq %rcx, _mi(arg1)
605
+ movq %rax, _mo(arg1)
606
+ movq %rax, _mu(arg1)
607
+ movq %rcx, _sa(arg1)
608
+ movq %rax, _se(arg1)
609
+ movq %rax, _si(arg1)
610
+ movq %rax, _so(arg1)
611
+ movq %rax, _su(arg1)
612
+ .else
613
+ pxor %xmm0, %xmm0 # xmm0 = 0; each movdqu below zeroes a pair of adjacent zero lanes with one 16-byte store
614
+ movq %rax, _ba(arg1)
615
+ movq %rcx, _be(arg1)
616
+ movq %rcx, _bi(arg1)
617
+ movq %rax, _bo(arg1)
618
+ movdqu %xmm0, _bu(arg1)
619
+ movdqu %xmm0, _ge(arg1)
620
+ movq %rcx, _go(arg1)
621
+ movq %rax, _gu(arg1)
622
+ movdqu %xmm0, _ka(arg1)
623
+ movq %rcx, _ki(arg1)
624
+ movq %rax, _ko(arg1)
625
+ movdqu %xmm0, _ku(arg1)
626
+ movq %rax, _me(arg1)
627
+ movq %rcx, _mi(arg1)
628
+ movdqu %xmm0, _mo(arg1)
629
+ movq %rcx, _sa(arg1)
630
+ movq %rax, _se(arg1)
631
+ movdqu %xmm0, _si(arg1)
632
+ movq %rax, _su(arg1)
633
+ .endif
634
+ retq
635
+ .size KeccakP1600_Initialize, .-KeccakP1600_Initialize # placed after the body so the symbol size is the real code length
636
+
637
+ #----------------------------------------------------------------------------
638
+ # Xors a single byte into the state: ((unsigned char *)state)[offset] ^= data.
639
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset)
640
+ # NOTE(review): offset is added as a full 64-bit register; this assumes the caller passes the unsigned int zero-extended - confirm.
641
+ .align 8
642
+ .global KeccakP1600_AddByte
643
+ .type KeccakP1600_AddByte, %function
644
+ KeccakP1600_AddByte:
645
+ addq arg3, arg1 # arg1 = address of the target byte
646
+ mov arg2, %rax # only the low byte (%al) is used below
647
+ xorb %al, (arg1)
648
+ retq
649
+ .size KeccakP1600_AddByte, .-KeccakP1600_AddByte # placed after the body so the symbol size is the real code length
650
+
651
+ #----------------------------------------------------------------------------
652
+ # Xors length bytes of data into the state starting at byte offset: whole lanes first, in blocks of 16, 8, 4, 2 and 1 lanes selected by the bits of length/8, then a byte loop for the remaining length & 7 bytes.
653
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
654
+ # On return arg1 and arg2 have been advanced past the processed bytes; KeccakF1600_FastLoop_Absorb relies on that. Only bits 3-7 of length select lane blocks, so length is expected to stay below 256.
655
+ .align 8
656
+ .global KeccakP1600_AddBytes
657
+ .type KeccakP1600_AddBytes, %function
658
+ KeccakP1600_AddBytes:
659
+ pushq rT1e # the scalar mXor macros use rT1e/rT1i/rT1o as scratch, so they are saved here and restored at the exit
660
+ pushq rT1i
661
+ pushq rT1o
662
+ addq arg3, arg1 # arg1 = state + offset
663
+ testq $0xF8, arg4 # any whole lane to process?
664
+ jz KeccakP1600_AddBytes_Bytes
665
+ movq arg4, arg6
666
+ shrq $3, arg6 # arg6 = number of whole lanes
667
+ testq $16, arg6
668
+ jz KeccakP1600_AddBytes_8Lanes
669
+ mXor512 arg2, arg1, 0
670
+ mXor512 arg2, arg1, 64
671
+ addq $128, arg2
672
+ addq $128, arg1
673
+ KeccakP1600_AddBytes_8Lanes:
674
+ testq $8, arg6
675
+ jz KeccakP1600_AddBytes_4Lanes
676
+ mXor512 arg2, arg1, 0
677
+ addq $64, arg2
678
+ addq $64, arg1
679
+ KeccakP1600_AddBytes_4Lanes:
680
+ testq $4, arg6
681
+ jz KeccakP1600_AddBytes_2Lanes
682
+ mXor256 arg2, arg1, 0
683
+ addq $32, arg2
684
+ addq $32, arg1
685
+ KeccakP1600_AddBytes_2Lanes:
686
+ testq $2, arg6
687
+ jz KeccakP1600_AddBytes_1Lane
688
+ mXor128 arg2, arg1, 0
689
+ addq $16, arg2
690
+ addq $16, arg1
691
+ KeccakP1600_AddBytes_1Lane:
692
+ testq $1, arg6
693
+ jz KeccakP1600_AddBytes_Bytes
694
+ movq (arg2), rT1
695
+ xorq rT1, (arg1)
696
+ addq $8, arg2
697
+ addq $8, arg1
698
+ KeccakP1600_AddBytes_Bytes:
699
+ andq $7, arg4 # arg4 = trailing bytes that do not fill a lane
700
+ jz KeccakP1600_AddBytes_Exit
701
+ KeccakP1600_AddBytes_BytesLoop:
702
+ movb (arg2), %al
703
+ xorb %al, (arg1)
704
+ addq $1, arg2
705
+ addq $1, arg1
706
+ subq $1, arg4
707
+ jnz KeccakP1600_AddBytes_BytesLoop
708
+ KeccakP1600_AddBytes_Exit:
709
+ popq rT1o
710
+ popq rT1i
711
+ popq rT1e
712
+ retq
713
+ .size KeccakP1600_AddBytes, .-KeccakP1600_AddBytes # placed after the body so the symbol size is the real code length
714
+
715
+
716
+ KeccakLaneComplementTable: # one 64-bit mask per state lane, in state order: all-ones for the lanes kept complemented, zero otherwise
717
+ .quad 0 # 0 ba
718
+ .quad -1 # 1 be (complemented)
719
+ .quad -1 # 2 bi (complemented)
720
+ .quad 0 # 3 bo
721
+ .quad 0 # 4 bu
722
+
723
+ .quad 0 # 5 ga
724
+ .quad 0 # 6 ge
725
+ .quad 0 # 7 gi
726
+ .quad -1 # 8 go (complemented)
727
+ .quad 0 # 9 gu
728
+
729
+ .quad 0 # 10 ka
730
+ .quad 0 # 11 ke
731
+ .quad -1 # 12 ki (complemented)
732
+ .quad 0 # 13 ko
733
+ .quad 0 # 14 ku
734
+
735
+ .quad 0 # 15 ma
736
+ .quad 0 # 16 me
737
+ .quad -1 # 17 mi (complemented)
738
+ .quad 0 # 18 mo
739
+ .quad 0 # 19 mu
740
+
741
+ .quad -1 # 20 sa (complemented)
742
+ .quad 0 # 21 se
743
+ .quad 0 # 22 si
744
+ .quad 0 # 23 so
745
+ .quad 0 # 24 su
746
+
747
+ #----------------------------------------------------------------------------
748
+ # Stores length bytes of data into the state at byte offset. Every value is xored with the matching bytes of KeccakLaneComplementTable first, so complemented lanes keep their inverted representation.
749
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
750
+ # arg5 walks the complement table in step with arg1 (state) and arg2 (data); rT1 and %al are scratch.
751
+ .align 8
752
+ .global KeccakP1600_OverwriteBytes
753
+ .type KeccakP1600_OverwriteBytes, %function
754
+ KeccakP1600_OverwriteBytes:
755
+ addq arg3, arg1 # arg1 = state + offset
756
+ leaq KeccakLaneComplementTable(%rip), arg5
757
+ addq arg3, arg5 # arg5 = table + offset
758
+ subq $8, arg4 # borrow => fewer than 8 bytes remain
759
+ jc KeccakP1600_OverwriteBytes_Bytes
760
+ KeccakP1600_OverwriteBytes_LanesLoop:
761
+ movq (arg2), rT1
762
+ xorq (arg5), rT1
763
+ movq rT1, (arg1)
764
+ addq $8, arg2
765
+ addq $8, arg5
766
+ addq $8, arg1
767
+ subq $8, arg4
768
+ jnc KeccakP1600_OverwriteBytes_LanesLoop
769
+ KeccakP1600_OverwriteBytes_Bytes:
770
+ addq $8, arg4 # undo the last subtraction: arg4 = bytes left (0..7)
771
+ jz KeccakP1600_OverwriteBytes_Exit
772
+ KeccakP1600_OverwriteBytes_BytesLoop:
773
+ movb (arg2), %al
774
+ xorb (arg5), %al
775
+ movb %al, (arg1)
776
+ addq $1, arg2
777
+ addq $1, arg5
778
+ addq $1, arg1
779
+ subq $1, arg4
780
+ jnz KeccakP1600_OverwriteBytes_BytesLoop
781
+ KeccakP1600_OverwriteBytes_Exit:
782
+ retq
783
+ .size KeccakP1600_OverwriteBytes, .-KeccakP1600_OverwriteBytes # placed after the body so the symbol size is the real code length
784
+
785
+ #----------------------------------------------------------------------------
786
+ # Sets the first byteCount bytes of the state to logical zero; in the stored representation that means copying the first byteCount bytes of KeccakLaneComplementTable (0 xor mask) into the state.
787
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
788
+ # arg5 walks the complement table in step with arg1 (state); rT1 and %al are scratch.
789
+ .align 8
790
+ .global KeccakP1600_OverwriteWithZeroes
791
+ .type KeccakP1600_OverwriteWithZeroes, %function
792
+ KeccakP1600_OverwriteWithZeroes:
793
+ leaq KeccakLaneComplementTable(%rip), arg5
794
+ subq $8, arg2 # borrow => fewer than 8 bytes remain
795
+ jc KeccakP1600_OverwriteWithZeroes_Bytes
796
+ KeccakP1600_OverwriteWithZeroes_LanesLoop:
797
+ movq $0, rT1
798
+ xorq (arg5), rT1
799
+ movq rT1, (arg1)
800
+ addq $8, arg5
801
+ addq $8, arg1
802
+ subq $8, arg2
803
+ jnc KeccakP1600_OverwriteWithZeroes_LanesLoop
804
+ KeccakP1600_OverwriteWithZeroes_Bytes:
805
+ addq $8, arg2 # undo the last subtraction: arg2 = bytes left (0..7)
806
+ jz KeccakP1600_OverwriteWithZeroes_Exit
807
+ KeccakP1600_OverwriteWithZeroes_BytesLoop:
808
+ movb $0, %al
809
+ xorb (arg5), %al
810
+ movb %al, (arg1)
811
+ addq $1, arg5
812
+ addq $1, arg1
813
+ subq $1, arg2
814
+ jnz KeccakP1600_OverwriteWithZeroes_BytesLoop
815
+ KeccakP1600_OverwriteWithZeroes_Exit:
816
+ retq
817
+ .size KeccakP1600_OverwriteWithZeroes, .-KeccakP1600_OverwriteWithZeroes # placed after the body so the symbol size is the real code length
818
+
819
+ #----------------------------------------------------------------------------
820
+ # Copies length bytes of the state, starting at byte offset, into data. Every value is xored with the matching bytes of KeccakLaneComplementTable so the caller receives the un-complemented state.
821
+ # void KeccakP1600_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
822
+ # data is the destination buffer (it is written through arg2). arg5 walks the complement table in step with arg1; rT1 and %al are scratch.
823
+ .align 8
824
+ .global KeccakP1600_ExtractBytes
825
+ .type KeccakP1600_ExtractBytes, %function
826
+ KeccakP1600_ExtractBytes:
827
+ addq arg3, arg1 # arg1 = state + offset
828
+ leaq KeccakLaneComplementTable(%rip), arg5
829
+ addq arg3, arg5 # arg5 = table + offset
830
+ subq $8, arg4 # borrow => fewer than 8 bytes remain
831
+ jc KeccakP1600_ExtractBytes_Bytes
832
+ KeccakP1600_ExtractBytes_LanesLoop:
833
+ movq (arg1), rT1
834
+ xorq (arg5), rT1
835
+ movq rT1, (arg2)
836
+ addq $8, arg2
837
+ addq $8, arg5
838
+ addq $8, arg1
839
+ subq $8, arg4
840
+ jnc KeccakP1600_ExtractBytes_LanesLoop
841
+ KeccakP1600_ExtractBytes_Bytes:
842
+ addq $8, arg4 # undo the last subtraction: arg4 = bytes left (0..7)
843
+ jz KeccakP1600_ExtractBytes_Exit
844
+ KeccakP1600_ExtractBytes_BytesLoop:
845
+ movb (arg1), %al
846
+ xorb (arg5), %al
847
+ movb %al, (arg2)
848
+ addq $1, arg2
849
+ addq $1, arg5
850
+ addq $1, arg1
851
+ subq $1, arg4
852
+ jnz KeccakP1600_ExtractBytes_BytesLoop
853
+ KeccakP1600_ExtractBytes_Exit:
854
+ retq
855
+ .size KeccakP1600_ExtractBytes, .-KeccakP1600_ExtractBytes # placed after the body so the symbol size is the real code length
856
+
857
+ #----------------------------------------------------------------------------
858
+ # For length bytes starting at state byte offset: output[i] = input[i] xor (state byte xor complement-table byte), i.e. input xored with the un-complemented state.
859
+ # void KeccakP1600_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
860
+ # arg6 walks the complement table in step with arg1 (state), arg2 (input) and arg3 (output); rT1 and %al are scratch.
861
+ .align 8
862
+ .global KeccakP1600_ExtractAndAddBytes
863
+ .type KeccakP1600_ExtractAndAddBytes, %function
864
+ KeccakP1600_ExtractAndAddBytes:
865
+ addq arg4, arg1 # arg1 = state + offset
866
+ leaq KeccakLaneComplementTable(%rip), arg6
867
+ addq arg4, arg6 # arg6 = table + offset
868
+ subq $8, arg5 # borrow => fewer than 8 bytes remain
869
+ jc KeccakP1600_ExtractAndAddBytes_Bytes
870
+ KeccakP1600_ExtractAndAddBytes_LanesLoop:
871
+ movq (arg1), rT1
872
+ xorq (arg6), rT1
873
+ xorq (arg2), rT1
874
+ movq rT1, (arg3)
875
+ addq $8, arg2
876
+ addq $8, arg3
877
+ addq $8, arg6
878
+ addq $8, arg1
879
+ subq $8, arg5
880
+ jnc KeccakP1600_ExtractAndAddBytes_LanesLoop
881
+ KeccakP1600_ExtractAndAddBytes_Bytes:
882
+ addq $8, arg5 # undo the last subtraction: arg5 = bytes left (0..7)
883
+ jz KeccakP1600_ExtractAndAddBytes_Exit
884
+ KeccakP1600_ExtractAndAddBytes_BytesLoop:
885
+ movb (arg1), %al
886
+ xorb (arg6), %al
887
+ xorb (arg2), %al
888
+ movb %al, (arg3)
889
+ addq $1, arg2
890
+ addq $1, arg3
891
+ addq $1, arg6
892
+ addq $1, arg1
893
+ subq $1, arg5
894
+ jnz KeccakP1600_ExtractAndAddBytes_BytesLoop
895
+ KeccakP1600_ExtractAndAddBytes_Exit:
896
+ retq
897
+ .size KeccakP1600_ExtractAndAddBytes, .-KeccakP1600_ExtractAndAddBytes # placed after the body so the symbol size is the real code length
898
+
899
+ #----------------------------------------------------------------------------
900
+ # Applies the last nrounds rounds of the 24-round schedule by jumping into the unrolled round sequence below through KeccakP1600_Permute_NroundsTable.
901
+ # void KeccakP1600_Permute_Nrounds( void *state, unsigned int nrounds )
902
+ # NOTE(review): nrounds is not range-checked here; only values 1..24 select a valid table entry.
903
+ .align 8
904
+ .global KeccakP1600_Permute_Nrounds
905
+ .type KeccakP1600_Permute_Nrounds, %function
906
+ KeccakP1600_Permute_Nrounds:
907
+ mPushRegs
908
+ subq $8*25, %rsp # 25-lane scratch state on the stack
909
+ movq arg2, rT1 # rT1 = nrounds
910
+ # Seed the column accumulators exactly as the fixed-round permutation macros do.
911
+ movq _ba(rpState), rCa
912
+ movq _be(rpState), rCe
913
+ movq _bu(rpState), rCu
914
+
915
+ xorq _ga(rpState), rCa
916
+ xorq _ge(rpState), rCe
917
+ xorq _gu(rpState), rCu
918
+
919
+ xorq _ka(rpState), rCa
920
+ xorq _ke(rpState), rCe
921
+ xorq _ku(rpState), rCu
922
+
923
+ xorq _ma(rpState), rCa
924
+ xorq _me(rpState), rCe
925
+ xorq _mu(rpState), rCu
926
+
927
+ xorq _sa(rpState), rCa
928
+ xorq _se(rpState), rCe
929
+ movq _si(rpState), rDi
930
+ movq _so(rpState), rDo
931
+ xorq _su(rpState), rCu
932
+ # The odd-numbered entry points below read from rpStack, so for an odd round count the state is first copied there.
933
+ testq $1, rT1
934
+ jz KeccakP1600_Permute_Nrounds_Dispatch
935
+ movq _ba(rpState), rT2a # copy to stack
936
+ movq rT2a, _ba(rpStack)
937
+ movq _be(rpState), rT2a
938
+ movq rT2a, _be(rpStack)
939
+ movq _bi(rpState), rT2a
940
+ movq rT2a, _bi(rpStack)
941
+ movq _bo(rpState), rT2a
942
+ movq rT2a, _bo(rpStack)
943
+ movq _bu(rpState), rT2a
944
+ movq rT2a, _bu(rpStack)
945
+ movq _ga(rpState), rT2a
946
+ movq rT2a, _ga(rpStack)
947
+ movq _ge(rpState), rT2a
948
+ movq rT2a, _ge(rpStack)
949
+ movq _gi(rpState), rT2a
950
+ movq rT2a, _gi(rpStack)
951
+ movq _go(rpState), rT2a
952
+ movq rT2a, _go(rpStack)
953
+ movq _gu(rpState), rT2a
954
+ movq rT2a, _gu(rpStack)
955
+ movq _ka(rpState), rT2a
956
+ movq rT2a, _ka(rpStack)
957
+ movq _ke(rpState), rT2a
958
+ movq rT2a, _ke(rpStack)
959
+ movq _ki(rpState), rT2a
960
+ movq rT2a, _ki(rpStack)
961
+ movq _ko(rpState), rT2a
962
+ movq rT2a, _ko(rpStack)
963
+ movq _ku(rpState), rT2a
964
+ movq rT2a, _ku(rpStack)
965
+ movq _ma(rpState), rT2a
966
+ movq rT2a, _ma(rpStack)
967
+ movq _me(rpState), rT2a
968
+ movq rT2a, _me(rpStack)
969
+ movq _mi(rpState), rT2a
970
+ movq rT2a, _mi(rpStack)
971
+ movq _mo(rpState), rT2a
972
+ movq rT2a, _mo(rpStack)
973
+ movq _mu(rpState), rT2a
974
+ movq rT2a, _mu(rpStack)
975
+ movq _sa(rpState), rT2a
976
+ movq rT2a, _sa(rpStack)
977
+ movq _se(rpState), rT2a
978
+ movq rT2a, _se(rpStack)
979
+ movq _si(rpState), rT2a
980
+ movq rT2a, _si(rpStack)
981
+ movq _so(rpState), rT2a
982
+ movq rT2a, _so(rpStack)
983
+ movq _su(rpState), rT2a
984
+ movq rT2a, _su(rpStack)
985
+ KeccakP1600_Permute_Nrounds_Dispatch:
986
+ shlq $3, rT1 # rT1 = nrounds * 8 (table entries are 8 bytes)
987
+ leaq KeccakP1600_Permute_NroundsTable-8(%rip), rT2a # table base biased by one entry, so nrounds = 1 selects the first entry
988
+ jmp *(rT1, rT2a)
989
+ # Entry point NroundsK leaves exactly K rounds to execute; the labels fall through to the end of the schedule.
990
+ KeccakP1600_Permute_Nrounds24:
991
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
992
+ KeccakP1600_Permute_Nrounds23:
993
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
994
+ KeccakP1600_Permute_Nrounds22:
995
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
996
+ KeccakP1600_Permute_Nrounds21:
997
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
998
+ KeccakP1600_Permute_Nrounds20:
999
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
1000
+ KeccakP1600_Permute_Nrounds19:
1001
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
1002
+ KeccakP1600_Permute_Nrounds18:
1003
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
1004
+ KeccakP1600_Permute_Nrounds17:
1005
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
1006
+ KeccakP1600_Permute_Nrounds16:
1007
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
1008
+ KeccakP1600_Permute_Nrounds15:
1009
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
1010
+ KeccakP1600_Permute_Nrounds14:
1011
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
1012
+ KeccakP1600_Permute_Nrounds13:
1013
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
1014
+ KeccakP1600_Permute_Nrounds12:
1015
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
1016
+ KeccakP1600_Permute_Nrounds11:
1017
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
1018
+ KeccakP1600_Permute_Nrounds10:
1019
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
1020
+ KeccakP1600_Permute_Nrounds9:
1021
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
1022
+ KeccakP1600_Permute_Nrounds8:
1023
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
1024
+ KeccakP1600_Permute_Nrounds7:
1025
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
1026
+ KeccakP1600_Permute_Nrounds6:
1027
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
1028
+ KeccakP1600_Permute_Nrounds5:
1029
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
1030
+ KeccakP1600_Permute_Nrounds4:
1031
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
1032
+ KeccakP1600_Permute_Nrounds3:
1033
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
1034
+ KeccakP1600_Permute_Nrounds2:
1035
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
1036
+ KeccakP1600_Permute_Nrounds1:
1037
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
1038
+ addq $8*25, %rsp
1039
+ mPopRegs
1040
+ retq
1041
+ .size KeccakP1600_Permute_Nrounds, .-KeccakP1600_Permute_Nrounds # placed after the code so the symbol size is the real code length
1042
+
1043
+ KeccakP1600_Permute_NroundsTable:
1044
+ .quad KeccakP1600_Permute_Nrounds1
1045
+ .quad KeccakP1600_Permute_Nrounds2
1046
+ .quad KeccakP1600_Permute_Nrounds3
1047
+ .quad KeccakP1600_Permute_Nrounds4
1048
+ .quad KeccakP1600_Permute_Nrounds5
1049
+ .quad KeccakP1600_Permute_Nrounds6
1050
+ .quad KeccakP1600_Permute_Nrounds7
1051
+ .quad KeccakP1600_Permute_Nrounds8
1052
+ .quad KeccakP1600_Permute_Nrounds9
1053
+ .quad KeccakP1600_Permute_Nrounds10
1054
+ .quad KeccakP1600_Permute_Nrounds11
1055
+ .quad KeccakP1600_Permute_Nrounds12
1056
+ .quad KeccakP1600_Permute_Nrounds13
1057
+ .quad KeccakP1600_Permute_Nrounds14
1058
+ .quad KeccakP1600_Permute_Nrounds15
1059
+ .quad KeccakP1600_Permute_Nrounds16
1060
+ .quad KeccakP1600_Permute_Nrounds17
1061
+ .quad KeccakP1600_Permute_Nrounds18
1062
+ .quad KeccakP1600_Permute_Nrounds19
1063
+ .quad KeccakP1600_Permute_Nrounds20
1064
+ .quad KeccakP1600_Permute_Nrounds21
1065
+ .quad KeccakP1600_Permute_Nrounds22
1066
+ .quad KeccakP1600_Permute_Nrounds23
1067
+ .quad KeccakP1600_Permute_Nrounds24
1068
+
1069
+ #----------------------------------------------------------------------------
1070
+ # Applies the 12-round permutation to the state in place: saves the callee-saved registers, expands mKeccakPermutation12, restores them.
1071
+ # void KeccakP1600_Permute_12rounds( void *state )
1072
+ #
1073
+ .align 8
1074
+ .global KeccakP1600_Permute_12rounds
1075
+ .type KeccakP1600_Permute_12rounds, %function
1076
+ KeccakP1600_Permute_12rounds:
1077
+ mPushRegs
1078
+ mKeccakPermutation12
1079
+ mPopRegs
1080
+ retq
1081
+ .size KeccakP1600_Permute_12rounds, .-KeccakP1600_Permute_12rounds # placed after the body so the symbol size is the real code length
1082
+
1083
+ #----------------------------------------------------------------------------
1084
+ # Applies the 24-round permutation to the state in place: saves the callee-saved registers, expands mKeccakPermutation24, restores them.
1085
+ # void KeccakP1600_Permute_24rounds( void *state )
1086
+ #
1087
+ .align 8
1088
+ .global KeccakP1600_Permute_24rounds
1089
+ .type KeccakP1600_Permute_24rounds, %function
1090
+ KeccakP1600_Permute_24rounds:
1091
+ mPushRegs
1092
+ mKeccakPermutation24
1093
+ mPopRegs
1094
+ retq
1095
+ .size KeccakP1600_Permute_24rounds, .-KeccakP1600_Permute_24rounds # placed after the body so the symbol size is the real code length
1096
+
1097
+ #----------------------------------------------------------------------------
1098
+ # Absorbs every complete laneCount-lane block that fits in dataByteLen: xor the block into the state, xor trailingBits into the lane right after the block, run the 24-round permutation. Returns the number of bytes consumed.
1099
+ # size_t KeccakF1600_FastLoop_Absorb( void *state, unsigned int laneCount, unsigned char *data,
1100
+ # size_t dataByteLen, unsigned char trailingBits )
1101
+ # laneCount == 21 takes a fully unrolled path; any other value goes through KeccakP1600_AddBytes and KeccakP1600_Permute_24rounds.
1102
+ .align 8
1103
+ .global KeccakF1600_FastLoop_Absorb
1104
+ .type KeccakF1600_FastLoop_Absorb, %function
1105
+ KeccakF1600_FastLoop_Absorb:
1106
+ mPushRegs
1107
+ pushq arg3 # save initial data pointer
1108
+ pushq arg5 # save trailingBits
1109
+ shrq $3, arg4 # nbrLanes = dataByteLen / SnP_laneLengthInBytes
1110
+ subq arg2, arg4 # if (nbrLanes >= laneCount)
1111
+ jc KeccakF1600_FastLoop_Absorb_Exit
1112
+ cmpq $21, arg2
1113
+ jnz KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop
1114
+ KeccakF1600_FastLoop_Absorb_Loop21: # Fixed laneCount = 21 (rate = 1344, capacity = 256)
1115
+ movq _ba(arg3), rT1a
1116
+ movq _be(arg3), rT1e
1117
+ movq _bi(arg3), rT1i
1118
+ movq _bo(arg3), rT1o
1119
+ movq _bu(arg3), rT1u
1120
+ movq _ga(arg3), rT2a
1121
+ movq _ge(arg3), rT2e
1122
+ movq _gi(arg3), rT2i
1123
+ movq _go(arg3), rT2o
1124
+ movq _gu(arg3), rT2u
1125
+ xorq rT1a, _ba(arg1)
1126
+ xorq rT1e, _be(arg1)
1127
+ xorq rT1i, _bi(arg1)
1128
+ xorq rT1o, _bo(arg1)
1129
+ xorq rT1u, _bu(arg1)
1130
+ xorq rT2a, _ga(arg1)
1131
+ xorq rT2e, _ge(arg1)
1132
+ xorq rT2i, _gi(arg1)
1133
+ xorq rT2o, _go(arg1)
1134
+ xorq rT2u, _gu(arg1)
1135
+ movq _ka(arg3), rT1a
1136
+ movq _ke(arg3), rT1e
1137
+ movq _ki(arg3), rT1i
1138
+ movq _ko(arg3), rT1o
1139
+ movq _ku(arg3), rT1u
1140
+ movq _ma(arg3), rT2a
1141
+ movq _me(arg3), rT2e
1142
+ movq _mi(arg3), rT2i
1143
+ movq _mo(arg3), rT2o
1144
+ movq _mu(arg3), rT2u
1145
+ xorq rT1a, _ka(arg1)
1146
+ xorq rT1e, _ke(arg1)
1147
+ xorq rT1i, _ki(arg1)
1148
+ xorq rT1o, _ko(arg1)
1149
+ xorq rT1u, _ku(arg1)
1150
+ movq _sa(arg3), rT1a
1151
+ movq (%rsp), rT1e # xor trailingBits
1152
+ xorq rT2a, _ma(arg1)
1153
+ xorq rT2e, _me(arg1)
1154
+ xorq rT2i, _mi(arg1)
1155
+ addq $_se, arg3 # data pointer += 21 lanes (_se is the offset of the lane after sa)
1156
+ xorq rT2o, _mo(arg1)
1157
+ xorq rT2u, _mu(arg1)
1158
+ xorq rT1a, _sa(arg1)
1159
+ xorq rT1e, _se(arg1) # trailingBits goes into the lane right after the 21 data lanes
1160
+ pushq arg3 # data pointer and lane counter must survive the permutation
1161
+ pushq arg4
1162
+ mKeccakPermutationInlinable24
1163
+ popq arg4
1164
+ popq arg3
1165
+ subq $21, arg4 # while (nbrLanes >= 21)
1166
+ jnc KeccakF1600_FastLoop_Absorb_Loop21
1167
+ KeccakF1600_FastLoop_Absorb_Exit:
1168
+ addq $8, %rsp # free trailingBits
1169
+ popq rT1a # restore initial data pointer
1170
+ subq rT1a, arg3 # processed = data pointer - initial data pointer
1171
+ movq arg3, rT1a # return value (rT1a is presumably the return register; its definition is outside this excerpt)
1172
+ mPopRegs
1173
+ retq
1174
+ KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop:
1175
+ pushq arg4
1176
+ pushq arg2
1177
+ pushq arg1
1178
+ movq arg2, arg4 # prepare xor call: length (in bytes)
1179
+ shlq $3, arg4
1180
+ movq arg3, arg2 # data pointer
1181
+ xorq arg3, arg3 # offset = 0
1182
+ callq KeccakP1600_AddBytes@PLT # (void *state, const unsigned char *data, unsigned int offset, unsigned int length)
1183
+ movq arg2, arg3 # updated data pointer
1184
+ movq 24(%rsp), rT1a # xor trailingBits
1185
+ xorq rT1a, (arg1) # AddBytes left arg1 just past the absorbed lanes
1186
+ popq arg1
1187
+ pushq arg3
1188
+ callq KeccakP1600_Permute_24rounds@PLT
1189
+ popq arg3
1190
+ popq arg2
1191
+ popq arg4
1192
+ subq arg2, arg4 # while (nbrLanes >= laneCount)
1193
+ jnc KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop
1194
+ jmp KeccakF1600_FastLoop_Absorb_Exit
1195
+ .size KeccakF1600_FastLoop_Absorb, .-KeccakF1600_FastLoop_Absorb # placed after the last instruction so the symbol size is the real code length
1196
+