digest-kangarootwelve 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +51 -11
  3. data/Rakefile +2 -2
  4. data/digest-kangarootwelve.gemspec +322 -42
  5. data/ext/digest/kangarootwelve/ext.c +1 -1
  6. data/ext/digest/kangarootwelve/extconf.rb +13 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/lib/digest/kangarootwelve/version.rb +1 -1
  305. metadata +299 -21
@@ -0,0 +1 @@
1
+ #include "../common/KeccakSpongeWidth1600.c"
@@ -0,0 +1,287 @@
1
+ /*
2
+ Implementation by Gilles Van Assche, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ This file contains macros that help make a PlSnP-compatible implementation by
14
+ serially falling back on a SnP-compatible implementation or on a PlSnP-compatible
15
+ implementation of lower parallelism degree.
16
+
17
+ Please refer to PlSnP-documentation.h for more details.
18
+ */
19
+
20
+ /* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
21
+ /* expect SnP_stateSizeInBytes, SnP_stateAlignment */
22
+ /* expect prefix */
23
+ /* expect SnP_* */
24
+
25
+ #define JOIN0(a, b) a ## b
26
+ #define JOIN(a, b) JOIN0(a, b)
27
+
28
+ #define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize)
29
+ #define PlSnP_InitializeAll JOIN(prefix, _InitializeAll)
30
+ #define PlSnP_AddByte JOIN(prefix, _AddByte)
31
+ #define PlSnP_AddBytes JOIN(prefix, _AddBytes)
32
+ #define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll)
33
+ #define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes)
34
+ #define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll)
35
+ #define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes)
36
+ #define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes)
37
+ #define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll)
38
+ #define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes)
39
+ #define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll)
40
+
41
+ #if (PlSnP_baseParallelism == 1)
42
+ #define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
43
+ #define SnP_stateAlignment JOIN(SnP, _stateAlignment)
44
+ #else
45
+ #define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes)
46
+ #define SnP_stateAlignment JOIN(SnP, _statesAlignment)
47
+ #endif
48
+ #define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism))
49
+ #define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment)
50
+ #define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset))
51
+
52
+ #define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
53
+ #define SnP_Initialize JOIN(SnP, _Initialize)
54
+ #define SnP_InitializeAll JOIN(SnP, _InitializeAll)
55
+ #define SnP_AddByte JOIN(SnP, _AddByte)
56
+ #define SnP_AddBytes JOIN(SnP, _AddBytes)
57
+ #define SnP_AddLanesAll JOIN(SnP, _AddLanesAll)
58
+ #define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
59
+ #define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll)
60
+ #define SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes)
61
+ #define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
62
+ #define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll)
63
+ #define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
64
+ #define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll)
65
+
66
/* One-time static set-up: forwards to the base implementation's StaticInitialize. */
void PlSnP_StaticInitialize(void)
{
    SnP_StaticInitialize();
}
70
+
71
+ void PlSnP_InitializeAll(void *states)
72
+ {
73
+ unsigned int i;
74
+
75
+ for(i=0; i<PlSnP_factor; i++)
76
+ #if (PlSnP_baseParallelism == 1)
77
+ SnP_Initialize(stateWithIndex(i));
78
+ #else
79
+ SnP_InitializeAll(stateWithIndex(i));
80
+ #endif
81
+ }
82
+
83
+ void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset)
84
+ {
85
+ #if (PlSnP_baseParallelism == 1)
86
+ SnP_AddByte(stateWithIndex(instanceIndex), byte, offset);
87
+ #else
88
+ SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset);
89
+ #endif
90
+ }
91
+
92
+ void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
93
+ {
94
+ #if (PlSnP_baseParallelism == 1)
95
+ SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length);
96
+ #else
97
+ SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
98
+ #endif
99
+ }
100
+
101
+ void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
102
+ {
103
+ unsigned int i;
104
+
105
+ for(i=0; i<PlSnP_factor; i++) {
106
+ #if (PlSnP_baseParallelism == 1)
107
+ SnP_AddBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
108
+ #else
109
+ SnP_AddLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
110
+ #endif
111
+ data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
112
+ }
113
+ }
114
+
115
+ void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
116
+ {
117
+ #if (PlSnP_baseParallelism == 1)
118
+ SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length);
119
+ #else
120
+ SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
121
+ #endif
122
+ }
123
+
124
+ void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
125
+ {
126
+ unsigned int i;
127
+
128
+ for(i=0; i<PlSnP_factor; i++) {
129
+ #if (PlSnP_baseParallelism == 1)
130
+ SnP_OverwriteBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
131
+ #else
132
+ SnP_OverwriteLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
133
+ #endif
134
+ data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
135
+ }
136
+ }
137
+
138
+ void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
139
+ {
140
+ #if (PlSnP_baseParallelism == 1)
141
+ SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount);
142
+ #else
143
+ SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount);
144
+ #endif
145
+ }
146
+
147
+ void PlSnP_PermuteAll(void *states)
148
+ {
149
+ unsigned int i;
150
+
151
+ for(i=0; i<PlSnP_factor; i++) {
152
+ #if (PlSnP_baseParallelism == 1)
153
+ SnP_Permute(stateWithIndex(i));
154
+ #else
155
+ SnP_PermuteAll(stateWithIndex(i));
156
+ #endif
157
+ }
158
+ }
159
+
160
#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds))
/*
 * 12-round variant of the permutation, applied to each base state in turn.
 * Only compiled when the includer defines a 12-round base permutation.
 */
void PlSnP_PermuteAll_12rounds(void *states)
{
    unsigned int group = 0;

    while (group < PlSnP_factor) {
#if (PlSnP_baseParallelism == 1)
        SnP_Permute_12rounds(stateWithIndex(group));
#else
        SnP_PermuteAll_12rounds(stateWithIndex(group));
#endif
        group++;
    }
}
#endif
174
+
175
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_6rounds))
/*
 * 6-round variant of the permutation, applied to each base state in turn.
 * A serial base goes through SnP_Permute_Nrounds with a round count of 6.
 */
void PlSnP_PermuteAll_6rounds(void *states)
{
    unsigned int group = 0;

    while (group < PlSnP_factor) {
#if (PlSnP_baseParallelism == 1)
        SnP_Permute_Nrounds(stateWithIndex(group), 6);
#else
        SnP_PermuteAll_6rounds(stateWithIndex(group));
#endif
        group++;
    }
}
#endif
189
+
190
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_4rounds))
/*
 * 4-round variant of the permutation, applied to each base state in turn.
 * A serial base goes through SnP_Permute_Nrounds with a round count of 4.
 */
void PlSnP_PermuteAll_4rounds(void *states)
{
    unsigned int group = 0;

    while (group < PlSnP_factor) {
#if (PlSnP_baseParallelism == 1)
        SnP_Permute_Nrounds(stateWithIndex(group), 4);
#else
        SnP_PermuteAll_4rounds(stateWithIndex(group));
#endif
        group++;
    }
}
#endif
204
+
205
+ void PlSnP_ExtractBytes(void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
206
+ {
207
+ #if (PlSnP_baseParallelism == 1)
208
+ SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length);
209
+ #else
210
+ SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
211
+ #endif
212
+ }
213
+
214
+ void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
215
+ {
216
+ unsigned int i;
217
+
218
+ for(i=0; i<PlSnP_factor; i++) {
219
+ #if (PlSnP_baseParallelism == 1)
220
+ SnP_ExtractBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
221
+ #else
222
+ SnP_ExtractLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
223
+ #endif
224
+ data += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
225
+ }
226
+ }
227
+
228
+ void PlSnP_ExtractAndAddBytes(void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
229
+ {
230
+ #if (PlSnP_baseParallelism == 1)
231
+ SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length);
232
+ #else
233
+ SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length);
234
+ #endif
235
+ }
236
+
237
+ void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
238
+ {
239
+ unsigned int i;
240
+
241
+ for(i=0; i<PlSnP_factor; i++) {
242
+ #if (PlSnP_baseParallelism == 1)
243
+ SnP_ExtractAndAddBytes(stateWithIndex(i), input, output, 0, laneCount*SnP_laneLengthInBytes);
244
+ #else
245
+ SnP_ExtractAndAddLanesAll(stateWithIndex(i), input, output, laneCount, laneOffset);
246
+ #endif
247
+ input += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
248
+ output += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
249
+ }
250
+ }
251
+
252
/* Drop every helper macro used by this file so it can be included again with other parameters. */

/* Token-pasting helpers */
#undef JOIN0
#undef JOIN

/* State layout helpers */
#undef PlSnP_factor
#undef SnP_stateOffset
#undef stateWithIndex
#undef SnP_stateAlignment
#undef SnP_stateSizeInBytes

/* Names of the functions defined by this file */
#undef PlSnP_StaticInitialize
#undef PlSnP_InitializeAll
#undef PlSnP_AddByte
#undef PlSnP_AddBytes
#undef PlSnP_AddLanesAll
#undef PlSnP_OverwriteBytes
#undef PlSnP_OverwriteLanesAll
#undef PlSnP_OverwriteWithZeroes
#undef PlSnP_PermuteAll
#undef PlSnP_ExtractBytes
#undef PlSnP_ExtractLanesAll
#undef PlSnP_ExtractAndAddBytes
#undef PlSnP_ExtractAndAddLanesAll

/* Names of the base-implementation functions */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_InitializeAll
#undef SnP_AddByte
#undef SnP_AddBytes
#undef SnP_AddLanesAll
#undef SnP_OverwriteBytes
#undef SnP_OverwriteWithZeroes
#undef SnP_OverwriteLanesAll
#undef SnP_ExtractBytes
#undef SnP_ExtractLanesAll
#undef SnP_ExtractAndAddBytes
#undef SnP_ExtractAndAddLanesAll
@@ -0,0 +1,140 @@
1
+ /*
2
+ Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
3
+ Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
4
+ hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ This file contains macros that help implement a permutation in a SnP-compatible way.
16
+ It converts an implementation that implements state input/output functions
17
+ in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane,
18
+ and similarly for Overwrite, Extract and ExtractAndAdd) to the byte-oriented SnP.
19
+ Please refer to SnP-documentation.h for more details.
20
+ */
21
+
22
+ #ifndef _SnP_Relaned_h_
23
+ #define _SnP_Relaned_h_
24
+
25
/*
 * SnP_AddBytes: byte-oriented add expressed through two lane-oriented primitives.
 *
 *   state, data, offset, length  - forwarded byte-level request
 *   SnP_AddLanes(state, data, laneCount)
 *                                - handles whole lanes starting at lane 0
 *   SnP_AddBytesInLane(state, lanePosition, data, offsetInLane, length)
 *                                - handles a byte range inside one lane
 *   SnP_laneLengthInBytes        - lane size in bytes; may be any integer
 *                                  expression (it is parenthesized at every use)
 *
 * offset == 0: all full lanes go through SnP_AddLanes, then the remaining
 * (length % lane size) bytes go through one SnP_AddBytesInLane call, which is
 * made even when that remainder is zero.
 * offset != 0: the range is walked lane by lane with SnP_AddBytesInLane only.
 *
 * The macro expands to a brace-enclosed block; `state`, `data`, `offset` and
 * `length` are evaluated more than once, so pass side-effect-free expressions.
 */
#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) == 0) { \
            SnP_AddLanes(state, data, (length)/(SnP_laneLengthInBytes)); \
            SnP_AddBytesInLane(state, \
                (length)/(SnP_laneLengthInBytes), \
                (data)+((length)/(SnP_laneLengthInBytes))*(SnP_laneLengthInBytes), \
                0, \
                (length)%(SnP_laneLengthInBytes)); \
        } \
        else { \
            unsigned int _sizeLeft = (length); \
            unsigned int _lanePosition = (offset)/(SnP_laneLengthInBytes); \
            unsigned int _offsetInLane = (offset)%(SnP_laneLengthInBytes); \
            const unsigned char *_curData = (data); \
            while(_sizeLeft > 0) { \
                /* bytes from the current position to the end of this lane */ \
                unsigned int _bytesInLane = (SnP_laneLengthInBytes) - _offsetInLane; \
                if (_bytesInLane > _sizeLeft) \
                    _bytesInLane = _sizeLeft; \
                SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
                _sizeLeft -= _bytesInLane; \
                _lanePosition++; \
                _offsetInLane = 0; \
                _curData += _bytesInLane; \
            } \
        } \
    }
52
+
53
/*
 * SnP_OverwriteBytes: byte-oriented overwrite expressed through two
 * lane-oriented primitives.
 *
 *   SnP_OverwriteLanes(state, data, laneCount)
 *                                - handles whole lanes starting at lane 0
 *   SnP_OverwriteBytesInLane(state, lanePosition, data, offsetInLane, length)
 *                                - handles a byte range inside one lane
 *   SnP_laneLengthInBytes        - lane size in bytes; may be any integer
 *                                  expression (it is parenthesized at every use)
 *
 * offset == 0: full lanes go through SnP_OverwriteLanes, then the remaining
 * (length % lane size) bytes go through one SnP_OverwriteBytesInLane call,
 * made even when that remainder is zero.
 * offset != 0: the range is walked lane by lane with SnP_OverwriteBytesInLane.
 *
 * The macro expands to a brace-enclosed block; `state`, `data`, `offset` and
 * `length` are evaluated more than once, so pass side-effect-free expressions.
 */
#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) == 0) { \
            SnP_OverwriteLanes(state, data, (length)/(SnP_laneLengthInBytes)); \
            SnP_OverwriteBytesInLane(state, \
                (length)/(SnP_laneLengthInBytes), \
                (data)+((length)/(SnP_laneLengthInBytes))*(SnP_laneLengthInBytes), \
                0, \
                (length)%(SnP_laneLengthInBytes)); \
        } \
        else { \
            unsigned int _sizeLeft = (length); \
            unsigned int _lanePosition = (offset)/(SnP_laneLengthInBytes); \
            unsigned int _offsetInLane = (offset)%(SnP_laneLengthInBytes); \
            const unsigned char *_curData = (data); \
            while(_sizeLeft > 0) { \
                /* bytes from the current position to the end of this lane */ \
                unsigned int _bytesInLane = (SnP_laneLengthInBytes) - _offsetInLane; \
                if (_bytesInLane > _sizeLeft) \
                    _bytesInLane = _sizeLeft; \
                SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
                _sizeLeft -= _bytesInLane; \
                _lanePosition++; \
                _offsetInLane = 0; \
                _curData += _bytesInLane; \
            } \
        } \
    }
80
+
81
/*
 * SnP_ExtractBytes: byte-oriented extraction expressed through two
 * lane-oriented primitives.
 *
 *   SnP_ExtractLanes(state, data, laneCount)
 *                                - handles whole lanes starting at lane 0
 *   SnP_ExtractBytesInLane(state, lanePosition, data, offsetInLane, length)
 *                                - handles a byte range inside one lane
 *   SnP_laneLengthInBytes        - lane size in bytes; may be any integer
 *                                  expression (it is parenthesized at every use)
 *
 * offset == 0: full lanes go through SnP_ExtractLanes, then the remaining
 * (length % lane size) bytes go through one SnP_ExtractBytesInLane call,
 * made even when that remainder is zero.
 * offset != 0: the range is walked lane by lane with SnP_ExtractBytesInLane.
 *
 * The macro expands to a brace-enclosed block; `state`, `data`, `offset` and
 * `length` are evaluated more than once, so pass side-effect-free expressions.
 */
#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) == 0) { \
            SnP_ExtractLanes(state, data, (length)/(SnP_laneLengthInBytes)); \
            SnP_ExtractBytesInLane(state, \
                (length)/(SnP_laneLengthInBytes), \
                (data)+((length)/(SnP_laneLengthInBytes))*(SnP_laneLengthInBytes), \
                0, \
                (length)%(SnP_laneLengthInBytes)); \
        } \
        else { \
            unsigned int _sizeLeft = (length); \
            unsigned int _lanePosition = (offset)/(SnP_laneLengthInBytes); \
            unsigned int _offsetInLane = (offset)%(SnP_laneLengthInBytes); \
            unsigned char *_curData = (data); \
            while(_sizeLeft > 0) { \
                /* bytes from the current position to the end of this lane */ \
                unsigned int _bytesInLane = (SnP_laneLengthInBytes) - _offsetInLane; \
                if (_bytesInLane > _sizeLeft) \
                    _bytesInLane = _sizeLeft; \
                SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
                _sizeLeft -= _bytesInLane; \
                _lanePosition++; \
                _offsetInLane = 0; \
                _curData += _bytesInLane; \
            } \
        } \
    }
108
+
109
/*
 * SnP_ExtractAndAddBytes: byte-oriented extract-and-add expressed through two
 * lane-oriented primitives.
 *
 *   SnP_ExtractAndAddLanes(state, input, output, laneCount)
 *                                - handles whole lanes starting at lane 0
 *   SnP_ExtractAndAddBytesInLane(state, lanePosition, input, output, offsetInLane, length)
 *                                - handles a byte range inside one lane
 *   SnP_laneLengthInBytes        - lane size in bytes; may be any integer
 *                                  expression (it is parenthesized at every use)
 *
 * offset == 0: full lanes go through SnP_ExtractAndAddLanes, then the remaining
 * (length % lane size) bytes go through one SnP_ExtractAndAddBytesInLane call,
 * made even when that remainder is zero.
 * offset != 0: the range is walked lane by lane, advancing `input` and
 * `output` together.
 *
 * The macro expands to a brace-enclosed block; its arguments are evaluated
 * more than once, so pass side-effect-free expressions.
 */
#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \
    { \
        if ((offset) == 0) { \
            SnP_ExtractAndAddLanes(state, input, output, (length)/(SnP_laneLengthInBytes)); \
            SnP_ExtractAndAddBytesInLane(state, \
                (length)/(SnP_laneLengthInBytes), \
                (input)+((length)/(SnP_laneLengthInBytes))*(SnP_laneLengthInBytes), \
                (output)+((length)/(SnP_laneLengthInBytes))*(SnP_laneLengthInBytes), \
                0, \
                (length)%(SnP_laneLengthInBytes)); \
        } \
        else { \
            unsigned int _sizeLeft = (length); \
            unsigned int _lanePosition = (offset)/(SnP_laneLengthInBytes); \
            unsigned int _offsetInLane = (offset)%(SnP_laneLengthInBytes); \
            const unsigned char *_curInput = (input); \
            unsigned char *_curOutput = (output); \
            while(_sizeLeft > 0) { \
                /* bytes from the current position to the end of this lane */ \
                unsigned int _bytesInLane = (SnP_laneLengthInBytes) - _offsetInLane; \
                if (_bytesInLane > _sizeLeft) \
                    _bytesInLane = _sizeLeft; \
                SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \
                _sizeLeft -= _bytesInLane; \
                _lanePosition++; \
                _offsetInLane = 0; \
                _curInput += _bytesInLane; \
                _curOutput += _bytesInLane; \
            } \
        } \
    }
139
+
140
+ #endif
@@ -0,0 +1 @@
1
+ #include "../../ext.c"
@@ -0,0 +1 @@
1
+ #include "../common/KangarooTwelve.c"
@@ -0,0 +1 @@
1
+ #include "../common/KeccakDuplexWidth1600.c"
@@ -0,0 +1,993 @@
1
+ # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
2
+ # Copyright (c) 2017 Ronny Van Keer
3
+ # All rights reserved.
4
+ #
5
+ # The source code in this file is licensed under the CRYPTOGAMS license.
6
+ # For further details see http://www.openssl.org/~appro/cryptogams/.
7
+ #
8
+ # Notes:
9
+ # The code for the permutation (__KeccakF1600) was generated with
10
+ # Andy Polyakov's keccak1600-avx2.pl from the CRYPTOGAMS project
11
+ # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
12
+ # The rest of the code was written by Ronny Van Keer.
13
+
14
+ .text
15
+
16
+ # -----------------------------------------------------------------------------
17
+ #
18
+ # void KeccakP1600_Initialize(void *state);
19
+ #
20
+ .globl KeccakP1600_Initialize
21
+ .type KeccakP1600_Initialize,@function
22
+ .align 32
23
+ KeccakP1600_Initialize:
24
+ vpxor %ymm0,%ymm0,%ymm0
25
+ vmovdqa %ymm0,0*32(%rdi)
26
+ vmovdqa %ymm0,1*32(%rdi)
27
+ vmovdqa %ymm0,2*32(%rdi)
28
+ vmovdqa %ymm0,3*32(%rdi)
29
+ vmovdqa %ymm0,4*32(%rdi)
30
+ vmovdqa %ymm0,5*32(%rdi)
31
+ movq $0,6*32(%rdi)
32
+ ret
33
+ .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
34
+
35
+ # -----------------------------------------------------------------------------
36
+ #
37
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
38
+ # %rdi %rsi %rdx
39
+ #
40
+ .globl KeccakP1600_AddByte
41
+ .type KeccakP1600_AddByte,@function
42
+ .align 32
43
+ KeccakP1600_AddByte:
44
+ mov %rdx, %rax
45
+ and $7, %rax
46
+ and $0xFFFFFFF8, %edx
47
+ mov mapState(%rdx), %rdx
48
+ add %rdx, %rdi
49
+ add %rax, %rdi
50
+ xorb %sil, (%rdi)
51
+ ret
52
+ .size KeccakP1600_AddByte,.-KeccakP1600_AddByte
53
+
54
+ # -----------------------------------------------------------------------------
55
+ #
56
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
57
+ # %rdi %rsi %rdx %rcx
58
+ #
59
+ .globl KeccakP1600_AddBytes
60
+ .type KeccakP1600_AddBytes,@function
61
+ .align 32
62
+ KeccakP1600_AddBytes:
63
+ cmp $0, %rcx
64
+ jz KeccakP1600_AddBytes_Exit
65
+ mov %rdx, %rax # rax offset in lane
66
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
67
+ lea mapState(%rdx), %rdx
68
+ and $7, %rax
69
+ jz KeccakP1600_AddBytes_LaneAlignedCheck
70
+ mov $8, %r9 # r9 is (max) length of incomplete lane
71
+ sub %rax, %r9
72
+ cmp %rcx, %r9
73
+ cmovae %rcx, %r9
74
+ sub %r9, %rcx # length -= length of incomplete lane
75
+ add (%rdx), %rax # rax = pointer to state lane
76
+ add $8, %rdx
77
+ add %rdi, %rax
78
+ KeccakP1600_AddBytes_NotAlignedLoop:
79
+ mov (%rsi), %r8b
80
+ inc %rsi
81
+ xorb %r8b, (%rax)
82
+ inc %rax
83
+ dec %r9
84
+ jnz KeccakP1600_AddBytes_NotAlignedLoop
85
+ jmp KeccakP1600_AddBytes_LaneAlignedCheck
86
+ KeccakP1600_AddBytes_LaneAlignedLoop:
87
+ mov (%rsi), %r8
88
+ add $8, %rsi
89
+ mov (%rdx), %rax
90
+ add $8, %rdx
91
+ add %rdi, %rax
92
+ xor %r8, (%rax)
93
+ KeccakP1600_AddBytes_LaneAlignedCheck:
94
+ sub $8, %rcx
95
+ jnc KeccakP1600_AddBytes_LaneAlignedLoop
96
+ KeccakP1600_AddBytes_LastIncompleteLane:
97
+ add $8, %rcx
98
+ jz KeccakP1600_AddBytes_Exit
99
+ mov (%rdx), %rax
100
+ add %rdi, %rax
101
+ KeccakP1600_AddBytes_LastIncompleteLaneLoop:
102
+ mov (%rsi), %r8b
103
+ inc %rsi
104
+ xor %r8b, (%rax)
105
+ inc %rax
106
+ dec %rcx
107
+ jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
108
+ KeccakP1600_AddBytes_Exit:
109
+ ret
110
+ .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
111
+
112
+ # -----------------------------------------------------------------------------
113
+ #
114
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
115
+ # %rdi %rsi %rdx %rcx
116
+ #
117
+ .globl KeccakP1600_OverwriteBytes
118
+ .type KeccakP1600_OverwriteBytes,@function
119
+ .align 32
120
+ KeccakP1600_OverwriteBytes:
121
+ cmp $0, %rcx
122
+ jz KeccakP1600_OverwriteBytes_Exit
123
+ mov %rdx, %rax # rax offset in lane
124
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
125
+ lea mapState(%rdx), %rdx
126
+ and $7, %rax
127
+ jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
128
+ mov $8, %r9 # r9 is (max) length of incomplete lane
129
+ sub %rax, %r9
130
+ cmp %rcx, %r9
131
+ cmovae %rcx, %r9
132
+ sub %r9, %rcx # length -= length of incomplete lane
133
+ add (%rdx), %rax # rax = pointer to state lane
134
+ add $8, %rdx
135
+ add %rdi, %rax
136
+ KeccakP1600_OverwriteBytes_NotAlignedLoop:
137
+ mov (%rsi), %r8b
138
+ inc %rsi
139
+ mov %r8b, (%rax)
140
+ inc %rax
141
+ dec %r9
142
+ jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
143
+ jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
144
+ KeccakP1600_OverwriteBytes_LaneAlignedLoop:
145
+ mov (%rsi), %r8
146
+ add $8, %rsi
147
+ mov (%rdx), %rax
148
+ add $8, %rdx
149
+ add %rdi, %rax
150
+ mov %r8, (%rax)
151
+ KeccakP1600_OverwriteBytes_LaneAlignedCheck:
152
+ sub $8, %rcx
153
+ jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop
154
+ KeccakP1600_OverwriteBytes_LastIncompleteLane:
155
+ add $8, %rcx
156
+ jz KeccakP1600_OverwriteBytes_Exit
157
+ mov (%rdx), %rax
158
+ add %rdi, %rax
159
+ KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
160
+ mov (%rsi), %r8b
161
+ inc %rsi
162
+ mov %r8b, (%rax)
163
+ inc %rax
164
+ dec %rcx
165
+ jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
166
+ KeccakP1600_OverwriteBytes_Exit:
167
+ ret
168
+ .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
169
+
170
+ # -----------------------------------------------------------------------------
171
+ #
172
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
173
+ # %rdi %rsi
174
+ #
175
+ .globl KeccakP1600_OverwriteWithZeroes
176
+ .type KeccakP1600_OverwriteWithZeroes,@function
177
+ .align 32
178
+ KeccakP1600_OverwriteWithZeroes:
179
+ cmp $0, %rsi
180
+ jz KeccakP1600_OverwriteWithZeroes_Exit
181
+ lea mapState, %rdx # rdx pointer into state index mapper
182
+ jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
183
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
184
+ mov (%rdx), %rax
185
+ add $8, %rdx
186
+ add %rdi, %rax
187
+ movq $0, (%rax)
188
+ KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
189
+ sub $8, %rsi
190
+ jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
191
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
192
+ add $8, %rsi
193
+ jz KeccakP1600_OverwriteWithZeroes_Exit
194
+ mov (%rdx), %rax
195
+ add %rdi, %rax
196
+ KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
197
+ movb $0, (%rax)
198
+ inc %rax
199
+ dec %rsi
200
+ jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
201
+ KeccakP1600_OverwriteWithZeroes_Exit:
202
+ ret
203
+ .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
204
+
205
+ # -----------------------------------------------------------------------------
206
+ #
207
+ # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
208
+ # %rdi %rsi %rdx %rcx
209
+ #
210
+ .globl KeccakP1600_ExtractBytes
211
+ .type KeccakP1600_ExtractBytes,@function
212
+ .align 32
213
+ KeccakP1600_ExtractBytes:
214
+ push %rbx
215
+ cmp $0, %rcx
216
+ jz KeccakP1600_ExtractBytes_Exit
217
+ mov %rdx, %rax # rax offset in lane
218
+ and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
219
+ lea mapState(%rdx), %rdx
220
+ and $7, %rax
221
+ jz KeccakP1600_ExtractBytes_LaneAlignedCheck
222
+ mov $8, %rbx # rbx is (max) length of incomplete lane
223
+ sub %rax, %rbx
224
+ cmp %rcx, %rbx
225
+ cmovae %rcx, %rbx
226
+ sub %rbx, %rcx # length -= length of incomplete lane
227
+ mov (%rdx), %r9
228
+ add $8, %rdx
229
+ add %rdi, %r9
230
+ add %rax, %r9
231
+ KeccakP1600_ExtractBytes_NotAlignedLoop:
232
+ mov (%r9), %r8b
233
+ inc %r9
234
+ mov %r8b, (%rsi)
235
+ inc %rsi
236
+ dec %rbx
237
+ jnz KeccakP1600_ExtractBytes_NotAlignedLoop
238
+ jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
239
+ KeccakP1600_ExtractBytes_LaneAlignedLoop:
240
+ mov (%rdx), %rax
241
+ add $8, %rdx
242
+ add %rdi, %rax
243
+ mov (%rax), %r8
244
+ mov %r8, (%rsi)
245
+ add $8, %rsi
246
+ KeccakP1600_ExtractBytes_LaneAlignedCheck:
247
+ sub $8, %rcx
248
+ jnc KeccakP1600_ExtractBytes_LaneAlignedLoop
249
+ KeccakP1600_ExtractBytes_LastIncompleteLane:
250
+ add $8, %rcx
251
+ jz KeccakP1600_ExtractBytes_Exit
252
+ mov (%rdx), %rax
253
+ add %rdi, %rax
254
+ mov (%rax), %r8
255
+ KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
256
+ mov %r8b, (%rsi)
257
+ shr $8, %r8
258
+ inc %rsi
259
+ dec %rcx
260
+ jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
261
+ KeccakP1600_ExtractBytes_Exit:
262
+ pop %rbx
263
+ ret
264
+ .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
265
+
266
+ # -----------------------------------------------------------------------------
267
+ #
268
+ # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
269
+ # %rdi %rsi %rdx %rcx %r8
270
+ #
271
+ .globl KeccakP1600_ExtractAndAddBytes
272
+ .type KeccakP1600_ExtractAndAddBytes,@function
273
+ .align 32
274
+ KeccakP1600_ExtractAndAddBytes:
275
+ push %rbx
276
+ push %r10
277
+ cmp $0, %r8
278
+ jz KeccakP1600_ExtractAndAddBytes_Exit
279
+ mov %rcx, %rax # rax offset in lane
280
+ and $0xFFFFFFF8, %ecx # rcx pointer into state index mapper
281
+ lea mapState(%rcx), %rcx
282
+ and $7, %rax
283
+ jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
284
+ mov $8, %rbx # rbx is (max) length of incomplete lane
285
+ sub %rax, %rbx
286
+ cmp %r8, %rbx
287
+ cmovae %r8, %rbx
288
+ sub %rbx, %r8 # length -= length of incomplete lane
289
+ mov (%rcx), %r9
290
+ add $8, %rcx
291
+ add %rdi, %r9
292
+ add %rax, %r9
293
+ KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
294
+ mov (%r9), %r10b
295
+ inc %r9
296
+ xor (%rsi), %r10b
297
+ inc %rsi
298
+ mov %r10b, (%rdx)
299
+ inc %rdx
300
+ dec %rbx
301
+ jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
302
+ jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
303
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
304
+ mov (%rcx), %rax
305
+ add $8, %rcx
306
+ add %rdi, %rax
307
+ mov (%rax), %r10
308
+ xor (%rsi), %r10
309
+ add $8, %rsi
310
+ mov %r10, (%rdx)
311
+ add $8, %rdx
312
+ KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
313
+ sub $8, %r8
314
+ jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
315
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
316
+ add $8, %r8
317
+ jz KeccakP1600_ExtractAndAddBytes_Exit
318
+ mov (%rcx), %rax
319
+ add %rdi, %rax
320
+ mov (%rax), %r10
321
+ KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
322
+ xor (%rsi), %r10b
323
+ inc %rsi
324
+ mov %r10b, (%rdx)
325
+ inc %rdx
326
+ shr $8, %r10
327
+ dec %r8
328
+ jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
329
+ KeccakP1600_ExtractAndAddBytes_Exit:
330
+ pop %r10
331
+ pop %rbx
332
+ ret
333
+ .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
334
+
335
+ # -----------------------------------------------------------------------------
336
+ #
337
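+ # internal: runs %eax rounds (must be >= 1) on the state held in %ymm0-%ymm6;
+ # %ymm7-%ymm15 are scratch. %r8 and %r9 point 96 bytes into the tables of
+ # left- and right-shift counts used by Rho; %r10 points at the value XORed
+ # into %ymm0 by Iota and is advanced by 32 bytes after every round.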
338
+ #
339
+ .type __KeccakF1600,@function
340
+ .align 32
341
+ __KeccakF1600:
342
+ .Loop_avx2:
343
+ ######################################### Theta
344
+ vpshufd $0b01001110,%ymm2,%ymm13
345
+ vpxor %ymm3,%ymm5,%ymm12
346
+ vpxor %ymm6,%ymm4,%ymm9
347
+ vpxor %ymm1,%ymm12,%ymm12
348
+ vpxor %ymm9,%ymm12,%ymm12 # C[1..4]
349
+
350
+ vpermq $0b10010011,%ymm12,%ymm11
351
+ vpxor %ymm2,%ymm13,%ymm13
352
+ vpermq $0b01001110,%ymm13,%ymm7
353
+
354
+ vpsrlq $63,%ymm12,%ymm8
355
+ vpaddq %ymm12,%ymm12,%ymm9
356
+ vpor %ymm9,%ymm8,%ymm8 # ROL64(C[1..4],1)
357
+
358
+ vpermq $0b00111001,%ymm8,%ymm15
359
+ vpxor %ymm11,%ymm8,%ymm14
360
+ vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4]
361
+
362
+ vpxor %ymm0,%ymm13,%ymm13
363
+ vpxor %ymm7,%ymm13,%ymm13 # C[0..0]
364
+
365
+ vpsrlq $63,%ymm13,%ymm7
366
+ vpaddq %ymm13,%ymm13,%ymm8
367
+ vpor %ymm7,%ymm8,%ymm8 # ROL64(C[0..0],1)
368
+
369
+ vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0]
370
+ vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0]
371
+
372
+ vpblendd $0b11000000,%ymm8,%ymm15,%ymm15
373
+ vpblendd $0b00000011,%ymm13,%ymm11,%ymm11
374
+ vpxor %ymm11,%ymm15,%ymm15 # D[1..4] = ROL64(C[2..4,0),1) ^ C[0..3]
375
+
376
+ ######################################### Rho + Pi + pre-Chi shuffle
377
+ vpsllvq 0*32-96(%r8),%ymm2,%ymm10
378
+ vpsrlvq 0*32-96(%r9),%ymm2,%ymm2
379
+ vpor %ymm10,%ymm2,%ymm2
380
+
381
+ vpxor %ymm15,%ymm3,%ymm3 # ^= D[1..4] from Theta
382
+ vpsllvq 2*32-96(%r8),%ymm3,%ymm11
383
+ vpsrlvq 2*32-96(%r9),%ymm3,%ymm3
384
+ vpor %ymm11,%ymm3,%ymm3
385
+
386
+ vpxor %ymm15,%ymm4,%ymm4 # ^= D[1..4] from Theta
387
+ vpsllvq 3*32-96(%r8),%ymm4,%ymm12
388
+ vpsrlvq 3*32-96(%r9),%ymm4,%ymm4
389
+ vpor %ymm12,%ymm4,%ymm4
390
+
391
+ vpxor %ymm15,%ymm5,%ymm5 # ^= D[1..4] from Theta
392
+ vpsllvq 4*32-96(%r8),%ymm5,%ymm13
393
+ vpsrlvq 4*32-96(%r9),%ymm5,%ymm5
394
+ vpor %ymm13,%ymm5,%ymm5
395
+
396
+ vpxor %ymm15,%ymm6,%ymm6 # ^= D[1..4] from Theta
397
+ vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3
398
+ vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4
399
+ vpsllvq 5*32-96(%r8),%ymm6,%ymm14
400
+ vpsrlvq 5*32-96(%r9),%ymm6,%ymm8
401
+ vpor %ymm14,%ymm8,%ymm8 # %ymm6 -> future %ymm1
402
+
403
+ vpxor %ymm15,%ymm1,%ymm1 # ^= D[1..4] from Theta
404
+ vpermq $0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5
405
+ vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6
406
+ vpsllvq 1*32-96(%r8),%ymm1,%ymm15
407
+ vpsrlvq 1*32-96(%r9),%ymm1,%ymm9
408
+ vpor %ymm15,%ymm9,%ymm9 # %ymm1 -> future %ymm2
409
+
410
+ ######################################### Chi
411
+ vpsrldq $8,%ymm8,%ymm14
412
+ vpandn %ymm14,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0]
413
+
414
+ vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0]
415
+ vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1]
416
+ vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4]
417
+ vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0]
418
+ vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0]
419
+ vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1]
420
+ vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4]
421
+ vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0]
422
+ vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0]
423
+ vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1]
424
+ vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4]
425
+ vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0]
426
+ vpandn %ymm15,%ymm3,%ymm3 # tgting [3][1] [1][2] [4][3] [2][4]
427
+ vpandn %ymm14,%ymm5,%ymm5 # tgting [3][2] [1][4] [4][1] [2][3]
428
+
429
+ vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3]
430
+ vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4]
431
+ vpxor %ymm10,%ymm3,%ymm3
432
+ vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3]
433
+ vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4]
434
+ vpxor %ymm12,%ymm5,%ymm5
435
+ vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3]
436
+ vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4]
437
+ vpandn %ymm15,%ymm6,%ymm6 # tgting [3][3] [1][1] [4][4] [2][2]
438
+ vpxor %ymm13,%ymm6,%ymm6
439
+
440
+ vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3]
441
+ vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3]
442
+ vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2]
443
+ vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2]
444
+ vpandn %ymm15,%ymm1,%ymm1 # tgting [0][4] [0][3] [0][2] [0][1]
445
+
446
+ vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1]
447
+ vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2]
448
+ vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1]
449
+ vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2]
450
+ vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1]
451
+ vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2]
452
+ vpandn %ymm14,%ymm2,%ymm2 # tgting [3][0] [1][0] [4][0] [2][0]
453
+ vpxor %ymm9,%ymm2,%ymm2
454
+
455
+ vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0]
456
+ vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle
457
+ vpermq $0b10001101,%ymm5,%ymm5
458
+ vpermq $0b01110010,%ymm6,%ymm6
459
+
460
+ vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2]
461
+ vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3]
462
+ vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2]
463
+ vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3]
464
+ vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2]
465
+ vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3]
466
+ vpandn %ymm14,%ymm4,%ymm4 # tgting [3][4] [1][3] [4][2] [2][1]
467
+
468
+ vpxor %ymm7,%ymm0,%ymm0
469
+ vpxor %ymm8,%ymm1,%ymm1
470
+ vpxor %ymm11,%ymm4,%ymm4
471
+
472
+ ######################################### Iota
473
+ vpxor (%r10),%ymm0,%ymm0
474
+ lea 32(%r10),%r10
475
+
476
+ dec %eax
477
+ jnz .Loop_avx2
478
+ ret
479
+ .size __KeccakF1600,.-__KeccakF1600
480
+
481
+ # KeccakP1600_Permute_24rounds: loads the 200-byte state at %rdi into %ymm0-%ymm6, runs __KeccakF1600 for 24 rounds, and writes the result back to the same buffer.
482
+ # Overwrites %rax, %r8, %r9, %r10 and the %ymm registers, and returns with %rdi advanced by 96; vzeroupper is issued before the loads and before returning.
483
+ .globl KeccakP1600_Permute_24rounds
484
+ .type KeccakP1600_Permute_24rounds,@function
485
+ .align 32
486
+ KeccakP1600_Permute_24rounds:
487
+ lea rhotates_left+96(%rip),%r8 # table base biased by +96; __KeccakF1600 addresses rows as k*32-96(%r8)
488
+ lea rhotates_right+96(%rip),%r9 # same bias for the right-shift counts
489
+ lea iotas(%rip),%r10 # first round-constant row
490
+ mov $24,%eax # round counter decremented by __KeccakF1600
491
+ lea 96(%rdi),%rdi # bias the state pointer by +96; all state accesses below use -96 based offsets
492
+ vzeroupper
493
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
494
+ vmovdqu 8+32*0-96(%rdi),%ymm1 # state bytes 8..39
495
+ vmovdqu 8+32*1-96(%rdi),%ymm2 # state bytes 40..71
496
+ vmovdqu 8+32*2-96(%rdi),%ymm3 # state bytes 72..103
497
+ vmovdqu 8+32*3-96(%rdi),%ymm4 # state bytes 104..135
498
+ vmovdqu 8+32*4-96(%rdi),%ymm5 # state bytes 136..167
499
+ vmovdqu 8+32*5-96(%rdi),%ymm6 # state bytes 168..199
500
+ call __KeccakF1600
501
+ vmovq %xmm0,-96(%rdi) # store only the low qword of %ymm0 back to state bytes 0..7
502
+ vmovdqu %ymm1,8+32*0-96(%rdi)
503
+ vmovdqu %ymm2,8+32*1-96(%rdi)
504
+ vmovdqu %ymm3,8+32*2-96(%rdi)
505
+ vmovdqu %ymm4,8+32*3-96(%rdi)
506
+ vmovdqu %ymm5,8+32*4-96(%rdi)
507
+ vmovdqu %ymm6,8+32*5-96(%rdi)
508
+ vzeroupper
509
+ ret
510
+ .size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
511
+ # KeccakP1600_Permute_12rounds: same load / permute / store sequence as the 24-round entry point, but runs 12 rounds starting at the 13th row of iotas, i.e. the last 12 round constants. Leaves %rdi advanced by 96.
512
+ .globl KeccakP1600_Permute_12rounds
513
+ .type KeccakP1600_Permute_12rounds,@function
514
+ .align 32
515
+ KeccakP1600_Permute_12rounds:
516
+ lea rhotates_left+96(%rip),%r8 # table base biased by +96; __KeccakF1600 addresses rows as k*32-96(%r8)
517
+ lea rhotates_right+96(%rip),%r9 # same bias for the right-shift counts
518
+ lea iotas+12*4*8(%rip),%r10 # skip 12 rows of 4 qwords (32 bytes each)
519
+ mov $12,%eax # round counter decremented by __KeccakF1600
520
+ lea 96(%rdi),%rdi # bias the state pointer by +96; all state accesses below use -96 based offsets
521
+ vzeroupper
522
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
523
+ vmovdqu 8+32*0-96(%rdi),%ymm1 # state bytes 8..39
524
+ vmovdqu 8+32*1-96(%rdi),%ymm2 # state bytes 40..71
525
+ vmovdqu 8+32*2-96(%rdi),%ymm3 # state bytes 72..103
526
+ vmovdqu 8+32*3-96(%rdi),%ymm4 # state bytes 104..135
527
+ vmovdqu 8+32*4-96(%rdi),%ymm5 # state bytes 136..167
528
+ vmovdqu 8+32*5-96(%rdi),%ymm6 # state bytes 168..199
529
+ call __KeccakF1600
530
+ vmovq %xmm0,-96(%rdi) # store only the low qword of %ymm0 back to state bytes 0..7
531
+ vmovdqu %ymm1,8+32*0-96(%rdi)
532
+ vmovdqu %ymm2,8+32*1-96(%rdi)
533
+ vmovdqu %ymm3,8+32*2-96(%rdi)
534
+ vmovdqu %ymm4,8+32*3-96(%rdi)
535
+ vmovdqu %ymm5,8+32*4-96(%rdi)
536
+ vmovdqu %ymm6,8+32*5-96(%rdi)
537
+ vzeroupper
538
+ ret
539
+ .size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
540
+ # KeccakP1600_Permute_Nrounds: permutes the state at %rdi using the round count passed in %rsi, consuming the last %rsi rows of iotas. NOTE(review): %rsi is used as a full 64-bit value and no guard for a zero count is visible here - confirm both against the C prototype and callers.
541
+ .globl KeccakP1600_Permute_Nrounds
542
+ .type KeccakP1600_Permute_Nrounds,@function
543
+ .align 32
544
+ KeccakP1600_Permute_Nrounds:
545
+ lea rhotates_left+96(%rip),%r8 # table base biased by +96; __KeccakF1600 addresses rows as k*32-96(%r8)
546
+ lea rhotates_right+96(%rip),%r9 # same bias for the right-shift counts
547
+ lea iotas+24*4*8(%rip),%r10 # one past the 24th round-constant row
548
+ mov %rsi,%rax # round counter; the loop in __KeccakF1600 decrements %eax
549
+ shl $2+3,%rsi # rounds * 32 = bytes of round constants needed
550
+ sub %rsi, %r10 # back up so the final round uses the last row of iotas
551
+ lea 96(%rdi),%rdi # bias the state pointer by +96; all state accesses below use -96 based offsets
552
+ vzeroupper
553
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
554
+ vmovdqu 8+32*0-96(%rdi),%ymm1 # state bytes 8..39
555
+ vmovdqu 8+32*1-96(%rdi),%ymm2 # state bytes 40..71
556
+ vmovdqu 8+32*2-96(%rdi),%ymm3 # state bytes 72..103
557
+ vmovdqu 8+32*3-96(%rdi),%ymm4 # state bytes 104..135
558
+ vmovdqu 8+32*4-96(%rdi),%ymm5 # state bytes 136..167
559
+ vmovdqu 8+32*5-96(%rdi),%ymm6 # state bytes 168..199
560
+ call __KeccakF1600
561
+ vmovq %xmm0,-96(%rdi) # store only the low qword of %ymm0 back to state bytes 0..7
562
+ vmovdqu %ymm1,8+32*0-96(%rdi)
563
+ vmovdqu %ymm2,8+32*1-96(%rdi)
564
+ vmovdqu %ymm3,8+32*2-96(%rdi)
565
+ vmovdqu %ymm4,8+32*3-96(%rdi)
566
+ vmovdqu %ymm5,8+32*4-96(%rdi)
567
+ vmovdqu %ymm6,8+32*5-96(%rdi)
568
+ vzeroupper
569
+ ret
570
+ .size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
571
+
572
+ # -----------------------------------------------------------------------------
573
+ # Absorbs as many whole blocks of laneCount 64-bit lanes as dataByteLen allows: each block is XORed into the state and followed by the 24-round permutation. Returns the number of bytes consumed.
574
+ # size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
575
+ # %rdi %rsi %rdx %rcx
576
+ # Register roles: %rbx = initial data pointer, %rdx = data cursor, %rcx = lanes not yet consumed, %r8/%r9 = rotation tables (fast paths), %r10 = round constants.
+ # laneCount 21 and 17 take AVX2 paths that keep the state in %ymm0-%ymm6 across blocks; any other count XORs lane by lane through mapState and calls KeccakP1600_Permute_24rounds.
+ # laneCount is not range-checked: the generic path assumes 1..25 (mapState has 25 entries and the lane loop runs at least once).
577
+ .globl KeccakF1600_FastLoop_Absorb
578
+ .type KeccakF1600_FastLoop_Absorb,@function
579
+ .align 32
580
+ KeccakF1600_FastLoop_Absorb:
581
+ push %rbx
582
+ push %r10
+ mov %esi, %esi # laneCount is a 32-bit argument: clear the unspecified upper half of %rsi before the 64-bit compares and subtractions below
583
+ shr $3, %rcx # rcx = data length in lanes
584
+ mov %rdx, %rbx # rbx = initial data pointer
585
+ cmp %rsi, %rcx
586
+ jb KeccakF1600_FastLoop_Absorb_Exit # less than one full block: consume nothing
587
+ vzeroupper
588
+ cmp $21, %rsi
589
+ jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
590
+ sub $21, %rcx # rcx = lanes left after the block about to be absorbed (cannot borrow: rcx >= 21 here)
591
+ lea rhotates_left+96(%rip),%r8
592
+ lea rhotates_right+96(%rip),%r9
593
+ lea 96(%rdi),%rdi # bias the state pointer by +96; state accesses below use -96 based offsets
594
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
595
+ vmovdqu 8+32*0-96(%rdi),%ymm1
596
+ vmovdqu 8+32*1-96(%rdi),%ymm2
597
+ vmovdqu 8+32*2-96(%rdi),%ymm3
598
+ vmovdqu 8+32*3-96(%rdi),%ymm4
599
+ vmovdqu 8+32*4-96(%rdi),%ymm5
600
+ vmovdqu 8+32*5-96(%rdi),%ymm6
601
+ KeccakF1600_FastLoop_Absorb_Loop21Lanes:
602
+ vpbroadcastq (%rdx),%ymm7 # data lane 0, broadcast to match %ymm0
603
+ vmovdqu 8(%rdx),%ymm8 # data lanes 1..4
604
+
605
+ vmovdqa map2(%rip), %xmm15 # gather indices: byte offsets of the data lanes that belong in %ymm2
606
+ vpcmpeqq %ymm14, %ymm14, %ymm14 # all-ones mask: every lane of map2 is below 21
607
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
608
+
609
+ vmovdqa mask3_21(%rip), %ymm14 # mask is reloaded before every gather (the instruction clears it)
610
+ vpxor %ymm10, %ymm10, %ymm10 # masked-off elements must contribute zero to the XOR
611
+ vmovdqa map3(%rip), %xmm15
612
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
613
+
614
+ vmovdqa mask4_21(%rip), %ymm14
615
+ vpxor %ymm11, %ymm11, %ymm11
616
+ vmovdqa map4(%rip), %xmm15
617
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
618
+
619
+ vmovdqa mask5_21(%rip), %ymm14
620
+ vpxor %ymm12, %ymm12, %ymm12
621
+ vmovdqa map5(%rip), %xmm15
622
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
623
+
624
+ vmovdqa mask6_21(%rip), %ymm14
625
+ vpxor %ymm13, %ymm13, %ymm13
626
+ vmovdqa map6(%rip), %xmm15
627
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
628
+
629
+ vpxor %ymm7,%ymm0,%ymm0 # XOR the block into the in-register state
630
+ vpxor %ymm8,%ymm1,%ymm1
631
+ vpxor %ymm9,%ymm2,%ymm2
632
+ vpxor %ymm10,%ymm3,%ymm3
633
+ vpxor %ymm11,%ymm4,%ymm4
634
+ vpxor %ymm12,%ymm5,%ymm5
635
+ vpxor %ymm13,%ymm6,%ymm6
636
+ add $21*8, %rdx # advance the data cursor by one block
637
+ lea iotas(%rip),%r10 # reloaded per block: __KeccakF1600 advances %r10 and counts %eax down
638
+ mov $24,%eax
639
+ call __KeccakF1600
640
+ sub $21, %rcx
641
+ jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
642
+ KeccakF1600_FastLoop_Absorb_SaveAndExit:
643
+ vmovq %xmm0,-96(%rdi) # write the in-register state back to memory
644
+ vmovdqu %ymm1,8+32*0-96(%rdi)
645
+ vmovdqu %ymm2,8+32*1-96(%rdi)
646
+ vmovdqu %ymm3,8+32*2-96(%rdi)
647
+ vmovdqu %ymm4,8+32*3-96(%rdi)
648
+ vmovdqu %ymm5,8+32*4-96(%rdi)
649
+ vmovdqu %ymm6,8+32*5-96(%rdi)
650
+ KeccakF1600_FastLoop_Absorb_Exit:
651
+ vzeroupper
652
+ mov %rdx, %rax # return number of bytes processed
653
+ sub %rbx, %rax
654
+ pop %r10
655
+ pop %rbx
656
+ ret
657
+ KeccakF1600_FastLoop_Absorb_Not21Lanes:
658
+ cmp $17, %rsi
659
+ jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
660
+ sub $17, %rcx # rcx = lanes left after the block about to be absorbed (cannot borrow: rcx >= 17 here)
661
+ lea rhotates_left+96(%rip),%r8
662
+ lea rhotates_right+96(%rip),%r9
663
+ lea 96(%rdi),%rdi # bias the state pointer by +96; state accesses below use -96 based offsets
664
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
665
+ vmovdqu 8+32*0-96(%rdi),%ymm1
666
+ vmovdqu 8+32*1-96(%rdi),%ymm2
667
+ vmovdqu 8+32*2-96(%rdi),%ymm3
668
+ vmovdqu 8+32*3-96(%rdi),%ymm4
669
+ vmovdqu 8+32*4-96(%rdi),%ymm5
670
+ vmovdqu 8+32*5-96(%rdi),%ymm6
671
+ KeccakF1600_FastLoop_Absorb_Loop17Lanes:
672
+ vpbroadcastq (%rdx),%ymm7 # data lane 0, broadcast to match %ymm0
673
+ vmovdqu 8(%rdx),%ymm8 # data lanes 1..4
674
+
675
+ vmovdqa mask2_17(%rip), %ymm14 # mask*_17 disables every gather element whose lane index is 17 or above
676
+ vpxor %ymm9, %ymm9, %ymm9 # masked-off elements must contribute zero to the XOR
677
+ vmovdqa map2(%rip), %xmm15
678
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
679
+
680
+ vmovdqa mask3_17(%rip), %ymm14
681
+ vpxor %ymm10, %ymm10, %ymm10
682
+ vmovdqa map3(%rip), %xmm15
683
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
684
+
685
+ vmovdqa mask4_17(%rip), %ymm14
686
+ vpxor %ymm11, %ymm11, %ymm11
687
+ vmovdqa map4(%rip), %xmm15
688
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
689
+
690
+ vmovdqa mask5_17(%rip), %ymm14
691
+ vpxor %ymm12, %ymm12, %ymm12
692
+ vmovdqa map5(%rip), %xmm15
693
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
694
+
695
+ vmovdqa mask6_17(%rip), %ymm14
696
+ vpxor %ymm13, %ymm13, %ymm13
697
+ vmovdqa map6(%rip), %xmm15
698
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
699
+
700
+ vpxor %ymm7,%ymm0,%ymm0 # XOR the block into the in-register state
701
+ vpxor %ymm8,%ymm1,%ymm1
702
+ vpxor %ymm9,%ymm2,%ymm2
703
+ vpxor %ymm10,%ymm3,%ymm3
704
+ vpxor %ymm11,%ymm4,%ymm4
705
+ vpxor %ymm12,%ymm5,%ymm5
706
+ vpxor %ymm13,%ymm6,%ymm6
707
+ add $17*8, %rdx # advance the data cursor by one block
708
+ lea iotas(%rip),%r10 # reloaded per block: __KeccakF1600 advances %r10 and counts %eax down
709
+ mov $24,%eax
710
+ call __KeccakF1600
711
+ sub $17, %rcx
712
+ jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes # no borrow: another full block is available
713
+ jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
714
+ KeccakF1600_FastLoop_Absorb_Not17Lanes:
715
+ lea mapState(%rip), %r9 # r9 walks the per-lane state offsets
716
+ mov %rsi, %rax # rax = lanes left in this block
717
+ KeccakF1600_FastLoop_Absorb_LanesAddLoop:
718
+ mov (%rdx), %r8 # next data lane
719
+ add $8, %rdx
720
+ mov (%r9), %r10 # byte offset of this lane inside the state
721
+ add $8, %r9
722
+ add %rdi, %r10
723
+ xor %r8, (%r10) # state lane ^= data lane, directly in memory
724
+ sub $1, %rax
725
+ jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
726
+ sub %rsi, %rcx # one block consumed
727
+ push %rdi # keep the loop registers across the call; the callee returns with %rdi advanced by 96
728
+ push %rsi
729
+ push %rdx
730
+ push %rcx
731
+ call KeccakP1600_Permute_24rounds
732
+ pop %rcx
733
+ pop %rdx
734
+ pop %rsi
735
+ pop %rdi
736
+ cmp %rsi, %rcx
737
+ jae KeccakF1600_FastLoop_Absorb_Not17Lanes # another full block is available
738
+ jmp KeccakF1600_FastLoop_Absorb_Exit # the state is already in memory on this path, so no write-back is needed
739
+ .size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
740
+
741
+ # -----------------------------------------------------------------------------
742
+ # Absorbs as many whole blocks of laneCount 64-bit lanes as dataByteLen allows: each block is XORed into the state and followed by the 12-round permutation (last 12 rows of iotas). Returns the number of bytes consumed.
743
+ # size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
744
+ # %rdi %rsi %rdx %rcx
745
+ # Register roles: %rbx = initial data pointer, %rdx = data cursor, %rcx = lanes not yet consumed, %r8/%r9 = rotation tables (fast paths), %r10 = round constants.
+ # laneCount 21 and 17 take AVX2 paths that keep the state in %ymm0-%ymm6 across blocks; any other count XORs lane by lane through mapState and calls KeccakP1600_Permute_12rounds.
+ # laneCount is not range-checked: the generic path assumes 1..25 (mapState has 25 entries and the lane loop runs at least once).
746
+ .globl KeccakP1600_12rounds_FastLoop_Absorb
747
+ .type KeccakP1600_12rounds_FastLoop_Absorb,@function
748
+ .align 32
749
+ KeccakP1600_12rounds_FastLoop_Absorb:
750
+ push %rbx
751
+ push %r10
+ mov %esi, %esi # laneCount is a 32-bit argument: clear the unspecified upper half of %rsi before the 64-bit compares and subtractions below
752
+ shr $3, %rcx # rcx = data length in lanes
753
+ mov %rdx, %rbx # rbx = initial data pointer
754
+ cmp %rsi, %rcx
755
+ jb KeccakP1600_12rounds_FastLoop_Absorb_Exit # less than one full block: consume nothing
756
+ vzeroupper
757
+ cmp $21, %rsi
758
+ jnz KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes
759
+ sub $21, %rcx # rcx = lanes left after the block about to be absorbed (cannot borrow: rcx >= 21 here)
760
+ lea rhotates_left+96(%rip),%r8
761
+ lea rhotates_right+96(%rip),%r9
762
+ lea 96(%rdi),%rdi # bias the state pointer by +96; state accesses below use -96 based offsets
763
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
764
+ vmovdqu 8+32*0-96(%rdi),%ymm1
765
+ vmovdqu 8+32*1-96(%rdi),%ymm2
766
+ vmovdqu 8+32*2-96(%rdi),%ymm3
767
+ vmovdqu 8+32*3-96(%rdi),%ymm4
768
+ vmovdqu 8+32*4-96(%rdi),%ymm5
769
+ vmovdqu 8+32*5-96(%rdi),%ymm6
770
+ KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
771
+ vpbroadcastq (%rdx),%ymm7 # data lane 0, broadcast to match %ymm0
772
+ vmovdqu 8(%rdx),%ymm8 # data lanes 1..4
773
+
774
+ vmovdqa map2(%rip), %xmm15 # gather indices: byte offsets of the data lanes that belong in %ymm2
775
+ vpcmpeqq %ymm14, %ymm14, %ymm14 # all-ones mask: every lane of map2 is below 21
776
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
777
+
778
+ vmovdqa mask3_21(%rip), %ymm14 # mask is reloaded before every gather (the instruction clears it)
779
+ vpxor %ymm10, %ymm10, %ymm10 # masked-off elements must contribute zero to the XOR
780
+ vmovdqa map3(%rip), %xmm15
781
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
782
+
783
+ vmovdqa mask4_21(%rip), %ymm14
784
+ vpxor %ymm11, %ymm11, %ymm11
785
+ vmovdqa map4(%rip), %xmm15
786
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
787
+
788
+ vmovdqa mask5_21(%rip), %ymm14
789
+ vpxor %ymm12, %ymm12, %ymm12
790
+ vmovdqa map5(%rip), %xmm15
791
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
792
+
793
+ vmovdqa mask6_21(%rip), %ymm14
794
+ vpxor %ymm13, %ymm13, %ymm13
795
+ vmovdqa map6(%rip), %xmm15
796
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
797
+
798
+ vpxor %ymm7,%ymm0,%ymm0 # XOR the block into the in-register state
799
+ vpxor %ymm8,%ymm1,%ymm1
800
+ vpxor %ymm9,%ymm2,%ymm2
801
+ vpxor %ymm10,%ymm3,%ymm3
802
+ vpxor %ymm11,%ymm4,%ymm4
803
+ vpxor %ymm12,%ymm5,%ymm5
804
+ vpxor %ymm13,%ymm6,%ymm6
805
+ add $21*8, %rdx # advance the data cursor by one block
806
+ lea iotas+12*4*8(%rip),%r10 # start at the 13th row; reloaded per block because __KeccakF1600 advances %r10
807
+ mov $12,%eax
808
+ call __KeccakF1600
809
+ sub $21, %rcx
810
+ jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
811
+ KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
812
+ vmovq %xmm0,-96(%rdi) # write the in-register state back to memory
813
+ vmovdqu %ymm1,8+32*0-96(%rdi)
814
+ vmovdqu %ymm2,8+32*1-96(%rdi)
815
+ vmovdqu %ymm3,8+32*2-96(%rdi)
816
+ vmovdqu %ymm4,8+32*3-96(%rdi)
817
+ vmovdqu %ymm5,8+32*4-96(%rdi)
818
+ vmovdqu %ymm6,8+32*5-96(%rdi)
819
+ KeccakP1600_12rounds_FastLoop_Absorb_Exit:
820
+ vzeroupper
821
+ mov %rdx, %rax # return number of bytes processed
822
+ sub %rbx, %rax
823
+ pop %r10
824
+ pop %rbx
825
+ ret
826
+ KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
827
+ cmp $17, %rsi
828
+ jnz KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
829
+ sub $17, %rcx # rcx = lanes left after the block about to be absorbed (cannot borrow: rcx >= 17 here)
830
+ lea rhotates_left+96(%rip),%r8
831
+ lea rhotates_right+96(%rip),%r9
832
+ lea 96(%rdi),%rdi # bias the state pointer by +96; state accesses below use -96 based offsets
833
+ vpbroadcastq -96(%rdi),%ymm0 # broadcast the first 8 bytes of the state to all four qwords of %ymm0
834
+ vmovdqu 8+32*0-96(%rdi),%ymm1
835
+ vmovdqu 8+32*1-96(%rdi),%ymm2
836
+ vmovdqu 8+32*2-96(%rdi),%ymm3
837
+ vmovdqu 8+32*3-96(%rdi),%ymm4
838
+ vmovdqu 8+32*4-96(%rdi),%ymm5
839
+ vmovdqu 8+32*5-96(%rdi),%ymm6
840
+ KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
841
+ vpbroadcastq (%rdx),%ymm7 # data lane 0, broadcast to match %ymm0
842
+ vmovdqu 8(%rdx),%ymm8 # data lanes 1..4
843
+
844
+ vmovdqa mask2_17(%rip), %ymm14 # mask*_17 disables every gather element whose lane index is 17 or above
845
+ vpxor %ymm9, %ymm9, %ymm9 # masked-off elements must contribute zero to the XOR
846
+ vmovdqa map2(%rip), %xmm15
847
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
848
+
849
+ vmovdqa mask3_17(%rip), %ymm14
850
+ vpxor %ymm10, %ymm10, %ymm10
851
+ vmovdqa map3(%rip), %xmm15
852
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
853
+
854
+ vmovdqa mask4_17(%rip), %ymm14
855
+ vpxor %ymm11, %ymm11, %ymm11
856
+ vmovdqa map4(%rip), %xmm15
857
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
858
+
859
+ vmovdqa mask5_17(%rip), %ymm14
860
+ vpxor %ymm12, %ymm12, %ymm12
861
+ vmovdqa map5(%rip), %xmm15
862
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
863
+
864
+ vmovdqa mask6_17(%rip), %ymm14
865
+ vpxor %ymm13, %ymm13, %ymm13
866
+ vmovdqa map6(%rip), %xmm15
867
+ vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
868
+
869
+ vpxor %ymm7,%ymm0,%ymm0 # XOR the block into the in-register state
870
+ vpxor %ymm8,%ymm1,%ymm1
871
+ vpxor %ymm9,%ymm2,%ymm2
872
+ vpxor %ymm10,%ymm3,%ymm3
873
+ vpxor %ymm11,%ymm4,%ymm4
874
+ vpxor %ymm12,%ymm5,%ymm5
875
+ vpxor %ymm13,%ymm6,%ymm6
876
+ add $17*8, %rdx # advance the data cursor by one block
877
+ lea iotas+12*4*8(%rip),%r10 # start at the 13th row; reloaded per block because __KeccakF1600 advances %r10
878
+ mov $12,%eax
879
+ call __KeccakF1600
880
+ sub $17, %rcx
881
+ jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes # no borrow: another full block is available
882
+ jmp KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit
883
+ KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
884
+ lea mapState(%rip), %r9 # r9 walks the per-lane state offsets
885
+ mov %rsi, %rax # rax = lanes left in this block
886
+ KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
887
+ mov (%rdx), %r8 # next data lane
888
+ add $8, %rdx
889
+ mov (%r9), %r10 # byte offset of this lane inside the state
890
+ add $8, %r9
891
+ add %rdi, %r10
892
+ xor %r8, (%r10) # state lane ^= data lane, directly in memory
893
+ sub $1, %rax
894
+ jnz KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
895
+ sub %rsi, %rcx # one block consumed
896
+ push %rdi # keep the loop registers across the call; the callee returns with %rdi advanced by 96
897
+ push %rsi
898
+ push %rdx
899
+ push %rcx
900
+ call KeccakP1600_Permute_12rounds
901
+ pop %rcx
902
+ pop %rdx
903
+ pop %rsi
904
+ pop %rdi
905
+ cmp %rsi, %rcx
906
+ jae KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes # another full block is available
907
+ jmp KeccakP1600_12rounds_FastLoop_Absorb_Exit # the state is already in memory on this path, so no write-back is needed
908
+ .size KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
909
+ # Constant tables shared by the permutation and absorb routines in this file.
910
+ .equ ALLON, 0xFFFFFFFFFFFFFFFF # all-ones qword: gather-mask element that enables a lane
911
+ # Rotation counts: six rows of four qwords; each row holds the per-lane counts for one 4-lane %ymm register (row 1 is applied to %ymm1 in __KeccakF1600).
912
+ .align 64
913
+ rhotates_left: # left-shift counts, loaded through %r8 for vpsllvq
914
+ .quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0]
915
+ .quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4]
916
+ .quad 45, 6, 56, 39 # [3][1] [1][2] [4][3] [2][4]
917
+ .quad 10, 61, 55, 8 # [2][1] [4][2] [1][3] [3][4]
918
+ .quad 2, 15, 25, 20 # [4][1] [3][2] [2][3] [1][4]
919
+ .quad 44, 43, 21, 14 # [1][1] [2][2] [3][3] [4][4]
920
+ rhotates_right: # complementary counts (64 - left), loaded through %r9 for vpsrlvq; same row order as rhotates_left
921
+ .quad 64-3, 64-18, 64-36, 64-41
922
+ .quad 64-1, 64-62, 64-28, 64-27
923
+ .quad 64-45, 64-6, 64-56, 64-39
924
+ .quad 64-10, 64-61, 64-55, 64-8
925
+ .quad 64-2, 64-15, 64-25, 64-20
926
+ .quad 64-44, 64-43, 64-21, 64-14
927
+ iotas: # 24 round constants, one 32-byte row per round with the constant replicated in all four qwords; the 12-round entry points start at row 12
928
+ .quad 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001
929
+ .quad 0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082
930
+ .quad 0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a
931
+ .quad 0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000
932
+ .quad 0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b
933
+ .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
934
+ .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
935
+ .quad 0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009
936
+ .quad 0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a
937
+ .quad 0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088
938
+ .quad 0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009
939
+ .quad 0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a
940
+ .quad 0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b
941
+ .quad 0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b
942
+ .quad 0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089
943
+ .quad 0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003
944
+ .quad 0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002
945
+ .quad 0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080
946
+ .quad 0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a
947
+ .quad 0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a
948
+ .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
949
+ .quad 0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080
950
+ .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
951
+ .quad 0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008
952
+ # mapState: 25 entries; entry i is the byte offset inside the state buffer at which the generic absorb loop XORs the i-th data lane.
953
+ mapState:
954
+ .quad 0*8, 1*8, 2*8, 3*8, 4*8
955
+ .quad 7*8, 21*8, 10*8, 15*8, 20*8
956
+ .quad 5*8, 13*8, 22*8, 19*8, 12*8
957
+ .quad 8*8, 9*8, 18*8, 23*8, 16*8
958
+ .quad 6*8, 17*8, 14*8, 11*8, 24*8
959
+ # map2..map6: 32-bit byte offsets into one data block, used as vpgatherdq indices to build the vectors XORed into %ymm2..%ymm6; 16-byte aligned because they are loaded with vmovdqa.
960
+ .align 16
961
+ map2:
962
+ .long 10*8, 20*8, 5*8, 15*8
963
+ map3:
964
+ .long 16*8, 7*8, 23*8, 14*8
965
+ map4:
966
+ .long 11*8, 22*8, 8*8, 19*8
967
+ map5:
968
+ .long 21*8, 17*8, 13*8, 9*8
969
+ map6:
970
+ .long 6*8, 12*8, 18*8, 24*8
971
+ # Gather masks for 21-lane blocks: an element is ALLON when the matching mapN lane index is below 21, and 0 otherwise (map2 needs no mask in this case).
972
+ .align 32
973
+ mask3_21:
974
+ .quad ALLON, ALLON, 0, ALLON
975
+ mask4_21:
976
+ .quad ALLON, 0, ALLON, ALLON
977
+ mask5_21:
978
+ .quad 0, ALLON, ALLON, ALLON
979
+ mask6_21:
980
+ .quad ALLON, ALLON, ALLON, 0
981
+ # Gather masks for 17-lane blocks: an element is ALLON when the matching mapN lane index is below 17, and 0 otherwise.
982
+ mask2_17:
983
+ .quad ALLON, 0, ALLON, ALLON
984
+ mask3_17:
985
+ .quad ALLON, ALLON, 0, ALLON
986
+ mask4_17:
987
+ .quad ALLON, 0, ALLON, 0
988
+ mask5_17:
989
+ .quad 0, 0, ALLON, ALLON
990
+ mask6_17:
991
+ .quad ALLON, ALLON, 0, 0
992
+ # NUL-terminated attribution string emitted into the object file.
993
+ .asciz "Keccak-1600 for AVX2, CRYPTOGAMS by <appro@openssl.org>"