digest-kangarootwelve 0.0.2 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (307) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +71 -37
  3. data/Rakefile +7 -9
  4. data/digest-kangarootwelve.gemspec +323 -14
  5. data/ext/digest/kangarootwelve/ext.c +228 -177
  6. data/ext/digest/kangarootwelve/extconf.rb +15 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/ext/digest/kangarootwelve/utils.h +101 -0
  305. data/lib/digest/kangarootwelve/version.rb +2 -2
  306. data/test/test.rb +68 -31
  307. metadata +305 -27
@@ -0,0 +1,37 @@
1
+ /*
2
+ Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
3
+ Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
4
+ hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
16
+ Please refer to PlSnP-documentation.h for more details.
17
+
18
+ This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
19
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ */
21
+
22
+ #include "KeccakP-1600-SnP.h"
23
+
24
+ #define prefix KeccakP1600times4
25
+ #define PlSnP_baseParallelism 1
26
+ #define PlSnP_targetParallelism 4
27
+ #define SnP_laneLengthInBytes 8
28
+ #define SnP KeccakP1600
29
+ #define SnP_Permute KeccakP1600_Permute_24rounds
30
+ #define SnP_Permute_12rounds KeccakP1600_Permute_12rounds
31
+ #define SnP_Permute_Nrounds KeccakP1600_Permute_Nrounds
32
+ #define PlSnP_PermuteAll KeccakP1600times4_PermuteAll_24rounds
33
+ #define PlSnP_PermuteAll_12rounds KeccakP1600times4_PermuteAll_12rounds
34
+ #define PlSnP_PermuteAll_6rounds KeccakP1600times4_PermuteAll_6rounds
35
+ #define PlSnP_PermuteAll_4rounds KeccakP1600times4_PermuteAll_4rounds
36
+
37
+ #include "PlSnP-Fallback.inc"
@@ -0,0 +1,45 @@
1
+ /*
2
+ Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
3
+ Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
4
+ hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ Please refer to PlSnP-documentation.h for more details.
16
+ */
17
+
18
+ #ifndef _KeccakP_1600_times8_SnP_h_
19
+ #define _KeccakP_1600_times8_SnP_h_
20
+
21
+ #include "KeccakP-1600-SnP.h"
22
+
23
+ #define KeccakP1600times8_implementation "fallback on serial implementation (" KeccakP1600_implementation ")"
24
+ #define KeccakP1600times8_statesSizeInBytes (((KeccakP1600_stateSizeInBytes+(KeccakP1600_stateAlignment-1))/KeccakP1600_stateAlignment)*KeccakP1600_stateAlignment*8)
25
+ #define KeccakP1600times8_statesAlignment KeccakP1600_stateAlignment
26
+ #define KeccakP1600times8_isFallback
27
+
28
+ void KeccakP1600times8_StaticInitialize( void );
29
+ void KeccakP1600times8_InitializeAll(void *states);
30
+ void KeccakP1600times8_AddByte(void *states, unsigned int instanceIndex, unsigned char data, unsigned int offset);
31
+ void KeccakP1600times8_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
32
+ void KeccakP1600times8_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
33
+ void KeccakP1600times8_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
34
+ void KeccakP1600times8_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
35
+ void KeccakP1600times8_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
36
+ void KeccakP1600times8_PermuteAll_4rounds(void *states);
37
+ void KeccakP1600times8_PermuteAll_6rounds(void *states);
38
+ void KeccakP1600times8_PermuteAll_12rounds(void *states);
39
+ void KeccakP1600times8_PermuteAll_24rounds(void *states);
40
+ void KeccakP1600times8_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
41
+ void KeccakP1600times8_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
42
+ void KeccakP1600times8_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
43
+ void KeccakP1600times8_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
44
+
45
+ #endif
@@ -0,0 +1,37 @@
1
+ /*
2
+ Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
3
+ Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
4
+ hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ This file implements Keccak-p[1600]×8 in a PlSnP-compatible way.
16
+ Please refer to PlSnP-documentation.h for more details.
17
+
18
+ This implementation comes with KeccakP-1600-times8-SnP.h in the same folder.
19
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ */
21
+
22
+ #include "KeccakP-1600-SnP.h"
23
+
24
+ #define prefix KeccakP1600times8
25
+ #define PlSnP_baseParallelism 1
26
+ #define PlSnP_targetParallelism 8
27
+ #define SnP_laneLengthInBytes 8
28
+ #define SnP KeccakP1600
29
+ #define SnP_Permute KeccakP1600_Permute_24rounds
30
+ #define SnP_Permute_12rounds KeccakP1600_Permute_12rounds
31
+ #define SnP_Permute_Nrounds KeccakP1600_Permute_Nrounds
32
+ #define PlSnP_PermuteAll KeccakP1600times8_PermuteAll_24rounds
33
+ #define PlSnP_PermuteAll_12rounds KeccakP1600times8_PermuteAll_12rounds
34
+ #define PlSnP_PermuteAll_6rounds KeccakP1600times8_PermuteAll_6rounds
35
+ #define PlSnP_PermuteAll_4rounds KeccakP1600times8_PermuteAll_4rounds
36
+
37
+ #include "PlSnP-Fallback.inc"
@@ -0,0 +1,1190 @@
1
+ #
2
+ # Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+ #
4
+ # For more information, feedback or questions, please refer to our website:
5
+ # https://keccak.team/
6
+ #
7
+ # To the extent possible under law, the implementer has waived all copyright
8
+ # and related or neighboring rights to the source code in this file.
9
+ # http://creativecommons.org/publicdomain/zero/1.0/
10
+ #
11
+ # ---
12
+ #
13
+ # This file implements Keccak-p[1600] in a SnP-compatible way.
14
+ # Please refer to SnP-documentation.h for more details.
15
+ #
16
+ # This implementation comes with KeccakP-1600-SnP.h in the same folder.
17
+ # Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ #
19
+
20
+ # WARNING: State must be 256 bit (32 bytes) aligned.
21
+
22
+ .text
23
+
24
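+ # conditional assembly settings: UseSIMD == 1 selects the movdqu/pxor code paths, InlinePerm == 1 expands the 24-round permutation inline instead of calling it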
25
+ .equ UseSIMD, 0
26
+ .equ InlinePerm, 1
27
+
28
+ # byte offsets of the 25 lanes within the state (lane index * 8 bytes)
29
+ .equ _ba, 0*8
30
+ .equ _be, 1*8
31
+ .equ _bi, 2*8
32
+ .equ _bo, 3*8
33
+ .equ _bu, 4*8
34
+ .equ _ga, 5*8
35
+ .equ _ge, 6*8
36
+ .equ _gi, 7*8
37
+ .equ _go, 8*8
38
+ .equ _gu, 9*8
39
+ .equ _ka, 10*8
40
+ .equ _ke, 11*8
41
+ .equ _ki, 12*8
42
+ .equ _ko, 13*8
43
+ .equ _ku, 14*8
44
+ .equ _ma, 15*8
45
+ .equ _me, 16*8
46
+ .equ _mi, 17*8
47
+ .equ _mo, 18*8
48
+ .equ _mu, 19*8
49
+ .equ _sa, 20*8
50
+ .equ _se, 21*8
51
+ .equ _si, 22*8
52
+ .equ _so, 23*8
53
+ .equ _su, 24*8
54
+
55
+ # arguments passed in registers (System V AMD64 integer argument order)
56
+ .equ arg1, %rdi
57
+ .equ arg2, %rsi
58
+ .equ arg3, %rdx
59
+ .equ arg4, %rcx
60
+ .equ arg5, %r8
61
+ .equ arg6, %r9
62
+
63
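+ # temporary registers (rT1u and rT2u alias arg6 and arg5; rT1e, rT1i, rT1o, rT2i and rT2o are callee-saved registers and must be preserved by their users)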
64
+ .equ rT1, %rax
65
+ .equ rT1a, rT1
66
+ .equ rT1e, %rbx
67
+ .equ rT1i, %r14
68
+ .equ rT1o, %r15
69
+ .equ rT1u, arg6
70
+ .equ rT2a, %r10
71
+ .equ rT2e, %r11
72
+ .equ rT2i, %r12
73
+ .equ rT2o, %r13
74
+ .equ rT2u, arg5
75
+
76
+ # round variables (rCi and rCo share registers with rBi and rBo; rDa shares %rbx with rT1e)
77
+ .equ rpState, arg1
78
+ .equ rpStack, %rsp
79
+
80
+ .equ rDa, %rbx
81
+ .equ rDe, %rcx
82
+ .equ rDi, %rdx
83
+ .equ rDo, %r8
84
+ .equ rDu, %r9
85
+
86
+ .equ rBa, %r10
87
+ .equ rBe, %r11
88
+ .equ rBi, %r12
89
+ .equ rBo, %r13
90
+ .equ rBu, %r14
91
+
92
+ .equ rCa, %rsi
93
+ .equ rCe, %rbp
94
+ .equ rCi, rBi
95
+ .equ rCo, rBo
96
+ .equ rCu, %r15
97
+
98
+ .macro mKeccakRound iState, oState, rc, lastRound
99
+ # One round: reads lanes through iState, writes them through oState and xors rc into lane ba. Expects rCa, rCe, rCu = XOR of the five a, e, u lanes and rDi, rDo = lanes si, so of the input state; leaves the same for the output state (the rC values only when lastRound is 0).
100
+ # prepare Theta bis: complete the i and o lane XORs and derive rDa..rDu
101
+ movq rCe, rDa
102
+ shld $1, rDa, rDa # shld with identical source and destination is a rotate left
103
+
104
+ movq _bi(\iState), rCi
105
+ xorq _gi(\iState), rDi # rDi entered holding lane si
106
+ xorq _ki(\iState), rCi
107
+ xorq rCu, rDa # rDa = rol(rCe, 1) ^ rCu
108
+ xorq _mi(\iState), rDi
109
+ xorq rDi, rCi # rCi = bi ^ gi ^ ki ^ mi ^ si
110
+
111
+ movq rCi, rDe
112
+ shld $1, rDe, rDe
113
+
114
+ movq _bo(\iState), rCo
115
+ xorq _go(\iState), rDo # rDo entered holding lane so
116
+ xorq _ko(\iState), rCo
117
+ xorq rCa, rDe # rDe = rol(rCi, 1) ^ rCa
118
+ xorq _mo(\iState), rDo
119
+ xorq rDo, rCo # rCo = bo ^ go ^ ko ^ mo ^ so
120
+
121
+ movq rCo, rDi
122
+ shld $1, rDi, rDi
123
+
124
+ movq rCu, rDo
125
+ xorq rCe, rDi # rDi = rol(rCo, 1) ^ rCe
126
+ shld $1, rDo, rDo
127
+
128
+ movq rCa, rDu
129
+ xorq rCi, rDo # rDo = rol(rCu, 1) ^ rCi
130
+ shld $1, rDu, rDu
131
+
132
+ # Theta Rho Pi Chi Iota, result b (output lanes ba, be, bi, bo, bu)
133
+ movq _ba(\iState), rBa
134
+ movq _ge(\iState), rBe
135
+ xorq rCo, rDu # rDu = rol(rCa, 1) ^ rCo; last read of rCo before rBo reuses its register
136
+ movq _ki(\iState), rBi
137
+ movq _mo(\iState), rBo
138
+ movq _su(\iState), rBu
139
+ xorq rDe, rBe
140
+ shld $44, rBe, rBe
141
+ xorq rDi, rBi
142
+ xorq rDa, rBa
143
+ shld $43, rBi, rBi
144
+
145
+ movq rBe, rCa
146
+ movq $\rc, rT1 # round constant, xored into ba only
147
+ orq rBi, rCa
148
+ xorq rBa, rT1
149
+ xorq rT1, rCa
150
+ movq rCa, _ba(\oState) # rCa now starts the a XOR of the output state
151
+
152
+ xorq rDu, rBu
153
+ shld $14, rBu, rBu
154
+ movq rBa, rCu
155
+ andq rBe, rCu
156
+ xorq rBu, rCu
157
+ movq rCu, _bu(\oState) # rCu now starts the u XOR of the output state
158
+
159
+ xorq rDo, rBo
160
+ shld $21, rBo, rBo
161
+ movq rBo, rT1
162
+ andq rBu, rT1
163
+ xorq rBi, rT1
164
+ movq rT1, _bi(\oState)
165
+
166
+ notq rBi
167
+ orq rBa, rBu
168
+ orq rBo, rBi
169
+ xorq rBo, rBu
170
+ xorq rBe, rBi
171
+ movq rBu, _bo(\oState)
172
+ movq rBi, _be(\oState)
173
+ .if \lastRound == 0
174
+ movq rBi, rCe # start the e XOR of the output state with the new be
175
+ .endif
176
+
177
+ # Theta Rho Pi Chi, result g (output lanes ga, ge, gi, go, gu)
178
+ movq _gu(\iState), rBe
179
+ xorq rDu, rBe
180
+ movq _ka(\iState), rBi
181
+ shld $20, rBe, rBe
182
+ xorq rDa, rBi
183
+ shld $3, rBi, rBi
184
+ movq _bo(\iState), rBa
185
+ movq rBe, rT1
186
+ orq rBi, rT1
187
+ xorq rDo, rBa
188
+ movq _me(\iState), rBo
189
+ movq _si(\iState), rBu
190
+ shld $28, rBa, rBa
191
+ xorq rBa, rT1
192
+ movq rT1, _ga(\oState)
193
+ .if \lastRound == 0
194
+ xorq rT1, rCa
195
+ .endif
196
+
197
+ xorq rDe, rBo
198
+ shld $45, rBo, rBo
199
+ movq rBi, rT1
200
+ andq rBo, rT1
201
+ xorq rBe, rT1
202
+ movq rT1, _ge(\oState)
203
+ .if \lastRound == 0
204
+ xorq rT1, rCe
205
+ .endif
206
+
207
+ xorq rDi, rBu
208
+ shld $61, rBu, rBu
209
+ movq rBu, rT1
210
+ orq rBa, rT1
211
+ xorq rBo, rT1
212
+ movq rT1, _go(\oState)
213
+
214
+ andq rBe, rBa
215
+ xorq rBu, rBa
216
+ movq rBa, _gu(\oState)
217
+ notq rBu
218
+ .if \lastRound == 0
219
+ xorq rBa, rCu
220
+ .endif
221
+
222
+ orq rBu, rBo
223
+ xorq rBi, rBo
224
+ movq rBo, _gi(\oState)
225
+
226
+ # Theta Rho Pi Chi, result k (output lanes ka, ke, ki, ko, ku)
227
+ movq _be(\iState), rBa
228
+ movq _gi(\iState), rBe
229
+ movq _ko(\iState), rBi
230
+ movq _mu(\iState), rBo
231
+ movq _sa(\iState), rBu
232
+ xorq rDi, rBe
233
+ shld $6, rBe, rBe
234
+ xorq rDo, rBi
235
+ shld $25, rBi, rBi
236
+ movq rBe, rT1
237
+ orq rBi, rT1
238
+ xorq rDe, rBa
239
+ shld $1, rBa, rBa
240
+ xorq rBa, rT1
241
+ movq rT1, _ka(\oState)
242
+ .if \lastRound == 0
243
+ xorq rT1, rCa
244
+ .endif
245
+
246
+ xorq rDu, rBo
247
+ shld $8, rBo, rBo
248
+ movq rBi, rT1
249
+ andq rBo, rT1
250
+ xorq rBe, rT1
251
+ movq rT1, _ke(\oState)
252
+ .if \lastRound == 0
253
+ xorq rT1, rCe
254
+ .endif
255
+
256
+ xorq rDa, rBu
257
+ shld $18, rBu, rBu
258
+ notq rBo
259
+ movq rBo, rT1
260
+ andq rBu, rT1
261
+ xorq rBi, rT1
262
+ movq rT1, _ki(\oState)
263
+
264
+ movq rBu, rT1
265
+ orq rBa, rT1
266
+ xorq rBo, rT1
267
+ movq rT1, _ko(\oState)
268
+
269
+ andq rBe, rBa
270
+ xorq rBu, rBa
271
+ movq rBa, _ku(\oState)
272
+ .if \lastRound == 0
273
+ xorq rBa, rCu
274
+ .endif
275
+
276
+ # Theta Rho Pi Chi, result m (output lanes ma, me, mi, mo, mu)
277
+ movq _ga(\iState), rBe
278
+ xorq rDa, rBe
279
+ movq _ke(\iState), rBi
280
+ shld $36, rBe, rBe
281
+ xorq rDe, rBi
282
+ movq _bu(\iState), rBa
283
+ shld $10, rBi, rBi
284
+ movq rBe, rT1
285
+ movq _mi(\iState), rBo
286
+ andq rBi, rT1
287
+ xorq rDu, rBa
288
+ movq _so(\iState), rBu
289
+ shld $27, rBa, rBa
290
+ xorq rBa, rT1
291
+ movq rT1, _ma(\oState)
292
+ .if \lastRound == 0
293
+ xorq rT1, rCa
294
+ .endif
295
+
296
+ xorq rDi, rBo
297
+ shld $15, rBo, rBo
298
+ movq rBi, rT1
299
+ orq rBo, rT1
300
+ xorq rBe, rT1
301
+ movq rT1, _me(\oState)
302
+ .if \lastRound == 0
303
+ xorq rT1, rCe
304
+ .endif
305
+
306
+ xorq rDo, rBu
307
+ shld $56, rBu, rBu
308
+ notq rBo
309
+ movq rBo, rT1
310
+ orq rBu, rT1
311
+ xorq rBi, rT1
312
+ movq rT1, _mi(\oState)
313
+
314
+ orq rBa, rBe
315
+ xorq rBu, rBe
316
+ movq rBe, _mu(\oState)
317
+
318
+ andq rBa, rBu
319
+ xorq rBo, rBu
320
+ movq rBu, _mo(\oState)
321
+ .if \lastRound == 0
322
+ xorq rBe, rCu # rBe still holds the new mu stored above
323
+ .endif
324
+
325
+ # Theta Rho Pi Chi, result s (output lanes sa, se, si, so, su); the rD registers are reused as temporaries from here on
326
+ movq _bi(\iState), rBa
327
+ movq _go(\iState), rBe
328
+ movq _ku(\iState), rBi
329
+ xorq rDi, rBa
330
+ movq _ma(\iState), rBo
331
+ shld $62, rBa, rBa
332
+ xorq rDo, rBe
333
+ movq _se(\iState), rBu
334
+ shld $55, rBe, rBe
335
+
336
+ xorq rDu, rBi # last use of rDu as a theta value
337
+ movq rBa, rDu
338
+ xorq rDe, rBu
339
+ shld $2, rBu, rBu
340
+ andq rBe, rDu
341
+ xorq rBu, rDu
342
+ movq rDu, _su(\oState)
343
+
344
+ shld $39, rBi, rBi
345
+ .if \lastRound == 0
346
+ xorq rDu, rCu
347
+ .endif
348
+ notq rBe
349
+ xorq rDa, rBo # last use of rDa as a theta value
350
+ movq rBe, rDa
351
+ andq rBi, rDa
352
+ xorq rBa, rDa
353
+ movq rDa, _sa(\oState)
354
+ .if \lastRound == 0
355
+ xorq rDa, rCa
356
+ .endif
357
+
358
+ shld $41, rBo, rBo
359
+ movq rBi, rDe
360
+ orq rBo, rDe
361
+ xorq rBe, rDe
362
+ movq rDe, _se(\oState)
363
+ .if \lastRound == 0
364
+ xorq rDe, rCe
365
+ .endif
366
+
367
+ movq rBo, rDi
368
+ movq rBu, rDo
369
+ andq rBu, rDi
370
+ orq rBa, rDo
371
+ xorq rBi, rDi
372
+ xorq rBo, rDo
373
+ movq rDi, _si(\oState) # rDi is left holding the new si for the next round
374
+ movq rDo, _so(\oState) # rDo is left holding the new so for the next round
375
+
376
+ .endm
377
+
378
+ .macro mKeccakPermutation12
379
+ # Twelve rounds on the state at rpState, using a 200-byte scratch state on the stack. Does not save any register itself; the callers in this file wrap it in mPushRegs and mPopRegs.
380
+ subq $8*25, %rsp
381
+
382
+ movq _ba(rpState), rCa
383
+ movq _be(rpState), rCe
384
+ movq _bu(rpState), rCu
385
+
386
+ xorq _ga(rpState), rCa
387
+ xorq _ge(rpState), rCe
388
+ xorq _gu(rpState), rCu
389
+
390
+ xorq _ka(rpState), rCa
391
+ xorq _ke(rpState), rCe
392
+ xorq _ku(rpState), rCu
393
+
394
+ xorq _ma(rpState), rCa
395
+ xorq _me(rpState), rCe
396
+ xorq _mu(rpState), rCu
397
+
398
+ xorq _sa(rpState), rCa
399
+ xorq _se(rpState), rCe
400
+ movq _si(rpState), rDi
401
+ movq _so(rpState), rDo
402
+ xorq _su(rpState), rCu
403
+ # rCa, rCe, rCu, rDi, rDo now meet the entry conditions of mKeccakRound; rounds alternate state to stack and stack to state, and the round constants equal those of the last 12 rounds of mKeccakPermutation24
404
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
405
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
406
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
407
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
408
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
409
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
410
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
411
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
412
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
413
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
414
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
415
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
416
+ addq $8*25, %rsp
417
+ .endm
418
+
419
+ .macro mKeccakPermutation24
420
+ # Twenty-four rounds on the state at rpState, using a 200-byte scratch state on the stack. Does not save any register itself; the function callers in this file wrap it in mPushRegs and mPopRegs.
421
+ subq $8*25, %rsp
422
+
423
+ movq _ba(rpState), rCa
424
+ movq _be(rpState), rCe
425
+ movq _bu(rpState), rCu
426
+
427
+ xorq _ga(rpState), rCa
428
+ xorq _ge(rpState), rCe
429
+ xorq _gu(rpState), rCu
430
+
431
+ xorq _ka(rpState), rCa
432
+ xorq _ke(rpState), rCe
433
+ xorq _ku(rpState), rCu
434
+
435
+ xorq _ma(rpState), rCa
436
+ xorq _me(rpState), rCe
437
+ xorq _mu(rpState), rCu
438
+
439
+ xorq _sa(rpState), rCa
440
+ xorq _se(rpState), rCe
441
+ movq _si(rpState), rDi
442
+ movq _so(rpState), rDo
443
+ xorq _su(rpState), rCu
444
+ # rCa, rCe, rCu, rDi, rDo now meet the entry conditions of mKeccakRound; rounds alternate state to stack and stack to state so the even round count ends in rpState
445
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
446
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
447
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
448
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
449
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
450
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
451
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
452
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
453
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
454
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
455
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
456
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
457
+
458
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
459
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
460
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
461
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
462
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
463
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
464
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
465
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
466
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
467
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
468
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
469
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
470
+ addq $8*25, %rsp
471
+ .endm
472
+
473
+ .macro mKeccakPermutationInlinable24
474
+ .if InlinePerm == 1
475
+ mKeccakPermutation24 # InlinePerm == 1: expand the 24 rounds in place (no register save or restore is added here)
476
+ .else
477
+ callq KeccakP1600_Permute_24rounds # otherwise call the function, which saves and restores the callee-saved registers itself
478
+ .endif
479
+ .endm
480
+
481
+ .macro mPushRegs
482
+ pushq %rbx # save the six callee-saved general registers (48 bytes); mPopRegs restores them in reverse order
483
+ pushq %rbp
484
+ pushq %r12
485
+ pushq %r13
486
+ pushq %r14
487
+ pushq %r15
488
+ .endm
489
+
490
+ .macro mPopRegs
491
+ popq %r15 # restore the registers saved by mPushRegs, in reverse push order
492
+ popq %r14
493
+ popq %r13
494
+ popq %r12
495
+ popq %rbp
496
+ popq %rbx
497
+ .endm
498
+
499
+ .macro mXor128 input, output, offset
500
+ .if UseSIMD == 0
501
+ movq \offset(\input), rT1a # xor 16 bytes at input+offset into output+offset; this path clobbers rT1a and rT1e
502
+ movq \offset+8(\input), rT1e
503
+ xorq rT1a, \offset(\output)
504
+ xorq rT1e, \offset+8(\output)
505
+ .else
506
+ movdqu \offset(\input), %xmm0 # SIMD path: unaligned loads and store, clobbers xmm0 and xmm1
507
+ movdqu \offset(\output), %xmm1
508
+ pxor %xmm1, %xmm0
509
+ movdqu %xmm0, \offset(\output)
510
+ .endif
511
+ .endm
512
+
513
+ .macro mXor256 input, output, offset
514
+ .if UseSIMD == 0
515
+ movq \offset(\input), rT1a # xor 32 bytes at input+offset into output+offset; this path clobbers rT1a, rT1e, rT1i and rT1o
516
+ movq \offset+8(\input), rT1e
517
+ movq \offset+16(\input), rT1i
518
+ movq \offset+24(\input), rT1o
519
+ xorq rT1a, \offset(\output)
520
+ xorq rT1e, \offset+8(\output)
521
+ xorq rT1i, \offset+16(\output)
522
+ xorq rT1o, \offset+24(\output)
523
+ .else
524
+ movdqu \offset(\input), %xmm0 # SIMD path: two 16-byte halves, clobbers xmm0 and xmm1
525
+ movdqu \offset(\output), %xmm1
526
+ pxor %xmm1, %xmm0
527
+ movdqu %xmm0, \offset(\output)
528
+ movdqu \offset+16(\input), %xmm0
529
+ movdqu \offset+16(\output), %xmm1
530
+ pxor %xmm1, %xmm0
531
+ movdqu %xmm0, \offset+16(\output)
532
+ .endif
533
+ .endm
534
+
535
+ .macro mXor512 input, output, offset
536
+ .if UseSIMD == 0
537
+ mXor256 \input, \output, \offset # xor 64 bytes at input+offset into output+offset as two 32-byte halves
538
+ mXor256 \input, \output, \offset+32
539
+ .else
540
+ movdqu \offset(\input), %xmm0 # SIMD path: four 16-byte pieces, clobbers xmm0 and xmm1
541
+ movdqu \offset(\output), %xmm1
542
+ pxor %xmm1, %xmm0
543
+ movdqu %xmm0, \offset(\output)
544
+ movdqu \offset+16(\input), %xmm0
545
+ movdqu \offset+16(\output), %xmm1
546
+ pxor %xmm1, %xmm0
547
+ movdqu %xmm0, \offset+16(\output)
548
+ movdqu \offset+32(\input), %xmm0
549
+ movdqu \offset+32(\output), %xmm1
550
+ pxor %xmm1, %xmm0
551
+ movdqu %xmm0, \offset+32(\output)
552
+ movdqu \offset+48(\input), %xmm0
553
+ movdqu \offset+48(\output), %xmm1
554
+ pxor %xmm1, %xmm0
555
+ movdqu %xmm0, \offset+48(\output)
556
+ .endif
557
+ .endm
558
+
559
+ #----------------------------------------------------------------------------
560
+ #
561
+ # void KeccakP1600_StaticInitialize( void )
562
+ # Nothing to set up in this implementation: returns immediately.
563
+ .align 8
564
+ .global KeccakP1600_StaticInitialize
565
+ .type KeccakP1600_StaticInitialize, %function
566
+ KeccakP1600_StaticInitialize:
567
+ retq
568
+ .size KeccakP1600_StaticInitialize, .-KeccakP1600_StaticInitialize # placed after the body so the symbol size covers the function
569
+
570
+ #----------------------------------------------------------------------------
571
+ #
572
+ # void KeccakP1600_Initialize(void *state)
573
+ # Writes the initial state: all 25 lanes zero except be, bi, go, ki, mi and sa, which are set to all-ones (the lanes flagged in KeccakLaneComplementTable). Clobbers rax, rcx and, when UseSIMD is set, xmm0.
574
+ .align 8
575
+ .global KeccakP1600_Initialize
576
+ .type KeccakP1600_Initialize, %function
577
+ KeccakP1600_Initialize:
578
+ xorq %rax, %rax # rax = 0
579
+ xorq %rcx, %rcx
580
+ notq %rcx # rcx = all-ones
581
+ .if UseSIMD == 0
582
+ movq %rax, _ba(arg1)
583
+ movq %rcx, _be(arg1)
584
+ movq %rcx, _bi(arg1)
585
+ movq %rax, _bo(arg1)
586
+ movq %rax, _bu(arg1)
587
+ movq %rax, _ga(arg1)
588
+ movq %rax, _ge(arg1)
589
+ movq %rax, _gi(arg1)
590
+ movq %rcx, _go(arg1)
591
+ movq %rax, _gu(arg1)
592
+ movq %rax, _ka(arg1)
593
+ movq %rax, _ke(arg1)
594
+ movq %rcx, _ki(arg1)
595
+ movq %rax, _ko(arg1)
596
+ movq %rax, _ku(arg1)
597
+ movq %rax, _ma(arg1)
598
+ movq %rax, _me(arg1)
599
+ movq %rcx, _mi(arg1)
600
+ movq %rax, _mo(arg1)
601
+ movq %rax, _mu(arg1)
602
+ movq %rcx, _sa(arg1)
603
+ movq %rax, _se(arg1)
604
+ movq %rax, _si(arg1)
605
+ movq %rax, _so(arg1)
606
+ movq %rax, _su(arg1)
607
+ .else
608
+ pxor %xmm0, %xmm0 # each movdqu below zeroes two adjacent lanes with an unaligned 16-byte store
609
+ movq %rax, _ba(arg1)
610
+ movq %rcx, _be(arg1)
611
+ movq %rcx, _bi(arg1)
612
+ movq %rax, _bo(arg1)
613
+ movdqu %xmm0, _bu(arg1)
614
+ movdqu %xmm0, _ge(arg1)
615
+ movq %rcx, _go(arg1)
616
+ movq %rax, _gu(arg1)
617
+ movdqu %xmm0, _ka(arg1)
618
+ movq %rcx, _ki(arg1)
619
+ movq %rax, _ko(arg1)
620
+ movdqu %xmm0, _ku(arg1)
621
+ movq %rax, _me(arg1)
622
+ movq %rcx, _mi(arg1)
623
+ movdqu %xmm0, _mo(arg1)
624
+ movq %rcx, _sa(arg1)
625
+ movq %rax, _se(arg1)
626
+ movdqu %xmm0, _si(arg1)
627
+ movq %rax, _su(arg1)
628
+ .endif
629
+ retq
630
+ .size KeccakP1600_Initialize, .-KeccakP1600_Initialize # placed after the body so the symbol size covers the function
631
+
632
+ #----------------------------------------------------------------------------
633
+ #
634
+ # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset)
635
+ # Xors the byte data into the state byte at the given offset. Clobbers arg1, arg3 and flags.
636
+ .align 8
637
+ .global KeccakP1600_AddByte
638
+ .type KeccakP1600_AddByte, %function
639
+ KeccakP1600_AddByte:
640
+ movl %edx, %edx # zero-extend the 32-bit offset held in arg3; the upper half of the register is not guaranteed clear
641
+ addq arg3, arg1
642
+ xorb %sil, (arg1) # low byte of arg2 is the data byte
643
+ retq
644
+ .size KeccakP1600_AddByte, .-KeccakP1600_AddByte # placed after the body so the symbol size covers the function
645
+
646
+ #----------------------------------------------------------------------------
647
+ #
648
+ # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
649
+ # Xors length bytes of data into the state starting at offset: whole 8-byte lanes in blocks of 16, 8, 4, 2 and 1 lanes, then the remaining bytes one at a time. On return arg1 and arg2 have been advanced past the processed bytes. NOTE(review): offset and length are used as full 64-bit registers, so callers are assumed to pass them zero-extended, and only length bits 3..7 select whole lanes - confirm.
650
+ .align 8
651
+ .global KeccakP1600_AddBytes
652
+ .type KeccakP1600_AddBytes, %function
653
+ KeccakP1600_AddBytes:
654
+ pushq rT1e # rT1e, rT1i and rT1o are callee-saved registers used by the mXor macros
655
+ pushq rT1i
656
+ pushq rT1o
657
+ addq arg3, arg1
658
+ testq $0xF8, arg4 # any whole lanes to process
659
+ jz KeccakP1600_AddBytes_Bytes
660
+ movq arg4, arg6
661
+ shrq $3, arg6 # arg6 = number of whole lanes; its bits are tested from 16 down to 1
662
+ testq $16, arg6
663
+ jz KeccakP1600_AddBytes_8Lanes
664
+ mXor512 arg2, arg1, 0
665
+ mXor512 arg2, arg1, 64
666
+ addq $128, arg2
667
+ addq $128, arg1
668
+ KeccakP1600_AddBytes_8Lanes:
669
+ testq $8, arg6
670
+ jz KeccakP1600_AddBytes_4Lanes
671
+ mXor512 arg2, arg1, 0
672
+ addq $64, arg2
673
+ addq $64, arg1
674
+ KeccakP1600_AddBytes_4Lanes:
675
+ testq $4, arg6
676
+ jz KeccakP1600_AddBytes_2Lanes
677
+ mXor256 arg2, arg1, 0
678
+ addq $32, arg2
679
+ addq $32, arg1
680
+ KeccakP1600_AddBytes_2Lanes:
681
+ testq $2, arg6
682
+ jz KeccakP1600_AddBytes_1Lane
683
+ mXor128 arg2, arg1, 0
684
+ addq $16, arg2
685
+ addq $16, arg1
686
+ KeccakP1600_AddBytes_1Lane:
687
+ testq $1, arg6
688
+ jz KeccakP1600_AddBytes_Bytes
689
+ movq (arg2), rT1
690
+ xorq rT1, (arg1)
691
+ addq $8, arg2
692
+ addq $8, arg1
693
+ KeccakP1600_AddBytes_Bytes:
694
+ andq $7, arg4 # arg4 = bytes left after the whole lanes
695
+ jz KeccakP1600_AddBytes_Exit
696
+ KeccakP1600_AddBytes_BytesLoop:
697
+ movb (arg2), %al
698
+ xorb %al, (arg1)
699
+ addq $1, arg2
700
+ addq $1, arg1
701
+ subq $1, arg4
702
+ jnz KeccakP1600_AddBytes_BytesLoop
703
+ KeccakP1600_AddBytes_Exit:
704
+ popq rT1o
705
+ popq rT1i
706
+ popq rT1e
707
+ retq
708
+ .size KeccakP1600_AddBytes, .-KeccakP1600_AddBytes # placed after the body so the symbol size covers the function
709
+
710
+
711
+ KeccakLaneComplementTable: # 25 quadwords, one per lane in state order: all-ones for the lanes that KeccakP1600_Initialize sets to all-ones, zero otherwise; the overwrite and extract routines xor it with the data
712
+ .quad 0
713
+ .quad 0xFFFFFFFFFFFFFFFF # 1 be
714
+ .quad 0xFFFFFFFFFFFFFFFF # 2 bi
715
+ .quad 0
716
+ .quad 0
717
+
718
+ .quad 0
719
+ .quad 0
720
+ .quad 0
721
+ .quad 0xFFFFFFFFFFFFFFFF # 8 go
722
+ .quad 0
723
+
724
+ .quad 0
725
+ .quad 0
726
+ .quad 0xFFFFFFFFFFFFFFFF # 12 ki
727
+ .quad 0
728
+ .quad 0
729
+
730
+ .quad 0
731
+ .quad 0
732
+ .quad 0xFFFFFFFFFFFFFFFF # 17 mi
733
+ .quad 0
734
+ .quad 0
735
+
736
+ .quad 0xFFFFFFFFFFFFFFFF # 20 sa
737
+ .quad 0
738
+ .quad 0
739
+ .quad 0
740
+ .quad 0
741
+
742
+ #----------------------------------------------------------------------------
743
+ #
744
+ # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
745
+ # Stores length bytes of data into the state at offset, xoring each with the matching byte of KeccakLaneComplementTable. Clobbers rax, arg1, arg2, arg4, arg5 and flags. NOTE(review): offset and length are used as full 64-bit registers, so callers are assumed to pass them zero-extended - confirm.
746
+ .align 8
747
+ .global KeccakP1600_OverwriteBytes
748
+ .type KeccakP1600_OverwriteBytes, %function
749
+ KeccakP1600_OverwriteBytes:
750
+ addq arg3, arg1
751
+ leaq KeccakLaneComplementTable(%rip), arg5 # RIP-relative so the code stays position independent
752
+ addq arg3, arg5 # table pointer tracks the state pointer
753
+ subq $8, arg4
754
+ jc KeccakP1600_OverwriteBytes_Bytes # fewer than 8 bytes: no whole lane
755
+ KeccakP1600_OverwriteBytes_LanesLoop:
756
+ movq (arg2), rT1
757
+ xorq (arg5), rT1
758
+ movq rT1, (arg1)
759
+ addq $8, arg2
760
+ addq $8, arg5
761
+ addq $8, arg1
762
+ subq $8, arg4
763
+ jnc KeccakP1600_OverwriteBytes_LanesLoop
764
+ KeccakP1600_OverwriteBytes_Bytes:
765
+ addq $8, arg4 # undo the last subtraction: arg4 = remaining bytes (0..7)
766
+ jz KeccakP1600_OverwriteBytes_Exit
767
+ KeccakP1600_OverwriteBytes_BytesLoop:
768
+ movb (arg2), %al
769
+ xorb (arg5), %al
770
+ movb %al, (arg1)
771
+ addq $1, arg2
772
+ addq $1, arg5
773
+ addq $1, arg1
774
+ subq $1, arg4
775
+ jnz KeccakP1600_OverwriteBytes_BytesLoop
776
+ KeccakP1600_OverwriteBytes_Exit:
777
+ retq
778
+ .size KeccakP1600_OverwriteBytes, .-KeccakP1600_OverwriteBytes # placed after the body so the symbol size covers the function
779
+
780
+ #----------------------------------------------------------------------------
781
+ #
782
+ # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
783
+ # Overwrites the first byteCount bytes of the state with zero xored with KeccakLaneComplementTable, i.e. with the table bytes themselves. Clobbers rax, arg1, arg2, arg5 and flags. NOTE(review): byteCount is used as a full 64-bit register, so callers are assumed to pass it zero-extended - confirm.
784
+ .align 8
785
+ .global KeccakP1600_OverwriteWithZeroes
786
+ .type KeccakP1600_OverwriteWithZeroes, %function
787
+ KeccakP1600_OverwriteWithZeroes:
788
+ leaq KeccakLaneComplementTable(%rip), arg5 # RIP-relative so the code stays position independent
789
+ subq $8, arg2
790
+ jc KeccakP1600_OverwriteWithZeroes_Bytes # fewer than 8 bytes: no whole lane
791
+ KeccakP1600_OverwriteWithZeroes_LanesLoop:
792
+ movq $0, rT1
793
+ xorq (arg5), rT1
794
+ movq rT1, (arg1)
795
+ addq $8, arg5
796
+ addq $8, arg1
797
+ subq $8, arg2
798
+ jnc KeccakP1600_OverwriteWithZeroes_LanesLoop
799
+ KeccakP1600_OverwriteWithZeroes_Bytes:
800
+ addq $8, arg2 # undo the last subtraction: arg2 = remaining bytes (0..7)
801
+ jz KeccakP1600_OverwriteWithZeroes_Exit
802
+ KeccakP1600_OverwriteWithZeroes_BytesLoop:
803
+ movb $0, %al
804
+ xorb (arg5), %al
805
+ movb %al, (arg1)
806
+ addq $1, arg5
807
+ addq $1, arg1
808
+ subq $1, arg2
809
+ jnz KeccakP1600_OverwriteWithZeroes_BytesLoop
810
+ KeccakP1600_OverwriteWithZeroes_Exit:
811
+ retq
812
+ .size KeccakP1600_OverwriteWithZeroes, .-KeccakP1600_OverwriteWithZeroes # placed after the body so the symbol size covers the function
813
+
814
+ #----------------------------------------------------------------------------
815
+ #
816
+ # void KeccakP1600_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
817
+ # Copies length state bytes starting at offset into data, xoring each with the matching byte of KeccakLaneComplementTable; data is written, not read. Clobbers rax, arg1, arg2, arg4, arg5 and flags. NOTE(review): offset and length are used as full 64-bit registers, so callers are assumed to pass them zero-extended - confirm.
818
+ .align 8
819
+ .global KeccakP1600_ExtractBytes
820
+ .type KeccakP1600_ExtractBytes, %function
821
+ KeccakP1600_ExtractBytes:
822
+ addq arg3, arg1
823
+ leaq KeccakLaneComplementTable(%rip), arg5 # RIP-relative so the code stays position independent
824
+ addq arg3, arg5 # table pointer tracks the state pointer
825
+ subq $8, arg4
826
+ jc KeccakP1600_ExtractBytes_Bytes # fewer than 8 bytes: no whole lane
827
+ KeccakP1600_ExtractBytes_LanesLoop:
828
+ movq (arg1), rT1
829
+ xorq (arg5), rT1
830
+ movq rT1, (arg2)
831
+ addq $8, arg2
832
+ addq $8, arg5
833
+ addq $8, arg1
834
+ subq $8, arg4
835
+ jnc KeccakP1600_ExtractBytes_LanesLoop
836
+ KeccakP1600_ExtractBytes_Bytes:
837
+ addq $8, arg4 # undo the last subtraction: arg4 = remaining bytes (0..7)
838
+ jz KeccakP1600_ExtractBytes_Exit
839
+ KeccakP1600_ExtractBytes_BytesLoop:
840
+ movb (arg1), %al
841
+ xorb (arg5), %al
842
+ movb %al, (arg2)
843
+ addq $1, arg2
844
+ addq $1, arg5
845
+ addq $1, arg1
846
+ subq $1, arg4
847
+ jnz KeccakP1600_ExtractBytes_BytesLoop
848
+ KeccakP1600_ExtractBytes_Exit:
849
+ retq
850
+ .size KeccakP1600_ExtractBytes, .-KeccakP1600_ExtractBytes # placed after the body so the symbol size covers the function
851
+
852
+ #----------------------------------------------------------------------------
853
+ #
854
+ # void KeccakP1600_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
855
+ # For length bytes starting at state offset: output = state byte xor KeccakLaneComplementTable byte xor input byte. Clobbers rax, arg1, arg2, arg3, arg5, arg6 and flags. NOTE(review): offset and length are used as full 64-bit registers, so callers are assumed to pass them zero-extended - confirm.
856
+ .align 8
857
+ .global KeccakP1600_ExtractAndAddBytes
858
+ .type KeccakP1600_ExtractAndAddBytes, %function
859
+ KeccakP1600_ExtractAndAddBytes:
860
+ addq arg4, arg1
861
+ leaq KeccakLaneComplementTable(%rip), arg6 # RIP-relative so the code stays position independent
862
+ addq arg4, arg6 # table pointer tracks the state pointer
863
+ subq $8, arg5
864
+ jc KeccakP1600_ExtractAndAddBytes_Bytes # fewer than 8 bytes: no whole lane
865
+ KeccakP1600_ExtractAndAddBytes_LanesLoop:
866
+ movq (arg1), rT1
867
+ xorq (arg6), rT1
868
+ xorq (arg2), rT1
869
+ movq rT1, (arg3)
870
+ addq $8, arg2
871
+ addq $8, arg3
872
+ addq $8, arg6
873
+ addq $8, arg1
874
+ subq $8, arg5
875
+ jnc KeccakP1600_ExtractAndAddBytes_LanesLoop
876
+ KeccakP1600_ExtractAndAddBytes_Bytes:
877
+ addq $8, arg5 # undo the last subtraction: arg5 = remaining bytes (0..7)
878
+ jz KeccakP1600_ExtractAndAddBytes_Exit
879
+ KeccakP1600_ExtractAndAddBytes_BytesLoop:
880
+ movb (arg1), %al
881
+ xorb (arg6), %al
882
+ xorb (arg2), %al
883
+ movb %al, (arg3)
884
+ addq $1, arg2
885
+ addq $1, arg3
886
+ addq $1, arg6
887
+ addq $1, arg1
888
+ subq $1, arg5
889
+ jnz KeccakP1600_ExtractAndAddBytes_BytesLoop
890
+ KeccakP1600_ExtractAndAddBytes_Exit:
891
+ retq
892
+ .size KeccakP1600_ExtractAndAddBytes, .-KeccakP1600_ExtractAndAddBytes # placed after the body so the symbol size covers the function
893
+
894
+ #----------------------------------------------------------------------------
895
+ #
896
+ # void KeccakP1600_Permute_Nrounds( void *state, unsigned int nrounds )
897
+ # Runs the last nrounds rounds of the 24-round sequence by jumping into the unrolled rounds through KeccakP1600_Permute_NroundsTable. nrounds is used unchecked as a 1-based index into that 24-entry table, so it must be in 1..24.
898
+ .align 8
899
+ .global KeccakP1600_Permute_Nrounds
900
+ .type KeccakP1600_Permute_Nrounds, %function
901
+ KeccakP1600_Permute_Nrounds:
902
+ mPushRegs
903
+ subq $8*25, %rsp # 200-byte scratch state, addressed through rpStack
904
+ movl %esi, %eax # rT1 = nrounds, zero-extended from the 32-bit argument in arg2
905
+
906
+ movq _ba(rpState), rCa
907
+ movq _be(rpState), rCe
908
+ movq _bu(rpState), rCu
909
+
910
+ xorq _ga(rpState), rCa
911
+ xorq _ge(rpState), rCe
912
+ xorq _gu(rpState), rCu
913
+
914
+ xorq _ka(rpState), rCa
915
+ xorq _ke(rpState), rCe
916
+ xorq _ku(rpState), rCu
917
+
918
+ xorq _ma(rpState), rCa
919
+ xorq _me(rpState), rCe
920
+ xorq _mu(rpState), rCu
921
+
922
+ xorq _sa(rpState), rCa
923
+ xorq _se(rpState), rCe
924
+ movq _si(rpState), rDi
925
+ movq _so(rpState), rDo
926
+ xorq _su(rpState), rCu
927
+
928
+ testq $1, rT1 # odd round counts enter at a round that reads the stack copy
929
+ jz KeccakP1600_Permute_Nrounds_Dispatch
930
+ movq _ba(rpState), rT2a # copy to stack
931
+ movq rT2a, _ba(rpStack)
932
+ movq _be(rpState), rT2a
933
+ movq rT2a, _be(rpStack)
934
+ movq _bi(rpState), rT2a
935
+ movq rT2a, _bi(rpStack)
936
+ movq _bo(rpState), rT2a
937
+ movq rT2a, _bo(rpStack)
938
+ movq _bu(rpState), rT2a
939
+ movq rT2a, _bu(rpStack)
940
+ movq _ga(rpState), rT2a
941
+ movq rT2a, _ga(rpStack)
942
+ movq _ge(rpState), rT2a
943
+ movq rT2a, _ge(rpStack)
944
+ movq _gi(rpState), rT2a
945
+ movq rT2a, _gi(rpStack)
946
+ movq _go(rpState), rT2a
947
+ movq rT2a, _go(rpStack)
948
+ movq _gu(rpState), rT2a
949
+ movq rT2a, _gu(rpStack)
950
+ movq _ka(rpState), rT2a
951
+ movq rT2a, _ka(rpStack)
952
+ movq _ke(rpState), rT2a
953
+ movq rT2a, _ke(rpStack)
954
+ movq _ki(rpState), rT2a
955
+ movq rT2a, _ki(rpStack)
956
+ movq _ko(rpState), rT2a
957
+ movq rT2a, _ko(rpStack)
958
+ movq _ku(rpState), rT2a
959
+ movq rT2a, _ku(rpStack)
960
+ movq _ma(rpState), rT2a
961
+ movq rT2a, _ma(rpStack)
962
+ movq _me(rpState), rT2a
963
+ movq rT2a, _me(rpStack)
964
+ movq _mi(rpState), rT2a
965
+ movq rT2a, _mi(rpStack)
966
+ movq _mo(rpState), rT2a
967
+ movq rT2a, _mo(rpStack)
968
+ movq _mu(rpState), rT2a
969
+ movq rT2a, _mu(rpStack)
970
+ movq _sa(rpState), rT2a
971
+ movq rT2a, _sa(rpStack)
972
+ movq _se(rpState), rT2a
973
+ movq rT2a, _se(rpStack)
974
+ movq _si(rpState), rT2a
975
+ movq rT2a, _si(rpStack)
976
+ movq _so(rpState), rT2a
977
+ movq rT2a, _so(rpStack)
978
+ movq _su(rpState), rT2a
979
+ movq rT2a, _su(rpStack)
980
+ KeccakP1600_Permute_Nrounds_Dispatch:
981
+ leaq KeccakP1600_Permute_NroundsTable-8(%rip), rT2a # RIP-relative table base (entry 1 is at offset 0); rT2a and rT1 are rewritten by the round before being read
982
+ jmp *(rT2a, rT1, 8) # jump to the entry label for nrounds
983
+
984
+ KeccakP1600_Permute_Nrounds24:
985
+ mKeccakRound rpState, rpStack, 0x0000000000000001, 0
986
+ KeccakP1600_Permute_Nrounds23:
987
+ mKeccakRound rpStack, rpState, 0x0000000000008082, 0
988
+ KeccakP1600_Permute_Nrounds22:
989
+ mKeccakRound rpState, rpStack, 0x800000000000808a, 0
990
+ KeccakP1600_Permute_Nrounds21:
991
+ mKeccakRound rpStack, rpState, 0x8000000080008000, 0
992
+ KeccakP1600_Permute_Nrounds20:
993
+ mKeccakRound rpState, rpStack, 0x000000000000808b, 0
994
+ KeccakP1600_Permute_Nrounds19:
995
+ mKeccakRound rpStack, rpState, 0x0000000080000001, 0
996
+ KeccakP1600_Permute_Nrounds18:
997
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
998
+ KeccakP1600_Permute_Nrounds17:
999
+ mKeccakRound rpStack, rpState, 0x8000000000008009, 0
1000
+ KeccakP1600_Permute_Nrounds16:
1001
+ mKeccakRound rpState, rpStack, 0x000000000000008a, 0
1002
+ KeccakP1600_Permute_Nrounds15:
1003
+ mKeccakRound rpStack, rpState, 0x0000000000000088, 0
1004
+ KeccakP1600_Permute_Nrounds14:
1005
+ mKeccakRound rpState, rpStack, 0x0000000080008009, 0
1006
+ KeccakP1600_Permute_Nrounds13:
1007
+ mKeccakRound rpStack, rpState, 0x000000008000000a, 0
1008
+ KeccakP1600_Permute_Nrounds12:
1009
+ mKeccakRound rpState, rpStack, 0x000000008000808b, 0
1010
+ KeccakP1600_Permute_Nrounds11:
1011
+ mKeccakRound rpStack, rpState, 0x800000000000008b, 0
1012
+ KeccakP1600_Permute_Nrounds10:
1013
+ mKeccakRound rpState, rpStack, 0x8000000000008089, 0
1014
+ KeccakP1600_Permute_Nrounds9:
1015
+ mKeccakRound rpStack, rpState, 0x8000000000008003, 0
1016
+ KeccakP1600_Permute_Nrounds8:
1017
+ mKeccakRound rpState, rpStack, 0x8000000000008002, 0
1018
+ KeccakP1600_Permute_Nrounds7:
1019
+ mKeccakRound rpStack, rpState, 0x8000000000000080, 0
1020
+ KeccakP1600_Permute_Nrounds6:
1021
+ mKeccakRound rpState, rpStack, 0x000000000000800a, 0
1022
+ KeccakP1600_Permute_Nrounds5:
1023
+ mKeccakRound rpStack, rpState, 0x800000008000000a, 0
1024
+ KeccakP1600_Permute_Nrounds4:
1025
+ mKeccakRound rpState, rpStack, 0x8000000080008081, 0
1026
+ KeccakP1600_Permute_Nrounds3:
1027
+ mKeccakRound rpStack, rpState, 0x8000000000008080, 0
1028
+ KeccakP1600_Permute_Nrounds2:
1029
+ mKeccakRound rpState, rpStack, 0x0000000080000001, 0
1030
+ KeccakP1600_Permute_Nrounds1:
1031
+ mKeccakRound rpStack, rpState, 0x8000000080008008, 1
1032
+ addq $8*25, %rsp
1033
+ mPopRegs
1034
+ retq
1035
+ .size KeccakP1600_Permute_Nrounds, .-KeccakP1600_Permute_Nrounds # placed after the body so the symbol size covers the function
1036
+
1037
+ KeccakP1600_Permute_NroundsTable: # entry k-1 is the entry point that runs the last k rounds; NOTE(review): the entries are still absolute addresses and need load-time relocation in a shared object
1038
+ .quad KeccakP1600_Permute_Nrounds1
1039
+ .quad KeccakP1600_Permute_Nrounds2
1040
+ .quad KeccakP1600_Permute_Nrounds3
1041
+ .quad KeccakP1600_Permute_Nrounds4
1042
+ .quad KeccakP1600_Permute_Nrounds5
1043
+ .quad KeccakP1600_Permute_Nrounds6
1044
+ .quad KeccakP1600_Permute_Nrounds7
1045
+ .quad KeccakP1600_Permute_Nrounds8
1046
+ .quad KeccakP1600_Permute_Nrounds9
1047
+ .quad KeccakP1600_Permute_Nrounds10
1048
+ .quad KeccakP1600_Permute_Nrounds11
1049
+ .quad KeccakP1600_Permute_Nrounds12
1050
+ .quad KeccakP1600_Permute_Nrounds13
1051
+ .quad KeccakP1600_Permute_Nrounds14
1052
+ .quad KeccakP1600_Permute_Nrounds15
1053
+ .quad KeccakP1600_Permute_Nrounds16
1054
+ .quad KeccakP1600_Permute_Nrounds17
1055
+ .quad KeccakP1600_Permute_Nrounds18
1056
+ .quad KeccakP1600_Permute_Nrounds19
1057
+ .quad KeccakP1600_Permute_Nrounds20
1058
+ .quad KeccakP1600_Permute_Nrounds21
1059
+ .quad KeccakP1600_Permute_Nrounds22
1060
+ .quad KeccakP1600_Permute_Nrounds23
1061
+ .quad KeccakP1600_Permute_Nrounds24
1062
+
1063
+ #----------------------------------------------------------------------------
1064
+ #
1065
+ # void KeccakP1600_Permute_12rounds( void *state )
1066
+ # Applies mKeccakPermutation12 to the state in place, preserving the callee-saved registers around it.
1067
+ .align 8
1068
+ .global KeccakP1600_Permute_12rounds
1069
+ .type KeccakP1600_Permute_12rounds, %function
1070
+ KeccakP1600_Permute_12rounds:
1071
+ mPushRegs
1072
+ mKeccakPermutation12
1073
+ mPopRegs
1074
+ retq
1075
+ .size KeccakP1600_Permute_12rounds, .-KeccakP1600_Permute_12rounds # placed after the body so the symbol size covers the function
1076
+
1077
+ #----------------------------------------------------------------------------
1078
+ #
1079
+ # void KeccakP1600_Permute_24rounds( void *state )
1080
+ # Applies mKeccakPermutation24 to the state in place, preserving the callee-saved registers around it.
1081
+ .align 8
1082
+ .global KeccakP1600_Permute_24rounds
1083
+ .type KeccakP1600_Permute_24rounds, %function
1084
+ KeccakP1600_Permute_24rounds:
1085
+ mPushRegs
1086
+ mKeccakPermutation24
1087
+ mPopRegs
1088
+ retq
1089
+ .size KeccakP1600_Permute_24rounds, .-KeccakP1600_Permute_24rounds # placed after the body so the symbol size covers the function
1090
+
1091
+ #----------------------------------------------------------------------------
1092
+ # Absorbs as many whole blocks of laneCount 8-byte lanes as dataByteLen allows: each block is xored into the state, trailingBits is xored into the lane right after it, and a 24-round permutation follows.
1093
+ # size_t KeccakF1600_FastLoop_Absorb( void *state, unsigned int laneCount, unsigned char *data,
1094
+ # size_t dataByteLen, unsigned char trailingBits )
1095
+ # Result: number of bytes consumed (final data pointer minus initial data pointer), 0 when fewer than laneCount lanes are available. laneCount == 21 takes an unrolled path, any other value goes through KeccakP1600_AddBytes.
1096
+ .size KeccakF1600_FastLoop_Absorb, .-KeccakF1600_FastLoop_Absorb # NOTE(review): this directive precedes the label it measures, so the expression is not the length of the body - confirm the placement is intended
1097
+ .align 8
1098
+ .global KeccakF1600_FastLoop_Absorb
1099
+ .type KeccakF1600_FastLoop_Absorb, %function
1100
+ KeccakF1600_FastLoop_Absorb:
1101
+ mPushRegs # macro defined elsewhere in this file - presumably saves the callee-saved registers used below, TODO confirm
1102
+ pushq arg3 # save initial data pointer
1103
+ pushq arg5 # save trailingBits (now at the top of the stack, above the saved data pointer)
1104
+ shrq $3, arg4 # nbrLanes = dataByteLen / SnP_laneLengthInBytes
1105
+ subq arg2, arg4 # arg4 = nbrLanes - laneCount, borrow set when nbrLanes < laneCount. NOTE(review): laneCount is declared unsigned int but is used here as a 64-bit operand - confirm the upper half of the register is clean
1106
+ jc KeccakF1600_FastLoop_Absorb_Exit # not even one full block: leave with the data pointer untouched, so the result is 0
1107
+ cmpq $21, arg2
1108
+ jnz KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop # only laneCount == 21 falls through into the unrolled loop
1109
+ KeccakF1600_FastLoop_Absorb_Loop21: # Fixed laneCount = 21 (rate = 1344, capacity = 256)
1110
+ movq _ba(arg3), rT1a # load data lanes ba..gu (first ten lanes) into temporaries
1111
+ movq _be(arg3), rT1e
1112
+ movq _bi(arg3), rT1i
1113
+ movq _bo(arg3), rT1o
1114
+ movq _bu(arg3), rT1u
1115
+ movq _ga(arg3), rT2a
1116
+ movq _ge(arg3), rT2e
1117
+ movq _gi(arg3), rT2i
1118
+ movq _go(arg3), rT2o
1119
+ movq _gu(arg3), rT2u
1120
+ xorq rT1a, _ba(arg1) # state lanes ba..gu ^= data lanes ba..gu
1121
+ xorq rT1e, _be(arg1)
1122
+ xorq rT1i, _bi(arg1)
1123
+ xorq rT1o, _bo(arg1)
1124
+ xorq rT1u, _bu(arg1)
1125
+ xorq rT2a, _ga(arg1)
1126
+ xorq rT2e, _ge(arg1)
1127
+ xorq rT2i, _gi(arg1)
1128
+ xorq rT2o, _go(arg1)
1129
+ xorq rT2u, _gu(arg1)
1130
+ movq _ka(arg3), rT1a # load data lanes ka..mu (next ten lanes) into the same temporaries
1131
+ movq _ke(arg3), rT1e
1132
+ movq _ki(arg3), rT1i
1133
+ movq _ko(arg3), rT1o
1134
+ movq _ku(arg3), rT1u
1135
+ movq _ma(arg3), rT2a
1136
+ movq _me(arg3), rT2e
1137
+ movq _mi(arg3), rT2i
1138
+ movq _mo(arg3), rT2o
1139
+ movq _mu(arg3), rT2u
1140
+ xorq rT1a, _ka(arg1) # state lanes ka..ku ^= data, which frees rT1a and rT1e for reuse below
1141
+ xorq rT1e, _ke(arg1)
1142
+ xorq rT1i, _ki(arg1)
1143
+ xorq rT1o, _ko(arg1)
1144
+ xorq rT1u, _ku(arg1)
1145
+ movq _sa(arg3), rT1a # load data lane sa, the 21st and last lane of the block
1146
+ movq (%rsp), rT1e # fetch trailingBits saved at the top of the stack, xored into the state below
1147
+ xorq rT2a, _ma(arg1) # state lanes ma..mu ^= data
1148
+ xorq rT2e, _me(arg1)
1149
+ xorq rT2i, _mi(arg1)
1150
+ addq $_se, arg3 # advance the data pointer by the offset of lane se, presumably 21 lanes = 168 bytes - TODO confirm against the definition of _se
1151
+ xorq rT2o, _mo(arg1)
1152
+ xorq rT2u, _mu(arg1)
1153
+ xorq rT1a, _sa(arg1) # state lane sa ^= last data lane
1154
+ xorq rT1e, _se(arg1) # state lane se (first lane after the block) ^= trailingBits
1155
+ pushq arg3 # keep the data pointer and the remaining lane count on the stack across the permutation macro
1156
+ pushq arg4
1157
+ mKeccakPermutationInlinable24 # macro defined elsewhere - by its name, an inlinable 24-round permutation of the state
1158
+ popq arg4
1159
+ popq arg3
1160
+ subq $21, arg4 # while (nbrLanes >= 21) - arg4 already has one block subtracted, so no borrow means another full block is available
1161
+ jnc KeccakF1600_FastLoop_Absorb_Loop21
1162
+ KeccakF1600_FastLoop_Absorb_Exit:
1163
+ addq $8, %rsp # free trailingBits
1164
+ popq rT1a # restore initial data pointer
1165
+ subq rT1a, arg3 # processed = data pointer - initial data pointer
1166
+ movq arg3, rT1a # hand the byte count back in rT1a, presumably the return register - TODO confirm against its definition
1167
+ mPopRegs # macro defined elsewhere - presumably undoes mPushRegs while leaving rT1a intact, TODO confirm
1168
+ retq
1169
+ KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop:
1170
+ pushq arg4 # save remaining lane count, laneCount and state pointer across the call
1171
+ pushq arg2
1172
+ pushq arg1
1173
+ movq arg2, arg4 # prepare xor call: length (in bytes)
1174
+ shlq $3, arg4 # length = laneCount * 8
1175
+ movq arg3, arg2 # data pointer
1176
+ xorq arg3, arg3 # offset = 0
1177
+ callq KeccakP1600_AddBytes # (void *state, const unsigned char *data, unsigned int offset, unsigned int length)
1178
+ movq arg2, arg3 # updated data pointer - NOTE(review): relies on KeccakP1600_AddBytes leaving arg2 advanced past the consumed data, confirm against its implementation
1179
+ movq 24(%rsp), rT1a # fetch trailingBits: stack holds state, laneCount, lane count, trailingBits at offsets 0, 8, 16, 24
1180
+ xorq rT1a, (arg1) # xor trailingBits at the address left in arg1 - NOTE(review): presumably KeccakP1600_AddBytes leaves arg1 pointing at the lane after the block, confirm
1181
+ popq arg1 # restore the state pointer
1182
+ pushq arg3 # keep the updated data pointer across the permutation call
1183
+ callq KeccakP1600_Permute_24rounds
1184
+ popq arg3
1185
+ popq arg2
1186
+ popq arg4
1187
+ subq arg2, arg4 # while (nbrLanes >= laneCount) - no borrow means another full block is available
1188
+ jnc KeccakF1600_FastLoop_Absorb_VariableLaneCountLoop
1189
+ jmp KeccakF1600_FastLoop_Absorb_Exit
1190
+