digest-kangarootwelve 0.0.2 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (307) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +71 -37
  3. data/Rakefile +7 -9
  4. data/digest-kangarootwelve.gemspec +323 -14
  5. data/ext/digest/kangarootwelve/ext.c +228 -177
  6. data/ext/digest/kangarootwelve/extconf.rb +15 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/ext/digest/kangarootwelve/utils.h +101 -0
  305. data/lib/digest/kangarootwelve/version.rb +2 -2
  306. data/test/test.rb +68 -31
  307. metadata +305 -27
@@ -0,0 +1,49 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ Please refer to PlSnP-documentation.h for more details.
14
+ */
15
+
16
+ #ifndef _KeccakP_1600_times2_SnP_h_
17
+ #define _KeccakP_1600_times2_SnP_h_
18
+
19
+ #include "SIMD512-2-config.h"
20
+
21
+ #define KeccakP1600times2_implementation "512-bit SIMD implementation (" KeccakP1600times2_implementation_config ")"
22
+ #define KeccakP1600times2_statesSizeInBytes 400
23
+ #define KeccakP1600times2_statesAlignment 64
24
+ #define KeccakF1600times2_FastLoop_supported
25
+ #define KeccakP1600times2_12rounds_FastLoop_supported
26
+
27
+ #include <stddef.h>
28
+
29
+ #define KeccakP1600times2_StaticInitialize()
30
+ void KeccakP1600times2_InitializeAll(void *states);
31
+ #define KeccakP1600times2_AddByte(states, instanceIndex, byte, offset) \
32
+ ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*2*8 + (offset)%8] ^= (byte) /* XORs `byte` into the state buffer: each 8-byte group (presumably one lane) of an instance is followed by the matching group of the other instance, hence the 2*8 stride per group and the instanceIndex*8 base. */
33
+ void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
34
+ void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
35
+ void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
36
+ void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
37
+ void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
38
+ void KeccakP1600times2_PermuteAll_4rounds(void *states);
39
+ void KeccakP1600times2_PermuteAll_6rounds(void *states);
40
+ void KeccakP1600times2_PermuteAll_12rounds(void *states);
41
+ void KeccakP1600times2_PermuteAll_24rounds(void *states);
42
+ void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
43
+ void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
44
+ void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
45
+ void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
46
+ size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
47
+ size_t KeccakP1600times2_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
48
+
49
+ #endif
@@ -0,0 +1,883 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
14
+ Please refer to PlSnP-documentation.h for more details.
15
+
16
+ This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
17
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ */
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <stdint.h>
24
+ #include <smmintrin.h>
25
+ #include <wmmintrin.h>
26
+ #include <immintrin.h>
27
+ #include <emmintrin.h>
28
+ #include "align.h"
29
+ #include "KeccakP-1600-times4-SnP.h"
30
+ #include "SIMD512-4-config.h"
31
+
32
+ #include "brg_endian.h"
33
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
34
+ #error Expecting a little-endian platform
35
+ #endif
36
+
37
+ /* Comment the define hereunder when compiling for a CPU with AVX-512 SIMD */
38
+ /*
39
+ * Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined,
40
+ * errors will occur if we did a bad interpretation of the AVX-512 intrinsics'
41
+ * API or functionality.
42
+ */
43
+ /* #define SIMULATE_AVX512 */
44
+
45
/* Fixed-width unsigned integer aliases used throughout this file. */
+ typedef uint8_t UINT8;
46
+ typedef uint32_t UINT32;
47
+ typedef uint64_t UINT64;
48
+
49
+ #if defined(SIMULATE_AVX512)
50
+
51
/* Stand-in for the 512-bit vector type, only compiled when SIMULATE_AVX512 is
   defined: the vector is modelled as eight 64-bit words. */
+ typedef struct
52
+ {
53
+ UINT64 x[8];
54
+ } __m512i;
55
+
56
+ static __m512i _mm512_xor_si512( __m512i a, __m512i b)
57
+ {
58
+ __m512i r;
59
+ unsigned int i;
60
+
61
+ for ( i = 0; i < 8; ++i )
62
+ r.x[i] = a.x[i] ^ b.x[i];
63
+ return(r);
64
+ }
65
+
66
+ static __m256i _mm256_ternarylogic_epi64(__m256i a, __m256i b, __m256i c, int imm)
67
+ {
68
+
69
+ if (imm == 0x96)
70
+ return _mm256_xor_si256( _mm256_xor_si256( a, b ), c );
71
+ if (imm == 0xD2)
72
+ return _mm256_xor_si256( a, _mm256_andnot_si256(b, c) );
73
+ printf( "_mm256_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
74
+ exit(1);
75
+ }
76
+
77
+ static __m256i _mm256_rol_epi64(__m256i a, int offset)
78
+ {
79
+ return _mm256_or_si256(_mm256_slli_epi64(a, offset), _mm256_srli_epi64(a, 64-offset));
80
+ }
81
+
82
+ static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
83
+ {
84
+ __m512i r;
85
+ unsigned int i;
86
+ UINT32 offset[8];
87
+
88
+ _mm256_store_si256( (__m256i*)offset, idx );
89
+ for ( i = 0; i < 8; ++i )
90
+ r.x[i] = *(const UINT64*)((const char*)p + offset[i] * scale);
91
+ return(r);
92
+ }
93
+
94
+ static void _mm256_i32scatter_epi64( void *p, __m128i idx, __m256i value, int scale)
95
+ {
96
+ unsigned int i;
97
+ UINT64 v[4];
98
+ UINT32 offset[4];
99
+
100
+ _mm_store_ps( (float*)offset, (__m128)idx );
101
+ _mm256_store_si256( (__m256i*)v, value );
102
+ for ( i = 0; i < 4; ++i )
103
+ *(UINT64*)((char*)p + offset[i] * scale) = v[i];
104
+ }
105
+
106
+ static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
107
+ {
108
+ unsigned int i;
109
+ UINT32 offset[8];
110
+
111
+ _mm256_store_si256( (__m256i*)offset, idx );
112
+ for ( i = 0; i < 8; ++i )
113
+ *(UINT64*)((char*)p + offset[i] * scale) = value.x[i];
114
+ }
115
+
116
+ #endif
117
+
118
/* Short aliases for the 128-, 256- and 512-bit integer vector types
   (__m512i is the struct defined in this file when SIMULATE_AVX512 is defined). */
+ typedef __m128i V128;
119
+ typedef __m256i V256;
120
+ typedef __m512i V512;
121
+
122
#if defined(KeccakP1600times4_useAVX512)

/* Thin wrappers over the vector intrinsics used by the round macros and the
   lane add/overwrite/extract functions.
   - XOR / XOR3 / XOR5 : 2-, 3- and 5-input XOR on 256-bit vectors
   - XOR512            : 2-input XOR on 512-bit vectors
   - ROL               : rotate-left of every 64-bit element
   - Chi               : a ^ (~b & c), ternary-logic table 0xD2
   - CONST256_64       : broadcast one 64-bit object to all four elements
   - LOAD4_32/LOAD8_32 : build a vector of 32-bit indices (first argument is
                         the highest element)
   - LOAD_GATHER*/STORE_SCATTER* : indexed load/store of 64-bit elements with
                         a fixed scale of 8 bytes
   Macro arguments are parenthesized and every index is narrowed to UINT32,
   the element width the set intrinsics take. */
#define XOR(a,b)                    _mm256_xor_si256((a),(b))
#define XOR3(a,b,c)                 _mm256_ternarylogic_epi64((a),(b),(c),0x96)
#define XOR5(a,b,c,d,e)             XOR3(XOR3((a),(b),(c)),(d),(e))
#define XOR512(a,b)                 _mm512_xor_si512((a),(b))
#define ROL(a,offset)               _mm256_rol_epi64((a),(offset))
#define Chi(a,b,c)                  _mm256_ternarylogic_epi64((a),(b),(c),0xD2)

#define CONST256_64(a)              (V256)_mm256_broadcast_sd((const double*)(&(a)))
#define LOAD4_32(a,b,c,d)           _mm_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d))
#define LOAD8_32(a,b,c,d,e,f,g,h)   _mm256_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d), (UINT32)(e), (UINT32)(f), (UINT32)(g), (UINT32)(h))
#define LOAD_GATHER4_64(idx,p)      _mm256_i32gather_epi64( (const void*)(p), (idx), 8)
#define LOAD_GATHER8_64(idx,p)      _mm512_i32gather_epi64( (idx), (const void*)(p), 8)
#define STORE_SCATTER4_64(p,idx, v) _mm256_i32scatter_epi64( (void*)(p), (idx), (v), 8)
#define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), (idx), (v), 8)

#endif
140
+
141
/* Index of a 64-bit lane inside the interleaved four-instance state: the four
   instances' copies of lane `lanePosition` are stored next to each other.
   Both arguments are parenthesized so that expressions with low-precedence
   operators can be passed safely. */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + (instanceIndex))
/* Size of one lane in bytes. */
#define SnP_laneLengthInBytes 8
143
+
144
+ void KeccakP1600times4_InitializeAll(void *states)
145
+ {
146
+ memset(states, 0, KeccakP1600times4_statesSizeInBytes);
147
+ }
148
+
149
+ void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
150
+ {
151
+ unsigned int sizeLeft = length;
152
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
153
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
154
+ const unsigned char *curData = data;
155
+ UINT64 *statesAsLanes = states;
156
+
157
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
158
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
159
+ UINT64 lane = 0;
160
+ if (bytesInLane > sizeLeft)
161
+ bytesInLane = sizeLeft;
162
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
163
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
164
+ sizeLeft -= bytesInLane;
165
+ lanePosition++;
166
+ curData += bytesInLane;
167
+ }
168
+
169
+ while(sizeLeft >= SnP_laneLengthInBytes) {
170
+ UINT64 lane = *((const UINT64*)curData);
171
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
172
+ sizeLeft -= SnP_laneLengthInBytes;
173
+ lanePosition++;
174
+ curData += SnP_laneLengthInBytes;
175
+ }
176
+
177
+ if (sizeLeft > 0) {
178
+ UINT64 lane = 0;
179
+ memcpy(&lane, curData, sizeLeft);
180
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
181
+ }
182
+ }
183
+
184
+ void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
185
+ {
186
+ V256 *stateAsLanes256 = states;
187
+ V512 *stateAsLanes512 = states;
188
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
189
+ unsigned int i;
190
+ V256 index512;
191
+ V128 index256;
192
+
193
+ #define Add_In1( argIndex ) stateAsLanes256[argIndex] = XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, dataAsLanes+argIndex))
194
+ #define Add_In2( argIndex ) stateAsLanes512[argIndex/2] = XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
195
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
196
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
197
+ if ( laneCount >= 16 ) {
198
+ Add_In2( 0 );
199
+ Add_In2( 2 );
200
+ Add_In2( 4 );
201
+ Add_In2( 6 );
202
+ Add_In2( 8 );
203
+ Add_In2( 10 );
204
+ Add_In2( 12 );
205
+ Add_In2( 14 );
206
+ if ( laneCount >= 20 ) {
207
+ Add_In2( 16 );
208
+ Add_In2( 18 );
209
+ for(i=20; i<laneCount; i++)
210
+ Add_In1( i );
211
+ }
212
+ else {
213
+ for(i=16; i<laneCount; i++)
214
+ Add_In1( i );
215
+ }
216
+ }
217
+ else {
218
+ for(i=0; i<laneCount; i++)
219
+ Add_In1( i );
220
+ }
221
+ #undef Add_In1
222
+ #undef Add_In2
223
+ }
224
+
225
+ void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
226
+ {
227
+ unsigned int sizeLeft = length;
228
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
229
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
230
+ const unsigned char *curData = data;
231
+ UINT64 *statesAsLanes = states;
232
+
233
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
234
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
235
+ if (bytesInLane > sizeLeft)
236
+ bytesInLane = sizeLeft;
237
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
238
+ sizeLeft -= bytesInLane;
239
+ lanePosition++;
240
+ curData += bytesInLane;
241
+ }
242
+
243
+ while(sizeLeft >= SnP_laneLengthInBytes) {
244
+ UINT64 lane = *((const UINT64*)curData);
245
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
246
+ sizeLeft -= SnP_laneLengthInBytes;
247
+ lanePosition++;
248
+ curData += SnP_laneLengthInBytes;
249
+ }
250
+
251
+ if (sizeLeft > 0) {
252
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
253
+ }
254
+ }
255
+
256
+ void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
257
+ {
258
+ V256 *stateAsLanes256 = states;
259
+ V512 *stateAsLanes512 = states;
260
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
261
+ unsigned int i;
262
+ V256 index512;
263
+ V128 index256;
264
+
265
+ #define OverWr1( argIndex ) stateAsLanes256[argIndex] = LOAD_GATHER4_64(index256, dataAsLanes+argIndex)
266
+ #define OverWr2( argIndex ) stateAsLanes512[argIndex/2] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
267
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
268
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
269
+ if ( laneCount >= 16 ) {
270
+ OverWr2( 0 );
271
+ OverWr2( 2 );
272
+ OverWr2( 4 );
273
+ OverWr2( 6 );
274
+ OverWr2( 8 );
275
+ OverWr2( 10 );
276
+ OverWr2( 12 );
277
+ OverWr2( 14 );
278
+ if ( laneCount >= 20 ) {
279
+ OverWr2( 16 );
280
+ OverWr2( 18 );
281
+ for(i=20; i<laneCount; i++)
282
+ OverWr1( i );
283
+ }
284
+ else {
285
+ for(i=16; i<laneCount; i++)
286
+ OverWr1( i );
287
+ }
288
+ }
289
+ else {
290
+ for(i=0; i<laneCount; i++)
291
+ OverWr1( i );
292
+ }
293
+ #undef OverWr1
294
+ #undef OverWr2
295
+ }
296
+
297
+ void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
298
+ {
299
+ unsigned int sizeLeft = byteCount;
300
+ unsigned int lanePosition = 0;
301
+ UINT64 *statesAsLanes = states;
302
+
303
+ while(sizeLeft >= SnP_laneLengthInBytes) {
304
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
305
+ sizeLeft -= SnP_laneLengthInBytes;
306
+ lanePosition++;
307
+ }
308
+
309
+ if (sizeLeft > 0) {
310
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
311
+ }
312
+ }
313
+
314
+ void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
315
+ {
316
+ unsigned int sizeLeft = length;
317
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
318
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
319
+ unsigned char *curData = data;
320
+ const UINT64 *statesAsLanes = states;
321
+
322
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
323
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
324
+ if (bytesInLane > sizeLeft)
325
+ bytesInLane = sizeLeft;
326
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
327
+ sizeLeft -= bytesInLane;
328
+ lanePosition++;
329
+ curData += bytesInLane;
330
+ }
331
+
332
+ while(sizeLeft >= SnP_laneLengthInBytes) {
333
+ *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
334
+ sizeLeft -= SnP_laneLengthInBytes;
335
+ lanePosition++;
336
+ curData += SnP_laneLengthInBytes;
337
+ }
338
+
339
+ if (sizeLeft > 0) {
340
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
341
+ }
342
+ }
343
+
344
+ void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
345
+ {
346
+ const V256 *stateAsLanes256 = states;
347
+ const V512 *stateAsLanes512 = states;
348
+ UINT64 *dataAsLanes = (UINT64 *)data;
349
+ unsigned int i;
350
+ V256 index512;
351
+ V128 index256;
352
+
353
+ #define Extr1( argIndex ) STORE_SCATTER4_64(dataAsLanes+argIndex, index256, stateAsLanes256[argIndex])
354
+ #define Extr2( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/2])
355
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
356
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
357
+ if ( laneCount >= 16 ) {
358
+ Extr2( 0 );
359
+ Extr2( 2 );
360
+ Extr2( 4 );
361
+ Extr2( 6 );
362
+ Extr2( 8 );
363
+ Extr2( 10 );
364
+ Extr2( 12 );
365
+ Extr2( 14 );
366
+ if ( laneCount >= 20 ) {
367
+ Extr2( 16 );
368
+ Extr2( 18 );
369
+ for(i=20; i<laneCount; i++)
370
+ Extr1( i );
371
+ }
372
+ else {
373
+ for(i=16; i<laneCount; i++)
374
+ Extr1( i );
375
+ }
376
+ }
377
+ else {
378
+ for(i=0; i<laneCount; i++)
379
+ Extr1( i );
380
+ }
381
+ #undef Extr1
382
+ #undef Extr2
383
+ }
384
+
385
+ void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
386
+ {
387
+ unsigned int sizeLeft = length;
388
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
389
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
390
+ const unsigned char *curInput = input;
391
+ unsigned char *curOutput = output;
392
+ const UINT64 *statesAsLanes = states;
393
+
394
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
395
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
396
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
397
+ if (bytesInLane > sizeLeft)
398
+ bytesInLane = sizeLeft;
399
+ sizeLeft -= bytesInLane;
400
+ do {
401
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
402
+ lane >>= 8;
403
+ } while ( --bytesInLane != 0);
404
+ lanePosition++;
405
+ }
406
+
407
+ while(sizeLeft >= SnP_laneLengthInBytes) {
408
+ *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
409
+ sizeLeft -= SnP_laneLengthInBytes;
410
+ lanePosition++;
411
+ curInput += SnP_laneLengthInBytes;
412
+ curOutput += SnP_laneLengthInBytes;
413
+ }
414
+
415
+ if (sizeLeft != 0) {
416
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
417
+ do {
418
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
419
+ lane >>= 8;
420
+ } while ( --sizeLeft != 0);
421
+ }
422
+ }
423
+
424
+ void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
425
+ {
426
+ const V256 *stateAsLanes256 = states;
427
+ const V512 *stateAsLanes512 = states;
428
+ const UINT64 *inAsLanes = (const UINT64 *)input;
429
+ UINT64 *outAsLanes = (UINT64 *)output;
430
+ unsigned int i;
431
+ V256 index512;
432
+ V128 index256;
433
+
434
+ #define ExtrAdd1( argIndex ) STORE_SCATTER4_64(outAsLanes+argIndex, index256, XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, inAsLanes+argIndex)))
435
+ #define ExtrAdd2( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
436
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
437
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
438
+
439
+ if ( laneCount >= 16 ) {
440
+ ExtrAdd2( 0 );
441
+ ExtrAdd2( 2 );
442
+ ExtrAdd2( 4 );
443
+ ExtrAdd2( 6 );
444
+ ExtrAdd2( 8 );
445
+ ExtrAdd2( 10 );
446
+ ExtrAdd2( 12 );
447
+ ExtrAdd2( 14 );
448
+ if ( laneCount >= 20 ) {
449
+ ExtrAdd2( 16 );
450
+ ExtrAdd2( 18 );
451
+ for(i=20; i<laneCount; i++)
452
+ ExtrAdd1( i );
453
+ }
454
+ else {
455
+ for(i=16; i<laneCount; i++)
456
+ ExtrAdd1( i );
457
+ }
458
+ }
459
+ else {
460
+ for(i=0; i<laneCount; i++)
461
+ ExtrAdd1( i );
462
+ }
463
+ #undef ExtrAdd1
464
+ #undef ExtrAdd2
465
+
466
+ }
467
+
468
/* Table of the 24 64-bit round constants, one per round. The round macros
   below index it with the round number and broadcast the entry (CONST256_64)
   before XORing it into the first lane (Iota step). */
+ static ALIGN(KeccakP1600times4_statesAlignment) const UINT64 KeccakP1600RoundConstants[24] = {
469
+ 0x0000000000000001ULL,
470
+ 0x0000000000008082ULL,
471
+ 0x800000000000808aULL,
472
+ 0x8000000080008000ULL,
473
+ 0x000000000000808bULL,
474
+ 0x0000000080000001ULL,
475
+ 0x8000000080008081ULL,
476
+ 0x8000000000008009ULL,
477
+ 0x000000000000008aULL,
478
+ 0x0000000000000088ULL,
479
+ 0x0000000080008009ULL,
480
+ 0x000000008000000aULL,
481
+ 0x000000008000808bULL,
482
+ 0x800000000000008bULL,
483
+ 0x8000000000008089ULL,
484
+ 0x8000000000008003ULL,
485
+ 0x8000000000008002ULL,
486
+ 0x8000000000000080ULL,
487
+ 0x000000000000800aULL,
488
+ 0x800000008000000aULL,
489
+ 0x8000000080008081ULL,
490
+ 0x8000000000008080ULL,
491
+ 0x0000000080000001ULL,
492
+ 0x8000000080008008ULL};
493
+
494
/* Declares the V256 working set used by the round macros: five temporaries
   _Ba.._Bu, five theta terms _Da.._Du, and the 25 state lanes _ba.._su.
   The expansion has no trailing semicolon; the caller supplies it. */
+ #define KeccakP_DeclareVars \
495
+ V256 _Ba, _Be, _Bi, _Bo, _Bu; \
496
+ V256 _Da, _De, _Di, _Do, _Du; \
497
+ V256 _ba, _be, _bi, _bo, _bu; \
498
+ V256 _ga, _ge, _gi, _go, _gu; \
499
+ V256 _ka, _ke, _ki, _ko, _ku; \
500
+ V256 _ma, _me, _mi, _mo, _mu; \
501
+ V256 _sa, _se, _si, _so, _su
502
+
503
/* Processes one group of five lanes _L1.._L5:
   1. XORs each lane with its theta term (_Da.._Du) into the temporaries named
      by _Bb1.._Bb5;
   2. rotates those temporaries left by _Rr1.._Rr5 (the first rotation is
      skipped when _Rr1 is 0);
   3. overwrites _L1.._L5 with the Chi combination of _Ba.._Bu, always read in
      the fixed order a,e,i,o,u whatever the _Bb* assignment was.
   Expands to several statements (not wrapped in do/while) and ends with a
   semicolon. */
+ #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
504
+ _Bb1 = XOR(_L1, _Da); \
505
+ _Bb2 = XOR(_L2, _De); \
506
+ _Bb3 = XOR(_L3, _Di); \
507
+ _Bb4 = XOR(_L4, _Do); \
508
+ _Bb5 = XOR(_L5, _Du); \
509
+ if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
510
+ _Bb2 = ROL(_Bb2, _Rr2); \
511
+ _Bb3 = ROL(_Bb3, _Rr3); \
512
+ _Bb4 = ROL(_Bb4, _Rr4); \
513
+ _Bb5 = ROL(_Bb5, _Rr5); \
514
+ _L1 = Chi( _Ba, _Be, _Bi); \
515
+ _L2 = Chi( _Be, _Bi, _Bo); \
516
+ _L3 = Chi( _Bi, _Bo, _Bu); \
517
+ _L4 = Chi( _Bo, _Bu, _Ba); \
518
+ _L5 = Chi( _Bu, _Ba, _Be);
519
+
520
/* First lane group of a round. Computes the five column parities into
   _Ba.._Bu (XOR of the b/g/k/m/s lane of each column), derives the theta
   terms as _Dx = ROL(next parity, 1) ^ previous parity, then runs
   KeccakP_ThetaRhoPiChi on _L1.._L5 with rotations 0,44,43,21,14 and finally
   XORs the round constant _rc into _L1 (Iota). The theta terms stay in
   _Da.._Du for the four KeccakP_ThetaRhoPiChi1..4 invocations that follow. */
+ #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
521
+ _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
522
+ _Be = XOR5( _be, _ge, _ke, _me, _se ); \
523
+ _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
524
+ _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
525
+ _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
526
+ _Da = ROL( _Be, 1 ); \
527
+ _De = ROL( _Bi, 1 ); \
528
+ _Di = ROL( _Bo, 1 ); \
529
+ _Do = ROL( _Bu, 1 ); \
530
+ _Du = ROL( _Ba, 1 ); \
531
+ _Da = XOR( _Da, _Bu ); \
532
+ _De = XOR( _De, _Ba ); \
533
+ _Di = XOR( _Di, _Be ); \
534
+ _Do = XOR( _Do, _Bi ); \
535
+ _Du = XOR( _Du, _Bo ); \
536
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
537
+ _L1 = XOR(_L1, _rc) /* Iota */
538
+
539
/* Lane groups 1 to 4 of a round: each forwards to KeccakP_ThetaRhoPiChi with
   its own fixed assignment of the _B temporaries and its own five rotation
   amounts. They rely on the theta terms _Da.._Du computed beforehand by
   KeccakP_ThetaRhoPiChiIota0. */
+ #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
540
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
541
+
542
+ #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
543
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
544
+
545
+ #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
546
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
547
+
548
+ #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
549
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
550
+
551
/* Four consecutive rounds using round constants i, i+1, i+2 and i+3. Each
   round is one Iota0 group followed by groups 1..4; the lane variables are
   passed in a different order in each round, and the fourth round uses the
   natural order (_ba,_be,.. / _ga,_ge,.. / ...). The expansion has no trailing
   semicolon. */
+ #define KeccakP_4rounds( i ) \
552
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST256_64(KeccakP1600RoundConstants[i]) ); \
553
+ KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
554
+ KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
555
+ KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
556
+ KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
557
+ \
558
+ KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
559
+ KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
560
+ KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
561
+ KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
562
+ KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
563
+ \
564
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i+2]) ); \
565
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
566
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
567
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
568
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
569
+ \
570
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+3]) ); \
571
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
572
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
573
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
574
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
575
+
576
/* Two consecutive rounds using round constants i and i+1. The lane orderings
   are the same as in the last two rounds of KeccakP_4rounds, so the lanes must
   have been loaded in the matching permuted order (see copyFromState2rounds).
   The expansion has no trailing semicolon. */
+ #define KeccakP_2rounds( i ) \
577
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i]) ); \
578
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
579
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
580
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
581
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
582
+ \
583
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
584
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
585
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
586
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
587
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
588
+
589
/* rounds12 runs the rounds with constants 12..23, rounds24 those with
   constants 0..23. Three build-time variants are provided:
   - KeccakP1600times4_fullUnrolling defined: both are straight-line code;
   - KeccakP1600times4_unrolling == 4: both loop over KeccakP_4rounds;
   - KeccakP1600times4_unrolling == 12: rounds12 is straight-line, rounds24
     loops over groups of twelve rounds.
   Any other configuration stops the build with #error. The looping variants
   use a variable named `i` that the calling function must declare. */
+ #ifdef KeccakP1600times4_fullUnrolling
590
+
591
+ #define rounds12 \
592
+ KeccakP_4rounds( 12 ); \
593
+ KeccakP_4rounds( 16 ); \
594
+ KeccakP_4rounds( 20 )
595
+
596
+ #define rounds24 \
597
+ KeccakP_4rounds( 0 ); \
598
+ KeccakP_4rounds( 4 ); \
599
+ KeccakP_4rounds( 8 ); \
600
+ KeccakP_4rounds( 12 ); \
601
+ KeccakP_4rounds( 16 ); \
602
+ KeccakP_4rounds( 20 )
603
+
604
+ #elif (KeccakP1600times4_unrolling == 4)
605
+
606
+ #define rounds12 \
607
+ i = 12; \
608
+ do { \
609
+ KeccakP_4rounds( i ); \
610
+ } while( (i += 4) < 24 )
611
+
612
+ #define rounds24 \
613
+ i = 0; \
614
+ do { \
615
+ KeccakP_4rounds( i ); \
616
+ } while( (i += 4) < 24 )
617
+
618
+ #elif (KeccakP1600times4_unrolling == 12)
619
+
620
+ #define rounds12 \
621
+ KeccakP_4rounds( 12 ); \
622
+ KeccakP_4rounds( 16 ); \
623
+ KeccakP_4rounds( 20 )
624
+
625
+ #define rounds24 \
626
+ i = 0; \
627
+ do { \
628
+ KeccakP_4rounds( i ); \
629
+ KeccakP_4rounds( i+4 ); \
630
+ KeccakP_4rounds( i+8 ); \
631
+ } while( (i += 12) < 24 )
632
+
633
+ #else
634
+ #error "Unrolling is not correctly specified!"
635
+ #endif
636
+
637
/* Loads the 25 lanes from pState into the lane variables in a permuted order,
   as needed before KeccakP_2rounds. The trailing comment on each line names
   the lane variable that copyFromState would load from the same slot; lines
   without a comment load the same slot as copyFromState. */
+ #define copyFromState2rounds(pState) \
638
+ _ba = pState[ 0]; \
639
+ _be = pState[16]; /* me */ \
640
+ _bi = pState[ 7]; /* gi */ \
641
+ _bo = pState[23]; /* so */ \
642
+ _bu = pState[14]; /* ku */ \
643
+ _ga = pState[20]; /* sa */ \
644
+ _ge = pState[11]; /* ke */ \
645
+ _gi = pState[ 2]; /* bi */ \
646
+ _go = pState[18]; /* mo */ \
647
+ _gu = pState[ 9]; \
648
+ _ka = pState[15]; /* ma */ \
649
+ _ke = pState[ 6]; /* ge */ \
650
+ _ki = pState[22]; /* si */ \
651
+ _ko = pState[13]; \
652
+ _ku = pState[ 4]; /* bu */ \
653
+ _ma = pState[10]; /* ka */ \
654
+ _me = pState[ 1]; /* be */ \
655
+ _mi = pState[17]; \
656
+ _mo = pState[ 8]; /* go */ \
657
+ _mu = pState[24]; /* su */ \
658
+ _sa = pState[ 5]; /* ga */ \
659
+ _se = pState[21]; \
660
+ _si = pState[12]; /* ki */ \
661
+ _so = pState[ 3]; /* bo */ \
662
+ _su = pState[19] /* mu */
663
+
664
/* Loads the 25 lanes pState[0..24] into the lane variables _ba.._su in
   natural order. The expansion has no trailing semicolon. */
+ #define copyFromState(pState) \
665
+ _ba = pState[ 0]; \
666
+ _be = pState[ 1]; \
667
+ _bi = pState[ 2]; \
668
+ _bo = pState[ 3]; \
669
+ _bu = pState[ 4]; \
670
+ _ga = pState[ 5]; \
671
+ _ge = pState[ 6]; \
672
+ _gi = pState[ 7]; \
673
+ _go = pState[ 8]; \
674
+ _gu = pState[ 9]; \
675
+ _ka = pState[10]; \
676
+ _ke = pState[11]; \
677
+ _ki = pState[12]; \
678
+ _ko = pState[13]; \
679
+ _ku = pState[14]; \
680
+ _ma = pState[15]; \
681
+ _me = pState[16]; \
682
+ _mi = pState[17]; \
683
+ _mo = pState[18]; \
684
+ _mu = pState[19]; \
685
+ _sa = pState[20]; \
686
+ _se = pState[21]; \
687
+ _si = pState[22]; \
688
+ _so = pState[23]; \
689
+ _su = pState[24]
690
+
691
/* Stores the lane variables _ba.._su back to pState[0..24] in natural order
   (the inverse of copyFromState). The expansion has no trailing semicolon. */
+ #define copyToState(pState) \
692
+ pState[ 0] = _ba; \
693
+ pState[ 1] = _be; \
694
+ pState[ 2] = _bi; \
695
+ pState[ 3] = _bo; \
696
+ pState[ 4] = _bu; \
697
+ pState[ 5] = _ga; \
698
+ pState[ 6] = _ge; \
699
+ pState[ 7] = _gi; \
700
+ pState[ 8] = _go; \
701
+ pState[ 9] = _gu; \
702
+ pState[10] = _ka; \
703
+ pState[11] = _ke; \
704
+ pState[12] = _ki; \
705
+ pState[13] = _ko; \
706
+ pState[14] = _ku; \
707
+ pState[15] = _ma; \
708
+ pState[16] = _me; \
709
+ pState[17] = _mi; \
710
+ pState[18] = _mo; \
711
+ pState[19] = _mu; \
712
+ pState[20] = _sa; \
713
+ pState[21] = _se; \
714
+ pState[22] = _si; \
715
+ pState[23] = _so; \
716
+ pState[24] = _su
717
+
718
+ void KeccakP1600times4_PermuteAll_24rounds(void *states)
719
+ {
720
+ V256 *statesAsLanes = states;
721
+ KeccakP_DeclareVars;
722
+ #ifndef KeccakP1600times4_fullUnrolling
723
+ unsigned int i;
724
+ #endif
725
+
726
+ copyFromState(statesAsLanes);
727
+ rounds24;
728
+ copyToState(statesAsLanes);
729
+ }
730
+
731
+ void KeccakP1600times4_PermuteAll_12rounds(void *states)
732
+ {
733
+ V256 *statesAsLanes = states;
734
+ KeccakP_DeclareVars;
735
+ #if (KeccakP1600times4_unrolling < 12)
736
+ unsigned int i;
737
+ #endif
738
+
739
+ copyFromState(statesAsLanes);
740
+ rounds12;
741
+ copyToState(statesAsLanes);
742
+ }
743
+
744
+ void KeccakP1600times4_PermuteAll_6rounds(void *states)
745
+ {
746
+ V256 *statesAsLanes = states;
747
+ KeccakP_DeclareVars;
748
+
749
+ copyFromState2rounds(statesAsLanes);
750
+ KeccakP_2rounds( 18 );
751
+ KeccakP_4rounds( 20 );
752
+ copyToState(statesAsLanes);
753
+ }
754
+
755
+ void KeccakP1600times4_PermuteAll_4rounds(void *states)
756
+ {
757
+ V256 *statesAsLanes = states;
758
+ KeccakP_DeclareVars;
759
+
760
+ copyFromState(statesAsLanes);
761
+ KeccakP_4rounds( 20 );
762
+ copyToState(statesAsLanes);
763
+ }
764
+
765
+ size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
766
+ {
767
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
768
+
769
+ if (laneCount == 21) {
770
+ #ifndef KeccakP1600times4_fullUnrolling
771
+ unsigned int i;
772
+ #endif
773
+ const unsigned char *dataStart = data;
774
+ V256 *statesAsLanes = states;
775
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
776
+ KeccakP_DeclareVars;
777
+ V128 index;
778
+
779
+ copyFromState(statesAsLanes);
780
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
781
+ while(dataByteLen >= dataMinimumSize) {
782
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
783
+ Add_In( _ba, 0 );
784
+ Add_In( _be, 1 );
785
+ Add_In( _bi, 2 );
786
+ Add_In( _bo, 3 );
787
+ Add_In( _bu, 4 );
788
+ Add_In( _ga, 5 );
789
+ Add_In( _ge, 6 );
790
+ Add_In( _gi, 7 );
791
+ Add_In( _go, 8 );
792
+ Add_In( _gu, 9 );
793
+ Add_In( _ka, 10 );
794
+ Add_In( _ke, 11 );
795
+ Add_In( _ki, 12 );
796
+ Add_In( _ko, 13 );
797
+ Add_In( _ku, 14 );
798
+ Add_In( _ma, 15 );
799
+ Add_In( _me, 16 );
800
+ Add_In( _mi, 17 );
801
+ Add_In( _mo, 18 );
802
+ Add_In( _mu, 19 );
803
+ Add_In( _sa, 20 );
804
+ #undef Add_In
805
+ rounds24;
806
+ dataAsLanes += laneOffsetSerial;
807
+ dataByteLen -= laneOffsetSerial*8;
808
+ }
809
+ copyToState(statesAsLanes);
810
+ return (const unsigned char *)dataAsLanes - dataStart;
811
+ }
812
+ else {
813
+ const unsigned char *dataStart = data;
814
+
815
+ while(dataByteLen >= dataMinimumSize) {
816
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
817
+ KeccakP1600times4_PermuteAll_24rounds(states);
818
+ data += laneOffsetSerial*8;
819
+ dataByteLen -= laneOffsetSerial*8;
820
+ }
821
+ return data - dataStart;
822
+ }
823
+ }
824
+
825
+ size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
826
+ {
827
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
828
+
829
+ if (laneCount == 21) {
830
+ #if (KeccakP1600times4_unrolling < 12)
831
+ unsigned int i;
832
+ #endif
833
+ const unsigned char *dataStart = data;
834
+ V256 *statesAsLanes = states;
835
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
836
+ KeccakP_DeclareVars;
837
+ V128 index;
838
+
839
+ copyFromState(statesAsLanes);
840
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
841
+ while(dataByteLen >= dataMinimumSize) {
842
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
843
+ Add_In( _ba, 0 );
844
+ Add_In( _be, 1 );
845
+ Add_In( _bi, 2 );
846
+ Add_In( _bo, 3 );
847
+ Add_In( _bu, 4 );
848
+ Add_In( _ga, 5 );
849
+ Add_In( _ge, 6 );
850
+ Add_In( _gi, 7 );
851
+ Add_In( _go, 8 );
852
+ Add_In( _gu, 9 );
853
+ Add_In( _ka, 10 );
854
+ Add_In( _ke, 11 );
855
+ Add_In( _ki, 12 );
856
+ Add_In( _ko, 13 );
857
+ Add_In( _ku, 14 );
858
+ Add_In( _ma, 15 );
859
+ Add_In( _me, 16 );
860
+ Add_In( _mi, 17 );
861
+ Add_In( _mo, 18 );
862
+ Add_In( _mu, 19 );
863
+ Add_In( _sa, 20 );
864
+ #undef Add_In
865
+ rounds12;
866
+ dataAsLanes += laneOffsetSerial;
867
+ dataByteLen -= laneOffsetSerial*8;
868
+ }
869
+ copyToState(statesAsLanes);
870
+ return (const unsigned char *)dataAsLanes - dataStart;
871
+ }
872
+ else {
873
+ const unsigned char *dataStart = data;
874
+
875
+ while(dataByteLen >= dataMinimumSize) {
876
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
877
+ KeccakP1600times4_PermuteAll_12rounds(states);
878
+ data += laneOffsetSerial*8;
879
+ dataByteLen -= laneOffsetSerial*8;
880
+ }
881
+ return data - dataStart;
882
+ }
883
+ }