digest-kangarootwelve 0.0.2 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (307) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +71 -37
  3. data/Rakefile +7 -9
  4. data/digest-kangarootwelve.gemspec +323 -14
  5. data/ext/digest/kangarootwelve/ext.c +228 -177
  6. data/ext/digest/kangarootwelve/extconf.rb +15 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/ext/digest/kangarootwelve/utils.h +101 -0
  305. data/lib/digest/kangarootwelve/version.rb +2 -2
  306. data/test/test.rb +68 -31
  307. metadata +305 -27
@@ -0,0 +1,42 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ Please refer to SnP-documentation.h for more details.
14
+ */
15
+
16
+ #ifndef _KeccakP_1600_SnP_h_
17
+ #define _KeccakP_1600_SnP_h_
18
+
19
+ #include <stddef.h>
20
+ #include "KeccakP-1600-AVX512-config.h"
21
+
22
+ #define KeccakP1600_implementation "AVX-512 optimized implementation (" KeccakP1600_implementation_config ")"
23
+ #define KeccakP1600_stateSizeInBytes 200
24
+ #define KeccakP1600_stateAlignment 64
25
+ #define KeccakF1600_FastLoop_supported
26
+ #define KeccakP1600_12rounds_FastLoop_supported
27
+
28
+ #define KeccakP1600_StaticInitialize()
29
+ void KeccakP1600_Initialize(void *state);
30
+ #define KeccakP1600_AddByte(state, byte, offset) ((unsigned char*)(state))[offset] ^= (byte)
31
+ void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
32
+ void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
33
+ void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
34
+ void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
35
+ void KeccakP1600_Permute_12rounds(void *state);
36
+ void KeccakP1600_Permute_24rounds(void *state);
37
+ void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
38
+ void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
39
+ size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
40
+ size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
41
+
42
+ #endif
@@ -0,0 +1,852 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
14
+ Please refer to PlSnP-documentation.h for more details.
15
+
16
+ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
17
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ */
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <stdint.h>
24
+ #include <smmintrin.h>
25
+ #include <wmmintrin.h>
26
+ #include <immintrin.h>
27
+ #include <emmintrin.h>
28
+ #include "align.h"
29
+ #include "KeccakP-1600-times2-SnP.h"
30
+ #include "SIMD512-2-config.h"
31
+
32
+ #include "brg_endian.h"
33
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
34
+ #error Expecting a little-endian platform
35
+ #endif
36
+
37
+ /* Comment out the define below when compiling for a CPU with native AVX-512 support */
38
+ /*
39
+ * Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined;
40
+ * errors will occur if we misinterpreted the AVX-512 intrinsics'
41
+ * API or functionality.
42
+ */
43
+ /* #define SIMULATE_AVX512 */
44
+
45
+ typedef uint8_t UINT8;
46
+ typedef uint32_t UINT32;
47
+ typedef uint64_t UINT64;
48
+
49
+ #if defined(SIMULATE_AVX512)
50
+
51
+ typedef struct
52
+ {
53
+ UINT64 x[8];
54
+ } __m512i;
55
+
56
+ static __m512i _mm512_xor_si512( __m512i a, __m512i b)
57
+ {
58
+ __m512i r;
59
+ unsigned int i;
60
+
61
+ for ( i = 0; i < 8; ++i )
62
+ r.x[i] = a.x[i] ^ b.x[i];
63
+ return(r);
64
+ }
65
+
66
+ static __m128i _mm_ternarylogic_epi64(__m128i a, __m128i b, __m128i c, int imm)
67
+ {
68
+
69
+ if (imm == 0x96)
70
+ return _mm_xor_si128( _mm_xor_si128( a, b ), c );
71
+ if (imm == 0xD2)
72
+ return _mm_xor_si128( a, _mm_andnot_si128(b, c) );
73
+ printf( "_mm_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
74
+ exit(1);
75
+ }
76
+
77
+ static __m128i _mm_rol_epi64(__m128i a, int offset)
78
+ {
79
+ return _mm_or_si128(_mm_slli_epi64(a, offset), _mm_srli_epi64(a, 64-offset));
80
+ }
81
+
82
+ static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
83
+ {
84
+ __m512i r;
85
+ unsigned int i;
86
+ UINT32 offset[8];
87
+
88
+ _mm256_store_si256( (__m256i*)offset, idx );
89
+ for ( i = 0; i < 8; ++i )
90
+ r.x[i] = *(const UINT64*)((const char*)p + offset[i] * scale);
91
+ return(r);
92
+ }
93
+
94
+ static void _mm_i32scatter_epi64( void *p, __m128i idx, __m128i value, int scale)
95
+ {
96
+ unsigned int i;
97
+ UINT64 v[2];
98
+ UINT32 offset[4];
99
+
100
+ _mm_store_ps( (float*)offset, (__m128)idx );
101
+ _mm_store_pd( (double*)v, (__m128d)value );
102
+ for ( i = 0; i < 2; ++i )
103
+ *(UINT64*)((char*)p + offset[i] * scale) = v[i];
104
+ }
105
+
106
+ static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
107
+ {
108
+ unsigned int i;
109
+ UINT32 offset[8];
110
+
111
+ _mm256_store_si256( (__m256i*)offset, idx );
112
+ for ( i = 0; i < 8; ++i )
113
+ *(UINT64*)((char*)p + offset[i] * scale) = value.x[i];
114
+ }
115
+
116
+ #endif
117
+
118
+ typedef __m128i V128;
119
+ typedef __m256i V256;
120
+ typedef __m512i V512;
121
+
122
#if defined(KeccakP1600times2_useAVX512)

/* Lane-wise logic primitives on V128 (two 64-bit lanes) and V512. */
#define XOR(a,b)                    _mm_xor_si128(a,b)
#define XOR3(a,b,c)                 _mm_ternarylogic_epi64(a,b,c,0x96)
#define XOR5(a,b,c,d,e)             XOR3(XOR3(a,b,c),d,e)
#define XOR512(a,b)                 _mm512_xor_si512(a,b)
#define ROL(a,offset)               _mm_rol_epi64(a,offset)
#define Chi(a,b,c)                  _mm_ternarylogic_epi64(a,b,c,0xD2)

/* Broadcast of one 64-bit constant to both lanes. _mm_set1_epi64x takes a
 * plain 64-bit integer, so no cast to the MMX __m64 type is needed. */
#define CONST128_64(a)              _mm_set1_epi64x((long long)(a))
/* Index vectors built from 32-bit values; every argument is narrowed the same way. */
#define LOAD4_32(a,b,c,d)           _mm_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d))
#define LOAD8_32(a,b,c,d,e,f,g,h)   _mm256_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d), (UINT32)(e), (UINT32)(f), (UINT32)(g), (UINT32)(h))
/* Gathers/scatters of 64-bit lanes; indices are scaled by 8 bytes. */
#define LOAD_GATHER2_64(idx,p)      _mm_i32gather_epi64( (const void*)(p), idx, 8)
#define LOAD_GATHER8_64(idx,p)      _mm512_i32gather_epi64( idx, (const void*)(p), 8)
#define STORE_SCATTER2_64(p,idx, v) _mm_i32scatter_epi64( (void*)(p), idx, v, 8)
#define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)

#endif
140
+
141
/* Position, in 64-bit words, of lane `lanePosition` of instance
 * `instanceIndex` inside the interleaved state (two instances per lane).
 * Both arguments are parenthesized so that expression arguments expand
 * with the intended precedence. */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + (instanceIndex))
/* Size of one lane in bytes. */
#define SnP_laneLengthInBytes 8
143
+
144
+ void KeccakP1600times2_InitializeAll(void *states)
145
+ {
146
+ memset(states, 0, KeccakP1600times2_statesSizeInBytes);
147
+ }
148
+
149
+ void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
150
+ {
151
+ unsigned int sizeLeft = length;
152
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
153
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
154
+ const unsigned char *curData = data;
155
+ UINT64 *statesAsLanes = states;
156
+
157
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
158
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
159
+ UINT64 lane = 0;
160
+ if (bytesInLane > sizeLeft)
161
+ bytesInLane = sizeLeft;
162
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
163
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
164
+ sizeLeft -= bytesInLane;
165
+ lanePosition++;
166
+ curData += bytesInLane;
167
+ }
168
+
169
+ while(sizeLeft >= SnP_laneLengthInBytes) {
170
+ UINT64 lane = *((const UINT64*)curData);
171
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
172
+ sizeLeft -= SnP_laneLengthInBytes;
173
+ lanePosition++;
174
+ curData += SnP_laneLengthInBytes;
175
+ }
176
+
177
+ if (sizeLeft > 0) {
178
+ UINT64 lane = 0;
179
+ memcpy(&lane, curData, sizeLeft);
180
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
181
+ }
182
+ }
183
+
184
+ void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
185
+ {
186
+ V128 *stateAsLanes128 = states;
187
+ V512 *stateAsLanes512 = states;
188
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
189
+ unsigned int i;
190
+ V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
191
+ V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
192
+
193
+ #define Add_In1( argIndex ) stateAsLanes128[argIndex] = XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, dataAsLanes+argIndex))
194
+ #define Add_In4( argIndex ) stateAsLanes512[argIndex/4] = XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
195
+ if ( laneCount >= 16 ) {
196
+ Add_In4( 0 );
197
+ Add_In4( 4 );
198
+ Add_In4( 8 );
199
+ Add_In4( 12 );
200
+ if ( laneCount >= 20 ) {
201
+ Add_In4( 16 );
202
+ for(i=20; i<laneCount; i++)
203
+ Add_In1( i );
204
+ }
205
+ else {
206
+ for(i=16; i<laneCount; i++)
207
+ Add_In1( i );
208
+ }
209
+ }
210
+ else {
211
+ for(i=0; i<laneCount; i++)
212
+ Add_In1( i );
213
+ }
214
+ #undef Add_In1
215
+ #undef Add_In4
216
+ }
217
+
218
+ void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
219
+ {
220
+ unsigned int sizeLeft = length;
221
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
222
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
223
+ const unsigned char *curData = data;
224
+ UINT64 *statesAsLanes = states;
225
+
226
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
227
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
228
+ if (bytesInLane > sizeLeft)
229
+ bytesInLane = sizeLeft;
230
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
231
+ sizeLeft -= bytesInLane;
232
+ lanePosition++;
233
+ curData += bytesInLane;
234
+ }
235
+
236
+ while(sizeLeft >= SnP_laneLengthInBytes) {
237
+ UINT64 lane = *((const UINT64*)curData);
238
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
239
+ sizeLeft -= SnP_laneLengthInBytes;
240
+ lanePosition++;
241
+ curData += SnP_laneLengthInBytes;
242
+ }
243
+
244
+ if (sizeLeft > 0) {
245
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
246
+ }
247
+ }
248
+
249
+ void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
250
+ {
251
+ V128 *stateAsLanes128 = states;
252
+ V512 *stateAsLanes512 = states;
253
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
254
+ unsigned int i;
255
+ V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
256
+ V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
257
+
258
+ #define OverWr1( argIndex ) stateAsLanes128[argIndex] = LOAD_GATHER2_64(index128, dataAsLanes+argIndex)
259
+ #define OverWr4( argIndex ) stateAsLanes512[argIndex/4] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
260
+ if ( laneCount >= 16 ) {
261
+ OverWr4( 0 );
262
+ OverWr4( 4 );
263
+ OverWr4( 8 );
264
+ OverWr4( 12 );
265
+ if ( laneCount >= 20 ) {
266
+ OverWr4( 16 );
267
+ for(i=20; i<laneCount; i++)
268
+ OverWr1( i );
269
+ }
270
+ else {
271
+ for(i=16; i<laneCount; i++)
272
+ OverWr1( i );
273
+ }
274
+ }
275
+ else {
276
+ for(i=0; i<laneCount; i++)
277
+ OverWr1( i );
278
+ }
279
+ #undef OverWr1
280
+ #undef OverWr4
281
+ }
282
+
283
+ void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
284
+ {
285
+ unsigned int sizeLeft = byteCount;
286
+ unsigned int lanePosition = 0;
287
+ UINT64 *statesAsLanes = states;
288
+
289
+ while(sizeLeft >= SnP_laneLengthInBytes) {
290
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
291
+ sizeLeft -= SnP_laneLengthInBytes;
292
+ lanePosition++;
293
+ }
294
+
295
+ if (sizeLeft > 0) {
296
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
297
+ }
298
+ }
299
+
300
+ void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
301
+ {
302
+ unsigned int sizeLeft = length;
303
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
304
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
305
+ unsigned char *curData = data;
306
+ const UINT64 *statesAsLanes = states;
307
+
308
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
309
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
310
+ if (bytesInLane > sizeLeft)
311
+ bytesInLane = sizeLeft;
312
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
313
+ sizeLeft -= bytesInLane;
314
+ lanePosition++;
315
+ curData += bytesInLane;
316
+ }
317
+
318
+ while(sizeLeft >= SnP_laneLengthInBytes) {
319
+ *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
320
+ sizeLeft -= SnP_laneLengthInBytes;
321
+ lanePosition++;
322
+ curData += SnP_laneLengthInBytes;
323
+ }
324
+
325
+ if (sizeLeft > 0) {
326
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
327
+ }
328
+ }
329
+
330
+ void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
331
+ {
332
+ const V128 *stateAsLanes128 = states;
333
+ const V512 *stateAsLanes512 = states;
334
+ UINT64 *dataAsLanes = (UINT64 *)data;
335
+ unsigned int i;
336
+ V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
337
+ V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
338
+
339
+ #define Extr1( argIndex ) STORE_SCATTER2_64(dataAsLanes+argIndex, index128, stateAsLanes128[argIndex])
340
+ #define Extr4( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/4])
341
+ if ( laneCount >= 16 ) {
342
+ Extr4( 0 );
343
+ Extr4( 4 );
344
+ Extr4( 8 );
345
+ Extr4( 12 );
346
+ if ( laneCount >= 20 ) {
347
+ Extr4( 16 );
348
+ for(i=20; i<laneCount; i++)
349
+ Extr1( i );
350
+ }
351
+ else {
352
+ for(i=16; i<laneCount; i++)
353
+ Extr1( i );
354
+ }
355
+ }
356
+ else {
357
+ for(i=0; i<laneCount; i++)
358
+ Extr1( i );
359
+ }
360
+ #undef Extr1
361
+ #undef Extr4
362
+ }
363
+
364
+ void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
365
+ {
366
+ unsigned int sizeLeft = length;
367
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
368
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
369
+ const unsigned char *curInput = input;
370
+ unsigned char *curOutput = output;
371
+ const UINT64 *statesAsLanes = states;
372
+
373
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
374
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
375
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
376
+ if (bytesInLane > sizeLeft)
377
+ bytesInLane = sizeLeft;
378
+ sizeLeft -= bytesInLane;
379
+ do {
380
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
381
+ lane >>= 8;
382
+ } while ( --bytesInLane != 0);
383
+ lanePosition++;
384
+ }
385
+
386
+ while(sizeLeft >= SnP_laneLengthInBytes) {
387
+ *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
388
+ sizeLeft -= SnP_laneLengthInBytes;
389
+ lanePosition++;
390
+ curInput += SnP_laneLengthInBytes;
391
+ curOutput += SnP_laneLengthInBytes;
392
+ }
393
+
394
+ if (sizeLeft != 0) {
395
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
396
+ do {
397
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
398
+ lane >>= 8;
399
+ } while ( --sizeLeft != 0);
400
+ }
401
+ }
402
+
403
+ void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
404
+ {
405
+ const V128 *stateAsLanes128 = states;
406
+ const V512 *stateAsLanes512 = states;
407
+ const UINT64 *inAsLanes = (const UINT64 *)input;
408
+ UINT64 *outAsLanes = (UINT64 *)output;
409
+ unsigned int i;
410
+ V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
411
+ V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
412
+
413
+ #define ExtrAdd1( argIndex ) STORE_SCATTER2_64(outAsLanes+argIndex, index128, XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, inAsLanes+argIndex)))
414
+ #define ExtrAdd4( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
415
+ if ( laneCount >= 16 ) {
416
+ ExtrAdd4( 0 );
417
+ ExtrAdd4( 4 );
418
+ ExtrAdd4( 8 );
419
+ ExtrAdd4( 12 );
420
+ if ( laneCount >= 20 ) {
421
+ ExtrAdd4( 16 );
422
+ for(i=20; i<laneCount; i++)
423
+ ExtrAdd1( i );
424
+ }
425
+ else {
426
+ for(i=16; i<laneCount; i++)
427
+ ExtrAdd1( i );
428
+ }
429
+ }
430
+ else {
431
+ for(i=0; i<laneCount; i++)
432
+ ExtrAdd1( i );
433
+ }
434
+ #undef ExtrAdd1
435
+ #undef ExtrAdd4
436
+
437
+ }
438
+
439
/* Table of 24 64-bit round constants, indexed by round number. The round
 * macros below broadcast entry [i] to both lanes with CONST128_64 and XOR it
 * into the first lane of the state as the Iota step. */
+ static ALIGN(KeccakP1600times2_statesAlignment) const UINT64 KeccakP1600RoundConstants[24] = {
440
+ 0x0000000000000001ULL,
441
+ 0x0000000000008082ULL,
442
+ 0x800000000000808aULL,
443
+ 0x8000000080008000ULL,
444
+ 0x000000000000808bULL,
445
+ 0x0000000080000001ULL,
446
+ 0x8000000080008081ULL,
447
+ 0x8000000000008009ULL,
448
+ 0x000000000000008aULL,
449
+ 0x0000000000000088ULL,
450
+ 0x0000000080008009ULL,
451
+ 0x000000008000000aULL,
452
+ 0x000000008000808bULL,
453
+ 0x800000000000008bULL,
454
+ 0x8000000000008089ULL,
455
+ 0x8000000000008003ULL,
456
+ 0x8000000000008002ULL,
457
+ 0x8000000000000080ULL,
458
+ 0x000000000000800aULL,
459
+ 0x800000008000000aULL,
460
+ 0x8000000080008081ULL,
461
+ 0x8000000000008080ULL,
462
+ 0x0000000080000001ULL,
463
+ 0x8000000080008008ULL};
464
+
465
/* Declares the V128 working variables used by the round macros: the
 * temporaries _Ba.._Bu and _Da.._Du, and the 25 lane variables _ba.._su
 * that copyFromState/copyToState map to state indices 0..24. */
+ #define KeccakP_DeclareVars \
466
+ V128 _Ba, _Be, _Bi, _Bo, _Bu; \
467
+ V128 _Da, _De, _Di, _Do, _Du; \
468
+ V128 _ba, _be, _bi, _bo, _bu; \
469
+ V128 _ga, _ge, _gi, _go, _gu; \
470
+ V128 _ka, _ke, _ki, _ko, _ku; \
471
+ V128 _ma, _me, _mi, _mo, _mu; \
472
+ V128 _sa, _se, _si, _so, _su
473
+
474
/* Processes five lanes _L1.._L5:
 *   1. each lane is XORed with _Da.._Du and stored in the temporary named
 *      by _Bb1.._Bb5 (the caller chooses which of _Ba.._Bu receives which);
 *   2. each temporary is rotated left by _Rr1.._Rr5 (the first rotation is
 *      skipped when _Rr1 is 0);
 *   3. _L1.._L5 are overwritten with Chi() of consecutive triples of
 *      _Ba.._Bu, always taken in the fixed order a, e, i, o, u.
 * Note the expansion contains a bare `if` and ends with a semicolon, so it
 * must be used as a statement sequence, not inside an unbraced if/else. */
+ #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
475
+ _Bb1 = XOR(_L1, _Da); \
476
+ _Bb2 = XOR(_L2, _De); \
477
+ _Bb3 = XOR(_L3, _Di); \
478
+ _Bb4 = XOR(_L4, _Do); \
479
+ _Bb5 = XOR(_L5, _Du); \
480
+ if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
481
+ _Bb2 = ROL(_Bb2, _Rr2); \
482
+ _Bb3 = ROL(_Bb3, _Rr3); \
483
+ _Bb4 = ROL(_Bb4, _Rr4); \
484
+ _Bb5 = ROL(_Bb5, _Rr5); \
485
+ _L1 = Chi( _Ba, _Be, _Bi); \
486
+ _L2 = Chi( _Be, _Bi, _Bo); \
487
+ _L3 = Chi( _Bi, _Bo, _Bu); \
488
+ _L4 = Chi( _Bo, _Bu, _Ba); \
489
+ _L5 = Chi( _Bu, _Ba, _Be);
490
+
491
/* First step of a round. Computes the five column parities of the 25 lane
 * variables into _Ba.._Bu, derives _Da.._Du from them (each is a parity
 * rotated left by 1 XORed with another parity), then applies
 * KeccakP_ThetaRhoPiChi to _L1.._L5 with rotations 0, 44, 43, 21, 14 and
 * finally XORs the round constant _rc into _L1. The _Da.._Du values stay
 * live for the KeccakP_ThetaRhoPiChi1..4 steps that follow in the round. */
+ #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
492
+ _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
493
+ _Be = XOR5( _be, _ge, _ke, _me, _se ); \
494
+ _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
495
+ _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
496
+ _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
497
+ _Da = ROL( _Be, 1 ); \
498
+ _De = ROL( _Bi, 1 ); \
499
+ _Di = ROL( _Bo, 1 ); \
500
+ _Do = ROL( _Bu, 1 ); \
501
+ _Du = ROL( _Ba, 1 ); \
502
+ _Da = XOR( _Da, _Bu ); \
503
+ _De = XOR( _De, _Ba ); \
504
+ _Di = XOR( _Di, _Be ); \
505
+ _Do = XOR( _Do, _Bi ); \
506
+ _Du = XOR( _Du, _Bo ); \
507
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
508
+ _L1 = XOR(_L1, _rc) /* Iota */
509
+
510
/* Steps 1 to 4 of a round. Each wrapper forwards five lanes to
 * KeccakP_ThetaRhoPiChi with a fixed assignment of the temporaries
 * _Ba.._Bu and a fixed set of five rotation amounts. They rely on _Da.._Du
 * having been set by KeccakP_ThetaRhoPiChiIota0 earlier in the same round. */
+ #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
511
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
512
+
513
+ #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
514
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
515
+
516
+ #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
517
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
518
+
519
+ #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
520
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
521
+
522
/* Four consecutive rounds using round constants [i] .. [i+3]. Each round
 * is one Iota0 step followed by steps 1..4; the lane variables are passed
 * in a different order in each of the first three rounds and in plain
 * a/e/i/o/u row order in the fourth, so the lane-to-variable mapping is
 * back to the copyFromState layout after every call. */
+ #define KeccakP_4rounds( i ) \
523
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST128_64(KeccakP1600RoundConstants[i]) ); \
524
+ KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
525
+ KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
526
+ KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
527
+ KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
528
+ \
529
+ KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
530
+ KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
531
+ KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
532
+ KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
533
+ KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
534
+ \
535
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i+2]) ); \
536
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
537
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
538
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
539
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
540
+ \
541
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+3]) ); \
542
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
543
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
544
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
545
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
546
+
547
/* Two consecutive rounds using round constants [i] and [i+1]. The lane
 * orderings are the same as in the last two rounds of KeccakP_4rounds, so
 * the lane variables must be loaded with copyFromState2rounds beforehand
 * (as KeccakP1600times2_PermuteAll_6rounds does). */
+ #define KeccakP_2rounds( i ) \
548
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i]) ); \
549
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
550
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
551
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
552
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
553
+ \
554
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
555
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
556
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
557
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
558
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
559
+
560
/* Defines rounds12 (round constants 12..23) and rounds24 (round constants
 * 0..23) according to the unrolling configuration:
 *   - KeccakP1600times2_fullUnrolling: both are straight-line code;
 *   - KeccakP1600times2_unrolling == 4: both loop over KeccakP_4rounds;
 *   - KeccakP1600times2_unrolling == 12: rounds12 is straight-line,
 *     rounds24 loops twice over 12 rounds.
 * The looping variants use a variable named `i` that the enclosing function
 * must declare. Any other configuration stops compilation with #error. */
+ #ifdef KeccakP1600times2_fullUnrolling
561
+
562
+ #define rounds12 \
563
+ KeccakP_4rounds( 12 ); \
564
+ KeccakP_4rounds( 16 ); \
565
+ KeccakP_4rounds( 20 )
566
+
567
+ #define rounds24 \
568
+ KeccakP_4rounds( 0 ); \
569
+ KeccakP_4rounds( 4 ); \
570
+ KeccakP_4rounds( 8 ); \
571
+ KeccakP_4rounds( 12 ); \
572
+ KeccakP_4rounds( 16 ); \
573
+ KeccakP_4rounds( 20 )
574
+
575
+ #elif (KeccakP1600times2_unrolling == 4)
576
+
577
+ #define rounds12 \
578
+ i = 12; \
579
+ do { \
580
+ KeccakP_4rounds( i ); \
581
+ } while( (i += 4) < 24 )
582
+
583
+ #define rounds24 \
584
+ i = 0; \
585
+ do { \
586
+ KeccakP_4rounds( i ); \
587
+ } while( (i += 4) < 24 )
588
+
589
+ #elif (KeccakP1600times2_unrolling == 12)
590
+
591
+ #define rounds12 \
592
+ KeccakP_4rounds( 12 ); \
593
+ KeccakP_4rounds( 16 ); \
594
+ KeccakP_4rounds( 20 )
595
+
596
+ #define rounds24 \
597
+ i = 0; \
598
+ do { \
599
+ KeccakP_4rounds( i ); \
600
+ KeccakP_4rounds( i+4 ); \
601
+ KeccakP_4rounds( i+8 ); \
602
+ } while( (i += 12) < 24 )
603
+
604
+ #else
605
+ #error "Unrolling is not correctly specified!"
606
+ #endif
607
+
608
/* Loads the 25 lane variables from pState in the permuted order expected by
 * KeccakP_2rounds. The trailing comments name the variable that holds the
 * same state index in copyFromState. The argument is parenthesized so that
 * pointer expressions such as `base + n` expand correctly. */
#define copyFromState2rounds(pState) \
    _ba = (pState)[ 0]; \
    _be = (pState)[16]; /* me */ \
    _bi = (pState)[ 7]; /* gi */ \
    _bo = (pState)[23]; /* so */ \
    _bu = (pState)[14]; /* ku */ \
    _ga = (pState)[20]; /* sa */ \
    _ge = (pState)[11]; /* ke */ \
    _gi = (pState)[ 2]; /* bi */ \
    _go = (pState)[18]; /* mo */ \
    _gu = (pState)[ 9]; \
    _ka = (pState)[15]; /* ma */ \
    _ke = (pState)[ 6]; /* ge */ \
    _ki = (pState)[22]; /* si */ \
    _ko = (pState)[13]; \
    _ku = (pState)[ 4]; /* bu */ \
    _ma = (pState)[10]; /* ka */ \
    _me = (pState)[ 1]; /* be */ \
    _mi = (pState)[17]; \
    _mo = (pState)[ 8]; /* go */ \
    _mu = (pState)[24]; /* su */ \
    _sa = (pState)[ 5]; /* ga */ \
    _se = (pState)[21]; \
    _si = (pState)[12]; /* ki */ \
    _so = (pState)[ 3]; /* bo */ \
    _su = (pState)[19] /* mu */
634
+
635
/* Loads the 25 lane variables _ba.._su from pState[0..24] in natural
 * order. The argument is parenthesized so that pointer expressions such as
 * `base + n` expand correctly. */
#define copyFromState(pState) \
    _ba = (pState)[ 0]; \
    _be = (pState)[ 1]; \
    _bi = (pState)[ 2]; \
    _bo = (pState)[ 3]; \
    _bu = (pState)[ 4]; \
    _ga = (pState)[ 5]; \
    _ge = (pState)[ 6]; \
    _gi = (pState)[ 7]; \
    _go = (pState)[ 8]; \
    _gu = (pState)[ 9]; \
    _ka = (pState)[10]; \
    _ke = (pState)[11]; \
    _ki = (pState)[12]; \
    _ko = (pState)[13]; \
    _ku = (pState)[14]; \
    _ma = (pState)[15]; \
    _me = (pState)[16]; \
    _mi = (pState)[17]; \
    _mo = (pState)[18]; \
    _mu = (pState)[19]; \
    _sa = (pState)[20]; \
    _se = (pState)[21]; \
    _si = (pState)[22]; \
    _so = (pState)[23]; \
    _su = (pState)[24]
661
+
662
/* Stores the 25 lane variables _ba.._su back to pState[0..24] in natural
 * order. The argument is parenthesized so that pointer expressions such as
 * `base + n` expand correctly. */
#define copyToState(pState) \
    (pState)[ 0] = _ba; \
    (pState)[ 1] = _be; \
    (pState)[ 2] = _bi; \
    (pState)[ 3] = _bo; \
    (pState)[ 4] = _bu; \
    (pState)[ 5] = _ga; \
    (pState)[ 6] = _ge; \
    (pState)[ 7] = _gi; \
    (pState)[ 8] = _go; \
    (pState)[ 9] = _gu; \
    (pState)[10] = _ka; \
    (pState)[11] = _ke; \
    (pState)[12] = _ki; \
    (pState)[13] = _ko; \
    (pState)[14] = _ku; \
    (pState)[15] = _ma; \
    (pState)[16] = _me; \
    (pState)[17] = _mi; \
    (pState)[18] = _mo; \
    (pState)[19] = _mu; \
    (pState)[20] = _sa; \
    (pState)[21] = _se; \
    (pState)[22] = _si; \
    (pState)[23] = _so; \
    (pState)[24] = _su
688
+
689
+ void KeccakP1600times2_PermuteAll_24rounds(void *states)
690
+ {
691
+ V128 *statesAsLanes = states;
692
+ KeccakP_DeclareVars;
693
+ #ifndef KeccakP1600times2_fullUnrolling
694
+ unsigned int i;
695
+ #endif
696
+
697
+ copyFromState(statesAsLanes);
698
+ rounds24;
699
+ copyToState(statesAsLanes);
700
+ }
701
+
702
+ void KeccakP1600times2_PermuteAll_12rounds(void *states)
703
+ {
704
+ V128 *statesAsLanes = states;
705
+ KeccakP_DeclareVars;
706
+ #if (KeccakP1600times2_unrolling < 12)
707
+ unsigned int i;
708
+ #endif
709
+
710
+ copyFromState(statesAsLanes);
711
+ rounds12;
712
+ copyToState(statesAsLanes);
713
+ }
714
+
715
+ void KeccakP1600times2_PermuteAll_6rounds(void *states)
716
+ {
717
+ V128 *statesAsLanes = states;
718
+ KeccakP_DeclareVars;
719
+
720
+ copyFromState2rounds(statesAsLanes);
721
+ KeccakP_2rounds( 18 );
722
+ KeccakP_4rounds( 20 );
723
+ copyToState(statesAsLanes);
724
+ }
725
+
726
+ void KeccakP1600times2_PermuteAll_4rounds(void *states)
727
+ {
728
+ V128 *statesAsLanes = states;
729
+ KeccakP_DeclareVars;
730
+
731
+ copyFromState(statesAsLanes);
732
+ KeccakP_4rounds( 20 );
733
+ copyToState(statesAsLanes);
734
+ }
735
+
736
+ size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
737
+ {
738
+ size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
739
+
740
+ if (laneCount == 21) {
741
+ #ifndef KeccakP1600times2_fullUnrolling
742
+ unsigned int i;
743
+ #endif
744
+ const unsigned char *dataStart = data;
745
+ V128 *statesAsLanes = states;
746
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
747
+ KeccakP_DeclareVars;
748
+ V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
749
+
750
+ copyFromState(statesAsLanes);
751
+ while(dataByteLen >= dataMinimumSize) {
752
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
753
+ Add_In( _ba, 0 );
754
+ Add_In( _be, 1 );
755
+ Add_In( _bi, 2 );
756
+ Add_In( _bo, 3 );
757
+ Add_In( _bu, 4 );
758
+ Add_In( _ga, 5 );
759
+ Add_In( _ge, 6 );
760
+ Add_In( _gi, 7 );
761
+ Add_In( _go, 8 );
762
+ Add_In( _gu, 9 );
763
+ Add_In( _ka, 10 );
764
+ Add_In( _ke, 11 );
765
+ Add_In( _ki, 12 );
766
+ Add_In( _ko, 13 );
767
+ Add_In( _ku, 14 );
768
+ Add_In( _ma, 15 );
769
+ Add_In( _me, 16 );
770
+ Add_In( _mi, 17 );
771
+ Add_In( _mo, 18 );
772
+ Add_In( _mu, 19 );
773
+ Add_In( _sa, 20 );
774
+ #undef Add_In
775
+ rounds24;
776
+ dataAsLanes += laneOffsetSerial;
777
+ dataByteLen -= laneOffsetSerial*8;
778
+ }
779
+ copyToState(statesAsLanes);
780
+ return (const unsigned char *)dataAsLanes - dataStart;
781
+ }
782
+ else {
783
+ const unsigned char *dataStart = data;
784
+
785
+ while(dataByteLen >= dataMinimumSize) {
786
+ KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
787
+ KeccakP1600times2_PermuteAll_24rounds(states);
788
+ data += laneOffsetSerial*8;
789
+ dataByteLen -= laneOffsetSerial*8;
790
+ }
791
+ return data - dataStart;
792
+ }
793
+ }
794
+
795
+ size_t KeccakP1600times2_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
796
+ {
797
+ size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
798
+
799
+ if (laneCount == 21) {
800
+ #if (KeccakP1600times2_unrolling < 12)
801
+ unsigned int i;
802
+ #endif
803
+ const unsigned char *dataStart = data;
804
+ V128 *statesAsLanes = states;
805
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
806
+ KeccakP_DeclareVars;
807
+ V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
808
+
809
+ copyFromState(statesAsLanes);
810
+ while(dataByteLen >= dataMinimumSize) {
811
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
812
+ Add_In( _ba, 0 );
813
+ Add_In( _be, 1 );
814
+ Add_In( _bi, 2 );
815
+ Add_In( _bo, 3 );
816
+ Add_In( _bu, 4 );
817
+ Add_In( _ga, 5 );
818
+ Add_In( _ge, 6 );
819
+ Add_In( _gi, 7 );
820
+ Add_In( _go, 8 );
821
+ Add_In( _gu, 9 );
822
+ Add_In( _ka, 10 );
823
+ Add_In( _ke, 11 );
824
+ Add_In( _ki, 12 );
825
+ Add_In( _ko, 13 );
826
+ Add_In( _ku, 14 );
827
+ Add_In( _ma, 15 );
828
+ Add_In( _me, 16 );
829
+ Add_In( _mi, 17 );
830
+ Add_In( _mo, 18 );
831
+ Add_In( _mu, 19 );
832
+ Add_In( _sa, 20 );
833
+ #undef Add_In
834
+ rounds12;
835
+ dataAsLanes += laneOffsetSerial;
836
+ dataByteLen -= laneOffsetSerial*8;
837
+ }
838
+ copyToState(statesAsLanes);
839
+ return (const unsigned char *)dataAsLanes - dataStart;
840
+ }
841
+ else {
842
+ const unsigned char *dataStart = data;
843
+
844
+ while(dataByteLen >= dataMinimumSize) {
845
+ KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
846
+ KeccakP1600times2_PermuteAll_12rounds(states);
847
+ data += laneOffsetSerial*8;
848
+ dataByteLen -= laneOffsetSerial*8;
849
+ }
850
+ return data - dataStart;
851
+ }
852
+ }