digest-kangarootwelve 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +51 -11
  3. data/Rakefile +2 -2
  4. data/digest-kangarootwelve.gemspec +322 -42
  5. data/ext/digest/kangarootwelve/ext.c +1 -1
  6. data/ext/digest/kangarootwelve/extconf.rb +13 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/lib/digest/kangarootwelve/version.rb +1 -1
  305. metadata +299 -21
@@ -0,0 +1,49 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ Please refer to PlSnP-documentation.h for more details.
14
+ */
15
+
16
+ #ifndef _KeccakP_1600_times2_SnP_h_ /* Include guard: declarations of the Keccak-p[1600] times-2 PlSnP interface (512-bit SIMD variant). */
17
+ #define _KeccakP_1600_times2_SnP_h_
18
+
19
+ #include "SIMD512-2-config.h"
20
+
21
+ #define KeccakP1600times2_implementation "512-bit SIMD implementation (" KeccakP1600times2_implementation_config ")" /* description string; the config part is presumably supplied by SIMD512-2-config.h -- confirm */
22
+ #define KeccakP1600times2_statesSizeInBytes 400 /* 400 = 2 x 200 bytes, i.e. two 1600-bit states */
23
+ #define KeccakP1600times2_statesAlignment 64 /* NOTE(review): presumably the required alignment of the states buffer in bytes -- confirm against PlSnP-documentation.h */
24
+ #define KeccakF1600times2_FastLoop_supported /* feature flag matching KeccakF1600times2_FastLoop_Absorb declared below */
25
+ #define KeccakP1600times2_12rounds_FastLoop_supported /* feature flag matching KeccakP1600times2_12rounds_FastLoop_Absorb declared below */
26
+
27
+ #include <stddef.h>
28
+
29
+ #define KeccakP1600times2_StaticInitialize() /* expands to nothing */
30
+ void KeccakP1600times2_InitializeAll(void *states);
31
+ #define KeccakP1600times2_AddByte(states, instanceIndex, byte, offset) \
32
+ ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*2*8 + (offset)%8] ^= (byte) /* XORs byte into byte (offset)%8 of 8-byte lane (offset)/8 of the selected instance; the index arithmetic interleaves the two instances lane by lane (2*8-byte stride per lane) */
33
+ void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
34
+ void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
35
+ void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
36
+ void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
37
+ void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
38
+ void KeccakP1600times2_PermuteAll_4rounds(void *states);
39
+ void KeccakP1600times2_PermuteAll_6rounds(void *states);
40
+ void KeccakP1600times2_PermuteAll_12rounds(void *states);
41
+ void KeccakP1600times2_PermuteAll_24rounds(void *states);
42
+ void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
43
+ void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
44
+ void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
45
+ void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
46
+ size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
47
+ size_t KeccakP1600times2_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
48
+
49
+ #endif
@@ -0,0 +1,883 @@
1
+ /*
2
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
14
+ Please refer to PlSnP-documentation.h for more details.
15
+
16
+ This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
17
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ */
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <stdint.h>
24
+ #include <smmintrin.h>
25
+ #include <wmmintrin.h>
26
+ #include <immintrin.h>
27
+ #include <emmintrin.h>
28
+ #include "align.h"
29
+ #include "KeccakP-1600-times4-SnP.h"
30
+ #include "SIMD512-4-config.h"
31
+
32
+ #include "brg_endian.h"
33
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
34
+ #error Expecting a little-endian platform
35
+ #endif
36
+
37
+ /* Comment the define hereunder when compiling for a CPU with AVX-512 SIMD */
38
+ /*
39
+ * Warning: this code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined;
40
+ * errors may occur on real AVX-512 hardware if we misinterpreted the AVX-512 intrinsics'
41
+ * API or functionality.
42
+ */
43
+ /* #define SIMULATE_AVX512 */
44
+
45
+ typedef uint8_t UINT8;
46
+ typedef uint32_t UINT32;
47
+ typedef uint64_t UINT64;
48
+
49
+ #if defined(SIMULATE_AVX512)
50
+
51
/* Stand-in for the 512-bit vector type, compiled only when SIMULATE_AVX512 is defined: eight 64-bit elements. */
+ typedef struct
52
+ {
53
+ UINT64 x[8];
54
+ } __m512i;
55
+
56
+ static __m512i _mm512_xor_si512( __m512i a, __m512i b)
57
+ {
58
+ __m512i r;
59
+ unsigned int i;
60
+
61
+ for ( i = 0; i < 8; ++i )
62
+ r.x[i] = a.x[i] ^ b.x[i];
63
+ return(r);
64
+ }
65
+
66
+ static __m256i _mm256_ternarylogic_epi64(__m256i a, __m256i b, __m256i c, int imm)
67
+ {
68
+
69
+ if (imm == 0x96)
70
+ return _mm256_xor_si256( _mm256_xor_si256( a, b ), c );
71
+ if (imm == 0xD2)
72
+ return _mm256_xor_si256( a, _mm256_andnot_si256(b, c) );
73
+ printf( "_mm256_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
74
+ exit(1);
75
+ }
76
+
77
+ static __m256i _mm256_rol_epi64(__m256i a, int offset)
78
+ {
79
+ return _mm256_or_si256(_mm256_slli_epi64(a, offset), _mm256_srli_epi64(a, 64-offset));
80
+ }
81
+
82
+ static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
83
+ {
84
+ __m512i r;
85
+ unsigned int i;
86
+ UINT32 offset[8];
87
+
88
+ _mm256_store_si256( (__m256i*)offset, idx );
89
+ for ( i = 0; i < 8; ++i )
90
+ r.x[i] = *(const UINT64*)((const char*)p + offset[i] * scale);
91
+ return(r);
92
+ }
93
+
94
+ static void _mm256_i32scatter_epi64( void *p, __m128i idx, __m256i value, int scale)
95
+ {
96
+ unsigned int i;
97
+ UINT64 v[4];
98
+ UINT32 offset[4];
99
+
100
+ _mm_store_ps( (float*)offset, (__m128)idx );
101
+ _mm256_store_si256( (__m256i*)v, value );
102
+ for ( i = 0; i < 4; ++i )
103
+ *(UINT64*)((char*)p + offset[i] * scale) = v[i];
104
+ }
105
+
106
+ static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
107
+ {
108
+ unsigned int i;
109
+ UINT32 offset[8];
110
+
111
+ _mm256_store_si256( (__m256i*)offset, idx );
112
+ for ( i = 0; i < 8; ++i )
113
+ *(UINT64*)((char*)p + offset[i] * scale) = value.x[i];
114
+ }
115
+
116
+ #endif
117
+
118
+ typedef __m128i V128;
119
+ typedef __m256i V256;
120
+ typedef __m512i V512;
121
+
122
+ #if defined(KeccakP1600times4_useAVX512)
123
+
124
/*
 * Short aliases for the vector operations used by the rest of this file.
 *   XOR / XOR3 / XOR5 : XOR of 2, 3 or 5 256-bit vectors.
 *   XOR512            : XOR of two 512-bit vectors.
 *   ROL               : per-64-bit-element left rotation.
 *   Chi               : ternary logic with table 0xD2, i.e. a ^ (~b & c).
 *   CONST256_64       : broadcasts the 64-bit object `a` to all four elements.
 *   LOAD4_32/LOAD8_32 : build a vector of 32-bit values; as with
 *                       _mm*_set_epi32, the first argument is the highest element.
 *   LOAD_GATHER*_64 / STORE_SCATTER*_64 : gather/scatter of 64-bit values
 *                       using 32-bit indices scaled by 8 bytes.
 */
#define XOR(a,b)                    _mm256_xor_si256(a,b)
#define XOR3(a,b,c)                 _mm256_ternarylogic_epi64(a,b,c,0x96)
#define XOR5(a,b,c,d,e)             XOR3(XOR3(a,b,c),d,e)
#define XOR512(a,b)                 _mm512_xor_si512(a,b)
#define ROL(a,offset)               _mm256_rol_epi64(a,offset)
#define Chi(a,b,c)                  _mm256_ternarylogic_epi64(a,b,c,0xD2)

#define CONST256_64(a)              (V256)_mm256_broadcast_sd((const double*)(&(a)))
/* Every element is a 32-bit value, so all arguments get the same UINT32 cast. */
#define LOAD4_32(a,b,c,d)           _mm_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d))
#define LOAD8_32(a,b,c,d,e,f,g,h)   _mm256_set_epi32((UINT32)(a), (UINT32)(b), (UINT32)(c), (UINT32)(d), (UINT32)(e), (UINT32)(f), (UINT32)(g), (UINT32)(h))
#define LOAD_GATHER4_64(idx,p)      _mm256_i32gather_epi64( (const void*)(p), idx, 8)
#define LOAD_GATHER8_64(idx,p)      _mm512_i32gather_epi64( idx, (const void*)(p), 8)
#define STORE_SCATTER4_64(p,idx, v) _mm256_i32scatter_epi64( (void*)(p), idx, v, 8)
#define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)
138
+
139
+ #endif
140
+
141
/*
 * Index, in 64-bit words, of lane `lanePosition` of instance `instanceIndex`
 * inside the interleaved state array: lanes of the four instances are stored
 * next to each other. Both arguments are parenthesized so that any expression
 * can be passed safely.
 */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + (instanceIndex))
/* Size of one lane in bytes. */
#define SnP_laneLengthInBytes 8
143
+
144
+ void KeccakP1600times4_InitializeAll(void *states)
145
+ {
146
+ memset(states, 0, KeccakP1600times4_statesSizeInBytes);
147
+ }
148
+
149
+ void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
150
+ {
151
+ unsigned int sizeLeft = length;
152
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
153
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
154
+ const unsigned char *curData = data;
155
+ UINT64 *statesAsLanes = states;
156
+
157
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
158
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
159
+ UINT64 lane = 0;
160
+ if (bytesInLane > sizeLeft)
161
+ bytesInLane = sizeLeft;
162
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
163
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
164
+ sizeLeft -= bytesInLane;
165
+ lanePosition++;
166
+ curData += bytesInLane;
167
+ }
168
+
169
+ while(sizeLeft >= SnP_laneLengthInBytes) {
170
+ UINT64 lane = *((const UINT64*)curData);
171
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
172
+ sizeLeft -= SnP_laneLengthInBytes;
173
+ lanePosition++;
174
+ curData += SnP_laneLengthInBytes;
175
+ }
176
+
177
+ if (sizeLeft > 0) {
178
+ UINT64 lane = 0;
179
+ memcpy(&lane, curData, sizeLeft);
180
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
181
+ }
182
+ }
183
+
184
+ void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
185
+ {
186
+ V256 *stateAsLanes256 = states;
187
+ V512 *stateAsLanes512 = states;
188
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
189
+ unsigned int i;
190
+ V256 index512;
191
+ V128 index256;
192
+
193
+ #define Add_In1( argIndex ) stateAsLanes256[argIndex] = XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, dataAsLanes+argIndex))
194
+ #define Add_In2( argIndex ) stateAsLanes512[argIndex/2] = XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
195
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
196
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
197
+ if ( laneCount >= 16 ) {
198
+ Add_In2( 0 );
199
+ Add_In2( 2 );
200
+ Add_In2( 4 );
201
+ Add_In2( 6 );
202
+ Add_In2( 8 );
203
+ Add_In2( 10 );
204
+ Add_In2( 12 );
205
+ Add_In2( 14 );
206
+ if ( laneCount >= 20 ) {
207
+ Add_In2( 16 );
208
+ Add_In2( 18 );
209
+ for(i=20; i<laneCount; i++)
210
+ Add_In1( i );
211
+ }
212
+ else {
213
+ for(i=16; i<laneCount; i++)
214
+ Add_In1( i );
215
+ }
216
+ }
217
+ else {
218
+ for(i=0; i<laneCount; i++)
219
+ Add_In1( i );
220
+ }
221
+ #undef Add_In1
222
+ #undef Add_In2
223
+ }
224
+
225
+ void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
226
+ {
227
+ unsigned int sizeLeft = length;
228
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
229
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
230
+ const unsigned char *curData = data;
231
+ UINT64 *statesAsLanes = states;
232
+
233
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
234
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
235
+ if (bytesInLane > sizeLeft)
236
+ bytesInLane = sizeLeft;
237
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
238
+ sizeLeft -= bytesInLane;
239
+ lanePosition++;
240
+ curData += bytesInLane;
241
+ }
242
+
243
+ while(sizeLeft >= SnP_laneLengthInBytes) {
244
+ UINT64 lane = *((const UINT64*)curData);
245
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
246
+ sizeLeft -= SnP_laneLengthInBytes;
247
+ lanePosition++;
248
+ curData += SnP_laneLengthInBytes;
249
+ }
250
+
251
+ if (sizeLeft > 0) {
252
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
253
+ }
254
+ }
255
+
256
+ void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
257
+ {
258
+ V256 *stateAsLanes256 = states;
259
+ V512 *stateAsLanes512 = states;
260
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
261
+ unsigned int i;
262
+ V256 index512;
263
+ V128 index256;
264
+
265
+ #define OverWr1( argIndex ) stateAsLanes256[argIndex] = LOAD_GATHER4_64(index256, dataAsLanes+argIndex)
266
+ #define OverWr2( argIndex ) stateAsLanes512[argIndex/2] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
267
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
268
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
269
+ if ( laneCount >= 16 ) {
270
+ OverWr2( 0 );
271
+ OverWr2( 2 );
272
+ OverWr2( 4 );
273
+ OverWr2( 6 );
274
+ OverWr2( 8 );
275
+ OverWr2( 10 );
276
+ OverWr2( 12 );
277
+ OverWr2( 14 );
278
+ if ( laneCount >= 20 ) {
279
+ OverWr2( 16 );
280
+ OverWr2( 18 );
281
+ for(i=20; i<laneCount; i++)
282
+ OverWr1( i );
283
+ }
284
+ else {
285
+ for(i=16; i<laneCount; i++)
286
+ OverWr1( i );
287
+ }
288
+ }
289
+ else {
290
+ for(i=0; i<laneCount; i++)
291
+ OverWr1( i );
292
+ }
293
+ #undef OverWr1
294
+ #undef OverWr2
295
+ }
296
+
297
+ void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
298
+ {
299
+ unsigned int sizeLeft = byteCount;
300
+ unsigned int lanePosition = 0;
301
+ UINT64 *statesAsLanes = states;
302
+
303
+ while(sizeLeft >= SnP_laneLengthInBytes) {
304
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
305
+ sizeLeft -= SnP_laneLengthInBytes;
306
+ lanePosition++;
307
+ }
308
+
309
+ if (sizeLeft > 0) {
310
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
311
+ }
312
+ }
313
+
314
+ void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
315
+ {
316
+ unsigned int sizeLeft = length;
317
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
318
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
319
+ unsigned char *curData = data;
320
+ const UINT64 *statesAsLanes = states;
321
+
322
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
323
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
324
+ if (bytesInLane > sizeLeft)
325
+ bytesInLane = sizeLeft;
326
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
327
+ sizeLeft -= bytesInLane;
328
+ lanePosition++;
329
+ curData += bytesInLane;
330
+ }
331
+
332
+ while(sizeLeft >= SnP_laneLengthInBytes) {
333
+ *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
334
+ sizeLeft -= SnP_laneLengthInBytes;
335
+ lanePosition++;
336
+ curData += SnP_laneLengthInBytes;
337
+ }
338
+
339
+ if (sizeLeft > 0) {
340
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
341
+ }
342
+ }
343
+
344
+ void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
345
+ {
346
+ const V256 *stateAsLanes256 = states;
347
+ const V512 *stateAsLanes512 = states;
348
+ UINT64 *dataAsLanes = (UINT64 *)data;
349
+ unsigned int i;
350
+ V256 index512;
351
+ V128 index256;
352
+
353
+ #define Extr1( argIndex ) STORE_SCATTER4_64(dataAsLanes+argIndex, index256, stateAsLanes256[argIndex])
354
+ #define Extr2( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/2])
355
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
356
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
357
+ if ( laneCount >= 16 ) {
358
+ Extr2( 0 );
359
+ Extr2( 2 );
360
+ Extr2( 4 );
361
+ Extr2( 6 );
362
+ Extr2( 8 );
363
+ Extr2( 10 );
364
+ Extr2( 12 );
365
+ Extr2( 14 );
366
+ if ( laneCount >= 20 ) {
367
+ Extr2( 16 );
368
+ Extr2( 18 );
369
+ for(i=20; i<laneCount; i++)
370
+ Extr1( i );
371
+ }
372
+ else {
373
+ for(i=16; i<laneCount; i++)
374
+ Extr1( i );
375
+ }
376
+ }
377
+ else {
378
+ for(i=0; i<laneCount; i++)
379
+ Extr1( i );
380
+ }
381
+ #undef Extr1
382
+ #undef Extr2
383
+ }
384
+
385
+ void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
386
+ {
387
+ unsigned int sizeLeft = length;
388
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
389
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
390
+ const unsigned char *curInput = input;
391
+ unsigned char *curOutput = output;
392
+ const UINT64 *statesAsLanes = states;
393
+
394
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
395
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
396
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
397
+ if (bytesInLane > sizeLeft)
398
+ bytesInLane = sizeLeft;
399
+ sizeLeft -= bytesInLane;
400
+ do {
401
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
402
+ lane >>= 8;
403
+ } while ( --bytesInLane != 0);
404
+ lanePosition++;
405
+ }
406
+
407
+ while(sizeLeft >= SnP_laneLengthInBytes) {
408
+ *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
409
+ sizeLeft -= SnP_laneLengthInBytes;
410
+ lanePosition++;
411
+ curInput += SnP_laneLengthInBytes;
412
+ curOutput += SnP_laneLengthInBytes;
413
+ }
414
+
415
+ if (sizeLeft != 0) {
416
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
417
+ do {
418
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
419
+ lane >>= 8;
420
+ } while ( --sizeLeft != 0);
421
+ }
422
+ }
423
+
424
+ void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
425
+ {
426
+ const V256 *stateAsLanes256 = states;
427
+ const V512 *stateAsLanes512 = states;
428
+ const UINT64 *inAsLanes = (const UINT64 *)input;
429
+ UINT64 *outAsLanes = (UINT64 *)output;
430
+ unsigned int i;
431
+ V256 index512;
432
+ V128 index256;
433
+
434
+ #define ExtrAdd1( argIndex ) STORE_SCATTER4_64(outAsLanes+argIndex, index256, XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, inAsLanes+argIndex)))
435
+ #define ExtrAdd2( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
436
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
437
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
438
+
439
+ if ( laneCount >= 16 ) {
440
+ ExtrAdd2( 0 );
441
+ ExtrAdd2( 2 );
442
+ ExtrAdd2( 4 );
443
+ ExtrAdd2( 6 );
444
+ ExtrAdd2( 8 );
445
+ ExtrAdd2( 10 );
446
+ ExtrAdd2( 12 );
447
+ ExtrAdd2( 14 );
448
+ if ( laneCount >= 20 ) {
449
+ ExtrAdd2( 16 );
450
+ ExtrAdd2( 18 );
451
+ for(i=20; i<laneCount; i++)
452
+ ExtrAdd1( i );
453
+ }
454
+ else {
455
+ for(i=16; i<laneCount; i++)
456
+ ExtrAdd1( i );
457
+ }
458
+ }
459
+ else {
460
+ for(i=0; i<laneCount; i++)
461
+ ExtrAdd1( i );
462
+ }
463
+ #undef ExtrAdd1
464
+ #undef ExtrAdd2
465
+
466
+ }
467
+
468
/* Table of the 24 64-bit round constants, one per round index 0..23. */
/* The round macros read entry [i] through CONST256_64, which broadcasts it to a 256-bit vector before it is XORed into the first lane. */
+ static ALIGN(KeccakP1600times4_statesAlignment) const UINT64 KeccakP1600RoundConstants[24] = {
469
+ 0x0000000000000001ULL,
470
+ 0x0000000000008082ULL,
471
+ 0x800000000000808aULL,
472
+ 0x8000000080008000ULL,
473
+ 0x000000000000808bULL,
474
+ 0x0000000080000001ULL,
475
+ 0x8000000080008081ULL,
476
+ 0x8000000000008009ULL,
477
+ 0x000000000000008aULL,
478
+ 0x0000000000000088ULL,
479
+ 0x0000000080008009ULL,
480
+ 0x000000008000000aULL,
481
+ 0x000000008000808bULL,
482
+ 0x800000000000008bULL,
483
+ 0x8000000000008089ULL,
484
+ 0x8000000000008003ULL,
485
+ 0x8000000000008002ULL,
486
+ 0x8000000000000080ULL,
487
+ 0x000000000000800aULL,
488
+ 0x800000008000000aULL,
489
+ 0x8000000080008081ULL,
490
+ 0x8000000000008080ULL,
491
+ 0x0000000080000001ULL,
492
+ 0x8000000080008008ULL};
493
+
494
/* Declares the V256 working set used by the round macros: temporaries _Ba.._Bu and _Da.._Du, plus the 25 lane variables _ba.._su. */
/* The expansion has no trailing semicolon; the invocation site supplies it. */
+ #define KeccakP_DeclareVars \
495
+ V256 _Ba, _Be, _Bi, _Bo, _Bu; \
496
+ V256 _Da, _De, _Di, _Do, _Du; \
497
+ V256 _ba, _be, _bi, _bo, _bu; \
498
+ V256 _ga, _ge, _gi, _go, _gu; \
499
+ V256 _ka, _ke, _ki, _ko, _ku; \
500
+ V256 _ma, _me, _mi, _mo, _mu; \
501
+ V256 _sa, _se, _si, _so, _su
502
+
503
/* Processes five lanes of one round. */
/* Step 1: each lane _L1.._L5 is XORed with _Da.._Du (which must already be set) and stored in the temporary named by _Bb1.._Bb5. */
/* Step 2: each temporary is rotated left by _Rr1.._Rr5; the first rotation is skipped when _Rr1 is 0. */
/* Step 3: _L1.._L5 are overwritten with Chi of the fixed triples (_Ba,_Be,_Bi), (_Be,_Bi,_Bo), (_Bi,_Bo,_Bu), (_Bo,_Bu,_Ba), (_Bu,_Ba,_Be). */
/* The caller chooses, through _Bb1.._Bb5, which temporary each input lane lands in. The expansion ends with a semicolon. */
+ #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
504
+ _Bb1 = XOR(_L1, _Da); \
505
+ _Bb2 = XOR(_L2, _De); \
506
+ _Bb3 = XOR(_L3, _Di); \
507
+ _Bb4 = XOR(_L4, _Do); \
508
+ _Bb5 = XOR(_L5, _Du); \
509
+ if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
510
+ _Bb2 = ROL(_Bb2, _Rr2); \
511
+ _Bb3 = ROL(_Bb3, _Rr3); \
512
+ _Bb4 = ROL(_Bb4, _Rr4); \
513
+ _Bb5 = ROL(_Bb5, _Rr5); \
514
+ _L1 = Chi( _Ba, _Be, _Bi); \
515
+ _L2 = Chi( _Be, _Bi, _Bo); \
516
+ _L3 = Chi( _Bi, _Bo, _Bu); \
517
+ _L4 = Chi( _Bo, _Bu, _Ba); \
518
+ _L5 = Chi( _Bu, _Ba, _Be);
519
+
520
/* First step of a round. */
/* Computes the five column XORs of all 25 lane variables into _Ba.._Bu, then derives _Da.._Du: _Da = ROL(_Be,1) ^ _Bu, _De = ROL(_Bi,1) ^ _Ba, and so on cyclically. */
/* Then processes the lanes _L1.._L5 with rotations 0, 44, 43, 21, 14 and finally XORs the round constant _rc into _L1. */
/* The _Da.._Du values left behind are the ones consumed by KeccakP_ThetaRhoPiChi1..4 for the rest of the round. */
+ #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
521
+ _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
522
+ _Be = XOR5( _be, _ge, _ke, _me, _se ); \
523
+ _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
524
+ _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
525
+ _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
526
+ _Da = ROL( _Be, 1 ); \
527
+ _De = ROL( _Bi, 1 ); \
528
+ _Di = ROL( _Bo, 1 ); \
529
+ _Do = ROL( _Bu, 1 ); \
530
+ _Du = ROL( _Ba, 1 ); \
531
+ _Da = XOR( _Da, _Bu ); \
532
+ _De = XOR( _De, _Ba ); \
533
+ _Di = XOR( _Di, _Be ); \
534
+ _Do = XOR( _Do, _Bi ); \
535
+ _Du = XOR( _Du, _Bo ); \
536
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
537
+ _L1 = XOR(_L1, _rc) /* Iota */
538
+
539
/* Wrappers around KeccakP_ThetaRhoPiChi for the four remaining lane groups of a round. */
/* Each one fixes which temporary (_Ba.._Bu) every input lane is written to and the five rotation amounts for that group. */
/* They read _Da.._Du, so KeccakP_ThetaRhoPiChiIota0 must have run earlier in the same round. */
+ #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
540
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
541
+
542
+ #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
543
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
544
+
545
+ #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
546
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
547
+
548
+ #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
549
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
550
+
551
/* Four consecutive rounds using round constants [i], [i+1], [i+2] and [i+3]. */
/* Each round is one KeccakP_ThetaRhoPiChiIota0 followed by KeccakP_ThetaRhoPiChi1..4. */
/* The lane variables are passed in a different order in each of the four rounds; in the fourth round they appear in declaration order (_ba,_be,_bi,...). */
/* NOTE(review): presumably the reordering tracks where each lane lives after the previous round so that no data has to be moved, with the layout back to normal after four rounds - confirm against the Keccak specification. */
+ #define KeccakP_4rounds( i ) \
552
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST256_64(KeccakP1600RoundConstants[i]) ); \
553
+ KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
554
+ KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
555
+ KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
556
+ KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
557
+ \
558
+ KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
559
+ KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
560
+ KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
561
+ KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
562
+ KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
563
+ \
564
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i+2]) ); \
565
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
566
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
567
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
568
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
569
+ \
570
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+3]) ); \
571
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
572
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
573
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
574
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
575
+
576
/* Two consecutive rounds using round constants [i] and [i+1]. */
/* The lane-variable argument orders are the same as in the third and fourth rounds of KeccakP_4rounds. */
/* In this file it is used only by KeccakP1600times4_PermuteAll_6rounds, after copyFromState2rounds has loaded the lane variables in the matching permuted order. */
+ #define KeccakP_2rounds( i ) \
577
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i]) ); \
578
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
579
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
580
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
581
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
582
+ \
583
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
584
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
585
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
586
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
587
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
588
+
589
/* Selects how rounds12 (round constants 12..23) and rounds24 (round constants 0..23) are expanded, based on the configuration macros. */
/* KeccakP1600times4_fullUnrolling: both are straight-line sequences of KeccakP_4rounds. */
/* KeccakP1600times4_unrolling == 4: both loop over KeccakP_4rounds in steps of 4. */
/* KeccakP1600times4_unrolling == 12: rounds12 is straight-line, rounds24 loops in steps of 12. */
/* The looping variants assign to a variable named i, which the enclosing function must declare. Any other configuration stops the build with #error. */
+ #ifdef KeccakP1600times4_fullUnrolling
590
+
591
+ #define rounds12 \
592
+ KeccakP_4rounds( 12 ); \
593
+ KeccakP_4rounds( 16 ); \
594
+ KeccakP_4rounds( 20 )
595
+
596
+ #define rounds24 \
597
+ KeccakP_4rounds( 0 ); \
598
+ KeccakP_4rounds( 4 ); \
599
+ KeccakP_4rounds( 8 ); \
600
+ KeccakP_4rounds( 12 ); \
601
+ KeccakP_4rounds( 16 ); \
602
+ KeccakP_4rounds( 20 )
603
+
604
+ #elif (KeccakP1600times4_unrolling == 4)
605
+
606
+ #define rounds12 \
607
+ i = 12; \
608
+ do { \
609
+ KeccakP_4rounds( i ); \
610
+ } while( (i += 4) < 24 )
611
+
612
+ #define rounds24 \
613
+ i = 0; \
614
+ do { \
615
+ KeccakP_4rounds( i ); \
616
+ } while( (i += 4) < 24 )
617
+
618
+ #elif (KeccakP1600times4_unrolling == 12)
619
+
620
+ #define rounds12 \
621
+ KeccakP_4rounds( 12 ); \
622
+ KeccakP_4rounds( 16 ); \
623
+ KeccakP_4rounds( 20 )
624
+
625
+ #define rounds24 \
626
+ i = 0; \
627
+ do { \
628
+ KeccakP_4rounds( i ); \
629
+ KeccakP_4rounds( i+4 ); \
630
+ KeccakP_4rounds( i+8 ); \
631
+ } while( (i += 12) < 24 )
632
+
633
+ #else
634
+ #error "Unrolling is not correctly specified!"
635
+ #endif
636
+
637
/* Loads the 25 lane variables from pState in a permuted order, for use before KeccakP_2rounds. */
/* The trailing comment on a line names the lane variable that copyFromState would load from that same pState index; lines without a comment load the same index as copyFromState. */
/* NOTE(review): presumably this permutation is the lane layout KeccakP_4rounds has after its first two rounds, so that the two rounds of KeccakP_2rounds finish in normal order - confirm. */
+ #define copyFromState2rounds(pState) \
638
+ _ba = pState[ 0]; \
639
+ _be = pState[16]; /* me */ \
640
+ _bi = pState[ 7]; /* gi */ \
641
+ _bo = pState[23]; /* so */ \
642
+ _bu = pState[14]; /* ku */ \
643
+ _ga = pState[20]; /* sa */ \
644
+ _ge = pState[11]; /* ke */ \
645
+ _gi = pState[ 2]; /* bi */ \
646
+ _go = pState[18]; /* mo */ \
647
+ _gu = pState[ 9]; \
648
+ _ka = pState[15]; /* ma */ \
649
+ _ke = pState[ 6]; /* ge */ \
650
+ _ki = pState[22]; /* si */ \
651
+ _ko = pState[13]; \
652
+ _ku = pState[ 4]; /* bu */ \
653
+ _ma = pState[10]; /* ka */ \
654
+ _me = pState[ 1]; /* be */ \
655
+ _mi = pState[17]; \
656
+ _mo = pState[ 8]; /* go */ \
657
+ _mu = pState[24]; /* su */ \
658
+ _sa = pState[ 5]; /* ga */ \
659
+ _se = pState[21]; \
660
+ _si = pState[12]; /* ki */ \
661
+ _so = pState[ 3]; /* bo */ \
662
+ _su = pState[19] /* mu */
663
+
664
/* Loads the 25 lane variables _ba.._su from pState[0..24] in declaration order. */
/* pState is indexed in V256 units at the call sites in this file, so each element holds one lane position for all four instances. The expansion has no trailing semicolon. */
+ #define copyFromState(pState) \
665
+ _ba = pState[ 0]; \
666
+ _be = pState[ 1]; \
667
+ _bi = pState[ 2]; \
668
+ _bo = pState[ 3]; \
669
+ _bu = pState[ 4]; \
670
+ _ga = pState[ 5]; \
671
+ _ge = pState[ 6]; \
672
+ _gi = pState[ 7]; \
673
+ _go = pState[ 8]; \
674
+ _gu = pState[ 9]; \
675
+ _ka = pState[10]; \
676
+ _ke = pState[11]; \
677
+ _ki = pState[12]; \
678
+ _ko = pState[13]; \
679
+ _ku = pState[14]; \
680
+ _ma = pState[15]; \
681
+ _me = pState[16]; \
682
+ _mi = pState[17]; \
683
+ _mo = pState[18]; \
684
+ _mu = pState[19]; \
685
+ _sa = pState[20]; \
686
+ _se = pState[21]; \
687
+ _si = pState[22]; \
688
+ _so = pState[23]; \
689
+ _su = pState[24]
690
+
691
/* Stores the 25 lane variables _ba.._su back to pState[0..24] in declaration order; the inverse of copyFromState. */
/* The expansion has no trailing semicolon. */
+ #define copyToState(pState) \
692
+ pState[ 0] = _ba; \
693
+ pState[ 1] = _be; \
694
+ pState[ 2] = _bi; \
695
+ pState[ 3] = _bo; \
696
+ pState[ 4] = _bu; \
697
+ pState[ 5] = _ga; \
698
+ pState[ 6] = _ge; \
699
+ pState[ 7] = _gi; \
700
+ pState[ 8] = _go; \
701
+ pState[ 9] = _gu; \
702
+ pState[10] = _ka; \
703
+ pState[11] = _ke; \
704
+ pState[12] = _ki; \
705
+ pState[13] = _ko; \
706
+ pState[14] = _ku; \
707
+ pState[15] = _ma; \
708
+ pState[16] = _me; \
709
+ pState[17] = _mi; \
710
+ pState[18] = _mo; \
711
+ pState[19] = _mu; \
712
+ pState[20] = _sa; \
713
+ pState[21] = _se; \
714
+ pState[22] = _si; \
715
+ pState[23] = _so; \
716
+ pState[24] = _su
717
+
718
+ void KeccakP1600times4_PermuteAll_24rounds(void *states)
719
+ {
720
+ V256 *statesAsLanes = states;
721
+ KeccakP_DeclareVars;
722
+ #ifndef KeccakP1600times4_fullUnrolling
723
+ unsigned int i;
724
+ #endif
725
+
726
+ copyFromState(statesAsLanes);
727
+ rounds24;
728
+ copyToState(statesAsLanes);
729
+ }
730
+
731
+ void KeccakP1600times4_PermuteAll_12rounds(void *states)
732
+ {
733
+ V256 *statesAsLanes = states;
734
+ KeccakP_DeclareVars;
735
+ #if (KeccakP1600times4_unrolling < 12)
736
+ unsigned int i;
737
+ #endif
738
+
739
+ copyFromState(statesAsLanes);
740
+ rounds12;
741
+ copyToState(statesAsLanes);
742
+ }
743
+
744
+ void KeccakP1600times4_PermuteAll_6rounds(void *states)
745
+ {
746
+ V256 *statesAsLanes = states;
747
+ KeccakP_DeclareVars;
748
+
749
+ copyFromState2rounds(statesAsLanes);
750
+ KeccakP_2rounds( 18 );
751
+ KeccakP_4rounds( 20 );
752
+ copyToState(statesAsLanes);
753
+ }
754
+
755
+ void KeccakP1600times4_PermuteAll_4rounds(void *states)
756
+ {
757
+ V256 *statesAsLanes = states;
758
+ KeccakP_DeclareVars;
759
+
760
+ copyFromState(statesAsLanes);
761
+ KeccakP_4rounds( 20 );
762
+ copyToState(statesAsLanes);
763
+ }
764
+
765
+ size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
766
+ {
767
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
768
+
769
+ if (laneCount == 21) {
770
+ #ifndef KeccakP1600times4_fullUnrolling
771
+ unsigned int i;
772
+ #endif
773
+ const unsigned char *dataStart = data;
774
+ V256 *statesAsLanes = states;
775
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
776
+ KeccakP_DeclareVars;
777
+ V128 index;
778
+
779
+ copyFromState(statesAsLanes);
780
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
781
+ while(dataByteLen >= dataMinimumSize) {
782
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
783
+ Add_In( _ba, 0 );
784
+ Add_In( _be, 1 );
785
+ Add_In( _bi, 2 );
786
+ Add_In( _bo, 3 );
787
+ Add_In( _bu, 4 );
788
+ Add_In( _ga, 5 );
789
+ Add_In( _ge, 6 );
790
+ Add_In( _gi, 7 );
791
+ Add_In( _go, 8 );
792
+ Add_In( _gu, 9 );
793
+ Add_In( _ka, 10 );
794
+ Add_In( _ke, 11 );
795
+ Add_In( _ki, 12 );
796
+ Add_In( _ko, 13 );
797
+ Add_In( _ku, 14 );
798
+ Add_In( _ma, 15 );
799
+ Add_In( _me, 16 );
800
+ Add_In( _mi, 17 );
801
+ Add_In( _mo, 18 );
802
+ Add_In( _mu, 19 );
803
+ Add_In( _sa, 20 );
804
+ #undef Add_In
805
+ rounds24;
806
+ dataAsLanes += laneOffsetSerial;
807
+ dataByteLen -= laneOffsetSerial*8;
808
+ }
809
+ copyToState(statesAsLanes);
810
+ return (const unsigned char *)dataAsLanes - dataStart;
811
+ }
812
+ else {
813
+ const unsigned char *dataStart = data;
814
+
815
+ while(dataByteLen >= dataMinimumSize) {
816
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
817
+ KeccakP1600times4_PermuteAll_24rounds(states);
818
+ data += laneOffsetSerial*8;
819
+ dataByteLen -= laneOffsetSerial*8;
820
+ }
821
+ return data - dataStart;
822
+ }
823
+ }
824
+
825
+ size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
826
+ {
827
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
828
+
829
+ if (laneCount == 21) {
830
+ #if (KeccakP1600times4_unrolling < 12)
831
+ unsigned int i;
832
+ #endif
833
+ const unsigned char *dataStart = data;
834
+ V256 *statesAsLanes = states;
835
+ const UINT64 *dataAsLanes = (const UINT64 *)data;
836
+ KeccakP_DeclareVars;
837
+ V128 index;
838
+
839
+ copyFromState(statesAsLanes);
840
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
841
+ while(dataByteLen >= dataMinimumSize) {
842
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
843
+ Add_In( _ba, 0 );
844
+ Add_In( _be, 1 );
845
+ Add_In( _bi, 2 );
846
+ Add_In( _bo, 3 );
847
+ Add_In( _bu, 4 );
848
+ Add_In( _ga, 5 );
849
+ Add_In( _ge, 6 );
850
+ Add_In( _gi, 7 );
851
+ Add_In( _go, 8 );
852
+ Add_In( _gu, 9 );
853
+ Add_In( _ka, 10 );
854
+ Add_In( _ke, 11 );
855
+ Add_In( _ki, 12 );
856
+ Add_In( _ko, 13 );
857
+ Add_In( _ku, 14 );
858
+ Add_In( _ma, 15 );
859
+ Add_In( _me, 16 );
860
+ Add_In( _mi, 17 );
861
+ Add_In( _mo, 18 );
862
+ Add_In( _mu, 19 );
863
+ Add_In( _sa, 20 );
864
+ #undef Add_In
865
+ rounds12;
866
+ dataAsLanes += laneOffsetSerial;
867
+ dataByteLen -= laneOffsetSerial*8;
868
+ }
869
+ copyToState(statesAsLanes);
870
+ return (const unsigned char *)dataAsLanes - dataStart;
871
+ }
872
+ else {
873
+ const unsigned char *dataStart = data;
874
+
875
+ while(dataByteLen >= dataMinimumSize) {
876
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
877
+ KeccakP1600times4_PermuteAll_12rounds(states);
878
+ data += laneOffsetSerial*8;
879
+ dataByteLen -= laneOffsetSerial*8;
880
+ }
881
+ return data - dataStart;
882
+ }
883
+ }