digest-kangarootwelve 0.2.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (305) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +51 -11
  3. data/Rakefile +2 -2
  4. data/digest-kangarootwelve.gemspec +322 -42
  5. data/ext/digest/kangarootwelve/ext.c +1 -1
  6. data/ext/digest/kangarootwelve/extconf.rb +13 -1
  7. data/ext/digest/kangarootwelve/keccak/armv6m/KangarooTwelve.link.c +1 -0
  8. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakDuplexWidth1600.link.c +1 -0
  9. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-SnP.h +36 -0
  10. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-SnP.h → keccak/armv6m/KeccakP-1600-times2-SnP.h} +10 -10
  11. data/ext/digest/kangarootwelve/{KeccakP-1600-times2-on1.c → keccak/armv6m/KeccakP-1600-times2-on1.c} +13 -7
  12. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-SnP.h → keccak/armv6m/KeccakP-1600-times4-SnP.h} +10 -10
  13. data/ext/digest/kangarootwelve/{KeccakP-1600-times4-on1.c → keccak/armv6m/KeccakP-1600-times4-on1.c} +13 -7
  14. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-SnP.h → keccak/armv6m/KeccakP-1600-times8-SnP.h} +10 -10
  15. data/ext/digest/kangarootwelve/{KeccakP-1600-times8-on1.c → keccak/armv6m/KeccakP-1600-times8-on1.c} +13 -7
  16. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1334 -0
  17. data/ext/digest/kangarootwelve/keccak/armv6m/KeccakSpongeWidth1600.link.c +1 -0
  18. data/ext/digest/kangarootwelve/{PlSnP-Fallback.inc → keccak/armv6m/PlSnP-Fallback.inc} +11 -7
  19. data/ext/digest/kangarootwelve/keccak/armv6m/ext.link.c +1 -0
  20. data/ext/digest/kangarootwelve/keccak/armv7a/KangarooTwelve.link.c +1 -0
  21. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakDuplexWidth1600.link.c +1 -0
  22. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-SnP.h +37 -0
  23. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-armv7a-le-neon-gcc.s +826 -0
  24. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1245 -0
  25. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times2-SnP.h +38 -0
  26. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-SnP.h +45 -0
  27. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times4-on2.c +38 -0
  28. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-SnP.h +45 -0
  29. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakP-1600-times8-on2.c +38 -0
  30. data/ext/digest/kangarootwelve/keccak/armv7a/KeccakSpongeWidth1600.link.c +1 -0
  31. data/ext/digest/kangarootwelve/keccak/armv7a/PlSnP-Fallback.inc +287 -0
  32. data/ext/digest/kangarootwelve/keccak/armv7a/ext.link.c +1 -0
  33. data/ext/digest/kangarootwelve/keccak/armv7m/KangarooTwelve.link.c +1 -0
  34. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakDuplexWidth1600.link.c +1 -0
  35. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-SnP.h +36 -0
  36. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1170 -0
  37. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-SnP.h +45 -0
  38. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times2-on1.c +37 -0
  39. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-SnP.h +45 -0
  40. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times4-on1.c +37 -0
  41. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-SnP.h +45 -0
  42. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakP-1600-times8-on1.c +37 -0
  43. data/ext/digest/kangarootwelve/keccak/armv7m/KeccakSpongeWidth1600.link.c +1 -0
  44. data/ext/digest/kangarootwelve/keccak/armv7m/PlSnP-Fallback.inc +287 -0
  45. data/ext/digest/kangarootwelve/keccak/armv7m/ext.link.c +1 -0
  46. data/ext/digest/kangarootwelve/keccak/armv8a/KangarooTwelve.link.c +1 -0
  47. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakDuplexWidth1600.link.c +1 -0
  48. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-SnP.h +28 -0
  49. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-armv8a-neon.s +537 -0
  50. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-SnP.h +45 -0
  51. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times2-on1.c +37 -0
  52. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-SnP.h +45 -0
  53. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times4-on1.c +37 -0
  54. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-SnP.h +45 -0
  55. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakP-1600-times8-on1.c +37 -0
  56. data/ext/digest/kangarootwelve/keccak/armv8a/KeccakSpongeWidth1600.link.c +1 -0
  57. data/ext/digest/kangarootwelve/keccak/armv8a/PlSnP-Fallback.inc +287 -0
  58. data/ext/digest/kangarootwelve/keccak/armv8a/ext.link.c +1 -0
  59. data/ext/digest/kangarootwelve/keccak/asmx86-64/KangarooTwelve.link.c +1 -0
  60. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakDuplexWidth1600.link.c +1 -0
  61. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-SnP.h +37 -0
  62. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-SnP.h +45 -0
  63. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times2-on1.c +37 -0
  64. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-SnP.h +45 -0
  65. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times4-on1.c +37 -0
  66. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-SnP.h +45 -0
  67. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-times8-on1.c +37 -0
  68. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakP-1600-x86-64-gas.s +1190 -0
  69. data/ext/digest/kangarootwelve/keccak/asmx86-64/KeccakSpongeWidth1600.link.c +1 -0
  70. data/ext/digest/kangarootwelve/keccak/asmx86-64/PlSnP-Fallback.inc +287 -0
  71. data/ext/digest/kangarootwelve/keccak/asmx86-64/ext.link.c +1 -0
  72. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KangarooTwelve.link.c +1 -0
  73. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakDuplexWidth1600.link.c +1 -0
  74. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-SnP.h +37 -0
  75. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-SnP.h +45 -0
  76. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times2-on1.c +37 -0
  77. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-SnP.h +45 -0
  78. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times4-on1.c +37 -0
  79. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-SnP.h +45 -0
  80. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-times8-on1.c +37 -0
  81. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakP-1600-x86-64-shld-gas.s +1190 -0
  82. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/KeccakSpongeWidth1600.link.c +1 -0
  83. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/PlSnP-Fallback.inc +287 -0
  84. data/ext/digest/kangarootwelve/keccak/asmx86-64shld/ext.link.c +1 -0
  85. data/ext/digest/kangarootwelve/keccak/avr8/KangarooTwelve.link.c +1 -0
  86. data/ext/digest/kangarootwelve/keccak/avr8/KeccakDuplexWidth1600.link.c +1 -0
  87. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-SnP.h +37 -0
  88. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-avr8-fast.s +1116 -0
  89. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-SnP.h +45 -0
  90. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times2-on1.c +37 -0
  91. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-SnP.h +45 -0
  92. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times4-on1.c +37 -0
  93. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-SnP.h +45 -0
  94. data/ext/digest/kangarootwelve/keccak/avr8/KeccakP-1600-times8-on1.c +37 -0
  95. data/ext/digest/kangarootwelve/keccak/avr8/KeccakSpongeWidth1600.link.c +1 -0
  96. data/ext/digest/kangarootwelve/keccak/avr8/PlSnP-Fallback.inc +287 -0
  97. data/ext/digest/kangarootwelve/keccak/avr8/ext.link.c +1 -0
  98. data/ext/digest/kangarootwelve/keccak/bulldozer/KangarooTwelve.link.c +1 -0
  99. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakDuplexWidth1600.link.c +1 -0
  100. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-SnP.h +39 -0
  101. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP-config.h +6 -0
  102. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-XOP.c +473 -0
  103. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SIMD128.c +954 -0
  104. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times2-SnP.h +47 -0
  105. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-SnP.h +45 -0
  106. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times4-on2.c +38 -0
  107. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-SnP.h +45 -0
  108. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-times8-on2.c +38 -0
  109. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakP-1600-unrolling.macros +302 -0
  110. data/ext/digest/kangarootwelve/keccak/bulldozer/KeccakSpongeWidth1600.link.c +1 -0
  111. data/ext/digest/kangarootwelve/keccak/bulldozer/PlSnP-Fallback.inc +287 -0
  112. data/ext/digest/kangarootwelve/keccak/bulldozer/SIMD128-config.h +9 -0
  113. data/ext/digest/kangarootwelve/{SnP-Relaned.h → keccak/bulldozer/SnP-Relaned.h} +13 -7
  114. data/ext/digest/kangarootwelve/keccak/bulldozer/ext.link.c +1 -0
  115. data/ext/digest/kangarootwelve/{KangarooTwelve.c → keccak/common/KangarooTwelve.c} +6 -10
  116. data/ext/digest/kangarootwelve/{KangarooTwelve.h → keccak/common/KangarooTwelve.h} +3 -7
  117. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex-common.h +37 -0
  118. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplex.inc +192 -0
  119. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.c +34 -0
  120. data/ext/digest/kangarootwelve/keccak/common/KeccakDuplexWidth1600.h +25 -0
  121. data/ext/digest/kangarootwelve/{KeccakSponge-common.h → keccak/common/KeccakSponge-common.h} +5 -7
  122. data/ext/digest/kangarootwelve/{KeccakSponge.inc → keccak/common/KeccakSponge.inc} +6 -8
  123. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.c → keccak/common/KeccakSpongeWidth1600.c} +6 -8
  124. data/ext/digest/kangarootwelve/{KeccakSpongeWidth1600.h → keccak/common/KeccakSpongeWidth1600.h} +5 -7
  125. data/ext/digest/kangarootwelve/{Phases.h → keccak/common/Phases.h} +3 -7
  126. data/ext/digest/kangarootwelve/{align.h → keccak/common/align.h} +5 -7
  127. data/ext/digest/kangarootwelve/{brg_endian.h → keccak/common/brg_endian.h} +0 -0
  128. data/ext/digest/kangarootwelve/keccak/compact/KangarooTwelve.link.c +1 -0
  129. data/ext/digest/kangarootwelve/keccak/compact/KeccakDuplexWidth1600.link.c +1 -0
  130. data/ext/digest/kangarootwelve/{KeccakP-1600-SnP.h → keccak/compact/KeccakP-1600-SnP.h} +7 -10
  131. data/ext/digest/kangarootwelve/{KeccakP-1600-compact64.c → keccak/compact/KeccakP-1600-compact64.c} +11 -7
  132. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-SnP.h +45 -0
  133. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times2-on1.c +37 -0
  134. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-SnP.h +45 -0
  137. data/ext/digest/kangarootwelve/keccak/compact/KeccakP-1600-times8-on1.c +37 -0
  138. data/ext/digest/kangarootwelve/keccak/compact/KeccakSpongeWidth1600.link.c +1 -0
  139. data/ext/digest/kangarootwelve/keccak/compact/PlSnP-Fallback.inc +287 -0
  140. data/ext/digest/kangarootwelve/keccak/compact/SnP-Relaned.h +140 -0
  141. data/ext/digest/kangarootwelve/keccak/compact/ext.link.c +1 -0
  142. data/ext/digest/kangarootwelve/keccak/generic32/KangarooTwelve.link.c +1 -0
  143. data/ext/digest/kangarootwelve/keccak/generic32/KeccakDuplexWidth1600.link.c +1 -0
  144. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-SnP.h +38 -0
  145. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-inplace32BI.c +1162 -0
  146. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-SnP.h +45 -0
  147. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times2-on1.c +37 -0
  148. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-SnP.h +45 -0
  149. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times4-on1.c +37 -0
  150. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-SnP.h +45 -0
  151. data/ext/digest/kangarootwelve/keccak/generic32/KeccakP-1600-times8-on1.c +37 -0
  152. data/ext/digest/kangarootwelve/keccak/generic32/KeccakSpongeWidth1600.link.c +1 -0
  153. data/ext/digest/kangarootwelve/keccak/generic32/PlSnP-Fallback.inc +287 -0
  154. data/ext/digest/kangarootwelve/keccak/generic32/SnP-Relaned.h +140 -0
  155. data/ext/digest/kangarootwelve/keccak/generic32/ext.link.c +1 -0
  156. data/ext/digest/kangarootwelve/keccak/generic32lc/KangarooTwelve.link.c +1 -0
  157. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakDuplexWidth1600.link.c +1 -0
  158. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-SnP.h +38 -0
  159. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-inplace32BI.c +1162 -0
  160. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-SnP.h +45 -0
  161. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times2-on1.c +37 -0
  162. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-SnP.h +45 -0
  163. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times4-on1.c +37 -0
  164. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-SnP.h +45 -0
  165. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakP-1600-times8-on1.c +37 -0
  166. data/ext/digest/kangarootwelve/keccak/generic32lc/KeccakSpongeWidth1600.link.c +1 -0
  167. data/ext/digest/kangarootwelve/keccak/generic32lc/PlSnP-Fallback.inc +287 -0
  168. data/ext/digest/kangarootwelve/keccak/generic32lc/SnP-Relaned.h +140 -0
  169. data/ext/digest/kangarootwelve/keccak/generic32lc/ext.link.c +1 -0
  170. data/ext/digest/kangarootwelve/keccak/generic64/KangarooTwelve.link.c +1 -0
  171. data/ext/digest/kangarootwelve/keccak/generic64/KeccakDuplexWidth1600.link.c +1 -0
  172. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-64.macros +2195 -0
  173. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-SnP.h +49 -0
  174. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64-config.h +6 -0
  175. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-opt64.c +541 -0
  176. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-SnP.h +45 -0
  177. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times2-on1.c +37 -0
  178. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-SnP.h +45 -0
  179. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times4-on1.c +37 -0
  180. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-SnP.h +45 -0
  181. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-times8-on1.c +37 -0
  182. data/ext/digest/kangarootwelve/keccak/generic64/KeccakP-1600-unrolling.macros +302 -0
  183. data/ext/digest/kangarootwelve/keccak/generic64/KeccakSpongeWidth1600.link.c +1 -0
  184. data/ext/digest/kangarootwelve/keccak/generic64/PlSnP-Fallback.inc +287 -0
  185. data/ext/digest/kangarootwelve/keccak/generic64/SnP-Relaned.h +140 -0
  186. data/ext/digest/kangarootwelve/keccak/generic64/ext.link.c +1 -0
  187. data/ext/digest/kangarootwelve/keccak/generic64lc/KangarooTwelve.link.c +1 -0
  188. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakDuplexWidth1600.link.c +1 -0
  189. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-64.macros +2195 -0
  190. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-SnP.h +49 -0
  191. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64-config.h +7 -0
  192. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-opt64.c +541 -0
  193. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-SnP.h +45 -0
  194. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times2-on1.c +37 -0
  195. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-SnP.h +45 -0
  196. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times4-on1.c +37 -0
  197. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-SnP.h +45 -0
  198. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-times8-on1.c +37 -0
  199. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakP-1600-unrolling.macros +302 -0
  200. data/ext/digest/kangarootwelve/keccak/generic64lc/KeccakSpongeWidth1600.link.c +1 -0
  201. data/ext/digest/kangarootwelve/keccak/generic64lc/PlSnP-Fallback.inc +287 -0
  202. data/ext/digest/kangarootwelve/keccak/generic64lc/SnP-Relaned.h +140 -0
  203. data/ext/digest/kangarootwelve/keccak/generic64lc/ext.link.c +1 -0
  204. data/ext/digest/kangarootwelve/keccak/haswell/KangarooTwelve.link.c +1 -0
  205. data/ext/digest/kangarootwelve/keccak/haswell/KeccakDuplexWidth1600.link.c +1 -0
  206. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-AVX2.s +993 -0
  207. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-SnP.h +41 -0
  208. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SIMD128.c +954 -0
  209. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times2-SnP.h +47 -0
  210. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SIMD256.c +1303 -0
  211. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times4-SnP.h +53 -0
  212. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-SnP.h +45 -0
  213. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-times8-on4.c +38 -0
  214. data/ext/digest/kangarootwelve/keccak/haswell/KeccakP-1600-unrolling.macros +302 -0
  215. data/ext/digest/kangarootwelve/keccak/haswell/KeccakSpongeWidth1600.link.c +1 -0
  216. data/ext/digest/kangarootwelve/keccak/haswell/PlSnP-Fallback.inc +287 -0
  217. data/ext/digest/kangarootwelve/keccak/haswell/SIMD128-config.h +8 -0
  218. data/ext/digest/kangarootwelve/keccak/haswell/SIMD256-config.h +7 -0
  219. data/ext/digest/kangarootwelve/keccak/haswell/ext.link.c +1 -0
  220. data/ext/digest/kangarootwelve/keccak/nehalem/KangarooTwelve.link.c +1 -0
  221. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakDuplexWidth1600.link.c +1 -0
  222. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-64.macros +2195 -0
  223. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-SnP.h +49 -0
  224. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64-config.h +7 -0
  225. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-opt64.c +541 -0
  226. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SIMD128.c +954 -0
  227. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times2-SnP.h +47 -0
  228. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-SnP.h +45 -0
  229. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times4-on2.c +38 -0
  230. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-SnP.h +45 -0
  231. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-times8-on2.c +38 -0
  232. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakP-1600-unrolling.macros +302 -0
  233. data/ext/digest/kangarootwelve/keccak/nehalem/KeccakSpongeWidth1600.link.c +1 -0
  234. data/ext/digest/kangarootwelve/keccak/nehalem/PlSnP-Fallback.inc +287 -0
  235. data/ext/digest/kangarootwelve/keccak/nehalem/SIMD128-config.h +8 -0
  236. data/ext/digest/kangarootwelve/keccak/nehalem/SnP-Relaned.h +140 -0
  237. data/ext/digest/kangarootwelve/keccak/nehalem/ext.link.c +1 -0
  238. data/ext/digest/kangarootwelve/keccak/reference/KangarooTwelve.link.c +1 -0
  239. data/ext/digest/kangarootwelve/keccak/reference/KeccakDuplexWidth1600.link.c +1 -0
  240. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-SnP.h +41 -0
  241. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.c +424 -0
  242. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-reference.h +20 -0
  243. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-SnP.h +45 -0
  244. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times2-on1.c +37 -0
  245. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-SnP.h +45 -0
  246. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times4-on1.c +37 -0
  247. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-SnP.h +45 -0
  248. data/ext/digest/kangarootwelve/keccak/reference/KeccakP-1600-times8-on1.c +37 -0
  249. data/ext/digest/kangarootwelve/keccak/reference/KeccakSpongeWidth1600.link.c +1 -0
  250. data/ext/digest/kangarootwelve/keccak/reference/PlSnP-Fallback.inc +287 -0
  251. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.c +176 -0
  252. data/ext/digest/kangarootwelve/keccak/reference/displayIntermediateValues.h +29 -0
  253. data/ext/digest/kangarootwelve/keccak/reference/ext.link.c +1 -0
  254. data/ext/digest/kangarootwelve/keccak/reference32bits/KangarooTwelve.link.c +1 -0
  255. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakDuplexWidth1600.link.c +1 -0
  256. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-SnP.h +41 -0
  257. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference.h +20 -0
  258. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-reference32BI.c +612 -0
  259. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-SnP.h +45 -0
  260. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times2-on1.c +37 -0
  261. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-SnP.h +45 -0
  262. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times4-on1.c +37 -0
  263. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-SnP.h +45 -0
  264. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakP-1600-times8-on1.c +37 -0
  265. data/ext/digest/kangarootwelve/keccak/reference32bits/KeccakSpongeWidth1600.link.c +1 -0
  266. data/ext/digest/kangarootwelve/keccak/reference32bits/PlSnP-Fallback.inc +287 -0
  267. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.c +176 -0
  268. data/ext/digest/kangarootwelve/keccak/reference32bits/displayIntermediateValues.h +29 -0
  269. data/ext/digest/kangarootwelve/keccak/reference32bits/ext.link.c +1 -0
  270. data/ext/digest/kangarootwelve/keccak/sandybridge/KangarooTwelve.link.c +1 -0
  271. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakDuplexWidth1600.link.c +1 -0
  272. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-64.macros +2195 -0
  273. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-SnP.h +49 -0
  274. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64-config.h +8 -0
  275. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-opt64.c +541 -0
  276. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SIMD128.c +954 -0
  277. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times2-SnP.h +47 -0
  278. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-SnP.h +45 -0
  279. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times4-on2.c +38 -0
  280. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-SnP.h +45 -0
  281. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-times8-on2.c +38 -0
  282. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakP-1600-unrolling.macros +302 -0
  283. data/ext/digest/kangarootwelve/keccak/sandybridge/KeccakSpongeWidth1600.link.c +1 -0
  284. data/ext/digest/kangarootwelve/keccak/sandybridge/PlSnP-Fallback.inc +287 -0
  285. data/ext/digest/kangarootwelve/keccak/sandybridge/SIMD128-config.h +8 -0
  286. data/ext/digest/kangarootwelve/keccak/sandybridge/SnP-Relaned.h +140 -0
  287. data/ext/digest/kangarootwelve/keccak/sandybridge/ext.link.c +1 -0
  288. data/ext/digest/kangarootwelve/keccak/skylakex/KangarooTwelve.link.c +1 -0
  289. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakDuplexWidth1600.link.c +1 -0
  290. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512-config.h +6 -0
  291. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-AVX512.c +621 -0
  292. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-SnP.h +42 -0
  293. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SIMD512.c +852 -0
  294. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times2-SnP.h +49 -0
  295. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SIMD512.c +883 -0
  296. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times4-SnP.h +49 -0
  297. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SIMD512.c +1473 -0
  298. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakP-1600-times8-SnP.h +53 -0
  299. data/ext/digest/kangarootwelve/keccak/skylakex/KeccakSpongeWidth1600.link.c +1 -0
  300. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-2-config.h +7 -0
  301. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-4-config.h +7 -0
  302. data/ext/digest/kangarootwelve/keccak/skylakex/SIMD512-config.h +7 -0
  303. data/ext/digest/kangarootwelve/keccak/skylakex/ext.link.c +1 -0
  304. data/lib/digest/kangarootwelve/version.rb +1 -1
  305. metadata +299 -21
@@ -0,0 +1,954 @@
1
+ /*
2
+ Implementation by Gilles Van Assche, hereby denoted as "the implementer".
3
+
4
+ For more information, feedback or questions, please refer to our website:
5
+ https://keccak.team/
6
+
7
+ To the extent possible under law, the implementer has waived all copyright
8
+ and related or neighboring rights to the source code in this file.
9
+ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ ---
12
+
13
+ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
14
+ Please refer to PlSnP-documentation.h for more details.
15
+
16
+ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
17
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
18
+ */
19
+
20
+ #include <stdio.h>
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+ #include <x86intrin.h>
24
+ #include "align.h"
25
+ #include "KeccakP-1600-times2-SnP.h"
26
+ #include "SIMD128-config.h"
27
+
28
+ #include "brg_endian.h"
29
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
30
+ #error Expecting a little-endian platform
31
+ #endif
32
+
33
+ typedef unsigned char UINT8;
34
+ typedef unsigned long long int UINT64; /* one 64-bit state lane */
35
+ typedef __m128i V128; /* 128-bit vector; the functions below use it to hold two 64-bit lanes at once */
36
+
37
+ #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + instanceIndex) /* index into the state viewed as UINT64[]: the two instances are interleaved lane by lane; NOTE(review): instanceIndex is not parenthesised in the expansion */
38
+
39
+ #if defined(KeccakP1600times2_useSSE) /* all vector helper macros below exist only when this switch is defined */
40
+ #define ANDnu128(a, b) _mm_andnot_si128(a, b)
41
+ #define CONST128(a) _mm_load_si128((const V128 *)&(a)) /* same expansion as LOAD128: aligned 128-bit load */
42
+ #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
43
+ #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a)) /* unaligned variant */
44
+ #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b)) /* build one vector from two 64-bit values; presumably a is the high half and b the low half per the intrinsic argument order - confirm */
45
+ #define CONST128_64(a) _mm_set1_epi64((__m64)(a)) /* same 64-bit value in both halves */
46
+ #if defined(KeccakP1600times2_useXOP) /* XOP build: use the rotate intrinsic for every rotation amount */
47
+ #define ROL64in128(a, o) _mm_roti_epi64(a, o)
48
+ #define ROL64in128_8(a) ROL64in128(a, 8)
49
+ #define ROL64in128_56(a) ROL64in128(a, 56)
50
+ #else /* no XOP: rotate with shift-left OR shift-right, and use byte shuffles for the 8 and 56 cases */
51
+ #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
52
+ #define ROL64in128_8(a) _mm_shuffle_epi8(a, CONST128(rho8))
53
+ #define ROL64in128_56(a) _mm_shuffle_epi8(a, CONST128(rho56))
54
+ static const UINT64 rho8[2] = {0x0605040302010007, 0x0E0D0C0B0A09080F}; /* shuffle control bytes for ROL64in128_8; NOTE(review): loaded with an aligned load via CONST128 but declared without an explicit alignment attribute - confirm */
55
+ static const UINT64 rho56[2] = {0x0007060504030201, 0x080F0E0D0C0B0A09}; /* shuffle control bytes for ROL64in128_56; same alignment remark as rho8 */
56
+ #endif
57
+ #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b) /* aligned 128-bit store */
58
+ #define STORE128u(a, b) _mm_storeu_si128((V128 *)&(a), b) /* unaligned variant */
59
+ #define STORE64L(a, b) _mm_storel_pi((__m64 *)&(a), (__m128)b) /* store one 64-bit half of b; presumably the low half - confirm against the intrinsic reference */
60
+ #define STORE64H(a, b) _mm_storeh_pi((__m64 *)&(a), (__m128)b) /* store one 64-bit half of b; presumably the high half - confirm against the intrinsic reference */
61
+ #define XOR128(a, b) _mm_xor_si128(a, b)
62
+ #define XOReq128(a, b) a = _mm_xor_si128(a, b) /* in-place XOR; expands to an assignment, so a is evaluated twice and must be an lvalue */
63
+ #define ZERO128() _mm_setzero_si128()
64
+ #if defined(KeccakP1600times2_useSSE2) /* unpack helpers are only defined when both the SSE and SSE2 switches are set */
65
+ #define UNPACKL( a, b ) _mm_unpacklo_epi64((a), (b))
66
+ #define UNPACKH( a, b ) _mm_unpackhi_epi64((a), (b))
67
+ #endif
68
+ #endif
69
+
70
+ #define SnP_laneLengthInBytes 8 /* size of one lane in bytes, matching sizeof a 64-bit UINT64 */
71
+
72
+ void KeccakP1600times2_InitializeAll(void *states) /* Reset the state buffer at states to all-zero bytes. */
73
+ {
74
+ memset(states, 0, KeccakP1600times2_statesSizeInBytes); /* the size constant is not defined in this file; presumably it comes from KeccakP-1600-times2-SnP.h - confirm */
75
+ }
76
+
77
+ void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) /* XOR length bytes from data into the lanes of instance instanceIndex, starting at byte position offset within that instance. */
78
+ {
79
+ unsigned int sizeLeft = length; /* bytes still to be XORed in */
80
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes; /* first lane touched */
81
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes; /* byte position inside that lane */
82
+ const unsigned char *curData = data;
83
+ UINT64 *statesAsLanes = (UINT64 *)states; /* state viewed as 64-bit words; laneIndex() selects the word for (instance, lane) */
84
+ /* Step 1: offset is not on a lane boundary, so handle the partial first lane. */
85
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
86
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; /* room left in this lane */
87
+ UINT64 lane = 0; /* zero bytes leave the matching state bytes unchanged under XOR */
88
+ if (bytesInLane > sizeLeft) /* input ends inside this lane */
89
+ bytesInLane = sizeLeft;
90
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); /* place the input at its in-lane byte position; the file refuses to compile on non-little-endian platforms */
91
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
92
+ sizeLeft -= bytesInLane;
93
+ lanePosition++;
94
+ curData += bytesInLane;
95
+ }
96
+ /* Step 2: whole lanes, one 64-bit XOR per lane. */
97
+ while(sizeLeft >= SnP_laneLengthInBytes) {
98
+ UINT64 lane = *((const UINT64*)curData); /* NOTE(review): 64-bit load through a cast unsigned char pointer; nothing here checks that curData is 8-byte aligned - confirm this is acceptable on every target */
99
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
100
+ sizeLeft -= SnP_laneLengthInBytes;
101
+ lanePosition++;
102
+ curData += SnP_laneLengthInBytes;
103
+ }
104
+ /* Step 3: trailing partial lane, if any bytes remain. */
105
+ if (sizeLeft > 0) {
106
+ UINT64 lane = 0;
107
+ memcpy(&lane, curData, sizeLeft); /* remaining bytes go to the start of a zeroed lane */
108
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
109
+ }
110
+ }
111
+
112
+ void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) /* XOR laneCount lanes from each of two input streams into the state; the second stream starts laneOffset lanes after data. */
113
+ {
114
+ V128 *stateAsLanes = (V128 *)states; /* one 128-bit element per lane position, i.e. the two UINT64 words that laneIndex() addresses for that position */
115
+ unsigned int i;
116
+ const UINT64 *curData0 = (const UINT64 *)data; /* first input stream; NOTE(review): data is read as UINT64 without an alignment check - confirm */
117
+ const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); /* second input stream, laneOffset*8 bytes further on */
118
+ #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex])) /* pack lane argIndex of both streams into one vector and XOR it into the state; presumably curData0 feeds instance 0 and curData1 instance 1 given the intrinsic argument order - confirm */
119
+ if ( laneCount >= 17 ) { /* at least 17 lanes (136 bytes): the first 17 are unrolled */
120
+ XOR_In( 0 );
121
+ XOR_In( 1 );
122
+ XOR_In( 2 );
123
+ XOR_In( 3 );
124
+ XOR_In( 4 );
125
+ XOR_In( 5 );
126
+ XOR_In( 6 );
127
+ XOR_In( 7 );
128
+ XOR_In( 8 );
129
+ XOR_In( 9 );
130
+ XOR_In( 10 );
131
+ XOR_In( 11 );
132
+ XOR_In( 12 );
133
+ XOR_In( 13 );
134
+ XOR_In( 14 );
135
+ XOR_In( 15 );
136
+ XOR_In( 16 );
137
+ if ( laneCount >= 21 ) { /* at least 21 lanes (168 bytes): lanes 17 to 20 are unrolled too */
138
+ XOR_In( 17 );
139
+ XOR_In( 18 );
140
+ XOR_In( 19 );
141
+ XOR_In( 20 );
142
+ for(i=21; i<laneCount; i++) /* any lanes beyond the unrolled 21 */
143
+ XOR_In( i );
144
+ }
145
+ else {
146
+ for(i=17; i<laneCount; i++) /* 17 to 20 lanes requested: finish with a loop */
147
+ XOR_In( i );
148
+ }
149
+ }
150
+ else { /* fewer than 17 lanes: plain loop, no unrolling */
151
+ for(i=0; i<laneCount; i++)
152
+ XOR_In( i );
153
+ }
154
+ #undef XOR_In /* keep the helper macro local to this function */
155
+ }
156
+
157
+ void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) /* Replace length state bytes of instance instanceIndex, starting at byte position offset within that instance, with the bytes from data. */
158
+ {
159
+ unsigned int sizeLeft = length; /* bytes still to be written */
160
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes; /* first lane touched */
161
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes; /* byte position inside that lane */
162
+ const unsigned char *curData = data;
163
+ UINT64 *statesAsLanes = (UINT64 *)states; /* state viewed as 64-bit words; laneIndex() selects the word for (instance, lane) */
164
+ /* Step 1: offset is not on a lane boundary, so overwrite only part of the first lane. */
165
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
166
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; /* room left in this lane */
167
+ if (bytesInLane > sizeLeft) /* input ends inside this lane */
168
+ bytesInLane = sizeLeft;
169
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane); /* write straight into the state lane so the bytes before offsetInLane are preserved */
170
+ sizeLeft -= bytesInLane;
171
+ lanePosition++;
172
+ curData += bytesInLane;
173
+ }
174
+ /* Step 2: whole lanes, one 64-bit store per lane. */
175
+ while(sizeLeft >= SnP_laneLengthInBytes) {
176
+ UINT64 lane = *((const UINT64*)curData); /* NOTE(review): 64-bit load through a cast unsigned char pointer; nothing here checks that curData is 8-byte aligned - confirm this is acceptable on every target */
177
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
178
+ sizeLeft -= SnP_laneLengthInBytes;
179
+ lanePosition++;
180
+ curData += SnP_laneLengthInBytes;
181
+ }
182
+ /* Step 3: trailing partial lane, if any bytes remain. */
183
+ if (sizeLeft > 0) {
184
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft); /* only the first sizeLeft bytes of the lane are replaced; the rest keep their old value */
185
+ }
186
+ }
187
+
188
+ void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
189
+ {
190
+ V128 *stateAsLanes = (V128 *)states;
191
+ unsigned int i;
192
+ const UINT64 *curData0 = (const UINT64 *)data;
193
+ const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
194
+ #define OverWr( argIndex ) STORE128(stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
195
+ if ( laneCount >= 17 ) {
196
+ OverWr( 0 );
197
+ OverWr( 1 );
198
+ OverWr( 2 );
199
+ OverWr( 3 );
200
+ OverWr( 4 );
201
+ OverWr( 5 );
202
+ OverWr( 6 );
203
+ OverWr( 7 );
204
+ OverWr( 8 );
205
+ OverWr( 9 );
206
+ OverWr( 10 );
207
+ OverWr( 11 );
208
+ OverWr( 12 );
209
+ OverWr( 13 );
210
+ OverWr( 14 );
211
+ OverWr( 15 );
212
+ OverWr( 16 );
213
+ if ( laneCount >= 21 ) {
214
+ OverWr( 17 );
215
+ OverWr( 18 );
216
+ OverWr( 19 );
217
+ OverWr( 20 );
218
+ for(i=21; i<laneCount; i++)
219
+ OverWr( i );
220
+ }
221
+ else {
222
+ for(i=17; i<laneCount; i++)
223
+ OverWr( i );
224
+ }
225
+ }
226
+ else {
227
+ for(i=0; i<laneCount; i++)
228
+ OverWr( i );
229
+ }
230
+ #undef OverWr
231
+ }
232
+
233
+ void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
234
+ {
235
+ unsigned int sizeLeft = byteCount;
236
+ unsigned int lanePosition = 0;
237
+ UINT64 *statesAsLanes = (UINT64 *)states;
238
+
239
+ while(sizeLeft >= SnP_laneLengthInBytes) {
240
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
241
+ sizeLeft -= SnP_laneLengthInBytes;
242
+ lanePosition++;
243
+ }
244
+
245
+ if (sizeLeft > 0) {
246
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
247
+ }
248
+ }
249
+
250
+ void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
251
+ {
252
+ unsigned int sizeLeft = length;
253
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
254
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
255
+ unsigned char *curData = data;
256
+ const UINT64 *statesAsLanes = (const UINT64 *)states;
257
+
258
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
259
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
260
+ if (bytesInLane > sizeLeft)
261
+ bytesInLane = sizeLeft;
262
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
263
+ sizeLeft -= bytesInLane;
264
+ lanePosition++;
265
+ curData += bytesInLane;
266
+ }
267
+
268
+ while(sizeLeft >= SnP_laneLengthInBytes) {
269
+ *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
270
+ sizeLeft -= SnP_laneLengthInBytes;
271
+ lanePosition++;
272
+ curData += SnP_laneLengthInBytes;
273
+ }
274
+
275
+ if (sizeLeft > 0) {
276
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
277
+ }
278
+ }
279
+
280
+ void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
281
+ {
282
+ const V128 *stateAsLanes = (const V128 *)states;
283
+ V128 lanes;
284
+ unsigned int i;
285
+ UINT64 *curData0 = (UINT64 *)data;
286
+ UINT64 *curData1 = (UINT64 *)(data+laneOffset*SnP_laneLengthInBytes);
287
+
288
+ #define Extr( argIndex ) lanes = LOAD128( stateAsLanes[argIndex] ), \
289
+ STORE64L( curData0[argIndex], lanes ), \
290
+ STORE64H( curData1[argIndex], lanes )
291
+
292
+ #if defined(KeccakP1600times2_useSSE2)
293
+ #define Extr2( argIndex ) lanes0 = LOAD128( stateAsLanes[argIndex] ), \
294
+ lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ), \
295
+ lanes = UNPACKL( lanes0, lanes1 ), \
296
+ lanes0 = UNPACKH( lanes0, lanes1 ), \
297
+ STORE128u( *(V128*)&curData0[argIndex], lanes ), \
298
+ STORE128u( *(V128*)&curData1[argIndex], lanes0 )
299
+ if ( laneCount >= 16 ) {
300
+ V128 lanes0, lanes1;
301
+ Extr2( 0 );
302
+ Extr2( 2 );
303
+ Extr2( 4 );
304
+ Extr2( 6 );
305
+ Extr2( 8 );
306
+ Extr2( 10 );
307
+ Extr2( 12 );
308
+ Extr2( 14 );
309
+ if ( laneCount >= 20 ) {
310
+ Extr2( 16 );
311
+ Extr2( 18 );
312
+ for(i=20; i<laneCount; i++)
313
+ Extr( i );
314
+ }
315
+ else {
316
+ for(i=16; i<laneCount; i++)
317
+ Extr( i );
318
+ }
319
+ }
320
+ #undef Extr2
321
+ #else
322
+ if ( laneCount >= 17 ) {
323
+ Extr( 0 );
324
+ Extr( 1 );
325
+ Extr( 2 );
326
+ Extr( 3 );
327
+ Extr( 4 );
328
+ Extr( 5 );
329
+ Extr( 6 );
330
+ Extr( 7 );
331
+ Extr( 8 );
332
+ Extr( 9 );
333
+ Extr( 10 );
334
+ Extr( 11 );
335
+ Extr( 12 );
336
+ Extr( 13 );
337
+ Extr( 14 );
338
+ Extr( 15 );
339
+ Extr( 16 );
340
+ if ( laneCount >= 21 ) {
341
+ Extr( 17 );
342
+ Extr( 18 );
343
+ Extr( 19 );
344
+ Extr( 20 );
345
+ for(i=21; i<laneCount; i++)
346
+ Extr( i );
347
+ }
348
+ else {
349
+ for(i=17; i<laneCount; i++)
350
+ Extr( i );
351
+ }
352
+ }
353
+ #endif
354
+ else {
355
+ for(i=0; i<laneCount; i++)
356
+ Extr( i );
357
+ }
358
+ #undef Extr
359
+ }
360
+
361
+ void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
362
+ {
363
+ unsigned int sizeLeft = length;
364
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
365
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
366
+ const unsigned char *curInput = input;
367
+ unsigned char *curOutput = output;
368
+ const UINT64 *statesAsLanes = (const UINT64 *)states;
369
+
370
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
371
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
372
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
373
+ if (bytesInLane > sizeLeft)
374
+ bytesInLane = sizeLeft;
375
+ sizeLeft -= bytesInLane;
376
+ do {
377
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
378
+ lane >>= 8;
379
+ } while ( --bytesInLane != 0);
380
+ lanePosition++;
381
+ }
382
+
383
+ while(sizeLeft >= SnP_laneLengthInBytes) {
384
+ *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
385
+ sizeLeft -= SnP_laneLengthInBytes;
386
+ lanePosition++;
387
+ curInput += SnP_laneLengthInBytes;
388
+ curOutput += SnP_laneLengthInBytes;
389
+ }
390
+
391
+ if (sizeLeft != 0) {
392
+ UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
393
+ do {
394
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
395
+ lane >>= 8;
396
+ } while ( --sizeLeft != 0);
397
+ }
398
+ }
399
+
400
+ void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
401
+ {
402
+ const UINT64 *stateAsLanes = (const UINT64 *)states;
403
+ unsigned int i;
404
+ const UINT64 *curInput0 = (UINT64 *)input;
405
+ const UINT64 *curInput1 = (UINT64 *)(input+laneOffset*SnP_laneLengthInBytes);
406
+ UINT64 *curOutput0 = (UINT64 *)output;
407
+ UINT64 *curOutput1 = (UINT64 *)(output+laneOffset*SnP_laneLengthInBytes);
408
+
409
+ #define ExtrXOR( argIndex ) curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes[2*(argIndex)], curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes[2*(argIndex)+1]
410
+
411
+ if ( laneCount >= 17 ) {
412
+ ExtrXOR( 0 );
413
+ ExtrXOR( 1 );
414
+ ExtrXOR( 2 );
415
+ ExtrXOR( 3 );
416
+ ExtrXOR( 4 );
417
+ ExtrXOR( 5 );
418
+ ExtrXOR( 6 );
419
+ ExtrXOR( 7 );
420
+ ExtrXOR( 8 );
421
+ ExtrXOR( 9 );
422
+ ExtrXOR( 10 );
423
+ ExtrXOR( 11 );
424
+ ExtrXOR( 12 );
425
+ ExtrXOR( 13 );
426
+ ExtrXOR( 14 );
427
+ ExtrXOR( 15 );
428
+ ExtrXOR( 16 );
429
+ if ( laneCount >= 21 ) {
430
+ ExtrXOR( 17 );
431
+ ExtrXOR( 18 );
432
+ ExtrXOR( 19 );
433
+ ExtrXOR( 20 );
434
+ for(i=21; i<laneCount; i++)
435
+ ExtrXOR( i );
436
+ }
437
+ else {
438
+ for(i=17; i<laneCount; i++)
439
+ ExtrXOR( i );
440
+ }
441
+ }
442
+ else {
443
+ for(i=0; i<laneCount; i++)
444
+ ExtrXOR( i );
445
+ }
446
+ #undef ExtrXOR
447
+ }
448
+
449
/*
 * Declares the V128 working variables used by the round macros:
 * 25 lanes each for A, B and E, plus 5 each for C and D.
 */
#define declareABCDE \
    V128 Aba, Abe, Abi, Abo, Abu, \
         Aga, Age, Agi, Ago, Agu, \
         Aka, Ake, Aki, Ako, Aku, \
         Ama, Ame, Ami, Amo, Amu, \
         Asa, Ase, Asi, Aso, Asu; \
    V128 Bba, Bbe, Bbi, Bbo, Bbu, \
         Bga, Bge, Bgi, Bgo, Bgu, \
         Bka, Bke, Bki, Bko, Bku, \
         Bma, Bme, Bmi, Bmo, Bmu, \
         Bsa, Bse, Bsi, Bso, Bsu; \
    V128 Ca, Ce, Ci, Co, Cu, \
         Da, De, Di, Do, Du; \
    V128 Eba, Ebe, Ebi, Ebo, Ebu, \
         Ega, Ege, Egi, Ego, Egu, \
         Eka, Eke, Eki, Eko, Eku, \
         Ema, Eme, Emi, Emo, Emu, \
         Esa, Ese, Esi, Eso, Esu;
467
+
468
/* XORs the five A lanes of column `col` (a, e, i, o or u) into C<col>. */
#define prepareThetaColumn(col) \
    C##col = XOR128(Ab##col, XOR128(Ag##col, XOR128(Ak##col, XOR128(Am##col, As##col))))

/* Computes the five column parities Ca..Cu of the A lanes. */
#define prepareTheta \
    prepareThetaColumn(a); \
    prepareThetaColumn(e); \
    prepareThetaColumn(i); \
    prepareThetaColumn(o); \
    prepareThetaColumn(u);
474
+
475
+ /* --- Theta Rho Pi Chi Iota Prepare-theta: one round reading the A lanes (which are updated in place with Da..Du) and the column parities Ca..Cu, writing the E lanes, XORing KeccakF1600RoundConstants[i] into E##ba, and leaving the column parities of E in Ca..Cu */
475
+ /* --- 64-bit lanes mapped to 64-bit words; every variable is a V128 and the XOR128/XOReq128/ROL64in128/ANDnu128/CONST128_64 helpers are defined elsewhere */
476
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
477
+ Da = XOR128(Cu, ROL64in128(Ce, 1)); \
478
+ De = XOR128(Ca, ROL64in128(Ci, 1)); \
479
+ Di = XOR128(Ce, ROL64in128(Co, 1)); \
480
+ Do = XOR128(Ci, ROL64in128(Cu, 1)); \
481
+ Du = XOR128(Co, ROL64in128(Ca, 1)); \
482
+ \
483
+ XOReq128(A##ba, Da); \
484
+ Bba = A##ba; \
485
+ XOReq128(A##ge, De); \
486
+ Bbe = ROL64in128(A##ge, 44); \
487
+ XOReq128(A##ki, Di); \
488
+ Bbi = ROL64in128(A##ki, 43); \
489
+ E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
490
+ XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
491
+ Ca = E##ba; \
492
+ XOReq128(A##mo, Do); \
493
+ Bbo = ROL64in128(A##mo, 21); \
494
+ E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
495
+ Ce = E##be; \
496
+ XOReq128(A##su, Du); \
497
+ Bbu = ROL64in128(A##su, 14); \
498
+ E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
499
+ Ci = E##bi; \
500
+ E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
501
+ Co = E##bo; \
502
+ E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
503
+ Cu = E##bu; \
504
+ \
505
+ XOReq128(A##bo, Do); \
506
+ Bga = ROL64in128(A##bo, 28); \
507
+ XOReq128(A##gu, Du); \
508
+ Bge = ROL64in128(A##gu, 20); \
509
+ XOReq128(A##ka, Da); \
510
+ Bgi = ROL64in128(A##ka, 3); \
511
+ E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
512
+ XOReq128(Ca, E##ga); \
513
+ XOReq128(A##me, De); \
514
+ Bgo = ROL64in128(A##me, 45); \
515
+ E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
516
+ XOReq128(Ce, E##ge); \
517
+ XOReq128(A##si, Di); \
518
+ Bgu = ROL64in128(A##si, 61); \
519
+ E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
520
+ XOReq128(Ci, E##gi); \
521
+ E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
522
+ XOReq128(Co, E##go); \
523
+ E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
524
+ XOReq128(Cu, E##gu); \
525
+ \
526
+ XOReq128(A##be, De); \
527
+ Bka = ROL64in128(A##be, 1); \
528
+ XOReq128(A##gi, Di); \
529
+ Bke = ROL64in128(A##gi, 6); \
530
+ XOReq128(A##ko, Do); \
531
+ Bki = ROL64in128(A##ko, 25); \
532
+ E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
533
+ XOReq128(Ca, E##ka); \
534
+ XOReq128(A##mu, Du); \
535
+ Bko = ROL64in128_8(A##mu); \
536
+ E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
537
+ XOReq128(Ce, E##ke); \
538
+ XOReq128(A##sa, Da); \
539
+ Bku = ROL64in128(A##sa, 18); \
540
+ E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
541
+ XOReq128(Ci, E##ki); \
542
+ E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
543
+ XOReq128(Co, E##ko); \
544
+ E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
545
+ XOReq128(Cu, E##ku); \
546
+ \
547
+ XOReq128(A##bu, Du); \
548
+ Bma = ROL64in128(A##bu, 27); \
549
+ XOReq128(A##ga, Da); \
550
+ Bme = ROL64in128(A##ga, 36); \
551
+ XOReq128(A##ke, De); \
552
+ Bmi = ROL64in128(A##ke, 10); \
553
+ E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
554
+ XOReq128(Ca, E##ma); \
555
+ XOReq128(A##mi, Di); \
556
+ Bmo = ROL64in128(A##mi, 15); \
557
+ E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
558
+ XOReq128(Ce, E##me); \
559
+ XOReq128(A##so, Do); \
560
+ Bmu = ROL64in128_56(A##so); \
561
+ E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
562
+ XOReq128(Ci, E##mi); \
563
+ E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
564
+ XOReq128(Co, E##mo); \
565
+ E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
566
+ XOReq128(Cu, E##mu); \
567
+ \
568
+ XOReq128(A##bi, Di); \
569
+ Bsa = ROL64in128(A##bi, 62); \
570
+ XOReq128(A##go, Do); \
571
+ Bse = ROL64in128(A##go, 55); \
572
+ XOReq128(A##ku, Du); \
573
+ Bsi = ROL64in128(A##ku, 39); \
574
+ E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
575
+ XOReq128(Ca, E##sa); \
576
+ XOReq128(A##ma, Da); \
577
+ Bso = ROL64in128(A##ma, 41); \
578
+ E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
579
+ XOReq128(Ce, E##se); \
580
+ XOReq128(A##se, De); \
581
+ Bsu = ROL64in128(A##se, 2); \
582
+ E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
583
+ XOReq128(Ci, E##si); \
584
+ E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
585
+ XOReq128(Co, E##so); \
586
+ E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
587
+ XOReq128(Cu, E##su); \
588
+ \
590
+
591
+ /* --- Theta Rho Pi Chi Iota: one round reading the A lanes (which are updated in place with Da..Du) and the column parities Ca..Cu, writing the E lanes and XORing KeccakF1600RoundConstants[i] into E##ba; unlike the Prepare-theta variant, Ca..Cu are not recomputed */
592
+ /* --- 64-bit lanes mapped to 64-bit words; every variable is a V128 and the XOR128/XOReq128/ROL64in128/ANDnu128/CONST128_64 helpers are defined elsewhere */
593
+ #define thetaRhoPiChiIota(i, A, E) \
594
+ Da = XOR128(Cu, ROL64in128(Ce, 1)); \
595
+ De = XOR128(Ca, ROL64in128(Ci, 1)); \
596
+ Di = XOR128(Ce, ROL64in128(Co, 1)); \
597
+ Do = XOR128(Ci, ROL64in128(Cu, 1)); \
598
+ Du = XOR128(Co, ROL64in128(Ca, 1)); \
599
+ \
600
+ XOReq128(A##ba, Da); \
601
+ Bba = A##ba; \
602
+ XOReq128(A##ge, De); \
603
+ Bbe = ROL64in128(A##ge, 44); \
604
+ XOReq128(A##ki, Di); \
605
+ Bbi = ROL64in128(A##ki, 43); \
606
+ E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
607
+ XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
608
+ XOReq128(A##mo, Do); \
609
+ Bbo = ROL64in128(A##mo, 21); \
610
+ E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
611
+ XOReq128(A##su, Du); \
612
+ Bbu = ROL64in128(A##su, 14); \
613
+ E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
614
+ E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
615
+ E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
616
+ \
617
+ XOReq128(A##bo, Do); \
618
+ Bga = ROL64in128(A##bo, 28); \
619
+ XOReq128(A##gu, Du); \
620
+ Bge = ROL64in128(A##gu, 20); \
621
+ XOReq128(A##ka, Da); \
622
+ Bgi = ROL64in128(A##ka, 3); \
623
+ E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
624
+ XOReq128(A##me, De); \
625
+ Bgo = ROL64in128(A##me, 45); \
626
+ E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
627
+ XOReq128(A##si, Di); \
628
+ Bgu = ROL64in128(A##si, 61); \
629
+ E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
630
+ E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
631
+ E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
632
+ \
633
+ XOReq128(A##be, De); \
634
+ Bka = ROL64in128(A##be, 1); \
635
+ XOReq128(A##gi, Di); \
636
+ Bke = ROL64in128(A##gi, 6); \
637
+ XOReq128(A##ko, Do); \
638
+ Bki = ROL64in128(A##ko, 25); \
639
+ E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
640
+ XOReq128(A##mu, Du); \
641
+ Bko = ROL64in128_8(A##mu); \
642
+ E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
643
+ XOReq128(A##sa, Da); \
644
+ Bku = ROL64in128(A##sa, 18); \
645
+ E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
646
+ E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
647
+ E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
648
+ \
649
+ XOReq128(A##bu, Du); \
650
+ Bma = ROL64in128(A##bu, 27); \
651
+ XOReq128(A##ga, Da); \
652
+ Bme = ROL64in128(A##ga, 36); \
653
+ XOReq128(A##ke, De); \
654
+ Bmi = ROL64in128(A##ke, 10); \
655
+ E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
656
+ XOReq128(A##mi, Di); \
657
+ Bmo = ROL64in128(A##mi, 15); \
658
+ E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
659
+ XOReq128(A##so, Do); \
660
+ Bmu = ROL64in128_56(A##so); \
661
+ E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
662
+ E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
663
+ E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
664
+ \
665
+ XOReq128(A##bi, Di); \
666
+ Bsa = ROL64in128(A##bi, 62); \
667
+ XOReq128(A##go, Do); \
668
+ Bse = ROL64in128(A##go, 55); \
669
+ XOReq128(A##ku, Du); \
670
+ Bsi = ROL64in128(A##ku, 39); \
671
+ E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
672
+ XOReq128(A##ma, Da); \
673
+ Bso = ROL64in128(A##ma, 41); \
674
+ E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
675
+ XOReq128(A##se, De); \
676
+ Bsu = ROL64in128(A##se, 2); \
677
+ E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
678
+ E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
679
+ E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
680
+ \
681
+
682
+ static ALIGN(KeccakP1600times2_statesAlignment) const UINT64 KeccakF1600RoundConstants[24] = {
683
+ 0x0000000000000001ULL,
684
+ 0x0000000000008082ULL,
685
+ 0x800000000000808aULL,
686
+ 0x8000000080008000ULL,
687
+ 0x000000000000808bULL,
688
+ 0x0000000080000001ULL,
689
+ 0x8000000080008081ULL,
690
+ 0x8000000000008009ULL,
691
+ 0x000000000000008aULL,
692
+ 0x0000000000000088ULL,
693
+ 0x0000000080008009ULL,
694
+ 0x000000008000000aULL,
695
+ 0x000000008000808bULL,
696
+ 0x800000000000008bULL,
697
+ 0x8000000000008089ULL,
698
+ 0x8000000000008003ULL,
699
+ 0x8000000000008002ULL,
700
+ 0x8000000000000080ULL,
701
+ 0x000000000000800aULL,
702
+ 0x800000008000000aULL,
703
+ 0x8000000080008081ULL,
704
+ 0x8000000000008080ULL,
705
+ 0x0000000080000001ULL,
706
+ 0x8000000080008008ULL};
707
+
708
/*
 * Loads the 25 elements of `lanes` into the variables <dst>ba .. <dst>su,
 * in index order (one row below per group of five).
 */
#define copyFromState(dst, lanes) \
    dst##ba = LOAD128(lanes[ 0]); dst##be = LOAD128(lanes[ 1]); dst##bi = LOAD128(lanes[ 2]); dst##bo = LOAD128(lanes[ 3]); dst##bu = LOAD128(lanes[ 4]); \
    dst##ga = LOAD128(lanes[ 5]); dst##ge = LOAD128(lanes[ 6]); dst##gi = LOAD128(lanes[ 7]); dst##go = LOAD128(lanes[ 8]); dst##gu = LOAD128(lanes[ 9]); \
    dst##ka = LOAD128(lanes[10]); dst##ke = LOAD128(lanes[11]); dst##ki = LOAD128(lanes[12]); dst##ko = LOAD128(lanes[13]); dst##ku = LOAD128(lanes[14]); \
    dst##ma = LOAD128(lanes[15]); dst##me = LOAD128(lanes[16]); dst##mi = LOAD128(lanes[17]); dst##mo = LOAD128(lanes[18]); dst##mu = LOAD128(lanes[19]); \
    dst##sa = LOAD128(lanes[20]); dst##se = LOAD128(lanes[21]); dst##si = LOAD128(lanes[22]); dst##so = LOAD128(lanes[23]); dst##su = LOAD128(lanes[24]);
734
+
735
/*
 * Stores the variables <src>ba .. <src>su into the 25 elements of `lanes`,
 * in index order (one row below per group of five).
 */
#define copyToState(lanes, src) \
    STORE128(lanes[ 0], src##ba); STORE128(lanes[ 1], src##be); STORE128(lanes[ 2], src##bi); STORE128(lanes[ 3], src##bo); STORE128(lanes[ 4], src##bu); \
    STORE128(lanes[ 5], src##ga); STORE128(lanes[ 6], src##ge); STORE128(lanes[ 7], src##gi); STORE128(lanes[ 8], src##go); STORE128(lanes[ 9], src##gu); \
    STORE128(lanes[10], src##ka); STORE128(lanes[11], src##ke); STORE128(lanes[12], src##ki); STORE128(lanes[13], src##ko); STORE128(lanes[14], src##ku); \
    STORE128(lanes[15], src##ma); STORE128(lanes[16], src##me); STORE128(lanes[17], src##mi); STORE128(lanes[18], src##mo); STORE128(lanes[19], src##mu); \
    STORE128(lanes[20], src##sa); STORE128(lanes[21], src##se); STORE128(lanes[22], src##si); STORE128(lanes[23], src##so); STORE128(lanes[24], src##su);
761
+
762
/* Assigns each of the 25 variables <src>ba .. <src>su to its <dst> counterpart. */
#define copyStateVariables(dst, src) \
    dst##ba = src##ba; dst##be = src##be; dst##bi = src##bi; dst##bo = src##bo; dst##bu = src##bu; \
    dst##ga = src##ga; dst##ge = src##ge; dst##gi = src##gi; dst##go = src##go; dst##gu = src##gu; \
    dst##ka = src##ka; dst##ke = src##ke; dst##ki = src##ki; dst##ko = src##ko; dst##ku = src##ku; \
    dst##ma = src##ma; dst##me = src##me; dst##mi = src##mi; dst##mo = src##mo; dst##mu = src##mu; \
    dst##sa = src##sa; dst##se = src##se; dst##si = src##si; dst##so = src##so; dst##su = src##su;
788
+
789
+ #ifdef KeccakP1600times2_fullUnrolling
790
+ #define FullUnrolling
791
+ #else
792
+ #define Unrolling KeccakP1600times2_unrolling
793
+ #endif
794
+ #include "KeccakP-1600-unrolling.macros"
795
+
796
+ void KeccakP1600times2_PermuteAll_24rounds(void *states)
797
+ {
798
+ V128 *statesAsLanes = (V128 *)states;
799
+ declareABCDE
800
+ #ifndef KeccakP1600times2_fullUnrolling
801
+ unsigned int i;
802
+ #endif
803
+
804
+ copyFromState(A, statesAsLanes)
805
+ rounds24
806
+ copyToState(statesAsLanes, A)
807
+ #if defined(UseMMX)
808
+ _mm_empty();
809
+ #endif
810
+ }
811
+
812
+ void KeccakP1600times2_PermuteAll_12rounds(void *states)
813
+ {
814
+ V128 *statesAsLanes = (V128 *)states;
815
+ declareABCDE
816
+ #ifndef KeccakP1600times2_fullUnrolling
817
+ unsigned int i;
818
+ #endif
819
+
820
+ copyFromState(A, statesAsLanes)
821
+ rounds12
822
+ copyToState(statesAsLanes, A)
823
+ #if defined(UseMMX)
824
+ _mm_empty();
825
+ #endif
826
+ }
827
+
828
+ void KeccakP1600times2_PermuteAll_6rounds(void *states)
829
+ {
830
+ V128 *statesAsLanes = (V128 *)states;
831
+ declareABCDE
832
+ #ifndef KeccakP1600times2_fullUnrolling
833
+ unsigned int i;
834
+ #endif
835
+
836
+ copyFromState(A, statesAsLanes)
837
+ rounds6
838
+ copyToState(statesAsLanes, A)
839
+ #if defined(UseMMX)
840
+ _mm_empty();
841
+ #endif
842
+ }
843
+
844
+ void KeccakP1600times2_PermuteAll_4rounds(void *states)
845
+ {
846
+ V128 *statesAsLanes = (V128 *)states;
847
+ declareABCDE
848
+ #ifndef KeccakP1600times2_fullUnrolling
849
+ unsigned int i;
850
+ #endif
851
+
852
+ copyFromState(A, statesAsLanes)
853
+ rounds4
854
+ copyToState(statesAsLanes, A)
855
+ #if defined(UseMMX)
856
+ _mm_empty();
857
+ #endif
858
+ }
859
+
860
+ size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
861
+ {
862
+ if (laneCount == 21) {
863
+ #if 1
864
+ unsigned int i;
865
+ const unsigned char *dataStart = data;
866
+
867
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
868
+ V128 *stateAsLanes = (V128 *)states;
869
+ unsigned int i;
870
+ const UINT64 *curData0 = (const UINT64 *)data;
871
+ const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
872
+ #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
873
+ XOR_In( 0 );
874
+ XOR_In( 1 );
875
+ XOR_In( 2 );
876
+ XOR_In( 3 );
877
+ XOR_In( 4 );
878
+ XOR_In( 5 );
879
+ XOR_In( 6 );
880
+ XOR_In( 7 );
881
+ XOR_In( 8 );
882
+ XOR_In( 9 );
883
+ XOR_In( 10 );
884
+ XOR_In( 11 );
885
+ XOR_In( 12 );
886
+ XOR_In( 13 );
887
+ XOR_In( 14 );
888
+ XOR_In( 15 );
889
+ XOR_In( 16 );
890
+ XOR_In( 17 );
891
+ XOR_In( 18 );
892
+ XOR_In( 19 );
893
+ XOR_In( 20 );
894
+ #undef XOR_In
895
+ KeccakP1600times2_PermuteAll_24rounds(states);
896
+ data += laneOffsetSerial*8;
897
+ dataByteLen -= laneOffsetSerial*8;
898
+ }
899
+ return data - dataStart;
900
+ #else
901
+ unsigned int i;
902
+ const unsigned char *dataStart = data;
903
+ const UINT64 *curData0 = (const UINT64 *)data;
904
+ const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
905
+ V128 *statesAsLanes = (V128 *)states;
906
+ declareABCDE
907
+
908
+ copyFromState(A, statesAsLanes)
909
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
910
+ #define XOR_In( Xxx, argIndex ) XOReq128( Xxx, LOAD6464(curData1[argIndex], curData0[argIndex]))
911
+ XOR_In( Aba, 0 );
912
+ XOR_In( Abe, 1 );
913
+ XOR_In( Abi, 2 );
914
+ XOR_In( Abo, 3 );
915
+ XOR_In( Abu, 4 );
916
+ XOR_In( Aga, 5 );
917
+ XOR_In( Age, 6 );
918
+ XOR_In( Agi, 7 );
919
+ XOR_In( Ago, 8 );
920
+ XOR_In( Agu, 9 );
921
+ XOR_In( Aka, 10 );
922
+ XOR_In( Ake, 11 );
923
+ XOR_In( Aki, 12 );
924
+ XOR_In( Ako, 13 );
925
+ XOR_In( Aku, 14 );
926
+ XOR_In( Ama, 15 );
927
+ XOR_In( Ame, 16 );
928
+ XOR_In( Ami, 17 );
929
+ XOR_In( Amo, 18 );
930
+ XOR_In( Amu, 19 );
931
+ XOR_In( Asa, 20 );
932
+ #undef XOR_In
933
+ rounds24
934
+ curData0 += laneOffsetSerial;
935
+ curData1 += laneOffsetSerial;
936
+ dataByteLen -= laneOffsetSerial*8;
937
+ }
938
+ copyToState(statesAsLanes, A)
939
+ return (const unsigned char *)curData0 - dataStart;
940
+ #endif
941
+ }
942
+ else {
943
+ unsigned int i;
944
+ const unsigned char *dataStart = data;
945
+
946
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
947
+ KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
948
+ KeccakP1600times2_PermuteAll_24rounds(states);
949
+ data += laneOffsetSerial*8;
950
+ dataByteLen -= laneOffsetSerial*8;
951
+ }
952
+ return data - dataStart;
953
+ }
954
+ }