sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,957 @@
1
+ /*
2
+ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
+
4
+ Implementation by Gilles Van Assche, hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to the Keccak Team website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
+ Please refer to PlSnP-documentation.h for more details.
17
+
18
+ This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ */
21
+
22
+ #include <stdint.h>
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+ #include <string.h>
26
+ #include <emmintrin.h>
27
+ #include <pmmintrin.h>
28
+ #include <tmmintrin.h>
29
+ #include "SIMD128-config.h"
30
+ #if defined(KeccakP1600times2_useXOP)
31
+ #include <x86intrin.h>
32
+ #endif
33
+ #include "align.h"
34
+ #include "KeccakP-1600-times2-SnP.h"
35
+
36
+ #include "brg_endian.h"
37
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
38
+ #error Expecting a little-endian platform
39
+ #endif
40
+
41
+ typedef __m128i V128; /* 128-bit vector: one 64-bit lane of instance 0 (low half) paired with the same lane of instance 1 (high half) */
42
+
43
/*
 * Index, in units of 64-bit words, of lane `lanePosition` of instance
 * `instanceIndex` inside the interleaved two-instance state: the lanes of
 * both instances alternate, instance 0 first.
 * Both arguments are parenthesized so that expression arguments expand safely.
 */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + (instanceIndex))
44
+
45
/*
 * Thin wrappers around the SSE intrinsics used by this file. They are only
 * available when the configuration header defines KeccakP1600times2_useSSE.
 */
#if defined(KeccakP1600times2_useSSE)
    /* (~a) & b */
    #define ANDnu128(a, b)      _mm_andnot_si128(a, b)
    /* Aligned 128-bit loads: the operand must sit on a 16-byte boundary. */
    #define CONST128(a)         _mm_load_si128((const V128 *)&(a))
    #define LOAD128(a)          _mm_load_si128((const V128 *)&(a))
    /* Unaligned 128-bit load. */
    #define LOAD128u(a)         _mm_loadu_si128((const V128 *)&(a))
    /* Builds a vector from two 64-bit values: `a` is the high half, `b` the low half. */
    #define LOAD6464(a, b)      _mm_set_epi64x(a, b)
    /* Broadcasts one 64-bit value to both halves. */
    #define CONST128_64(a)      _mm_set1_epi64x(a)
    #if defined(KeccakP1600times2_useXOP)
        /* XOP provides a native per-64-bit rotate. */
        #define ROL64in128(a, o)    _mm_roti_epi64(a, o)
        #define ROL64in128_8(a)     ROL64in128(a, 8)
        #define ROL64in128_56(a)    ROL64in128(a, 56)
    #else
        /* Rotate each 64-bit half left by `o` bits using two shifts and an OR. */
        #define ROL64in128(a, o)    _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
        /* Rotations by a multiple of 8 bits are done as byte shuffles. */
        #define ROL64in128_8(a)     _mm_shuffle_epi8(a, CONST128(rho8))
        #define ROL64in128_56(a)    _mm_shuffle_epi8(a, CONST128(rho56))
/*
 * Byte-shuffle masks rotating each 64-bit half left by 8 and by 56 bits.
 * NOTE(review): CONST128 uses the aligned load, but these arrays carry no
 * explicit 16-byte alignment; confirm the toolchain aligns them or add an
 * alignment attribute.
 */
static const uint64_t rho8[2] = {0x0605040302010007, 0x0E0D0C0B0A09080F};
static const uint64_t rho56[2] = {0x0007060504030201, 0x080F0E0D0C0B0A09};
    #endif
    /* Aligned and unaligned 128-bit stores. */
    #define STORE128(a, b)      _mm_store_si128((V128 *)&(a), b)
    #define STORE128u(a, b)     _mm_storeu_si128((V128 *)&(a), b)
    /* Store only the low / high 64-bit half of `b` at `a`. */
    #define STORE64L(a, b)      _mm_storel_epi64((__m128i *)&(a), b)
    #define STORE64H(a, b)      _mm_storeh_pi((__m64 *)&(a), _mm_castsi128_ps(b))
    #define XOR128(a, b)        _mm_xor_si128(a, b)
    /* a ^= b; fully parenthesized so it is safe inside larger expressions. */
    #define XOReq128(a, b)      ((a) = _mm_xor_si128((a), (b)))
    #define ZERO128()           _mm_setzero_si128()
    #if defined(KeccakP1600times2_useSSE2)
        /* Interleave the low (resp. high) 64-bit halves of `a` and `b`. */
        #define UNPACKL( a, b )     _mm_unpacklo_epi64((a), (b))
        #define UNPACKH( a, b )     _mm_unpackhi_epi64((a), (b))
    #endif
#endif

/* Size in bytes of one Keccak-p[1600] lane. */
#define SnP_laneLengthInBytes 8
77
+
78
+ void KeccakP1600times2_InitializeAll(void *states)
79
+ {
80
+ memset(states, 0, KeccakP1600times2_statesSizeInBytes);
81
+ }
82
+
83
+ void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
84
+ {
85
+ unsigned int sizeLeft = length;
86
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
87
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
88
+ const unsigned char *curData = data;
89
+ uint64_t *statesAsLanes = (uint64_t *)states;
90
+
91
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
92
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
93
+ uint64_t lane = 0;
94
+ if (bytesInLane > sizeLeft)
95
+ bytesInLane = sizeLeft;
96
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
97
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
98
+ sizeLeft -= bytesInLane;
99
+ lanePosition++;
100
+ curData += bytesInLane;
101
+ }
102
+
103
+ while(sizeLeft >= SnP_laneLengthInBytes) {
104
+ uint64_t lane = *((const uint64_t*)curData);
105
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
106
+ sizeLeft -= SnP_laneLengthInBytes;
107
+ lanePosition++;
108
+ curData += SnP_laneLengthInBytes;
109
+ }
110
+
111
+ if (sizeLeft > 0) {
112
+ uint64_t lane = 0;
113
+ memcpy(&lane, curData, sizeLeft);
114
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
115
+ }
116
+ }
117
+
118
+ void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
119
+ {
120
+ V128 *stateAsLanes = (V128 *)states;
121
+ unsigned int i;
122
+ const uint64_t *curData0 = (const uint64_t *)data;
123
+ const uint64_t *curData1 = (const uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
124
+ #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
125
+ if ( laneCount >= 17 ) {
126
+ XOR_In( 0 );
127
+ XOR_In( 1 );
128
+ XOR_In( 2 );
129
+ XOR_In( 3 );
130
+ XOR_In( 4 );
131
+ XOR_In( 5 );
132
+ XOR_In( 6 );
133
+ XOR_In( 7 );
134
+ XOR_In( 8 );
135
+ XOR_In( 9 );
136
+ XOR_In( 10 );
137
+ XOR_In( 11 );
138
+ XOR_In( 12 );
139
+ XOR_In( 13 );
140
+ XOR_In( 14 );
141
+ XOR_In( 15 );
142
+ XOR_In( 16 );
143
+ if ( laneCount >= 21 ) {
144
+ XOR_In( 17 );
145
+ XOR_In( 18 );
146
+ XOR_In( 19 );
147
+ XOR_In( 20 );
148
+ for(i=21; i<laneCount; i++)
149
+ XOR_In( i );
150
+ }
151
+ else {
152
+ for(i=17; i<laneCount; i++)
153
+ XOR_In( i );
154
+ }
155
+ }
156
+ else {
157
+ for(i=0; i<laneCount; i++)
158
+ XOR_In( i );
159
+ }
160
+ #undef XOR_In
161
+ }
162
+
163
+ void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
164
+ {
165
+ unsigned int sizeLeft = length;
166
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
167
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
168
+ const unsigned char *curData = data;
169
+ uint64_t *statesAsLanes = (uint64_t *)states;
170
+
171
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
172
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
173
+ if (bytesInLane > sizeLeft)
174
+ bytesInLane = sizeLeft;
175
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
176
+ sizeLeft -= bytesInLane;
177
+ lanePosition++;
178
+ curData += bytesInLane;
179
+ }
180
+
181
+ while(sizeLeft >= SnP_laneLengthInBytes) {
182
+ uint64_t lane = *((const uint64_t*)curData);
183
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
184
+ sizeLeft -= SnP_laneLengthInBytes;
185
+ lanePosition++;
186
+ curData += SnP_laneLengthInBytes;
187
+ }
188
+
189
+ if (sizeLeft > 0) {
190
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
191
+ }
192
+ }
193
+
194
+ void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
195
+ {
196
+ V128 *stateAsLanes = (V128 *)states;
197
+ unsigned int i;
198
+ const uint64_t *curData0 = (const uint64_t *)data;
199
+ const uint64_t *curData1 = (const uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
200
+ #define OverWr( argIndex ) STORE128(stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
201
+ if ( laneCount >= 17 ) {
202
+ OverWr( 0 );
203
+ OverWr( 1 );
204
+ OverWr( 2 );
205
+ OverWr( 3 );
206
+ OverWr( 4 );
207
+ OverWr( 5 );
208
+ OverWr( 6 );
209
+ OverWr( 7 );
210
+ OverWr( 8 );
211
+ OverWr( 9 );
212
+ OverWr( 10 );
213
+ OverWr( 11 );
214
+ OverWr( 12 );
215
+ OverWr( 13 );
216
+ OverWr( 14 );
217
+ OverWr( 15 );
218
+ OverWr( 16 );
219
+ if ( laneCount >= 21 ) {
220
+ OverWr( 17 );
221
+ OverWr( 18 );
222
+ OverWr( 19 );
223
+ OverWr( 20 );
224
+ for(i=21; i<laneCount; i++)
225
+ OverWr( i );
226
+ }
227
+ else {
228
+ for(i=17; i<laneCount; i++)
229
+ OverWr( i );
230
+ }
231
+ }
232
+ else {
233
+ for(i=0; i<laneCount; i++)
234
+ OverWr( i );
235
+ }
236
+ #undef OverWr
237
+ }
238
+
239
+ void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
240
+ {
241
+ unsigned int sizeLeft = byteCount;
242
+ unsigned int lanePosition = 0;
243
+ uint64_t *statesAsLanes = (uint64_t *)states;
244
+
245
+ while(sizeLeft >= SnP_laneLengthInBytes) {
246
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
247
+ sizeLeft -= SnP_laneLengthInBytes;
248
+ lanePosition++;
249
+ }
250
+
251
+ if (sizeLeft > 0) {
252
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
253
+ }
254
+ }
255
+
256
+ void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
257
+ {
258
+ unsigned int sizeLeft = length;
259
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
260
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
261
+ unsigned char *curData = data;
262
+ const uint64_t *statesAsLanes = (const uint64_t *)states;
263
+
264
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
265
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
266
+ if (bytesInLane > sizeLeft)
267
+ bytesInLane = sizeLeft;
268
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
269
+ sizeLeft -= bytesInLane;
270
+ lanePosition++;
271
+ curData += bytesInLane;
272
+ }
273
+
274
+ while(sizeLeft >= SnP_laneLengthInBytes) {
275
+ *(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
276
+ sizeLeft -= SnP_laneLengthInBytes;
277
+ lanePosition++;
278
+ curData += SnP_laneLengthInBytes;
279
+ }
280
+
281
+ if (sizeLeft > 0) {
282
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
283
+ }
284
+ }
285
+
286
+ void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
287
+ {
288
+ const V128 *stateAsLanes = (const V128 *)states;
289
+ V128 lanes;
290
+ unsigned int i;
291
+ uint64_t *curData0 = (uint64_t *)data;
292
+ uint64_t *curData1 = (uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
293
+
294
+ #define Extr( argIndex ) lanes = LOAD128( stateAsLanes[argIndex] ), \
295
+ STORE64L( curData0[argIndex], lanes ), \
296
+ STORE64H( curData1[argIndex], lanes )
297
+
298
+ #if defined(KeccakP1600times2_useSSE2)
299
+ #define Extr2( argIndex ) lanes0 = LOAD128( stateAsLanes[argIndex] ), \
300
+ lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ), \
301
+ lanes = UNPACKL( lanes0, lanes1 ), \
302
+ lanes0 = UNPACKH( lanes0, lanes1 ), \
303
+ STORE128u( *(V128*)&curData0[argIndex], lanes ), \
304
+ STORE128u( *(V128*)&curData1[argIndex], lanes0 )
305
+ if ( laneCount >= 16 ) {
306
+ V128 lanes0, lanes1;
307
+ Extr2( 0 );
308
+ Extr2( 2 );
309
+ Extr2( 4 );
310
+ Extr2( 6 );
311
+ Extr2( 8 );
312
+ Extr2( 10 );
313
+ Extr2( 12 );
314
+ Extr2( 14 );
315
+ if ( laneCount >= 20 ) {
316
+ Extr2( 16 );
317
+ Extr2( 18 );
318
+ for(i=20; i<laneCount; i++)
319
+ Extr( i );
320
+ }
321
+ else {
322
+ for(i=16; i<laneCount; i++)
323
+ Extr( i );
324
+ }
325
+ }
326
+ #undef Extr2
327
+ #else
328
+ if ( laneCount >= 17 ) {
329
+ Extr( 0 );
330
+ Extr( 1 );
331
+ Extr( 2 );
332
+ Extr( 3 );
333
+ Extr( 4 );
334
+ Extr( 5 );
335
+ Extr( 6 );
336
+ Extr( 7 );
337
+ Extr( 8 );
338
+ Extr( 9 );
339
+ Extr( 10 );
340
+ Extr( 11 );
341
+ Extr( 12 );
342
+ Extr( 13 );
343
+ Extr( 14 );
344
+ Extr( 15 );
345
+ Extr( 16 );
346
+ if ( laneCount >= 21 ) {
347
+ Extr( 17 );
348
+ Extr( 18 );
349
+ Extr( 19 );
350
+ Extr( 20 );
351
+ for(i=21; i<laneCount; i++)
352
+ Extr( i );
353
+ }
354
+ else {
355
+ for(i=17; i<laneCount; i++)
356
+ Extr( i );
357
+ }
358
+ }
359
+ #endif
360
+ else {
361
+ for(i=0; i<laneCount; i++)
362
+ Extr( i );
363
+ }
364
+ #undef Extr
365
+ }
366
+
367
+ void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
368
+ {
369
+ unsigned int sizeLeft = length;
370
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
371
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
372
+ const unsigned char *curInput = input;
373
+ unsigned char *curOutput = output;
374
+ const uint64_t *statesAsLanes = (const uint64_t *)states;
375
+
376
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
377
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
378
+ uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
379
+ if (bytesInLane > sizeLeft)
380
+ bytesInLane = sizeLeft;
381
+ sizeLeft -= bytesInLane;
382
+ do {
383
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
384
+ lane >>= 8;
385
+ } while ( --bytesInLane != 0);
386
+ lanePosition++;
387
+ }
388
+
389
+ while(sizeLeft >= SnP_laneLengthInBytes) {
390
+ *((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
391
+ sizeLeft -= SnP_laneLengthInBytes;
392
+ lanePosition++;
393
+ curInput += SnP_laneLengthInBytes;
394
+ curOutput += SnP_laneLengthInBytes;
395
+ }
396
+
397
+ if (sizeLeft != 0) {
398
+ uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
399
+ do {
400
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
401
+ lane >>= 8;
402
+ } while ( --sizeLeft != 0);
403
+ }
404
+ }
405
+
406
+ void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
407
+ {
408
+ const uint64_t *stateAsLanes = (const uint64_t *)states;
409
+ unsigned int i;
410
+ const uint64_t *curInput0 = (uint64_t *)input;
411
+ const uint64_t *curInput1 = (uint64_t *)(input+laneOffset*SnP_laneLengthInBytes);
412
+ uint64_t *curOutput0 = (uint64_t *)output;
413
+ uint64_t *curOutput1 = (uint64_t *)(output+laneOffset*SnP_laneLengthInBytes);
414
+
415
+ #define ExtrXOR( argIndex ) curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes[2*(argIndex)], curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes[2*(argIndex)+1]
416
+
417
+ if ( laneCount >= 17 ) {
418
+ ExtrXOR( 0 );
419
+ ExtrXOR( 1 );
420
+ ExtrXOR( 2 );
421
+ ExtrXOR( 3 );
422
+ ExtrXOR( 4 );
423
+ ExtrXOR( 5 );
424
+ ExtrXOR( 6 );
425
+ ExtrXOR( 7 );
426
+ ExtrXOR( 8 );
427
+ ExtrXOR( 9 );
428
+ ExtrXOR( 10 );
429
+ ExtrXOR( 11 );
430
+ ExtrXOR( 12 );
431
+ ExtrXOR( 13 );
432
+ ExtrXOR( 14 );
433
+ ExtrXOR( 15 );
434
+ ExtrXOR( 16 );
435
+ if ( laneCount >= 21 ) {
436
+ ExtrXOR( 17 );
437
+ ExtrXOR( 18 );
438
+ ExtrXOR( 19 );
439
+ ExtrXOR( 20 );
440
+ for(i=21; i<laneCount; i++)
441
+ ExtrXOR( i );
442
+ }
443
+ else {
444
+ for(i=17; i<laneCount; i++)
445
+ ExtrXOR( i );
446
+ }
447
+ }
448
+ else {
449
+ for(i=0; i<laneCount; i++)
450
+ ExtrXOR( i );
451
+ }
452
+ #undef ExtrXOR
453
+ }
454
+
455
/*
 * Declares every V128 working variable used by the round macros:
 *   A**, E** : the two alternating copies of the 25 state lanes,
 *   B**      : per-round temporaries,
 *   C*, D*   : the five column values used by theta.
 * The macro supplies its own terminating ';', so it is written without one.
 */
#define declareABCDE \
    V128 Aba, Abe, Abi, Abo, Abu; \
    V128 Aga, Age, Agi, Ago, Agu; \
    V128 Aka, Ake, Aki, Ako, Aku; \
    V128 Ama, Ame, Ami, Amo, Amu; \
    V128 Asa, Ase, Asi, Aso, Asu; \
    V128 Eba, Ebe, Ebi, Ebo, Ebu; \
    V128 Ega, Ege, Egi, Ego, Egu; \
    V128 Eka, Eke, Eki, Eko, Eku; \
    V128 Ema, Eme, Emi, Emo, Emu; \
    V128 Esa, Ese, Esi, Eso, Esu; \
    V128 Bba, Bbe, Bbi, Bbo, Bbu; \
    V128 Bga, Bge, Bgi, Bgo, Bgu; \
    V128 Bka, Bke, Bki, Bko, Bku; \
    V128 Bma, Bme, Bmi, Bmo, Bmu; \
    V128 Bsa, Bse, Bsi, Bso, Bsu; \
    V128 Ca, Ce, Ci, Co, Cu; \
    V128 Da, De, Di, Do, Du;
473
+
474
/*
 * Computes the five column values Ca..Cu from the A** lanes: each C value
 * combines, with XOR128, the five A lanes that share its last letter.
 */
#define prepareTheta \
    Ca = XOR128(Aba, XOR128(Aga, XOR128(Aka, XOR128(Ama, Asa)))); \
    Ce = XOR128(Abe, XOR128(Age, XOR128(Ake, XOR128(Ame, Ase)))); \
    Ci = XOR128(Abi, XOR128(Agi, XOR128(Aki, XOR128(Ami, Asi)))); \
    Co = XOR128(Abo, XOR128(Ago, XOR128(Ako, XOR128(Amo, Aso)))); \
    Cu = XOR128(Abu, XOR128(Agu, XOR128(Aku, XOR128(Amu, Asu))));
480
+
481
/*
 * One round (theta, rho, pi, chi, iota) that also prepares the next theta.
 * 64-bit lanes are held in the 64-bit elements of V128 values.
 *
 *   i : index into KeccakF1600RoundConstants for this round
 *   A : prefix of the input lanes (they are modified: the D values are XORed in)
 *   E : prefix of the output lanes
 *
 * Expects Ca..Cu to hold the column values of the A lanes on entry. On exit
 * Ca..Cu hold the same column values for the E lanes, accumulated row by row,
 * so the following round can start without calling prepareTheta.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    /* theta: per-column correction values */ \
    Da = XOR128(Cu, ROL64in128(Ce, 1)); \
    De = XOR128(Ca, ROL64in128(Ci, 1)); \
    Di = XOR128(Ce, ROL64in128(Co, 1)); \
    Do = XOR128(Ci, ROL64in128(Cu, 1)); \
    Du = XOR128(Co, ROL64in128(Ca, 1)); \
    \
    /* output row "b": the round constant is added here and C is (re)started */ \
    XOReq128(A##ba, Da); Bba = A##ba; \
    XOReq128(A##ge, De); Bbe = ROL64in128(A##ge, 44); \
    XOReq128(A##ki, Di); Bbi = ROL64in128(A##ki, 43); \
    E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
    XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
    Ca = E##ba; \
    XOReq128(A##mo, Do); Bbo = ROL64in128(A##mo, 21); \
    E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
    Ce = E##be; \
    XOReq128(A##su, Du); Bbu = ROL64in128(A##su, 14); \
    E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
    Ci = E##bi; \
    E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
    Co = E##bo; \
    E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
    Cu = E##bu; \
    \
    /* output row "g" */ \
    XOReq128(A##bo, Do); Bga = ROL64in128(A##bo, 28); \
    XOReq128(A##gu, Du); Bge = ROL64in128(A##gu, 20); \
    XOReq128(A##ka, Da); Bgi = ROL64in128(A##ka, 3); \
    E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
    XOReq128(Ca, E##ga); \
    XOReq128(A##me, De); Bgo = ROL64in128(A##me, 45); \
    E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
    XOReq128(Ce, E##ge); \
    XOReq128(A##si, Di); Bgu = ROL64in128(A##si, 61); \
    E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
    XOReq128(Ci, E##gi); \
    E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
    XOReq128(Co, E##go); \
    E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
    XOReq128(Cu, E##gu); \
    \
    /* output row "k" */ \
    XOReq128(A##be, De); Bka = ROL64in128(A##be, 1); \
    XOReq128(A##gi, Di); Bke = ROL64in128(A##gi, 6); \
    XOReq128(A##ko, Do); Bki = ROL64in128(A##ko, 25); \
    E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
    XOReq128(Ca, E##ka); \
    XOReq128(A##mu, Du); Bko = ROL64in128_8(A##mu); \
    E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
    XOReq128(Ce, E##ke); \
    XOReq128(A##sa, Da); Bku = ROL64in128(A##sa, 18); \
    E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
    XOReq128(Ci, E##ki); \
    E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
    XOReq128(Co, E##ko); \
    E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
    XOReq128(Cu, E##ku); \
    \
    /* output row "m" */ \
    XOReq128(A##bu, Du); Bma = ROL64in128(A##bu, 27); \
    XOReq128(A##ga, Da); Bme = ROL64in128(A##ga, 36); \
    XOReq128(A##ke, De); Bmi = ROL64in128(A##ke, 10); \
    E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
    XOReq128(Ca, E##ma); \
    XOReq128(A##mi, Di); Bmo = ROL64in128(A##mi, 15); \
    E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
    XOReq128(Ce, E##me); \
    XOReq128(A##so, Do); Bmu = ROL64in128_56(A##so); \
    E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
    XOReq128(Ci, E##mi); \
    E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
    XOReq128(Co, E##mo); \
    E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
    XOReq128(Cu, E##mu); \
    \
    /* output row "s" */ \
    XOReq128(A##bi, Di); Bsa = ROL64in128(A##bi, 62); \
    XOReq128(A##go, Do); Bse = ROL64in128(A##go, 55); \
    XOReq128(A##ku, Du); Bsi = ROL64in128(A##ku, 39); \
    E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
    XOReq128(Ca, E##sa); \
    XOReq128(A##ma, Da); Bso = ROL64in128(A##ma, 41); \
    E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
    XOReq128(Ce, E##se); \
    XOReq128(A##se, De); Bsu = ROL64in128(A##se, 2); \
    E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
    XOReq128(Ci, E##si); \
    E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
    XOReq128(Co, E##so); \
    E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
    XOReq128(Cu, E##su);
596
+
597
/*
 * One round (theta, rho, pi, chi, iota) without preparing the next theta.
 * 64-bit lanes are held in the 64-bit elements of V128 values.
 *
 *   i : index into KeccakF1600RoundConstants for this round
 *   A : prefix of the input lanes (they are modified: the D values are XORed in)
 *   E : prefix of the output lanes
 *
 * Expects Ca..Cu to hold the column values of the A lanes on entry; unlike
 * thetaRhoPiChiIotaPrepareTheta it leaves Ca..Cu untouched.
 */
#define thetaRhoPiChiIota(i, A, E) \
    /* theta: per-column correction values */ \
    Da = XOR128(Cu, ROL64in128(Ce, 1)); \
    De = XOR128(Ca, ROL64in128(Ci, 1)); \
    Di = XOR128(Ce, ROL64in128(Co, 1)); \
    Do = XOR128(Ci, ROL64in128(Cu, 1)); \
    Du = XOR128(Co, ROL64in128(Ca, 1)); \
    \
    /* output row "b": the round constant is added here */ \
    XOReq128(A##ba, Da); Bba = A##ba; \
    XOReq128(A##ge, De); Bbe = ROL64in128(A##ge, 44); \
    XOReq128(A##ki, Di); Bbi = ROL64in128(A##ki, 43); \
    E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
    XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
    XOReq128(A##mo, Do); Bbo = ROL64in128(A##mo, 21); \
    E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
    XOReq128(A##su, Du); Bbu = ROL64in128(A##su, 14); \
    E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
    E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
    E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
    \
    /* output row "g" */ \
    XOReq128(A##bo, Do); Bga = ROL64in128(A##bo, 28); \
    XOReq128(A##gu, Du); Bge = ROL64in128(A##gu, 20); \
    XOReq128(A##ka, Da); Bgi = ROL64in128(A##ka, 3); \
    E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
    XOReq128(A##me, De); Bgo = ROL64in128(A##me, 45); \
    E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
    XOReq128(A##si, Di); Bgu = ROL64in128(A##si, 61); \
    E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
    E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
    E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
    \
    /* output row "k" */ \
    XOReq128(A##be, De); Bka = ROL64in128(A##be, 1); \
    XOReq128(A##gi, Di); Bke = ROL64in128(A##gi, 6); \
    XOReq128(A##ko, Do); Bki = ROL64in128(A##ko, 25); \
    E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
    XOReq128(A##mu, Du); Bko = ROL64in128_8(A##mu); \
    E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
    XOReq128(A##sa, Da); Bku = ROL64in128(A##sa, 18); \
    E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
    E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
    E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
    \
    /* output row "m" */ \
    XOReq128(A##bu, Du); Bma = ROL64in128(A##bu, 27); \
    XOReq128(A##ga, Da); Bme = ROL64in128(A##ga, 36); \
    XOReq128(A##ke, De); Bmi = ROL64in128(A##ke, 10); \
    E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
    XOReq128(A##mi, Di); Bmo = ROL64in128(A##mi, 15); \
    E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
    XOReq128(A##so, Do); Bmu = ROL64in128_56(A##so); \
    E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
    E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
    E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
    \
    /* output row "s" */ \
    XOReq128(A##bi, Di); Bsa = ROL64in128(A##bi, 62); \
    XOReq128(A##go, Do); Bse = ROL64in128(A##go, 55); \
    XOReq128(A##ku, Du); Bsi = ROL64in128(A##ku, 39); \
    E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
    XOReq128(A##ma, Da); Bso = ROL64in128(A##ma, 41); \
    E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
    XOReq128(A##se, De); Bsu = ROL64in128(A##se, 2); \
    E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
    E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
    E##su = XOR128(Bsu, ANDnu128(Bsa, Bse));
687
+
688
+ static ALIGN(KeccakP1600times2_statesAlignment) const uint64_t KeccakF1600RoundConstants[24] = {
689
+ 0x0000000000000001ULL,
690
+ 0x0000000000008082ULL,
691
+ 0x800000000000808aULL,
692
+ 0x8000000080008000ULL,
693
+ 0x000000000000808bULL,
694
+ 0x0000000080000001ULL,
695
+ 0x8000000080008081ULL,
696
+ 0x8000000000008009ULL,
697
+ 0x000000000000008aULL,
698
+ 0x0000000000000088ULL,
699
+ 0x0000000080008009ULL,
700
+ 0x000000008000000aULL,
701
+ 0x000000008000808bULL,
702
+ 0x800000000000008bULL,
703
+ 0x8000000000008089ULL,
704
+ 0x8000000000008003ULL,
705
+ 0x8000000000008002ULL,
706
+ 0x8000000000000080ULL,
707
+ 0x000000000000800aULL,
708
+ 0x800000008000000aULL,
709
+ 0x8000000080008081ULL,
710
+ 0x8000000000008080ULL,
711
+ 0x0000000080000001ULL,
712
+ 0x8000000080008008ULL};
713
+
714
/*
 * Loads the 25 elements state[0..24] into the variables X##ba .. X##su,
 * in index order. One row of five lanes per source line.
 */
#define copyFromState(X, state) \
    X##ba = LOAD128(state[ 0]); X##be = LOAD128(state[ 1]); X##bi = LOAD128(state[ 2]); X##bo = LOAD128(state[ 3]); X##bu = LOAD128(state[ 4]); \
    X##ga = LOAD128(state[ 5]); X##ge = LOAD128(state[ 6]); X##gi = LOAD128(state[ 7]); X##go = LOAD128(state[ 8]); X##gu = LOAD128(state[ 9]); \
    X##ka = LOAD128(state[10]); X##ke = LOAD128(state[11]); X##ki = LOAD128(state[12]); X##ko = LOAD128(state[13]); X##ku = LOAD128(state[14]); \
    X##ma = LOAD128(state[15]); X##me = LOAD128(state[16]); X##mi = LOAD128(state[17]); X##mo = LOAD128(state[18]); X##mu = LOAD128(state[19]); \
    X##sa = LOAD128(state[20]); X##se = LOAD128(state[21]); X##si = LOAD128(state[22]); X##so = LOAD128(state[23]); X##su = LOAD128(state[24]);
740
+
741
/*
 * Stores the variables X##ba .. X##su into state[0..24], in index order.
 * One row of five lanes per source line.
 */
#define copyToState(state, X) \
    STORE128(state[ 0], X##ba); STORE128(state[ 1], X##be); STORE128(state[ 2], X##bi); STORE128(state[ 3], X##bo); STORE128(state[ 4], X##bu); \
    STORE128(state[ 5], X##ga); STORE128(state[ 6], X##ge); STORE128(state[ 7], X##gi); STORE128(state[ 8], X##go); STORE128(state[ 9], X##gu); \
    STORE128(state[10], X##ka); STORE128(state[11], X##ke); STORE128(state[12], X##ki); STORE128(state[13], X##ko); STORE128(state[14], X##ku); \
    STORE128(state[15], X##ma); STORE128(state[16], X##me); STORE128(state[17], X##mi); STORE128(state[18], X##mo); STORE128(state[19], X##mu); \
    STORE128(state[20], X##sa); STORE128(state[21], X##se); STORE128(state[22], X##si); STORE128(state[23], X##so); STORE128(state[24], X##su);
767
+
768
/*
 * Assigns each of the 25 lane variables Y##ba .. Y##su to its counterpart
 * X##ba .. X##su. One row of five lanes per source line.
 */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; X##be = Y##be; X##bi = Y##bi; X##bo = Y##bo; X##bu = Y##bu; \
    X##ga = Y##ga; X##ge = Y##ge; X##gi = Y##gi; X##go = Y##go; X##gu = Y##gu; \
    X##ka = Y##ka; X##ke = Y##ke; X##ki = Y##ki; X##ko = Y##ko; X##ku = Y##ku; \
    X##ma = Y##ma; X##me = Y##me; X##mi = Y##mi; X##mo = Y##mo; X##mu = Y##mu; \
    X##sa = Y##sa; X##se = Y##se; X##si = Y##si; X##so = Y##so; X##su = Y##su;
794
+
795
+ #ifdef KeccakP1600times2_fullUnrolling
796
+ #define FullUnrolling
797
+ #else
798
+ #define Unrolling KeccakP1600times2_unrolling
799
+ #endif
800
+ #include "KeccakP-1600-unrolling.macros"
801
+
802
+ void KeccakP1600times2_PermuteAll_24rounds(void *states)
803
+ {
804
+ V128 *statesAsLanes = (V128 *)states;
805
+ declareABCDE
806
+ #ifndef KeccakP1600times2_fullUnrolling
807
+ unsigned int i;
808
+ #endif
809
+
810
+ copyFromState(A, statesAsLanes)
811
+ rounds24
812
+ copyToState(statesAsLanes, A)
813
+ #if defined(UseMMX)
814
+ _mm_empty();
815
+ #endif
816
+ }
817
+
818
+ void KeccakP1600times2_PermuteAll_12rounds(void *states)
819
+ {
820
+ V128 *statesAsLanes = (V128 *)states;
821
+ declareABCDE
822
+ #ifndef KeccakP1600times2_fullUnrolling
823
+ unsigned int i;
824
+ #endif
825
+
826
+ copyFromState(A, statesAsLanes)
827
+ rounds12
828
+ copyToState(statesAsLanes, A)
829
+ #if defined(UseMMX)
830
+ _mm_empty();
831
+ #endif
832
+ }
833
+
834
+ void KeccakP1600times2_PermuteAll_6rounds(void *states)
835
+ {
836
+ V128 *statesAsLanes = (V128 *)states;
837
+ declareABCDE
838
+ #ifndef KeccakP1600times2_fullUnrolling
839
+ unsigned int i;
840
+ #endif
841
+
842
+ copyFromState(A, statesAsLanes)
843
+ rounds6
844
+ copyToState(statesAsLanes, A)
845
+ #if defined(UseMMX)
846
+ _mm_empty();
847
+ #endif
848
+ }
849
+
850
+ void KeccakP1600times2_PermuteAll_4rounds(void *states)
851
+ {
852
+ V128 *statesAsLanes = (V128 *)states;
853
+ declareABCDE
854
+ #ifndef KeccakP1600times2_fullUnrolling
855
+ unsigned int i;
856
+ #endif
857
+
858
+ copyFromState(A, statesAsLanes)
859
+ rounds4
860
+ copyToState(statesAsLanes, A)
861
+ #if defined(UseMMX)
862
+ _mm_empty();
863
+ #endif
864
+ }
865
+
866
+ size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
867
+ {
868
+ if (laneCount == 21) {
869
+ #if 1
870
+ const unsigned char *dataStart = data;
871
+
872
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
873
+ V128 *stateAsLanes = (V128 *)states;
874
+ const uint64_t *curData0 = (const uint64_t *)data;
875
+ const uint64_t *curData1 = (const uint64_t *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
876
+ #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
877
+ XOR_In( 0 );
878
+ XOR_In( 1 );
879
+ XOR_In( 2 );
880
+ XOR_In( 3 );
881
+ XOR_In( 4 );
882
+ XOR_In( 5 );
883
+ XOR_In( 6 );
884
+ XOR_In( 7 );
885
+ XOR_In( 8 );
886
+ XOR_In( 9 );
887
+ XOR_In( 10 );
888
+ XOR_In( 11 );
889
+ XOR_In( 12 );
890
+ XOR_In( 13 );
891
+ XOR_In( 14 );
892
+ XOR_In( 15 );
893
+ XOR_In( 16 );
894
+ XOR_In( 17 );
895
+ XOR_In( 18 );
896
+ XOR_In( 19 );
897
+ XOR_In( 20 );
898
+ #undef XOR_In
899
+ KeccakP1600times2_PermuteAll_24rounds(states);
900
+ data += laneOffsetSerial*8;
901
+ dataByteLen -= laneOffsetSerial*8;
902
+ }
903
+ return data - dataStart;
904
+ #else
905
+ unsigned int i;
906
+ const unsigned char *dataStart = data;
907
+ const uint64_t *curData0 = (const uint64_t *)data;
908
+ const uint64_t *curData1 = (const uint64_t *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
909
+ V128 *statesAsLanes = (V128 *)states;
910
+ declareABCDE
911
+
912
+ copyFromState(A, statesAsLanes)
913
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
914
+ #define XOR_In( Xxx, argIndex ) XOReq128( Xxx, LOAD6464(curData1[argIndex], curData0[argIndex]))
915
+ XOR_In( Aba, 0 );
916
+ XOR_In( Abe, 1 );
917
+ XOR_In( Abi, 2 );
918
+ XOR_In( Abo, 3 );
919
+ XOR_In( Abu, 4 );
920
+ XOR_In( Aga, 5 );
921
+ XOR_In( Age, 6 );
922
+ XOR_In( Agi, 7 );
923
+ XOR_In( Ago, 8 );
924
+ XOR_In( Agu, 9 );
925
+ XOR_In( Aka, 10 );
926
+ XOR_In( Ake, 11 );
927
+ XOR_In( Aki, 12 );
928
+ XOR_In( Ako, 13 );
929
+ XOR_In( Aku, 14 );
930
+ XOR_In( Ama, 15 );
931
+ XOR_In( Ame, 16 );
932
+ XOR_In( Ami, 17 );
933
+ XOR_In( Amo, 18 );
934
+ XOR_In( Amu, 19 );
935
+ XOR_In( Asa, 20 );
936
+ #undef XOR_In
937
+ rounds24
938
+ curData0 += laneOffsetSerial;
939
+ curData1 += laneOffsetSerial;
940
+ dataByteLen -= laneOffsetSerial*8;
941
+ }
942
+ copyToState(statesAsLanes, A)
943
+ return (const unsigned char *)curData0 - dataStart;
944
+ #endif
945
+ }
946
+ else {
947
+ const unsigned char *dataStart = data;
948
+
949
+ while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
950
+ KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
951
+ KeccakP1600times2_PermuteAll_24rounds(states);
952
+ data += laneOffsetSerial*8;
953
+ dataByteLen -= laneOffsetSerial*8;
954
+ }
955
+ return data - dataStart;
956
+ }
957
+ }