sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,881 @@
1
+ /*
2
+ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
+
4
+ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to the Keccak Team website:
7
+ https://keccak.team/
8
+
9
+ To the extent possible under law, the implementer has waived all copyright
10
+ and related or neighboring rights to the source code in this file.
11
+ http://creativecommons.org/publicdomain/zero/1.0/
12
+
13
+ ---
14
+
15
+ This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
16
+ Please refer to PlSnP-documentation.h for more details.
17
+
18
+ This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
19
+ Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
+ */
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include <string.h>
25
+ #include <stdint.h>
26
+ #include <smmintrin.h>
27
+ #include <wmmintrin.h>
28
+ #include <immintrin.h>
29
+ #include <emmintrin.h>
30
+ #include "align.h"
31
+ #include "KeccakP-1600-times4-SnP.h"
32
+ #include "SIMD512-4-config.h"
33
+
34
+ #include "brg_endian.h"
35
+ #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
36
+ #error Expecting a little-endian platform
37
+ #endif
38
+
39
+ /* Comment the define hereunder when compiling for a CPU with AVX-512 SIMD */
40
+ /*
41
+ * Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined,
42
+ * errors will occur if we did a bad interpretation of the AVX-512 intrinsics'
43
+ * API or functionality.
44
+ */
45
+ /* #define SIMULATE_AVX512 */
46
+
47
+ #if defined(SIMULATE_AVX512)
48
+
49
/*
 * Software stand-in for the AVX-512 512-bit integer vector type, compiled
 * only when SIMULATE_AVX512 is defined: eight 64-bit lanes in a plain array.
 */
typedef struct {
    uint64_t x[8];
} __m512i;
53
+
54
+ static __m512i _mm512_xor_si512( __m512i a, __m512i b)
55
+ {
56
+ __m512i r;
57
+ unsigned int i;
58
+
59
+ for ( i = 0; i < 8; ++i )
60
+ r.x[i] = a.x[i] ^ b.x[i];
61
+ return(r);
62
+ }
63
+
64
+ static __m256i _mm256_ternarylogic_epi64(__m256i a, __m256i b, __m256i c, int imm)
65
+ {
66
+
67
+ if (imm == 0x96)
68
+ return _mm256_xor_si256( _mm256_xor_si256( a, b ), c );
69
+ if (imm == 0xD2)
70
+ return _mm256_xor_si256( a, _mm256_andnot_si256(b, c) );
71
+ printf( "_mm256_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
72
+ exit(1);
73
+ }
74
+
75
+ static __m256i _mm256_rol_epi64(__m256i a, int offset)
76
+ {
77
+ return _mm256_or_si256(_mm256_slli_epi64(a, offset), _mm256_srli_epi64(a, 64-offset));
78
+ }
79
+
80
+ static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
81
+ {
82
+ __m512i r;
83
+ unsigned int i;
84
+ uint32_t offset[8];
85
+
86
+ _mm256_store_si256( (__m256i*)offset, idx );
87
+ for ( i = 0; i < 8; ++i )
88
+ r.x[i] = *(const uint64_t*)((const char*)p + offset[i] * scale);
89
+ return(r);
90
+ }
91
+
92
+ static void _mm256_i32scatter_epi64( void *p, __m128i idx, __m256i value, int scale)
93
+ {
94
+ unsigned int i;
95
+ uint64_t v[4];
96
+ uint32_t offset[4];
97
+
98
+ _mm_store_ps( (float*)offset, (__m128)idx );
99
+ _mm256_store_si256( (__m256i*)v, value );
100
+ for ( i = 0; i < 4; ++i )
101
+ *(uint64_t*)((char*)p + offset[i] * scale) = v[i];
102
+ }
103
+
104
+ static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
105
+ {
106
+ unsigned int i;
107
+ uint32_t offset[8];
108
+
109
+ _mm256_store_si256( (__m256i*)offset, idx );
110
+ for ( i = 0; i < 8; ++i )
111
+ *(uint64_t*)((char*)p + offset[i] * scale) = value.x[i];
112
+ }
113
+
114
+ #endif
115
+
116
/* Short aliases for the 128-, 256- and 512-bit integer vector types. */
typedef __m128i V128;
typedef __m256i V256;
typedef __m512i V512;

#if defined(KeccakP1600times4_useAVX512)

/*
 * Bitwise helpers. XOR, XOR3, XOR5, ROL and Chi work on 256-bit vectors,
 * XOR512 on 512-bit vectors.
 * Ternary-logic table 0x96 is a three-way XOR; 0xD2 is a ^ (~b & c).
 */
#define XOR(a,b) _mm256_xor_si256(a,b)
#define XOR3(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0x96)
#define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e)
#define XOR512(a,b) _mm512_xor_si512(a,b)
#define ROL(a,offset) _mm256_rol_epi64(a,offset)
#define Chi(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0xD2)

/*
 * Constant and index-vector builders, and gather/scatter wrappers.
 * LOAD4_32 and LOAD8_32 pass their arguments, in the given order, to
 * _mm_set_epi32 / _mm256_set_epi32; every argument is narrowed to 32 bits
 * because these intrinsics take 32-bit elements.
 * All gathers and scatters use a scale of 8, i.e. indices count 64-bit lanes.
 */
#define CONST256_64(a) _mm256_set1_epi64x(a)
#define LOAD4_32(a,b,c,d) _mm_set_epi32((uint32_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d))
#define LOAD8_32(a,b,c,d,e,f,g,h) _mm256_set_epi32((uint32_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d), (uint32_t)(e), (uint32_t)(f), (uint32_t)(g), (uint32_t)(h))
#define LOAD_GATHER4_64(idx,p) _mm256_i32gather_epi64( (const void*)(p), idx, 8)
#define LOAD_GATHER8_64(idx,p) _mm512_i32gather_epi64( idx, (const void*)(p), 8)
#define STORE_SCATTER4_64(p,idx, v) _mm256_i32scatter_epi64( (void*)(p), idx, v, 8)
#define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)

#endif
138
+
139
/*
 * Index, in units of 64-bit lanes, of lane 'lanePosition' of instance
 * 'instanceIndex' inside the interleaved states: the four instances' copies
 * of a given lane are stored next to each other.
 * Both arguments are parenthesized so that any expression can be passed.
 */
#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + (instanceIndex))
/* Size of one lane in bytes. */
#define SnP_laneLengthInBytes 8
141
+
142
+ void KeccakP1600times4_InitializeAll(void *states)
143
+ {
144
+ memset(states, 0, KeccakP1600times4_statesSizeInBytes);
145
+ }
146
+
147
+ void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
148
+ {
149
+ unsigned int sizeLeft = length;
150
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
151
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
152
+ const unsigned char *curData = data;
153
+ uint64_t *statesAsLanes = states;
154
+
155
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
156
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
157
+ uint64_t lane = 0;
158
+ if (bytesInLane > sizeLeft)
159
+ bytesInLane = sizeLeft;
160
+ memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
161
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
162
+ sizeLeft -= bytesInLane;
163
+ lanePosition++;
164
+ curData += bytesInLane;
165
+ }
166
+
167
+ while(sizeLeft >= SnP_laneLengthInBytes) {
168
+ uint64_t lane = *((const uint64_t*)curData);
169
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
170
+ sizeLeft -= SnP_laneLengthInBytes;
171
+ lanePosition++;
172
+ curData += SnP_laneLengthInBytes;
173
+ }
174
+
175
+ if (sizeLeft > 0) {
176
+ uint64_t lane = 0;
177
+ memcpy(&lane, curData, sizeLeft);
178
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
179
+ }
180
+ }
181
+
182
+ void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
183
+ {
184
+ V256 *stateAsLanes256 = states;
185
+ V512 *stateAsLanes512 = states;
186
+ const uint64_t *dataAsLanes = (const uint64_t *)data;
187
+ unsigned int i;
188
+ V256 index512;
189
+ V128 index256;
190
+
191
+ #define Add_In1( argIndex ) stateAsLanes256[argIndex] = XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, dataAsLanes+argIndex))
192
+ #define Add_In2( argIndex ) stateAsLanes512[argIndex/2] = XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
193
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
194
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
195
+ if ( laneCount >= 16 ) {
196
+ Add_In2( 0 );
197
+ Add_In2( 2 );
198
+ Add_In2( 4 );
199
+ Add_In2( 6 );
200
+ Add_In2( 8 );
201
+ Add_In2( 10 );
202
+ Add_In2( 12 );
203
+ Add_In2( 14 );
204
+ if ( laneCount >= 20 ) {
205
+ Add_In2( 16 );
206
+ Add_In2( 18 );
207
+ for(i=20; i<laneCount; i++)
208
+ Add_In1( i );
209
+ }
210
+ else {
211
+ for(i=16; i<laneCount; i++)
212
+ Add_In1( i );
213
+ }
214
+ }
215
+ else {
216
+ for(i=0; i<laneCount; i++)
217
+ Add_In1( i );
218
+ }
219
+ #undef Add_In1
220
+ #undef Add_In2
221
+ }
222
+
223
+ void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
224
+ {
225
+ unsigned int sizeLeft = length;
226
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
227
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
228
+ const unsigned char *curData = data;
229
+ uint64_t *statesAsLanes = states;
230
+
231
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
232
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
233
+ if (bytesInLane > sizeLeft)
234
+ bytesInLane = sizeLeft;
235
+ memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
236
+ sizeLeft -= bytesInLane;
237
+ lanePosition++;
238
+ curData += bytesInLane;
239
+ }
240
+
241
+ while(sizeLeft >= SnP_laneLengthInBytes) {
242
+ uint64_t lane = *((const uint64_t*)curData);
243
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
244
+ sizeLeft -= SnP_laneLengthInBytes;
245
+ lanePosition++;
246
+ curData += SnP_laneLengthInBytes;
247
+ }
248
+
249
+ if (sizeLeft > 0) {
250
+ memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
251
+ }
252
+ }
253
+
254
+ void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
255
+ {
256
+ V256 *stateAsLanes256 = states;
257
+ V512 *stateAsLanes512 = states;
258
+ const uint64_t *dataAsLanes = (const uint64_t *)data;
259
+ unsigned int i;
260
+ V256 index512;
261
+ V128 index256;
262
+
263
+ #define OverWr1( argIndex ) stateAsLanes256[argIndex] = LOAD_GATHER4_64(index256, dataAsLanes+argIndex)
264
+ #define OverWr2( argIndex ) stateAsLanes512[argIndex/2] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
265
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
266
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
267
+ if ( laneCount >= 16 ) {
268
+ OverWr2( 0 );
269
+ OverWr2( 2 );
270
+ OverWr2( 4 );
271
+ OverWr2( 6 );
272
+ OverWr2( 8 );
273
+ OverWr2( 10 );
274
+ OverWr2( 12 );
275
+ OverWr2( 14 );
276
+ if ( laneCount >= 20 ) {
277
+ OverWr2( 16 );
278
+ OverWr2( 18 );
279
+ for(i=20; i<laneCount; i++)
280
+ OverWr1( i );
281
+ }
282
+ else {
283
+ for(i=16; i<laneCount; i++)
284
+ OverWr1( i );
285
+ }
286
+ }
287
+ else {
288
+ for(i=0; i<laneCount; i++)
289
+ OverWr1( i );
290
+ }
291
+ #undef OverWr1
292
+ #undef OverWr2
293
+ }
294
+
295
+ void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
296
+ {
297
+ unsigned int sizeLeft = byteCount;
298
+ unsigned int lanePosition = 0;
299
+ uint64_t *statesAsLanes = states;
300
+
301
+ while(sizeLeft >= SnP_laneLengthInBytes) {
302
+ statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
303
+ sizeLeft -= SnP_laneLengthInBytes;
304
+ lanePosition++;
305
+ }
306
+
307
+ if (sizeLeft > 0) {
308
+ memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
309
+ }
310
+ }
311
+
312
+ void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
313
+ {
314
+ unsigned int sizeLeft = length;
315
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
316
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
317
+ unsigned char *curData = data;
318
+ const uint64_t *statesAsLanes = states;
319
+
320
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
321
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
322
+ if (bytesInLane > sizeLeft)
323
+ bytesInLane = sizeLeft;
324
+ memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
325
+ sizeLeft -= bytesInLane;
326
+ lanePosition++;
327
+ curData += bytesInLane;
328
+ }
329
+
330
+ while(sizeLeft >= SnP_laneLengthInBytes) {
331
+ *(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
332
+ sizeLeft -= SnP_laneLengthInBytes;
333
+ lanePosition++;
334
+ curData += SnP_laneLengthInBytes;
335
+ }
336
+
337
+ if (sizeLeft > 0) {
338
+ memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
339
+ }
340
+ }
341
+
342
+ void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
343
+ {
344
+ const V256 *stateAsLanes256 = states;
345
+ const V512 *stateAsLanes512 = states;
346
+ uint64_t *dataAsLanes = (uint64_t *)data;
347
+ unsigned int i;
348
+ V256 index512;
349
+ V128 index256;
350
+
351
+ #define Extr1( argIndex ) STORE_SCATTER4_64(dataAsLanes+argIndex, index256, stateAsLanes256[argIndex])
352
+ #define Extr2( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/2])
353
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
354
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
355
+ if ( laneCount >= 16 ) {
356
+ Extr2( 0 );
357
+ Extr2( 2 );
358
+ Extr2( 4 );
359
+ Extr2( 6 );
360
+ Extr2( 8 );
361
+ Extr2( 10 );
362
+ Extr2( 12 );
363
+ Extr2( 14 );
364
+ if ( laneCount >= 20 ) {
365
+ Extr2( 16 );
366
+ Extr2( 18 );
367
+ for(i=20; i<laneCount; i++)
368
+ Extr1( i );
369
+ }
370
+ else {
371
+ for(i=16; i<laneCount; i++)
372
+ Extr1( i );
373
+ }
374
+ }
375
+ else {
376
+ for(i=0; i<laneCount; i++)
377
+ Extr1( i );
378
+ }
379
+ #undef Extr1
380
+ #undef Extr2
381
+ }
382
+
383
+ void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
384
+ {
385
+ unsigned int sizeLeft = length;
386
+ unsigned int lanePosition = offset/SnP_laneLengthInBytes;
387
+ unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
388
+ const unsigned char *curInput = input;
389
+ unsigned char *curOutput = output;
390
+ const uint64_t *statesAsLanes = states;
391
+
392
+ if ((sizeLeft > 0) && (offsetInLane != 0)) {
393
+ unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
394
+ uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
395
+ if (bytesInLane > sizeLeft)
396
+ bytesInLane = sizeLeft;
397
+ sizeLeft -= bytesInLane;
398
+ do {
399
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
400
+ lane >>= 8;
401
+ } while ( --bytesInLane != 0);
402
+ lanePosition++;
403
+ }
404
+
405
+ while(sizeLeft >= SnP_laneLengthInBytes) {
406
+ *((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
407
+ sizeLeft -= SnP_laneLengthInBytes;
408
+ lanePosition++;
409
+ curInput += SnP_laneLengthInBytes;
410
+ curOutput += SnP_laneLengthInBytes;
411
+ }
412
+
413
+ if (sizeLeft != 0) {
414
+ uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
415
+ do {
416
+ *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
417
+ lane >>= 8;
418
+ } while ( --sizeLeft != 0);
419
+ }
420
+ }
421
+
422
+ void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
423
+ {
424
+ const V256 *stateAsLanes256 = states;
425
+ const V512 *stateAsLanes512 = states;
426
+ const uint64_t *inAsLanes = (const uint64_t *)input;
427
+ uint64_t *outAsLanes = (uint64_t *)output;
428
+ unsigned int i;
429
+ V256 index512;
430
+ V128 index256;
431
+
432
+ #define ExtrAdd1( argIndex ) STORE_SCATTER4_64(outAsLanes+argIndex, index256, XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, inAsLanes+argIndex)))
433
+ #define ExtrAdd2( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
434
+ index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
435
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
436
+
437
+ if ( laneCount >= 16 ) {
438
+ ExtrAdd2( 0 );
439
+ ExtrAdd2( 2 );
440
+ ExtrAdd2( 4 );
441
+ ExtrAdd2( 6 );
442
+ ExtrAdd2( 8 );
443
+ ExtrAdd2( 10 );
444
+ ExtrAdd2( 12 );
445
+ ExtrAdd2( 14 );
446
+ if ( laneCount >= 20 ) {
447
+ ExtrAdd2( 16 );
448
+ ExtrAdd2( 18 );
449
+ for(i=20; i<laneCount; i++)
450
+ ExtrAdd1( i );
451
+ }
452
+ else {
453
+ for(i=16; i<laneCount; i++)
454
+ ExtrAdd1( i );
455
+ }
456
+ }
457
+ else {
458
+ for(i=0; i<laneCount; i++)
459
+ ExtrAdd1( i );
460
+ }
461
+ #undef ExtrAdd1
462
+ #undef ExtrAdd2
463
+
464
+ }
465
+
466
+ static ALIGN(KeccakP1600times4_statesAlignment) const uint64_t KeccakP1600RoundConstants[24] = {
467
+ 0x0000000000000001ULL,
468
+ 0x0000000000008082ULL,
469
+ 0x800000000000808aULL,
470
+ 0x8000000080008000ULL,
471
+ 0x000000000000808bULL,
472
+ 0x0000000080000001ULL,
473
+ 0x8000000080008081ULL,
474
+ 0x8000000000008009ULL,
475
+ 0x000000000000008aULL,
476
+ 0x0000000000000088ULL,
477
+ 0x0000000080008009ULL,
478
+ 0x000000008000000aULL,
479
+ 0x000000008000808bULL,
480
+ 0x800000000000008bULL,
481
+ 0x8000000000008089ULL,
482
+ 0x8000000000008003ULL,
483
+ 0x8000000000008002ULL,
484
+ 0x8000000000000080ULL,
485
+ 0x000000000000800aULL,
486
+ 0x800000008000000aULL,
487
+ 0x8000000080008081ULL,
488
+ 0x8000000000008080ULL,
489
+ 0x0000000080000001ULL,
490
+ 0x8000000080008008ULL};
491
+
492
/*
 * Declares every V256 local used by the round macros: the 25 state lanes
 * (_ba.._su), the five B temporaries (_Ba.._Bu) and the five D temporaries
 * (_Da.._Du). The expansion has no trailing semicolon; the user supplies it.
 */
#define KeccakP_DeclareVars \
    V256 _ba, _be, _bi, _bo, _bu; /* state lanes, row b */ \
    V256 _ga, _ge, _gi, _go, _gu; /* state lanes, row g */ \
    V256 _ka, _ke, _ki, _ko, _ku; /* state lanes, row k */ \
    V256 _ma, _me, _mi, _mo, _mu; /* state lanes, row m */ \
    V256 _sa, _se, _si, _so, _su; /* state lanes, row s */ \
    V256 _Ba, _Be, _Bi, _Bo, _Bu; /* B temporaries */ \
    V256 _Da, _De, _Di, _Do, _Du  /* D temporaries */
500
+
501
/*
 * Processes five state lanes in place.
 *
 *  lane1..lane5 - the five state lanes read, then overwritten with the result.
 *  dst1..dst5   - which of the B temporaries (_Ba.._Bu) receives each lane;
 *                 every visible caller passes a permutation of the five.
 *  rot1..rot5   - rotation amount applied to each lane; rot1 may be 0, in
 *                 which case that rotation is skipped.
 *
 * Each lane is XORed with its D value (_Da.._Du, computed beforehand by
 * KeccakP_ThetaRhoPiChiIota0), rotated into its B temporary, and the lanes
 * are then rewritten with Chi() over consecutive B temporaries.
 * The expansion ends with a semicolon.
 */
#define KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, dst1, dst2, dst3, dst4, dst5, rot1, rot2, rot3, rot4, rot5 ) \
    /* add the D values */ \
    dst1 = XOR(lane1, _Da); \
    dst2 = XOR(lane2, _De); \
    dst3 = XOR(lane3, _Di); \
    dst4 = XOR(lane4, _Do); \
    dst5 = XOR(lane5, _Du); \
    /* rotate each lane */ \
    if (rot1 != 0) dst1 = ROL(dst1, rot1); \
    dst2 = ROL(dst2, rot2); \
    dst3 = ROL(dst3, rot3); \
    dst4 = ROL(dst4, rot4); \
    dst5 = ROL(dst5, rot5); \
    /* combine the B temporaries back into the lanes */ \
    lane1 = Chi( _Ba, _Be, _Bi); \
    lane2 = Chi( _Be, _Bi, _Bo); \
    lane3 = Chi( _Bi, _Bo, _Bu); \
    lane4 = Chi( _Bo, _Bu, _Ba); \
    lane5 = Chi( _Bu, _Ba, _Be);
517
+
518
/*
 * First of the five per-round steps.
 *
 * Computes the five column parities of the whole state into _Ba.._Bu,
 * derives the D values (_Da.._Du) from them, runs KeccakP_ThetaRhoPiChi on
 * the five given lanes with rotations 0, 44, 43, 21, 14, and finally XORs
 * roundConstant into lane1.
 * The D values stay valid for the KeccakP_ThetaRhoPiChi1..4 steps that follow.
 */
#define KeccakP_ThetaRhoPiChiIota0( lane1, lane2, lane3, lane4, lane5, roundConstant ) \
    /* column parities (Theta effect) */ \
    _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); \
    _Be = XOR5( _be, _ge, _ke, _me, _se ); \
    _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
    _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
    _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
    /* D = (next column parity rotated by 1) XOR (previous column parity) */ \
    _Da = XOR( ROL( _Be, 1 ), _Bu ); \
    _De = XOR( ROL( _Bi, 1 ), _Ba ); \
    _Di = XOR( ROL( _Bo, 1 ), _Be ); \
    _Do = XOR( ROL( _Bu, 1 ), _Bi ); \
    _Du = XOR( ROL( _Ba, 1 ), _Bo ); \
    KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
    lane1 = XOR(lane1, roundConstant) /* Iota */
536
+
537
/*
 * Steps 1 to 4 of a round. Each one forwards five lanes to
 * KeccakP_ThetaRhoPiChi with a fixed assignment of B temporaries and a fixed
 * set of rotation amounts. They rely on _Da.._Du having been set by
 * KeccakP_ThetaRhoPiChiIota0 earlier in the same round.
 */
#define KeccakP_ThetaRhoPiChi1( lane1, lane2, lane3, lane4, lane5 ) \
    KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, _Bi, _Bo, _Bu, _Ba, _Be,  3, 45, 61, 28, 20 )

#define KeccakP_ThetaRhoPiChi2( lane1, lane2, lane3, lane4, lane5 ) \
    KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, _Bu, _Ba, _Be, _Bi, _Bo, 18,  1,  6, 25,  8 )

#define KeccakP_ThetaRhoPiChi3( lane1, lane2, lane3, lane4, lane5 ) \
    KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )

#define KeccakP_ThetaRhoPiChi4( lane1, lane2, lane3, lane4, lane5 ) \
    KeccakP_ThetaRhoPiChi( lane1, lane2, lane3, lane4, lane5, _Bo, _Bu, _Ba, _Be, _Bi, 41,  2, 62, 55, 39 )
548
+
549
/*
 * Four consecutive rounds, using round constants firstRound .. firstRound+3.
 *
 * Instead of moving lanes between variables, each round names the lanes in a
 * different order; after the fourth round the order is the natural one
 * (_ba, _be, _bi, ...) again.
 * The expansion has no trailing semicolon.
 */
#define KeccakP_4rounds( firstRound ) \
    /* round firstRound */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST256_64(KeccakP1600RoundConstants[firstRound]) ); \
    KeccakP_ThetaRhoPiChi1(    _ka, _me, _si, _bo, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _sa, _be, _gi, _ko, _mu ); \
    KeccakP_ThetaRhoPiChi3(    _ga, _ke, _mi, _so, _bu ); \
    KeccakP_ThetaRhoPiChi4(    _ma, _se, _bi, _go, _ku ); \
    /* round firstRound+1 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST256_64(KeccakP1600RoundConstants[firstRound+1]) ); \
    KeccakP_ThetaRhoPiChi1(    _sa, _ke, _bi, _mo, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ma, _ge, _si, _ko, _bu ); \
    KeccakP_ThetaRhoPiChi3(    _ka, _be, _mi, _go, _su ); \
    KeccakP_ThetaRhoPiChi4(    _ga, _se, _ki, _bo, _mu ); \
    /* round firstRound+2 */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[firstRound+2]) ); \
    KeccakP_ThetaRhoPiChi1(    _ma, _be, _ki, _so, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ga, _me, _bi, _ko, _su ); \
    KeccakP_ThetaRhoPiChi3(    _sa, _ge, _mi, _bo, _ku ); \
    KeccakP_ThetaRhoPiChi4(    _ka, _se, _gi, _mo, _bu ); \
    /* round firstRound+3: lanes are in natural order */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[firstRound+3]) ); \
    KeccakP_ThetaRhoPiChi1(    _ga, _ge, _gi, _go, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ka, _ke, _ki, _ko, _ku ); \
    KeccakP_ThetaRhoPiChi3(    _ma, _me, _mi, _mo, _mu ); \
    KeccakP_ThetaRhoPiChi4(    _sa, _se, _si, _so, _su )
573
+
574
/*
 * Two consecutive rounds, using round constants firstRound and firstRound+1.
 *
 * The lane orderings are those of the last two rounds of KeccakP_4rounds, so
 * the state ends in natural order. The visible user
 * (KeccakP1600times4_PermuteAll_6rounds) loads the lanes with
 * copyFromState2rounds before expanding this macro.
 * The expansion has no trailing semicolon.
 */
#define KeccakP_2rounds( firstRound ) \
    /* round firstRound */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[firstRound]) ); \
    KeccakP_ThetaRhoPiChi1(    _ma, _be, _ki, _so, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ga, _me, _bi, _ko, _su ); \
    KeccakP_ThetaRhoPiChi3(    _sa, _ge, _mi, _bo, _ku ); \
    KeccakP_ThetaRhoPiChi4(    _ka, _se, _gi, _mo, _bu ); \
    /* round firstRound+1: lanes are in natural order */ \
    KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[firstRound+1]) ); \
    KeccakP_ThetaRhoPiChi1(    _ga, _ge, _gi, _go, _gu ); \
    KeccakP_ThetaRhoPiChi2(    _ka, _ke, _ki, _ko, _ku ); \
    KeccakP_ThetaRhoPiChi3(    _ma, _me, _mi, _mo, _mu ); \
    KeccakP_ThetaRhoPiChi4(    _sa, _se, _si, _so, _su )
586
+
587
+ #ifdef KeccakP1600times4_fullUnrolling
588
+
589
+ #define rounds12 \
590
+ KeccakP_4rounds( 12 ); \
591
+ KeccakP_4rounds( 16 ); \
592
+ KeccakP_4rounds( 20 )
593
+
594
+ #define rounds24 \
595
+ KeccakP_4rounds( 0 ); \
596
+ KeccakP_4rounds( 4 ); \
597
+ KeccakP_4rounds( 8 ); \
598
+ KeccakP_4rounds( 12 ); \
599
+ KeccakP_4rounds( 16 ); \
600
+ KeccakP_4rounds( 20 )
601
+
602
+ #elif (KeccakP1600times4_unrolling == 4)
603
+
604
+ #define rounds12 \
605
+ i = 12; \
606
+ do { \
607
+ KeccakP_4rounds( i ); \
608
+ } while( (i += 4) < 24 )
609
+
610
+ #define rounds24 \
611
+ i = 0; \
612
+ do { \
613
+ KeccakP_4rounds( i ); \
614
+ } while( (i += 4) < 24 )
615
+
616
+ #elif (KeccakP1600times4_unrolling == 12)
617
+
618
+ #define rounds12 \
619
+ KeccakP_4rounds( 12 ); \
620
+ KeccakP_4rounds( 16 ); \
621
+ KeccakP_4rounds( 20 )
622
+
623
+ #define rounds24 \
624
+ i = 0; \
625
+ do { \
626
+ KeccakP_4rounds( i ); \
627
+ KeccakP_4rounds( i+4 ); \
628
+ KeccakP_4rounds( i+8 ); \
629
+ } while( (i += 12) < 24 )
630
+
631
+ #else
632
+ #error "Unrolling is not correctly specified!"
633
+ #endif
634
+
635
/*
 * Loads the 25 lanes from stateLanes into the lane variables in a permuted
 * order: each variable receives the lane stored at the index shown. The
 * trailing comment gives the name that index has in natural order (see
 * copyFromState) whenever it differs from the variable being assigned.
 * Used before KeccakP_2rounds, whose first round expects this arrangement.
 */
#define copyFromState2rounds(stateLanes) \
    _ba = stateLanes[ 0]; \
    _be = stateLanes[16]; /* me */ \
    _bi = stateLanes[ 7]; /* gi */ \
    _bo = stateLanes[23]; /* so */ \
    _bu = stateLanes[14]; /* ku */ \
    \
    _ga = stateLanes[20]; /* sa */ \
    _ge = stateLanes[11]; /* ke */ \
    _gi = stateLanes[ 2]; /* bi */ \
    _go = stateLanes[18]; /* mo */ \
    _gu = stateLanes[ 9]; \
    \
    _ka = stateLanes[15]; /* ma */ \
    _ke = stateLanes[ 6]; /* ge */ \
    _ki = stateLanes[22]; /* si */ \
    _ko = stateLanes[13]; \
    _ku = stateLanes[ 4]; /* bu */ \
    \
    _ma = stateLanes[10]; /* ka */ \
    _me = stateLanes[ 1]; /* be */ \
    _mi = stateLanes[17]; \
    _mo = stateLanes[ 8]; /* go */ \
    _mu = stateLanes[24]; /* su */ \
    \
    _sa = stateLanes[ 5]; /* ga */ \
    _se = stateLanes[21]; \
    _si = stateLanes[12]; /* ki */ \
    _so = stateLanes[ 3]; /* bo */ \
    _su = stateLanes[19]  /* mu */
661
+
662
/*
 * Loads the 25 lanes stateLanes[0..24] into the lane variables _ba.._su in
 * natural order (five lanes per row: b, g, k, m, s).
 * The expansion has no trailing semicolon.
 */
#define copyFromState(stateLanes) \
    _ba = stateLanes[ 0]; _be = stateLanes[ 1]; _bi = stateLanes[ 2]; _bo = stateLanes[ 3]; _bu = stateLanes[ 4]; \
    _ga = stateLanes[ 5]; _ge = stateLanes[ 6]; _gi = stateLanes[ 7]; _go = stateLanes[ 8]; _gu = stateLanes[ 9]; \
    _ka = stateLanes[10]; _ke = stateLanes[11]; _ki = stateLanes[12]; _ko = stateLanes[13]; _ku = stateLanes[14]; \
    _ma = stateLanes[15]; _me = stateLanes[16]; _mi = stateLanes[17]; _mo = stateLanes[18]; _mu = stateLanes[19]; \
    _sa = stateLanes[20]; _se = stateLanes[21]; _si = stateLanes[22]; _so = stateLanes[23]; _su = stateLanes[24]
688
+
689
/*
 * Stores the lane variables _ba.._su back into stateLanes[0..24] in natural
 * order; the inverse of copyFromState.
 * The expansion has no trailing semicolon.
 */
#define copyToState(stateLanes) \
    stateLanes[ 0] = _ba; stateLanes[ 1] = _be; stateLanes[ 2] = _bi; stateLanes[ 3] = _bo; stateLanes[ 4] = _bu; \
    stateLanes[ 5] = _ga; stateLanes[ 6] = _ge; stateLanes[ 7] = _gi; stateLanes[ 8] = _go; stateLanes[ 9] = _gu; \
    stateLanes[10] = _ka; stateLanes[11] = _ke; stateLanes[12] = _ki; stateLanes[13] = _ko; stateLanes[14] = _ku; \
    stateLanes[15] = _ma; stateLanes[16] = _me; stateLanes[17] = _mi; stateLanes[18] = _mo; stateLanes[19] = _mu; \
    stateLanes[20] = _sa; stateLanes[21] = _se; stateLanes[22] = _si; stateLanes[23] = _so; stateLanes[24] = _su
715
+
716
+ void KeccakP1600times4_PermuteAll_24rounds(void *states)
717
+ {
718
+ V256 *statesAsLanes = states;
719
+ KeccakP_DeclareVars;
720
+ #ifndef KeccakP1600times4_fullUnrolling
721
+ unsigned int i;
722
+ #endif
723
+
724
+ copyFromState(statesAsLanes);
725
+ rounds24;
726
+ copyToState(statesAsLanes);
727
+ }
728
+
729
+ void KeccakP1600times4_PermuteAll_12rounds(void *states)
730
+ {
731
+ V256 *statesAsLanes = states;
732
+ KeccakP_DeclareVars;
733
+ #if (KeccakP1600times4_unrolling < 12)
734
+ unsigned int i;
735
+ #endif
736
+
737
+ copyFromState(statesAsLanes);
738
+ rounds12;
739
+ copyToState(statesAsLanes);
740
+ }
741
+
742
+ void KeccakP1600times4_PermuteAll_6rounds(void *states)
743
+ {
744
+ V256 *statesAsLanes = states;
745
+ KeccakP_DeclareVars;
746
+
747
+ copyFromState2rounds(statesAsLanes);
748
+ KeccakP_2rounds( 18 );
749
+ KeccakP_4rounds( 20 );
750
+ copyToState(statesAsLanes);
751
+ }
752
+
753
+ void KeccakP1600times4_PermuteAll_4rounds(void *states)
754
+ {
755
+ V256 *statesAsLanes = states;
756
+ KeccakP_DeclareVars;
757
+
758
+ copyFromState(statesAsLanes);
759
+ KeccakP_4rounds( 20 );
760
+ copyToState(statesAsLanes);
761
+ }
762
+
763
+ size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
764
+ {
765
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
766
+
767
+ if (laneCount == 21) {
768
+ #ifndef KeccakP1600times4_fullUnrolling
769
+ unsigned int i;
770
+ #endif
771
+ const unsigned char *dataStart = data;
772
+ V256 *statesAsLanes = states;
773
+ const uint64_t *dataAsLanes = (const uint64_t *)data;
774
+ KeccakP_DeclareVars;
775
+ V128 index;
776
+
777
+ copyFromState(statesAsLanes);
778
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
779
+ while(dataByteLen >= dataMinimumSize) {
780
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
781
+ Add_In( _ba, 0 );
782
+ Add_In( _be, 1 );
783
+ Add_In( _bi, 2 );
784
+ Add_In( _bo, 3 );
785
+ Add_In( _bu, 4 );
786
+ Add_In( _ga, 5 );
787
+ Add_In( _ge, 6 );
788
+ Add_In( _gi, 7 );
789
+ Add_In( _go, 8 );
790
+ Add_In( _gu, 9 );
791
+ Add_In( _ka, 10 );
792
+ Add_In( _ke, 11 );
793
+ Add_In( _ki, 12 );
794
+ Add_In( _ko, 13 );
795
+ Add_In( _ku, 14 );
796
+ Add_In( _ma, 15 );
797
+ Add_In( _me, 16 );
798
+ Add_In( _mi, 17 );
799
+ Add_In( _mo, 18 );
800
+ Add_In( _mu, 19 );
801
+ Add_In( _sa, 20 );
802
+ #undef Add_In
803
+ rounds24;
804
+ dataAsLanes += laneOffsetSerial;
805
+ dataByteLen -= laneOffsetSerial*8;
806
+ }
807
+ copyToState(statesAsLanes);
808
+ return (const unsigned char *)dataAsLanes - dataStart;
809
+ }
810
+ else {
811
+ const unsigned char *dataStart = data;
812
+
813
+ while(dataByteLen >= dataMinimumSize) {
814
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
815
+ KeccakP1600times4_PermuteAll_24rounds(states);
816
+ data += laneOffsetSerial*8;
817
+ dataByteLen -= laneOffsetSerial*8;
818
+ }
819
+ return data - dataStart;
820
+ }
821
+ }
822
+
823
+ size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
824
+ {
825
+ size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
826
+
827
+ if (laneCount == 21) {
828
+ #if (KeccakP1600times4_unrolling < 12)
829
+ unsigned int i;
830
+ #endif
831
+ const unsigned char *dataStart = data;
832
+ V256 *statesAsLanes = states;
833
+ const uint64_t *dataAsLanes = (const uint64_t *)data;
834
+ KeccakP_DeclareVars;
835
+ V128 index;
836
+
837
+ copyFromState(statesAsLanes);
838
+ index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
839
+ while(dataByteLen >= dataMinimumSize) {
840
+ #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
841
+ Add_In( _ba, 0 );
842
+ Add_In( _be, 1 );
843
+ Add_In( _bi, 2 );
844
+ Add_In( _bo, 3 );
845
+ Add_In( _bu, 4 );
846
+ Add_In( _ga, 5 );
847
+ Add_In( _ge, 6 );
848
+ Add_In( _gi, 7 );
849
+ Add_In( _go, 8 );
850
+ Add_In( _gu, 9 );
851
+ Add_In( _ka, 10 );
852
+ Add_In( _ke, 11 );
853
+ Add_In( _ki, 12 );
854
+ Add_In( _ko, 13 );
855
+ Add_In( _ku, 14 );
856
+ Add_In( _ma, 15 );
857
+ Add_In( _me, 16 );
858
+ Add_In( _mi, 17 );
859
+ Add_In( _mo, 18 );
860
+ Add_In( _mu, 19 );
861
+ Add_In( _sa, 20 );
862
+ #undef Add_In
863
+ rounds12;
864
+ dataAsLanes += laneOffsetSerial;
865
+ dataByteLen -= laneOffsetSerial*8;
866
+ }
867
+ copyToState(statesAsLanes);
868
+ return (const unsigned char *)dataAsLanes - dataStart;
869
+ }
870
+ else {
871
+ const unsigned char *dataStart = data;
872
+
873
+ while(dataByteLen >= dataMinimumSize) {
874
+ KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
875
+ KeccakP1600times4_PermuteAll_12rounds(states);
876
+ data += laneOffsetSerial*8;
877
+ dataByteLen -= laneOffsetSerial*8;
878
+ }
879
+ return data - dataStart;
880
+ }
881
+ }