sleeping_kangaroo12 0.0.1 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (296)
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +27 -0
  3. data/README.md +48 -53
  4. data/ext/Rakefile +12 -37
  5. data/ext/binding/sleeping_kangaroo12.c +1 -16
  6. data/ext/{xkcp → k12}/Makefile +0 -0
  7. data/ext/k12/Makefile.build +118 -0
  8. data/ext/k12/README.markdown +86 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  11. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  12. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  13. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  14. data/ext/k12/lib/KangarooTwelve.c +332 -0
  15. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  16. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  19. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  20. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  24. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  25. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  26. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  27. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  28. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  33. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  34. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  35. data/lib/sleeping_kangaroo12/binding.rb +2 -1
  36. data/lib/sleeping_kangaroo12/build/loader.rb +1 -0
  37. data/lib/sleeping_kangaroo12/build/platform.rb +1 -0
  38. data/lib/sleeping_kangaroo12/digest.rb +38 -4
  39. data/lib/sleeping_kangaroo12/version.rb +1 -1
  40. metadata +48 -288
  41. data/ext/config/xkcp.build +0 -17
  42. data/ext/xkcp/LICENSE +0 -1
  43. data/ext/xkcp/Makefile.build +0 -200
  44. data/ext/xkcp/README.markdown +0 -296
  45. data/ext/xkcp/lib/HighLevel.build +0 -143
  46. data/ext/xkcp/lib/LowLevel.build +0 -757
  47. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  48. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  49. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  50. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  51. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  52. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  53. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  54. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  55. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  56. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  57. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  58. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  59. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  60. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  61. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  62. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  63. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  64. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  65. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  66. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  67. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  68. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  69. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  70. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  71. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  72. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  73. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  74. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  75. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  76. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  77. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  78. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  79. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  80. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  81. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  82. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  83. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  84. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  96. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  98. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  99. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  100. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  107. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  108. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  109. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  111. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  112. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  113. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  114. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  115. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  116. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  117. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  120. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  121. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  122. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  123. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  124. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  125. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  126. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  127. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  128. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  129. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  130. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  131. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  132. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  133. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  145. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  146. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  147. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  148. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  149. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  159. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  160. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  161. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  162. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  163. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  170. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  171. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  172. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  173. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  174. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  175. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  177. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  178. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  179. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  180. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  181. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  182. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  183. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  184. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  185. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  186. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  187. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  189. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  190. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  191. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  192. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  193. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  194. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  195. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  196. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  203. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  204. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  205. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  206. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  207. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  208. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  209. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  210. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  211. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  212. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  213. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  219. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  220. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  221. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  222. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  223. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  224. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  225. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  226. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  227. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  228. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  229. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  230. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  231. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  232. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  233. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  234. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  235. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  236. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  237. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  246. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  247. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  248. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  249. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  250. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  251. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  252. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  253. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  254. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  255. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  256. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  257. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  258. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  259. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  260. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  261. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  262. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  263. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  264. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  265. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  266. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  267. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  268. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  269. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  270. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  271. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  272. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  273. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  274. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  275. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  276. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  277. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  278. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  279. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  280. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  281. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  282. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  283. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  284. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  285. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  286. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  287. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  288. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  289. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  290. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  291. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  292. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  293. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  294. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  295. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  296. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -0,0 +1,419 @@
1
+ /*
2
+ K12 based on the eXtended Keccak Code Package (XKCP)
3
+ https://github.com/XKCP/XKCP
4
+
5
+ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
6
+
7
+ Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
8
+
9
+ For more information, feedback or questions, please refer to the Keccak Team website:
10
+ https://keccak.team/
11
+
12
+ To the extent possible under law, the implementer has waived all copyright
13
+ and related or neighboring rights to the source code in this file.
14
+ http://creativecommons.org/publicdomain/zero/1.0/
15
+
16
+ ---
17
+
18
+ Please refer to the XKCP for more details.
19
+ */
20
+
21
+ #include <stdint.h>
22
+ #include <immintrin.h>
23
+ #include "KeccakP-1600-SnP.h"
24
+ #include "align.h"
25
+
26
+ #define AVX2alignment 32 /* alignment in bytes (one 256-bit vector) passed to ALIGN() for the round-constant table */
27
+
28
+ #define ANDnu256(a, b) _mm256_andnot_si256(a, b) /* (~a) & b -- note that the FIRST operand is the one complemented */
29
+ #define CONST256(a) _mm256_load_si256((const __m256i *)&(a)) /* aligned 256-bit load of a constant table; same expansion as LOAD256 */
30
+ #define CONST256_64(a) _mm256_set1_epi64x(a) /* broadcast one 64-bit value to all four 64-bit elements */
31
+ #define LOAD256(a) _mm256_load_si256((const __m256i *)&(a)) /* aligned 256-bit load; the address of a must be 32-byte aligned */
32
+ #define LOAD4_64(a, b, c, d) _mm256_set_epi64x((uint64_t)(a), (uint64_t)(b), (uint64_t)(c), (uint64_t)(d)) /* a lands in the highest element, d in the lowest */
33
+ #define ROL64in256(d, a, o) d = _mm256_or_si256(_mm256_slli_epi64(a, o), _mm256_srli_epi64(a, 64-(o))) /* d = every 64-bit element of a rotated left by o bits. NOTE(review): o is not parenthesized in the left shift and a is evaluated twice, so pass simple arguments only */
34
+ #define ROL64in256_8(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho8)) /* rotate every 64-bit element left by 8 bits with a single byte shuffle */
35
+ #define ROL64in256_56(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho56)) /* rotate every 64-bit element left by 56 bits with a single byte shuffle */
36
+ static const uint64_t rho8[4] ALIGN(32) = {0x0605040302010007, 0x0E0D0C0B0A09080F, 0x1615141312111017, 0x1E1D1C1B1A19181F}; /* byte-shuffle mask for ROL64in256_8: result byte k of each 64-bit element takes source byte (k+7) mod 8. NOTE(review): uses a literal 32 rather than AVX2alignment */
37
+ static const uint64_t rho56[4] ALIGN(32) = {0x0007060504030201, 0x080F0E0D0C0B0A09, 0x1017161514131211, 0x181F1E1D1C1B1A19}; /* byte-shuffle mask for ROL64in256_56: result byte k of each 64-bit element takes source byte (k+1) mod 8 */
38
+ #define STORE256(a, b) _mm256_store_si256((__m256i *)&(a), b) /* aligned 256-bit store of b to the address of a */
39
+ #define STORE256u(a, b) _mm256_storeu_si256((__m256i *)&(a), b) /* unaligned 256-bit store of b to the address of a */
40
+ #define XOR256(a, b) _mm256_xor_si256(a, b) /* a ^ b */
41
+ #define XOReq256(a, b) a = _mm256_xor_si256(a, b) /* a ^= b; expands to a bare assignment, so a must be an lvalue */
42
+ #define UNPACKL( a, b ) _mm256_unpacklo_epi64((a), (b)) /* interleave the low 64-bit elements of each 128-bit half of a and b */
43
+ #define UNPACKH( a, b ) _mm256_unpackhi_epi64((a), (b)) /* interleave the high 64-bit elements of each 128-bit half of a and b */
44
+ #define PERM128( a, b, c ) _mm256_permute2f128_si256(a, b, c) /* build a vector from 128-bit halves of a and b selected by the immediate c */
45
+ #define SHUFFLE64( a, b, c ) _mm256_castpd_si256(_mm256_shuffle_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b), c)) /* 64-bit element shuffle of a and b by immediate c, done through the double-precision shuffle; the casts only reinterpret bits */
46
+ #define ZERO() _mm256_setzero_si256() /* all-zero 256-bit vector */
47
+ /* Table of 24 64-bit round constants. The round macros below index it with their round argument i, broadcast the selected entry to all four 64-bit elements with CONST256_64, and XOR it into lane E##ba. */
48
+ static ALIGN(AVX2alignment) const uint64_t KeccakP1600RoundConstants[24] = {
49
+ 0x0000000000000001ULL,
50
+ 0x0000000000008082ULL,
51
+ 0x800000000000808aULL,
52
+ 0x8000000080008000ULL,
53
+ 0x000000000000808bULL,
54
+ 0x0000000080000001ULL,
55
+ 0x8000000080008081ULL,
56
+ 0x8000000000008009ULL,
57
+ 0x000000000000008aULL,
58
+ 0x0000000000000088ULL,
59
+ 0x0000000080008009ULL,
60
+ 0x000000008000000aULL,
61
+ 0x000000008000808bULL,
62
+ 0x800000000000008bULL,
63
+ 0x8000000000008089ULL,
64
+ 0x8000000000008003ULL,
65
+ 0x8000000000008002ULL,
66
+ 0x8000000000000080ULL,
67
+ 0x000000000000800aULL,
68
+ 0x800000008000000aULL,
69
+ 0x8000000080008081ULL,
70
+ 0x8000000000008080ULL,
71
+ 0x0000000080000001ULL,
72
+ 0x8000000080008008ULL};
73
+ /* Declares every __m256i local the round macros use: A.. and E.. (two sets of 25 lane variables that the round macros read from / write to), B.. (rotated lanes feeding the and-not/XOR step), Ca..Cu with Ca1..Cu1 (column XORs and their 1-bit left rotations) and Da..Du (per-column values XORed into the lanes). Each __m256i holds four 64-bit elements -- presumably the same lane of four independent states; confirm against the callers. */
74
+ #define declareABCDE \
75
+ __m256i Aba, Abe, Abi, Abo, Abu; \
76
+ __m256i Aga, Age, Agi, Ago, Agu; \
77
+ __m256i Aka, Ake, Aki, Ako, Aku; \
78
+ __m256i Ama, Ame, Ami, Amo, Amu; \
79
+ __m256i Asa, Ase, Asi, Aso, Asu; \
80
+ __m256i Bba, Bbe, Bbi, Bbo, Bbu; \
81
+ __m256i Bga, Bge, Bgi, Bgo, Bgu; \
82
+ __m256i Bka, Bke, Bki, Bko, Bku; \
83
+ __m256i Bma, Bme, Bmi, Bmo, Bmu; \
84
+ __m256i Bsa, Bse, Bsi, Bso, Bsu; \
85
+ __m256i Ca, Ce, Ci, Co, Cu; \
86
+ __m256i Ca1, Ce1, Ci1, Co1, Cu1; \
87
+ __m256i Da, De, Di, Do, Du; \
88
+ __m256i Eba, Ebe, Ebi, Ebo, Ebu; \
89
+ __m256i Ega, Ege, Egi, Ego, Egu; \
90
+ __m256i Eka, Eke, Eki, Eko, Eku; \
91
+ __m256i Ema, Eme, Emi, Emo, Emu; \
92
+ __m256i Esa, Ese, Esi, Eso, Esu; \
93
+
94
+ #define prepareTheta /* Ca..Cu = XOR of the five A lanes that share the same final letter (a, e, i, o, u); the round macros consume these values */ \
95
+ Ca = XOR256(Aba, XOR256(Aga, XOR256(Aka, XOR256(Ama, Asa)))); \
96
+ Ce = XOR256(Abe, XOR256(Age, XOR256(Ake, XOR256(Ame, Ase)))); \
97
+ Ci = XOR256(Abi, XOR256(Agi, XOR256(Aki, XOR256(Ami, Asi)))); \
98
+ Co = XOR256(Abo, XOR256(Ago, XOR256(Ako, XOR256(Amo, Aso)))); \
99
+ Cu = XOR256(Abu, XOR256(Agu, XOR256(Aku, XOR256(Amu, Asu)))); \
100
+
101
+ /* --- Theta Rho Pi Chi Iota Prepare-theta: one round that reads the lanes A##.. and writes the lanes E##.., XORs KeccakP1600RoundConstants[i] into E##ba, and leaves in Ca..Cu the XOR of the freshly written E columns. On entry Ca..Cu must already hold the column XORs of A##.. (see prepareTheta). The A##.. lanes are modified in place (Da..Du are XORed into them). Relies on the locals from declareABCDE. */
102
+ /* --- 64-bit lanes mapped to the 64-bit elements of __m256i vectors */
103
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
104
+ ROL64in256(Ce1, Ce, 1); \
105
+ Da = XOR256(Cu, Ce1); \
106
+ ROL64in256(Ci1, Ci, 1); \
107
+ De = XOR256(Ca, Ci1); \
108
+ ROL64in256(Co1, Co, 1); \
109
+ Di = XOR256(Ce, Co1); \
110
+ ROL64in256(Cu1, Cu, 1); \
111
+ Do = XOR256(Ci, Cu1); \
112
+ ROL64in256(Ca1, Ca, 1); \
113
+ Du = XOR256(Co, Ca1); \
114
+ /* E##ba..E##bu; Ca..Cu are overwritten here with these new values (the old ones are no longer needed once Da..Du exist) */ \
115
+ XOReq256(A##ba, Da); \
116
+ Bba = A##ba; \
117
+ XOReq256(A##ge, De); \
118
+ ROL64in256(Bbe, A##ge, 44); \
119
+ XOReq256(A##ki, Di); \
120
+ ROL64in256(Bbi, A##ki, 43); \
121
+ E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \
122
+ XOReq256(E##ba, CONST256_64(KeccakP1600RoundConstants[i])); \
123
+ Ca = E##ba; \
124
+ XOReq256(A##mo, Do); \
125
+ ROL64in256(Bbo, A##mo, 21); \
126
+ E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); \
127
+ Ce = E##be; \
128
+ XOReq256(A##su, Du); \
129
+ ROL64in256(Bbu, A##su, 14); \
130
+ E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \
131
+ Ci = E##bi; \
132
+ E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \
133
+ Co = E##bo; \
134
+ E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \
135
+ Cu = E##bu; \
136
+ /* E##ga..E##gu, each XORed into the matching Ca..Cu accumulator */ \
137
+ XOReq256(A##bo, Do); \
138
+ ROL64in256(Bga, A##bo, 28); \
139
+ XOReq256(A##gu, Du); \
140
+ ROL64in256(Bge, A##gu, 20); \
141
+ XOReq256(A##ka, Da); \
142
+ ROL64in256(Bgi, A##ka, 3); \
143
+ E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \
144
+ XOReq256(Ca, E##ga); \
145
+ XOReq256(A##me, De); \
146
+ ROL64in256(Bgo, A##me, 45); \
147
+ E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \
148
+ XOReq256(Ce, E##ge); \
149
+ XOReq256(A##si, Di); \
150
+ ROL64in256(Bgu, A##si, 61); \
151
+ E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \
152
+ XOReq256(Ci, E##gi); \
153
+ E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \
154
+ XOReq256(Co, E##go); \
155
+ E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \
156
+ XOReq256(Cu, E##gu); \
157
+ /* E##ka..E##ku, each XORed into the matching Ca..Cu accumulator; the 8-bit rotation uses the byte-shuffle variant */ \
158
+ XOReq256(A##be, De); \
159
+ ROL64in256(Bka, A##be, 1); \
160
+ XOReq256(A##gi, Di); \
161
+ ROL64in256(Bke, A##gi, 6); \
162
+ XOReq256(A##ko, Do); \
163
+ ROL64in256(Bki, A##ko, 25); \
164
+ E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \
165
+ XOReq256(Ca, E##ka); \
166
+ XOReq256(A##mu, Du); \
167
+ ROL64in256_8(Bko, A##mu); \
168
+ E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \
169
+ XOReq256(Ce, E##ke); \
170
+ XOReq256(A##sa, Da); \
171
+ ROL64in256(Bku, A##sa, 18); \
172
+ E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \
173
+ XOReq256(Ci, E##ki); \
174
+ E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \
175
+ XOReq256(Co, E##ko); \
176
+ E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \
177
+ XOReq256(Cu, E##ku); \
178
+ /* E##ma..E##mu, each XORed into the matching Ca..Cu accumulator; the 56-bit rotation uses the byte-shuffle variant */ \
179
+ XOReq256(A##bu, Du); \
180
+ ROL64in256(Bma, A##bu, 27); \
181
+ XOReq256(A##ga, Da); \
182
+ ROL64in256(Bme, A##ga, 36); \
183
+ XOReq256(A##ke, De); \
184
+ ROL64in256(Bmi, A##ke, 10); \
185
+ E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \
186
+ XOReq256(Ca, E##ma); \
187
+ XOReq256(A##mi, Di); \
188
+ ROL64in256(Bmo, A##mi, 15); \
189
+ E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \
190
+ XOReq256(Ce, E##me); \
191
+ XOReq256(A##so, Do); \
192
+ ROL64in256_56(Bmu, A##so); \
193
+ E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \
194
+ XOReq256(Ci, E##mi); \
195
+ E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \
196
+ XOReq256(Co, E##mo); \
197
+ E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \
198
+ XOReq256(Cu, E##mu); \
199
+ /* E##sa..E##su, each XORed into the matching Ca..Cu accumulator; afterwards Ca..Cu cover all five groups of E */ \
200
+ XOReq256(A##bi, Di); \
201
+ ROL64in256(Bsa, A##bi, 62); \
202
+ XOReq256(A##go, Do); \
203
+ ROL64in256(Bse, A##go, 55); \
204
+ XOReq256(A##ku, Du); \
205
+ ROL64in256(Bsi, A##ku, 39); \
206
+ E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \
207
+ XOReq256(Ca, E##sa); \
208
+ XOReq256(A##ma, Da); \
209
+ ROL64in256(Bso, A##ma, 41); \
210
+ E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \
211
+ XOReq256(Ce, E##se); \
212
+ XOReq256(A##se, De); \
213
+ ROL64in256(Bsu, A##se, 2); \
214
+ E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \
215
+ XOReq256(Ci, E##si); \
216
+ E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \
217
+ XOReq256(Co, E##so); \
218
+ E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \
219
+ XOReq256(Cu, E##su); \
220
+ \
221
+
222
+ /* --- Theta Rho Pi Chi Iota
   One round step that reads the state lanes A##ba .. A##su and writes the
   result lanes E##ba .. E##su.
   - Uses the column values Ca..Cu, which this macro only reads: they must
     already hold valid values when the macro is expanded.
   - Derives Da..Du from Ca..Cu (each D is one C XORed with another C
     rotated by 1), applies the matching D to every A lane with XOReq256,
     rotates the lane into a B temporary, and combines three B temporaries
     into each E lane as XOR256(B0, ANDnu256(B1, B2)).
   - XORs KeccakP1600RoundConstants[i] (broadcast by CONST256_64) into
     E##ba only.
   - Side effect: the A lanes themselves are passed to XOReq256 and are
     therefore not preserved.
   - This macro contains no XOReq256 updates of Ca..Cu, so the column values
     are stale afterwards; rounds12 uses it only as its final step.
   The helper macros (ROL64in256, ROL64in256_8, ROL64in256_56, XOR256,
   XOReq256, ANDnu256, CONST256_64) are defined outside this view;
   presumably ROL64in256_8/_56 are dedicated rotations by 8 and 56 bits and
   ANDnu256 is an and-not -- TODO confirm against their definitions.
   The expansion is a run of complete statements, each ending in ';'. */
223
+ /* --- 64-bit lanes mapped to 64-bit words */
224
+ #define thetaRhoPiChiIota(i, A, E) \
225
+ ROL64in256(Ce1, Ce, 1); \
226
+ Da = XOR256(Cu, Ce1); \
227
+ ROL64in256(Ci1, Ci, 1); \
228
+ De = XOR256(Ca, Ci1); \
229
+ ROL64in256(Co1, Co, 1); \
230
+ Di = XOR256(Ce, Co1); \
231
+ ROL64in256(Cu1, Cu, 1); \
232
+ Do = XOR256(Ci, Cu1); \
233
+ ROL64in256(Ca1, Ca, 1); \
234
+ Du = XOR256(Co, Ca1); \
235
+ /* output lanes E##ba .. E##bu (A##ba is used without rotation) */ \
236
+ XOReq256(A##ba, Da); \
237
+ Bba = A##ba; \
238
+ XOReq256(A##ge, De); \
239
+ ROL64in256(Bbe, A##ge, 44); \
240
+ XOReq256(A##ki, Di); \
241
+ ROL64in256(Bbi, A##ki, 43); \
242
+ E##ba = XOR256(Bba, ANDnu256(Bbe, Bbi)); \
243
+ XOReq256(E##ba, CONST256_64(KeccakP1600RoundConstants[i])); /* round constant goes into this lane only */ \
244
+ XOReq256(A##mo, Do); \
245
+ ROL64in256(Bbo, A##mo, 21); \
246
+ E##be = XOR256(Bbe, ANDnu256(Bbi, Bbo)); \
247
+ XOReq256(A##su, Du); \
248
+ ROL64in256(Bbu, A##su, 14); \
249
+ E##bi = XOR256(Bbi, ANDnu256(Bbo, Bbu)); \
250
+ E##bo = XOR256(Bbo, ANDnu256(Bbu, Bba)); \
251
+ E##bu = XOR256(Bbu, ANDnu256(Bba, Bbe)); \
252
+ /* output lanes E##ga .. E##gu */ \
253
+ XOReq256(A##bo, Do); \
254
+ ROL64in256(Bga, A##bo, 28); \
255
+ XOReq256(A##gu, Du); \
256
+ ROL64in256(Bge, A##gu, 20); \
257
+ XOReq256(A##ka, Da); \
258
+ ROL64in256(Bgi, A##ka, 3); \
259
+ E##ga = XOR256(Bga, ANDnu256(Bge, Bgi)); \
260
+ XOReq256(A##me, De); \
261
+ ROL64in256(Bgo, A##me, 45); \
262
+ E##ge = XOR256(Bge, ANDnu256(Bgi, Bgo)); \
263
+ XOReq256(A##si, Di); \
264
+ ROL64in256(Bgu, A##si, 61); \
265
+ E##gi = XOR256(Bgi, ANDnu256(Bgo, Bgu)); \
266
+ E##go = XOR256(Bgo, ANDnu256(Bgu, Bga)); \
267
+ E##gu = XOR256(Bgu, ANDnu256(Bga, Bge)); \
268
+ /* output lanes E##ka .. E##ku */ \
269
+ XOReq256(A##be, De); \
270
+ ROL64in256(Bka, A##be, 1); \
271
+ XOReq256(A##gi, Di); \
272
+ ROL64in256(Bke, A##gi, 6); \
273
+ XOReq256(A##ko, Do); \
274
+ ROL64in256(Bki, A##ko, 25); \
275
+ E##ka = XOR256(Bka, ANDnu256(Bke, Bki)); \
276
+ XOReq256(A##mu, Du); \
277
+ ROL64in256_8(Bko, A##mu); \
278
+ E##ke = XOR256(Bke, ANDnu256(Bki, Bko)); \
279
+ XOReq256(A##sa, Da); \
280
+ ROL64in256(Bku, A##sa, 18); \
281
+ E##ki = XOR256(Bki, ANDnu256(Bko, Bku)); \
282
+ E##ko = XOR256(Bko, ANDnu256(Bku, Bka)); \
283
+ E##ku = XOR256(Bku, ANDnu256(Bka, Bke)); \
284
+ /* output lanes E##ma .. E##mu */ \
285
+ XOReq256(A##bu, Du); \
286
+ ROL64in256(Bma, A##bu, 27); \
287
+ XOReq256(A##ga, Da); \
288
+ ROL64in256(Bme, A##ga, 36); \
289
+ XOReq256(A##ke, De); \
290
+ ROL64in256(Bmi, A##ke, 10); \
291
+ E##ma = XOR256(Bma, ANDnu256(Bme, Bmi)); \
292
+ XOReq256(A##mi, Di); \
293
+ ROL64in256(Bmo, A##mi, 15); \
294
+ E##me = XOR256(Bme, ANDnu256(Bmi, Bmo)); \
295
+ XOReq256(A##so, Do); \
296
+ ROL64in256_56(Bmu, A##so); \
297
+ E##mi = XOR256(Bmi, ANDnu256(Bmo, Bmu)); \
298
+ E##mo = XOR256(Bmo, ANDnu256(Bmu, Bma)); \
299
+ E##mu = XOR256(Bmu, ANDnu256(Bma, Bme)); \
300
+ /* output lanes E##sa .. E##su */ \
301
+ XOReq256(A##bi, Di); \
302
+ ROL64in256(Bsa, A##bi, 62); \
303
+ XOReq256(A##go, Do); \
304
+ ROL64in256(Bse, A##go, 55); \
305
+ XOReq256(A##ku, Du); \
306
+ ROL64in256(Bsi, A##ku, 39); \
307
+ E##sa = XOR256(Bsa, ANDnu256(Bse, Bsi)); \
308
+ XOReq256(A##ma, Da); \
309
+ ROL64in256(Bso, A##ma, 41); \
310
+ E##se = XOR256(Bse, ANDnu256(Bsi, Bso)); \
311
+ XOReq256(A##se, De); \
312
+ ROL64in256(Bsu, A##se, 2); \
313
+ E##si = XOR256(Bsi, ANDnu256(Bso, Bsu)); \
314
+ E##so = XOR256(Bso, ANDnu256(Bsu, Bsa)); \
315
+ E##su = XOR256(Bsu, ANDnu256(Bsa, Bse)); \
316
+ \
317
+
318
/* Assigns ZERO() to each of the 25 state lanes X##ba .. X##su.
   X is the lane-name prefix (the caller in this file passes A).
   ZERO() is defined outside this view; presumably it yields an all-zero
   256-bit vector -- TODO confirm.
   The expansion already ends each assignment with ';'. */
+ #define initializeState(X) \
319
+ X##ba = ZERO(); \
320
+ X##be = ZERO(); \
321
+ X##bi = ZERO(); \
322
+ X##bo = ZERO(); \
323
+ X##bu = ZERO(); \
324
+ X##ga = ZERO(); \
325
+ X##ge = ZERO(); \
326
+ X##gi = ZERO(); \
327
+ X##go = ZERO(); \
328
+ X##gu = ZERO(); \
329
+ X##ka = ZERO(); \
330
+ X##ke = ZERO(); \
331
+ X##ki = ZERO(); \
332
+ X##ko = ZERO(); \
333
+ X##ku = ZERO(); \
334
+ X##ma = ZERO(); \
335
+ X##me = ZERO(); \
336
+ X##mi = ZERO(); \
337
+ X##mo = ZERO(); \
338
+ X##mu = ZERO(); \
339
+ X##sa = ZERO(); \
340
+ X##se = ZERO(); \
341
+ X##si = ZERO(); \
342
+ X##so = ZERO(); \
343
+ X##su = ZERO(); \
344
+
345
/* XORs 16 64-bit words from each of four inputs into the first 16 state
   lanes X##ba .. X##ma.
   For word index k (0..15), lane k receives
   LOAD4_64(data3[k], data2[k], data1[k], data0[k]); note that data3 is
   passed first.
   data0..data3 must be indexable as 64-bit words (the caller in this file
   passes const uint64_t * values).
   Each data argument is expanded 16 times, so arguments must be free of
   side effects.
   LOAD4_64 is defined outside this view; presumably data0 ends up in the
   lowest 64-bit element of the vector -- TODO confirm. */
+ #define XORdata16(X, data0, data1, data2, data3) \
346
+ XOReq256(X##ba, LOAD4_64((data3)[ 0], (data2)[ 0], (data1)[ 0], (data0)[ 0])); \
347
+ XOReq256(X##be, LOAD4_64((data3)[ 1], (data2)[ 1], (data1)[ 1], (data0)[ 1])); \
348
+ XOReq256(X##bi, LOAD4_64((data3)[ 2], (data2)[ 2], (data1)[ 2], (data0)[ 2])); \
349
+ XOReq256(X##bo, LOAD4_64((data3)[ 3], (data2)[ 3], (data1)[ 3], (data0)[ 3])); \
350
+ XOReq256(X##bu, LOAD4_64((data3)[ 4], (data2)[ 4], (data1)[ 4], (data0)[ 4])); \
351
+ XOReq256(X##ga, LOAD4_64((data3)[ 5], (data2)[ 5], (data1)[ 5], (data0)[ 5])); \
352
+ XOReq256(X##ge, LOAD4_64((data3)[ 6], (data2)[ 6], (data1)[ 6], (data0)[ 6])); \
353
+ XOReq256(X##gi, LOAD4_64((data3)[ 7], (data2)[ 7], (data1)[ 7], (data0)[ 7])); \
354
+ XOReq256(X##go, LOAD4_64((data3)[ 8], (data2)[ 8], (data1)[ 8], (data0)[ 8])); \
355
+ XOReq256(X##gu, LOAD4_64((data3)[ 9], (data2)[ 9], (data1)[ 9], (data0)[ 9])); \
356
+ XOReq256(X##ka, LOAD4_64((data3)[10], (data2)[10], (data1)[10], (data0)[10])); \
357
+ XOReq256(X##ke, LOAD4_64((data3)[11], (data2)[11], (data1)[11], (data0)[11])); \
358
+ XOReq256(X##ki, LOAD4_64((data3)[12], (data2)[12], (data1)[12], (data0)[12])); \
359
+ XOReq256(X##ko, LOAD4_64((data3)[13], (data2)[13], (data1)[13], (data0)[13])); \
360
+ XOReq256(X##ku, LOAD4_64((data3)[14], (data2)[14], (data1)[14], (data0)[14])); \
361
+ XOReq256(X##ma, LOAD4_64((data3)[15], (data2)[15], (data1)[15], (data0)[15])); \
362
+
363
/* XORs 21 64-bit words from each of four inputs into state lanes
   X##ba .. X##sa: expands XORdata16 for word indices 0..15, then handles
   word indices 16..20 (lanes X##me, X##mi, X##mo, X##mu, X##sa) the same
   way. 21 words of 8 bytes match rateInBytes (21*8).
   As with XORdata16, every data argument is expanded once per lane, so
   arguments must be free of side effects. */
+ #define XORdata21(X, data0, data1, data2, data3) \
364
+ XORdata16(X, data0, data1, data2, data3) \
365
+ XOReq256(X##me, LOAD4_64((data3)[16], (data2)[16], (data1)[16], (data0)[16])); \
366
+ XOReq256(X##mi, LOAD4_64((data3)[17], (data2)[17], (data1)[17], (data0)[17])); \
367
+ XOReq256(X##mo, LOAD4_64((data3)[18], (data2)[18], (data1)[18], (data0)[18])); \
368
+ XOReq256(X##mu, LOAD4_64((data3)[19], (data2)[19], (data1)[19], (data0)[19])); \
369
+ XOReq256(X##sa, LOAD4_64((data3)[20], (data2)[20], (data1)[20], (data0)[20])); \
370
+
371
/* Twelve consecutive round steps using round-constant indices 12..23.
   Expands prepareTheta once, then eleven thetaRhoPiChiIotaPrepareTheta
   steps (indices 12..22) and one final thetaRhoPiChiIota step (index 23).
   The two lane sets A and E swap roles on every step: the first step reads
   A and writes E, the next reads E and writes A, and so on. The final step
   is invoked as (23, E, A), so the result ends up in the A lanes.
   The macro hard-codes the identifiers A and E and is used as a bare token
   without a trailing ';' (the component macros already end in ';').
   prepareTheta is defined outside this view; presumably it computes the
   initial column values Ca..Cu from the A lanes -- TODO confirm. */
+ #define rounds12 \
372
+ prepareTheta \
373
+ thetaRhoPiChiIotaPrepareTheta(12, A, E) \
374
+ thetaRhoPiChiIotaPrepareTheta(13, E, A) \
375
+ thetaRhoPiChiIotaPrepareTheta(14, A, E) \
376
+ thetaRhoPiChiIotaPrepareTheta(15, E, A) \
377
+ thetaRhoPiChiIotaPrepareTheta(16, A, E) \
378
+ thetaRhoPiChiIotaPrepareTheta(17, E, A) \
379
+ thetaRhoPiChiIotaPrepareTheta(18, A, E) \
380
+ thetaRhoPiChiIotaPrepareTheta(19, E, A) \
381
+ thetaRhoPiChiIotaPrepareTheta(20, A, E) \
382
+ thetaRhoPiChiIotaPrepareTheta(21, E, A) \
383
+ thetaRhoPiChiIotaPrepareTheta(22, A, E) \
384
+ thetaRhoPiChiIota(23, E, A)
385
+
386
+ #define chunkSize 8192 /* bytes absorbed per input; also the byte distance between the four inputs */
387
+ #define rateInBytes (21*8) /* bytes absorbed per full block: 21 lanes of 8 bytes = 168 */
388
+
389
/* Processes four inputs of chunkSize (8192) bytes each in parallel, one per
 * 64-bit element of the 256-bit state lanes, and writes 128 bytes of result.
 *
 * input  : start of the first input; the other three are read at
 *          input + chunkSize, input + 2*chunkSize and input + 3*chunkSize,
 *          so 4*chunkSize (32768) bytes must be readable.
 * output : receives four 32-byte stores at byte offsets 0, 32, 64 and 96,
 *          so 128 bytes must be writable.
 *
 * Per input, the loop absorbs 48 blocks of rateInBytes (168) bytes, each
 * followed by rounds12; the remaining 128 bytes (16 lanes) are absorbed
 * together with two constant words before a final rounds12.
 *
 * NOTE(review): input is read through const uint64_t * casts, i.e. as
 * host-order 64-bit words with no alignment handling visible here --
 * presumably acceptable on the AVX2 (x86-64) targets this file is built
 * for; confirm.
 */
+ void KangarooTwelve_AVX2_Process4Leaves(const unsigned char *input, unsigned char *output)
390
+ {
391
+ declareABCDE /* macro defined outside this view; presumably declares the A/B/C/D/E vector variables used below */
392
+ unsigned int j;
393
+
394
+ initializeState(A);
395
+ /* full blocks: j takes 48 values (0, 168, ..., 7896), consuming 8064 bytes per input */
396
+ for(j = 0; j < (chunkSize - rateInBytes); j += rateInBytes) {
397
+ XORdata21(A, (const uint64_t *)input, (const uint64_t *)(input+chunkSize), (const uint64_t *)(input+2*chunkSize), (const uint64_t *)(input+3*chunkSize));
398
+ rounds12
399
+ input += rateInBytes;
400
+ }
401
+ /* last, partial block: the remaining 8192 - 8064 = 128 bytes are exactly 16 lanes */
402
+ XORdata16(A, (const uint64_t *)input, (const uint64_t *)(input+chunkSize), (const uint64_t *)(input+2*chunkSize), (const uint64_t *)(input+3*chunkSize));
403
+ XOReq256(Ame, CONST256_64(0x0BULL)); /* Ame is lane 16, the first lane after the 16 data lanes; presumably the leaf suffix/padding byte -- confirm against the K12 spec */
404
+ XOReq256(Asa, CONST256_64(0x8000000000000000ULL)); /* Asa is lane 20, the last lane of the 21-lane rate; sets its top bit */
405
+ rounds12
406
+ /* emit lanes Aba, Abe, Abi, Abo (4 lanes x 4 inputs x 8 bytes = 128 bytes) */
407
+ {
408
+ __m256i lanesL01, lanesL23, lanesH01, lanesH23;
409
+ /* presumably a 4x4 transpose so that each 32-byte store holds the four lanes of one input -- confirm against UNPACKL/UNPACKH/PERM128 */
410
+ lanesL01 = UNPACKL( Aba, Abe );
411
+ lanesH01 = UNPACKH( Aba, Abe );
412
+ lanesL23 = UNPACKL( Abi, Abo );
413
+ lanesH23 = UNPACKH( Abi, Abo );
414
+ STORE256u( output[ 0], PERM128( lanesL01, lanesL23, 0x20 ) );
415
+ STORE256u( output[32], PERM128( lanesH01, lanesH23, 0x20 ) );
416
+ STORE256u( output[64], PERM128( lanesL01, lanesL23, 0x31 ) );
417
+ STORE256u( output[96], PERM128( lanesH01, lanesH23, 0x31 ) );
418
+ }
419
+ }