sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -0,0 +1,458 @@
1
+ /*
2
+ K12 based on the eXtended Keccak Code Package (XKCP)
3
+ https://github.com/XKCP/XKCP
4
+
5
+ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
6
+
7
+ Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
8
+
9
+ For more information, feedback or questions, please refer to the Keccak Team website:
10
+ https://keccak.team/
11
+
12
+ To the extent possible under law, the implementer has waived all copyright
13
+ and related or neighboring rights to the source code in this file.
14
+ http://creativecommons.org/publicdomain/zero/1.0/
15
+
16
+ ---
17
+
18
+ Please refer to the XKCP for more details.
19
+ */
20
+
21
+ #include <stdint.h>
22
+ #include <emmintrin.h>
23
+ #include <immintrin.h>
24
+ #include "KeccakP-1600-SnP.h"
25
+ #include "align.h"
26
+ /* Byte alignment handed to ALIGN() for the round-constant table defined further down. */
27
+ #define AVX512alignment 64
28
+
29
+ #define LOAD4_32(a,b,c,d) _mm_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d)) /* NOTE(review): first argument is cast to uint64_t while b..d use uint32_t; looks inconsistent -- confirm against upstream */
30
+ #define LOAD8_32(a,b,c,d,e,f,g,h) _mm256_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d), (uint32_t)(e), (uint32_t)(f), (uint32_t)(g), (uint32_t)(h)) /* NOTE(review): same uint64_t cast on the first argument only -- confirm */
31
+ #define LOAD_GATHER2_64(idx,p) _mm_i32gather_epi64( (const void*)(p), idx, 8) /* gather of 64-bit elements from p using the indices in idx; the trailing 8 is the scale argument */
32
+ #define LOAD_GATHER4_64(idx,p) _mm256_i32gather_epi64( (const void*)(p), idx, 8) /* 256-bit variant, same (base, index, scale) call order */
33
+ #define LOAD_GATHER8_64(idx,p) _mm512_i32gather_epi64( idx, (const void*)(p), 8) /* the 512-bit intrinsic is called with the index vector first, unlike the two gathers above */
34
+ #define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8) /* scatter counterpart; note the macro takes (p, idx, v), not (idx, p) */
35
+
36
+
37
+ /* Keccak-p[1600]×2 */
38
+ /* 128-bit primitives for the two-way code path: the leaf function below keeps one 64-bit word of each of two states in every __m128i. */
39
+ #define XOR(a,b) _mm_xor_si128(a,b)
40
+ #define XOReq(a, b) a = _mm_xor_si128(a, b) /* expands to a plain assignment, so a is named twice and must be an lvalue */
41
+ #define XOR3(a,b,c) _mm_ternarylogic_epi64(a,b,c,0x96) /* imm8 0x96 is the truth table of a ^ b ^ c */
42
+ #define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e) /* five-input XOR built from two XOR3 */
43
+ #define ROL(a,offset) _mm_rol_epi64(a,offset) /* rotate each 64-bit element left by offset */
44
+ #define Chi(a,b,c) _mm_ternarylogic_epi64(a,b,c,0xD2) /* imm8 0xD2 is the truth table of a ^ (~b & c) */
45
+ #define CONST_64(a) _mm_set1_epi64x(a) /* same 64-bit value in both elements */
46
+ #define LOAD6464(a, b) _mm_set_epi64x(a, b) /* a becomes the high 64-bit element, b the low one */
47
+ #define STORE128u(a, b) _mm_storeu_si128((__m128i *)&(a), b) /* unaligned store; a is an lvalue, the macro takes its address itself */
48
+ #define UNPACKL( a, b ) _mm_unpacklo_epi64((a), (b)) /* { low element of a, low element of b } */
49
+ #define UNPACKH( a, b ) _mm_unpackhi_epi64((a), (b)) /* { high element of a, high element of b } */
50
+ #define ZERO() _mm_setzero_si128()
51
+ /* Table of 24 64-bit round constants, indexed by round number through CONST_64(KeccakP1600RoundConstants[i]) in KeccakP_4rounds; rounds12 only uses indices 12..23. */
52
+ static ALIGN(AVX512alignment) const uint64_t KeccakP1600RoundConstants[24] = {
53
+ 0x0000000000000001ULL,
54
+ 0x0000000000008082ULL,
55
+ 0x800000000000808aULL,
56
+ 0x8000000080008000ULL,
57
+ 0x000000000000808bULL,
58
+ 0x0000000080000001ULL,
59
+ 0x8000000080008081ULL,
60
+ 0x8000000000008009ULL,
61
+ 0x000000000000008aULL,
62
+ 0x0000000000000088ULL,
63
+ 0x0000000080008009ULL,
64
+ 0x000000008000000aULL,
65
+ 0x000000008000808bULL,
66
+ 0x800000000000008bULL,
67
+ 0x8000000000008089ULL,
68
+ 0x8000000000008003ULL,
69
+ 0x8000000000008002ULL,
70
+ 0x8000000000000080ULL,
71
+ 0x000000000000800aULL,
72
+ 0x800000008000000aULL,
73
+ 0x8000000080008081ULL,
74
+ 0x8000000000008080ULL,
75
+ 0x0000000080000001ULL,
76
+ 0x8000000080008008ULL};
77
+ /* Declares every local the round macros rely on, all of vector type `type`. The last declaration has no ';' so the caller supplies it. */
78
+ #define KeccakP_DeclareVars(type) \
79
+ type _Ba, _Be, _Bi, _Bo, _Bu; /* column XORs in the Iota0 macro, then reused as per-row temporaries */ \
80
+ type _Da, _De, _Di, _Do, _Du; /* theta offsets XORed into every lane */ \
81
+ type _ba, _be, _bi, _bo, _bu; /* the 25 state lanes start here: rows b, g, k, m, s */ \
82
+ type _ga, _ge, _gi, _go, _gu; \
83
+ type _ka, _ke, _ki, _ko, _ku; \
84
+ type _ma, _me, _mi, _mo, _mu; \
85
+ type _sa, _se, _si, _so, _su
86
+ /* Processes five lanes _L1.._L5: XOR with the theta offsets, rotate by _Rr1.._Rr5, then write Chi results back into _L1.._L5. _Bb1.._Bb5 must name _Ba.._Bu in some order, because Chi always reads _Ba.._Bu directly. */
87
+ #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
88
+ _Bb1 = XOR(_L1, _Da); /* apply theta offsets; _Da.._Du must already be set */ \
89
+ _Bb2 = XOR(_L2, _De); \
90
+ _Bb3 = XOR(_L3, _Di); \
91
+ _Bb4 = XOR(_L4, _Do); \
92
+ _Bb5 = XOR(_L5, _Du); \
93
+ if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); /* only the first offset is ever passed as 0 (see the Iota0 macro), so only it is guarded */ \
94
+ _Bb2 = ROL(_Bb2, _Rr2); \
95
+ _Bb3 = ROL(_Bb3, _Rr3); \
96
+ _Bb4 = ROL(_Bb4, _Rr4); \
97
+ _Bb5 = ROL(_Bb5, _Rr5); \
98
+ _L1 = Chi( _Ba, _Be, _Bi); /* results overwrite the input lanes */ \
99
+ _L2 = Chi( _Be, _Bi, _Bo); \
100
+ _L3 = Chi( _Bi, _Bo, _Bu); \
101
+ _L4 = Chi( _Bo, _Bu, _Ba); \
102
+ _L5 = Chi( _Bu, _Ba, _Be);
103
+ /* First row of a round: computes the five column XORs and the theta offsets _Da.._Du for the whole round, processes lanes _L1.._L5, then XORs the round constant _rc into _L1. */
104
+ #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
105
+ _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
106
+ _Be = XOR5( _be, _ge, _ke, _me, _se ); \
107
+ _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
108
+ _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
109
+ _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
110
+ _Da = ROL( _Be, 1 ); /* each offset = next column rotated by 1 ... */ \
111
+ _De = ROL( _Bi, 1 ); \
112
+ _Di = ROL( _Bo, 1 ); \
113
+ _Do = ROL( _Bu, 1 ); \
114
+ _Du = ROL( _Ba, 1 ); \
115
+ _Da = XOR( _Da, _Bu ); /* ... XORed with the previous column */ \
116
+ _De = XOR( _De, _Ba ); \
117
+ _Di = XOR( _Di, _Be ); \
118
+ _Do = XOR( _Do, _Bi ); \
119
+ _Du = XOR( _Du, _Bo ); \
120
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); /* overwrites _Ba.._Bu, which is safe because the offsets are already final */ \
121
+ _L1 = XOR(_L1, _rc) /* Iota */
122
+ /* Rows 1..4 of a round: thin wrappers over KeccakP_ThetaRhoPiChi that fix, per row, the order of the _B* temporaries and the five rotation offsets. They rely on _Da.._Du set by the Iota0 macro. */
123
+ #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
124
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
125
+
126
+ #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
127
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
128
+
129
+ #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
130
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
131
+
132
+ #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
133
+ KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
134
+ /* Four consecutive rounds using round constants i..i+3. The lane names handed to each row differ from round to round and are back in natural order (_ba,_be,_bi,_bo,_bu, ...) in the fourth round. */
135
+ #define KeccakP_4rounds( i ) \
136
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST_64(KeccakP1600RoundConstants[i]) ); /* round i */ \
137
+ KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
138
+ KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
139
+ KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
140
+ KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
141
+ \
142
+ KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST_64(KeccakP1600RoundConstants[i+1]) ); /* round i+1 */ \
143
+ KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
144
+ KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
145
+ KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
146
+ KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
147
+ \
148
+ KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST_64(KeccakP1600RoundConstants[i+2]) ); /* round i+2 */ \
149
+ KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
150
+ KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
151
+ KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
152
+ KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
153
+ \
154
+ KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST_64(KeccakP1600RoundConstants[i+3]) ); /* round i+3: natural lane order again */ \
155
+ KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
156
+ KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
157
+ KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
158
+ KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
159
+ /* Twelve rounds, using round-constant indices 12..23. Callers write `rounds12` with no ';' -- the expansion already ends in one, because KeccakP_ThetaRhoPiChi does. */
160
+ #define rounds12 \
161
+ KeccakP_4rounds( 12 ); \
162
+ KeccakP_4rounds( 16 ); \
163
+ KeccakP_4rounds( 20 )
164
+ /* Sets all 25 lanes X##ba .. X##su to ZERO(). X is the variable-name prefix; the leaf function passes `_`. */
165
+ #define initializeState(X) \
166
+ X##ba = ZERO(); \
167
+ X##be = ZERO(); \
168
+ X##bi = ZERO(); \
169
+ X##bo = ZERO(); \
170
+ X##bu = ZERO(); \
171
+ X##ga = ZERO(); \
172
+ X##ge = ZERO(); \
173
+ X##gi = ZERO(); \
174
+ X##go = ZERO(); \
175
+ X##gu = ZERO(); \
176
+ X##ka = ZERO(); \
177
+ X##ke = ZERO(); \
178
+ X##ki = ZERO(); \
179
+ X##ko = ZERO(); \
180
+ X##ku = ZERO(); \
181
+ X##ma = ZERO(); \
182
+ X##me = ZERO(); \
183
+ X##mi = ZERO(); \
184
+ X##mo = ZERO(); \
185
+ X##mu = ZERO(); \
186
+ X##sa = ZERO(); \
187
+ X##se = ZERO(); \
188
+ X##si = ZERO(); \
189
+ X##so = ZERO(); \
190
+ X##su = ZERO(); /* the trailing backslash continues the macro onto the following (empty) line */ \
191
+
192
+ #define XORdata16(X, data0, data1) /* XORs 16 64-bit words (indices 0..15) from each of two inputs into lanes X##ba .. X##ma */ \
192
+ XOReq(X##ba, LOAD6464((data1)[ 0], (data0)[ 0])); /* data0 goes to the low element, data1 to the high element */ \
193
+ XOReq(X##be, LOAD6464((data1)[ 1], (data0)[ 1])); \
194
+ XOReq(X##bi, LOAD6464((data1)[ 2], (data0)[ 2])); \
195
+ XOReq(X##bo, LOAD6464((data1)[ 3], (data0)[ 3])); \
196
+ XOReq(X##bu, LOAD6464((data1)[ 4], (data0)[ 4])); \
197
+ XOReq(X##ga, LOAD6464((data1)[ 5], (data0)[ 5])); \
198
+ XOReq(X##ge, LOAD6464((data1)[ 6], (data0)[ 6])); \
199
+ XOReq(X##gi, LOAD6464((data1)[ 7], (data0)[ 7])); \
200
+ XOReq(X##go, LOAD6464((data1)[ 8], (data0)[ 8])); \
201
+ XOReq(X##gu, LOAD6464((data1)[ 9], (data0)[ 9])); \
202
+ XOReq(X##ka, LOAD6464((data1)[10], (data0)[10])); \
203
+ XOReq(X##ke, LOAD6464((data1)[11], (data0)[11])); \
204
+ XOReq(X##ki, LOAD6464((data1)[12], (data0)[12])); \
205
+ XOReq(X##ko, LOAD6464((data1)[13], (data0)[13])); \
206
+ XOReq(X##ku, LOAD6464((data1)[14], (data0)[14])); \
207
+ XOReq(X##ma, LOAD6464((data1)[15], (data0)[15])); /* ends with ';' and a continuation, so XORdata21 can append to it directly */ \
209
+
210
+ #define XORdata21(X, data0, data1) /* XORs 21 words per input: the 16 handled by XORdata16 plus indices 16..20 into X##me .. X##sa */ \
210
+ XORdata16(X, data0, data1) /* no ';' needed: XORdata16 already ends with one */ \
211
+ XOReq(X##me, LOAD6464((data1)[16], (data0)[16])); \
212
+ XOReq(X##mi, LOAD6464((data1)[17], (data0)[17])); \
213
+ XOReq(X##mo, LOAD6464((data1)[18], (data0)[18])); \
214
+ XOReq(X##mu, LOAD6464((data1)[19], (data0)[19])); \
215
+ XOReq(X##sa, LOAD6464((data1)[20], (data0)[20])); \
217
+
218
+ #define chunkSize 8192 /* bytes per leaf; also the distance between the two inputs processed side by side */
218
+ #define rateInBytes (21*8) /* 168 bytes = the 21 64-bit words absorbed by XORdata21 */
220
+ /* Processes two chunkSize-byte leaves in parallel: the first at input (low 64-bit element of every lane), the second at input+chunkSize (high element). Reads 2*chunkSize bytes from input and writes 64 bytes to output: bytes 0..31 for the first leaf, 32..63 for the second. */
221
+ void KangarooTwelve_AVX512_Process2Leaves(const unsigned char *input, unsigned char *output)
222
+ {
223
+ KeccakP_DeclareVars(__m128i);
224
+ unsigned int j;
225
+
226
+ initializeState(_);
227
+
228
+ for(j = 0; j < (chunkSize - rateInBytes); j += rateInBytes) { /* with 8192 and 168 this runs 48 times, absorbing 8064 bytes per leaf */
229
+ XORdata21(_, (const uint64_t *)input, (const uint64_t *)(input+chunkSize)); /* NOTE(review): bytes are read through uint64_t pointers -- assumes the target tolerates this cast for unaligned input; confirm */
230
+ rounds12
231
+ input += rateInBytes;
232
+ }
233
+
234
+ XORdata16(_, (const uint64_t *)input, (const uint64_t *)(input+chunkSize)); /* remaining 128 bytes (16 words) of each leaf */
235
+ XOReq(_me, CONST_64(0x0BULL)); /* word 16, right after the data; presumably the KangarooTwelve leaf suffix plus first padding bit -- confirm against the spec */
236
+ XOReq(_sa, CONST_64(0x8000000000000000ULL)); /* top bit of word 20, the last word of the rate; presumably the final padding bit */
237
+ rounds12
238
+
239
+ STORE128u( *(__m128i*)&(output[ 0]), UNPACKL( _ba, _be ) ); /* first leaf: words 0 and 1 */
240
+ STORE128u( *(__m128i*)&(output[16]), UNPACKL( _bi, _bo ) ); /* first leaf: words 2 and 3 */
241
+ STORE128u( *(__m128i*)&(output[32]), UNPACKH( _ba, _be ) ); /* second leaf: words 0 and 1 */
242
+ STORE128u( *(__m128i*)&(output[48]), UNPACKH( _bi, _bo ) ); /* second leaf: words 2 and 3 */
243
+ }
244
+
245
/* Release the 128-bit (x2) macro bindings; several of these names are
 * defined again for 256-bit vectors right after this list. */
+ #undef XOR
246
+ #undef XOReq
247
+ #undef XOR3
248
+ #undef XOR5
249
+ #undef ROL
250
+ #undef Chi
251
+ #undef CONST_64
252
+ #undef LOAD6464
253
+ #undef STORE128u
254
+ #undef UNPACKL
255
+ #undef UNPACKH
256
+ #undef ZERO
257
+ #undef XORdata16
258
+ #undef XORdata21
259
+
260
+
261
+ /* Keccak-p[1600]×4 */
262
+
263
/* 256-bit bindings (four 64-bit elements per vector) of the generic names.
 * XOR3 uses ternary-logic immediate 0x96, i.e. a ^ b ^ c; Chi uses 0xD2,
 * i.e. a ^ (~b & c). LOAD4_64 forwards its arguments in order to
 * _mm256_set_epi64x, so its first argument becomes the highest element and
 * its last argument the lowest.
 * NOTE(review): XOR512 is not referenced in the visible x4 code - confirm
 * whether an out-of-view macro needs it. */
+ #define XOR(a,b) _mm256_xor_si256(a,b)
264
+ #define XOReq(a,b) a = _mm256_xor_si256(a,b)
265
+ #define XOR3(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0x96)
266
+ #define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e)
267
+ #define XOR512(a,b) _mm512_xor_si512(a,b)
268
+ #define ROL(a,offset) _mm256_rol_epi64(a,offset)
269
+ #define Chi(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0xD2)
270
+ #define CONST_64(a) _mm256_set1_epi64x(a)
271
+ #define ZERO() _mm256_setzero_si256()
272
+ #define LOAD4_64(a, b, c, d) _mm256_set_epi64x((uint64_t)(a), (uint64_t)(b), (uint64_t)(c), (uint64_t)(d))
273
+
274
/* x4 absorb helper: XORs 64-bit words 0..15 of four inputs into X##ba ..
 * X##ma. Word i of data0 goes to the lowest vector element and word i of
 * data3 to the highest (see the argument order given to LOAD4_64). */
+ #define XORdata16(X, data0, data1, data2, data3) \
275
+ XOReq(X##ba, LOAD4_64((data3)[ 0], (data2)[ 0], (data1)[ 0], (data0)[ 0])); \
276
+ XOReq(X##be, LOAD4_64((data3)[ 1], (data2)[ 1], (data1)[ 1], (data0)[ 1])); \
277
+ XOReq(X##bi, LOAD4_64((data3)[ 2], (data2)[ 2], (data1)[ 2], (data0)[ 2])); \
278
+ XOReq(X##bo, LOAD4_64((data3)[ 3], (data2)[ 3], (data1)[ 3], (data0)[ 3])); \
279
+ XOReq(X##bu, LOAD4_64((data3)[ 4], (data2)[ 4], (data1)[ 4], (data0)[ 4])); \
280
+ XOReq(X##ga, LOAD4_64((data3)[ 5], (data2)[ 5], (data1)[ 5], (data0)[ 5])); \
281
+ XOReq(X##ge, LOAD4_64((data3)[ 6], (data2)[ 6], (data1)[ 6], (data0)[ 6])); \
282
+ XOReq(X##gi, LOAD4_64((data3)[ 7], (data2)[ 7], (data1)[ 7], (data0)[ 7])); \
283
+ XOReq(X##go, LOAD4_64((data3)[ 8], (data2)[ 8], (data1)[ 8], (data0)[ 8])); \
284
+ XOReq(X##gu, LOAD4_64((data3)[ 9], (data2)[ 9], (data1)[ 9], (data0)[ 9])); \
285
+ XOReq(X##ka, LOAD4_64((data3)[10], (data2)[10], (data1)[10], (data0)[10])); \
286
+ XOReq(X##ke, LOAD4_64((data3)[11], (data2)[11], (data1)[11], (data0)[11])); \
287
+ XOReq(X##ki, LOAD4_64((data3)[12], (data2)[12], (data1)[12], (data0)[12])); \
288
+ XOReq(X##ko, LOAD4_64((data3)[13], (data2)[13], (data1)[13], (data0)[13])); \
289
+ XOReq(X##ku, LOAD4_64((data3)[14], (data2)[14], (data1)[14], (data0)[14])); \
290
+ XOReq(X##ma, LOAD4_64((data3)[15], (data2)[15], (data1)[15], (data0)[15])); \
291
+
292
/* x4 absorb helper for a full 21-word block: expands XORdata16 for words
 * 0..15, then XORs words 16..20 of the four inputs into X##me, X##mi,
 * X##mo, X##mu and X##sa with the same element order (data0 lowest). */
+ #define XORdata21(X, data0, data1, data2, data3) \
293
+ XORdata16(X, data0, data1, data2, data3) \
294
+ XOReq(X##me, LOAD4_64((data3)[16], (data2)[16], (data1)[16], (data0)[16])); \
295
+ XOReq(X##mi, LOAD4_64((data3)[17], (data2)[17], (data1)[17], (data0)[17])); \
296
+ XOReq(X##mo, LOAD4_64((data3)[18], (data2)[18], (data1)[18], (data0)[18])); \
297
+ XOReq(X##mu, LOAD4_64((data3)[19], (data2)[19], (data1)[19], (data0)[19])); \
298
+ XOReq(X##sa, LOAD4_64((data3)[20], (data2)[20], (data1)[20], (data0)[20])); \
299
+
300
/*
 * Absorbs four chunkSize-byte inputs side by side in __m256i state variables
 * (one input per 64-bit element) and writes 128 bytes to output.
 *
 * input:  input k starts at input + k*chunkSize for k = 0..3; all four are
 *         read through const uint64_t pointers.
 *         NOTE(review): input is an unsigned char pointer, so these 64-bit
 *         reads assume unaligned access is acceptable here - confirm.
 * output: 32 bytes per input, in input order: _ba, _be, _bi, _bo of input 0
 *         at offset 0, of input 1 at offset 32, of input 2 at offset 64 and
 *         of input 3 at offset 96. Stores are unaligned.
 *
 * KeccakP_DeclareVars, initializeState and rounds12 are macros defined
 * outside this excerpt; KeccakP_DeclareVars presumably declares the
 * _ba.._sa variables that the statements below refer to.
 *
 * NOTE(review): STORE256u, UNPACKL, UNPACKH and PERM128 are defined inside
 * this function and are not undefined in the #undef list that follows it,
 * so they stay defined for the rest of the file - confirm that is intended.
 */
+ void KangarooTwelve_AVX512_Process4Leaves(const unsigned char *input, unsigned char *output)
301
+ {
302
+ KeccakP_DeclareVars(__m256i);
303
+ unsigned int j; /* byte offset already absorbed from each input */
304
+
305
+ initializeState(_);
306
+
307
+ for(j = 0; j < (chunkSize - rateInBytes); j += rateInBytes) { /* 48 passes of 168 bytes; 8192 - 48*168 = 128 bytes remain */
308
+ XORdata21(_, (const uint64_t *)input, (const uint64_t *)(input+chunkSize), (const uint64_t *)(input+2*chunkSize), (const uint64_t *)(input+3*chunkSize));
309
+ rounds12
310
+ input += rateInBytes;
311
+ }
312
+
313
+ XORdata16(_, (const uint64_t *)input, (const uint64_t *)(input+chunkSize), (const uint64_t *)(input+2*chunkSize), (const uint64_t *)(input+3*chunkSize)); /* last 16 words = the remaining 128 bytes */
314
+ XOReq(_me, CONST_64(0x0BULL)); /* word 16, directly after the data; presumably the KangarooTwelve leaf suffix - confirm */
315
+ XOReq(_sa, CONST_64(0x8000000000000000ULL)); /* top bit of word 20, the last word of the 21-word block */
316
+ rounds12
317
+
318
+ #define STORE256u(a, b) _mm256_storeu_si256((__m256i *)&(a), b)
319
+ #define UNPACKL( a, b ) _mm256_unpacklo_epi64((a), (b))
320
+ #define UNPACKH( a, b ) _mm256_unpackhi_epi64((a), (b))
321
+ #define PERM128( a, b, c ) _mm256_permute2f128_si256(a, b, c)
322
+ { /* 4x4 transpose of _ba.._bo so each input's four words become contiguous */
323
+ __m256i lanesL01, lanesL23, lanesH01, lanesH23;
324
+
325
+ lanesL01 = UNPACKL( _ba, _be ); /* _ba,_be of inputs 0 and 2 */
326
+ lanesH01 = UNPACKH( _ba, _be ); /* _ba,_be of inputs 1 and 3 */
327
+ lanesL23 = UNPACKL( _bi, _bo ); /* _bi,_bo of inputs 0 and 2 */
328
+ lanesH23 = UNPACKH( _bi, _bo ); /* _bi,_bo of inputs 1 and 3 */
329
+ STORE256u( output[ 0], PERM128( lanesL01, lanesL23, 0x20 ) ); /* 0x20: low 128-bit halves -> input 0 */
330
+ STORE256u( output[32], PERM128( lanesH01, lanesH23, 0x20 ) ); /* input 1 */
331
+ STORE256u( output[64], PERM128( lanesL01, lanesL23, 0x31 ) ); /* 0x31: high 128-bit halves -> input 2 */
332
+ STORE256u( output[96], PERM128( lanesH01, lanesH23, 0x31 ) ); /* input 3 */
333
+ }
334
+ /* TODO: check if something like this would be better:
335
+ index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
336
+ STORE_SCATTER8_64(dataAsLanes+0, index512, stateAsLanes512[0/2]);
337
+ STORE_SCATTER8_64(dataAsLanes+2, index512, stateAsLanes512[2/2]);
338
+ */
339
+ }
340
+
341
/* Release the 256-bit (x4) macro bindings; several of these names are
 * defined again for 512-bit vectors right after this list.
 * NOTE(review): STORE256u, UNPACKL, UNPACKH and PERM128, which are defined
 * inside KangarooTwelve_AVX512_Process4Leaves, are not undefined here. */
+ #undef XOR
342
+ #undef XOReq
343
+ #undef XOR3
344
+ #undef XOR5
345
+ #undef XOR512
346
+ #undef ROL
347
+ #undef Chi
348
+ #undef CONST_64
349
+ #undef ZERO
350
+ #undef LOAD4_64
351
+ #undef XORdata16
352
+ #undef XORdata21
353
+
354
+
355
+ /* Keccak-p[1600]×8 */
356
+
357
/* 512-bit bindings (eight 64-bit elements per vector) of the generic names.
 * XOR3 uses ternary-logic immediate 0x96, i.e. a ^ b ^ c; Chi uses 0xD2,
 * i.e. a ^ (~b & c). XOReq512 expands to the same operation as XOReq.
 * LOAD is an unaligned 512-bit load. */
+ #define XOR(a,b) _mm512_xor_si512(a,b)
358
+ #define XOReq(a,b) a = _mm512_xor_si512(a,b)
359
+ #define XOR3(a,b,c) _mm512_ternarylogic_epi64(a,b,c,0x96)
360
+ #define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e)
361
+ #define XOReq512(a, b) a = XOR(a,b)
362
+ #define ROL(a,offset) _mm512_rol_epi64(a,offset)
363
+ #define Chi(a,b,c) _mm512_ternarylogic_epi64(a,b,c,0xD2)
364
+ #define CONST_64(a) _mm512_set1_epi64(a)
365
+ #define ZERO() _mm512_setzero_si512()
366
+ #define LOAD(p) _mm512_loadu_si512(p)
367
+
368
/* Loads eight consecutive 64-bit words, starting at word `offset`, from each
 * of eight inputs spaced chunkSize bytes apart (dataAsLanes is indexed in
 * 64-bit words, hence chunkSize/8), then transposes the 8x8 word matrix.
 * On exit rK (K = 0..7) holds word offset+K of all eight inputs, with input
 * i in element i. Requires __m512i variables t0..t7 and r0..r7 to be in
 * scope at the expansion site; t0..t7 are clobbered as scratch.
 * Steps: 64-bit unpack pairs up neighbouring inputs, then two rounds of
 * 128-bit shuffles (0x88 picks 128-bit blocks 0 and 2 of each operand,
 * 0xdd picks blocks 1 and 3) gather the remaining inputs. */
+ #define LoadAndTranspose8(dataAsLanes, offset) \
369
+ t0 = LOAD((dataAsLanes) + (offset) + 0*chunkSize/8); \
370
+ t1 = LOAD((dataAsLanes) + (offset) + 1*chunkSize/8); \
371
+ t2 = LOAD((dataAsLanes) + (offset) + 2*chunkSize/8); \
372
+ t3 = LOAD((dataAsLanes) + (offset) + 3*chunkSize/8); \
373
+ t4 = LOAD((dataAsLanes) + (offset) + 4*chunkSize/8); \
374
+ t5 = LOAD((dataAsLanes) + (offset) + 5*chunkSize/8); \
375
+ t6 = LOAD((dataAsLanes) + (offset) + 6*chunkSize/8); \
376
+ t7 = LOAD((dataAsLanes) + (offset) + 7*chunkSize/8); \
377
+ r0 = _mm512_unpacklo_epi64(t0, t1); \
378
+ r1 = _mm512_unpackhi_epi64(t0, t1); \
379
+ r2 = _mm512_unpacklo_epi64(t2, t3); \
380
+ r3 = _mm512_unpackhi_epi64(t2, t3); \
381
+ r4 = _mm512_unpacklo_epi64(t4, t5); \
382
+ r5 = _mm512_unpackhi_epi64(t4, t5); \
383
+ r6 = _mm512_unpacklo_epi64(t6, t7); \
384
+ r7 = _mm512_unpackhi_epi64(t6, t7); \
385
+ t0 = _mm512_shuffle_i32x4(r0, r2, 0x88); \
386
+ t1 = _mm512_shuffle_i32x4(r1, r3, 0x88); \
387
+ t2 = _mm512_shuffle_i32x4(r0, r2, 0xdd); \
388
+ t3 = _mm512_shuffle_i32x4(r1, r3, 0xdd); \
389
+ t4 = _mm512_shuffle_i32x4(r4, r6, 0x88); \
390
+ t5 = _mm512_shuffle_i32x4(r5, r7, 0x88); \
391
+ t6 = _mm512_shuffle_i32x4(r4, r6, 0xdd); \
392
+ t7 = _mm512_shuffle_i32x4(r5, r7, 0xdd); \
393
+ r0 = _mm512_shuffle_i32x4(t0, t4, 0x88); \
394
+ r1 = _mm512_shuffle_i32x4(t1, t5, 0x88); \
395
+ r2 = _mm512_shuffle_i32x4(t2, t6, 0x88); \
396
+ r3 = _mm512_shuffle_i32x4(t3, t7, 0x88); \
397
+ r4 = _mm512_shuffle_i32x4(t0, t4, 0xdd); \
398
+ r5 = _mm512_shuffle_i32x4(t1, t5, 0xdd); \
399
+ r6 = _mm512_shuffle_i32x4(t2, t6, 0xdd); \
400
+ r7 = _mm512_shuffle_i32x4(t3, t7, 0xdd); \
401
+
402
/* x8 absorb helper: XORs 64-bit words 0..15 of eight inputs into X##ba ..
 * X##ma using two LoadAndTranspose8 passes (words 0..7, then words 8..15).
 * The index parameter is not used by this macro; it is accepted so the
 * parameter list matches XORdata21. Needs t0..t7 and r0..r7 in scope. */
+ #define XORdata16(X, index, dataAsLanes) \
403
+ LoadAndTranspose8(dataAsLanes, 0) \
404
+ XOReq(X##ba, r0); \
405
+ XOReq(X##be, r1); \
406
+ XOReq(X##bi, r2); \
407
+ XOReq(X##bo, r3); \
408
+ XOReq(X##bu, r4); \
409
+ XOReq(X##ga, r5); \
410
+ XOReq(X##ge, r6); \
411
+ XOReq(X##gi, r7); \
412
+ LoadAndTranspose8(dataAsLanes, 8) \
413
+ XOReq(X##go, r0); \
414
+ XOReq(X##gu, r1); \
415
+ XOReq(X##ka, r2); \
416
+ XOReq(X##ke, r3); \
417
+ XOReq(X##ki, r4); \
418
+ XOReq(X##ko, r5); \
419
+ XOReq(X##ku, r6); \
420
+ XOReq(X##ma, r7); \
421
+
422
/* x8 absorb helper for a full 21-word block: expands XORdata16 for words
 * 0..15, then XORs words 16..20 into X##me, X##mi, X##mo, X##mu and X##sa.
 * The last five words are fetched with LOAD_GATHER8_64, which is defined
 * outside this excerpt; presumably it gathers one word per input using the
 * per-input offsets held in index - verify against its definition. */
+ #define XORdata21(X, index, dataAsLanes) \
423
+ XORdata16(X, index, dataAsLanes) \
424
+ XOReq(X##me, LOAD_GATHER8_64(index, (dataAsLanes) + 16)); \
425
+ XOReq(X##mi, LOAD_GATHER8_64(index, (dataAsLanes) + 17)); \
426
+ XOReq(X##mo, LOAD_GATHER8_64(index, (dataAsLanes) + 18)); \
427
+ XOReq(X##mu, LOAD_GATHER8_64(index, (dataAsLanes) + 19)); \
428
+ XOReq(X##sa, LOAD_GATHER8_64(index, (dataAsLanes) + 20)); \
429
+
430
/*
 * Absorbs eight chunkSize-byte inputs side by side in __m512i state
 * variables (one input per 64-bit element) and scatters _ba, _be, _bi and
 * _bo to output.
 *
 * input:  input k starts at input + k*chunkSize for k = 0..7. Words 0..15
 *         of each block are read with unaligned 512-bit loads; words 16..20
 *         are read with LOAD_GATHER8_64.
 * output: written through outputAsLanes with STORE_SCATTER8_64 using word
 *         offsets 0, 4, ..., 28. Presumably this yields 32 bytes per input,
 *         256 bytes in total, in input order; LOAD8_32 and STORE_SCATTER8_64
 *         are defined outside this excerpt - verify argument order and
 *         scale against their definitions.
 *
 * KeccakP_DeclareVars, initializeState and rounds12 are macros defined
 * outside this excerpt; KeccakP_DeclareVars presumably declares the
 * _ba.._sa variables. t0..t7 and r0..r7 are scratch for LoadAndTranspose8.
 */
+ void KangarooTwelve_AVX512_Process8Leaves(const unsigned char *input, unsigned char *output)
431
+ {
432
+ KeccakP_DeclareVars(__m512i);
433
+ unsigned int j; /* byte offset already absorbed from each input */
434
+ uint64_t *outputAsLanes = (uint64_t *)output; /* destination of the scatter stores below, hence not const-qualified */
435
+ __m256i index;
436
+ __m512i t0, t1, t2, t3, t4, t5, t6, t7;
437
+ __m512i r0, r1, r2, r3, r4, r5, r6, r7;
438
+
439
+ initializeState(_);
440
+
441
+ index = LOAD8_32(7*(chunkSize / 8), 6*(chunkSize / 8), 5*(chunkSize / 8), 4*(chunkSize / 8), 3*(chunkSize / 8), 2*(chunkSize / 8), 1*(chunkSize / 8), 0*(chunkSize / 8)); /* start of each input, counted in 64-bit words */
442
+ for(j = 0; j < (chunkSize - rateInBytes); j += rateInBytes) { /* 48 passes of 168 bytes; 8192 - 48*168 = 128 bytes remain */
443
+ XORdata21(_, index, (const uint64_t *)input);
444
+ rounds12
445
+ input += rateInBytes;
446
+ }
447
+
448
+ XORdata16(_, index, (const uint64_t *)input); /* last 16 words = the remaining 128 bytes */
449
+ XOReq(_me, CONST_64(0x0BULL)); /* word 16, directly after the data; presumably the KangarooTwelve leaf suffix - confirm */
450
+ XOReq(_sa, CONST_64(0x8000000000000000ULL)); /* top bit of word 20, the last word of the 21-word block */
451
+ rounds12
452
+
453
+ index = LOAD8_32(7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, 0*4); /* four 64-bit words of output per input */
454
+ STORE_SCATTER8_64(outputAsLanes+0, index, _ba);
455
+ STORE_SCATTER8_64(outputAsLanes+1, index, _be);
456
+ STORE_SCATTER8_64(outputAsLanes+2, index, _bi);
457
+ STORE_SCATTER8_64(outputAsLanes+3, index, _bo);
458
+ }