sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -0,0 +1,551 @@
1
+ # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
2
+ # Copyright (c) 2018 Ronny Van Keer
3
+ # All rights reserved.
4
+ #
5
+ # The source code in this file is licensed under the CRYPTOGAMS license.
6
+ # For further details see http://www.openssl.org/~appro/cryptogams/.
7
+ #
8
+ # Notes:
9
+ # The code for the permutation (__KeccakF1600) was generated with
10
+ # Andy Polyakov's keccak1600-avx512.pl from the CRYPTOGAMS project
11
+ # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx512.pl).
12
+ # The rest of the code was written by Ronny Van Keer.
13
+ # Adaptations for macOS by Stéphane Léon.
14
+
15
+ .text
16
+
17
+ # -----------------------------------------------------------------------------
18
+ #
19
+ # void KeccakP1600_AVX512_Initialize(void *state);
20
+ # Zeroes the 200 bytes at %rdi: three 64-byte stores (bytes 0..191) followed by one 8-byte store (bytes 192..199).
21
+ .ifdef macOS # if the assembler symbol macOS is defined: export the underscore-prefixed name and skip the ELF .type/.size directives
22
+ .globl _KeccakP1600_AVX512_Initialize
23
+ _KeccakP1600_AVX512_Initialize:
24
+ .else
25
+ .globl KeccakP1600_AVX512_Initialize
26
+ .type KeccakP1600_AVX512_Initialize,@function
27
+ KeccakP1600_AVX512_Initialize:
28
+ .endif
29
+ .balign 32 # NOTE(review): the alignment directive follows the entry label, so any padding sits between the label and the first instruction; presumably nop fill in .text, confirm
30
+ vpxorq %zmm0,%zmm0,%zmm0 # zmm0 = 0 (clobbers zmm0)
31
+ vmovdqu64 %zmm0,0*64(%rdi) # unaligned-store form, bytes 0..63
32
+ vmovdqu64 %zmm0,1*64(%rdi) # bytes 64..127
33
+ vmovdqu64 %zmm0,2*64(%rdi) # bytes 128..191
34
+ movq $0,3*64(%rdi) # last 8 bytes, 192..199
35
+ ret
36
+ .ifdef macOS
37
+ .else
38
+ .size KeccakP1600_AVX512_Initialize,.-KeccakP1600_AVX512_Initialize
39
+ .endif
40
+
41
+ # -----------------------------------------------------------------------------
42
+ #
43
+ # void KeccakP1600_AVX512_AddByte(void *state, unsigned char data, unsigned int offset);
44
+ # %rdi %rsi %rdx
45
+ #!!
46
+ #.globl KeccakP1600_AVX512_AddByte
47
+ #.type KeccakP1600_AVX512_AddByte,@function
48
+ #.balign 32
49
+ #KeccakP1600_AVX512_AddByte:
50
+ # mov %rdx, %rax
51
+ # and $7, %rax
52
+ # and $0xFFFFFFF8, %edx
53
+ # mov mapState(%rdx), %rdx
54
+ # add %rdx, %rdi
55
+ # add %rax, %rdi
56
+ # xorb %sil, (%rdi)
57
+ # ret
58
+ #.size KeccakP1600_AVX512_AddByte,.-KeccakP1600_AVX512_AddByte
59
+
60
+ # -----------------------------------------------------------------------------
61
+ #
62
+ # void KeccakP1600_AVX512_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
63
+ # %rdi %rsi %rdx %rcx
64
+ # XORs length bytes from data into the memory at state+offset in three phases: byte-wise until offset reaches a multiple of 8, then 8 bytes at a time, then byte-wise for the remaining 0..7 bytes. Clobbers %r8 and %r9.
65
+ .ifdef macOS # if the assembler symbol macOS is defined: export the underscore-prefixed name and skip the ELF .type/.size directives
66
+ .globl _KeccakP1600_AVX512_AddBytes
67
+ _KeccakP1600_AVX512_AddBytes:
68
+ .else
69
+ .globl KeccakP1600_AVX512_AddBytes
70
+ .type KeccakP1600_AVX512_AddBytes,@function
71
+ KeccakP1600_AVX512_AddBytes:
72
+ .endif
73
+ .balign 32 # alignment directive is placed after the entry label
74
+ cmp $0, %rcx # NOTE(review): offset and length are declared unsigned int in the prototype comment, yet the full 64-bit %rdx and %rcx are used; this relies on the caller having zero-extended them, confirm
75
+ jz KeccakP1600_AVX512_AddBytes_Exit # nothing to do for length == 0
76
+ add %rdx, %rdi # state += offset
77
+ and $7, %rdx # rdx = offset mod 8
78
+ jz KeccakP1600_AVX512_AddBytes_LaneAlignedCheck # offset already a multiple of 8: skip the head loop
79
+ mov $8, %r9 # r9 is (max) length of incomplete lane
80
+ sub %rdx, %r9 # r9 = 8 - (offset mod 8)
81
+ cmp %rcx, %r9
82
+ cmovae %rcx, %r9 # r9 = min(r9, length), unsigned compare
83
+ sub %r9, %rcx # length -= length of incomplete lane
84
+ KeccakP1600_AVX512_AddBytes_NotAlignedLoop:
85
+ mov (%rsi), %r8b # one input byte
86
+ inc %rsi
87
+ xorb %r8b, (%rdi) # XOR it into the state in place
88
+ inc %rdi
89
+ dec %r9
90
+ jnz KeccakP1600_AVX512_AddBytes_NotAlignedLoop
91
+ jmp KeccakP1600_AVX512_AddBytes_LaneAlignedCheck
92
+ KeccakP1600_AVX512_AddBytes_LaneAlignedLoop:
93
+ mov (%rsi), %r8 # 8 input bytes at once
94
+ add $8, %rsi
95
+ xor %r8, (%rdi)
96
+ add $8, %rdi
97
+ KeccakP1600_AVX512_AddBytes_LaneAlignedCheck:
98
+ sub $8, %rcx # borrow (carry set) means fewer than 8 bytes were left
99
+ jnc KeccakP1600_AVX512_AddBytes_LaneAlignedLoop
100
+ KeccakP1600_AVX512_AddBytes_LastIncompleteLane:
101
+ add $8, %rcx # undo the last subtraction: rcx = remaining 0..7 bytes
102
+ jz KeccakP1600_AVX512_AddBytes_Exit
103
+ KeccakP1600_AVX512_AddBytes_LastIncompleteLaneLoop:
104
+ mov (%rsi), %r8b
105
+ inc %rsi
106
+ xor %r8b, (%rdi) # byte-sized XOR (operand size taken from %r8b)
107
+ inc %rdi
108
+ dec %rcx
109
+ jnz KeccakP1600_AVX512_AddBytes_LastIncompleteLaneLoop
110
+ KeccakP1600_AVX512_AddBytes_Exit:
111
+ ret
112
+ .ifdef macOS
113
+ .else
114
+ .size KeccakP1600_AVX512_AddBytes,.-KeccakP1600_AVX512_AddBytes
115
+ .endif
116
+
117
+ # -----------------------------------------------------------------------------
118
+ #
119
+ # void KeccakP1600_AVX512_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
120
+ # %rdi %rsi %rdx %rcx
121
+ # Copies length bytes from state+offset to data in three phases: byte-wise until offset reaches a multiple of 8, then 8 bytes at a time, then the remaining 0..7 bytes peeled from a single 8-byte load. Clobbers %rax and %r8.
122
+ .ifdef macOS # if the assembler symbol macOS is defined: export the underscore-prefixed name and skip the ELF .type/.size directives
123
+ .globl _KeccakP1600_AVX512_ExtractBytes
124
+ _KeccakP1600_AVX512_ExtractBytes:
125
+ .else
126
+ .globl KeccakP1600_AVX512_ExtractBytes
127
+ .type KeccakP1600_AVX512_ExtractBytes,@function
128
+ KeccakP1600_AVX512_ExtractBytes:
129
+ .endif
130
+ .balign 32 # alignment directive is placed after the entry label
131
+ cmp $0, %rcx # NOTE(review): offset and length are declared unsigned int in the prototype comment, yet the full 64-bit %rdx and %rcx are used; this relies on the caller having zero-extended them, confirm
132
+ jz KeccakP1600_AVX512_ExtractBytes_Exit # nothing to do for length == 0
133
+ add %rdx, %rdi # state += offset
134
+ and $7, %rdx # rdx = offset mod 8
135
+ jz KeccakP1600_AVX512_ExtractBytes_LaneAlignedCheck # offset already a multiple of 8: skip the head loop
136
+ mov $8, %rax # rax is (max) length of incomplete lane
137
+ sub %rdx, %rax # rax = 8 - (offset mod 8)
138
+ cmp %rcx, %rax
139
+ cmovae %rcx, %rax # rax = min(rax, length), unsigned compare
140
+ sub %rax, %rcx # length -= length of incomplete lane
141
+ KeccakP1600_AVX512_ExtractBytes_NotAlignedLoop:
142
+ mov (%rdi), %r8b # one state byte
143
+ inc %rdi
144
+ mov %r8b, (%rsi) # store it to the output
145
+ inc %rsi
146
+ dec %rax
147
+ jnz KeccakP1600_AVX512_ExtractBytes_NotAlignedLoop
148
+ jmp KeccakP1600_AVX512_ExtractBytes_LaneAlignedCheck
149
+ KeccakP1600_AVX512_ExtractBytes_LaneAlignedLoop:
150
+ mov (%rdi), %r8 # 8 state bytes at once
151
+ add $8, %rdi
152
+ mov %r8, (%rsi)
153
+ add $8, %rsi
154
+ KeccakP1600_AVX512_ExtractBytes_LaneAlignedCheck:
155
+ sub $8, %rcx # borrow (carry set) means fewer than 8 bytes were left
156
+ jnc KeccakP1600_AVX512_ExtractBytes_LaneAlignedLoop
157
+ KeccakP1600_AVX512_ExtractBytes_LastIncompleteLane:
158
+ add $8, %rcx # undo the last subtraction: rcx = remaining 0..7 bytes
159
+ jz KeccakP1600_AVX512_ExtractBytes_Exit
160
+ mov (%rdi), %r8 # reads a full 8 bytes from the state even though only rcx of them are written out
161
+ KeccakP1600_AVX512_ExtractBytes_LastIncompleteLaneLoop:
162
+ mov %r8b, (%rsi) # emit the current low byte
163
+ shr $8, %r8 # bring the next byte down into the low position
164
+ inc %rsi
165
+ dec %rcx
166
+ jnz KeccakP1600_AVX512_ExtractBytes_LastIncompleteLaneLoop
167
+ KeccakP1600_AVX512_ExtractBytes_Exit:
168
+ ret
169
+ .ifdef macOS
170
+ .else
171
+ .size KeccakP1600_AVX512_ExtractBytes,.-KeccakP1600_AVX512_ExtractBytes
172
+ .endif
173
+
174
+ # -----------------------------------------------------------------------------
175
+ #
176
+ # internal
177
+ # Round loop on the state held in zmm0..zmm4: each pass through .Loop_avx512 runs an even and an odd round, and the loop repeats %eax times. Reads permutation/rotation vectors from zmm13..zmm31, masks k1..k5 and the table at %r10, all prepared by the caller; uses zmm5..zmm12 as scratch and modifies %r10 and %eax. No .globl is emitted for this symbol in the visible source.
178
+ .text
179
+ .ifdef macOS
180
+ .else
181
+ .type __KeccakF1600,@function
182
+ .endif
183
+ .balign 32
184
+ __KeccakF1600:
185
+ .Loop_avx512:
186
+ ######################################### Theta, even round
187
+ vmovdqa64 %zmm0,%zmm5 # put aside original A00
188
+ vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00"
189
+ vpternlogq $0x96,%zmm4,%zmm3,%zmm0 # imm 0x96 is a three-input XOR, so zmm0 = zmm0^zmm1^zmm2^zmm3^zmm4 after these two
190
+ vprolq $1,%zmm0,%zmm6 # zmm6 = each qword of zmm0 rotated left by 1
191
+ vpermq %zmm0,%zmm13,%zmm0 # reorder qwords of zmm0 using the indices in zmm13
192
+ vpermq %zmm6,%zmm16,%zmm6 # reorder qwords of zmm6 using the indices in zmm16
193
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00
194
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm1 # XOR the same zmm0 and zmm6 pair into each remaining register
195
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm2
196
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm3
197
+ vpternlogq $0x96,%zmm0,%zmm6,%zmm4
198
+ ######################################### Rho
199
+ vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00
200
+ vprolvq %zmm23,%zmm1,%zmm1 # per-qword left rotate, counts taken from zmm23..zmm26
201
+ vprolvq %zmm24,%zmm2,%zmm2
202
+ vprolvq %zmm25,%zmm3,%zmm3
203
+ vprolvq %zmm26,%zmm4,%zmm4
204
+ ######################################### Pi
205
+ vpermq %zmm0,%zmm17,%zmm0 # qword reordering, one index vector (zmm17..zmm21) per register
206
+ vpermq %zmm1,%zmm18,%zmm1
207
+ vpermq %zmm2,%zmm19,%zmm2
208
+ vpermq %zmm3,%zmm20,%zmm3
209
+ vpermq %zmm4,%zmm21,%zmm4
210
+ ######################################### Chi
211
+ vmovdqa64 %zmm0,%zmm5 # keep pre-Chi zmm0 and zmm1: they are overwritten below but still needed as inputs for zmm3 and zmm4
212
+ vmovdqa64 %zmm1,%zmm6
213
+ vpternlogq $0xD2,%zmm2,%zmm1,%zmm0 # imm 0xD2: dest = dest ^ (~middle & first), here zmm0 ^= ~zmm1 & zmm2
214
+ vpternlogq $0xD2,%zmm3,%zmm2,%zmm1
215
+ vpternlogq $0xD2,%zmm4,%zmm3,%zmm2
216
+ vpternlogq $0xD2,%zmm5,%zmm4,%zmm3 # uses the saved pre-Chi zmm0
217
+ vpternlogq $0xD2,%zmm6,%zmm5,%zmm4 # uses the saved pre-Chi zmm1 and zmm0
218
+ ######################################### Iota
219
+ vpxorq (%r10),%zmm0,%zmm0{%k1} # merge-masked: only the qwords selected by k1 are XORed with memory at %r10
220
+ lea 16(%r10),%r10 # advance by 16 bytes; the odd round below reads the 8 bytes at -8(%r10)
221
+ ######################################### Harmonize rounds
222
+ vpblendmq %zmm2,%zmm1,%zmm6{%k2} # masked blend: qwords selected by k2 come from the first operand, the rest from the second
223
+ vpblendmq %zmm3,%zmm2,%zmm7{%k2}
224
+ vpblendmq %zmm4,%zmm3,%zmm8{%k2}
225
+ vpblendmq %zmm1,%zmm0,%zmm5{%k2}
226
+ vpblendmq %zmm0,%zmm4,%zmm9{%k2}
227
+ vpblendmq %zmm3,%zmm6,%zmm6{%k3} # the k3, k4 and k5 passes each fold one more source register into zmm5..zmm9
228
+ vpblendmq %zmm4,%zmm7,%zmm7{%k3}
229
+ vpblendmq %zmm2,%zmm5,%zmm5{%k3}
230
+ vpblendmq %zmm0,%zmm8,%zmm8{%k3}
231
+ vpblendmq %zmm1,%zmm9,%zmm9{%k3}
232
+ vpblendmq %zmm4,%zmm6,%zmm6{%k4}
233
+ vpblendmq %zmm3,%zmm5,%zmm5{%k4}
234
+ vpblendmq %zmm0,%zmm7,%zmm7{%k4}
235
+ vpblendmq %zmm1,%zmm8,%zmm8{%k4}
236
+ vpblendmq %zmm2,%zmm9,%zmm9{%k4}
237
+ vpblendmq %zmm4,%zmm5,%zmm5{%k5}
238
+ vpblendmq %zmm0,%zmm6,%zmm6{%k5}
239
+ vpblendmq %zmm1,%zmm7,%zmm7{%k5}
240
+ vpblendmq %zmm2,%zmm8,%zmm8{%k5}
241
+ vpblendmq %zmm3,%zmm9,%zmm9{%k5}
242
+ #vpermq %zmm5,%zmm33,%zmm0 # doesn't actually change order
243
+ vpermq %zmm6,%zmm13,%zmm1 # write the blended registers back into zmm1..zmm4 with a per-register qword reordering
244
+ vpermq %zmm7,%zmm14,%zmm2
245
+ vpermq %zmm8,%zmm15,%zmm3
246
+ vpermq %zmm9,%zmm16,%zmm4
247
+ ######################################### Theta, odd round
248
+ vmovdqa64 %zmm5,%zmm0 # real A00
249
+ vpternlogq $0x96,%zmm2,%zmm1,%zmm5 # C00 is %zmm5's alias
250
+ vpternlogq $0x96,%zmm4,%zmm3,%zmm5 # zmm5 = XOR of all five registers
251
+ vprolq $1,%zmm5,%zmm6 # zmm6 = each qword of zmm5 rotated left by 1
252
+ vpermq %zmm5,%zmm13,%zmm5
253
+ vpermq %zmm6,%zmm16,%zmm6
254
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm0 # XOR the zmm5 and zmm6 pair into every register
255
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm3
256
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm1
257
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm4
258
+ vpternlogq $0x96,%zmm5,%zmm6,%zmm2
259
+ ######################################### Rho
260
+ vprolvq %zmm27,%zmm0,%zmm0 # odd-round rotate counts come from zmm27..zmm31; results go to zmm0 and zmm6..zmm9
261
+ vprolvq %zmm30,%zmm3,%zmm6
262
+ vprolvq %zmm28,%zmm1,%zmm7
263
+ vprolvq %zmm31,%zmm4,%zmm8
264
+ vprolvq %zmm29,%zmm2,%zmm9
265
+ vpermq %zmm0,%zmm16,%zmm10 # two reordered copies of zmm0, consumed by the Chi step for zmm0 below
266
+ vpermq %zmm0,%zmm15,%zmm11
267
+ ######################################### Iota
268
+ vpxorq -8(%r10),%zmm0,%zmm0{%k1} # %r10 was already advanced by 16, so this reads 8 bytes past the even round's address
269
+ ######################################### Pi
270
+ vpermq %zmm6,%zmm14,%zmm1
271
+ vpermq %zmm7,%zmm16,%zmm2
272
+ vpermq %zmm8,%zmm13,%zmm3
273
+ vpermq %zmm9,%zmm15,%zmm4
274
+ ######################################### Chi
275
+ vpternlogq $0xD2,%zmm11,%zmm10,%zmm0 # zmm0 ^= ~zmm10 & zmm11
276
+ vpermq %zmm6,%zmm13,%zmm12
277
+ #vpermq %zmm6,%zmm33,%zmm6
278
+ vpternlogq $0xD2,%zmm6,%zmm12,%zmm1 # zmm1 ^= ~zmm12 & zmm6
279
+ vpermq %zmm7,%zmm15,%zmm5
280
+ vpermq %zmm7,%zmm14,%zmm7
281
+ vpternlogq $0xD2,%zmm7,%zmm5,%zmm2 # zmm2 ^= ~zmm5 & zmm7
282
+ #vpermq %zmm8,%zmm33,%zmm8
283
+ vpermq %zmm8,%zmm16,%zmm6
284
+ vpternlogq $0xD2,%zmm6,%zmm8,%zmm3 # zmm3 ^= ~zmm8 & zmm6
285
+ vpermq %zmm9,%zmm14,%zmm5
286
+ vpermq %zmm9,%zmm13,%zmm9
287
+ vpternlogq $0xD2,%zmm9,%zmm5,%zmm4 # zmm4 ^= ~zmm5 & zmm9
288
+ dec %eax # one double-round done
289
+ jnz .Loop_avx512 # repeat until the caller-supplied count in %eax reaches zero
290
+ ret
291
+ .ifdef macOS
292
+ .else
293
+ .size __KeccakF1600,.-__KeccakF1600
294
+ .endif
295
+
296
+ # -----------------------------------------------------------------------------
297
+ # Loads the 25 state lanes into %zmm0-%zmm4 (five lanes each), calls __KeccakF1600 with %eax = 12/2, stores the lanes back.
298
+ # void KeccakP1600_AVX512_Permute_12rounds(void *state);
299
+ # %rdi
300
+ # Overwrites %rdi, %r8, %r10, %eax, %k1-%k6, %zmm0-%zmm4 and %zmm13-%zmm31 here, plus whatever __KeccakF1600 overwrites.
301
+ .ifdef macOS # macOS build: underscore-prefixed symbol, no .type/.size
302
+ .globl _KeccakP1600_AVX512_Permute_12rounds
303
+ _KeccakP1600_AVX512_Permute_12rounds:
304
+ .else
305
+ .globl KeccakP1600_AVX512_Permute_12rounds
306
+ .type KeccakP1600_AVX512_Permute_12rounds,@function
307
+ KeccakP1600_AVX512_Permute_12rounds:
308
+ .endif
309
+ .balign 32
310
+ lea 96(%rdi),%rdi # bias the state pointer by 96; every state access below uses a -96 displacement
311
+ lea theta_perm(%rip),%r8 # %r8 = base of the constant tables (rows of 64 bytes)
312
+ kxnorw %k6,%k6,%k6 # %k6 = 0xffff
313
+ kshiftrw $15,%k6,%k1 # %k1 = 0x0001: element 0 only
314
+ kshiftrw $11,%k6,%k6 # %k6 = 0x001f: elements 0-4
315
+ kshiftlw $1,%k1,%k2 # %k2 = 0x0002: element 1 only
316
+ kshiftlw $2,%k1,%k3 # %k3 = 0x0004: element 2 only
317
+ kshiftlw $3,%k1,%k4 # %k4 = 0x0008: element 3 only
318
+ kshiftlw $4,%k1,%k5 # %k5 = 0x0010: element 4 only
319
+ #vmovdqa64 64*0(%r8),%zmm33
320
+ vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4 -> %zmm13-%zmm16
321
+ vmovdqa64 64*2(%r8),%zmm14
322
+ vmovdqa64 64*3(%r8),%zmm15
323
+ vmovdqa64 64*4(%r8),%zmm16
324
+ vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows 0-4 -> %zmm27-%zmm31
325
+ vmovdqa64 64*6(%r8),%zmm28
326
+ vmovdqa64 64*7(%r8),%zmm29
327
+ vmovdqa64 64*8(%r8),%zmm30
328
+ vmovdqa64 64*9(%r8),%zmm31
329
+ vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows 0-4 -> %zmm22-%zmm26
330
+ vmovdqa64 64*11(%r8),%zmm23
331
+ vmovdqa64 64*12(%r8),%zmm24
332
+ vmovdqa64 64*13(%r8),%zmm25
333
+ vmovdqa64 64*14(%r8),%zmm26
334
+ vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows 0-4 -> %zmm17-%zmm21
335
+ vmovdqa64 64*16(%r8),%zmm18
336
+ vmovdqa64 64*17(%r8),%zmm19
337
+ vmovdqa64 64*18(%r8),%zmm20
338
+ vmovdqa64 64*19(%r8),%zmm21
339
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state lanes 0-4; elements 5-7 are zeroed
340
+ # vpxorq %zmm5,%zmm5,%zmm5
341
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z} # state lanes 5-9
342
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z} # state lanes 10-14
343
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z} # state lanes 15-19
344
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z} # state lanes 20-24
345
+ lea iotas+12*8(%rip), %r10 # %r10 = address of iotas entry 12 (the first 12 constants are skipped)
346
+ mov $12/2, %eax # loop counter: __KeccakF1600 ends each pass with dec %eax / jnz .Loop_avx512
347
+ call __KeccakF1600
348
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # masked stores write back exactly five lanes (40 bytes) each
349
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
350
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
351
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
352
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
353
+ vzeroupper # clear the upper bits of %zmm0-%zmm15 before returning
354
+ ret
355
+ .ifdef macOS
356
+ .else
357
+ .size KeccakP1600_AVX512_Permute_12rounds,.-KeccakP1600_AVX512_Permute_12rounds
358
+ .endif
359
+
360
+ # -----------------------------------------------------------------------------
361
+ # While at least laneCount 8-byte lanes of data remain: XOR one laneCount-lane block into the state and permute it. Returns the number of bytes consumed.
362
+ # size_t KeccakP1600_AVX512_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
363
+ # %rdi %rsi %rdx %rcx
364
+ # laneCount 21 and 17 run in registers; other values use the in-memory loop further down. NOTE(review): laneCount is an unsigned int but all 64 bits of %rsi are used - this relies on the caller zero-extending it; confirm.
365
+ .ifdef macOS # macOS build: underscore-prefixed symbol, no .type/.size
366
+ .globl _KeccakP1600_AVX512_12rounds_FastLoop_Absorb
367
+ _KeccakP1600_AVX512_12rounds_FastLoop_Absorb:
368
+ .else
369
+ .globl KeccakP1600_AVX512_12rounds_FastLoop_Absorb
370
+ .type KeccakP1600_AVX512_12rounds_FastLoop_Absorb,@function
371
+ KeccakP1600_AVX512_12rounds_FastLoop_Absorb:
372
+ .endif
373
+ .balign 32
374
+ push %rbx # %rbx is reused below to remember where data started
375
+ push %r10 # %r10 is overwritten below (iotas pointer / state cursor); restored at exit
376
+ shr $3, %rcx # rcx = data length in lanes
377
+ mov %rdx, %rbx # rbx = initial data pointer
378
+ cmp %rsi, %rcx
379
+ jb KeccakP1600_AVX512_FastLoop_Absorb_Exit # less than one block available: exit with %rdx unchanged, so 0 is returned
380
+ lea 96(%rdi),%rdi # bias the state pointer by 96; vector state accesses below use a -96 displacement
381
+ lea theta_perm(%rip),%r8 # %r8 = base of the constant tables (rows of 64 bytes)
382
+ kxnorw %k6,%k6,%k6 # %k6 = 0xffff
383
+ kshiftrw $15,%k6,%k1 # %k1 = 0x0001: element 0 only
384
+ kshiftrw $11,%k6,%k6 # %k6 = 0x001f: elements 0-4
385
+ kshiftlw $1,%k1,%k2 # %k2 = 0x0002: element 1 only
386
+ kshiftlw $2,%k1,%k3 # %k3 = 0x0004: element 2 only
387
+ kshiftlw $3,%k1,%k4 # %k4 = 0x0008: element 3 only
388
+ kshiftlw $4,%k1,%k5 # %k5 = 0x0010: element 4 only
389
+ vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4 -> %zmm13-%zmm16
390
+ vmovdqa64 64*2(%r8),%zmm14
391
+ vmovdqa64 64*3(%r8),%zmm15
392
+ vmovdqa64 64*4(%r8),%zmm16
393
+ vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows 0-4 -> %zmm27-%zmm31
394
+ vmovdqa64 64*6(%r8),%zmm28
395
+ vmovdqa64 64*7(%r8),%zmm29
396
+ vmovdqa64 64*8(%r8),%zmm30
397
+ vmovdqa64 64*9(%r8),%zmm31
398
+ vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows 0-4 -> %zmm22-%zmm26
399
+ vmovdqa64 64*11(%r8),%zmm23
400
+ vmovdqa64 64*12(%r8),%zmm24
401
+ vmovdqa64 64*13(%r8),%zmm25
402
+ vmovdqa64 64*14(%r8),%zmm26
403
+ vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows 0-4 -> %zmm17-%zmm21
404
+ vmovdqa64 64*16(%r8),%zmm18
405
+ vmovdqa64 64*17(%r8),%zmm19
406
+ vmovdqa64 64*18(%r8),%zmm20
407
+ vmovdqa64 64*19(%r8),%zmm21
408
+ vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state lanes 0-4; elements 5-7 are zeroed
409
+ vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z} # state lanes 5-9
410
+ vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z} # state lanes 10-14
411
+ vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z} # state lanes 15-19
412
+ vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z} # state lanes 20-24
413
+ cmp $21, %rsi
414
+ jnz KeccakP1600_AVX512_FastLoop_Absorb_Not21Lanes
415
+ sub $21, %rcx # pre-subtract the first block (the entry check guarantees it is available)
416
+ KeccakP1600_AVX512_FastLoop_Absorb_Loop21Lanes:
417
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # data lanes 0-4
418
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # data lanes 5-9
419
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # data lanes 10-14
420
+ vmovdqu64 8*15(%rdx),%zmm8{%k6}{z} # data lanes 15-19
421
+ vmovdqu64 8*20(%rdx),%zmm9{%k1}{z} # data lane 20 only; other elements zeroed
422
+ vpxorq %zmm5,%zmm0,%zmm0 # state ^= data block
423
+ vpxorq %zmm6,%zmm1,%zmm1
424
+ vpxorq %zmm7,%zmm2,%zmm2
425
+ vpxorq %zmm8,%zmm3,%zmm3
426
+ vpxorq %zmm9,%zmm4,%zmm4
427
+ add $21*8, %rdx # advance the data pointer past this block
428
+ lea iotas+12*8(%rip), %r10 # %r10 = address of iotas entry 12
429
+ mov $12/2, %eax # loop counter consumed by __KeccakF1600 (dec %eax / jnz)
430
+ call __KeccakF1600
431
+ sub $21, %rcx
432
+ jnc KeccakP1600_AVX512_FastLoop_Absorb_Loop21Lanes # no borrow: another full block remains
433
+ KeccakP1600_AVX512_FastLoop_Absorb_SaveAndExit:
434
+ vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # masked stores write back exactly five lanes (40 bytes) each
435
+ vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
436
+ vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
437
+ vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
438
+ vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
439
+ KeccakP1600_AVX512_FastLoop_Absorb_Exit:
440
+ vzeroupper # clear the upper bits of %zmm0-%zmm15 before returning
441
+ mov %rdx, %rax # return number of bytes processed
442
+ sub %rbx, %rax # = current data pointer - initial data pointer
443
+ pop %r10
444
+ pop %rbx
445
+ ret
446
+ KeccakP1600_AVX512_FastLoop_Absorb_Not21Lanes:
447
+ cmp $17, %rsi
448
+ jnz KeccakP1600_AVX512_FastLoop_Absorb_Not17Lanes
449
+ sub $17, %rcx # pre-subtract the first block (the entry check guarantees it is available)
450
+ KeccakP1600_AVX512_FastLoop_Absorb_Loop17Lanes:
451
+ vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # data lanes 0-4
452
+ vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # data lanes 5-9
453
+ vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # data lanes 10-14
454
+ vmovdqu64 8*15(%rdx),%zmm8{%k1}{z} # element 0 = data lane 15; other elements zeroed
455
+ vmovdqu64 8*15(%rdx),%zmm8{%k2} # merge element 1 = data lane 16; element 0 is kept
456
+ vpxorq %zmm5,%zmm0,%zmm0 # state ^= data block (%zmm4, state lanes 20-24, receives no data here)
457
+ vpxorq %zmm6,%zmm1,%zmm1
458
+ vpxorq %zmm7,%zmm2,%zmm2
459
+ vpxorq %zmm8,%zmm3,%zmm3
460
+ add $17*8, %rdx # advance the data pointer past this block
461
+ lea iotas+12*8(%rip), %r10 # %r10 = address of iotas entry 12
462
+ mov $12/2, %eax # loop counter consumed by __KeccakF1600 (dec %eax / jnz)
463
+ call __KeccakF1600
464
+ sub $17, %rcx
465
+ jnc KeccakP1600_AVX512_FastLoop_Absorb_Loop17Lanes # no borrow: another full block remains
466
+ jmp KeccakP1600_AVX512_FastLoop_Absorb_SaveAndExit
467
+ KeccakP1600_AVX512_FastLoop_Absorb_Not17Lanes:
468
+ lea -96(%rdi), %rdi # undo the +96 bias: %rdi points at the state again
469
+ KeccakP1600_AVX512_FastLoop_Absorb_LanesLoop:
470
+ mov %rsi, %rax # %rax = lanes left to XOR in this block
471
+ mov %rdi, %r10 # %r10 = cursor into the state in memory
472
+ KeccakP1600_AVX512_FastLoop_Absorb_LanesAddLoop:
473
+ mov (%rdx), %r8 # next data lane
474
+ add $8, %rdx
475
+ xor %r8, (%r10) # state lane ^= data lane, directly in memory
476
+ add $8, %r10
477
+ sub $1, %rax # NOTE(review): a laneCount of 0 would wrap this counter instead of ending the loop
478
+ jnz KeccakP1600_AVX512_FastLoop_Absorb_LanesAddLoop
479
+ sub %rsi, %rcx # one block consumed
480
+ push %rdi # saved across the call: the callee adds 96 to %rdi and does not restore it
481
+ push %rsi
482
+ push %rdx
483
+ push %rcx
484
+ .ifdef macOS
485
+ call _KeccakP1600_AVX512_Permute_12rounds
486
+ .else
487
+ call KeccakP1600_AVX512_Permute_12rounds@PLT
488
+ .endif
489
+ pop %rcx
490
+ pop %rdx
491
+ pop %rsi
492
+ pop %rdi
493
+ cmp %rsi, %rcx
494
+ jae KeccakP1600_AVX512_FastLoop_Absorb_LanesLoop # another full block remains
495
+ jmp KeccakP1600_AVX512_FastLoop_Absorb_Exit # state was updated in memory, so the register write-back is skipped
496
+ .ifdef macOS
497
+ .else
498
+ .size KeccakP1600_AVX512_12rounds_FastLoop_Absorb,.-KeccakP1600_AVX512_12rounds_FastLoop_Absorb
499
+ .endif
500
+ .balign 64 # rows below are read with vmovdqa64 into %zmm registers, which needs 64-byte alignment
501
+ theta_perm: # vpermq index vectors; rows 1-4 are loaded into %zmm13-%zmm16
502
+ .quad 0, 1, 2, 3, 4, 5, 6, 7 # [not used]
503
+ .quad 4, 0, 1, 2, 3, 5, 6, 7 # %zmm13: element i <- source element (i+4) mod 5
504
+ .quad 3, 4, 0, 1, 2, 5, 6, 7 # %zmm14: element i <- source element (i+3) mod 5
505
+ .quad 2, 3, 4, 0, 1, 5, 6, 7 # %zmm15: element i <- source element (i+2) mod 5
506
+ .quad 1, 2, 3, 4, 0, 5, 6, 7 # %zmm16: element i <- source element (i+1) mod 5
507
+ rhotates1: # per-element rotate counts for vprolvq; rows are loaded into %zmm27-%zmm31
508
+ .quad 0, 44, 43, 21, 14, 0, 0, 0 # [0][0] [1][1] [2][2] [3][3] [4][4]
509
+ .quad 18, 1, 6, 25, 8, 0, 0, 0 # [4][0] [0][1] [1][2] [2][3] [3][4]
510
+ .quad 41, 2, 62, 55, 39, 0, 0, 0 # [3][0] [4][1] [0][2] [1][3] [2][4]
511
+ .quad 3, 45, 61, 28, 20, 0, 0, 0 # [2][0] [3][1] [4][2] [0][3] [1][4]
512
+ .quad 36, 10, 15, 56, 27, 0, 0, 0 # [1][0] [2][1] [3][2] [4][3] [0][4]
513
+ rhotates0: # rows are loaded into %zmm22-%zmm26
514
+ .quad 0, 1, 62, 28, 27, 0, 0, 0
515
+ .quad 36, 44, 6, 55, 20, 0, 0, 0
516
+ .quad 3, 10, 43, 25, 39, 0, 0, 0
517
+ .quad 41, 45, 15, 21, 8, 0, 0, 0
518
+ .quad 18, 2, 61, 56, 14, 0, 0, 0
519
+ pi0_perm: # rows are loaded into %zmm17-%zmm21
520
+ .quad 0, 3, 1, 4, 2, 5, 6, 7
521
+ .quad 1, 4, 2, 0, 3, 5, 6, 7
522
+ .quad 2, 0, 3, 1, 4, 5, 6, 7
523
+ .quad 3, 1, 4, 2, 0, 5, 6, 7
524
+ .quad 4, 2, 0, 3, 1, 5, 6, 7
525
+ iotas: # 24 quadword constants, XORed into element 0 of %zmm0 in the Iota step
526
+ .quad 0x0000000000000001
527
+ .quad 0x0000000000008082
528
+ .quad 0x800000000000808a
529
+ .quad 0x8000000080008000
530
+ .quad 0x000000000000808b
531
+ .quad 0x0000000080000001
532
+ .quad 0x8000000080008081
533
+ .quad 0x8000000000008009
534
+ .quad 0x000000000000008a
535
+ .quad 0x0000000000000088
536
+ .quad 0x0000000080008009
537
+ .quad 0x000000008000000a
538
+ .quad 0x000000008000808b # entry 12 = iotas+12*8, where the 12-round entry points set %r10
539
+ .quad 0x800000000000008b
540
+ .quad 0x8000000000008089
541
+ .quad 0x8000000000008003
542
+ .quad 0x8000000000008002
543
+ .quad 0x8000000000000080
544
+ .quad 0x000000000000800a
545
+ .quad 0x800000008000000a
546
+ .quad 0x8000000080008081
547
+ .quad 0x8000000000008080
548
+ .quad 0x0000000080000001
549
+ .quad 0x8000000080008008
550
+ iotas_end:
551
+ .asciz "Keccak-1600 for AVX-512F, CRYPTOGAMS by <appro@openssl.org>"
@@ -0,0 +1,74 @@
1
+ /*
2
+ K12 based on the eXtended Keccak Code Package (XKCP)
3
+ https://github.com/XKCP/XKCP
4
+
5
+ The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
6
+
7
+ Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
8
+
9
+ For more information, feedback or questions, please refer to the Keccak Team website:
10
+ https://keccak.team/
11
+
12
+ To the extent possible under law, the implementer has waived all copyright
13
+ and related or neighboring rights to the source code in this file.
14
+ http://creativecommons.org/publicdomain/zero/1.0/
15
+
16
+ ---
17
+
18
+ Please refer to the XKCP for more details.
19
+ */
20
+
21
+ #ifndef _KeccakP_1600_SnP_h_
22
+ #define _KeccakP_1600_SnP_h_
23
+ #include <stddef.h> /* size_t, used by the FastLoop_Absorb prototypes below */
24
+ /* Keccak-p[1600] */
25
+
26
+ #define KeccakP1600_stateSizeInBytes 200 /* 1600 bits */
27
+ #define KeccakP1600_stateAlignment 8
28
+ #define KeccakP1600_12rounds_FastLoop_supported /* KeccakP1600_12rounds_FastLoop_Absorb() is declared below */
29
+
30
+ const char * KeccakP1600_GetImplementation(void); /* (void): a real prototype, not an unchecked () declarator */
31
+ void KeccakP1600_Initialize(void *state);
32
+ void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
33
+ void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
34
+ void KeccakP1600_Permute_12rounds(void *state);
35
+ void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
36
+ size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
37
+
38
+ void KeccakP1600_AVX512_Initialize(void *state); /* AVX512-prefixed variants, same signatures as above */
39
+ void KeccakP1600_AVX512_AddByte(void *state, unsigned char data, unsigned int offset);
40
+ void KeccakP1600_AVX512_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
41
+ void KeccakP1600_AVX512_Permute_12rounds(void *state);
42
+ void KeccakP1600_AVX512_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
43
+ size_t KeccakP1600_AVX512_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
44
+
45
+ void KeccakP1600_AVX2_Initialize(void *state); /* AVX2-prefixed variants, same signatures as above */
46
+ void KeccakP1600_AVX2_AddByte(void *state, unsigned char data, unsigned int offset);
47
+ void KeccakP1600_AVX2_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
48
+ void KeccakP1600_AVX2_Permute_12rounds(void *state);
49
+ void KeccakP1600_AVX2_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
50
+ size_t KeccakP1600_AVX2_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
51
+
52
+ void KeccakP1600_opt64_Initialize(void *state); /* opt64-prefixed variants, same signatures as above */
53
+ void KeccakP1600_opt64_AddByte(void *state, unsigned char data, unsigned int offset);
54
+ void KeccakP1600_opt64_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
55
+ void KeccakP1600_opt64_Permute_12rounds(void *state);
56
+ void KeccakP1600_opt64_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
57
+ size_t KeccakP1600_opt64_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
58
+
59
+ /* Keccak-p[1600]×2 */
60
+
61
+ int KeccakP1600times2_IsAvailable(void);
62
+ const char * KeccakP1600times2_GetImplementation(void);
63
+
64
+ /* Keccak-p[1600]×4 */
65
+
66
+ int KeccakP1600times4_IsAvailable(void);
67
+ const char * KeccakP1600times4_GetImplementation(void);
68
+
69
+ /* Keccak-p[1600]×8 */
70
+
71
+ int KeccakP1600times8_IsAvailable(void);
72
+ const char * KeccakP1600times8_GetImplementation(void);
73
+
74
+ #endif /* _KeccakP_1600_SnP_h_ */