sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,1031 +0,0 @@
1
- # The eXtended Keccak Code Package (XKCP)
2
- # https://github.com/XKCP/XKCP
3
- #
4
- # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
5
- # Copyright (c) 2018 Ronny Van Keer
6
- # All rights reserved.
7
- #
8
- # The source code in this file is licensed under the CRYPTOGAMS license.
9
- # For further details see http://www.openssl.org/~appro/cryptogams/.
10
- #
11
- # Notes:
12
- # The code for the permutation (__KeccakF1600) was generated with
13
- # Andy Polyakov's keccak1600-avx512.pl from the CRYPTOGAMS project
14
- # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx512.pl).
15
- # The rest of the code was written by Ronny Van Keer.
16
- # Adaptations for macOS by Stéphane Léon.
17
- # Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
18
-
19
- .text
20
-
21
- # -----------------------------------------------------------------------------
22
- #
23
- # void KeccakP1600_Initialize(void *state);
24
- #
25
- .globl KeccakP1600_Initialize
26
- .globl _KeccakP1600_Initialize
27
- .ifndef old_gas_syntax
28
- .type KeccakP1600_Initialize,@function
29
- .endif
30
- KeccakP1600_Initialize:
31
- _KeccakP1600_Initialize:
32
- .balign 32
33
- vpxorq %zmm0,%zmm0,%zmm0
34
- vmovdqu64 %zmm0,0*64(%rdi)
35
- vmovdqu64 %zmm0,1*64(%rdi)
36
- vmovdqu64 %zmm0,2*64(%rdi)
37
- movq $0,3*64(%rdi)
38
- ret
39
- .ifndef old_gas_syntax
40
- .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
41
- .endif
42
-
43
- # -----------------------------------------------------------------------------
44
- #
45
- # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
46
- # %rdi %rsi %rdx
47
- #!!
48
- #.globl KeccakP1600_AddByte
49
- #.type KeccakP1600_AddByte,@function
50
- #.align 32
51
- #KeccakP1600_AddByte:
52
- # mov %rdx, %rax
53
- # and $7, %rax
54
- # and $0xFFFFFFF8, %edx
55
- # mov mapState(%rdx), %rdx
56
- # add %rdx, %rdi
57
- # add %rax, %rdi
58
- # xorb %sil, (%rdi)
59
- # ret
60
- #.size KeccakP1600_AddByte,.-KeccakP1600_AddByte
61
-
62
- # -----------------------------------------------------------------------------
63
- #
64
- # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
65
- # %rdi %rsi %rdx %rcx
66
- #
67
- .globl KeccakP1600_AddBytes
68
- .globl _KeccakP1600_AddBytes
69
- .ifndef old_gas_syntax
70
- .type KeccakP1600_AddBytes,@function
71
- .endif
72
- KeccakP1600_AddBytes:
73
- _KeccakP1600_AddBytes:
74
- .balign 32
75
- cmp $0, %rcx
76
- jz KeccakP1600_AddBytes_Exit
77
- add %rdx, %rdi # state += offset
78
- and $7, %rdx
79
- jz KeccakP1600_AddBytes_LaneAlignedCheck
80
- mov $8, %r9 # r9 is (max) length of incomplete lane
81
- sub %rdx, %r9
82
- cmp %rcx, %r9
83
- cmovae %rcx, %r9
84
- sub %r9, %rcx # length -= length of incomplete lane
85
- KeccakP1600_AddBytes_NotAlignedLoop:
86
- mov (%rsi), %r8b
87
- inc %rsi
88
- xorb %r8b, (%rdi)
89
- inc %rdi
90
- dec %r9
91
- jnz KeccakP1600_AddBytes_NotAlignedLoop
92
- jmp KeccakP1600_AddBytes_LaneAlignedCheck
93
- KeccakP1600_AddBytes_LaneAlignedLoop:
94
- mov (%rsi), %r8
95
- add $8, %rsi
96
- xor %r8, (%rdi)
97
- add $8, %rdi
98
- KeccakP1600_AddBytes_LaneAlignedCheck:
99
- sub $8, %rcx
100
- jnc KeccakP1600_AddBytes_LaneAlignedLoop
101
- KeccakP1600_AddBytes_LastIncompleteLane:
102
- add $8, %rcx
103
- jz KeccakP1600_AddBytes_Exit
104
- KeccakP1600_AddBytes_LastIncompleteLaneLoop:
105
- mov (%rsi), %r8b
106
- inc %rsi
107
- xor %r8b, (%rdi)
108
- inc %rdi
109
- dec %rcx
110
- jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
111
- KeccakP1600_AddBytes_Exit:
112
- ret
113
- .ifndef old_gas_syntax
114
- .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
115
- .endif
116
-
117
- # -----------------------------------------------------------------------------
118
- #
119
- # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
120
- # %rdi %rsi %rdx %rcx
121
- #
122
- .globl KeccakP1600_OverwriteBytes
123
- .globl _KeccakP1600_OverwriteBytes
124
- .ifndef old_gas_syntax
125
- .type KeccakP1600_OverwriteBytes,@function
126
- .endif
127
- KeccakP1600_OverwriteBytes:
128
- _KeccakP1600_OverwriteBytes:
129
- .balign 32
130
- cmp $0, %rcx
131
- jz KeccakP1600_OverwriteBytes_Exit
132
- add %rdx, %rdi # state += offset
133
- and $7, %rdx
134
- jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
135
- mov $8, %r9 # r9 is (max) length of incomplete lane
136
- sub %rdx, %r9
137
- cmp %rcx, %r9
138
- cmovae %rcx, %r9
139
- sub %r9, %rcx # length -= length of incomplete lane
140
- KeccakP1600_OverwriteBytes_NotAlignedLoop:
141
- mov (%rsi), %r8b
142
- inc %rsi
143
- mov %r8b, (%rdi)
144
- inc %rdi
145
- dec %r9
146
- jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
147
- jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
148
- KeccakP1600_OverwriteBytes_LaneAlignedLoop:
149
- mov (%rsi), %r8
150
- add $8, %rsi
151
- mov %r8, (%rdi)
152
- add $8, %rdi
153
- KeccakP1600_OverwriteBytes_LaneAlignedCheck:
154
- sub $8, %rcx
155
- jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop
156
- KeccakP1600_OverwriteBytes_LastIncompleteLane:
157
- add $8, %rcx
158
- jz KeccakP1600_OverwriteBytes_Exit
159
- KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
160
- mov (%rsi), %r8b
161
- inc %rsi
162
- mov %r8b, (%rdi)
163
- inc %rdi
164
- dec %rcx
165
- jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
166
- KeccakP1600_OverwriteBytes_Exit:
167
- ret
168
- .ifndef old_gas_syntax
169
- .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
170
- .endif
171
-
172
- # -----------------------------------------------------------------------------
173
- #
174
- # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
175
- # %rdi %rsi
176
- #
177
- .globl KeccakP1600_OverwriteWithZeroes
178
- .globl _KeccakP1600_OverwriteWithZeroes
179
- .ifndef old_gas_syntax
180
- .type KeccakP1600_OverwriteWithZeroes,@function
181
- .endif
182
- KeccakP1600_OverwriteWithZeroes:
183
- _KeccakP1600_OverwriteWithZeroes:
184
- .balign 32
185
- cmp $0, %rsi
186
- jz KeccakP1600_OverwriteWithZeroes_Exit
187
- jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
188
- KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
189
- movq $0, (%rdi)
190
- add $8, %rdi
191
- KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
192
- sub $8, %rsi
193
- jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
194
- KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
195
- add $8, %rsi
196
- jz KeccakP1600_OverwriteWithZeroes_Exit
197
- KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
198
- movb $0, (%rdi)
199
- inc %rdi
200
- dec %rsi
201
- jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
202
- KeccakP1600_OverwriteWithZeroes_Exit:
203
- ret
204
- .ifndef old_gas_syntax
205
- .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
206
- .endif
207
-
208
- # -----------------------------------------------------------------------------
209
- #
210
- # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
211
- # %rdi %rsi %rdx %rcx
212
- #
213
- .globl KeccakP1600_ExtractBytes
214
- .globl _KeccakP1600_ExtractBytes
215
- .ifndef old_gas_syntax
216
- .type KeccakP1600_ExtractBytes,@function
217
- .endif
218
- KeccakP1600_ExtractBytes:
219
- _KeccakP1600_ExtractBytes:
220
- .balign 32
221
- cmp $0, %rcx
222
- jz KeccakP1600_ExtractBytes_Exit
223
- add %rdx, %rdi # state += offset
224
- and $7, %rdx
225
- jz KeccakP1600_ExtractBytes_LaneAlignedCheck
226
- mov $8, %rax # rax is (max) length of incomplete lane
227
- sub %rdx, %rax
228
- cmp %rcx, %rax
229
- cmovae %rcx, %rax
230
- sub %rax, %rcx # length -= length of incomplete lane
231
- KeccakP1600_ExtractBytes_NotAlignedLoop:
232
- mov (%rdi), %r8b
233
- inc %rdi
234
- mov %r8b, (%rsi)
235
- inc %rsi
236
- dec %rax
237
- jnz KeccakP1600_ExtractBytes_NotAlignedLoop
238
- jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
239
- KeccakP1600_ExtractBytes_LaneAlignedLoop:
240
- mov (%rdi), %r8
241
- add $8, %rdi
242
- mov %r8, (%rsi)
243
- add $8, %rsi
244
- KeccakP1600_ExtractBytes_LaneAlignedCheck:
245
- sub $8, %rcx
246
- jnc KeccakP1600_ExtractBytes_LaneAlignedLoop
247
- KeccakP1600_ExtractBytes_LastIncompleteLane:
248
- add $8, %rcx
249
- jz KeccakP1600_ExtractBytes_Exit
250
- mov (%rdi), %r8
251
- KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
252
- mov %r8b, (%rsi)
253
- shr $8, %r8
254
- inc %rsi
255
- dec %rcx
256
- jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
257
- KeccakP1600_ExtractBytes_Exit:
258
- ret
259
- .ifndef old_gas_syntax
260
- .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
261
- .endif
262
-
263
- # -----------------------------------------------------------------------------
264
- # Writes output[i] = state[offset + i] XOR input[i] for i in [0, length); nothing is stored to the state.
265
- # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
266
- # Registers: state=%rdi input=%rsi output=%rdx offset=%rcx length=%r8
267
- # NOTE(review): offset and length are declared unsigned int but are used as full 64-bit registers below; the SysV AMD64 ABI leaves bits 63:32 of such arguments undefined -- confirm callers zero-extend.
268
- .globl KeccakP1600_ExtractAndAddBytes
269
- .globl _KeccakP1600_ExtractAndAddBytes
270
- .ifndef old_gas_syntax
271
- .type KeccakP1600_ExtractAndAddBytes,@function
272
- .endif
273
- KeccakP1600_ExtractAndAddBytes:
274
- _KeccakP1600_ExtractAndAddBytes:
275
- .balign 32
276
- push %rbx # %rbx is callee-saved; it serves as the byte counter for the unaligned head
277
- cmp $0, %r8
278
- jz KeccakP1600_ExtractAndAddBytes_Exit # length == 0: nothing to do
279
- add %rcx, %rdi # state += offset
280
- and $7, %rcx # rcx = offset within the 8-byte lane
281
- jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck # already lane-aligned: skip the byte-wise head
282
- mov $8, %rbx # rbx is (max) length of incomplete lane
283
- sub %rcx, %rbx # rbx = bytes left in the current lane
284
- cmp %r8, %rbx
285
- cmovae %r8, %rbx # rbx = min(rbx, length)
286
- sub %rbx, %r8 # length -= length of incomplete lane
287
- KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
288
- mov (%rdi), %r9b # one state byte
289
- inc %rdi
290
- xor (%rsi), %r9b # XOR with one input byte
291
- inc %rsi
292
- mov %r9b, (%rdx)
293
- inc %rdx
294
- dec %rbx
295
- jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
296
- jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
297
- KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
298
- mov (%rdi), %r9 # one whole 8-byte lane per iteration
299
- add $8, %rdi
300
- xor (%rsi), %r9
301
- add $8, %rsi
302
- mov %r9, (%rdx)
303
- add $8, %rdx
304
- KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
305
- sub $8, %r8
306
- jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
307
- KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
308
- add $8, %r8 # undo the last subtraction: r8 = remaining bytes (0..7)
309
- jz KeccakP1600_ExtractAndAddBytes_Exit
310
- mov (%rdi), %r9 # loads a full 8-byte lane although fewer bytes are consumed
311
- KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
312
- xor (%rsi), %r9b
313
- inc %rsi
314
- mov %r9b, (%rdx)
315
- inc %rdx
316
- shr $8, %r9 # bring the next state byte into %r9b
317
- dec %r8
318
- jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
319
- KeccakP1600_ExtractAndAddBytes_Exit:
320
- pop %rbx
321
- ret
322
- .ifndef old_gas_syntax
323
- .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
324
- .endif
325
-
326
- # -----------------------------------------------------------------------------
327
- # internal: runs %eax iterations of .Loop_avx512; each iteration performs two rounds (an "even" and an "odd" one).
328
- # Inputs as set up by the callers in this file: state rows in %zmm0-%zmm4, permutation/rotation tables in %zmm13-%zmm31, masks in %k1-%k5, %r10 = pointer to the next round constant.
329
- # %zmm5-%zmm12 are used as temporaries; %eax and %r10 are modified. The routine does not touch the stack.
330
- .text
331
- .ifndef old_gas_syntax
332
- .type __KeccakF1600,@function
333
- .endif
334
- .balign 32
335
- __KeccakF1600:
336
- .Loop_avx512:
337
- ######################################### Theta, even round
338
- vmovdqa64 %zmm0,%zmm5 # put aside original A00
339
- vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00"
340
- vpternlogq $0x96,%zmm4,%zmm3,%zmm0 # imm8 0x96 = three-input XOR
341
- vprolq $1,%zmm0,%zmm6
342
- vpermq %zmm0,%zmm13,%zmm0
343
- vpermq %zmm6,%zmm16,%zmm6
344
- vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00
345
- vpternlogq $0x96,%zmm0,%zmm6,%zmm1
346
- vpternlogq $0x96,%zmm0,%zmm6,%zmm2
347
- vpternlogq $0x96,%zmm0,%zmm6,%zmm3
348
- vpternlogq $0x96,%zmm0,%zmm6,%zmm4
349
- ######################################### Rho
350
- vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00
351
- vprolvq %zmm23,%zmm1,%zmm1
352
- vprolvq %zmm24,%zmm2,%zmm2
353
- vprolvq %zmm25,%zmm3,%zmm3
354
- vprolvq %zmm26,%zmm4,%zmm4
355
- ######################################### Pi
356
- vpermq %zmm0,%zmm17,%zmm0
357
- vpermq %zmm1,%zmm18,%zmm1
358
- vpermq %zmm2,%zmm19,%zmm2
359
- vpermq %zmm3,%zmm20,%zmm3
360
- vpermq %zmm4,%zmm21,%zmm4
361
- ######################################### Chi
362
- vmovdqa64 %zmm0,%zmm5 # keep the pre-Chi rows 0 and 1: they are overwritten before the last two steps read them
363
- vmovdqa64 %zmm1,%zmm6
364
- vpternlogq $0xD2,%zmm2,%zmm1,%zmm0 # imm8 0xD2: %zmm0 = %zmm0 ^ (~%zmm1 & %zmm2)
365
- vpternlogq $0xD2,%zmm3,%zmm2,%zmm1
366
- vpternlogq $0xD2,%zmm4,%zmm3,%zmm2
367
- vpternlogq $0xD2,%zmm5,%zmm4,%zmm3
368
- vpternlogq $0xD2,%zmm6,%zmm5,%zmm4
369
- ######################################### Iota
370
- vpxorq (%r10),%zmm0,%zmm0{%k1} # merge-masked by %k1, which the callers in this file set to lane 0 only
371
- lea 16(%r10),%r10 # step over both constants of this iteration; the odd round reads -8(%r10)
372
- ######################################### Harmonize rounds
373
- vpblendmq %zmm2,%zmm1,%zmm6{%k2}
374
- vpblendmq %zmm3,%zmm2,%zmm7{%k2}
375
- vpblendmq %zmm4,%zmm3,%zmm8{%k2}
376
- vpblendmq %zmm1,%zmm0,%zmm5{%k2}
377
- vpblendmq %zmm0,%zmm4,%zmm9{%k2}
378
- vpblendmq %zmm3,%zmm6,%zmm6{%k3}
379
- vpblendmq %zmm4,%zmm7,%zmm7{%k3}
380
- vpblendmq %zmm2,%zmm5,%zmm5{%k3}
381
- vpblendmq %zmm0,%zmm8,%zmm8{%k3}
382
- vpblendmq %zmm1,%zmm9,%zmm9{%k3}
383
- vpblendmq %zmm4,%zmm6,%zmm6{%k4}
384
- vpblendmq %zmm3,%zmm5,%zmm5{%k4}
385
- vpblendmq %zmm0,%zmm7,%zmm7{%k4}
386
- vpblendmq %zmm1,%zmm8,%zmm8{%k4}
387
- vpblendmq %zmm2,%zmm9,%zmm9{%k4}
388
- vpblendmq %zmm4,%zmm5,%zmm5{%k5}
389
- vpblendmq %zmm0,%zmm6,%zmm6{%k5}
390
- vpblendmq %zmm1,%zmm7,%zmm7{%k5}
391
- vpblendmq %zmm2,%zmm8,%zmm8{%k5}
392
- vpblendmq %zmm3,%zmm9,%zmm9{%k5}
393
- #vpermq %zmm5,%zmm33,%zmm0 # doesn't actually change order
394
- vpermq %zmm6,%zmm13,%zmm1
395
- vpermq %zmm7,%zmm14,%zmm2
396
- vpermq %zmm8,%zmm15,%zmm3
397
- vpermq %zmm9,%zmm16,%zmm4
398
- ######################################### Theta, odd round
399
- vmovdqa64 %zmm5,%zmm0 # real A00
400
- vpternlogq $0x96,%zmm2,%zmm1,%zmm5 # C00 is %zmm5's alias
401
- vpternlogq $0x96,%zmm4,%zmm3,%zmm5
402
- vprolq $1,%zmm5,%zmm6
403
- vpermq %zmm5,%zmm13,%zmm5
404
- vpermq %zmm6,%zmm16,%zmm6
405
- vpternlogq $0x96,%zmm5,%zmm6,%zmm0
406
- vpternlogq $0x96,%zmm5,%zmm6,%zmm3
407
- vpternlogq $0x96,%zmm5,%zmm6,%zmm1
408
- vpternlogq $0x96,%zmm5,%zmm6,%zmm4
409
- vpternlogq $0x96,%zmm5,%zmm6,%zmm2
410
- ######################################### Rho
411
- vprolvq %zmm27,%zmm0,%zmm0
412
- vprolvq %zmm30,%zmm3,%zmm6
413
- vprolvq %zmm28,%zmm1,%zmm7
414
- vprolvq %zmm31,%zmm4,%zmm8
415
- vprolvq %zmm29,%zmm2,%zmm9
416
- vpermq %zmm0,%zmm16,%zmm10
417
- vpermq %zmm0,%zmm15,%zmm11
418
- ######################################### Iota
419
- vpxorq -8(%r10),%zmm0,%zmm0{%k1} # second constant of the pair; %r10 was already advanced by 16
420
- ######################################### Pi
421
- vpermq %zmm6,%zmm14,%zmm1
422
- vpermq %zmm7,%zmm16,%zmm2
423
- vpermq %zmm8,%zmm13,%zmm3
424
- vpermq %zmm9,%zmm15,%zmm4
425
- ######################################### Chi
426
- vpternlogq $0xD2,%zmm11,%zmm10,%zmm0
427
- vpermq %zmm6,%zmm13,%zmm12
428
- #vpermq %zmm6,%zmm33,%zmm6
429
- vpternlogq $0xD2,%zmm6,%zmm12,%zmm1
430
- vpermq %zmm7,%zmm15,%zmm5
431
- vpermq %zmm7,%zmm14,%zmm7
432
- vpternlogq $0xD2,%zmm7,%zmm5,%zmm2
433
- #vpermq %zmm8,%zmm33,%zmm8
434
- vpermq %zmm8,%zmm16,%zmm6
435
- vpternlogq $0xD2,%zmm6,%zmm8,%zmm3
436
- vpermq %zmm9,%zmm14,%zmm5
437
- vpermq %zmm9,%zmm13,%zmm9
438
- vpternlogq $0xD2,%zmm9,%zmm5,%zmm4
439
- dec %eax # one iteration (two rounds) done
440
- jnz .Loop_avx512
441
- ret
442
- .ifndef old_gas_syntax
443
- .size __KeccakF1600,.-__KeccakF1600
444
- .endif
445
-
446
- # -----------------------------------------------------------------------------
447
- # Loads the 25-lane state at %rdi, runs 24 rounds (12 iterations of __KeccakF1600 starting at iotas) and stores the state back.
448
- # void KeccakP1600_Permute_24rounds(void *state);
449
- # Registers: state=%rdi
450
- # Modifies %rdi (left biased by +96), %r8, %r10, %eax, %k1-%k6 and the %zmm registers used by __KeccakF1600.
451
- .globl KeccakP1600_Permute_24rounds
452
- .globl _KeccakP1600_Permute_24rounds
453
- .ifndef old_gas_syntax
454
- .type KeccakP1600_Permute_24rounds,@function
455
- .endif
456
- KeccakP1600_Permute_24rounds:
457
- _KeccakP1600_Permute_24rounds:
458
- .balign 32
459
- lea 96(%rdi),%rdi # bias the pointer; all state accesses below use 40*row-96(%rdi)
460
- lea theta_perm(%rip),%r8 # base of the constant tables
461
- kxnorw %k6,%k6,%k6 # k6 = 0xffff
462
- kshiftrw $15,%k6,%k1 # k1 = 0x01 (lane 0)
463
- kshiftrw $11,%k6,%k6 # k6 = 0x1f (lanes 0-4, one 5-lane row)
464
- kshiftlw $1,%k1,%k2 # k2 = 0x02
465
- kshiftlw $2,%k1,%k3 # k3 = 0x04
466
- kshiftlw $3,%k1,%k4 # k4 = 0x08
467
- kshiftlw $4,%k1,%k5 # k5 = 0x10
468
- #vmovdqa64 64*0(%r8),%zmm33
469
- vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4
470
- vmovdqa64 64*2(%r8),%zmm14
471
- vmovdqa64 64*3(%r8),%zmm15
472
- vmovdqa64 64*4(%r8),%zmm16
473
- vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows (the table follows theta_perm)
474
- vmovdqa64 64*6(%r8),%zmm28
475
- vmovdqa64 64*7(%r8),%zmm29
476
- vmovdqa64 64*8(%r8),%zmm30
477
- vmovdqa64 64*9(%r8),%zmm31
478
- vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows
479
- vmovdqa64 64*11(%r8),%zmm23
480
- vmovdqa64 64*12(%r8),%zmm24
481
- vmovdqa64 64*13(%r8),%zmm25
482
- vmovdqa64 64*14(%r8),%zmm26
483
- vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows
484
- vmovdqa64 64*16(%r8),%zmm18
485
- vmovdqa64 64*17(%r8),%zmm19
486
- vmovdqa64 64*18(%r8),%zmm20
487
- vmovdqa64 64*19(%r8),%zmm21
488
- vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state row 0: five lanes, remaining lanes zeroed
489
- # vpxorq %zmm5,%zmm5,%zmm5
490
- vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
491
- vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
492
- vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
493
- vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
494
- lea iotas(%rip), %r10 # start at the first round constant
495
- mov $24/2, %eax # 12 iterations of two rounds each
496
- call __KeccakF1600
497
- vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # write back five lanes per row
498
- vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
499
- vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
500
- vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
501
- vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
502
- vzeroupper
503
- ret
504
- .ifndef old_gas_syntax
505
- .size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
506
- .endif
507
-
508
- # -----------------------------------------------------------------------------
509
- # Loads the 25-lane state at %rdi, runs 12 rounds (6 iterations of __KeccakF1600 starting at iotas+12*8) and stores the state back.
510
- # void KeccakP1600_Permute_12rounds(void *state);
511
- # Registers: state=%rdi
512
- # Modifies %rdi (left biased by +96), %r8, %r10, %eax, %k1-%k6 and the %zmm registers used by __KeccakF1600.
513
- .globl KeccakP1600_Permute_12rounds
514
- .globl _KeccakP1600_Permute_12rounds
515
- .ifndef old_gas_syntax
516
- .type KeccakP1600_Permute_12rounds,@function
517
- .endif
518
- KeccakP1600_Permute_12rounds:
519
- _KeccakP1600_Permute_12rounds:
520
- .balign 32
521
- lea 96(%rdi),%rdi # bias the pointer; all state accesses below use 40*row-96(%rdi)
522
- lea theta_perm(%rip),%r8 # base of the constant tables
523
- kxnorw %k6,%k6,%k6 # k6 = 0xffff
524
- kshiftrw $15,%k6,%k1 # k1 = 0x01 (lane 0)
525
- kshiftrw $11,%k6,%k6 # k6 = 0x1f (lanes 0-4, one 5-lane row)
526
- kshiftlw $1,%k1,%k2 # k2 = 0x02
527
- kshiftlw $2,%k1,%k3 # k3 = 0x04
528
- kshiftlw $3,%k1,%k4 # k4 = 0x08
529
- kshiftlw $4,%k1,%k5 # k5 = 0x10
530
- #vmovdqa64 64*0(%r8),%zmm33
531
- vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4
532
- vmovdqa64 64*2(%r8),%zmm14
533
- vmovdqa64 64*3(%r8),%zmm15
534
- vmovdqa64 64*4(%r8),%zmm16
535
- vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows (the table follows theta_perm)
536
- vmovdqa64 64*6(%r8),%zmm28
537
- vmovdqa64 64*7(%r8),%zmm29
538
- vmovdqa64 64*8(%r8),%zmm30
539
- vmovdqa64 64*9(%r8),%zmm31
540
- vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows
541
- vmovdqa64 64*11(%r8),%zmm23
542
- vmovdqa64 64*12(%r8),%zmm24
543
- vmovdqa64 64*13(%r8),%zmm25
544
- vmovdqa64 64*14(%r8),%zmm26
545
- vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows
546
- vmovdqa64 64*16(%r8),%zmm18
547
- vmovdqa64 64*17(%r8),%zmm19
548
- vmovdqa64 64*18(%r8),%zmm20
549
- vmovdqa64 64*19(%r8),%zmm21
550
- vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state row 0: five lanes, remaining lanes zeroed
551
- # vpxorq %zmm5,%zmm5,%zmm5
552
- vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
553
- vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
554
- vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
555
- vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
556
- lea iotas+12*8(%rip), %r10 # start at the 13th constant: the last 12 of the 24 entries
557
- mov $12/2, %eax # 6 iterations of two rounds each
558
- call __KeccakF1600
559
- vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # write back five lanes per row
560
- vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
561
- vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
562
- vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
563
- vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
564
- vzeroupper
565
- ret
566
- .ifndef old_gas_syntax
567
- .size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
568
- .endif
569
-
570
- # -----------------------------------------------------------------------------
571
- # Runs the rounds that use the last nrounds entries of the round-constant table: one inline single round when nrounds is odd, then nrounds/2 iterations of __KeccakF1600.
572
- # void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
573
- # Registers: state=%rdi nrounds=%rsi
574
- # NOTE(review): no range check on nrounds is visible (values above 24 would index before iotas), and %rsi is used as a 64-bit value although the parameter is unsigned int -- confirm callers.
575
- .globl KeccakP1600_Permute_Nrounds
576
- .globl _KeccakP1600_Permute_Nrounds
577
- .ifndef old_gas_syntax
578
- .type KeccakP1600_Permute_Nrounds,@function
579
- .endif
580
- KeccakP1600_Permute_Nrounds:
581
- _KeccakP1600_Permute_Nrounds:
582
- .balign 32
583
- lea 96(%rdi),%rdi # bias the pointer; all state accesses below use 40*row-96(%rdi)
584
- lea theta_perm(%rip),%r8 # base of the constant tables
585
- kxnorw %k6,%k6,%k6 # k6 = 0xffff
586
- kshiftrw $15,%k6,%k1 # k1 = 0x01 (lane 0)
587
- kshiftrw $11,%k6,%k6 # k6 = 0x1f (lanes 0-4, one 5-lane row)
588
- kshiftlw $1,%k1,%k2 # k2 = 0x02
589
- kshiftlw $2,%k1,%k3 # k3 = 0x04
590
- kshiftlw $3,%k1,%k4 # k4 = 0x08
591
- kshiftlw $4,%k1,%k5 # k5 = 0x10
592
- vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4
593
- vmovdqa64 64*2(%r8),%zmm14
594
- vmovdqa64 64*3(%r8),%zmm15
595
- vmovdqa64 64*4(%r8),%zmm16
596
- vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows (the table follows theta_perm)
597
- vmovdqa64 64*6(%r8),%zmm28
598
- vmovdqa64 64*7(%r8),%zmm29
599
- vmovdqa64 64*8(%r8),%zmm30
600
- vmovdqa64 64*9(%r8),%zmm31
601
- vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows
602
- vmovdqa64 64*11(%r8),%zmm23
603
- vmovdqa64 64*12(%r8),%zmm24
604
- vmovdqa64 64*13(%r8),%zmm25
605
- vmovdqa64 64*14(%r8),%zmm26
606
- vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows
607
- vmovdqa64 64*16(%r8),%zmm18
608
- vmovdqa64 64*17(%r8),%zmm19
609
- vmovdqa64 64*18(%r8),%zmm20
610
- vmovdqa64 64*19(%r8),%zmm21
611
- vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state row 0: five lanes, remaining lanes zeroed
612
- vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
613
- vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
614
- vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
615
- vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
616
- mov %rsi, %rax # rax = nrounds; its parity and the double-round count are derived from it below
617
- lea iotas_end(%rip), %r10 # r10 = end of the round-constant table
618
- shl $3, %rsi # rsi = nrounds * 8 (bytes per constant)
619
- sub %rsi, %r10 # r10 = first constant to use (nrounds entries before iotas_end)
620
- test $1, %eax
621
- jz .KeccakP1600_Permute_Nrounds_DoubleRound # even nrounds: no single round needed
622
- # do odd round
623
- ######################################### Theta
624
- vmovdqa64 %zmm0,%zmm5 # put aside original A00
625
- vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00"
626
- vpternlogq $0x96,%zmm4,%zmm3,%zmm0
627
- vprolq $1,%zmm0,%zmm6
628
- vpermq %zmm0,%zmm13,%zmm0
629
- vpermq %zmm6,%zmm16,%zmm6
630
- vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00
631
- vpternlogq $0x96,%zmm0,%zmm6,%zmm1
632
- vpternlogq $0x96,%zmm0,%zmm6,%zmm2
633
- vpternlogq $0x96,%zmm0,%zmm6,%zmm3
634
- vpternlogq $0x96,%zmm0,%zmm6,%zmm4
635
- ######################################### Rho
636
- vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00
637
- vprolvq %zmm23,%zmm1,%zmm1
638
- vprolvq %zmm24,%zmm2,%zmm2
639
- vprolvq %zmm25,%zmm3,%zmm3
640
- vprolvq %zmm26,%zmm4,%zmm4
641
- ######################################### Pi
642
- vpermq %zmm0,%zmm17,%zmm0
643
- vpermq %zmm1,%zmm18,%zmm1
644
- vpermq %zmm2,%zmm19,%zmm2
645
- vpermq %zmm3,%zmm20,%zmm3
646
- vpermq %zmm4,%zmm21,%zmm4
647
- ######################################### Chi
648
- vmovdqa64 %zmm0,%zmm5
649
- vmovdqa64 %zmm1,%zmm6
650
- vpternlogq $0xD2,%zmm2,%zmm1,%zmm0
651
- vpternlogq $0xD2,%zmm3,%zmm2,%zmm1
652
- vpternlogq $0xD2,%zmm4,%zmm3,%zmm2
653
- vpternlogq $0xD2,%zmm5,%zmm4,%zmm3
654
- vpternlogq $0xD2,%zmm6,%zmm5,%zmm4
655
- ######################################### Iota
656
- vpxorq (%r10),%zmm0,%zmm0{%k1}
657
- lea 8(%r10),%r10 # the single round consumes one constant
658
- ######################################### Harmonize single round
659
- vpermq %zmm1,%zmm13,%zmm1
660
- vpermq %zmm2,%zmm14,%zmm2
661
- vpermq %zmm3,%zmm15,%zmm3
662
- vpermq %zmm4,%zmm16,%zmm4
663
- vpblendmq %zmm1,%zmm0,%zmm5{%k2}
664
- vpblendmq %zmm2,%zmm1,%zmm6{%k2}
665
- vpblendmq %zmm3,%zmm2,%zmm7{%k2}
666
- vpblendmq %zmm4,%zmm3,%zmm8{%k2}
667
- vpblendmq %zmm0,%zmm4,%zmm9{%k2}
668
- vpblendmq %zmm2,%zmm5,%zmm5{%k3}
669
- vpblendmq %zmm3,%zmm6,%zmm6{%k3}
670
- vpblendmq %zmm4,%zmm7,%zmm7{%k3}
671
- vpblendmq %zmm0,%zmm8,%zmm8{%k3}
672
- vpblendmq %zmm1,%zmm9,%zmm9{%k3}
673
- vpblendmq %zmm3,%zmm5,%zmm5{%k4}
674
- vpblendmq %zmm4,%zmm6,%zmm6{%k4}
675
- vpblendmq %zmm0,%zmm7,%zmm7{%k4}
676
- vpblendmq %zmm1,%zmm8,%zmm8{%k4}
677
- vpblendmq %zmm2,%zmm9,%zmm9{%k4}
678
- vpblendmq %zmm0,%zmm6,%zmm6{%k5}
679
- vpblendmq %zmm4,%zmm5,%zmm0{%k5}
680
- vpblendmq %zmm1,%zmm7,%zmm7{%k5}
681
- vpblendmq %zmm2,%zmm8,%zmm8{%k5}
682
- vpblendmq %zmm3,%zmm9,%zmm9{%k5}
683
- vpermq %zmm6,%zmm13,%zmm4
684
- vpermq %zmm7,%zmm14,%zmm3
685
- vpermq %zmm8,%zmm15,%zmm2
686
- vpermq %zmm9,%zmm16,%zmm1
687
- .KeccakP1600_Permute_Nrounds_DoubleRound:
688
- shr $1, %eax # eax = number of remaining double rounds
689
- jz .KeccakP1600_Permute_Nrounds_End # nrounds was 0 or 1: no double rounds to run
690
- call __KeccakF1600
691
- .KeccakP1600_Permute_Nrounds_End:
692
- vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # write back five lanes per row
693
- vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
694
- vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
695
- vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
696
- vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
697
- vzeroupper
698
- ret
699
- .ifndef old_gas_syntax
700
- .size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
701
- .endif
702
-
703
- # -----------------------------------------------------------------------------
704
- # Absorbs whole blocks of laneCount lanes from data, running 24 rounds after each block, for as long as at least laneCount lanes remain; returns the number of bytes consumed.
705
- # size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
706
- # Registers: state=%rdi laneCount=%rsi data=%rdx dataByteLen=%rcx
707
- # laneCount 21 and 17 use in-register AVX-512 loops; any other value XORs lanes into memory and calls KeccakP1600_Permute_24rounds. NOTE(review): laneCount == 0 is not guarded (the lane-add loop would wrap its counter), and %rsi is compared as a 64-bit value although the parameter is unsigned int -- confirm callers.
708
- .globl KeccakF1600_FastLoop_Absorb
709
- .globl _KeccakF1600_FastLoop_Absorb
710
- .ifndef old_gas_syntax
711
- .type KeccakF1600_FastLoop_Absorb,@function
712
- .endif
713
- KeccakF1600_FastLoop_Absorb:
714
- _KeccakF1600_FastLoop_Absorb:
715
- .balign 32
716
- push %rbx # callee-saved; holds the initial data pointer for the return value
717
- push %r10 # NOTE(review): %r10 is caller-saved under SysV, so saving it is not required
718
- shr $3, %rcx # rcx = data length in lanes
719
- mov %rdx, %rbx # rbx = initial data pointer
720
- cmp %rsi, %rcx
721
- jb KeccakF1600_FastLoop_Absorb_Exit # fewer than laneCount lanes: return 0 without touching the state
722
- lea 96(%rdi),%rdi # bias the pointer; state accesses below use 40*row-96(%rdi)
723
- lea theta_perm(%rip),%r8 # base of the constant tables
724
- kxnorw %k6,%k6,%k6 # k6 = 0xffff
725
- kshiftrw $15,%k6,%k1 # k1 = 0x01 (lane 0)
726
- kshiftrw $11,%k6,%k6 # k6 = 0x1f (lanes 0-4, one 5-lane row)
727
- kshiftlw $1,%k1,%k2 # k2 = 0x02
728
- kshiftlw $2,%k1,%k3 # k3 = 0x04
729
- kshiftlw $3,%k1,%k4 # k4 = 0x08
730
- kshiftlw $4,%k1,%k5 # k5 = 0x10
731
- vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4
732
- vmovdqa64 64*2(%r8),%zmm14
733
- vmovdqa64 64*3(%r8),%zmm15
734
- vmovdqa64 64*4(%r8),%zmm16
735
- vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows (the table follows theta_perm)
736
- vmovdqa64 64*6(%r8),%zmm28
737
- vmovdqa64 64*7(%r8),%zmm29
738
- vmovdqa64 64*8(%r8),%zmm30
739
- vmovdqa64 64*9(%r8),%zmm31
740
- vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows
741
- vmovdqa64 64*11(%r8),%zmm23
742
- vmovdqa64 64*12(%r8),%zmm24
743
- vmovdqa64 64*13(%r8),%zmm25
744
- vmovdqa64 64*14(%r8),%zmm26
745
- vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows
746
- vmovdqa64 64*16(%r8),%zmm18
747
- vmovdqa64 64*17(%r8),%zmm19
748
- vmovdqa64 64*18(%r8),%zmm20
749
- vmovdqa64 64*19(%r8),%zmm21
750
- vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state row 0: five lanes, remaining lanes zeroed
751
- vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
752
- vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
753
- vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
754
- vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
755
- cmp $21, %rsi
756
- jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
757
- sub $21, %rcx # pre-decrement so the jnc at the loop bottom tests for another full block
758
- KeccakF1600_FastLoop_Absorb_Loop21Lanes:
759
- vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # input lanes 0-4
760
- vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # input lanes 5-9
761
- vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # input lanes 10-14
762
- vmovdqu64 8*15(%rdx),%zmm8{%k6}{z} # input lanes 15-19
763
- vmovdqu64 8*20(%rdx),%zmm9{%k1}{z} # input lane 20 only
764
- vpxorq %zmm5,%zmm0,%zmm0
765
- vpxorq %zmm6,%zmm1,%zmm1
766
- vpxorq %zmm7,%zmm2,%zmm2
767
- vpxorq %zmm8,%zmm3,%zmm3
768
- vpxorq %zmm9,%zmm4,%zmm4
769
- add $21*8, %rdx # advance data by one 21-lane block
770
- lea iotas(%rip), %r10 # start at the first round constant
771
- mov $12, %eax # 12 iterations = 24 rounds
772
- call __KeccakF1600
773
- sub $21, %rcx
774
- jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
775
- KeccakF1600_FastLoop_Absorb_SaveAndExit:
776
- vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # write back five lanes per row
777
- vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
778
- vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
779
- vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
780
- vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
781
- KeccakF1600_FastLoop_Absorb_Exit:
782
- vzeroupper
783
- mov %rdx, %rax # return number of bytes processed
784
- sub %rbx, %rax
785
- pop %r10
786
- pop %rbx
787
- ret
788
- KeccakF1600_FastLoop_Absorb_Not21Lanes:
789
- cmp $17, %rsi
790
- jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
791
- sub $17, %rcx # pre-decrement, as in the 21-lane path
792
- KeccakF1600_FastLoop_Absorb_Loop17Lanes:
793
- vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # input lanes 0-4
794
- vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # input lanes 5-9
795
- vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # input lanes 10-14
796
- vmovdqu64 8*15(%rdx),%zmm8{%k1}{z} # input lane 15, other lanes zeroed
797
- vmovdqu64 8*15(%rdx),%zmm8{%k2} # merge input lane 16
798
- vpxorq %zmm5,%zmm0,%zmm0
799
- vpxorq %zmm6,%zmm1,%zmm1
800
- vpxorq %zmm7,%zmm2,%zmm2
801
- vpxorq %zmm8,%zmm3,%zmm3
802
- add $17*8, %rdx # advance data by one 17-lane block
803
- lea iotas(%rip), %r10 # start at the first round constant
804
- mov $12, %eax # 12 iterations = 24 rounds
805
- call __KeccakF1600
806
- sub $17, %rcx
807
- jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes
808
- jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
809
- KeccakF1600_FastLoop_Absorb_Not17Lanes:
810
- lea -96(%rdi), %rdi # undo the +96 bias: the generic path works on the state in memory
811
- KeccakF1600_FastLoop_Absorb_LanesLoop:
812
- mov %rsi, %rax # rax = lanes left to XOR in this block
813
- mov %rdi, %r10 # r10 = cursor into the state
814
- KeccakF1600_FastLoop_Absorb_LanesAddLoop:
815
- mov (%rdx), %r8
816
- add $8, %rdx
817
- xor %r8, (%r10) # state lane ^= input lane
818
- add $8, %r10
819
- sub $1, %rax
820
- jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
821
- sub %rsi, %rcx # one block of laneCount lanes consumed
822
- push %rdi # preserve the live registers across the call
823
- push %rsi
824
- push %rdx
825
- push %rcx # NOTE(review): six pushes since entry leave %rsp at 8 mod 16 at the call below; SysV asks for 16-byte alignment at call sites
826
- .ifdef no_plt
827
- call KeccakP1600_Permute_24rounds
828
- .else
829
- call KeccakP1600_Permute_24rounds@PLT
830
- .endif
831
- pop %rcx
832
- pop %rdx
833
- pop %rsi
834
- pop %rdi
835
- cmp %rsi, %rcx
836
- jae KeccakF1600_FastLoop_Absorb_LanesLoop # at least one more full block remains
837
- jmp KeccakF1600_FastLoop_Absorb_Exit # state was updated in memory, so the register write-back is skipped
838
- .ifndef old_gas_syntax
839
- .size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
840
- .endif
841
-
842
- # -----------------------------------------------------------------------------
843
- # Absorbs whole blocks of laneCount lanes from data, running 12 rounds after each block, for as long as at least laneCount lanes remain; returns the number of bytes consumed.
844
- # size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
845
- # Registers: state=%rdi laneCount=%rsi data=%rdx dataByteLen=%rcx
846
- # laneCount 21 and 17 use in-register AVX-512 loops; any other value XORs lanes into memory and calls KeccakP1600_Permute_12rounds. NOTE(review): laneCount == 0 is not guarded (the lane-add loop would wrap its counter), and %rsi is compared as a 64-bit value although the parameter is unsigned int -- confirm callers.
847
- .globl KeccakP1600_12rounds_FastLoop_Absorb
848
- .globl _KeccakP1600_12rounds_FastLoop_Absorb
849
- .ifndef old_gas_syntax
850
- .type KeccakP1600_12rounds_FastLoop_Absorb,@function
851
- .endif
852
- KeccakP1600_12rounds_FastLoop_Absorb:
853
- _KeccakP1600_12rounds_FastLoop_Absorb:
854
- .balign 32
855
- push %rbx # callee-saved; holds the initial data pointer for the return value
856
- push %r10 # NOTE(review): %r10 is caller-saved under SysV, so saving it is not required
857
- shr $3, %rcx # rcx = data length in lanes
858
- mov %rdx, %rbx # rbx = initial data pointer
859
- cmp %rsi, %rcx
860
- jb KeccakP1600_FastLoop_Absorb_Exit # fewer than laneCount lanes: return 0 without touching the state
861
- lea 96(%rdi),%rdi # bias the pointer; state accesses below use 40*row-96(%rdi)
862
- lea theta_perm(%rip),%r8 # base of the constant tables
863
- kxnorw %k6,%k6,%k6 # k6 = 0xffff
864
- kshiftrw $15,%k6,%k1 # k1 = 0x01 (lane 0)
865
- kshiftrw $11,%k6,%k6 # k6 = 0x1f (lanes 0-4, one 5-lane row)
866
- kshiftlw $1,%k1,%k2 # k2 = 0x02
867
- kshiftlw $2,%k1,%k3 # k3 = 0x04
868
- kshiftlw $3,%k1,%k4 # k4 = 0x08
869
- kshiftlw $4,%k1,%k5 # k5 = 0x10
870
- vmovdqa64 64*1(%r8),%zmm13 # theta_perm rows 1-4
871
- vmovdqa64 64*2(%r8),%zmm14
872
- vmovdqa64 64*3(%r8),%zmm15
873
- vmovdqa64 64*4(%r8),%zmm16
874
- vmovdqa64 64*5(%r8),%zmm27 # rhotates1 rows (the table follows theta_perm)
875
- vmovdqa64 64*6(%r8),%zmm28
876
- vmovdqa64 64*7(%r8),%zmm29
877
- vmovdqa64 64*8(%r8),%zmm30
878
- vmovdqa64 64*9(%r8),%zmm31
879
- vmovdqa64 64*10(%r8),%zmm22 # rhotates0 rows
880
- vmovdqa64 64*11(%r8),%zmm23
881
- vmovdqa64 64*12(%r8),%zmm24
882
- vmovdqa64 64*13(%r8),%zmm25
883
- vmovdqa64 64*14(%r8),%zmm26
884
- vmovdqa64 64*15(%r8),%zmm17 # pi0_perm rows
885
- vmovdqa64 64*16(%r8),%zmm18
886
- vmovdqa64 64*17(%r8),%zmm19
887
- vmovdqa64 64*18(%r8),%zmm20
888
- vmovdqa64 64*19(%r8),%zmm21
889
- vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} # state row 0: five lanes, remaining lanes zeroed
890
- vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z}
891
- vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z}
892
- vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z}
893
- vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z}
894
- cmp $21, %rsi
895
- jnz KeccakP1600_FastLoop_Absorb_Not21Lanes
896
- sub $21, %rcx # pre-decrement so the jnc at the loop bottom tests for another full block
897
- KeccakP1600_FastLoop_Absorb_Loop21Lanes:
898
- vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # input lanes 0-4
899
- vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # input lanes 5-9
900
- vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # input lanes 10-14
901
- vmovdqu64 8*15(%rdx),%zmm8{%k6}{z} # input lanes 15-19
902
- vmovdqu64 8*20(%rdx),%zmm9{%k1}{z} # input lane 20 only
903
- vpxorq %zmm5,%zmm0,%zmm0
904
- vpxorq %zmm6,%zmm1,%zmm1
905
- vpxorq %zmm7,%zmm2,%zmm2
906
- vpxorq %zmm8,%zmm3,%zmm3
907
- vpxorq %zmm9,%zmm4,%zmm4
908
- add $21*8, %rdx # advance data by one 21-lane block
909
- lea iotas+12*8(%rip), %r10 # start at the 13th constant: the last 12 of the 24 entries
910
- mov $12/2, %eax # 6 iterations = 12 rounds
911
- call __KeccakF1600
912
- sub $21, %rcx
913
- jnc KeccakP1600_FastLoop_Absorb_Loop21Lanes # no borrow: another full block is available
914
- KeccakP1600_FastLoop_Absorb_SaveAndExit:
915
- vmovdqu64 %zmm0,40*0-96(%rdi){%k6} # write back five lanes per row
916
- vmovdqu64 %zmm1,40*1-96(%rdi){%k6}
917
- vmovdqu64 %zmm2,40*2-96(%rdi){%k6}
918
- vmovdqu64 %zmm3,40*3-96(%rdi){%k6}
919
- vmovdqu64 %zmm4,40*4-96(%rdi){%k6}
920
- KeccakP1600_FastLoop_Absorb_Exit:
921
- vzeroupper
922
- mov %rdx, %rax # return number of bytes processed
923
- sub %rbx, %rax
924
- pop %r10
925
- pop %rbx
926
- ret
927
- KeccakP1600_FastLoop_Absorb_Not21Lanes:
928
- cmp $17, %rsi
929
- jnz KeccakP1600_FastLoop_Absorb_Not17Lanes
930
- sub $17, %rcx # pre-decrement, as in the 21-lane path
931
- KeccakP1600_FastLoop_Absorb_Loop17Lanes:
932
- vmovdqu64 8*0(%rdx),%zmm5{%k6}{z} # input lanes 0-4
933
- vmovdqu64 8*5(%rdx),%zmm6{%k6}{z} # input lanes 5-9
934
- vmovdqu64 8*10(%rdx),%zmm7{%k6}{z} # input lanes 10-14
935
- vmovdqu64 8*15(%rdx),%zmm8{%k1}{z} # input lane 15, other lanes zeroed
936
- vmovdqu64 8*15(%rdx),%zmm8{%k2} # merge input lane 16
937
- vpxorq %zmm5,%zmm0,%zmm0
938
- vpxorq %zmm6,%zmm1,%zmm1
939
- vpxorq %zmm7,%zmm2,%zmm2
940
- vpxorq %zmm8,%zmm3,%zmm3
941
- add $17*8, %rdx # advance data by one 17-lane block
942
- lea iotas+12*8(%rip), %r10 # start at the 13th constant: the last 12 of the 24 entries
943
- mov $12/2, %eax # 6 iterations = 12 rounds
944
- call __KeccakF1600
945
- sub $17, %rcx
946
- jnc KeccakP1600_FastLoop_Absorb_Loop17Lanes
947
- jmp KeccakP1600_FastLoop_Absorb_SaveAndExit
948
- KeccakP1600_FastLoop_Absorb_Not17Lanes:
949
- lea -96(%rdi), %rdi # undo the +96 bias: the generic path works on the state in memory
950
- KeccakP1600_FastLoop_Absorb_LanesLoop:
951
- mov %rsi, %rax # rax = lanes left to XOR in this block
952
- mov %rdi, %r10 # r10 = cursor into the state
953
- KeccakP1600_FastLoop_Absorb_LanesAddLoop:
954
- mov (%rdx), %r8
955
- add $8, %rdx
956
- xor %r8, (%r10) # state lane ^= input lane
957
- add $8, %r10
958
- sub $1, %rax
959
- jnz KeccakP1600_FastLoop_Absorb_LanesAddLoop
960
- sub %rsi, %rcx # one block of laneCount lanes consumed
961
- push %rdi # preserve the live registers across the call
962
- push %rsi
963
- push %rdx
964
- push %rcx # NOTE(review): six pushes since entry leave %rsp at 8 mod 16 at the call below; SysV asks for 16-byte alignment at call sites
965
- .ifdef no_plt
966
- call KeccakP1600_Permute_12rounds
967
- .else
968
- call KeccakP1600_Permute_12rounds@PLT
969
- .endif
970
- pop %rcx
971
- pop %rdx
972
- pop %rsi
973
- pop %rdi
974
- cmp %rsi, %rcx
975
- jae KeccakP1600_FastLoop_Absorb_LanesLoop # at least one more full block remains
976
- jmp KeccakP1600_FastLoop_Absorb_Exit # state was updated in memory, so the register write-back is skipped
977
- .ifndef old_gas_syntax
978
- .size KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
979
- .endif
980
- .balign 64 # the routines in this file read these tables with vmovdqa64, one 64-byte row at a time
981
- theta_perm: # lane-index vectors for vpermq; callers load rows 1-4 into %zmm13-%zmm16
982
- .quad 0, 1, 2, 3, 4, 5, 6, 7 # [not used]
983
- .quad 4, 0, 1, 2, 3, 5, 6, 7
984
- .quad 3, 4, 0, 1, 2, 5, 6, 7
985
- .quad 2, 3, 4, 0, 1, 5, 6, 7
986
- .quad 1, 2, 3, 4, 0, 5, 6, 7
987
- rhotates1: # per-lane rotate counts for vprolvq; loaded into %zmm27-%zmm31 (odd-round Rho)
988
- .quad 0, 44, 43, 21, 14, 0, 0, 0 # [0][0] [1][1] [2][2] [3][3] [4][4]
989
- .quad 18, 1, 6, 25, 8, 0, 0, 0 # [4][0] [0][1] [1][2] [2][3] [3][4]
990
- .quad 41, 2, 62, 55, 39, 0, 0, 0 # [3][0] [4][1] [0][2] [1][3] [2][4]
991
- .quad 3, 45, 61, 28, 20, 0, 0, 0 # [2][0] [3][1] [4][2] [0][3] [1][4]
992
- .quad 36, 10, 15, 56, 27, 0, 0, 0 # [1][0] [2][1] [3][2] [4][3] [0][4]
993
- rhotates0: # per-lane rotate counts for vprolvq; loaded into %zmm22-%zmm26 (even-round Rho)
994
- .quad 0, 1, 62, 28, 27, 0, 0, 0
995
- .quad 36, 44, 6, 55, 20, 0, 0, 0
996
- .quad 3, 10, 43, 25, 39, 0, 0, 0
997
- .quad 41, 45, 15, 21, 8, 0, 0, 0
998
- .quad 18, 2, 61, 56, 14, 0, 0, 0
999
- pi0_perm: # lane-index vectors for vpermq; loaded into %zmm17-%zmm21 (even-round Pi)
1000
- .quad 0, 3, 1, 4, 2, 5, 6, 7
1001
- .quad 1, 4, 2, 0, 3, 5, 6, 7
1002
- .quad 2, 0, 3, 1, 4, 5, 6, 7
1003
- .quad 3, 1, 4, 2, 0, 5, 6, 7
1004
- .quad 4, 2, 0, 3, 1, 5, 6, 7
1005
- iotas: # 24 round constants, 8 bytes each, consumed in order by the Iota steps
1006
- .quad 0x0000000000000001
1007
- .quad 0x0000000000008082
1008
- .quad 0x800000000000808a
1009
- .quad 0x8000000080008000
1010
- .quad 0x000000000000808b
1011
- .quad 0x0000000080000001
1012
- .quad 0x8000000080008081
1013
- .quad 0x8000000000008009
1014
- .quad 0x000000000000008a
1015
- .quad 0x0000000000000088
1016
- .quad 0x0000000080008009
1017
- .quad 0x000000008000000a
1018
- .quad 0x000000008000808b
1019
- .quad 0x800000000000008b
1020
- .quad 0x8000000000008089
1021
- .quad 0x8000000000008003
1022
- .quad 0x8000000000008002
1023
- .quad 0x8000000000000080
1024
- .quad 0x000000000000800a
1025
- .quad 0x800000008000000a
1026
- .quad 0x8000000080008081
1027
- .quad 0x8000000000008080
1028
- .quad 0x0000000080000001
1029
- .quad 0x8000000080008008
1030
- iotas_end: # one past the last round constant; KeccakP1600_Permute_Nrounds indexes backwards from here
1031
- .asciz "Keccak-1600 for AVX-512F, CRYPTOGAMS by <appro@openssl.org>"