sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,6 +1,3 @@
1
- # The eXtended Keccak Code Package (XKCP)
2
- # https://github.com/XKCP/XKCP
3
- #
4
1
  # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
5
2
  # Copyright (c) 2017 Ronny Van Keer
6
3
  # All rights reserved.
@@ -14,22 +11,22 @@
14
11
  # (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
15
12
  # The rest of the code was written by Ronny Van Keer.
16
13
  # Adaptations for macOS by Stéphane Léon.
17
- # Adaptations for mingw-w64 (changes macOS too) by Jorrit Jongma.
18
14
 
19
15
  .text
20
16
 
21
17
  # -----------------------------------------------------------------------------
22
18
  #
23
- # void KeccakP1600_Initialize(void *state);
19
+ # void KeccakP1600_AVX2_Initialize(void *state);
24
20
  #
25
- .globl KeccakP1600_Initialize
26
- .globl _KeccakP1600_Initialize
27
- .ifndef old_gas_syntax
28
- .type KeccakP1600_Initialize,@function
21
+ .ifdef macOS
22
+ .globl _KeccakP1600_AVX2_Initialize
23
+ _KeccakP1600_AVX2_Initialize:
24
+ .else
25
+ .globl KeccakP1600_AVX2_Initialize
26
+ .type KeccakP1600_AVX2_Initialize,@function
27
+ KeccakP1600_AVX2_Initialize:
29
28
  .endif
30
- KeccakP1600_Initialize:
31
- _KeccakP1600_Initialize:
32
- .balign 32
29
+ .balign 32
33
30
  vpxor %ymm0,%ymm0,%ymm0
34
31
  vmovdqu %ymm0,0*32(%rdi)
35
32
  vmovdqu %ymm0,1*32(%rdi)
@@ -39,22 +36,24 @@ _KeccakP1600_Initialize:
39
36
  vmovdqu %ymm0,5*32(%rdi)
40
37
  movq $0,6*32(%rdi)
41
38
  ret
42
- .ifndef old_gas_syntax
43
- .size KeccakP1600_Initialize,.-KeccakP1600_Initialize
39
+ .ifdef macOS
40
+ .else
41
+ .size KeccakP1600_AVX2_Initialize,.-KeccakP1600_AVX2_Initialize
44
42
  .endif
45
43
 
46
44
  # -----------------------------------------------------------------------------
47
45
  #
48
- # void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
46
+ # void KeccakP1600_AVX2_AddByte(void *state, unsigned char data, unsigned int offset);
49
47
  # %rdi %rsi %rdx
50
48
  #
51
- .globl KeccakP1600_AddByte
52
- .globl _KeccakP1600_AddByte
53
- .ifndef old_gas_syntax
54
- .type KeccakP1600_AddByte,@function
49
+ .ifdef macOS
50
+ .globl _KeccakP1600_AVX2_AddByte
51
+ _KeccakP1600_AVX2_AddByte:
52
+ .else
53
+ .globl KeccakP1600_AVX2_AddByte
54
+ .type KeccakP1600_AVX2_AddByte,@function
55
+ KeccakP1600_AVX2_AddByte:
55
56
  .endif
56
- KeccakP1600_AddByte:
57
- _KeccakP1600_AddByte:
58
57
  .balign 32
59
58
  mov %rdx, %rax
60
59
  and $7, %rax
@@ -65,31 +64,33 @@ _KeccakP1600_AddByte:
65
64
  add %rax, %rdi
66
65
  xorb %sil, (%rdi)
67
66
  ret
68
- .ifndef old_gas_syntax
69
- .size KeccakP1600_AddByte,.-KeccakP1600_AddByte
67
+ .ifdef macOS
68
+ .else
69
+ .size KeccakP1600_AVX2_AddByte,.-KeccakP1600_AVX2_AddByte
70
70
  .endif
71
71
 
72
72
  # -----------------------------------------------------------------------------
73
73
  #
74
- # void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
74
+ # void KeccakP1600_AVX2_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
75
75
  # %rdi %rsi %rdx %rcx
76
76
  #
77
- .globl KeccakP1600_AddBytes
78
- .globl _KeccakP1600_AddBytes
79
- .ifndef old_gas_syntax
80
- .type KeccakP1600_AddBytes,@function
77
+ .ifdef macOS
78
+ .globl _KeccakP1600_AVX2_AddBytes
79
+ _KeccakP1600_AVX2_AddBytes:
80
+ .else
81
+ .globl KeccakP1600_AVX2_AddBytes
82
+ .type KeccakP1600_AVX2_AddBytes,@function
83
+ KeccakP1600_AVX2_AddBytes:
81
84
  .endif
82
- KeccakP1600_AddBytes:
83
- _KeccakP1600_AddBytes:
84
85
  .balign 32
85
86
  cmp $0, %rcx
86
- jz KeccakP1600_AddBytes_Exit
87
+ jz KeccakP1600_AVX2_AddBytes_Exit
87
88
  mov %rdx, %rax # rax offset in lane
88
89
  and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
89
90
  lea mapState(%rip), %r9
90
91
  add %r9, %rdx
91
92
  and $7, %rax
92
- jz KeccakP1600_AddBytes_LaneAlignedCheck
93
+ jz KeccakP1600_AVX2_AddBytes_LaneAlignedCheck
93
94
  mov $8, %r9 # r9 is (max) length of incomplete lane
94
95
  sub %rax, %r9
95
96
  cmp %rcx, %r9
@@ -98,170 +99,66 @@ _KeccakP1600_AddBytes:
98
99
  add (%rdx), %rax # rax = pointer to state lane
99
100
  add $8, %rdx
100
101
  add %rdi, %rax
101
- KeccakP1600_AddBytes_NotAlignedLoop:
102
+ KeccakP1600_AVX2_AddBytes_NotAlignedLoop:
102
103
  mov (%rsi), %r8b
103
104
  inc %rsi
104
105
  xorb %r8b, (%rax)
105
106
  inc %rax
106
107
  dec %r9
107
- jnz KeccakP1600_AddBytes_NotAlignedLoop
108
- jmp KeccakP1600_AddBytes_LaneAlignedCheck
109
- KeccakP1600_AddBytes_LaneAlignedLoop:
108
+ jnz KeccakP1600_AVX2_AddBytes_NotAlignedLoop
109
+ jmp KeccakP1600_AVX2_AddBytes_LaneAlignedCheck
110
+ KeccakP1600_AVX2_AddBytes_LaneAlignedLoop:
110
111
  mov (%rsi), %r8
111
112
  add $8, %rsi
112
113
  mov (%rdx), %rax
113
114
  add $8, %rdx
114
115
  add %rdi, %rax
115
116
  xor %r8, (%rax)
116
- KeccakP1600_AddBytes_LaneAlignedCheck:
117
+ KeccakP1600_AVX2_AddBytes_LaneAlignedCheck:
117
118
  sub $8, %rcx
118
- jnc KeccakP1600_AddBytes_LaneAlignedLoop
119
- KeccakP1600_AddBytes_LastIncompleteLane:
119
+ jnc KeccakP1600_AVX2_AddBytes_LaneAlignedLoop
120
+ KeccakP1600_AVX2_AddBytes_LastIncompleteLane:
120
121
  add $8, %rcx
121
- jz KeccakP1600_AddBytes_Exit
122
+ jz KeccakP1600_AVX2_AddBytes_Exit
122
123
  mov (%rdx), %rax
123
124
  add %rdi, %rax
124
- KeccakP1600_AddBytes_LastIncompleteLaneLoop:
125
+ KeccakP1600_AVX2_AddBytes_LastIncompleteLaneLoop:
125
126
  mov (%rsi), %r8b
126
127
  inc %rsi
127
128
  xor %r8b, (%rax)
128
129
  inc %rax
129
130
  dec %rcx
130
- jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
131
- KeccakP1600_AddBytes_Exit:
131
+ jnz KeccakP1600_AVX2_AddBytes_LastIncompleteLaneLoop
132
+ KeccakP1600_AVX2_AddBytes_Exit:
132
133
  ret
133
- .ifndef old_gas_syntax
134
- .size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
135
- .endif
136
-
137
- # -----------------------------------------------------------------------------
138
- #
139
- # void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
140
- # %rdi %rsi %rdx %rcx
141
- #
142
- .globl KeccakP1600_OverwriteBytes
143
- .globl _KeccakP1600_OverwriteBytes
144
- .ifndef old_gas_syntax
145
- .type KeccakP1600_OverwriteBytes,@function
146
- .endif
147
- KeccakP1600_OverwriteBytes:
148
- _KeccakP1600_OverwriteBytes:
149
- .balign 32
150
- cmp $0, %rcx
151
- jz KeccakP1600_OverwriteBytes_Exit
152
- mov %rdx, %rax # rax offset in lane
153
- and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
154
- lea mapState(%rip), %r9
155
- add %r9, %rdx
156
- and $7, %rax
157
- jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
158
- mov $8, %r9 # r9 is (max) length of incomplete lane
159
- sub %rax, %r9
160
- cmp %rcx, %r9
161
- cmovae %rcx, %r9
162
- sub %r9, %rcx # length -= length of incomplete lane
163
- add (%rdx), %rax # rax = pointer to state lane
164
- add $8, %rdx
165
- add %rdi, %rax
166
- KeccakP1600_OverwriteBytes_NotAlignedLoop:
167
- mov (%rsi), %r8b
168
- inc %rsi
169
- mov %r8b, (%rax)
170
- inc %rax
171
- dec %r9
172
- jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
173
- jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
174
- KeccakP1600_OverwriteBytes_LaneAlignedLoop:
175
- mov (%rsi), %r8
176
- add $8, %rsi
177
- mov (%rdx), %rax
178
- add $8, %rdx
179
- add %rdi, %rax
180
- mov %r8, (%rax)
181
- KeccakP1600_OverwriteBytes_LaneAlignedCheck:
182
- sub $8, %rcx
183
- jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop
184
- KeccakP1600_OverwriteBytes_LastIncompleteLane:
185
- add $8, %rcx
186
- jz KeccakP1600_OverwriteBytes_Exit
187
- mov (%rdx), %rax
188
- add %rdi, %rax
189
- KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
190
- mov (%rsi), %r8b
191
- inc %rsi
192
- mov %r8b, (%rax)
193
- inc %rax
194
- dec %rcx
195
- jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
196
- KeccakP1600_OverwriteBytes_Exit:
197
- ret
198
- .ifndef old_gas_syntax
199
- .size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
200
- .endif
201
-
202
- # -----------------------------------------------------------------------------
203
- #
204
- # void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
205
- # %rdi %rsi
206
- #
207
- .globl KeccakP1600_OverwriteWithZeroes
208
- .globl _KeccakP1600_OverwriteWithZeroes
209
- .ifndef old_gas_syntax
210
- .type KeccakP1600_OverwriteWithZeroes,@function
211
- .endif
212
- KeccakP1600_OverwriteWithZeroes:
213
- _KeccakP1600_OverwriteWithZeroes:
214
- .balign 32
215
- cmp $0, %rsi
216
- jz KeccakP1600_OverwriteWithZeroes_Exit
217
- lea mapState(%rip), %rdx # rdx pointer into state index mapper
218
- jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
219
- KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
220
- mov (%rdx), %rax
221
- add $8, %rdx
222
- add %rdi, %rax
223
- movq $0, (%rax)
224
- KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
225
- sub $8, %rsi
226
- jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop
227
- KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
228
- add $8, %rsi
229
- jz KeccakP1600_OverwriteWithZeroes_Exit
230
- mov (%rdx), %rax
231
- add %rdi, %rax
232
- KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
233
- movb $0, (%rax)
234
- inc %rax
235
- dec %rsi
236
- jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
237
- KeccakP1600_OverwriteWithZeroes_Exit:
238
- ret
239
- .ifndef old_gas_syntax
240
- .size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
134
+ .ifdef macOS
135
+ .else
136
+ .size KeccakP1600_AVX2_AddBytes,.-KeccakP1600_AVX2_AddBytes
241
137
  .endif
242
138
 
243
139
  # -----------------------------------------------------------------------------
244
140
  #
245
- # void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
141
+ # void KeccakP1600_AVX2_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
246
142
  # %rdi %rsi %rdx %rcx
247
143
  #
248
- .globl KeccakP1600_ExtractBytes
249
- .globl _KeccakP1600_ExtractBytes
250
- .ifndef old_gas_syntax
251
- .type KeccakP1600_ExtractBytes,@function
144
+ .ifdef macOS
145
+ .globl _KeccakP1600_AVX2_ExtractBytes
146
+ _KeccakP1600_AVX2_ExtractBytes:
147
+ .else
148
+ .globl KeccakP1600_AVX2_ExtractBytes
149
+ .type KeccakP1600_AVX2_ExtractBytes,@function
150
+ KeccakP1600_AVX2_ExtractBytes:
252
151
  .endif
253
- KeccakP1600_ExtractBytes:
254
- _KeccakP1600_ExtractBytes:
255
- .balign 32
152
+ .balign 32
256
153
  push %rbx
257
154
  cmp $0, %rcx
258
- jz KeccakP1600_ExtractBytes_Exit
155
+ jz KeccakP1600_AVX2_ExtractBytes_Exit
259
156
  mov %rdx, %rax # rax offset in lane
260
157
  and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
261
158
  lea mapState(%rip), %r9
262
159
  add %r9, %rdx
263
160
  and $7, %rax
264
- jz KeccakP1600_ExtractBytes_LaneAlignedCheck
161
+ jz KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck
265
162
  mov $8, %rbx # rbx is (max) length of incomplete lane
266
163
  sub %rax, %rbx
267
164
  cmp %rcx, %rbx
@@ -271,127 +168,53 @@ _KeccakP1600_ExtractBytes:
271
168
  add $8, %rdx
272
169
  add %rdi, %r9
273
170
  add %rax, %r9
274
- KeccakP1600_ExtractBytes_NotAlignedLoop:
171
+ KeccakP1600_AVX2_ExtractBytes_NotAlignedLoop:
275
172
  mov (%r9), %r8b
276
173
  inc %r9
277
174
  mov %r8b, (%rsi)
278
175
  inc %rsi
279
176
  dec %rbx
280
- jnz KeccakP1600_ExtractBytes_NotAlignedLoop
281
- jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
282
- KeccakP1600_ExtractBytes_LaneAlignedLoop:
177
+ jnz KeccakP1600_AVX2_ExtractBytes_NotAlignedLoop
178
+ jmp KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck
179
+ KeccakP1600_AVX2_ExtractBytes_LaneAlignedLoop:
283
180
  mov (%rdx), %rax
284
181
  add $8, %rdx
285
182
  add %rdi, %rax
286
183
  mov (%rax), %r8
287
184
  mov %r8, (%rsi)
288
185
  add $8, %rsi
289
- KeccakP1600_ExtractBytes_LaneAlignedCheck:
186
+ KeccakP1600_AVX2_ExtractBytes_LaneAlignedCheck:
290
187
  sub $8, %rcx
291
- jnc KeccakP1600_ExtractBytes_LaneAlignedLoop
292
- KeccakP1600_ExtractBytes_LastIncompleteLane:
188
+ jnc KeccakP1600_AVX2_ExtractBytes_LaneAlignedLoop
189
+ KeccakP1600_AVX2_ExtractBytes_LastIncompleteLane:
293
190
  add $8, %rcx
294
- jz KeccakP1600_ExtractBytes_Exit
191
+ jz KeccakP1600_AVX2_ExtractBytes_Exit
295
192
  mov (%rdx), %rax
296
193
  add %rdi, %rax
297
194
  mov (%rax), %r8
298
- KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
195
+ KeccakP1600_AVX2_ExtractBytes_LastIncompleteLaneLoop:
299
196
  mov %r8b, (%rsi)
300
197
  shr $8, %r8
301
198
  inc %rsi
302
199
  dec %rcx
303
- jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
304
- KeccakP1600_ExtractBytes_Exit:
200
+ jnz KeccakP1600_AVX2_ExtractBytes_LastIncompleteLaneLoop
201
+ KeccakP1600_AVX2_ExtractBytes_Exit:
305
202
  pop %rbx
306
203
  ret
307
- .ifndef old_gas_syntax
308
- .size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
309
- .endif
310
-
311
- # -----------------------------------------------------------------------------
312
- #
313
- # void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
314
- # %rdi %rsi %rdx %rcx %r8
315
- #
316
- .globl KeccakP1600_ExtractAndAddBytes
317
- .globl _KeccakP1600_ExtractAndAddBytes
318
- .ifndef old_gas_syntax
319
- .type KeccakP1600_ExtractAndAddBytes,@function
320
- .endif
321
- KeccakP1600_ExtractAndAddBytes:
322
- _KeccakP1600_ExtractAndAddBytes:
323
- .balign 32
324
- push %rbx
325
- push %r10
326
- cmp $0, %r8
327
- jz KeccakP1600_ExtractAndAddBytes_Exit
328
- mov %rcx, %rax # rax offset in lane
329
- and $0xFFFFFFF8, %ecx # rcx pointer into state index mapper
330
- lea mapState(%rip), %r9
331
- add %r9, %rcx
332
- and $7, %rax
333
- jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
334
- mov $8, %rbx # rbx is (max) length of incomplete lane
335
- sub %rax, %rbx
336
- cmp %r8, %rbx
337
- cmovae %r8, %rbx
338
- sub %rbx, %r8 # length -= length of incomplete lane
339
- mov (%rcx), %r9
340
- add $8, %rcx
341
- add %rdi, %r9
342
- add %rax, %r9
343
- KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
344
- mov (%r9), %r10b
345
- inc %r9
346
- xor (%rsi), %r10b
347
- inc %rsi
348
- mov %r10b, (%rdx)
349
- inc %rdx
350
- dec %rbx
351
- jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
352
- jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
353
- KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
354
- mov (%rcx), %rax
355
- add $8, %rcx
356
- add %rdi, %rax
357
- mov (%rax), %r10
358
- xor (%rsi), %r10
359
- add $8, %rsi
360
- mov %r10, (%rdx)
361
- add $8, %rdx
362
- KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
363
- sub $8, %r8
364
- jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop
365
- KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
366
- add $8, %r8
367
- jz KeccakP1600_ExtractAndAddBytes_Exit
368
- mov (%rcx), %rax
369
- add %rdi, %rax
370
- mov (%rax), %r10
371
- KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
372
- xor (%rsi), %r10b
373
- inc %rsi
374
- mov %r10b, (%rdx)
375
- inc %rdx
376
- shr $8, %r10
377
- dec %r8
378
- jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
379
- KeccakP1600_ExtractAndAddBytes_Exit:
380
- pop %r10
381
- pop %rbx
382
- ret
383
- .ifndef old_gas_syntax
384
- .size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
204
+ .ifdef macOS
205
+ .else
206
+ .size KeccakP1600_AVX2_ExtractBytes,.-KeccakP1600_AVX2_ExtractBytes
385
207
  .endif
386
208
 
387
209
  # -----------------------------------------------------------------------------
388
210
  #
389
211
  # internal
390
212
  #
391
- .ifndef old_gas_syntax
213
+ .ifdef macOS
214
+ .else
392
215
  .type __KeccakF1600,@function
393
216
  .endif
394
- .balign 32
217
+ .balign 32
395
218
  __KeccakF1600:
396
219
  .Loop_avx2:
397
220
  ######################################### Theta
@@ -530,63 +353,22 @@ __KeccakF1600:
530
353
  dec %eax
531
354
  jnz .Loop_avx2
532
355
  ret
533
- .ifndef old_gas_syntax
356
+ .ifdef macOS
357
+ .else
534
358
  .size __KeccakF1600,.-__KeccakF1600
535
359
  .endif
536
360
 
537
- # -----------------------------------------------------------------------------
538
- #
539
- # void KeccakP1600_Permute_24rounds(void *state);
540
- # %rdi
541
- #
542
- .globl KeccakP1600_Permute_24rounds
543
- .globl _KeccakP1600_Permute_24rounds
544
- .ifndef old_gas_syntax
545
- .type KeccakP1600_Permute_24rounds,@function
546
- .endif
547
- KeccakP1600_Permute_24rounds:
548
- _KeccakP1600_Permute_24rounds:
549
- .balign 32
550
- lea rhotates_left+96(%rip),%r8
551
- lea rhotates_right+96(%rip),%r9
552
- lea iotas(%rip),%r10
553
- mov $24,%eax
554
- lea 96(%rdi),%rdi
555
- vzeroupper
556
- vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
557
- vmovdqu 8+32*0-96(%rdi),%ymm1
558
- vmovdqu 8+32*1-96(%rdi),%ymm2
559
- vmovdqu 8+32*2-96(%rdi),%ymm3
560
- vmovdqu 8+32*3-96(%rdi),%ymm4
561
- vmovdqu 8+32*4-96(%rdi),%ymm5
562
- vmovdqu 8+32*5-96(%rdi),%ymm6
563
- call __KeccakF1600
564
- vmovq %xmm0,-96(%rdi)
565
- vmovdqu %ymm1,8+32*0-96(%rdi)
566
- vmovdqu %ymm2,8+32*1-96(%rdi)
567
- vmovdqu %ymm3,8+32*2-96(%rdi)
568
- vmovdqu %ymm4,8+32*3-96(%rdi)
569
- vmovdqu %ymm5,8+32*4-96(%rdi)
570
- vmovdqu %ymm6,8+32*5-96(%rdi)
571
- vzeroupper
572
- ret
573
- .ifndef old_gas_syntax
574
- .size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
575
- .endif
576
361
 
577
- # -----------------------------------------------------------------------------
578
- #
579
- # void KeccakP1600_Permute_12rounds(void *state);
580
- # %rdi
581
- #
582
- .globl KeccakP1600_Permute_12rounds
583
- .globl _KeccakP1600_Permute_12rounds
584
- .ifndef old_gas_syntax
585
- .type KeccakP1600_Permute_12rounds,@function
362
+
363
+ .ifdef macOS
364
+ .globl _KeccakP1600_AVX2_Permute_12rounds
365
+ _KeccakP1600_AVX2_Permute_12rounds:
366
+ .else
367
+ .globl KeccakP1600_AVX2_Permute_12rounds
368
+ .type KeccakP1600_AVX2_Permute_12rounds,@function
369
+ KeccakP1600_AVX2_Permute_12rounds:
586
370
  .endif
587
- KeccakP1600_Permute_12rounds:
588
- _KeccakP1600_Permute_12rounds:
589
- .balign 32
371
+ .balign 32
590
372
  lea rhotates_left+96(%rip),%r8
591
373
  lea rhotates_right+96(%rip),%r9
592
374
  lea iotas+12*4*8(%rip),%r10
@@ -610,253 +392,34 @@ _KeccakP1600_Permute_12rounds:
610
392
  vmovdqu %ymm6,8+32*5-96(%rdi)
611
393
  vzeroupper
612
394
  ret
613
- .ifndef old_gas_syntax
614
- .size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
615
- .endif
616
-
617
- # -----------------------------------------------------------------------------
618
- #
619
- # void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
620
- # %rdi %rsi
621
- #
622
- .globl KeccakP1600_Permute_Nrounds
623
- .globl _KeccakP1600_Permute_Nrounds
624
- .ifndef old_gas_syntax
625
- .type KeccakP1600_Permute_Nrounds,@function
626
- .endif
627
- KeccakP1600_Permute_Nrounds:
628
- _KeccakP1600_Permute_Nrounds:
629
- .balign 32
630
- lea rhotates_left+96(%rip),%r8
631
- lea rhotates_right+96(%rip),%r9
632
- lea iotas+24*4*8(%rip),%r10
633
- mov %rsi,%rax
634
- shl $2+3,%rsi
635
- sub %rsi, %r10
636
- lea 96(%rdi),%rdi
637
- vzeroupper
638
- vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
639
- vmovdqu 8+32*0-96(%rdi),%ymm1
640
- vmovdqu 8+32*1-96(%rdi),%ymm2
641
- vmovdqu 8+32*2-96(%rdi),%ymm3
642
- vmovdqu 8+32*3-96(%rdi),%ymm4
643
- vmovdqu 8+32*4-96(%rdi),%ymm5
644
- vmovdqu 8+32*5-96(%rdi),%ymm6
645
- call __KeccakF1600
646
- vmovq %xmm0,-96(%rdi)
647
- vmovdqu %ymm1,8+32*0-96(%rdi)
648
- vmovdqu %ymm2,8+32*1-96(%rdi)
649
- vmovdqu %ymm3,8+32*2-96(%rdi)
650
- vmovdqu %ymm4,8+32*3-96(%rdi)
651
- vmovdqu %ymm5,8+32*4-96(%rdi)
652
- vmovdqu %ymm6,8+32*5-96(%rdi)
653
- vzeroupper
654
- ret
655
- .ifndef old_gas_syntax
656
- .size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
657
- .endif
658
-
659
- # -----------------------------------------------------------------------------
660
- #
661
- # size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
662
- # %rdi %rsi %rdx %rcx
663
- #
664
- .globl KeccakF1600_FastLoop_Absorb
665
- .globl _KeccakF1600_FastLoop_Absorb
666
- .ifndef old_gas_syntax
667
- .type KeccakF1600_FastLoop_Absorb,@function
668
- .endif
669
- KeccakF1600_FastLoop_Absorb:
670
- _KeccakF1600_FastLoop_Absorb:
671
- .balign 32
672
- push %rbx
673
- push %r10
674
- shr $3, %rcx # rcx = data length in lanes
675
- mov %rdx, %rbx # rbx = initial data pointer
676
- cmp %rsi, %rcx
677
- jb KeccakF1600_FastLoop_Absorb_Exit
678
- vzeroupper
679
- cmp $21, %rsi
680
- jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
681
- sub $21, %rcx
682
- lea rhotates_left+96(%rip),%r8
683
- lea rhotates_right+96(%rip),%r9
684
- lea 96(%rdi),%rdi
685
- vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
686
- vmovdqu 8+32*0-96(%rdi),%ymm1
687
- vmovdqu 8+32*1-96(%rdi),%ymm2
688
- vmovdqu 8+32*2-96(%rdi),%ymm3
689
- vmovdqu 8+32*3-96(%rdi),%ymm4
690
- vmovdqu 8+32*4-96(%rdi),%ymm5
691
- vmovdqu 8+32*5-96(%rdi),%ymm6
692
- KeccakF1600_FastLoop_Absorb_Loop21Lanes:
693
- vpbroadcastq (%rdx),%ymm7
694
- vmovdqu 8(%rdx),%ymm8
695
-
696
- vmovdqa map2(%rip), %xmm15
697
- vpcmpeqd %ymm14, %ymm14, %ymm14
698
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
699
-
700
- vmovdqa mask3_21(%rip), %ymm14
701
- vpxor %ymm10, %ymm10, %ymm10
702
- vmovdqa map3(%rip), %xmm15
703
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
704
-
705
- vmovdqa mask4_21(%rip), %ymm14
706
- vpxor %ymm11, %ymm11, %ymm11
707
- vmovdqa map4(%rip), %xmm15
708
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
709
-
710
- vmovdqa mask5_21(%rip), %ymm14
711
- vpxor %ymm12, %ymm12, %ymm12
712
- vmovdqa map5(%rip), %xmm15
713
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
714
-
715
- vmovdqa mask6_21(%rip), %ymm14
716
- vpxor %ymm13, %ymm13, %ymm13
717
- vmovdqa map6(%rip), %xmm15
718
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
719
-
720
- vpxor %ymm7,%ymm0,%ymm0
721
- vpxor %ymm8,%ymm1,%ymm1
722
- vpxor %ymm9,%ymm2,%ymm2
723
- vpxor %ymm10,%ymm3,%ymm3
724
- vpxor %ymm11,%ymm4,%ymm4
725
- vpxor %ymm12,%ymm5,%ymm5
726
- vpxor %ymm13,%ymm6,%ymm6
727
- add $21*8, %rdx
728
- lea iotas(%rip),%r10
729
- mov $24,%eax
730
- call __KeccakF1600
731
- sub $21, %rcx
732
- jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes
733
- KeccakF1600_FastLoop_Absorb_SaveAndExit:
734
- vmovq %xmm0,-96(%rdi)
735
- vmovdqu %ymm1,8+32*0-96(%rdi)
736
- vmovdqu %ymm2,8+32*1-96(%rdi)
737
- vmovdqu %ymm3,8+32*2-96(%rdi)
738
- vmovdqu %ymm4,8+32*3-96(%rdi)
739
- vmovdqu %ymm5,8+32*4-96(%rdi)
740
- vmovdqu %ymm6,8+32*5-96(%rdi)
741
- KeccakF1600_FastLoop_Absorb_Exit:
742
- vzeroupper
743
- mov %rdx, %rax # return number of bytes processed
744
- sub %rbx, %rax
745
- pop %r10
746
- pop %rbx
747
- ret
748
- KeccakF1600_FastLoop_Absorb_Not21Lanes:
749
- cmp $17, %rsi
750
- jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
751
- sub $17, %rcx
752
- lea rhotates_left+96(%rip),%r8
753
- lea rhotates_right+96(%rip),%r9
754
- lea 96(%rdi),%rdi
755
- vpbroadcastq -96(%rdi),%ymm0 # load A[5][5]
756
- vmovdqu 8+32*0-96(%rdi),%ymm1
757
- vmovdqu 8+32*1-96(%rdi),%ymm2
758
- vmovdqu 8+32*2-96(%rdi),%ymm3
759
- vmovdqu 8+32*3-96(%rdi),%ymm4
760
- vmovdqu 8+32*4-96(%rdi),%ymm5
761
- vmovdqu 8+32*5-96(%rdi),%ymm6
762
- KeccakF1600_FastLoop_Absorb_Loop17Lanes:
763
- vpbroadcastq (%rdx),%ymm7
764
- vmovdqu 8(%rdx),%ymm8
765
-
766
- vmovdqa mask2_17(%rip), %ymm14
767
- vpxor %ymm9, %ymm9, %ymm9
768
- vmovdqa map2(%rip), %xmm15
769
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
770
-
771
- vmovdqa mask3_17(%rip), %ymm14
772
- vpxor %ymm10, %ymm10, %ymm10
773
- vmovdqa map3(%rip), %xmm15
774
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
775
-
776
- vmovdqa mask4_17(%rip), %ymm14
777
- vpxor %ymm11, %ymm11, %ymm11
778
- vmovdqa map4(%rip), %xmm15
779
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
780
-
781
- vmovdqa mask5_17(%rip), %ymm14
782
- vpxor %ymm12, %ymm12, %ymm12
783
- vmovdqa map5(%rip), %xmm15
784
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
785
-
786
- vmovdqa mask6_17(%rip), %ymm14
787
- vpxor %ymm13, %ymm13, %ymm13
788
- vmovdqa map6(%rip), %xmm15
789
- vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
790
-
791
- vpxor %ymm7,%ymm0,%ymm0
792
- vpxor %ymm8,%ymm1,%ymm1
793
- vpxor %ymm9,%ymm2,%ymm2
794
- vpxor %ymm10,%ymm3,%ymm3
795
- vpxor %ymm11,%ymm4,%ymm4
796
- vpxor %ymm12,%ymm5,%ymm5
797
- vpxor %ymm13,%ymm6,%ymm6
798
- add $17*8, %rdx
799
- lea iotas(%rip),%r10
800
- mov $24,%eax
801
- call __KeccakF1600
802
- sub $17, %rcx
803
- jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes
804
- jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
805
- KeccakF1600_FastLoop_Absorb_Not17Lanes:
806
- lea mapState(%rip), %r9
807
- mov %rsi, %rax
808
- KeccakF1600_FastLoop_Absorb_LanesAddLoop:
809
- mov (%rdx), %r8
810
- add $8, %rdx
811
- mov (%r9), %r10
812
- add $8, %r9
813
- add %rdi, %r10
814
- xor %r8, (%r10)
815
- sub $1, %rax
816
- jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
817
- sub %rsi, %rcx
818
- push %rdi
819
- push %rsi
820
- push %rdx
821
- push %rcx
822
- .ifdef no_plt
823
- call KeccakP1600_Permute_24rounds
395
+ .ifdef macOS
824
396
  .else
825
- call KeccakP1600_Permute_24rounds@PLT
826
- .endif
827
- pop %rcx
828
- pop %rdx
829
- pop %rsi
830
- pop %rdi
831
- cmp %rsi, %rcx
832
- jae KeccakF1600_FastLoop_Absorb_Not17Lanes
833
- jmp KeccakF1600_FastLoop_Absorb_Exit
834
- .ifndef old_gas_syntax
835
- .size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
397
+ .size KeccakP1600_AVX2_Permute_12rounds,.-KeccakP1600_AVX2_Permute_12rounds
836
398
  .endif
837
399
 
838
400
  # -----------------------------------------------------------------------------
839
401
  #
840
- # size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
402
+ # size_t KeccakP1600_AVX2_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
841
403
  # %rdi %rsi %rdx %rcx
842
404
  #
843
- .globl KeccakP1600_12rounds_FastLoop_Absorb
844
- .globl _KeccakP1600_12rounds_FastLoop_Absorb
845
- .ifndef old_gas_syntax
846
- .type KeccakP1600_12rounds_FastLoop_Absorb,@function
405
+ .ifdef macOS
406
+ .globl _KeccakP1600_AVX2_12rounds_FastLoop_Absorb
407
+ _KeccakP1600_AVX2_12rounds_FastLoop_Absorb:
408
+ .else
409
+ .globl KeccakP1600_AVX2_12rounds_FastLoop_Absorb
410
+ .type KeccakP1600_AVX2_12rounds_FastLoop_Absorb,@function
411
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb:
847
412
  .endif
848
- KeccakP1600_12rounds_FastLoop_Absorb:
849
- _KeccakP1600_12rounds_FastLoop_Absorb:
850
- .balign 32
413
+ .balign 32
851
414
  push %rbx
852
415
  push %r10
853
416
  shr $3, %rcx # rcx = data length in lanes
854
417
  mov %rdx, %rbx # rbx = initial data pointer
855
418
  cmp %rsi, %rcx
856
- jb KeccakP1600_12rounds_FastLoop_Absorb_Exit
419
+ jb KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit
857
420
  vzeroupper
858
421
  cmp $21, %rsi
859
- jnz KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes
422
+ jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not21Lanes
860
423
  sub $21, %rcx
861
424
  lea rhotates_left+96(%rip),%r8
862
425
  lea rhotates_right+96(%rip),%r9
@@ -868,7 +431,7 @@ _KeccakP1600_12rounds_FastLoop_Absorb:
868
431
  vmovdqu 8+32*3-96(%rdi),%ymm4
869
432
  vmovdqu 8+32*4-96(%rdi),%ymm5
870
433
  vmovdqu 8+32*5-96(%rdi),%ymm6
871
- KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
434
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop21Lanes:
872
435
  vpbroadcastq (%rdx),%ymm7
873
436
  vmovdqu 8(%rdx),%ymm8
874
437
 
@@ -908,8 +471,8 @@ KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
908
471
  mov $12,%eax
909
472
  call __KeccakF1600
910
473
  sub $21, %rcx
911
- jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes
912
- KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
474
+ jnc KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop21Lanes
475
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_SaveAndExit:
913
476
  vmovq %xmm0,-96(%rdi)
914
477
  vmovdqu %ymm1,8+32*0-96(%rdi)
915
478
  vmovdqu %ymm2,8+32*1-96(%rdi)
@@ -917,16 +480,16 @@ KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
917
480
  vmovdqu %ymm4,8+32*3-96(%rdi)
918
481
  vmovdqu %ymm5,8+32*4-96(%rdi)
919
482
  vmovdqu %ymm6,8+32*5-96(%rdi)
920
- KeccakP1600_12rounds_FastLoop_Absorb_Exit:
483
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit:
921
484
  vzeroupper
922
485
  mov %rdx, %rax # return number of bytes processed
923
486
  sub %rbx, %rax
924
487
  pop %r10
925
488
  pop %rbx
926
489
  ret
927
- KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
490
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not21Lanes:
928
491
  cmp $17, %rsi
929
- jnz KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
492
+ jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes
930
493
  sub $17, %rcx
931
494
  lea rhotates_left+96(%rip),%r8
932
495
  lea rhotates_right+96(%rip),%r9
@@ -938,7 +501,7 @@ KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
938
501
  vmovdqu 8+32*3-96(%rdi),%ymm4
939
502
  vmovdqu 8+32*4-96(%rdi),%ymm5
940
503
  vmovdqu 8+32*5-96(%rdi),%ymm6
941
- KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
504
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop17Lanes:
942
505
  vpbroadcastq (%rdx),%ymm7
943
506
  vmovdqu 8(%rdx),%ymm8
944
507
 
@@ -979,12 +542,12 @@ KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
979
542
  mov $12,%eax
980
543
  call __KeccakF1600
981
544
  sub $17, %rcx
982
- jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes
983
- jmp KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit
984
- KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
545
+ jnc KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Loop17Lanes
546
+ jmp KeccakP1600_AVX2_12rounds_FastLoop_Absorb_SaveAndExit
547
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes:
985
548
  lea mapState(%rip), %r9
986
549
  mov %rsi, %rax
987
- KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
550
+ KeccakP1600_AVX2_12rounds_FastLoop_Absorb_LanesAddLoop:
988
551
  mov (%rdx), %r8
989
552
  add $8, %rdx
990
553
  mov (%r9), %r10
@@ -992,31 +555,32 @@ KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
992
555
  add %rdi, %r10
993
556
  xor %r8, (%r10)
994
557
  sub $1, %rax
995
- jnz KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
558
+ jnz KeccakP1600_AVX2_12rounds_FastLoop_Absorb_LanesAddLoop
996
559
  sub %rsi, %rcx
997
560
  push %rdi
998
561
  push %rsi
999
562
  push %rdx
1000
563
  push %rcx
1001
- .ifdef no_plt
1002
- call KeccakP1600_Permute_12rounds
564
+ .ifdef macOS
565
+ call _KeccakP1600_AVX2_Permute_12rounds
1003
566
  .else
1004
- call KeccakP1600_Permute_12rounds@PLT
567
+ call KeccakP1600_AVX2_Permute_12rounds@PLT
1005
568
  .endif
1006
569
  pop %rcx
1007
570
  pop %rdx
1008
571
  pop %rsi
1009
572
  pop %rdi
1010
573
  cmp %rsi, %rcx
1011
- jae KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
1012
- jmp KeccakP1600_12rounds_FastLoop_Absorb_Exit
1013
- .ifndef old_gas_syntax
1014
- .size KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
574
+ jae KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Not17Lanes
575
+ jmp KeccakP1600_AVX2_12rounds_FastLoop_Absorb_Exit
576
+ .ifdef macOS
577
+ .else
578
+ .size KeccakP1600_AVX2_12rounds_FastLoop_Absorb,.-KeccakP1600_AVX2_12rounds_FastLoop_Absorb
1015
579
  .endif
1016
580
 
1017
581
  .equ ALLON, 0xFFFFFFFFFFFFFFFF
1018
582
 
1019
- .balign 64
583
+ .balign 64
1020
584
  rhotates_left:
1021
585
  .quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0]
1022
586
  .quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4]
@@ -1064,7 +628,7 @@ mapState:
1064
628
  .quad 8*8, 9*8, 18*8, 23*8, 16*8
1065
629
  .quad 6*8, 17*8, 14*8, 11*8, 24*8
1066
630
 
1067
- .balign 16
631
+ .balign 16
1068
632
  map2:
1069
633
  .long 10*8, 20*8, 5*8, 15*8
1070
634
  map3:
@@ -1076,7 +640,7 @@ map5:
1076
640
  map6:
1077
641
  .long 6*8, 12*8, 18*8, 24*8
1078
642
 
1079
- .balign 32
643
+ .balign 32
1080
644
  mask3_21:
1081
645
  .quad ALLON, ALLON, 0, ALLON
1082
646
  mask4_21: