sleeping_kangaroo12 0.0.1 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (296) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +27 -0
  3. data/README.md +48 -53
  4. data/ext/Rakefile +12 -37
  5. data/ext/binding/sleeping_kangaroo12.c +1 -16
  6. data/ext/{xkcp → k12}/Makefile +0 -0
  7. data/ext/k12/Makefile.build +118 -0
  8. data/ext/k12/README.markdown +86 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  11. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  12. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  13. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  14. data/ext/k12/lib/KangarooTwelve.c +332 -0
  15. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  16. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  19. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  20. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  24. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  25. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  26. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  27. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  28. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  33. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  34. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  35. data/lib/sleeping_kangaroo12/binding.rb +2 -1
  36. data/lib/sleeping_kangaroo12/build/loader.rb +1 -0
  37. data/lib/sleeping_kangaroo12/build/platform.rb +1 -0
  38. data/lib/sleeping_kangaroo12/digest.rb +38 -4
  39. data/lib/sleeping_kangaroo12/version.rb +1 -1
  40. metadata +48 -288
  41. data/ext/config/xkcp.build +0 -17
  42. data/ext/xkcp/LICENSE +0 -1
  43. data/ext/xkcp/Makefile.build +0 -200
  44. data/ext/xkcp/README.markdown +0 -296
  45. data/ext/xkcp/lib/HighLevel.build +0 -143
  46. data/ext/xkcp/lib/LowLevel.build +0 -757
  47. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  48. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  49. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  50. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  51. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  52. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  53. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  54. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  55. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  56. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  57. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  58. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  59. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  60. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  61. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  62. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  63. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  64. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  65. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  66. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  67. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  68. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  69. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  70. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  71. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  72. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  73. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  74. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  75. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  76. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  77. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  78. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  79. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  80. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  81. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  82. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  83. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  84. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  96. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  98. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  99. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  100. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  107. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  108. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  109. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  111. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  112. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  113. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  114. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  115. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  116. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  117. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  120. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  121. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  122. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  123. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  124. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  125. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  126. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  127. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  128. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  129. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  130. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  131. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  132. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  133. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  145. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  146. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  147. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  148. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  149. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  159. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  160. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  161. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  162. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  163. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  170. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  171. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  172. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  173. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  174. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  175. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  177. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  178. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  179. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  180. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  181. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  182. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  183. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  184. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  185. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  186. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  187. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  189. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  190. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  191. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  192. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  193. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  194. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  195. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  196. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  203. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  204. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  205. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  206. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  207. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  208. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  209. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  210. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  211. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  212. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  213. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  219. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  220. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  221. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  222. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  223. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  224. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  225. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  226. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  227. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  228. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  229. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  230. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  231. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  232. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  233. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  234. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  235. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  236. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  237. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  246. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  247. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  248. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  249. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  250. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  251. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  252. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  253. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  254. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  255. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  256. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  257. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  258. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  259. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  260. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  261. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  262. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  263. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  264. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  265. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  266. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  267. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  268. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  269. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  270. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  271. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  272. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  273. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  274. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  275. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  276. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  277. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  278. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  279. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  280. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  281. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  282. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  283. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  284. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  285. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  286. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  287. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  288. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  289. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  290. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  291. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  292. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  293. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  294. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  295. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  296. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,850 +0,0 @@
1
- /*
2
- The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
-
4
- Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
-
6
- For more information, feedback or questions, please refer to the Keccak Team website:
7
- https://keccak.team/
8
-
9
- To the extent possible under law, the implementer has waived all copyright
10
- and related or neighboring rights to the source code in this file.
11
- http://creativecommons.org/publicdomain/zero/1.0/
12
-
13
- ---
14
-
15
- This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
- Please refer to PlSnP-documentation.h for more details.
17
-
18
- This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
- Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include <string.h>
25
- #include <stdint.h>
26
- #include <smmintrin.h>
27
- #include <wmmintrin.h>
28
- #include <immintrin.h>
29
- #include <emmintrin.h>
30
- #include "align.h"
31
- #include "KeccakP-1600-times2-SnP.h"
32
- #include "SIMD512-2-config.h"
33
-
34
- #include "brg_endian.h"
35
- #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
36
- #error Expecting a little-endian platform
37
- #endif
38
-
39
- /* Comment out the define below when compiling for a CPU with AVX-512 SIMD */
40
- /*
41
- * Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined;
42
- * errors will occur if we misinterpreted the AVX-512 intrinsics'
43
- * API or functionality.
44
- */
45
- /* #define SIMULATE_AVX512 */
46
-
47
- #if defined(SIMULATE_AVX512)
48
-
49
/* Stand-in for the 512-bit vector type, compiled only when SIMULATE_AVX512 is
   defined: a plain struct holding eight 64-bit elements. */
- typedef struct
50
- {
51
- uint64_t x[8];
52
- } __m512i;
53
-
54
/* Simulated _mm512_xor_si512: returns a value whose eight 64-bit elements are
   the element-wise XOR of the corresponding elements of a and b. */
- static __m512i _mm512_xor_si512( __m512i a, __m512i b)
55
- {
56
- __m512i r;
57
- unsigned int i;
58
-
59
- for ( i = 0; i < 8; ++i )
60
- r.x[i] = a.x[i] ^ b.x[i];
61
- return(r);
62
- }
63
-
64
/* Simulated _mm_ternarylogic_epi64 supporting only two truth-table selectors:
     imm == 0x96 : a XOR b XOR c                  (used by the XOR3 macro)
     imm == 0xD2 : a XOR _mm_andnot_si128(b, c)   (used by the Chi macro)
   Any other imm prints a "not implemented" message to stdout and terminates
   the process with exit(1); the function does not return in that case. */
- static __m128i _mm_ternarylogic_epi64(__m128i a, __m128i b, __m128i c, int imm)
65
- {
66
-
67
- if (imm == 0x96)
68
- return _mm_xor_si128( _mm_xor_si128( a, b ), c );
69
- if (imm == 0xD2)
70
- return _mm_xor_si128( a, _mm_andnot_si128(b, c) );
71
- printf( "_mm_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
72
- exit(1);
73
- }
74
-
75
/* Simulated _mm_rol_epi64: combines (a << offset) with (a >> (64 - offset)) on
   each 64-bit element, i.e. a left rotation by offset bits per element. */
- static __m128i _mm_rol_epi64(__m128i a, int offset)
76
- {
77
- return _mm_or_si128(_mm_slli_epi64(a, offset), _mm_srli_epi64(a, 64-offset));
78
- }
79
-
80
/* Simulated _mm512_i32gather_epi64: spills the eight 32-bit indices of idx to
   a local array, then loads one 64-bit value per index from
   (const char*)p + index * scale and returns the eight values.
   NOTE(review): _mm256_store_si256 is the aligned store, but offset[] is a
   plain uint32_t array whose declaration does not request 32-byte alignment;
   confirm, or use the unaligned store.
   NOTE(review): the indices are read back as uint32_t, so a negative index
   would not behave as a signed offset here; verify callers never pass one. */
- static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
81
- {
82
- __m512i r;
83
- unsigned int i;
84
- uint32_t offset[8];
85
-
86
- _mm256_store_si256( (__m256i*)offset, idx );
87
- for ( i = 0; i < 8; ++i )
88
- r.x[i] = *(const uint64_t*)((const char*)p + offset[i] * scale);
89
- return(r);
90
- }
91
-
92
/* Simulated _mm_i32scatter_epi64: spills idx into four 32-bit offsets and
   value into two 64-bit words, then writes v[0] and v[1] to
   (char*)p + offset[i] * scale for i = 0 and 1. Only the first two of the four
   spilled indices are used.
   NOTE(review): _mm_store_ps and _mm_store_pd are aligned stores into plain
   local arrays whose declarations do not request 16-byte alignment; confirm.
   NOTE(review): the (__m128) and (__m128d) casts from __m128i look like a
   compiler-specific vector cast; verify portability. */
- static void _mm_i32scatter_epi64( void *p, __m128i idx, __m128i value, int scale)
93
- {
94
- unsigned int i;
95
- uint64_t v[2];
96
- uint32_t offset[4];
97
-
98
- _mm_store_ps( (float*)offset, (__m128)idx );
99
- _mm_store_pd( (double*)v, (__m128d)value );
100
- for ( i = 0; i < 2; ++i )
101
- *(uint64_t*)((char*)p + offset[i] * scale) = v[i];
102
- }
103
-
104
/* Simulated _mm512_i32scatter_epi64: spills the eight 32-bit indices of idx to
   a local array and writes value.x[i] to (char*)p + offset[i] * scale for each
   of the eight elements.
   NOTE(review): _mm256_store_si256 is the aligned store, but offset[] is a
   plain uint32_t array with no explicit 32-byte alignment; confirm. */
- static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
105
- {
106
- unsigned int i;
107
- uint32_t offset[8];
108
-
109
- _mm256_store_si256( (__m256i*)offset, idx );
110
- for ( i = 0; i < 8; ++i )
111
- *(uint64_t*)((char*)p + offset[i] * scale) = value.x[i];
112
- }
113
-
114
- #endif
115
-
116
/* Short aliases for the 128-, 256- and 512-bit integer vector types. */
- typedef __m128i V128;
117
- typedef __m256i V256;
118
- typedef __m512i V512;
119
-
120
/* Vector helper macros, defined only when KeccakP1600times2_useAVX512 is set.
   XOR, XOR3, XOR5, ROL, Chi and the *2_64 gather/scatter macros operate on
   128-bit values; XOR512 and the *8_64 gather/scatter macros on 512-bit values.
   All gather/scatter macros pass a scale of 8 (one 64-bit lane per index unit).
   NOTE(review): LOAD4_32 and LOAD8_32 cast their first argument with (uint64_t)
   while every other argument uses (uint32_t); this looks unintended - confirm.
   NOTE(review): LOAD_GATHER2_64 passes (pointer, idx, scale) whereas
   LOAD_GATHER8_64 passes (idx, pointer, scale); presumably this follows the
   differing signatures of the two intrinsics - verify. */
- #if defined(KeccakP1600times2_useAVX512)
121
-
122
- #define XOR(a,b) _mm_xor_si128(a,b)
123
- #define XOR3(a,b,c) _mm_ternarylogic_epi64(a,b,c,0x96)
124
- #define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e)
125
- #define XOR512(a,b) _mm512_xor_si512(a,b)
126
- #define ROL(a,offset) _mm_rol_epi64(a,offset)
127
- #define Chi(a,b,c) _mm_ternarylogic_epi64(a,b,c,0xD2)
128
-
129
- #define CONST128_64(a) _mm_set1_epi64x(a)
130
- #define LOAD4_32(a,b,c,d) _mm_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d))
131
- #define LOAD8_32(a,b,c,d,e,f,g,h) _mm256_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d), (uint32_t)(e), (uint32_t)(f), (uint32_t)(g), (uint32_t)(h))
132
- #define LOAD_GATHER2_64(idx,p) _mm_i32gather_epi64( (const void*)(p), idx, 8)
133
- #define LOAD_GATHER8_64(idx,p) _mm512_i32gather_epi64( idx, (const void*)(p), 8)
134
- #define STORE_SCATTER2_64(p,idx, v) _mm_i32scatter_epi64( (void*)(p), idx, v, 8)
135
- #define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)
136
-
137
- #endif
138
-
139
/* laneIndex maps (instance, lane position) to an index into the state viewed
   as an array of 64-bit lanes: the two instances' copies of a given lane
   position sit next to each other (lanePosition*2 + instanceIndex).
   NOTE(review): instanceIndex is not parenthesized in the expansion, so an
   expression argument with low-precedence operators could mis-associate.
   SnP_laneLengthInBytes is the size of one lane: 8 bytes. */
- #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + instanceIndex)
140
- #define SnP_laneLengthInBytes 8
141
-
142
/* Zeroes the whole state buffer: sets KeccakP1600times2_statesSizeInBytes
   bytes starting at states to 0. The size constant is not defined in this
   file; presumably it comes from KeccakP-1600-times2-SnP.h - confirm. */
- void KeccakP1600times2_InitializeAll(void *states)
143
- {
144
- memset(states, 0, KeccakP1600times2_statesSizeInBytes);
145
- }
146
-
147
/* XORs length bytes from data into the state of one instance, starting at
   byte position offset within that instance's state.
     states        - state buffer, accessed as an array of 64-bit lanes
     instanceIndex - which interleaved instance to update (see laneIndex)
     data          - input bytes
     offset        - starting byte offset inside the instance's state
     length        - number of bytes to XOR in
   Works in three phases: a partial lane if offset is not lane-aligned, then
   whole 8-byte lanes, then a trailing partial lane. Partial lanes are built by
   copying bytes into a zeroed uint64_t, which relies on the little-endian
   layout this file enforces with its #error check.
   No bounds checking is performed on offset or length. */
- void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
148
- {
149
- unsigned int sizeLeft = length;
150
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
151
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
152
- const unsigned char *curData = data;
153
- uint64_t *statesAsLanes = states;
154
-
/* Phase 1: finish the lane that offset lands in the middle of. */
155
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
156
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
157
- uint64_t lane = 0;
158
- if (bytesInLane > sizeLeft)
159
- bytesInLane = sizeLeft;
160
- memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
161
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
162
- sizeLeft -= bytesInLane;
163
- lanePosition++;
164
- curData += bytesInLane;
165
- }
166
-
/* Phase 2: whole lanes.
   NOTE(review): reads a uint64_t directly through a cast byte pointer; this
   assumes such (possibly unaligned) access is acceptable on the target -
   confirm. */
167
- while(sizeLeft >= SnP_laneLengthInBytes) {
168
- uint64_t lane = *((const uint64_t*)curData);
169
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
170
- sizeLeft -= SnP_laneLengthInBytes;
171
- lanePosition++;
172
- curData += SnP_laneLengthInBytes;
173
- }
174
-
/* Phase 3: remaining bytes (fewer than one lane), zero-padded before the XOR. */
175
- if (sizeLeft > 0) {
176
- uint64_t lane = 0;
177
- memcpy(&lane, curData, sizeLeft);
178
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
179
- }
180
- }
181
-
182
/* XORs the first laneCount lanes of input into both instances' states at once.
     states     - state buffer, viewed both as 128-bit and as 512-bit elements
     data       - input, read as 64-bit lanes
     laneCount  - number of lane positions to process
     laneOffset - used to build the gather indices (0 and laneOffset, in units
                  of 8-byte lanes); presumably the distance in lanes between
                  instance 0's and instance 1's input - TODO confirm
   One 128-bit state element holds one lane position for both instances; one
   512-bit element holds four consecutive lane positions for both instances.
   For laneCount >= 16 the first 16 (or 20, when laneCount >= 20) lane
   positions are handled four at a time with 512-bit gathers, and any remaining
   positions one at a time; below 16 every position is handled singly.
   The LOAD4_32 / LOAD8_32 / gather macros used here are defined only under
   KeccakP1600times2_useAVX512. */
- void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
183
- {
184
- V128 *stateAsLanes128 = states;
185
- V512 *stateAsLanes512 = states;
186
- const uint64_t *dataAsLanes = (const uint64_t *)data;
187
- unsigned int i;
188
- V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
189
- V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
190
-
/* Add_In1 handles one lane position, Add_In4 four consecutive positions
   starting at argIndex (argIndex/4 selects the 512-bit state element). */
191
- #define Add_In1( argIndex ) stateAsLanes128[argIndex] = XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, dataAsLanes+argIndex))
192
- #define Add_In4( argIndex ) stateAsLanes512[argIndex/4] = XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
193
- if ( laneCount >= 16 ) {
194
- Add_In4( 0 );
195
- Add_In4( 4 );
196
- Add_In4( 8 );
197
- Add_In4( 12 );
198
- if ( laneCount >= 20 ) {
199
- Add_In4( 16 );
200
- for(i=20; i<laneCount; i++)
201
- Add_In1( i );
202
- }
203
- else {
204
- for(i=16; i<laneCount; i++)
205
- Add_In1( i );
206
- }
207
- }
208
- else {
209
- for(i=0; i<laneCount; i++)
210
- Add_In1( i );
211
- }
212
- #undef Add_In1
213
- #undef Add_In4
214
- }
215
-
216
/* Replaces length bytes of one instance's state, starting at byte position
   offset, with the bytes from data.
     states        - state buffer, accessed as an array of 64-bit lanes
     instanceIndex - which interleaved instance to update (see laneIndex)
     data          - replacement bytes
     offset        - starting byte offset inside the instance's state
     length        - number of bytes to overwrite
   Same three phases as a byte-wise walk over lanes: leading partial lane,
   whole lanes, trailing partial lane. Partial lanes are written with memcpy
   directly into the lane's bytes, leaving the other bytes of that lane intact.
   No bounds checking is performed on offset or length. */
- void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
217
- {
218
- unsigned int sizeLeft = length;
219
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
220
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
221
- const unsigned char *curData = data;
222
- uint64_t *statesAsLanes = states;
223
-
/* Leading partial lane: only when offset is not a multiple of the lane size. */
224
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
225
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
226
- if (bytesInLane > sizeLeft)
227
- bytesInLane = sizeLeft;
228
- memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
229
- sizeLeft -= bytesInLane;
230
- lanePosition++;
231
- curData += bytesInLane;
232
- }
233
-
/* Whole lanes.
   NOTE(review): reads a uint64_t directly through a cast byte pointer; this
   assumes such (possibly unaligned) access is acceptable on the target -
   confirm. */
234
- while(sizeLeft >= SnP_laneLengthInBytes) {
235
- uint64_t lane = *((const uint64_t*)curData);
236
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
237
- sizeLeft -= SnP_laneLengthInBytes;
238
- lanePosition++;
239
- curData += SnP_laneLengthInBytes;
240
- }
241
-
/* Trailing partial lane: overwrite only the first sizeLeft bytes of the lane. */
242
- if (sizeLeft > 0) {
243
- memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
244
- }
245
- }
246
-
247
/* Overwrites the first laneCount lane positions of both instances' states with
   lanes gathered from data.
     states     - state buffer, viewed both as 128-bit and as 512-bit elements
     data       - input, read as 64-bit lanes
     laneCount  - number of lane positions to overwrite
     laneOffset - used to build the gather indices (0 and laneOffset, in units
                  of 8-byte lanes); presumably the distance in lanes between
                  instance 0's and instance 1's input - TODO confirm
   For laneCount >= 16 the first 16 (or 20, when laneCount >= 20) lane
   positions are written four at a time with 512-bit gathers, and any remaining
   positions one at a time; below 16 every position is written singly.
   The LOAD4_32 / LOAD8_32 / gather macros used here are defined only under
   KeccakP1600times2_useAVX512. */
- void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
248
- {
249
- V128 *stateAsLanes128 = states;
250
- V512 *stateAsLanes512 = states;
251
- const uint64_t *dataAsLanes = (const uint64_t *)data;
252
- unsigned int i;
253
- V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
254
- V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
255
-
/* OverWr1 writes one lane position, OverWr4 four consecutive positions
   starting at argIndex (argIndex/4 selects the 512-bit state element). */
256
- #define OverWr1( argIndex ) stateAsLanes128[argIndex] = LOAD_GATHER2_64(index128, dataAsLanes+argIndex)
257
- #define OverWr4( argIndex ) stateAsLanes512[argIndex/4] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
258
- if ( laneCount >= 16 ) {
259
- OverWr4( 0 );
260
- OverWr4( 4 );
261
- OverWr4( 8 );
262
- OverWr4( 12 );
263
- if ( laneCount >= 20 ) {
264
- OverWr4( 16 );
265
- for(i=20; i<laneCount; i++)
266
- OverWr1( i );
267
- }
268
- else {
269
- for(i=16; i<laneCount; i++)
270
- OverWr1( i );
271
- }
272
- }
273
- else {
274
- for(i=0; i<laneCount; i++)
275
- OverWr1( i );
276
- }
277
- #undef OverWr1
278
- #undef OverWr4
279
- }
280
-
281
- void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
282
- {
283
- unsigned int sizeLeft = byteCount;
284
- unsigned int lanePosition = 0;
285
- uint64_t *statesAsLanes = states;
286
-
287
- while(sizeLeft >= SnP_laneLengthInBytes) {
288
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
289
- sizeLeft -= SnP_laneLengthInBytes;
290
- lanePosition++;
291
- }
292
-
293
- if (sizeLeft > 0) {
294
- memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
295
- }
296
- }
297
-
298
- void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
299
- {
300
- unsigned int sizeLeft = length;
301
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
302
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
303
- unsigned char *curData = data;
304
- const uint64_t *statesAsLanes = states;
305
-
306
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
307
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
308
- if (bytesInLane > sizeLeft)
309
- bytesInLane = sizeLeft;
310
- memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
311
- sizeLeft -= bytesInLane;
312
- lanePosition++;
313
- curData += bytesInLane;
314
- }
315
-
316
- while(sizeLeft >= SnP_laneLengthInBytes) {
317
- *(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
318
- sizeLeft -= SnP_laneLengthInBytes;
319
- lanePosition++;
320
- curData += SnP_laneLengthInBytes;
321
- }
322
-
323
- if (sizeLeft > 0) {
324
- memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
325
- }
326
- }
327
-
328
- void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
329
- {
330
- const V128 *stateAsLanes128 = states;
331
- const V512 *stateAsLanes512 = states;
332
- uint64_t *dataAsLanes = (uint64_t *)data;
333
- unsigned int i;
334
- V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
335
- V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
336
-
337
- #define Extr1( argIndex ) STORE_SCATTER2_64(dataAsLanes+argIndex, index128, stateAsLanes128[argIndex])
338
- #define Extr4( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/4])
339
- if ( laneCount >= 16 ) {
340
- Extr4( 0 );
341
- Extr4( 4 );
342
- Extr4( 8 );
343
- Extr4( 12 );
344
- if ( laneCount >= 20 ) {
345
- Extr4( 16 );
346
- for(i=20; i<laneCount; i++)
347
- Extr1( i );
348
- }
349
- else {
350
- for(i=16; i<laneCount; i++)
351
- Extr1( i );
352
- }
353
- }
354
- else {
355
- for(i=0; i<laneCount; i++)
356
- Extr1( i );
357
- }
358
- #undef Extr1
359
- #undef Extr4
360
- }
361
-
362
- void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
363
- {
364
- unsigned int sizeLeft = length;
365
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
366
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
367
- const unsigned char *curInput = input;
368
- unsigned char *curOutput = output;
369
- const uint64_t *statesAsLanes = states;
370
-
371
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
372
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
373
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
374
- if (bytesInLane > sizeLeft)
375
- bytesInLane = sizeLeft;
376
- sizeLeft -= bytesInLane;
377
- do {
378
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
379
- lane >>= 8;
380
- } while ( --bytesInLane != 0);
381
- lanePosition++;
382
- }
383
-
384
- while(sizeLeft >= SnP_laneLengthInBytes) {
385
- *((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
386
- sizeLeft -= SnP_laneLengthInBytes;
387
- lanePosition++;
388
- curInput += SnP_laneLengthInBytes;
389
- curOutput += SnP_laneLengthInBytes;
390
- }
391
-
392
- if (sizeLeft != 0) {
393
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
394
- do {
395
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
396
- lane >>= 8;
397
- } while ( --sizeLeft != 0);
398
- }
399
- }
400
-
401
- void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
402
- {
403
- const V128 *stateAsLanes128 = states;
404
- const V512 *stateAsLanes512 = states;
405
- const uint64_t *inAsLanes = (const uint64_t *)input;
406
- uint64_t *outAsLanes = (uint64_t *)output;
407
- unsigned int i;
408
- V128 index128 = LOAD4_32(0, 0, 1*laneOffset, 0*laneOffset);
409
- V256 index512 = LOAD8_32(1*laneOffset+3, 0*laneOffset+3, 1*laneOffset+2, 0*laneOffset+2, 1*laneOffset+1, 0*laneOffset+1, 1*laneOffset, 0*laneOffset);
410
-
411
- #define ExtrAdd1( argIndex ) STORE_SCATTER2_64(outAsLanes+argIndex, index128, XOR(stateAsLanes128[argIndex], LOAD_GATHER2_64(index128, inAsLanes+argIndex)))
412
- #define ExtrAdd4( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/4], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
413
- if ( laneCount >= 16 ) {
414
- ExtrAdd4( 0 );
415
- ExtrAdd4( 4 );
416
- ExtrAdd4( 8 );
417
- ExtrAdd4( 12 );
418
- if ( laneCount >= 20 ) {
419
- ExtrAdd4( 16 );
420
- for(i=20; i<laneCount; i++)
421
- ExtrAdd1( i );
422
- }
423
- else {
424
- for(i=16; i<laneCount; i++)
425
- ExtrAdd1( i );
426
- }
427
- }
428
- else {
429
- for(i=0; i<laneCount; i++)
430
- ExtrAdd1( i );
431
- }
432
- #undef ExtrAdd1
433
- #undef ExtrAdd4
434
-
435
- }
436
-
437
- static ALIGN(KeccakP1600times2_statesAlignment) const uint64_t KeccakP1600RoundConstants[24] = {
438
- 0x0000000000000001ULL,
439
- 0x0000000000008082ULL,
440
- 0x800000000000808aULL,
441
- 0x8000000080008000ULL,
442
- 0x000000000000808bULL,
443
- 0x0000000080000001ULL,
444
- 0x8000000080008081ULL,
445
- 0x8000000000008009ULL,
446
- 0x000000000000008aULL,
447
- 0x0000000000000088ULL,
448
- 0x0000000080008009ULL,
449
- 0x000000008000000aULL,
450
- 0x000000008000808bULL,
451
- 0x800000000000008bULL,
452
- 0x8000000000008089ULL,
453
- 0x8000000000008003ULL,
454
- 0x8000000000008002ULL,
455
- 0x8000000000000080ULL,
456
- 0x000000000000800aULL,
457
- 0x800000008000000aULL,
458
- 0x8000000080008081ULL,
459
- 0x8000000000008080ULL,
460
- 0x0000000080000001ULL,
461
- 0x8000000080008008ULL};
462
-
463
- #define KeccakP_DeclareVars \
464
- V128 _Ba, _Be, _Bi, _Bo, _Bu; \
465
- V128 _Da, _De, _Di, _Do, _Du; \
466
- V128 _ba, _be, _bi, _bo, _bu; \
467
- V128 _ga, _ge, _gi, _go, _gu; \
468
- V128 _ka, _ke, _ki, _ko, _ku; \
469
- V128 _ma, _me, _mi, _mo, _mu; \
470
- V128 _sa, _se, _si, _so, _su
471
-
472
- #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
473
- _Bb1 = XOR(_L1, _Da); \
474
- _Bb2 = XOR(_L2, _De); \
475
- _Bb3 = XOR(_L3, _Di); \
476
- _Bb4 = XOR(_L4, _Do); \
477
- _Bb5 = XOR(_L5, _Du); \
478
- if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
479
- _Bb2 = ROL(_Bb2, _Rr2); \
480
- _Bb3 = ROL(_Bb3, _Rr3); \
481
- _Bb4 = ROL(_Bb4, _Rr4); \
482
- _Bb5 = ROL(_Bb5, _Rr5); \
483
- _L1 = Chi( _Ba, _Be, _Bi); \
484
- _L2 = Chi( _Be, _Bi, _Bo); \
485
- _L3 = Chi( _Bi, _Bo, _Bu); \
486
- _L4 = Chi( _Bo, _Bu, _Ba); \
487
- _L5 = Chi( _Bu, _Ba, _Be);
488
-
489
- #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
490
- _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
491
- _Be = XOR5( _be, _ge, _ke, _me, _se ); \
492
- _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
493
- _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
494
- _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
495
- _Da = ROL( _Be, 1 ); \
496
- _De = ROL( _Bi, 1 ); \
497
- _Di = ROL( _Bo, 1 ); \
498
- _Do = ROL( _Bu, 1 ); \
499
- _Du = ROL( _Ba, 1 ); \
500
- _Da = XOR( _Da, _Bu ); \
501
- _De = XOR( _De, _Ba ); \
502
- _Di = XOR( _Di, _Be ); \
503
- _Do = XOR( _Do, _Bi ); \
504
- _Du = XOR( _Du, _Bo ); \
505
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
506
- _L1 = XOR(_L1, _rc) /* Iota */
507
-
508
- #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
509
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
510
-
511
- #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
512
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
513
-
514
- #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
515
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
516
-
517
- #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
518
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
519
-
520
- #define KeccakP_4rounds( i ) \
521
- KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST128_64(KeccakP1600RoundConstants[i]) ); \
522
- KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
523
- KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
524
- KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
525
- KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
526
- \
527
- KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
528
- KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
529
- KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
530
- KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
531
- KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
532
- \
533
- KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i+2]) ); \
534
- KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
535
- KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
536
- KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
537
- KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
538
- \
539
- KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+3]) ); \
540
- KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
541
- KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
542
- KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
543
- KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
544
-
545
- #define KeccakP_2rounds( i ) \
546
- KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST128_64(KeccakP1600RoundConstants[i]) ); \
547
- KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
548
- KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
549
- KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
550
- KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
551
- \
552
- KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST128_64(KeccakP1600RoundConstants[i+1]) ); \
553
- KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
554
- KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
555
- KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
556
- KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
557
-
558
- #ifdef KeccakP1600times2_fullUnrolling
559
-
560
- #define rounds12 \
561
- KeccakP_4rounds( 12 ); \
562
- KeccakP_4rounds( 16 ); \
563
- KeccakP_4rounds( 20 )
564
-
565
- #define rounds24 \
566
- KeccakP_4rounds( 0 ); \
567
- KeccakP_4rounds( 4 ); \
568
- KeccakP_4rounds( 8 ); \
569
- KeccakP_4rounds( 12 ); \
570
- KeccakP_4rounds( 16 ); \
571
- KeccakP_4rounds( 20 )
572
-
573
- #elif (KeccakP1600times2_unrolling == 4)
574
-
575
- #define rounds12 \
576
- i = 12; \
577
- do { \
578
- KeccakP_4rounds( i ); \
579
- } while( (i += 4) < 24 )
580
-
581
- #define rounds24 \
582
- i = 0; \
583
- do { \
584
- KeccakP_4rounds( i ); \
585
- } while( (i += 4) < 24 )
586
-
587
- #elif (KeccakP1600times2_unrolling == 12)
588
-
589
- #define rounds12 \
590
- KeccakP_4rounds( 12 ); \
591
- KeccakP_4rounds( 16 ); \
592
- KeccakP_4rounds( 20 )
593
-
594
- #define rounds24 \
595
- i = 0; \
596
- do { \
597
- KeccakP_4rounds( i ); \
598
- KeccakP_4rounds( i+4 ); \
599
- KeccakP_4rounds( i+8 ); \
600
- } while( (i += 12) < 24 )
601
-
602
- #else
603
- #error "Unrolling is not correctly specified!"
604
- #endif
605
-
606
- #define copyFromState2rounds(pState) \
607
- _ba = pState[ 0]; \
608
- _be = pState[16]; /* me */ \
609
- _bi = pState[ 7]; /* gi */ \
610
- _bo = pState[23]; /* so */ \
611
- _bu = pState[14]; /* ku */ \
612
- _ga = pState[20]; /* sa */ \
613
- _ge = pState[11]; /* ke */ \
614
- _gi = pState[ 2]; /* bi */ \
615
- _go = pState[18]; /* mo */ \
616
- _gu = pState[ 9]; \
617
- _ka = pState[15]; /* ma */ \
618
- _ke = pState[ 6]; /* ge */ \
619
- _ki = pState[22]; /* si */ \
620
- _ko = pState[13]; \
621
- _ku = pState[ 4]; /* bu */ \
622
- _ma = pState[10]; /* ka */ \
623
- _me = pState[ 1]; /* be */ \
624
- _mi = pState[17]; \
625
- _mo = pState[ 8]; /* go */ \
626
- _mu = pState[24]; /* su */ \
627
- _sa = pState[ 5]; /* ga */ \
628
- _se = pState[21]; \
629
- _si = pState[12]; /* ki */ \
630
- _so = pState[ 3]; /* bo */ \
631
- _su = pState[19] /* mu */
632
-
633
- #define copyFromState(pState) \
634
- _ba = pState[ 0]; \
635
- _be = pState[ 1]; \
636
- _bi = pState[ 2]; \
637
- _bo = pState[ 3]; \
638
- _bu = pState[ 4]; \
639
- _ga = pState[ 5]; \
640
- _ge = pState[ 6]; \
641
- _gi = pState[ 7]; \
642
- _go = pState[ 8]; \
643
- _gu = pState[ 9]; \
644
- _ka = pState[10]; \
645
- _ke = pState[11]; \
646
- _ki = pState[12]; \
647
- _ko = pState[13]; \
648
- _ku = pState[14]; \
649
- _ma = pState[15]; \
650
- _me = pState[16]; \
651
- _mi = pState[17]; \
652
- _mo = pState[18]; \
653
- _mu = pState[19]; \
654
- _sa = pState[20]; \
655
- _se = pState[21]; \
656
- _si = pState[22]; \
657
- _so = pState[23]; \
658
- _su = pState[24]
659
-
660
- #define copyToState(pState) \
661
- pState[ 0] = _ba; \
662
- pState[ 1] = _be; \
663
- pState[ 2] = _bi; \
664
- pState[ 3] = _bo; \
665
- pState[ 4] = _bu; \
666
- pState[ 5] = _ga; \
667
- pState[ 6] = _ge; \
668
- pState[ 7] = _gi; \
669
- pState[ 8] = _go; \
670
- pState[ 9] = _gu; \
671
- pState[10] = _ka; \
672
- pState[11] = _ke; \
673
- pState[12] = _ki; \
674
- pState[13] = _ko; \
675
- pState[14] = _ku; \
676
- pState[15] = _ma; \
677
- pState[16] = _me; \
678
- pState[17] = _mi; \
679
- pState[18] = _mo; \
680
- pState[19] = _mu; \
681
- pState[20] = _sa; \
682
- pState[21] = _se; \
683
- pState[22] = _si; \
684
- pState[23] = _so; \
685
- pState[24] = _su
686
-
687
- void KeccakP1600times2_PermuteAll_24rounds(void *states)
688
- {
689
- V128 *statesAsLanes = states;
690
- KeccakP_DeclareVars;
691
- #ifndef KeccakP1600times2_fullUnrolling
692
- unsigned int i;
693
- #endif
694
-
695
- copyFromState(statesAsLanes);
696
- rounds24;
697
- copyToState(statesAsLanes);
698
- }
699
-
700
- void KeccakP1600times2_PermuteAll_12rounds(void *states)
701
- {
702
- V128 *statesAsLanes = states;
703
- KeccakP_DeclareVars;
704
- #if (KeccakP1600times2_unrolling < 12)
705
- unsigned int i;
706
- #endif
707
-
708
- copyFromState(statesAsLanes);
709
- rounds12;
710
- copyToState(statesAsLanes);
711
- }
712
-
713
- void KeccakP1600times2_PermuteAll_6rounds(void *states)
714
- {
715
- V128 *statesAsLanes = states;
716
- KeccakP_DeclareVars;
717
-
718
- copyFromState2rounds(statesAsLanes);
719
- KeccakP_2rounds( 18 );
720
- KeccakP_4rounds( 20 );
721
- copyToState(statesAsLanes);
722
- }
723
-
724
- void KeccakP1600times2_PermuteAll_4rounds(void *states)
725
- {
726
- V128 *statesAsLanes = states;
727
- KeccakP_DeclareVars;
728
-
729
- copyFromState(statesAsLanes);
730
- KeccakP_4rounds( 20 );
731
- copyToState(statesAsLanes);
732
- }
733
-
734
- size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
735
- {
736
- size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
737
-
738
- if (laneCount == 21) {
739
- #ifndef KeccakP1600times2_fullUnrolling
740
- unsigned int i;
741
- #endif
742
- const unsigned char *dataStart = data;
743
- V128 *statesAsLanes = states;
744
- const uint64_t *dataAsLanes = (const uint64_t *)data;
745
- KeccakP_DeclareVars;
746
- V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
747
-
748
- copyFromState(statesAsLanes);
749
- while(dataByteLen >= dataMinimumSize) {
750
- #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
751
- Add_In( _ba, 0 );
752
- Add_In( _be, 1 );
753
- Add_In( _bi, 2 );
754
- Add_In( _bo, 3 );
755
- Add_In( _bu, 4 );
756
- Add_In( _ga, 5 );
757
- Add_In( _ge, 6 );
758
- Add_In( _gi, 7 );
759
- Add_In( _go, 8 );
760
- Add_In( _gu, 9 );
761
- Add_In( _ka, 10 );
762
- Add_In( _ke, 11 );
763
- Add_In( _ki, 12 );
764
- Add_In( _ko, 13 );
765
- Add_In( _ku, 14 );
766
- Add_In( _ma, 15 );
767
- Add_In( _me, 16 );
768
- Add_In( _mi, 17 );
769
- Add_In( _mo, 18 );
770
- Add_In( _mu, 19 );
771
- Add_In( _sa, 20 );
772
- #undef Add_In
773
- rounds24;
774
- dataAsLanes += laneOffsetSerial;
775
- dataByteLen -= laneOffsetSerial*8;
776
- }
777
- copyToState(statesAsLanes);
778
- return (const unsigned char *)dataAsLanes - dataStart;
779
- }
780
- else {
781
- const unsigned char *dataStart = data;
782
-
783
- while(dataByteLen >= dataMinimumSize) {
784
- KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
785
- KeccakP1600times2_PermuteAll_24rounds(states);
786
- data += laneOffsetSerial*8;
787
- dataByteLen -= laneOffsetSerial*8;
788
- }
789
- return data - dataStart;
790
- }
791
- }
792
-
793
- size_t KeccakP1600times2_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
794
- {
795
- size_t dataMinimumSize = (laneOffsetParallel*1 + laneCount)*8;
796
-
797
- if (laneCount == 21) {
798
- #if (KeccakP1600times2_unrolling < 12)
799
- unsigned int i;
800
- #endif
801
- const unsigned char *dataStart = data;
802
- V128 *statesAsLanes = states;
803
- const uint64_t *dataAsLanes = (const uint64_t *)data;
804
- KeccakP_DeclareVars;
805
- V128 index = LOAD4_32(0, 0, 1*laneOffsetParallel, 0*laneOffsetParallel);
806
-
807
- copyFromState(statesAsLanes);
808
- while(dataByteLen >= dataMinimumSize) {
809
- #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER2_64(index, dataAsLanes+argIndex))
810
- Add_In( _ba, 0 );
811
- Add_In( _be, 1 );
812
- Add_In( _bi, 2 );
813
- Add_In( _bo, 3 );
814
- Add_In( _bu, 4 );
815
- Add_In( _ga, 5 );
816
- Add_In( _ge, 6 );
817
- Add_In( _gi, 7 );
818
- Add_In( _go, 8 );
819
- Add_In( _gu, 9 );
820
- Add_In( _ka, 10 );
821
- Add_In( _ke, 11 );
822
- Add_In( _ki, 12 );
823
- Add_In( _ko, 13 );
824
- Add_In( _ku, 14 );
825
- Add_In( _ma, 15 );
826
- Add_In( _me, 16 );
827
- Add_In( _mi, 17 );
828
- Add_In( _mo, 18 );
829
- Add_In( _mu, 19 );
830
- Add_In( _sa, 20 );
831
- #undef Add_In
832
- rounds12;
833
- dataAsLanes += laneOffsetSerial;
834
- dataByteLen -= laneOffsetSerial*8;
835
- }
836
- copyToState(statesAsLanes);
837
- return (const unsigned char *)dataAsLanes - dataStart;
838
- }
839
- else {
840
- const unsigned char *dataStart = data;
841
-
842
- while(dataByteLen >= dataMinimumSize) {
843
- KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
844
- KeccakP1600times2_PermuteAll_12rounds(states);
845
- data += laneOffsetSerial*8;
846
- dataByteLen -= laneOffsetSerial*8;
847
- }
848
- return data - dataStart;
849
- }
850
- }