sleeping_kangaroo12 0.0.1 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (296) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +27 -0
  3. data/README.md +48 -53
  4. data/ext/Rakefile +12 -37
  5. data/ext/binding/sleeping_kangaroo12.c +1 -16
  6. data/ext/{xkcp → k12}/Makefile +0 -0
  7. data/ext/k12/Makefile.build +118 -0
  8. data/ext/k12/README.markdown +86 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  11. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  12. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  13. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  14. data/ext/k12/lib/KangarooTwelve.c +332 -0
  15. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  16. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  19. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  20. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  24. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  25. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  26. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  27. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  28. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  33. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  34. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  35. data/lib/sleeping_kangaroo12/binding.rb +2 -1
  36. data/lib/sleeping_kangaroo12/build/loader.rb +1 -0
  37. data/lib/sleeping_kangaroo12/build/platform.rb +1 -0
  38. data/lib/sleeping_kangaroo12/digest.rb +38 -4
  39. data/lib/sleeping_kangaroo12/version.rb +1 -1
  40. metadata +48 -288
  41. data/ext/config/xkcp.build +0 -17
  42. data/ext/xkcp/LICENSE +0 -1
  43. data/ext/xkcp/Makefile.build +0 -200
  44. data/ext/xkcp/README.markdown +0 -296
  45. data/ext/xkcp/lib/HighLevel.build +0 -143
  46. data/ext/xkcp/lib/LowLevel.build +0 -757
  47. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  48. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  49. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  50. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  51. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  52. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  53. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  54. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  55. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  56. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  57. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  58. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  59. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  60. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  61. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  62. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  63. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  64. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  65. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  66. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  67. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  68. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  69. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  70. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  71. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  72. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  73. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  74. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  75. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  76. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  77. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  78. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  79. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  80. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  81. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  82. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  83. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  84. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  96. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  98. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  99. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  100. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  107. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  108. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  109. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  111. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  112. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  113. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  114. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  115. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  116. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  117. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  120. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  121. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  122. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  123. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  124. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  125. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  126. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  127. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  128. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  129. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  130. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  131. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  132. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  133. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  145. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  146. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  147. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  148. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  149. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  159. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  160. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  161. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  162. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  163. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  170. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  171. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  172. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  173. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  174. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  175. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  177. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  178. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  179. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  180. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  181. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  182. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  183. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  184. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  185. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  186. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  187. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  189. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  190. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  191. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  192. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  193. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  194. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  195. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  196. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  203. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  204. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  205. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  206. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  207. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  208. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  209. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  210. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  211. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  212. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  213. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  219. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  220. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  221. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  222. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  223. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  224. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  225. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  226. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  227. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  228. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  229. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  230. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  231. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  232. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  233. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  234. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  235. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  236. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  237. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  246. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  247. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  248. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  249. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  250. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  251. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  252. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  253. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  254. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  255. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  256. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  257. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  258. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  259. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  260. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  261. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  262. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  263. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  264. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  265. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  266. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  267. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  268. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  269. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  270. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  271. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  272. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  273. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  274. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  275. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  276. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  277. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  278. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  279. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  280. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  281. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  282. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  283. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  284. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  285. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  286. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  287. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  288. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  289. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  290. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  291. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  292. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  293. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  294. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  295. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  296. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,881 +0,0 @@
1
- /*
2
- The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
-
4
- Implementation by Ronny Van Keer, hereby denoted as "the implementer".
5
-
6
- For more information, feedback or questions, please refer to the Keccak Team website:
7
- https://keccak.team/
8
-
9
- To the extent possible under law, the implementer has waived all copyright
10
- and related or neighboring rights to the source code in this file.
11
- http://creativecommons.org/publicdomain/zero/1.0/
12
-
13
- ---
14
-
15
- This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
16
- Please refer to PlSnP-documentation.h for more details.
17
-
18
- This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
19
- Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
- */
21
-
22
- #include <stdio.h>
23
- #include <stdlib.h>
24
- #include <string.h>
25
- #include <stdint.h>
26
- #include <smmintrin.h>
27
- #include <wmmintrin.h>
28
- #include <immintrin.h>
29
- #include <emmintrin.h>
30
- #include "align.h"
31
- #include "KeccakP-1600-times4-SnP.h"
32
- #include "SIMD512-4-config.h"
33
-
34
- #include "brg_endian.h"
35
- #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
36
- #error Expecting a little-endian platform
37
- #endif
38
-
39
- /* Comment the define hereunder when compiling for a CPU with AVX-512 SIMD */
40
- /*
41
- * Warning: This code has only been tested on Haswell (AVX2) with SIMULATE_AVX512 defined,
42
- * errors will occur if we did a bad interpretation of the AVX-512 intrinsics'
43
- * API or functionality.
44
- */
45
- /* #define SIMULATE_AVX512 */
46
-
47
- #if defined(SIMULATE_AVX512)
48
-
49
- typedef struct
50
- {
51
- uint64_t x[8];
52
- } __m512i;
53
-
54
- static __m512i _mm512_xor_si512( __m512i a, __m512i b)
55
- {
56
- __m512i r;
57
- unsigned int i;
58
-
59
- for ( i = 0; i < 8; ++i )
60
- r.x[i] = a.x[i] ^ b.x[i];
61
- return(r);
62
- }
63
-
64
- static __m256i _mm256_ternarylogic_epi64(__m256i a, __m256i b, __m256i c, int imm)
65
- {
66
-
67
- if (imm == 0x96)
68
- return _mm256_xor_si256( _mm256_xor_si256( a, b ), c );
69
- if (imm == 0xD2)
70
- return _mm256_xor_si256( a, _mm256_andnot_si256(b, c) );
71
- printf( "_mm256_ternarylogic_epi64( a, b, c, %02X) not implemented!\n", imm );
72
- exit(1);
73
- }
74
-
75
- static __m256i _mm256_rol_epi64(__m256i a, int offset)
76
- {
77
- return _mm256_or_si256(_mm256_slli_epi64(a, offset), _mm256_srli_epi64(a, 64-offset));
78
- }
79
-
80
- static __m512i _mm512_i32gather_epi64(__m256i idx, const void *p, int scale)
81
- {
82
- __m512i r;
83
- unsigned int i;
84
- uint32_t offset[8];
85
-
86
- _mm256_store_si256( (__m256i*)offset, idx );
87
- for ( i = 0; i < 8; ++i )
88
- r.x[i] = *(const uint64_t*)((const char*)p + offset[i] * scale);
89
- return(r);
90
- }
91
-
92
- static void _mm256_i32scatter_epi64( void *p, __m128i idx, __m256i value, int scale)
93
- {
94
- unsigned int i;
95
- uint64_t v[4];
96
- uint32_t offset[4];
97
-
98
- _mm_store_ps( (float*)offset, (__m128)idx );
99
- _mm256_store_si256( (__m256i*)v, value );
100
- for ( i = 0; i < 4; ++i )
101
- *(uint64_t*)((char*)p + offset[i] * scale) = v[i];
102
- }
103
-
104
- static void _mm512_i32scatter_epi64( void *p, __m256i idx, __m512i value, int scale)
105
- {
106
- unsigned int i;
107
- uint32_t offset[8];
108
-
109
- _mm256_store_si256( (__m256i*)offset, idx );
110
- for ( i = 0; i < 8; ++i )
111
- *(uint64_t*)((char*)p + offset[i] * scale) = value.x[i];
112
- }
113
-
114
- #endif
115
-
116
- typedef __m128i V128;
117
- typedef __m256i V256;
118
- typedef __m512i V512;
119
-
120
- #if defined(KeccakP1600times4_useAVX512)
121
-
122
- #define XOR(a,b) _mm256_xor_si256(a,b)
123
- #define XOR3(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0x96)
124
- #define XOR5(a,b,c,d,e) XOR3(XOR3(a,b,c),d,e)
125
- #define XOR512(a,b) _mm512_xor_si512(a,b)
126
- #define ROL(a,offset) _mm256_rol_epi64(a,offset)
127
- #define Chi(a,b,c) _mm256_ternarylogic_epi64(a,b,c,0xD2)
128
-
129
- #define CONST256_64(a) _mm256_set1_epi64x(a)
130
- #define LOAD4_32(a,b,c,d) _mm_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d))
131
- #define LOAD8_32(a,b,c,d,e,f,g,h) _mm256_set_epi32((uint64_t)(a), (uint32_t)(b), (uint32_t)(c), (uint32_t)(d), (uint32_t)(e), (uint32_t)(f), (uint32_t)(g), (uint32_t)(h))
132
- #define LOAD_GATHER4_64(idx,p) _mm256_i32gather_epi64( (const void*)(p), idx, 8)
133
- #define LOAD_GATHER8_64(idx,p) _mm512_i32gather_epi64( idx, (const void*)(p), 8)
134
- #define STORE_SCATTER4_64(p,idx, v) _mm256_i32scatter_epi64( (void*)(p), idx, v, 8)
135
- #define STORE_SCATTER8_64(p,idx, v) _mm512_i32scatter_epi64( (void*)(p), idx, v, 8)
136
-
137
- #endif
138
-
139
- #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + instanceIndex)
140
- #define SnP_laneLengthInBytes 8
141
-
142
- void KeccakP1600times4_InitializeAll(void *states)
143
- {
144
- memset(states, 0, KeccakP1600times4_statesSizeInBytes);
145
- }
146
-
147
- void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
148
- {
149
- unsigned int sizeLeft = length;
150
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
151
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
152
- const unsigned char *curData = data;
153
- uint64_t *statesAsLanes = states;
154
-
155
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
156
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
157
- uint64_t lane = 0;
158
- if (bytesInLane > sizeLeft)
159
- bytesInLane = sizeLeft;
160
- memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
161
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
162
- sizeLeft -= bytesInLane;
163
- lanePosition++;
164
- curData += bytesInLane;
165
- }
166
-
167
- while(sizeLeft >= SnP_laneLengthInBytes) {
168
- uint64_t lane = *((const uint64_t*)curData);
169
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
170
- sizeLeft -= SnP_laneLengthInBytes;
171
- lanePosition++;
172
- curData += SnP_laneLengthInBytes;
173
- }
174
-
175
- if (sizeLeft > 0) {
176
- uint64_t lane = 0;
177
- memcpy(&lane, curData, sizeLeft);
178
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
179
- }
180
- }
181
-
182
- void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
183
- {
184
- V256 *stateAsLanes256 = states;
185
- V512 *stateAsLanes512 = states;
186
- const uint64_t *dataAsLanes = (const uint64_t *)data;
187
- unsigned int i;
188
- V256 index512;
189
- V128 index256;
190
-
191
- #define Add_In1( argIndex ) stateAsLanes256[argIndex] = XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, dataAsLanes+argIndex))
192
- #define Add_In2( argIndex ) stateAsLanes512[argIndex/2] = XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, dataAsLanes+argIndex))
193
- index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
194
- index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
195
- if ( laneCount >= 16 ) {
196
- Add_In2( 0 );
197
- Add_In2( 2 );
198
- Add_In2( 4 );
199
- Add_In2( 6 );
200
- Add_In2( 8 );
201
- Add_In2( 10 );
202
- Add_In2( 12 );
203
- Add_In2( 14 );
204
- if ( laneCount >= 20 ) {
205
- Add_In2( 16 );
206
- Add_In2( 18 );
207
- for(i=20; i<laneCount; i++)
208
- Add_In1( i );
209
- }
210
- else {
211
- for(i=16; i<laneCount; i++)
212
- Add_In1( i );
213
- }
214
- }
215
- else {
216
- for(i=0; i<laneCount; i++)
217
- Add_In1( i );
218
- }
219
- #undef Add_In1
220
- #undef Add_In2
221
- }
222
-
223
- void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
224
- {
225
- unsigned int sizeLeft = length;
226
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
227
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
228
- const unsigned char *curData = data;
229
- uint64_t *statesAsLanes = states;
230
-
231
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
232
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
233
- if (bytesInLane > sizeLeft)
234
- bytesInLane = sizeLeft;
235
- memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
236
- sizeLeft -= bytesInLane;
237
- lanePosition++;
238
- curData += bytesInLane;
239
- }
240
-
241
- while(sizeLeft >= SnP_laneLengthInBytes) {
242
- uint64_t lane = *((const uint64_t*)curData);
243
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
244
- sizeLeft -= SnP_laneLengthInBytes;
245
- lanePosition++;
246
- curData += SnP_laneLengthInBytes;
247
- }
248
-
249
- if (sizeLeft > 0) {
250
- memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
251
- }
252
- }
253
-
254
- void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
255
- {
256
- V256 *stateAsLanes256 = states;
257
- V512 *stateAsLanes512 = states;
258
- const uint64_t *dataAsLanes = (const uint64_t *)data;
259
- unsigned int i;
260
- V256 index512;
261
- V128 index256;
262
-
263
- #define OverWr1( argIndex ) stateAsLanes256[argIndex] = LOAD_GATHER4_64(index256, dataAsLanes+argIndex)
264
- #define OverWr2( argIndex ) stateAsLanes512[argIndex/2] = LOAD_GATHER8_64(index512, dataAsLanes+argIndex)
265
- index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
266
- index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
267
- if ( laneCount >= 16 ) {
268
- OverWr2( 0 );
269
- OverWr2( 2 );
270
- OverWr2( 4 );
271
- OverWr2( 6 );
272
- OverWr2( 8 );
273
- OverWr2( 10 );
274
- OverWr2( 12 );
275
- OverWr2( 14 );
276
- if ( laneCount >= 20 ) {
277
- OverWr2( 16 );
278
- OverWr2( 18 );
279
- for(i=20; i<laneCount; i++)
280
- OverWr1( i );
281
- }
282
- else {
283
- for(i=16; i<laneCount; i++)
284
- OverWr1( i );
285
- }
286
- }
287
- else {
288
- for(i=0; i<laneCount; i++)
289
- OverWr1( i );
290
- }
291
- #undef OverWr1
292
- #undef OverWr2
293
- }
294
-
295
- void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
296
- {
297
- unsigned int sizeLeft = byteCount;
298
- unsigned int lanePosition = 0;
299
- uint64_t *statesAsLanes = states;
300
-
301
- while(sizeLeft >= SnP_laneLengthInBytes) {
302
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
303
- sizeLeft -= SnP_laneLengthInBytes;
304
- lanePosition++;
305
- }
306
-
307
- if (sizeLeft > 0) {
308
- memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
309
- }
310
- }
311
-
312
- void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
313
- {
314
- unsigned int sizeLeft = length;
315
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
316
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
317
- unsigned char *curData = data;
318
- const uint64_t *statesAsLanes = states;
319
-
320
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
321
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
322
- if (bytesInLane > sizeLeft)
323
- bytesInLane = sizeLeft;
324
- memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
325
- sizeLeft -= bytesInLane;
326
- lanePosition++;
327
- curData += bytesInLane;
328
- }
329
-
330
- while(sizeLeft >= SnP_laneLengthInBytes) {
331
- *(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
332
- sizeLeft -= SnP_laneLengthInBytes;
333
- lanePosition++;
334
- curData += SnP_laneLengthInBytes;
335
- }
336
-
337
- if (sizeLeft > 0) {
338
- memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
339
- }
340
- }
341
-
342
- void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
343
- {
344
- const V256 *stateAsLanes256 = states;
345
- const V512 *stateAsLanes512 = states;
346
- uint64_t *dataAsLanes = (uint64_t *)data;
347
- unsigned int i;
348
- V256 index512;
349
- V128 index256;
350
-
351
- #define Extr1( argIndex ) STORE_SCATTER4_64(dataAsLanes+argIndex, index256, stateAsLanes256[argIndex])
352
- #define Extr2( argIndex ) STORE_SCATTER8_64(dataAsLanes+argIndex, index512, stateAsLanes512[argIndex/2])
353
- index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
354
- index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
355
- if ( laneCount >= 16 ) {
356
- Extr2( 0 );
357
- Extr2( 2 );
358
- Extr2( 4 );
359
- Extr2( 6 );
360
- Extr2( 8 );
361
- Extr2( 10 );
362
- Extr2( 12 );
363
- Extr2( 14 );
364
- if ( laneCount >= 20 ) {
365
- Extr2( 16 );
366
- Extr2( 18 );
367
- for(i=20; i<laneCount; i++)
368
- Extr1( i );
369
- }
370
- else {
371
- for(i=16; i<laneCount; i++)
372
- Extr1( i );
373
- }
374
- }
375
- else {
376
- for(i=0; i<laneCount; i++)
377
- Extr1( i );
378
- }
379
- #undef Extr1
380
- #undef Extr2
381
- }
382
-
383
- void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
384
- {
385
- unsigned int sizeLeft = length;
386
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
387
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
388
- const unsigned char *curInput = input;
389
- unsigned char *curOutput = output;
390
- const uint64_t *statesAsLanes = states;
391
-
392
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
393
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
394
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
395
- if (bytesInLane > sizeLeft)
396
- bytesInLane = sizeLeft;
397
- sizeLeft -= bytesInLane;
398
- do {
399
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
400
- lane >>= 8;
401
- } while ( --bytesInLane != 0);
402
- lanePosition++;
403
- }
404
-
405
- while(sizeLeft >= SnP_laneLengthInBytes) {
406
- *((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
407
- sizeLeft -= SnP_laneLengthInBytes;
408
- lanePosition++;
409
- curInput += SnP_laneLengthInBytes;
410
- curOutput += SnP_laneLengthInBytes;
411
- }
412
-
413
- if (sizeLeft != 0) {
414
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
415
- do {
416
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
417
- lane >>= 8;
418
- } while ( --sizeLeft != 0);
419
- }
420
- }
421
-
422
- void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
423
- {
424
- const V256 *stateAsLanes256 = states;
425
- const V512 *stateAsLanes512 = states;
426
- const uint64_t *inAsLanes = (const uint64_t *)input;
427
- uint64_t *outAsLanes = (uint64_t *)output;
428
- unsigned int i;
429
- V256 index512;
430
- V128 index256;
431
-
432
- #define ExtrAdd1( argIndex ) STORE_SCATTER4_64(outAsLanes+argIndex, index256, XOR(stateAsLanes256[argIndex], LOAD_GATHER4_64(index256, inAsLanes+argIndex)))
433
- #define ExtrAdd2( argIndex ) STORE_SCATTER8_64(outAsLanes+argIndex, index512, XOR512(stateAsLanes512[argIndex/2], LOAD_GATHER8_64(index512, inAsLanes+argIndex)))
434
- index256 = LOAD4_32(3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
435
- index512 = LOAD8_32(3*laneOffset+1, 2*laneOffset+1, 1*laneOffset+1, 0*laneOffset+1, 3*laneOffset, 2*laneOffset, 1*laneOffset, 0*laneOffset);
436
-
437
- if ( laneCount >= 16 ) {
438
- ExtrAdd2( 0 );
439
- ExtrAdd2( 2 );
440
- ExtrAdd2( 4 );
441
- ExtrAdd2( 6 );
442
- ExtrAdd2( 8 );
443
- ExtrAdd2( 10 );
444
- ExtrAdd2( 12 );
445
- ExtrAdd2( 14 );
446
- if ( laneCount >= 20 ) {
447
- ExtrAdd2( 16 );
448
- ExtrAdd2( 18 );
449
- for(i=20; i<laneCount; i++)
450
- ExtrAdd1( i );
451
- }
452
- else {
453
- for(i=16; i<laneCount; i++)
454
- ExtrAdd1( i );
455
- }
456
- }
457
- else {
458
- for(i=0; i<laneCount; i++)
459
- ExtrAdd1( i );
460
- }
461
- #undef ExtrAdd1
462
- #undef ExtrAdd2
463
-
464
- }
465
-
466
/*
 * Table of the 24 64-bit round constants, indexed by round number.
 * The round macros below read entry i through
 * CONST256_64(KeccakP1600RoundConstants[i]) and XOR it into the first lane
 * at the end of each round (the step marked "Iota").
 */
- static ALIGN(KeccakP1600times4_statesAlignment) const uint64_t KeccakP1600RoundConstants[24] = {
467
- 0x0000000000000001ULL,
468
- 0x0000000000008082ULL,
469
- 0x800000000000808aULL,
470
- 0x8000000080008000ULL,
471
- 0x000000000000808bULL,
472
- 0x0000000080000001ULL,
473
- 0x8000000080008081ULL,
474
- 0x8000000000008009ULL,
475
- 0x000000000000008aULL,
476
- 0x0000000000000088ULL,
477
- 0x0000000080008009ULL,
478
- 0x000000008000000aULL,
479
- 0x000000008000808bULL,
480
- 0x800000000000008bULL,
481
- 0x8000000000008089ULL,
482
- 0x8000000000008003ULL,
483
- 0x8000000000008002ULL,
484
- 0x8000000000000080ULL,
485
- 0x000000000000800aULL,
486
- 0x800000008000000aULL,
487
- 0x8000000080008081ULL,
488
- 0x8000000000008080ULL,
489
- 0x0000000080000001ULL,
490
- 0x8000000080008008ULL};
491
-
492
/*
 * Declares the V256 working variables that the round macros refer to by name:
 * the temporaries _Ba.._Bu and _Da.._Du, and the 25 state lanes _ba.._su
 * (loaded by copyFromState / stored by copyToState).
 * The expansion has no trailing semicolon; callers write "KeccakP_DeclareVars;".
 */
- #define KeccakP_DeclareVars \
493
- V256 _Ba, _Be, _Bi, _Bo, _Bu; \
494
- V256 _Da, _De, _Di, _Do, _Du; \
495
- V256 _ba, _be, _bi, _bo, _bu; \
496
- V256 _ga, _ge, _gi, _go, _gu; \
497
- V256 _ka, _ke, _ki, _ko, _ku; \
498
- V256 _ma, _me, _mi, _mo, _mu; \
499
- V256 _sa, _se, _si, _so, _su
500
-
501
/*
 * Processes five lanes _L1.._L5 in place:
 *   1. XORs them with _Da, _De, _Di, _Do, _Du into the temporaries named by
 *      _Bb1.._Bb5 (callers pass some ordering of _Ba.._Bu);
 *   2. rotates each temporary left by _Rr1.._Rr5 (the first rotation is
 *      skipped when _Rr1 is 0);
 *   3. overwrites _L1.._L5 with Chi() of consecutive triples of _Ba.._Bu.
 * _Da.._Du must already hold valid values; KeccakP_ThetaRhoPiChiIota0
 * computes them. The expansion ends with a semicolon and is a sequence of
 * statements, not a single expression.
 */
- #define KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bb1, _Bb2, _Bb3, _Bb4, _Bb5, _Rr1, _Rr2, _Rr3, _Rr4, _Rr5 ) \
502
- _Bb1 = XOR(_L1, _Da); \
503
- _Bb2 = XOR(_L2, _De); \
504
- _Bb3 = XOR(_L3, _Di); \
505
- _Bb4 = XOR(_L4, _Do); \
506
- _Bb5 = XOR(_L5, _Du); \
507
- if (_Rr1 != 0) _Bb1 = ROL(_Bb1, _Rr1); \
508
- _Bb2 = ROL(_Bb2, _Rr2); \
509
- _Bb3 = ROL(_Bb3, _Rr3); \
510
- _Bb4 = ROL(_Bb4, _Rr4); \
511
- _Bb5 = ROL(_Bb5, _Rr5); \
512
- _L1 = Chi( _Ba, _Be, _Bi); \
513
- _L2 = Chi( _Be, _Bi, _Bo); \
514
- _L3 = Chi( _Bi, _Bo, _Bu); \
515
- _L4 = Chi( _Bo, _Bu, _Ba); \
516
- _L5 = Chi( _Bu, _Ba, _Be);
517
-
518
/*
 * First of the five per-round steps. It:
 *   - XORs the five lanes of each of the a/e/i/o/u groups of the state
 *     together (XOR5) into _Ba.._Bu;
 *   - derives _Da.._Du from them, each being one of those values rotated
 *     left by 1 XORed with another of them;
 *   - runs KeccakP_ThetaRhoPiChi on _L1.._L5 with rotations 0, 44, 43, 21, 14;
 *   - XORs the round constant _rc into _L1.
 * The _Da.._Du values computed here are reused by the ThetaRhoPiChi1..4
 * steps that follow in the same round. The expansion has no trailing
 * semicolon.
 */
- #define KeccakP_ThetaRhoPiChiIota0( _L1, _L2, _L3, _L4, _L5, _rc ) \
519
- _Ba = XOR5( _ba, _ga, _ka, _ma, _sa ); /* Theta effect */ \
520
- _Be = XOR5( _be, _ge, _ke, _me, _se ); \
521
- _Bi = XOR5( _bi, _gi, _ki, _mi, _si ); \
522
- _Bo = XOR5( _bo, _go, _ko, _mo, _so ); \
523
- _Bu = XOR5( _bu, _gu, _ku, _mu, _su ); \
524
- _Da = ROL( _Be, 1 ); \
525
- _De = ROL( _Bi, 1 ); \
526
- _Di = ROL( _Bo, 1 ); \
527
- _Do = ROL( _Bu, 1 ); \
528
- _Du = ROL( _Ba, 1 ); \
529
- _Da = XOR( _Da, _Bu ); \
530
- _De = XOR( _De, _Ba ); \
531
- _Di = XOR( _Di, _Be ); \
532
- _Do = XOR( _Do, _Bi ); \
533
- _Du = XOR( _Du, _Bo ); \
534
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Ba, _Be, _Bi, _Bo, _Bu, 0, 44, 43, 21, 14 ); \
535
- _L1 = XOR(_L1, _rc) /* Iota */
536
-
537
/*
 * Steps 1 to 4 of a round. Each is KeccakP_ThetaRhoPiChi with a fixed
 * ordering of the temporaries _Ba.._Bu and a fixed set of five rotation
 * amounts. They rely on the _Da.._Du values left by the preceding
 * KeccakP_ThetaRhoPiChiIota0 of the same round.
 */
- #define KeccakP_ThetaRhoPiChi1( _L1, _L2, _L3, _L4, _L5 ) \
538
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bi, _Bo, _Bu, _Ba, _Be, 3, 45, 61, 28, 20 )
539
-
540
- #define KeccakP_ThetaRhoPiChi2( _L1, _L2, _L3, _L4, _L5 ) \
541
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bu, _Ba, _Be, _Bi, _Bo, 18, 1, 6, 25, 8 )
542
-
543
- #define KeccakP_ThetaRhoPiChi3( _L1, _L2, _L3, _L4, _L5 ) \
544
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Be, _Bi, _Bo, _Bu, _Ba, 36, 10, 15, 56, 27 )
545
-
546
- #define KeccakP_ThetaRhoPiChi4( _L1, _L2, _L3, _L4, _L5 ) \
547
- KeccakP_ThetaRhoPiChi( _L1, _L2, _L3, _L4, _L5, _Bo, _Bu, _Ba, _Be, _Bi, 41, 2, 62, 55, 39 )
548
-
549
/*
 * Four consecutive rounds, using round constants i, i+1, i+2 and i+3.
 * Each round is KeccakP_ThetaRhoPiChiIota0 followed by
 * KeccakP_ThetaRhoPiChi1..4. The state variables passed to the steps are in
 * a different order in each of the first three rounds and in plain
 * _ba.._su order in the fourth, so the exact argument lists must not be
 * altered. The expansion has no trailing semicolon.
 */
- #define KeccakP_4rounds( i ) \
550
- KeccakP_ThetaRhoPiChiIota0(_ba, _ge, _ki, _mo, _su, CONST256_64(KeccakP1600RoundConstants[i]) ); \
551
- KeccakP_ThetaRhoPiChi1( _ka, _me, _si, _bo, _gu ); \
552
- KeccakP_ThetaRhoPiChi2( _sa, _be, _gi, _ko, _mu ); \
553
- KeccakP_ThetaRhoPiChi3( _ga, _ke, _mi, _so, _bu ); \
554
- KeccakP_ThetaRhoPiChi4( _ma, _se, _bi, _go, _ku ); \
555
- \
556
- KeccakP_ThetaRhoPiChiIota0(_ba, _me, _gi, _so, _ku, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
557
- KeccakP_ThetaRhoPiChi1( _sa, _ke, _bi, _mo, _gu ); \
558
- KeccakP_ThetaRhoPiChi2( _ma, _ge, _si, _ko, _bu ); \
559
- KeccakP_ThetaRhoPiChi3( _ka, _be, _mi, _go, _su ); \
560
- KeccakP_ThetaRhoPiChi4( _ga, _se, _ki, _bo, _mu ); \
561
- \
562
- KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i+2]) ); \
563
- KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
564
- KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
565
- KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
566
- KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
567
- \
568
- KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+3]) ); \
569
- KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
570
- KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
571
- KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
572
- KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
573
-
574
/*
 * Two consecutive rounds, using round constants i and i+1.
 * The step argument lists are the same as those of the third and fourth
 * rounds of KeccakP_4rounds, so the state variables must have been loaded
 * with copyFromState2rounds (as KeccakP1600times4_PermuteAll_6rounds does)
 * rather than copyFromState. The expansion has no trailing semicolon.
 */
- #define KeccakP_2rounds( i ) \
575
- KeccakP_ThetaRhoPiChiIota0(_ba, _ke, _si, _go, _mu, CONST256_64(KeccakP1600RoundConstants[i]) ); \
576
- KeccakP_ThetaRhoPiChi1( _ma, _be, _ki, _so, _gu ); \
577
- KeccakP_ThetaRhoPiChi2( _ga, _me, _bi, _ko, _su ); \
578
- KeccakP_ThetaRhoPiChi3( _sa, _ge, _mi, _bo, _ku ); \
579
- KeccakP_ThetaRhoPiChi4( _ka, _se, _gi, _mo, _bu ); \
580
- \
581
- KeccakP_ThetaRhoPiChiIota0(_ba, _be, _bi, _bo, _bu, CONST256_64(KeccakP1600RoundConstants[i+1]) ); \
582
- KeccakP_ThetaRhoPiChi1( _ga, _ge, _gi, _go, _gu ); \
583
- KeccakP_ThetaRhoPiChi2( _ka, _ke, _ki, _ko, _ku ); \
584
- KeccakP_ThetaRhoPiChi3( _ma, _me, _mi, _mo, _mu ); \
585
- KeccakP_ThetaRhoPiChi4( _sa, _se, _si, _so, _su )
586
-
587
/*
 * Selects how the 12-round (constants 12..23) and 24-round (constants 0..23)
 * sequences are generated, based on the build configuration:
 *   - KeccakP1600times4_fullUnrolling defined: both fully unrolled;
 *   - KeccakP1600times4_unrolling == 4: both loop over KeccakP_4rounds;
 *   - KeccakP1600times4_unrolling == 12: rounds12 unrolled, rounds24 loops
 *     twice over three KeccakP_4rounds.
 * Any other configuration is a compile-time error.
 * The looping variants use a variable named "i" that the enclosing function
 * must declare. None of the expansions has a trailing semicolon.
 */
- #ifdef KeccakP1600times4_fullUnrolling
588
-
589
- #define rounds12 \
590
- KeccakP_4rounds( 12 ); \
591
- KeccakP_4rounds( 16 ); \
592
- KeccakP_4rounds( 20 )
593
-
594
- #define rounds24 \
595
- KeccakP_4rounds( 0 ); \
596
- KeccakP_4rounds( 4 ); \
597
- KeccakP_4rounds( 8 ); \
598
- KeccakP_4rounds( 12 ); \
599
- KeccakP_4rounds( 16 ); \
600
- KeccakP_4rounds( 20 )
601
-
602
- #elif (KeccakP1600times4_unrolling == 4)
603
-
604
- #define rounds12 \
605
- i = 12; \
606
- do { \
607
- KeccakP_4rounds( i ); \
608
- } while( (i += 4) < 24 )
609
-
610
- #define rounds24 \
611
- i = 0; \
612
- do { \
613
- KeccakP_4rounds( i ); \
614
- } while( (i += 4) < 24 )
615
-
616
- #elif (KeccakP1600times4_unrolling == 12)
617
-
618
- #define rounds12 \
619
- KeccakP_4rounds( 12 ); \
620
- KeccakP_4rounds( 16 ); \
621
- KeccakP_4rounds( 20 )
622
-
623
- #define rounds24 \
624
- i = 0; \
625
- do { \
626
- KeccakP_4rounds( i ); \
627
- KeccakP_4rounds( i+4 ); \
628
- KeccakP_4rounds( i+8 ); \
629
- } while( (i += 12) < 24 )
630
-
631
- #else
632
- #error "Unrolling is not correctly specified!"
633
- #endif
634
-
635
/*
 * Loads the 25 state lanes from pState[0..24] into the working variables in
 * a permuted order, for use before KeccakP_2rounds (see
 * KeccakP1600times4_PermuteAll_6rounds). The trailing comment on a line names
 * the variable that copyFromState loads from the same index; lines without a
 * comment load the same variable as copyFromState does.
 * The expansion has no trailing semicolon.
 */
- #define copyFromState2rounds(pState) \
636
- _ba = pState[ 0]; \
637
- _be = pState[16]; /* me */ \
638
- _bi = pState[ 7]; /* gi */ \
639
- _bo = pState[23]; /* so */ \
640
- _bu = pState[14]; /* ku */ \
641
- _ga = pState[20]; /* sa */ \
642
- _ge = pState[11]; /* ke */ \
643
- _gi = pState[ 2]; /* bi */ \
644
- _go = pState[18]; /* mo */ \
645
- _gu = pState[ 9]; \
646
- _ka = pState[15]; /* ma */ \
647
- _ke = pState[ 6]; /* ge */ \
648
- _ki = pState[22]; /* si */ \
649
- _ko = pState[13]; \
650
- _ku = pState[ 4]; /* bu */ \
651
- _ma = pState[10]; /* ka */ \
652
- _me = pState[ 1]; /* be */ \
653
- _mi = pState[17]; \
654
- _mo = pState[ 8]; /* go */ \
655
- _mu = pState[24]; /* su */ \
656
- _sa = pState[ 5]; /* ga */ \
657
- _se = pState[21]; \
658
- _si = pState[12]; /* ki */ \
659
- _so = pState[ 3]; /* bo */ \
660
- _su = pState[19] /* mu */
661
-
662
/*
 * Loads the 25 state lanes pState[0..24] into the working variables
 * _ba, _be, ..., _su in index order. The variables are the ones declared by
 * KeccakP_DeclareVars. The expansion has no trailing semicolon.
 */
- #define copyFromState(pState) \
663
- _ba = pState[ 0]; \
664
- _be = pState[ 1]; \
665
- _bi = pState[ 2]; \
666
- _bo = pState[ 3]; \
667
- _bu = pState[ 4]; \
668
- _ga = pState[ 5]; \
669
- _ge = pState[ 6]; \
670
- _gi = pState[ 7]; \
671
- _go = pState[ 8]; \
672
- _gu = pState[ 9]; \
673
- _ka = pState[10]; \
674
- _ke = pState[11]; \
675
- _ki = pState[12]; \
676
- _ko = pState[13]; \
677
- _ku = pState[14]; \
678
- _ma = pState[15]; \
679
- _me = pState[16]; \
680
- _mi = pState[17]; \
681
- _mo = pState[18]; \
682
- _mu = pState[19]; \
683
- _sa = pState[20]; \
684
- _se = pState[21]; \
685
- _si = pState[22]; \
686
- _so = pState[23]; \
687
- _su = pState[24]
688
-
689
/*
 * Stores the working variables _ba, _be, ..., _su back to pState[0..24] in
 * index order; the inverse of copyFromState. The expansion has no trailing
 * semicolon.
 */
- #define copyToState(pState) \
690
- pState[ 0] = _ba; \
691
- pState[ 1] = _be; \
692
- pState[ 2] = _bi; \
693
- pState[ 3] = _bo; \
694
- pState[ 4] = _bu; \
695
- pState[ 5] = _ga; \
696
- pState[ 6] = _ge; \
697
- pState[ 7] = _gi; \
698
- pState[ 8] = _go; \
699
- pState[ 9] = _gu; \
700
- pState[10] = _ka; \
701
- pState[11] = _ke; \
702
- pState[12] = _ki; \
703
- pState[13] = _ko; \
704
- pState[14] = _ku; \
705
- pState[15] = _ma; \
706
- pState[16] = _me; \
707
- pState[17] = _mi; \
708
- pState[18] = _mo; \
709
- pState[19] = _mu; \
710
- pState[20] = _sa; \
711
- pState[21] = _se; \
712
- pState[22] = _si; \
713
- pState[23] = _so; \
714
- pState[24] = _su
715
-
716
- void KeccakP1600times4_PermuteAll_24rounds(void *states)
717
- {
718
- V256 *statesAsLanes = states;
719
- KeccakP_DeclareVars;
720
- #ifndef KeccakP1600times4_fullUnrolling
721
- unsigned int i;
722
- #endif
723
-
724
- copyFromState(statesAsLanes);
725
- rounds24;
726
- copyToState(statesAsLanes);
727
- }
728
-
729
- void KeccakP1600times4_PermuteAll_12rounds(void *states)
730
- {
731
- V256 *statesAsLanes = states;
732
- KeccakP_DeclareVars;
733
- #if (KeccakP1600times4_unrolling < 12)
734
- unsigned int i;
735
- #endif
736
-
737
- copyFromState(statesAsLanes);
738
- rounds12;
739
- copyToState(statesAsLanes);
740
- }
741
-
742
- void KeccakP1600times4_PermuteAll_6rounds(void *states)
743
- {
744
- V256 *statesAsLanes = states;
745
- KeccakP_DeclareVars;
746
-
747
- copyFromState2rounds(statesAsLanes);
748
- KeccakP_2rounds( 18 );
749
- KeccakP_4rounds( 20 );
750
- copyToState(statesAsLanes);
751
- }
752
-
753
- void KeccakP1600times4_PermuteAll_4rounds(void *states)
754
- {
755
- V256 *statesAsLanes = states;
756
- KeccakP_DeclareVars;
757
-
758
- copyFromState(statesAsLanes);
759
- KeccakP_4rounds( 20 );
760
- copyToState(statesAsLanes);
761
- }
762
-
763
- size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
764
- {
765
- size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
766
-
767
- if (laneCount == 21) {
768
- #ifndef KeccakP1600times4_fullUnrolling
769
- unsigned int i;
770
- #endif
771
- const unsigned char *dataStart = data;
772
- V256 *statesAsLanes = states;
773
- const uint64_t *dataAsLanes = (const uint64_t *)data;
774
- KeccakP_DeclareVars;
775
- V128 index;
776
-
777
- copyFromState(statesAsLanes);
778
- index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
779
- while(dataByteLen >= dataMinimumSize) {
780
- #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
781
- Add_In( _ba, 0 );
782
- Add_In( _be, 1 );
783
- Add_In( _bi, 2 );
784
- Add_In( _bo, 3 );
785
- Add_In( _bu, 4 );
786
- Add_In( _ga, 5 );
787
- Add_In( _ge, 6 );
788
- Add_In( _gi, 7 );
789
- Add_In( _go, 8 );
790
- Add_In( _gu, 9 );
791
- Add_In( _ka, 10 );
792
- Add_In( _ke, 11 );
793
- Add_In( _ki, 12 );
794
- Add_In( _ko, 13 );
795
- Add_In( _ku, 14 );
796
- Add_In( _ma, 15 );
797
- Add_In( _me, 16 );
798
- Add_In( _mi, 17 );
799
- Add_In( _mo, 18 );
800
- Add_In( _mu, 19 );
801
- Add_In( _sa, 20 );
802
- #undef Add_In
803
- rounds24;
804
- dataAsLanes += laneOffsetSerial;
805
- dataByteLen -= laneOffsetSerial*8;
806
- }
807
- copyToState(statesAsLanes);
808
- return (const unsigned char *)dataAsLanes - dataStart;
809
- }
810
- else {
811
- const unsigned char *dataStart = data;
812
-
813
- while(dataByteLen >= dataMinimumSize) {
814
- KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
815
- KeccakP1600times4_PermuteAll_24rounds(states);
816
- data += laneOffsetSerial*8;
817
- dataByteLen -= laneOffsetSerial*8;
818
- }
819
- return data - dataStart;
820
- }
821
- }
822
-
823
- size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
824
- {
825
- size_t dataMinimumSize = (laneOffsetParallel*3 + laneCount)*8;
826
-
827
- if (laneCount == 21) {
828
- #if (KeccakP1600times4_unrolling < 12)
829
- unsigned int i;
830
- #endif
831
- const unsigned char *dataStart = data;
832
- V256 *statesAsLanes = states;
833
- const uint64_t *dataAsLanes = (const uint64_t *)data;
834
- KeccakP_DeclareVars;
835
- V128 index;
836
-
837
- copyFromState(statesAsLanes);
838
- index = LOAD4_32(3*laneOffsetParallel, 2*laneOffsetParallel, 1*laneOffsetParallel, 0*laneOffsetParallel);
839
- while(dataByteLen >= dataMinimumSize) {
840
- #define Add_In( argLane, argIndex ) argLane = XOR(argLane, LOAD_GATHER4_64(index, dataAsLanes+argIndex))
841
- Add_In( _ba, 0 );
842
- Add_In( _be, 1 );
843
- Add_In( _bi, 2 );
844
- Add_In( _bo, 3 );
845
- Add_In( _bu, 4 );
846
- Add_In( _ga, 5 );
847
- Add_In( _ge, 6 );
848
- Add_In( _gi, 7 );
849
- Add_In( _go, 8 );
850
- Add_In( _gu, 9 );
851
- Add_In( _ka, 10 );
852
- Add_In( _ke, 11 );
853
- Add_In( _ki, 12 );
854
- Add_In( _ko, 13 );
855
- Add_In( _ku, 14 );
856
- Add_In( _ma, 15 );
857
- Add_In( _me, 16 );
858
- Add_In( _mi, 17 );
859
- Add_In( _mo, 18 );
860
- Add_In( _mu, 19 );
861
- Add_In( _sa, 20 );
862
- #undef Add_In
863
- rounds12;
864
- dataAsLanes += laneOffsetSerial;
865
- dataByteLen -= laneOffsetSerial*8;
866
- }
867
- copyToState(statesAsLanes);
868
- return (const unsigned char *)dataAsLanes - dataStart;
869
- }
870
- else {
871
- const unsigned char *dataStart = data;
872
-
873
- while(dataByteLen >= dataMinimumSize) {
874
- KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel);
875
- KeccakP1600times4_PermuteAll_12rounds(states);
876
- data += laneOffsetSerial*8;
877
- dataByteLen -= laneOffsetSerial*8;
878
- }
879
- return data - dataStart;
880
- }
881
- }