sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,957 +0,0 @@
1
- /*
2
- The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
3
-
4
- Implementation by Gilles Van Assche, hereby denoted as "the implementer".
5
-
6
- For more information, feedback or questions, please refer to the Keccak Team website:
7
- https://keccak.team/
8
-
9
- To the extent possible under law, the implementer has waived all copyright
10
- and related or neighboring rights to the source code in this file.
11
- http://creativecommons.org/publicdomain/zero/1.0/
12
-
13
- ---
14
-
15
- This file implements Keccak-p[1600]×2 in a PlSnP-compatible way.
16
- Please refer to PlSnP-documentation.h for more details.
17
-
18
- This implementation comes with KeccakP-1600-times2-SnP.h in the same folder.
19
- Please refer to LowLevel.build for the exact list of other files it must be combined with.
20
- */
21
-
22
- #include <stdint.h>
23
- #include <stdio.h>
24
- #include <stdlib.h>
25
- #include <string.h>
26
- #include <emmintrin.h>
27
- #include <pmmintrin.h>
28
- #include <tmmintrin.h>
29
- #include "SIMD128-config.h"
30
- #if defined(KeccakP1600times2_useXOP)
31
- #include <x86intrin.h>
32
- #endif
33
- #include "align.h"
34
- #include "KeccakP-1600-times2-SnP.h"
35
-
36
- #include "brg_endian.h"
37
- #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
38
- #error Expecting a little-endian platform
39
- #endif
40
-
41
- typedef __m128i V128;
42
-
43
- #define laneIndex(instanceIndex, lanePosition) ((lanePosition)*2 + instanceIndex)
44
-
45
- #if defined(KeccakP1600times2_useSSE)
46
- #define ANDnu128(a, b) _mm_andnot_si128(a, b)
47
- #define CONST128(a) _mm_load_si128((const V128 *)&(a))
48
- #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
49
- #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
50
- #define LOAD6464(a, b) _mm_set_epi64x(a, b)
51
- #define CONST128_64(a) _mm_set1_epi64x(a)
52
- #if defined(KeccakP1600times2_useXOP)
53
- #define ROL64in128(a, o) _mm_roti_epi64(a, o)
54
- #define ROL64in128_8(a) ROL64in128(a, 8)
55
- #define ROL64in128_56(a) ROL64in128(a, 56)
56
- #else
57
- #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
58
- #define ROL64in128_8(a) _mm_shuffle_epi8(a, CONST128(rho8))
59
- #define ROL64in128_56(a) _mm_shuffle_epi8(a, CONST128(rho56))
60
- static const uint64_t rho8[2] = {0x0605040302010007, 0x0E0D0C0B0A09080F};
61
- static const uint64_t rho56[2] = {0x0007060504030201, 0x080F0E0D0C0B0A09};
62
- #endif
63
- #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
64
- #define STORE128u(a, b) _mm_storeu_si128((V128 *)&(a), b)
65
- #define STORE64L(a, b) _mm_storel_epi64((__m128i *)&(a), b)
66
- #define STORE64H(a, b) _mm_storeh_pi((__m64 *)&(a), _mm_castsi128_ps(b))
67
- #define XOR128(a, b) _mm_xor_si128(a, b)
68
- #define XOReq128(a, b) a = _mm_xor_si128(a, b)
69
- #define ZERO128() _mm_setzero_si128()
70
- #if defined(KeccakP1600times2_useSSE2)
71
- #define UNPACKL( a, b ) _mm_unpacklo_epi64((a), (b))
72
- #define UNPACKH( a, b ) _mm_unpackhi_epi64((a), (b))
73
- #endif
74
- #endif
75
-
76
- #define SnP_laneLengthInBytes 8
77
-
78
- void KeccakP1600times2_InitializeAll(void *states)
79
- {
80
- memset(states, 0, KeccakP1600times2_statesSizeInBytes);
81
- }
82
-
83
- void KeccakP1600times2_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
84
- {
85
- unsigned int sizeLeft = length;
86
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
87
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
88
- const unsigned char *curData = data;
89
- uint64_t *statesAsLanes = (uint64_t *)states;
90
-
91
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
92
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
93
- uint64_t lane = 0;
94
- if (bytesInLane > sizeLeft)
95
- bytesInLane = sizeLeft;
96
- memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane);
97
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
98
- sizeLeft -= bytesInLane;
99
- lanePosition++;
100
- curData += bytesInLane;
101
- }
102
-
103
- while(sizeLeft >= SnP_laneLengthInBytes) {
104
- uint64_t lane = *((const uint64_t*)curData);
105
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
106
- sizeLeft -= SnP_laneLengthInBytes;
107
- lanePosition++;
108
- curData += SnP_laneLengthInBytes;
109
- }
110
-
111
- if (sizeLeft > 0) {
112
- uint64_t lane = 0;
113
- memcpy(&lane, curData, sizeLeft);
114
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane;
115
- }
116
- }
117
-
118
- void KeccakP1600times2_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
119
- {
120
- V128 *stateAsLanes = (V128 *)states;
121
- unsigned int i;
122
- const uint64_t *curData0 = (const uint64_t *)data;
123
- const uint64_t *curData1 = (const uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
124
- #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
125
- if ( laneCount >= 17 ) {
126
- XOR_In( 0 );
127
- XOR_In( 1 );
128
- XOR_In( 2 );
129
- XOR_In( 3 );
130
- XOR_In( 4 );
131
- XOR_In( 5 );
132
- XOR_In( 6 );
133
- XOR_In( 7 );
134
- XOR_In( 8 );
135
- XOR_In( 9 );
136
- XOR_In( 10 );
137
- XOR_In( 11 );
138
- XOR_In( 12 );
139
- XOR_In( 13 );
140
- XOR_In( 14 );
141
- XOR_In( 15 );
142
- XOR_In( 16 );
143
- if ( laneCount >= 21 ) {
144
- XOR_In( 17 );
145
- XOR_In( 18 );
146
- XOR_In( 19 );
147
- XOR_In( 20 );
148
- for(i=21; i<laneCount; i++)
149
- XOR_In( i );
150
- }
151
- else {
152
- for(i=17; i<laneCount; i++)
153
- XOR_In( i );
154
- }
155
- }
156
- else {
157
- for(i=0; i<laneCount; i++)
158
- XOR_In( i );
159
- }
160
- #undef XOR_In
161
- }
162
-
163
- void KeccakP1600times2_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
164
- {
165
- unsigned int sizeLeft = length;
166
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
167
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
168
- const unsigned char *curData = data;
169
- uint64_t *statesAsLanes = (uint64_t *)states;
170
-
171
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
172
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
173
- if (bytesInLane > sizeLeft)
174
- bytesInLane = sizeLeft;
175
- memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane);
176
- sizeLeft -= bytesInLane;
177
- lanePosition++;
178
- curData += bytesInLane;
179
- }
180
-
181
- while(sizeLeft >= SnP_laneLengthInBytes) {
182
- uint64_t lane = *((const uint64_t*)curData);
183
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane;
184
- sizeLeft -= SnP_laneLengthInBytes;
185
- lanePosition++;
186
- curData += SnP_laneLengthInBytes;
187
- }
188
-
189
- if (sizeLeft > 0) {
190
- memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft);
191
- }
192
- }
193
-
194
- void KeccakP1600times2_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
195
- {
196
- V128 *stateAsLanes = (V128 *)states;
197
- unsigned int i;
198
- const uint64_t *curData0 = (const uint64_t *)data;
199
- const uint64_t *curData1 = (const uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
200
- #define OverWr( argIndex ) STORE128(stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
201
- if ( laneCount >= 17 ) {
202
- OverWr( 0 );
203
- OverWr( 1 );
204
- OverWr( 2 );
205
- OverWr( 3 );
206
- OverWr( 4 );
207
- OverWr( 5 );
208
- OverWr( 6 );
209
- OverWr( 7 );
210
- OverWr( 8 );
211
- OverWr( 9 );
212
- OverWr( 10 );
213
- OverWr( 11 );
214
- OverWr( 12 );
215
- OverWr( 13 );
216
- OverWr( 14 );
217
- OverWr( 15 );
218
- OverWr( 16 );
219
- if ( laneCount >= 21 ) {
220
- OverWr( 17 );
221
- OverWr( 18 );
222
- OverWr( 19 );
223
- OverWr( 20 );
224
- for(i=21; i<laneCount; i++)
225
- OverWr( i );
226
- }
227
- else {
228
- for(i=17; i<laneCount; i++)
229
- OverWr( i );
230
- }
231
- }
232
- else {
233
- for(i=0; i<laneCount; i++)
234
- OverWr( i );
235
- }
236
- #undef OverWr
237
- }
238
-
239
- void KeccakP1600times2_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
240
- {
241
- unsigned int sizeLeft = byteCount;
242
- unsigned int lanePosition = 0;
243
- uint64_t *statesAsLanes = (uint64_t *)states;
244
-
245
- while(sizeLeft >= SnP_laneLengthInBytes) {
246
- statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0;
247
- sizeLeft -= SnP_laneLengthInBytes;
248
- lanePosition++;
249
- }
250
-
251
- if (sizeLeft > 0) {
252
- memset(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], 0, sizeLeft);
253
- }
254
- }
255
-
256
- void KeccakP1600times2_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
257
- {
258
- unsigned int sizeLeft = length;
259
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
260
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
261
- unsigned char *curData = data;
262
- const uint64_t *statesAsLanes = (const uint64_t *)states;
263
-
264
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
265
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
266
- if (bytesInLane > sizeLeft)
267
- bytesInLane = sizeLeft;
268
- memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane);
269
- sizeLeft -= bytesInLane;
270
- lanePosition++;
271
- curData += bytesInLane;
272
- }
273
-
274
- while(sizeLeft >= SnP_laneLengthInBytes) {
275
- *(uint64_t*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
276
- sizeLeft -= SnP_laneLengthInBytes;
277
- lanePosition++;
278
- curData += SnP_laneLengthInBytes;
279
- }
280
-
281
- if (sizeLeft > 0) {
282
- memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft);
283
- }
284
- }
285
-
286
- void KeccakP1600times2_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
287
- {
288
- const V128 *stateAsLanes = (const V128 *)states;
289
- V128 lanes;
290
- unsigned int i;
291
- uint64_t *curData0 = (uint64_t *)data;
292
- uint64_t *curData1 = (uint64_t *)(data+laneOffset*SnP_laneLengthInBytes);
293
-
294
- #define Extr( argIndex ) lanes = LOAD128( stateAsLanes[argIndex] ), \
295
- STORE64L( curData0[argIndex], lanes ), \
296
- STORE64H( curData1[argIndex], lanes )
297
-
298
- #if defined(KeccakP1600times2_useSSE2)
299
- #define Extr2( argIndex ) lanes0 = LOAD128( stateAsLanes[argIndex] ), \
300
- lanes1 = LOAD128( stateAsLanes[(argIndex)+1] ), \
301
- lanes = UNPACKL( lanes0, lanes1 ), \
302
- lanes0 = UNPACKH( lanes0, lanes1 ), \
303
- STORE128u( *(V128*)&curData0[argIndex], lanes ), \
304
- STORE128u( *(V128*)&curData1[argIndex], lanes0 )
305
- if ( laneCount >= 16 ) {
306
- V128 lanes0, lanes1;
307
- Extr2( 0 );
308
- Extr2( 2 );
309
- Extr2( 4 );
310
- Extr2( 6 );
311
- Extr2( 8 );
312
- Extr2( 10 );
313
- Extr2( 12 );
314
- Extr2( 14 );
315
- if ( laneCount >= 20 ) {
316
- Extr2( 16 );
317
- Extr2( 18 );
318
- for(i=20; i<laneCount; i++)
319
- Extr( i );
320
- }
321
- else {
322
- for(i=16; i<laneCount; i++)
323
- Extr( i );
324
- }
325
- }
326
- #undef Extr2
327
- #else
328
- if ( laneCount >= 17 ) {
329
- Extr( 0 );
330
- Extr( 1 );
331
- Extr( 2 );
332
- Extr( 3 );
333
- Extr( 4 );
334
- Extr( 5 );
335
- Extr( 6 );
336
- Extr( 7 );
337
- Extr( 8 );
338
- Extr( 9 );
339
- Extr( 10 );
340
- Extr( 11 );
341
- Extr( 12 );
342
- Extr( 13 );
343
- Extr( 14 );
344
- Extr( 15 );
345
- Extr( 16 );
346
- if ( laneCount >= 21 ) {
347
- Extr( 17 );
348
- Extr( 18 );
349
- Extr( 19 );
350
- Extr( 20 );
351
- for(i=21; i<laneCount; i++)
352
- Extr( i );
353
- }
354
- else {
355
- for(i=17; i<laneCount; i++)
356
- Extr( i );
357
- }
358
- }
359
- #endif
360
- else {
361
- for(i=0; i<laneCount; i++)
362
- Extr( i );
363
- }
364
- #undef Extr
365
- }
366
-
367
- void KeccakP1600times2_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
368
- {
369
- unsigned int sizeLeft = length;
370
- unsigned int lanePosition = offset/SnP_laneLengthInBytes;
371
- unsigned int offsetInLane = offset%SnP_laneLengthInBytes;
372
- const unsigned char *curInput = input;
373
- unsigned char *curOutput = output;
374
- const uint64_t *statesAsLanes = (const uint64_t *)states;
375
-
376
- if ((sizeLeft > 0) && (offsetInLane != 0)) {
377
- unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane;
378
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane);
379
- if (bytesInLane > sizeLeft)
380
- bytesInLane = sizeLeft;
381
- sizeLeft -= bytesInLane;
382
- do {
383
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
384
- lane >>= 8;
385
- } while ( --bytesInLane != 0);
386
- lanePosition++;
387
- }
388
-
389
- while(sizeLeft >= SnP_laneLengthInBytes) {
390
- *((uint64_t*)curOutput) = *((uint64_t*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)];
391
- sizeLeft -= SnP_laneLengthInBytes;
392
- lanePosition++;
393
- curInput += SnP_laneLengthInBytes;
394
- curOutput += SnP_laneLengthInBytes;
395
- }
396
-
397
- if (sizeLeft != 0) {
398
- uint64_t lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)];
399
- do {
400
- *(curOutput++) = *(curInput++) ^ (unsigned char)lane;
401
- lane >>= 8;
402
- } while ( --sizeLeft != 0);
403
- }
404
- }
405
-
406
- void KeccakP1600times2_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
407
- {
408
- const uint64_t *stateAsLanes = (const uint64_t *)states;
409
- unsigned int i;
410
- const uint64_t *curInput0 = (uint64_t *)input;
411
- const uint64_t *curInput1 = (uint64_t *)(input+laneOffset*SnP_laneLengthInBytes);
412
- uint64_t *curOutput0 = (uint64_t *)output;
413
- uint64_t *curOutput1 = (uint64_t *)(output+laneOffset*SnP_laneLengthInBytes);
414
-
415
- #define ExtrXOR( argIndex ) curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes[2*(argIndex)], curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes[2*(argIndex)+1]
416
-
417
- if ( laneCount >= 17 ) {
418
- ExtrXOR( 0 );
419
- ExtrXOR( 1 );
420
- ExtrXOR( 2 );
421
- ExtrXOR( 3 );
422
- ExtrXOR( 4 );
423
- ExtrXOR( 5 );
424
- ExtrXOR( 6 );
425
- ExtrXOR( 7 );
426
- ExtrXOR( 8 );
427
- ExtrXOR( 9 );
428
- ExtrXOR( 10 );
429
- ExtrXOR( 11 );
430
- ExtrXOR( 12 );
431
- ExtrXOR( 13 );
432
- ExtrXOR( 14 );
433
- ExtrXOR( 15 );
434
- ExtrXOR( 16 );
435
- if ( laneCount >= 21 ) {
436
- ExtrXOR( 17 );
437
- ExtrXOR( 18 );
438
- ExtrXOR( 19 );
439
- ExtrXOR( 20 );
440
- for(i=21; i<laneCount; i++)
441
- ExtrXOR( i );
442
- }
443
- else {
444
- for(i=17; i<laneCount; i++)
445
- ExtrXOR( i );
446
- }
447
- }
448
- else {
449
- for(i=0; i<laneCount; i++)
450
- ExtrXOR( i );
451
- }
452
- #undef ExtrXOR
453
- }
454
-
455
- #define declareABCDE \
456
- V128 Aba, Abe, Abi, Abo, Abu; \
457
- V128 Aga, Age, Agi, Ago, Agu; \
458
- V128 Aka, Ake, Aki, Ako, Aku; \
459
- V128 Ama, Ame, Ami, Amo, Amu; \
460
- V128 Asa, Ase, Asi, Aso, Asu; \
461
- V128 Bba, Bbe, Bbi, Bbo, Bbu; \
462
- V128 Bga, Bge, Bgi, Bgo, Bgu; \
463
- V128 Bka, Bke, Bki, Bko, Bku; \
464
- V128 Bma, Bme, Bmi, Bmo, Bmu; \
465
- V128 Bsa, Bse, Bsi, Bso, Bsu; \
466
- V128 Ca, Ce, Ci, Co, Cu; \
467
- V128 Da, De, Di, Do, Du; \
468
- V128 Eba, Ebe, Ebi, Ebo, Ebu; \
469
- V128 Ega, Ege, Egi, Ego, Egu; \
470
- V128 Eka, Eke, Eki, Eko, Eku; \
471
- V128 Ema, Eme, Emi, Emo, Emu; \
472
- V128 Esa, Ese, Esi, Eso, Esu; \
473
-
474
- #define prepareTheta \
475
- Ca = XOR128(Aba, XOR128(Aga, XOR128(Aka, XOR128(Ama, Asa)))); \
476
- Ce = XOR128(Abe, XOR128(Age, XOR128(Ake, XOR128(Ame, Ase)))); \
477
- Ci = XOR128(Abi, XOR128(Agi, XOR128(Aki, XOR128(Ami, Asi)))); \
478
- Co = XOR128(Abo, XOR128(Ago, XOR128(Ako, XOR128(Amo, Aso)))); \
479
- Cu = XOR128(Abu, XOR128(Agu, XOR128(Aku, XOR128(Amu, Asu)))); \
480
-
481
- /* --- Theta Rho Pi Chi Iota Prepare-theta */
482
- /* --- 64-bit lanes mapped to 64-bit words */
483
- #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
484
- Da = XOR128(Cu, ROL64in128(Ce, 1)); \
485
- De = XOR128(Ca, ROL64in128(Ci, 1)); \
486
- Di = XOR128(Ce, ROL64in128(Co, 1)); \
487
- Do = XOR128(Ci, ROL64in128(Cu, 1)); \
488
- Du = XOR128(Co, ROL64in128(Ca, 1)); \
489
- \
490
- XOReq128(A##ba, Da); \
491
- Bba = A##ba; \
492
- XOReq128(A##ge, De); \
493
- Bbe = ROL64in128(A##ge, 44); \
494
- XOReq128(A##ki, Di); \
495
- Bbi = ROL64in128(A##ki, 43); \
496
- E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
497
- XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
498
- Ca = E##ba; \
499
- XOReq128(A##mo, Do); \
500
- Bbo = ROL64in128(A##mo, 21); \
501
- E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
502
- Ce = E##be; \
503
- XOReq128(A##su, Du); \
504
- Bbu = ROL64in128(A##su, 14); \
505
- E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
506
- Ci = E##bi; \
507
- E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
508
- Co = E##bo; \
509
- E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
510
- Cu = E##bu; \
511
- \
512
- XOReq128(A##bo, Do); \
513
- Bga = ROL64in128(A##bo, 28); \
514
- XOReq128(A##gu, Du); \
515
- Bge = ROL64in128(A##gu, 20); \
516
- XOReq128(A##ka, Da); \
517
- Bgi = ROL64in128(A##ka, 3); \
518
- E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
519
- XOReq128(Ca, E##ga); \
520
- XOReq128(A##me, De); \
521
- Bgo = ROL64in128(A##me, 45); \
522
- E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
523
- XOReq128(Ce, E##ge); \
524
- XOReq128(A##si, Di); \
525
- Bgu = ROL64in128(A##si, 61); \
526
- E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
527
- XOReq128(Ci, E##gi); \
528
- E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
529
- XOReq128(Co, E##go); \
530
- E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
531
- XOReq128(Cu, E##gu); \
532
- \
533
- XOReq128(A##be, De); \
534
- Bka = ROL64in128(A##be, 1); \
535
- XOReq128(A##gi, Di); \
536
- Bke = ROL64in128(A##gi, 6); \
537
- XOReq128(A##ko, Do); \
538
- Bki = ROL64in128(A##ko, 25); \
539
- E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
540
- XOReq128(Ca, E##ka); \
541
- XOReq128(A##mu, Du); \
542
- Bko = ROL64in128_8(A##mu); \
543
- E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
544
- XOReq128(Ce, E##ke); \
545
- XOReq128(A##sa, Da); \
546
- Bku = ROL64in128(A##sa, 18); \
547
- E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
548
- XOReq128(Ci, E##ki); \
549
- E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
550
- XOReq128(Co, E##ko); \
551
- E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
552
- XOReq128(Cu, E##ku); \
553
- \
554
- XOReq128(A##bu, Du); \
555
- Bma = ROL64in128(A##bu, 27); \
556
- XOReq128(A##ga, Da); \
557
- Bme = ROL64in128(A##ga, 36); \
558
- XOReq128(A##ke, De); \
559
- Bmi = ROL64in128(A##ke, 10); \
560
- E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
561
- XOReq128(Ca, E##ma); \
562
- XOReq128(A##mi, Di); \
563
- Bmo = ROL64in128(A##mi, 15); \
564
- E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
565
- XOReq128(Ce, E##me); \
566
- XOReq128(A##so, Do); \
567
- Bmu = ROL64in128_56(A##so); \
568
- E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
569
- XOReq128(Ci, E##mi); \
570
- E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
571
- XOReq128(Co, E##mo); \
572
- E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
573
- XOReq128(Cu, E##mu); \
574
- \
575
- XOReq128(A##bi, Di); \
576
- Bsa = ROL64in128(A##bi, 62); \
577
- XOReq128(A##go, Do); \
578
- Bse = ROL64in128(A##go, 55); \
579
- XOReq128(A##ku, Du); \
580
- Bsi = ROL64in128(A##ku, 39); \
581
- E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
582
- XOReq128(Ca, E##sa); \
583
- XOReq128(A##ma, Da); \
584
- Bso = ROL64in128(A##ma, 41); \
585
- E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
586
- XOReq128(Ce, E##se); \
587
- XOReq128(A##se, De); \
588
- Bsu = ROL64in128(A##se, 2); \
589
- E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
590
- XOReq128(Ci, E##si); \
591
- E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
592
- XOReq128(Co, E##so); \
593
- E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
594
- XOReq128(Cu, E##su); \
595
- \
596
-
597
- /* --- Theta Rho Pi Chi Iota */
598
- /* --- 64-bit lanes mapped to 64-bit words */
599
- #define thetaRhoPiChiIota(i, A, E) \
600
- Da = XOR128(Cu, ROL64in128(Ce, 1)); \
601
- De = XOR128(Ca, ROL64in128(Ci, 1)); \
602
- Di = XOR128(Ce, ROL64in128(Co, 1)); \
603
- Do = XOR128(Ci, ROL64in128(Cu, 1)); \
604
- Du = XOR128(Co, ROL64in128(Ca, 1)); \
605
- \
606
- XOReq128(A##ba, Da); \
607
- Bba = A##ba; \
608
- XOReq128(A##ge, De); \
609
- Bbe = ROL64in128(A##ge, 44); \
610
- XOReq128(A##ki, Di); \
611
- Bbi = ROL64in128(A##ki, 43); \
612
- E##ba = XOR128(Bba, ANDnu128(Bbe, Bbi)); \
613
- XOReq128(E##ba, CONST128_64(KeccakF1600RoundConstants[i])); \
614
- XOReq128(A##mo, Do); \
615
- Bbo = ROL64in128(A##mo, 21); \
616
- E##be = XOR128(Bbe, ANDnu128(Bbi, Bbo)); \
617
- XOReq128(A##su, Du); \
618
- Bbu = ROL64in128(A##su, 14); \
619
- E##bi = XOR128(Bbi, ANDnu128(Bbo, Bbu)); \
620
- E##bo = XOR128(Bbo, ANDnu128(Bbu, Bba)); \
621
- E##bu = XOR128(Bbu, ANDnu128(Bba, Bbe)); \
622
- \
623
- XOReq128(A##bo, Do); \
624
- Bga = ROL64in128(A##bo, 28); \
625
- XOReq128(A##gu, Du); \
626
- Bge = ROL64in128(A##gu, 20); \
627
- XOReq128(A##ka, Da); \
628
- Bgi = ROL64in128(A##ka, 3); \
629
- E##ga = XOR128(Bga, ANDnu128(Bge, Bgi)); \
630
- XOReq128(A##me, De); \
631
- Bgo = ROL64in128(A##me, 45); \
632
- E##ge = XOR128(Bge, ANDnu128(Bgi, Bgo)); \
633
- XOReq128(A##si, Di); \
634
- Bgu = ROL64in128(A##si, 61); \
635
- E##gi = XOR128(Bgi, ANDnu128(Bgo, Bgu)); \
636
- E##go = XOR128(Bgo, ANDnu128(Bgu, Bga)); \
637
- E##gu = XOR128(Bgu, ANDnu128(Bga, Bge)); \
638
- \
639
- XOReq128(A##be, De); \
640
- Bka = ROL64in128(A##be, 1); \
641
- XOReq128(A##gi, Di); \
642
- Bke = ROL64in128(A##gi, 6); \
643
- XOReq128(A##ko, Do); \
644
- Bki = ROL64in128(A##ko, 25); \
645
- E##ka = XOR128(Bka, ANDnu128(Bke, Bki)); \
646
- XOReq128(A##mu, Du); \
647
- Bko = ROL64in128_8(A##mu); \
648
- E##ke = XOR128(Bke, ANDnu128(Bki, Bko)); \
649
- XOReq128(A##sa, Da); \
650
- Bku = ROL64in128(A##sa, 18); \
651
- E##ki = XOR128(Bki, ANDnu128(Bko, Bku)); \
652
- E##ko = XOR128(Bko, ANDnu128(Bku, Bka)); \
653
- E##ku = XOR128(Bku, ANDnu128(Bka, Bke)); \
654
- \
655
- XOReq128(A##bu, Du); \
656
- Bma = ROL64in128(A##bu, 27); \
657
- XOReq128(A##ga, Da); \
658
- Bme = ROL64in128(A##ga, 36); \
659
- XOReq128(A##ke, De); \
660
- Bmi = ROL64in128(A##ke, 10); \
661
- E##ma = XOR128(Bma, ANDnu128(Bme, Bmi)); \
662
- XOReq128(A##mi, Di); \
663
- Bmo = ROL64in128(A##mi, 15); \
664
- E##me = XOR128(Bme, ANDnu128(Bmi, Bmo)); \
665
- XOReq128(A##so, Do); \
666
- Bmu = ROL64in128_56(A##so); \
667
- E##mi = XOR128(Bmi, ANDnu128(Bmo, Bmu)); \
668
- E##mo = XOR128(Bmo, ANDnu128(Bmu, Bma)); \
669
- E##mu = XOR128(Bmu, ANDnu128(Bma, Bme)); \
670
- \
671
- XOReq128(A##bi, Di); \
672
- Bsa = ROL64in128(A##bi, 62); \
673
- XOReq128(A##go, Do); \
674
- Bse = ROL64in128(A##go, 55); \
675
- XOReq128(A##ku, Du); \
676
- Bsi = ROL64in128(A##ku, 39); \
677
- E##sa = XOR128(Bsa, ANDnu128(Bse, Bsi)); \
678
- XOReq128(A##ma, Da); \
679
- Bso = ROL64in128(A##ma, 41); \
680
- E##se = XOR128(Bse, ANDnu128(Bsi, Bso)); \
681
- XOReq128(A##se, De); \
682
- Bsu = ROL64in128(A##se, 2); \
683
- E##si = XOR128(Bsi, ANDnu128(Bso, Bsu)); \
684
- E##so = XOR128(Bso, ANDnu128(Bsu, Bsa)); \
685
- E##su = XOR128(Bsu, ANDnu128(Bsa, Bse)); \
686
- \
687
-
688
- static ALIGN(KeccakP1600times2_statesAlignment) const uint64_t KeccakF1600RoundConstants[24] = {
689
- 0x0000000000000001ULL,
690
- 0x0000000000008082ULL,
691
- 0x800000000000808aULL,
692
- 0x8000000080008000ULL,
693
- 0x000000000000808bULL,
694
- 0x0000000080000001ULL,
695
- 0x8000000080008081ULL,
696
- 0x8000000000008009ULL,
697
- 0x000000000000008aULL,
698
- 0x0000000000000088ULL,
699
- 0x0000000080008009ULL,
700
- 0x000000008000000aULL,
701
- 0x000000008000808bULL,
702
- 0x800000000000008bULL,
703
- 0x8000000000008089ULL,
704
- 0x8000000000008003ULL,
705
- 0x8000000000008002ULL,
706
- 0x8000000000000080ULL,
707
- 0x000000000000800aULL,
708
- 0x800000008000000aULL,
709
- 0x8000000080008081ULL,
710
- 0x8000000000008080ULL,
711
- 0x0000000080000001ULL,
712
- 0x8000000080008008ULL};
713
-
714
- #define copyFromState(X, state) \
715
- X##ba = LOAD128(state[ 0]); \
716
- X##be = LOAD128(state[ 1]); \
717
- X##bi = LOAD128(state[ 2]); \
718
- X##bo = LOAD128(state[ 3]); \
719
- X##bu = LOAD128(state[ 4]); \
720
- X##ga = LOAD128(state[ 5]); \
721
- X##ge = LOAD128(state[ 6]); \
722
- X##gi = LOAD128(state[ 7]); \
723
- X##go = LOAD128(state[ 8]); \
724
- X##gu = LOAD128(state[ 9]); \
725
- X##ka = LOAD128(state[10]); \
726
- X##ke = LOAD128(state[11]); \
727
- X##ki = LOAD128(state[12]); \
728
- X##ko = LOAD128(state[13]); \
729
- X##ku = LOAD128(state[14]); \
730
- X##ma = LOAD128(state[15]); \
731
- X##me = LOAD128(state[16]); \
732
- X##mi = LOAD128(state[17]); \
733
- X##mo = LOAD128(state[18]); \
734
- X##mu = LOAD128(state[19]); \
735
- X##sa = LOAD128(state[20]); \
736
- X##se = LOAD128(state[21]); \
737
- X##si = LOAD128(state[22]); \
738
- X##so = LOAD128(state[23]); \
739
- X##su = LOAD128(state[24]); \
740
-
741
- #define copyToState(state, X) \
742
- STORE128(state[ 0], X##ba); \
743
- STORE128(state[ 1], X##be); \
744
- STORE128(state[ 2], X##bi); \
745
- STORE128(state[ 3], X##bo); \
746
- STORE128(state[ 4], X##bu); \
747
- STORE128(state[ 5], X##ga); \
748
- STORE128(state[ 6], X##ge); \
749
- STORE128(state[ 7], X##gi); \
750
- STORE128(state[ 8], X##go); \
751
- STORE128(state[ 9], X##gu); \
752
- STORE128(state[10], X##ka); \
753
- STORE128(state[11], X##ke); \
754
- STORE128(state[12], X##ki); \
755
- STORE128(state[13], X##ko); \
756
- STORE128(state[14], X##ku); \
757
- STORE128(state[15], X##ma); \
758
- STORE128(state[16], X##me); \
759
- STORE128(state[17], X##mi); \
760
- STORE128(state[18], X##mo); \
761
- STORE128(state[19], X##mu); \
762
- STORE128(state[20], X##sa); \
763
- STORE128(state[21], X##se); \
764
- STORE128(state[22], X##si); \
765
- STORE128(state[23], X##so); \
766
- STORE128(state[24], X##su); \
767
-
768
- #define copyStateVariables(X, Y) \
769
- X##ba = Y##ba; \
770
- X##be = Y##be; \
771
- X##bi = Y##bi; \
772
- X##bo = Y##bo; \
773
- X##bu = Y##bu; \
774
- X##ga = Y##ga; \
775
- X##ge = Y##ge; \
776
- X##gi = Y##gi; \
777
- X##go = Y##go; \
778
- X##gu = Y##gu; \
779
- X##ka = Y##ka; \
780
- X##ke = Y##ke; \
781
- X##ki = Y##ki; \
782
- X##ko = Y##ko; \
783
- X##ku = Y##ku; \
784
- X##ma = Y##ma; \
785
- X##me = Y##me; \
786
- X##mi = Y##mi; \
787
- X##mo = Y##mo; \
788
- X##mu = Y##mu; \
789
- X##sa = Y##sa; \
790
- X##se = Y##se; \
791
- X##si = Y##si; \
792
- X##so = Y##so; \
793
- X##su = Y##su; \
794
-
795
- #ifdef KeccakP1600times2_fullUnrolling
796
- #define FullUnrolling
797
- #else
798
- #define Unrolling KeccakP1600times2_unrolling
799
- #endif
800
- #include "KeccakP-1600-unrolling.macros"
801
-
802
- void KeccakP1600times2_PermuteAll_24rounds(void *states)
803
- {
804
- V128 *statesAsLanes = (V128 *)states;
805
- declareABCDE
806
- #ifndef KeccakP1600times2_fullUnrolling
807
- unsigned int i;
808
- #endif
809
-
810
- copyFromState(A, statesAsLanes)
811
- rounds24
812
- copyToState(statesAsLanes, A)
813
- #if defined(UseMMX)
814
- _mm_empty();
815
- #endif
816
- }
817
-
818
- void KeccakP1600times2_PermuteAll_12rounds(void *states)
819
- {
820
- V128 *statesAsLanes = (V128 *)states;
821
- declareABCDE
822
- #ifndef KeccakP1600times2_fullUnrolling
823
- unsigned int i;
824
- #endif
825
-
826
- copyFromState(A, statesAsLanes)
827
- rounds12
828
- copyToState(statesAsLanes, A)
829
- #if defined(UseMMX)
830
- _mm_empty();
831
- #endif
832
- }
833
-
834
- void KeccakP1600times2_PermuteAll_6rounds(void *states)
835
- {
836
- V128 *statesAsLanes = (V128 *)states;
837
- declareABCDE
838
- #ifndef KeccakP1600times2_fullUnrolling
839
- unsigned int i;
840
- #endif
841
-
842
- copyFromState(A, statesAsLanes)
843
- rounds6
844
- copyToState(statesAsLanes, A)
845
- #if defined(UseMMX)
846
- _mm_empty();
847
- #endif
848
- }
849
-
850
- void KeccakP1600times2_PermuteAll_4rounds(void *states)
851
- {
852
- V128 *statesAsLanes = (V128 *)states;
853
- declareABCDE
854
- #ifndef KeccakP1600times2_fullUnrolling
855
- unsigned int i;
856
- #endif
857
-
858
- copyFromState(A, statesAsLanes)
859
- rounds4
860
- copyToState(statesAsLanes, A)
861
- #if defined(UseMMX)
862
- _mm_empty();
863
- #endif
864
- }
865
-
866
- size_t KeccakF1600times2_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen)
867
- {
868
- if (laneCount == 21) {
869
- #if 1
870
- const unsigned char *dataStart = data;
871
-
872
- while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
873
- V128 *stateAsLanes = (V128 *)states;
874
- const uint64_t *curData0 = (const uint64_t *)data;
875
- const uint64_t *curData1 = (const uint64_t *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
876
- #define XOR_In( argIndex ) XOReq128( stateAsLanes[argIndex], LOAD6464(curData1[argIndex], curData0[argIndex]))
877
- XOR_In( 0 );
878
- XOR_In( 1 );
879
- XOR_In( 2 );
880
- XOR_In( 3 );
881
- XOR_In( 4 );
882
- XOR_In( 5 );
883
- XOR_In( 6 );
884
- XOR_In( 7 );
885
- XOR_In( 8 );
886
- XOR_In( 9 );
887
- XOR_In( 10 );
888
- XOR_In( 11 );
889
- XOR_In( 12 );
890
- XOR_In( 13 );
891
- XOR_In( 14 );
892
- XOR_In( 15 );
893
- XOR_In( 16 );
894
- XOR_In( 17 );
895
- XOR_In( 18 );
896
- XOR_In( 19 );
897
- XOR_In( 20 );
898
- #undef XOR_In
899
- KeccakP1600times2_PermuteAll_24rounds(states);
900
- data += laneOffsetSerial*8;
901
- dataByteLen -= laneOffsetSerial*8;
902
- }
903
- return data - dataStart;
904
- #else
905
- unsigned int i;
906
- const unsigned char *dataStart = data;
907
- const uint64_t *curData0 = (const uint64_t *)data;
908
- const uint64_t *curData1 = (const uint64_t *)(data+laneOffsetParallel*SnP_laneLengthInBytes);
909
- V128 *statesAsLanes = (V128 *)states;
910
- declareABCDE
911
-
912
- copyFromState(A, statesAsLanes)
913
- while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
914
- #define XOR_In( Xxx, argIndex ) XOReq128( Xxx, LOAD6464(curData1[argIndex], curData0[argIndex]))
915
- XOR_In( Aba, 0 );
916
- XOR_In( Abe, 1 );
917
- XOR_In( Abi, 2 );
918
- XOR_In( Abo, 3 );
919
- XOR_In( Abu, 4 );
920
- XOR_In( Aga, 5 );
921
- XOR_In( Age, 6 );
922
- XOR_In( Agi, 7 );
923
- XOR_In( Ago, 8 );
924
- XOR_In( Agu, 9 );
925
- XOR_In( Aka, 10 );
926
- XOR_In( Ake, 11 );
927
- XOR_In( Aki, 12 );
928
- XOR_In( Ako, 13 );
929
- XOR_In( Aku, 14 );
930
- XOR_In( Ama, 15 );
931
- XOR_In( Ame, 16 );
932
- XOR_In( Ami, 17 );
933
- XOR_In( Amo, 18 );
934
- XOR_In( Amu, 19 );
935
- XOR_In( Asa, 20 );
936
- #undef XOR_In
937
- rounds24
938
- curData0 += laneOffsetSerial;
939
- curData1 += laneOffsetSerial;
940
- dataByteLen -= laneOffsetSerial*8;
941
- }
942
- copyToState(statesAsLanes, A)
943
- return (const unsigned char *)curData0 - dataStart;
944
- #endif
945
- }
946
- else {
947
- const unsigned char *dataStart = data;
948
-
949
- while(dataByteLen >= (laneOffsetParallel + laneCount)*8) {
950
- KeccakP1600times2_AddLanesAll(states, data, laneCount, laneOffsetParallel);
951
- KeccakP1600times2_PermuteAll_24rounds(states);
952
- data += laneOffsetSerial*8;
953
- dataByteLen -= laneOffsetSerial*8;
954
- }
955
- return data - dataStart;
956
- }
957
- }