sleeping_kangaroo12 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +34 -67
  3. data/ext/Rakefile +12 -37
  4. data/ext/binding/sleeping_kangaroo12.c +1 -16
  5. data/ext/{xkcp → k12}/Makefile +0 -0
  6. data/ext/k12/Makefile.build +118 -0
  7. data/ext/k12/README.markdown +86 -0
  8. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S +623 -0
  9. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-SnP.h +65 -0
  10. data/ext/k12/lib/ARMv8Asha3/KeccakP-1600-opt64.c +227 -0
  11. data/ext/{xkcp/lib/low/KeccakP-1600/compact → k12/lib/Inplace32BI}/KeccakP-1600-SnP.h +4 -9
  12. data/ext/{xkcp/lib/low/KeccakP-1600/plain-32bits-inplace → k12/lib/Inplace32BI}/KeccakP-1600-inplace32BI.c +65 -160
  13. data/ext/k12/lib/KangarooTwelve.c +332 -0
  14. data/ext/{xkcp/lib/high/KangarooTwelve → k12/lib}/KangarooTwelve.h +53 -16
  15. data/ext/{xkcp/lib/low/KeccakP-1600/AVX2 → k12/lib/Optimized64}/KeccakP-1600-AVX2.s +122 -558
  16. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512-plainC.c +241 -0
  17. data/ext/k12/lib/Optimized64/KeccakP-1600-AVX512.s +551 -0
  18. data/ext/k12/lib/Optimized64/KeccakP-1600-SnP.h +74 -0
  19. data/ext/{xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros → k12/lib/Optimized64/KeccakP-1600-opt64.c} +447 -169
  20. data/ext/k12/lib/Optimized64/KeccakP-1600-runtimeDispatch.c +406 -0
  21. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX2.c +419 -0
  22. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-AVX512.c +458 -0
  23. data/ext/k12/lib/Optimized64/KeccakP-1600-timesN-SSSE3.c +438 -0
  24. data/ext/{xkcp/lib/low/KeccakP-1600/plain-64bits → k12/lib/Plain64}/KeccakP-1600-SnP.h +14 -20
  25. data/ext/{xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h → k12/lib/Plain64/KeccakP-1600-plain64.c} +9 -8
  26. data/ext/{xkcp/lib/common → k12/lib}/align.h +3 -2
  27. data/ext/{xkcp/lib/common → k12/lib}/brg_endian.h +0 -0
  28. data/ext/{xkcp → k12}/support/Build/ExpandProducts.xsl +0 -0
  29. data/ext/{xkcp → k12}/support/Build/ToGlobalMakefile.xsl +0 -0
  30. data/ext/{xkcp → k12}/support/Build/ToOneTarget.xsl +0 -0
  31. data/ext/{xkcp → k12}/support/Build/ToTargetConfigFile.xsl +0 -0
  32. data/ext/{xkcp → k12}/support/Build/ToTargetMakefile.xsl +10 -16
  33. data/ext/{xkcp → k12}/support/Build/ToVCXProj.xsl +0 -0
  34. data/lib/sleeping_kangaroo12/version.rb +1 -1
  35. metadata +33 -276
  36. data/ext/config/xkcp.build +0 -17
  37. data/ext/xkcp/LICENSE +0 -1
  38. data/ext/xkcp/Makefile.build +0 -200
  39. data/ext/xkcp/README.markdown +0 -296
  40. data/ext/xkcp/lib/HighLevel.build +0 -143
  41. data/ext/xkcp/lib/LowLevel.build +0 -757
  42. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +0 -301
  43. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +0 -81
  44. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +0 -125
  45. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +0 -48
  46. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +0 -79
  47. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +0 -81
  48. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +0 -73
  49. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +0 -195
  50. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +0 -111
  51. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +0 -76
  52. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +0 -314
  53. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +0 -61
  54. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +0 -67
  55. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +0 -128
  56. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +0 -93
  57. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +0 -599
  58. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +0 -573
  59. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +0 -87
  60. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +0 -88
  61. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +0 -274
  62. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +0 -132
  63. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +0 -217
  64. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +0 -81
  65. data/ext/xkcp/lib/high/Keyak/Motorist.inc +0 -953
  66. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +0 -533
  67. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +0 -115
  68. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +0 -557
  69. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +0 -247
  70. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +0 -66
  71. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +0 -336
  72. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +0 -26
  73. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +0 -55
  74. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +0 -35
  75. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +0 -634
  76. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +0 -147
  77. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +0 -483
  78. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +0 -241
  79. data/ext/xkcp/lib/high/common/Phases.h +0 -25
  80. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +0 -41
  81. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +0 -1666
  82. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +0 -1655
  83. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +0 -1268
  84. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +0 -1264
  85. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +0 -1178
  86. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +0 -1175
  87. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +0 -1338
  88. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +0 -1336
  89. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +0 -1343
  90. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +0 -1339
  91. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +0 -42
  92. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +0 -823
  93. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +0 -831
  94. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +0 -31
  95. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +0 -540
  96. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +0 -42
  97. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +0 -733
  98. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +0 -1121
  99. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +0 -52
  100. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +0 -623
  101. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +0 -47
  102. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +0 -6
  103. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +0 -6
  104. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +0 -6
  105. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +0 -1031
  106. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +0 -53
  107. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +0 -44
  108. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +0 -476
  109. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +0 -6
  110. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +0 -6
  111. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +0 -305
  112. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +0 -420
  113. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +0 -43
  114. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +0 -565
  115. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +0 -7
  116. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +0 -7
  117. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +0 -8
  118. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +0 -6
  119. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +0 -6
  120. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +0 -44
  121. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +0 -23
  122. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +0 -625
  123. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +0 -44
  124. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +0 -440
  125. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +0 -42
  126. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +0 -1196
  127. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +0 -1124
  128. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +0 -1196
  129. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +0 -1392
  130. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +0 -1394
  131. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +0 -42
  132. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +0 -7
  133. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +0 -7
  134. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +0 -7
  135. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +0 -850
  136. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +0 -51
  137. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +0 -957
  138. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +0 -49
  139. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +0 -8
  140. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +0 -8
  141. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +0 -9
  142. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +0 -9
  143. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +0 -45
  144. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +0 -37
  145. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +0 -1321
  146. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +0 -55
  147. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +0 -7
  148. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +0 -7
  149. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +0 -7
  150. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +0 -7
  151. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +0 -7
  152. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +0 -7
  153. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +0 -881
  154. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +0 -51
  155. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +0 -45
  156. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +0 -37
  157. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +0 -45
  158. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +0 -38
  159. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +0 -1615
  160. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +0 -57
  161. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +0 -7
  162. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +0 -7
  163. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +0 -7
  164. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +0 -45
  165. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +0 -37
  166. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +0 -45
  167. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +0 -38
  168. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +0 -45
  169. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +0 -38
  170. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +0 -41
  171. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +0 -442
  172. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +0 -446
  173. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +0 -419
  174. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +0 -427
  175. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +0 -41
  176. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +0 -647
  177. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +0 -39
  178. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +0 -190
  179. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +0 -43
  180. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +0 -412
  181. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +0 -23
  182. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +0 -41
  183. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +0 -454
  184. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +0 -458
  185. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +0 -455
  186. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +0 -458
  187. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +0 -41
  188. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +0 -728
  189. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +0 -43
  190. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +0 -414
  191. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +0 -23
  192. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +0 -42
  193. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +0 -527
  194. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +0 -533
  195. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +0 -528
  196. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +0 -534
  197. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +0 -521
  198. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +0 -527
  199. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +0 -517
  200. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +0 -523
  201. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +0 -550
  202. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +0 -556
  203. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +0 -32
  204. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +0 -432
  205. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +0 -42
  206. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +0 -929
  207. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +0 -40
  208. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +0 -244
  209. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +0 -46
  210. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +0 -184
  211. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +0 -454
  212. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +0 -459
  213. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +0 -83
  214. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +0 -88
  215. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +0 -7
  216. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +0 -7
  217. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +0 -7
  218. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +0 -7
  219. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +0 -44
  220. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +0 -437
  221. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +0 -23
  222. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +0 -57
  223. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +0 -475
  224. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +0 -480
  225. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +0 -590
  226. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +0 -590
  227. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +0 -126
  228. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +0 -68
  229. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +0 -174
  230. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +0 -80
  231. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +0 -68
  232. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +0 -142
  233. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +0 -55
  234. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +0 -1086
  235. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +0 -1092
  236. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +0 -721
  237. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +0 -726
  238. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +0 -723
  239. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +0 -729
  240. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +0 -1164
  241. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +0 -1165
  242. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +0 -562
  243. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +0 -563
  244. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +0 -563
  245. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +0 -565
  246. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +0 -55
  247. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +0 -476
  248. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +0 -485
  249. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +0 -362
  250. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +0 -367
  251. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +0 -43
  252. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +0 -1341
  253. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +0 -581
  254. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +0 -58
  255. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +0 -332
  256. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +0 -329
  257. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +0 -53
  258. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +0 -355
  259. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +0 -79
  260. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +0 -56
  261. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +0 -399
  262. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +0 -127
  263. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +0 -43
  264. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +0 -253
  265. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +0 -1044
  266. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +0 -49
  267. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +0 -45
  268. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +0 -37
  269. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +0 -1587
  270. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +0 -48
  271. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +0 -1202
  272. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +0 -48
  273. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +0 -484
  274. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +0 -44
  275. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +0 -45
  276. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +0 -37
  277. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +0 -939
  278. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +0 -49
  279. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +0 -1216
  280. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +0 -48
  281. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +0 -45
  282. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +0 -37
  283. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +0 -290
  284. data/ext/xkcp/lib/low/common/SnP-Relaned.h +0 -141
  285. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +0 -133
  286. data/ext/xkcp/support/Kernel-PMU/Makefile +0 -8
  287. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +0 -129
  288. data/ext/xkcp/support/Kernel-PMU/load-module +0 -1
  289. data/ext/xkcp/util/KeccakSum/KeccakSum.c +0 -394
  290. data/ext/xkcp/util/KeccakSum/base64.c +0 -86
  291. data/ext/xkcp/util/KeccakSum/base64.h +0 -12
@@ -1,1341 +0,0 @@
1
- ;
2
- ; The eXtended Keccak Code Package (XKCP)
3
- ; https://github.com/XKCP/XKCP
4
- ;
5
- ; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
6
- ;
7
- ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
8
- ;
9
- ; For more information, feedback or questions, please refer to the Keccak Team website:
10
- ; https://keccak.team/
11
- ;
12
- ; To the extent possible under law, the implementer has waived all copyright
13
- ; and related or neighboring rights to the source code in this file.
14
- ; http://creativecommons.org/publicdomain/zero/1.0/
15
- ;
16
- ; ---
17
- ;
18
- ; This file implements Xoodoo in a SnP-compatible way.
19
- ; Please refer to SnP-documentation.h for more details.
20
- ;
21
- ; This implementation comes with Xoodoo-SnP.h in the same folder.
22
- ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
23
- ;
24
-
25
- ; INFO: Tested on ATmega1280 simulator
26
-
27
- ; Register aliases used in all routines: zero = r1 (assumed to hold 0 per avr-gcc convention - confirm); rX/rY/rZ = low register of the X/Y/Z pointer pairs (26/28/30)
28
- #define zero 1
29
- #define rpState 24
30
- #define rX 26
31
- #define rY 28
32
- #define rZ 30
33
- #define sp 0x3D
34
-
35
- ;----------------------------------------------------------------------------
36
- ;
37
- ; void Xoodoo_StaticInitialize( void )
38
- ;   No-op: its label sits on the ret that ends Xoodoo_Initialize below.
39
- .global Xoodoo_StaticInitialize
40
-
41
- ;----------------------------------------------------------------------------
42
- ;
43
- ; void Xoodoo_Initialize(void *state)
44
- ;   Stores register 1 (alias "zero") into 48 consecutive bytes starting at state.
45
- ; argument state is passed in r24:r25
46
- ; clobbers r23 and Z (r30:r31)
47
- .global Xoodoo_Initialize
48
- Xoodoo_Initialize:
49
- movw rZ, r24 ; Z = state
50
- ldi r23, 3*4/2 ; 6 iterations; each one clears 8 bytes (2 lanes)
51
- Xoodoo_Initialize_Loop:
52
- st z+, zero ; *Z++ = r1; relies on r1 holding 0 (avr-gcc convention - confirm for other callers)
53
- st z+, zero
54
- st z+, zero
55
- st z+, zero
56
- st z+, zero
57
- st z+, zero
58
- st z+, zero
59
- st z+, zero
60
- dec r23
61
- brne Xoodoo_Initialize_Loop ; repeat until all 6 groups of 8 bytes are written
62
- Xoodoo_StaticInitialize: ; entry point of the no-op StaticInitialize: just returns
63
- ret
64
-
65
- ;----------------------------------------------------------------------------
66
- ;
67
- ; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
68
- ;   XORs one byte into the state: state[offset] ^= data.
69
- ; argument state is passed in r24:r25
70
- ; argument data is passed in r22:r23, only LSB (r22) is used
71
- ; argument offset is passed in r20:r21, only LSB (r20) is used
72
- ; clobbers r0 and Z (r30:r31)
73
- .global Xoodoo_AddByte
74
- Xoodoo_AddByte:
75
- movw rZ, r24 ; Z = state
76
- add rZ, r20 ; Z low byte += offset (8-bit)
77
- adc rZ+1, zero ; propagate the carry into Z high byte
78
- ld r0, Z ; r0 = state[offset]
79
- eor r0, r22 ; r0 ^= data
80
- st Z, r0 ; write the result back in place
81
- ret
82
-
83
- ;----------------------------------------------------------------------------
84
- ;
85
- ; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
86
- ;   XORs length bytes from data into the state, starting at state+offset.
87
- ; argument state is passed in r24:r25
88
- ; argument data is passed in r22:r23
89
- ; argument offset is passed in r20:r21, only LSB (r20) is used
90
- ; argument length is passed in r18:r19, only LSB (r18) is used
91
- ; clobbers r0, r18, r19, r21, X (r26:r27) and Z (r30:r31)
92
- .global Xoodoo_AddBytes
93
- Xoodoo_AddBytes:
94
- movw rZ, r24 ; Z = state
95
- add rZ, r20 ; Z += offset (8-bit add ...
96
- adc rZ+1, zero ; ... with carry into the high byte)
97
- movw rX, r22 ; X = data
98
- subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
99
- brcs Xoodoo_AddBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
100
- ; unrolled main loop: XOR 8 bytes per iteration
101
- Xoodoo_AddBytes_Loop8:
102
- ld r21, X+ ; r21 = next input byte
103
- ld r0, Z ; r0 = current state byte
104
- eor r0, r21
105
- st Z+, r0 ; store the XOR back and advance Z
106
- ld r21, X+
107
- ld r0, Z
108
- eor r0, r21
109
- st Z+, r0
110
- ld r21, X+
111
- ld r0, Z
112
- eor r0, r21
113
- st Z+, r0
114
- ld r21, X+
115
- ld r0, Z
116
- eor r0, r21
117
- st Z+, r0
118
- ld r21, X+
119
- ld r0, Z
120
- eor r0, r21
121
- st Z+, r0
122
- ld r21, X+
123
- ld r0, Z
124
- eor r0, r21
125
- st Z+, r0
126
- ld r21, X+
127
- ld r0, Z
128
- eor r0, r21
129
- st Z+, r0
130
- ld r21, X+
131
- ld r0, Z
132
- eor r0, r21
133
- st Z+, r0
134
- subi r18, 8 ; 8 more bytes consumed
135
- brcc Xoodoo_AddBytes_Loop8 ; no borrow: at least 8 bytes were still pending
136
- Xoodoo_AddBytes_Byte:
137
- ldi r19, 8
138
- add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
139
- breq Xoodoo_AddBytes_End ; nothing left
140
- Xoodoo_AddBytes_Loop1:
141
- ld r21, X+
142
- ld r0, Z
143
- eor r0, r21
144
- st Z+, r0
145
- dec r18
146
- brne Xoodoo_AddBytes_Loop1
147
- Xoodoo_AddBytes_End:
148
- ret
149
-
150
-
151
- ;----------------------------------------------------------------------------
152
- ;
153
- ; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
154
- ;   Copies length bytes from data over the state, starting at state+offset.
155
- ; argument state is passed in r24:r25
156
- ; argument data is passed in r22:r23
157
- ; argument offset is passed in r20:r21, only LSB (r20) is used
158
- ; argument length is passed in r18:r19, only LSB (r18) is used
159
- ; clobbers r0, r18, r19, X (r26:r27) and Z (r30:r31)
160
- .global Xoodoo_OverwriteBytes
161
- Xoodoo_OverwriteBytes:
162
- movw rZ, r24 ; Z = state
163
- add rZ, r20 ; Z += offset (8-bit add ...
164
- adc rZ+1, zero ; ... with carry into the high byte)
165
- movw rX, r22 ; X = data
166
- subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
167
- brcs Xoodoo_OverwriteBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
168
- ; unrolled main loop: copy 8 bytes per iteration
169
- Xoodoo_OverwriteBytes_Loop8:
170
- ld r0, X+ ; r0 = next input byte
171
- st Z+, r0 ; overwrite the state byte and advance Z
172
- ld r0, X+
173
- st Z+, r0
174
- ld r0, X+
175
- st Z+, r0
176
- ld r0, X+
177
- st Z+, r0
178
- ld r0, X+
179
- st Z+, r0
180
- ld r0, X+
181
- st Z+, r0
182
- ld r0, X+
183
- st Z+, r0
184
- ld r0, X+
185
- st Z+, r0
186
- subi r18, 8 ; 8 more bytes consumed
187
- brcc Xoodoo_OverwriteBytes_Loop8 ; no borrow: at least 8 bytes were still pending
188
- Xoodoo_OverwriteBytes_Byte:
189
- ldi r19, 8
190
- add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
191
- breq Xoodoo_OverwriteBytes_End ; nothing left
192
- Xoodoo_OverwriteBytes_Loop1:
193
- ld r0, X+
194
- st Z+, r0
195
- dec r18
196
- brne Xoodoo_OverwriteBytes_Loop1
197
- Xoodoo_OverwriteBytes_End:
198
- ret
199
-
200
- ;----------------------------------------------------------------------------
201
- ;
202
- ; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
203
- ;   Stores r1 into the first byteCount bytes of the state (r1 is the register the other routines name "zero").
204
- ; argument state is passed in r24:r25
205
- ; argument byteCount is passed in r22:r23, only LSB (r22) is used
206
- ; clobbers r22, r23 and Z (r30:r31)
207
- .global Xoodoo_OverwriteWithZeroes
208
- Xoodoo_OverwriteWithZeroes:
209
- movw rZ, r24 ; rZ = state
210
- mov r23, r22 ; incoming r23 is overwritten: only the low byte of byteCount is used
211
- lsr r23
212
- lsr r23
213
- lsr r23 ; r23 = byteCount / 8 = number of full 8-byte groups
214
- breq Xoodoo_OverwriteWithZeroes_Bytes ; fewer than 8 bytes: only the tail loop runs
215
- Xoodoo_OverwriteWithZeroes_LoopLanes:
216
- st Z+, r1 ; 8 stores per iteration
217
- st Z+, r1
218
- st Z+, r1
219
- st Z+, r1
220
- st Z+, r1
221
- st Z+, r1
222
- st Z+, r1
223
- st Z+, r1
224
- dec r23
225
- brne Xoodoo_OverwriteWithZeroes_LoopLanes
226
- Xoodoo_OverwriteWithZeroes_Bytes:
227
- andi r22, 7 ; r22 = byteCount % 8 = leftover bytes
228
- breq Xoodoo_OverwriteWithZeroes_End ; no leftover bytes
229
- Xoodoo_OverwriteWithZeroes_LoopBytes:
230
- st Z+, r1
231
- dec r22
232
- brne Xoodoo_OverwriteWithZeroes_LoopBytes
233
- Xoodoo_OverwriteWithZeroes_End:
234
- ret
235
-
236
- ;----------------------------------------------------------------------------
237
- ;
238
- ; void Xoodoo_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
239
- ;   Copies length bytes from state+offset to data; data is written here, so it is not const (confirm against Xoodoo-SnP.h).
240
- ; argument state is passed in r24:r25
241
- ; argument data is passed in r22:r23
242
- ; argument offset is passed in r20:r21, only LSB (r20) is used
243
- ; argument length is passed in r18:r19, only LSB (r18) is used
244
- ; clobbers r0, r18, r19, X (r26:r27) and Z (r30:r31)
245
- .global Xoodoo_ExtractBytes
246
- Xoodoo_ExtractBytes:
247
- movw rZ, r24 ; Z = state
248
- add rZ, r20 ; Z += offset (8-bit add ...
249
- adc rZ+1, zero ; ... with carry into the high byte)
250
- movw rX, r22 ; X = data (destination)
251
- subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
252
- brcs Xoodoo_ExtractBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
253
- ; unrolled main loop: copy 8 bytes per iteration
254
- Xoodoo_ExtractBytes_Loop8:
255
- ld r0, Z+ ; r0 = next state byte
256
- st X+, r0 ; write it to the output buffer
257
- ld r0, Z+
258
- st X+, r0
259
- ld r0, Z+
260
- st X+, r0
261
- ld r0, Z+
262
- st X+, r0
263
- ld r0, Z+
264
- st X+, r0
265
- ld r0, Z+
266
- st X+, r0
267
- ld r0, Z+
268
- st X+, r0
269
- ld r0, Z+
270
- st X+, r0
271
- subi r18, 8 ; 8 more bytes consumed
272
- brcc Xoodoo_ExtractBytes_Loop8 ; no borrow: at least 8 bytes were still pending
273
- Xoodoo_ExtractBytes_Byte:
274
- ldi r19, 8
275
- add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
276
- breq Xoodoo_ExtractBytes_End ; nothing left
277
- Xoodoo_ExtractBytes_Loop1:
278
- ld r0, Z+
279
- st X+, r0
280
- dec r18
281
- brne Xoodoo_ExtractBytes_Loop1
282
- Xoodoo_ExtractBytes_End:
283
- ret
284
-
285
- ;----------------------------------------------------------------------------
286
- ;
287
- ; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
288
- ;   For i in 0..length-1: output[i] = state[offset+i] ^ input[i]. The state itself is only read.
289
- ; argument state is passed in r24:r25
290
- ; argument input is passed in r22:r23
291
- ; argument output is passed in r20:r21
292
- ; argument offset is passed in r18:r19, only LSB (r18) is used
293
- ; argument length is passed in r16:r17, only LSB (r16) is used
294
- ; r16, r28 and r29 are saved and restored; clobbers r0, r19, r21, X (r26:r27) and Z (r30:r31)
295
- .global Xoodoo_ExtractAndAddBytes
296
- Xoodoo_ExtractAndAddBytes:
297
- tst r16
298
- breq Xoodoo_ExtractAndAddBytes_End ; length == 0: return before anything is pushed
299
- push r16 ; r16 doubles as the loop counter below
300
- push r28 ; Y (r28:r29) is used as the output pointer
301
- push r29
302
- movw rZ, r24 ; Z = state
303
- add rZ, r18 ; Z += offset (8-bit add ...
304
- adc rZ+1, zero ; ... with carry into the high byte)
305
- movw rX, r22 ; X = input
306
- movw rY, r20 ; Y = output
307
- subi r16, 8 ; r16 = length - 8; carry set when fewer than 8 bytes remain
308
- brcs Xoodoo_ExtractAndAddBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
309
- Xoodoo_ExtractAndAddBytes_LoopLane: ; unrolled main loop: 8 bytes per iteration
310
- ld r21, Z+ ; r21 = next state byte
311
- ld r0, X+ ; r0 = next input byte
312
- eor r0, r21
313
- st Y+, r0 ; output byte = state ^ input
314
- ld r21, Z+
315
- ld r0, X+
316
- eor r0, r21
317
- st Y+, r0
318
- ld r21, Z+
319
- ld r0, X+
320
- eor r0, r21
321
- st Y+, r0
322
- ld r21, Z+
323
- ld r0, X+
324
- eor r0, r21
325
- st Y+, r0
326
- ld r21, Z+
327
- ld r0, X+
328
- eor r0, r21
329
- st Y+, r0
330
- ld r21, Z+
331
- ld r0, X+
332
- eor r0, r21
333
- st Y+, r0
334
- ld r21, Z+
335
- ld r0, X+
336
- eor r0, r21
337
- st Y+, r0
338
- ld r21, Z+
339
- ld r0, X+
340
- eor r0, r21
341
- st Y+, r0
342
- subi r16, 8 ; 8 more bytes consumed
343
- brcc Xoodoo_ExtractAndAddBytes_LoopLane ; no borrow: at least 8 bytes were still pending
344
- Xoodoo_ExtractAndAddBytes_Byte:
345
- ldi r19, 8
346
- add r16, r19 ; undo the last subtraction: r16 = remaining bytes (0..7)
347
- breq Xoodoo_ExtractAndAddBytes_Done ; nothing left
348
- Xoodoo_ExtractAndAddBytes_Loop1:
349
- ld r21, Z+
350
- ld r0, X+
351
- eor r0, r21
352
- st Y+, r0
353
- dec r16
354
- brne Xoodoo_ExtractAndAddBytes_Loop1
355
- Xoodoo_ExtractAndAddBytes_Done:
356
- pop r29 ; restore in reverse push order
357
- pop r28
358
- pop r16
359
- Xoodoo_ExtractAndAddBytes_End:
360
- ret
361
-
362
- Xoodoo_RoundConstants_12: ; start of the table: 12 two-byte entries precede the terminator; loaded into Z by Xoodoo_Permute_12rounds
363
- .BYTE 0x58, 0x00
364
- .BYTE 0x38, 0x00
365
- .BYTE 0xC0, 0x03
366
- .BYTE 0xD0, 0x00
367
- .BYTE 0x20, 0x01
368
- .BYTE 0x14, 0x00
369
- Xoodoo_RoundConstants_6: ; last 6 entries; loaded into Z by Xoodoo_Permute_6rounds
370
- .BYTE 0x60, 0x00
371
- .BYTE 0x2C, 0x00
372
- .BYTE 0x80, 0x03
373
- .BYTE 0xF0, 0x00
374
- .BYTE 0xA0, 0x01
375
- .BYTE 0x12, 0x00
376
- Xoodoo_RoundConstants_0: ; end of table; Xoodoo_Permute_Nrounds steps back 2*nrounds bytes from this label
377
- .BYTE 0xFF, 0 ; terminator
378
-
379
- .text
380
-
381
- ; Register variables used in permutation (register numbers; rC0+1 etc. select the following registers), then a00..a23: byte offsets of the twelve 4-byte state lanes, used as Y+displacement
382
- #define rC0 2 // 4 regs (2-5)
383
- #define rC1 6 // 4 regs (6-9)
384
- #define rC2 10 // 4 regs (10-13)
385
- #define rC3 14 // 4 regs (14-17)
386
- #define rVv 18 // 4 regs (18-21)
387
- #define rTt 22 // 4 regs (22-25)
388
- // r26-27 free
389
- #define a00 0
390
- #define a01 4
391
- #define a02 8
392
- #define a03 12
393
- #define a10 16
394
- #define a11 20
395
- #define a12 24
396
- #define a13 28
397
- #define a20 32
398
- #define a21 36
399
- #define a22 40
400
- #define a23 44
401
-
402
- ;----------------------------------------------------------------------------
403
- ;
404
- ; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
405
- ; Applies the rounds that use the last `nrounds` entries of the round-constant table; nrounds == 0 returns with state unchanged.
406
- ; argument state is passed in r24:r25
407
- ; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
408
- ; NOTE(review): there is no upper-bound check - the table holds 12 constants, so nrounds > 12 would start reading before it.
409
- .global Xoodoo_Permute_Nrounds
410
- Xoodoo_Permute_Nrounds:
411
- tst r22 ; the shared loop tests for the terminator only after finishing a round, so zero rounds must be caught here
- brne Xoodoo_Permute_Nrounds_Start
- ret ; nothing has been pushed yet, so a plain return leaves state and stack untouched
- Xoodoo_Permute_Nrounds_Start:
- mov r26, r22
412
- ldi rZ+0, lo8(Xoodoo_RoundConstants_0) ; Z = end of the round-constant table (rZ is defined outside this chunk)
413
- ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
414
- lsl r26 ; each table entry is 2 bytes
415
- sub rZ, r26 ; Z -= 2 * nrounds, borrow propagated on the next line
416
- sbc rZ+1, zero ; `zero` is defined outside this chunk - presumably a register that always holds 0
417
- rjmp Xoodoo_Permute ; continue in the shared permutation body with Z at the first constant to use
418
-
419
- ;----------------------------------------------------------------------------
420
- ;
421
- ; void Xoodoo_Permute_6rounds( void *state )
422
- ; Entry point that only selects where in the round-constant table the shared permutation body starts.
423
- ; argument state is passed in r24:r25
424
- ;
425
- .global Xoodoo_Permute_6rounds
426
- Xoodoo_Permute_6rounds:
427
- ldi rZ+0, lo8(Xoodoo_RoundConstants_6) ; Z -> the entry six constants before the terminator
428
- ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
429
- rjmp Xoodoo_Permute ; the shared body runs until it meets the table terminator
430
-
431
- ;----------------------------------------------------------------------------
432
- ;
433
- ; void Xoodoo_Permute_12rounds( void *state )
434
- ; Starts at the first entry of the round-constant table and falls through into the shared body Xoodoo_Permute.
435
- ; argument state is passed in r24:r25
436
- ;
437
- .global Xoodoo_Permute_12rounds
438
- Xoodoo_Permute_12rounds:
439
- ldi rZ+0, lo8(Xoodoo_RoundConstants_12) ; Z -> first entry of the round-constant table
440
- ldi rZ+1, hi8(Xoodoo_RoundConstants_12) ; no jump needed: execution continues into Xoodoo_Permute
441
- Xoodoo_Permute: ; shared body: expects Z -> first round constant to use and the state pointer in rpState
442
- push r2 ; r2-r17 and r28:r29 are saved here and restored in reverse order at Xoodoo_Done
443
- push r3
444
- push r4
445
- push r5
446
- push r6
447
- push r7
448
- push r8
449
- push r9
450
- push r10
451
- push r11
452
- push r12
453
- push r13
454
- push r14
455
- push r15
456
- push r16
457
- push r17
458
- push r28
459
- push r29
460
-
461
- ; Initial Prepare Theta: rC0..rC3 = a0x ^ a1x ^ a2x (XOR of the three lanes in each column x)
462
- movw rY, rpState ; rpState is defined outside this chunk - presumably r24:r25, where the header says state is passed
463
- ld rC0+0, Y+ ; a00
464
- ld rC0+1, Y+
465
- ld rC0+2, Y+
466
- ld rC0+3, Y+
467
- ld rC1+0, Y+ ; a01
468
- ld rC1+1, Y+
469
- ld rC1+2, Y+
470
- ld rC1+3, Y+
471
- ld rC2+0, Y+ ; a02
472
- ld rC2+1, Y+
473
- ld rC2+2, Y+
474
- ld rC2+3, Y+
475
- ld rC3+0, Y+ ; a03
476
- ld rC3+1, Y+
477
- ld rC3+2, Y+
478
- ld rC3+3, Y+
479
-
480
- ld r0, Y+ ; a10
481
- eor rC0+0, r0
482
- ld r0, Y+
483
- eor rC0+1, r0
484
- ld r0, Y+
485
- eor rC0+2, r0
486
- ld r0, Y+
487
- eor rC0+3, r0
488
- ld r0, Y+ ; a11
489
- eor rC1+0, r0
490
- ld r0, Y+
491
- eor rC1+1, r0
492
- ld r0, Y+
493
- eor rC1+2, r0
494
- ld r0, Y+
495
- eor rC1+3, r0
496
- ld r0, Y+ ; a12
497
- eor rC2+0, r0
498
- ld r0, Y+
499
- eor rC2+1, r0
500
- ld r0, Y+
501
- eor rC2+2, r0
502
- ld r0, Y+
503
- eor rC2+3, r0
504
- ld r0, Y+ ; a13
505
- eor rC3+0, r0
506
- ld r0, Y+
507
- eor rC3+1, r0
508
- ld r0, Y+
509
- eor rC3+2, r0
510
- ld r0, Y+
511
- eor rC3+3, r0
512
-
513
- ld r0, Y+ ; a20
514
- eor rC0+0, r0
515
- ld r0, Y+
516
- eor rC0+1, r0
517
- ld r0, Y+
518
- eor rC0+2, r0
519
- ld r0, Y+
520
- eor rC0+3, r0
521
- ld r0, Y+ ; a21
522
- eor rC1+0, r0
523
- ld r0, Y+
524
- eor rC1+1, r0
525
- ld r0, Y+
526
- eor rC1+2, r0
527
- ld r0, Y+
528
- eor rC1+3, r0
529
- ld r0, Y+ ; a22
530
- eor rC2+0, r0
531
- ld r0, Y+
532
- eor rC2+1, r0
533
- ld r0, Y+
534
- eor rC2+2, r0
535
- ld r0, Y+
536
- eor rC2+3, r0
537
- ld r0, Y+ ; a23
538
- eor rC3+0, r0
539
- ld r0, Y+
540
- eor rC3+1, r0
541
- ld r0, Y+
542
- eor rC3+2, r0
543
- ld r0, Y+
544
- eor rC3+3, r0
545
- sbiw rY, 48 ; 48 bytes were read with post-increment: rewind Y to the start of the state
546
-
547
- Xoodoo_RoundLoop: ; one round per pass: Y -> state, Z -> this round's constant, rC0..rC3 = column parities (refreshed by the "Early Theta" steps below)
548
- ; Theta + Rho west
549
- ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
550
- mov rVv+1, rC0+0 ; rol 9
551
- mov rVv+2, rC0+1
552
- mov rVv+3, rC0+2
553
- mov rVv+0, rC0+3
554
- lsl rVv+0 ; the byte-shifted copy above is the rol 8 part; now rotate left by one more bit
555
- rol rVv+1
556
- rol rVv+2
557
- rol rVv+3
558
- adc rVv+0, zero ; the bit shifted out of the top byte re-enters as bit 0
559
- eor rVv+0, rC0+0
560
- eor rVv+1, rC0+1
561
- eor rVv+2, rC0+2
562
- eor rVv+3, rC0+3
563
- bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
564
- ror rVv+3
565
- ror rVv+2
566
- ror rVv+1
567
- ror rVv
568
- bld rVv+3, 7 ; the bit saved in T becomes the new top bit (the carry that entered the first ror was arbitrary)
569
- bst rVv, 0
570
- ror rVv+3
571
- ror rVv+2
572
- ror rVv+1
573
- ror rVv
574
- bld rVv+3, 7
575
- bst rVv, 0
576
- ror rVv+3
577
- ror rVv+2
578
- ror rVv+1
579
- ror rVv
580
- bld rVv+3, 7
581
- mov rC0+0, rVv+3 ; copying back one byte position up is the rol 8 part
582
- mov rC0+1, rVv+0
583
- mov rC0+2, rVv+1
584
- mov rC0+3, rVv+2
585
-
586
- ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
587
- mov rVv+1, rC1+0 ; rol 9
588
- mov rVv+2, rC1+1
589
- mov rVv+3, rC1+2
590
- mov rVv+0, rC1+3
591
- lsl rVv+0
592
- rol rVv+1
593
- rol rVv+2
594
- rol rVv+3
595
- adc rVv+0, zero
596
- eor rVv+0, rC1+0
597
- eor rVv+1, rC1+1
598
- eor rVv+2, rC1+2
599
- eor rVv+3, rC1+3
600
- bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
601
- ror rVv+3
602
- ror rVv+2
603
- ror rVv+1
604
- ror rVv
605
- bld rVv+3, 7
606
- bst rVv, 0
607
- ror rVv+3
608
- ror rVv+2
609
- ror rVv+1
610
- ror rVv
611
- bld rVv+3, 7
612
- bst rVv, 0
613
- ror rVv+3
614
- ror rVv+2
615
- ror rVv+1
616
- ror rVv
617
- bld rVv+3, 7
618
- mov rC1+0, rVv+3
619
- mov rC1+1, rVv+0
620
- mov rC1+2, rVv+1
621
- mov rC1+3, rVv+2
622
-
623
- ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
624
- mov rVv+1, rC2+0 ; rol 9
625
- mov rVv+2, rC2+1
626
- mov rVv+3, rC2+2
627
- mov rVv+0, rC2+3
628
- lsl rVv+0
629
- rol rVv+1
630
- rol rVv+2
631
- rol rVv+3
632
- adc rVv+0, zero
633
- eor rVv+0, rC2+0
634
- eor rVv+1, rC2+1
635
- eor rVv+2, rC2+2
636
- eor rVv+3, rC2+3
637
- bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
638
- ror rVv+3
639
- ror rVv+2
640
- ror rVv+1
641
- ror rVv
642
- bld rVv+3, 7
643
- bst rVv, 0
644
- ror rVv+3
645
- ror rVv+2
646
- ror rVv+1
647
- ror rVv
648
- bld rVv+3, 7
649
- bst rVv, 0
650
- ror rVv+3
651
- ror rVv+2
652
- ror rVv+1
653
- ror rVv
654
- bld rVv+3, 7
655
- mov rC2+0, rVv+3
656
- mov rC2+1, rVv+0
657
- mov rC2+2, rVv+1
658
- mov rC2+3, rVv+2
659
-
660
- ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
661
- mov rVv+1, rC3+0 ; rol 9
662
- mov rVv+2, rC3+1
663
- mov rVv+3, rC3+2
664
- mov rVv+0, rC3+3
665
- lsl rVv+0
666
- rol rVv+1
667
- rol rVv+2
668
- rol rVv+3
669
- adc rVv+0, zero
670
- eor rVv+0, rC3+0
671
- eor rVv+1, rC3+1
672
- eor rVv+2, rC3+2
673
- eor rVv+3, rC3+3
674
- bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
675
- ror rVv+3
676
- ror rVv+2
677
- ror rVv+1
678
- ror rVv
679
- bld rVv+3, 7
680
- bst rVv, 0
681
- ror rVv+3
682
- ror rVv+2
683
- ror rVv+1
684
- ror rVv
685
- bld rVv+3, 7
686
- bst rVv, 0
687
- ror rVv+3
688
- ror rVv+2
689
- ror rVv+1
690
- ror rVv
691
- bld rVv+3, 7
692
- mov rC3+0, rVv+3
693
- mov rC3+1, rVv+0
694
- mov rC3+2, rVv+1
695
- mov rC3+3, rVv+2
696
-
697
- ; v1 = a13; (the four lanes a10..a13 each move one lane position while a rotated parity is XORed in; a13 is kept because its slot is overwritten first)
698
- ldd rVv+0, Y+a13+0
699
- ldd rVv+1, Y+a13+1
700
- ldd rVv+2, Y+a13+2
701
- ldd rVv+3, Y+a13+3
702
-
703
- ; a13 = a12 ^ c1;
704
- ldd r0, Y+a12+0
705
- eor r0, rC1+0
706
- std Y+a13+0, r0
707
- ldd r0, Y+a12+1
708
- eor r0, rC1+1
709
- std Y+a13+1, r0
710
- ldd r0, Y+a12+2
711
- eor r0, rC1+2
712
- std Y+a13+2, r0
713
- ldd r0, Y+a12+3
714
- eor r0, rC1+3
715
- std Y+a13+3, r0
716
-
717
- ; a12 = a11 ^ c0;
718
- ldd r0, Y+a11+0
719
- eor r0, rC0+0
720
- std Y+a12+0, r0
721
- ldd r0, Y+a11+1
722
- eor r0, rC0+1
723
- std Y+a12+1, r0
724
- ldd r0, Y+a11+2
725
- eor r0, rC0+2
726
- std Y+a12+2, r0
727
- ldd r0, Y+a11+3
728
- eor r0, rC0+3
729
- std Y+a12+3, r0
730
-
731
- ; a11 = a10 ^ c3;
732
- ldd r0, Y+a10+0
733
- eor r0, rC3+0
734
- std Y+a11+0, r0
735
- ldd r0, Y+a10+1
736
- eor r0, rC3+1
737
- std Y+a11+1, r0
738
- ldd r0, Y+a10+2
739
- eor r0, rC3+2
740
- std Y+a11+2, r0
741
- ldd r0, Y+a10+3
742
- eor r0, rC3+3
743
- std Y+a11+3, r0
744
-
745
- ; a10 = v1 ^ c2;
746
- eor rVv+0, rC2+0
747
- std Y+a10+0, rVv+0
748
- eor rVv+1, rC2+1
749
- std Y+a10+1, rVv+1
750
- eor rVv+2, rC2+2
751
- std Y+a10+2, rVv+2
752
- eor rVv+3, rC2+3
753
- std Y+a10+3, rVv+3
754
-
755
- ; a20 = ROTL32(a20 ^ c3, 11);
756
- ldd rVv+0, Y+a20+3 ; bytes are loaded one register position up, which already gives rol 8 ...
757
- eor rVv+0, rC3+3
758
- ldd rVv+1, Y+a20+0
759
- eor rVv+1, rC3+0
760
- ldd rVv+2, Y+a20+1
761
- eor rVv+2, rC3+1
762
- ldd rVv+3, Y+a20+2
763
- eor rVv+3, rC3+2
764
- lsl rVv+0 ; ... followed by three 1-bit left rotates (8 + 3 = 11)
765
- rol rVv+1
766
- rol rVv+2
767
- rol rVv+3
768
- adc rVv+0, zero
769
- lsl rVv+0
770
- rol rVv+1
771
- rol rVv+2
772
- rol rVv+3
773
- adc rVv+0, zero
774
- lsl rVv+0
775
- rol rVv+1
776
- rol rVv+2
777
- rol rVv+3
778
- adc rVv+0, zero
779
- std Y+a20+0, rVv+0
780
- std Y+a20+1, rVv+1
781
- std Y+a20+2, rVv+2
782
- std Y+a20+3, rVv+3
783
-
784
- ; a21 = ROTL32(a21 ^ c0, 11);
785
- ldd rVv+0, Y+a21+3
786
- eor rVv+0, rC0+3
787
- ldd rVv+1, Y+a21+0
788
- eor rVv+1, rC0+0
789
- ldd rVv+2, Y+a21+1
790
- eor rVv+2, rC0+1
791
- ldd rVv+3, Y+a21+2
792
- eor rVv+3, rC0+2
793
- lsl rVv+0
794
- rol rVv+1
795
- rol rVv+2
796
- rol rVv+3
797
- adc rVv+0, zero
798
- lsl rVv+0
799
- rol rVv+1
800
- rol rVv+2
801
- rol rVv+3
802
- adc rVv+0, zero
803
- lsl rVv+0
804
- rol rVv+1
805
- rol rVv+2
806
- rol rVv+3
807
- adc rVv+0, zero
808
- std Y+a21+0, rVv+0
809
- std Y+a21+1, rVv+1
810
- std Y+a21+2, rVv+2
811
- std Y+a21+3, rVv+3
812
-
813
- ; a22 = ROTL32(a22 ^ c1, 11);
814
- ldd rVv+0, Y+a22+3
815
- eor rVv+0, rC1+3
816
- ldd rVv+1, Y+a22+0
817
- eor rVv+1, rC1+0
818
- ldd rVv+2, Y+a22+1
819
- eor rVv+2, rC1+1
820
- ldd rVv+3, Y+a22+2
821
- eor rVv+3, rC1+2
822
- lsl rVv+0
823
- rol rVv+1
824
- rol rVv+2
825
- rol rVv+3
826
- adc rVv+0, zero
827
- lsl rVv+0
828
- rol rVv+1
829
- rol rVv+2
830
- rol rVv+3
831
- adc rVv+0, zero
832
- lsl rVv+0
833
- rol rVv+1
834
- rol rVv+2
835
- rol rVv+3
836
- adc rVv+0, zero
837
- std Y+a22+0, rVv+0
838
- std Y+a22+1, rVv+1
839
- std Y+a22+2, rVv+2
840
- std Y+a22+3, rVv+3
841
-
842
- ; a23 = ROTL32(a23 ^ c2, 11);
843
- ldd rVv+0, Y+a23+3
844
- eor rVv+0, rC2+3
845
- ldd rVv+1, Y+a23+0
846
- eor rVv+1, rC2+0
847
- ldd rVv+2, Y+a23+1
848
- eor rVv+2, rC2+1
849
- ldd rVv+3, Y+a23+2
850
- eor rVv+3, rC2+2
851
- lsl rVv+0
852
- rol rVv+1
853
- rol rVv+2
854
- rol rVv+3
855
- adc rVv+0, zero
856
- lsl rVv+0
857
- rol rVv+1
858
- rol rVv+2
859
- rol rVv+3
860
- adc rVv+0, zero
861
- lsl rVv+0
862
- rol rVv+1
863
- rol rVv+2
864
- rol rVv+3
865
- adc rVv+0, zero
866
- std Y+a23+0, rVv+0
867
- std Y+a23+1, rVv+1
868
- std Y+a23+2, rVv+2
869
- std Y+a23+3, rVv+3
870
-
871
- ; v1 = c3;
872
- movw rVv+0, rC3+0
873
- movw rVv+2, rC3+2
874
-
875
- ; c3 = a03 ^ c2; /* a03 resides in c3 */
876
- ldd rC3+0, Y+a03+0
877
- eor rC3+0, rC2+0
878
- ldd rC3+1, Y+a03+1
879
- eor rC3+1, rC2+1
880
- ldd rC3+2, Y+a03+2
881
- eor rC3+2, rC2+2
882
- ldd rC3+3, Y+a03+3
883
- eor rC3+3, rC2+3
884
-
885
- ; c2 = a02 ^ c1; /* a02 resides in c2 */
886
- ldd rC2+0, Y+a02+0
887
- eor rC2+0, rC1+0
888
- ldd rC2+1, Y+a02+1
889
- eor rC2+1, rC1+1
890
- ldd rC2+2, Y+a02+2
891
- eor rC2+2, rC1+2
892
- ldd rC2+3, Y+a02+3
893
- eor rC2+3, rC1+3
894
-
895
- ; c1 = a01 ^ c0; /* a01 resides in c1 */
896
- ldd rC1+0, Y+a01+0
897
- eor rC1+0, rC0+0
898
- ldd rC1+1, Y+a01+1
899
- eor rC1+1, rC0+1
900
- ldd rC1+2, Y+a01+2
901
- eor rC1+2, rC0+2
902
- ldd rC1+3, Y+a01+3
903
- eor rC1+3, rC0+3
904
-
905
- ; c0 = a00 ^ v1; /* a00 resides in c0 */
906
- ldd rC0+0, Y+a00+0
907
- eor rC0+0, rVv+0
908
- ldd rC0+1, Y+a00+1
909
- eor rC0+1, rVv+1
910
- ldd rC0+2, Y+a00+2
911
- eor rC0+2, rVv+2
912
- ldd rC0+3, Y+a00+3
913
- eor rC0+3, rVv+3
914
-
915
- ; c0 ^= __rc; /* +Iota */
916
- lpm rVv+0, Z+ ; two constant bytes are read from program memory; Z ends up at the next table entry
917
- lpm rVv+1, Z+
918
- eor rC0+0, rVv+0 ; only the two low bytes of c0 are affected
919
- eor rC0+1, rVv+1
920
-
921
- ; Chi + Rho east + Early Theta
922
- ; a00 = c0 ^= ~a10 & a20;
923
- ldd r0, Y+a10+0
924
- com r0
925
- ldd rTt+0, Y+a20+0 ; a20 in rTt
926
- and r0, rTt+0
927
- eor rC0+0, r0
928
- std Y+a00+0, rC0+0
929
- ldd r0, Y+a10+1
930
- com r0
931
- ldd rTt+1, Y+a20+1
932
- and r0, rTt+1
933
- eor rC0+1, r0
934
- std Y+a00+1, rC0+1
935
- ldd r0, Y+a10+2
936
- com r0
937
- ldd rTt+2, Y+a20+2
938
- and r0, rTt+2
939
- eor rC0+2, r0
940
- std Y+a00+2, rC0+2
941
- ldd r0, Y+a10+3
942
- com r0
943
- ldd rTt+3, Y+a20+3
944
- and r0, rTt+3
945
- eor rC0+3, r0
946
- std Y+a00+3, rC0+3
947
-
948
- ; a10 ^= ~a20 & c0;
949
- com rTt+0
950
- and rTt+0, rC0+0
951
- ldd r0, Y+a10+0
952
- eor rTt+0, r0 ; new a10 in rTt
953
- std Y+a10+0, rTt+0
954
- com rTt+1
955
- and rTt+1, rC0+1
956
- ldd r0, Y+a10+1
957
- eor rTt+1, r0
958
- std Y+a10+1, rTt+1
959
- com rTt+2
960
- and rTt+2, rC0+2
961
- ldd r0, Y+a10+2
962
- eor rTt+2, r0
963
- std Y+a10+2, rTt+2
964
- com rTt+3
965
- and rTt+3, rC0+3
966
- ldd r0, Y+a10+3
967
- eor rTt+3, r0
968
- std Y+a10+3, rTt+3
969
-
970
- ; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
971
- movw rVv+0, rTt+0 ; a10 in rVv
972
- movw rVv+2, rTt+2
973
- mov r0, rC0+0
974
- com r0
975
- and rTt+0, r0
976
- ldd r0, Y+a20+0
977
- eor rTt+0, r0
978
-
979
- mov r0, rC0+1
980
- com r0
981
- and rTt+1, r0
982
- ldd r0, Y+a20+1
983
- eor rTt+1, r0
984
-
985
- mov r0, rC0+2
986
- com r0
987
- and rTt+2, r0
988
- ldd r0, Y+a20+2
989
- eor rTt+2, r0
990
-
991
- mov r0, rC0+3
992
- com r0
993
- and rTt+3, r0
994
- ldd r0, Y+a20+3
995
- eor rTt+3, r0
996
- std Y+a20+0, rTt+3 ; stored one byte position up (= rol 8); parked in the a20 slot until the a22 step below moves it to a22
997
- std Y+a20+1, rTt+0
998
- std Y+a20+2, rTt+1
999
- std Y+a20+3, rTt+2
1000
-
1001
- ; c0 ^= a10 = ROTL32(a10, 1);
1002
- lsl rVv+0
1003
- rol rVv+1
1004
- std Y+a10+1, rVv+1
1005
- eor rC0+1, rVv+1
1006
- rol rVv+2
1007
- std Y+a10+2, rVv+2
1008
- eor rC0+2, rVv+2
1009
- rol rVv+3
1010
- std Y+a10+3, rVv+3
1011
- eor rC0+3, rVv+3
1012
- adc rVv+0, zero ; std/eor do not change the carry flag, so the bit rotated out above is still in C here
1013
- std Y+a10+0, rVv+0
1014
- eor rC0+0, rVv+0
1015
-
1016
- ; a02 = c2 ^= ~a12 & a22;
1017
- ldd r0, Y+a12+0
1018
- com r0
1019
- ldd rVv+0, Y+a22+0 ; a22 in rVv
1020
- and r0, rVv+0
1021
- eor rC2+0, r0
1022
- std Y+a02+0, rC2+0
1023
- ldd r0, Y+a12+1
1024
- com r0
1025
- ldd rVv+1, Y+a22+1
1026
- and r0, rVv+1
1027
- eor rC2+1, r0
1028
- std Y+a02+1, rC2+1
1029
- ldd r0, Y+a12+2
1030
- com r0
1031
- ldd rVv+2, Y+a22+2
1032
- and r0, rVv+2
1033
- eor rC2+2, r0
1034
- std Y+a02+2, rC2+2
1035
- ldd r0, Y+a12+3
1036
- com r0
1037
- ldd rVv+3, Y+a22+3
1038
- and r0, rVv+3
1039
- eor rC2+3, r0
1040
- std Y+a02+3, rC2+3
1041
-
1042
- ; a12 ^= ~a22 & c2;
1043
- mov r0, rVv+0 ; a12 in rTt
1044
- com r0
1045
- and r0, rC2+0
1046
- ldd rTt+0, Y+a12+0
1047
- eor rTt+0, r0
1048
- std Y+a12+0, rTt+0
1049
- mov r0, rVv+1
1050
- com r0
1051
- and r0, rC2+1
1052
- ldd rTt+1, Y+a12+1
1053
- eor rTt+1, r0
1054
- std Y+a12+1, rTt+1
1055
- mov r0, rVv+2
1056
- com r0
1057
- and r0, rC2+2
1058
- ldd rTt+2, Y+a12+2
1059
- eor rTt+2, r0
1060
- std Y+a12+2, rTt+2
1061
- mov r0, rVv+3
1062
- com r0
1063
- and r0, rC2+3
1064
- ldd rTt+3, Y+a12+3
1065
- eor rTt+3, r0
1066
- std Y+a12+3, rTt+3
1067
-
1068
- ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
1069
- mov r0, rC2+0
1070
- com r0
1071
- and r0, rTt+0
1072
- eor r0, rVv+0
1073
- ldd rVv+0, Y+a20+1 ; rVv = a22
1074
- std Y+a20+1, r0
1075
- eor rC0+1, r0
1076
- mov r0, rC2+1
1077
- com r0
1078
- and r0, rTt+1
1079
- eor r0, rVv+1
1080
- ldd rVv+1, Y+a20+2
1081
- std Y+a20+2, r0
1082
- eor rC0+2, r0
1083
- mov r0, rC2+2
1084
- com r0
1085
- and r0, rTt+2
1086
- eor r0, rVv+2
1087
- ldd rVv+2, Y+a20+3
1088
- std Y+a20+3, r0
1089
- eor rC0+3, r0
1090
- mov r0, rC2+3
1091
- com r0
1092
- and r0, rTt+3
1093
- eor r0, rVv+3
1094
- ldd rVv+3, Y+a20+0
1095
- std Y+a20+0, r0
1096
- eor rC0+0, r0
1097
-
1098
- ; c2 ^= a12 = ROTL32(a12, 1);
1099
- lsl rTt+0
1100
- rol rTt+1
1101
- eor rC2+1, rTt+1
1102
- std Y+a12+1, rTt+1
1103
- rol rTt+2
1104
- eor rC2+2, rTt+2
1105
- std Y+a12+2, rTt+2
1106
- rol rTt+3
1107
- eor rC2+3, rTt+3
1108
- std Y+a12+3, rTt+3
1109
- adc rTt+0, zero
1110
- eor rC2+0, rTt+0
1111
- std Y+a12+0, rTt+0
1112
-
1113
- ; a22 = v1; (the value parked in the a20 slot earlier, reloaded into rVv with its bytes offset by one register)
1114
- std Y+a22+0, rVv+3
1115
- std Y+a22+1, rVv+0
1116
- std Y+a22+2, rVv+1
1117
- std Y+a22+3, rVv+2
1118
-
1119
- ; c2 ^= v1;
1120
- eor rC2+0, rVv+3
1121
- eor rC2+1, rVv+0
1122
- eor rC2+2, rVv+1
1123
- eor rC2+3, rVv+2
1124
-
1125
- ; a01 = c1 ^= ~a11 & a21;
1126
- ldd rTt+0, Y+a11+0 ;rTt holds a11
1127
- mov r0, rTt+0
1128
- com r0
1129
- ldd rVv+0, Y+a21+0 ;rVv holds a21
1130
- and r0, rVv+0
1131
- eor rC1+0, r0
1132
- std Y+a01+0, rC1+0
1133
- ldd rTt+1, Y+a11+1
1134
- mov r0, rTt+1
1135
- com r0
1136
- ldd rVv+1, Y+a21+1
1137
- and r0, rVv+1
1138
- eor rC1+1, r0
1139
- std Y+a01+1, rC1+1
1140
- ldd rTt+2, Y+a11+2
1141
- mov r0, rTt+2
1142
- com r0
1143
- ldd rVv+2, Y+a21+2
1144
- and r0, rVv+2
1145
- eor rC1+2, r0
1146
- std Y+a01+2, rC1+2
1147
- ldd rTt+3, Y+a11+3
1148
- mov r0, rTt+3
1149
- com r0
1150
- ldd rVv+3, Y+a21+3
1151
- and r0, rVv+3
1152
- eor rC1+3, r0
1153
- std Y+a01+3, rC1+3
1154
-
1155
- ; a11 ^= ~a21 & c1;
1156
- mov r0, rVv+0
1157
- com r0
1158
- and r0, rC1+0
1159
- eor rTt+0, r0
1160
- std Y+a11+0, rTt+0
1161
- mov r0, rVv+1
1162
- com r0
1163
- and r0, rC1+1
1164
- eor rTt+1, r0
1165
- std Y+a11+1, rTt+1
1166
- mov r0, rVv+2
1167
- com r0
1168
- and r0, rC1+2
1169
- eor rTt+2, r0
1170
- std Y+a11+2, rTt+2
1171
- mov r0, rVv+3
1172
- com r0
1173
- and r0, rC1+3
1174
- eor rTt+3, r0
1175
- std Y+a11+3, rTt+3
1176
-
1177
- ; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
1178
- mov r0, rC1+0
1179
- com r0
1180
- and r0, rTt+0
1181
- eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
1182
- mov r0, rC1+1
1183
- com r0
1184
- and r0, rTt+1
1185
- eor rVv+1, r0
1186
- mov r0, rC1+2
1187
- com r0
1188
- and r0, rTt+2
1189
- eor rVv+2, r0
1190
- mov r0, rC1+3
1191
- com r0
1192
- and r0, rTt+3
1193
- eor rVv+3, r0
1194
-
1195
- ; c1 ^= a11 = ROTL32(a11, 1);
1196
- lsl rTt+0
1197
- rol rTt+1
1198
- eor rC1+1, rTt+1
1199
- std Y+a11+1, rTt+1
1200
- rol rTt+2
1201
- eor rC1+2, rTt+2
1202
- std Y+a11+2, rTt+2
1203
- rol rTt+3
1204
- eor rC1+3, rTt+3
1205
- std Y+a11+3, rTt+3
1206
- adc rTt+0, zero
1207
- eor rC1+0, rTt+0
1208
- std Y+a11+0, rTt+0
1209
-
1210
- ; a03 = c3 ^= ~a13 & a23;
1211
- ldd r0, Y+a13+0
1212
- com r0
1213
- ldd rTt+0, Y+a23+0 ; a23 in rTt
1214
- and r0, rTt+0
1215
- eor rC3+0, r0
1216
- std Y+a03+0, rC3+0
1217
- ldd r0, Y+a13+1
1218
- com r0
1219
- ldd rTt+1, Y+a23+1
1220
- and r0, rTt+1
1221
- eor rC3+1, r0
1222
- std Y+a03+1, rC3+1
1223
- ldd r0, Y+a13+2
1224
- com r0
1225
- ldd rTt+2, Y+a23+2
1226
- and r0, rTt+2
1227
- eor rC3+2, r0
1228
- std Y+a03+2, rC3+2
1229
- ldd r0, Y+a13+3
1230
- com r0
1231
- ldd rTt+3, Y+a23+3
1232
- and r0, rTt+3
1233
- eor rC3+3, r0
1234
- std Y+a03+3, rC3+3
1235
-
1236
- ; a13 ^= ~a23 & c3;
1237
- mov r0, rTt+0
1238
- com r0
1239
- and r0, rC3+0
1240
- ldd rTt+0, Y+a13+0 ; a13 in rTt
1241
- eor rTt+0, r0
1242
- mov r0, rTt+1
1243
- com r0
1244
- and r0, rC3+1
1245
- ldd rTt+1, Y+a13+1
1246
- eor rTt+1, r0
1247
- mov r0, rTt+2
1248
- com r0
1249
- and r0, rC3+2
1250
- ldd rTt+2, Y+a13+2
1251
- eor rTt+2, r0
1252
- mov r0, rTt+3
1253
- com r0
1254
- and r0, rC3+3
1255
- ldd rTt+3, Y+a13+3
1256
- eor rTt+3, r0
1257
-
1258
- ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
1259
- push rVv ; rVv+0 is borrowed as scratch below while rVv still holds v1, so its byte is parked on the stack
1260
- mov r0, rC3+0
1261
- com r0
1262
- and r0, rTt+0
1263
- ldd rVv, Y+a23+0
1264
- eor r0, rVv
1265
- eor rC1+1, r0
1266
- std Y+a21+1, r0
1267
- mov r0, rC3+1
1268
- com r0
1269
- and r0, rTt+1
1270
- ldd rVv, Y+a23+1
1271
- eor r0, rVv
1272
- eor rC1+2, r0
1273
- std Y+a21+2, r0
1274
- mov r0, rC3+2
1275
- com r0
1276
- and r0, rTt+2
1277
- ldd rVv, Y+a23+2
1278
- eor r0, rVv
1279
- eor rC1+3, r0
1280
- std Y+a21+3, r0
1281
- mov r0, rC3+3
1282
- com r0
1283
- and r0, rTt+3
1284
- ldd rVv, Y+a23+3
1285
- eor r0, rVv
1286
- eor rC1+0, r0
1287
- std Y+a21+0, r0
1288
- pop rVv ; byte 0 of v1 is back
1289
-
1290
- ; a23 = v1;
1291
- std Y+a23+0, rVv+3 ; rol8(rVv)
1292
- std Y+a23+1, rVv+0
1293
- std Y+a23+2, rVv+1
1294
- std Y+a23+3, rVv+2
1295
-
1296
- ; c3 ^= v1;
1297
- eor rC3+0, rVv+3
1298
- eor rC3+1, rVv+0
1299
- eor rC3+2, rVv+1
1300
- eor rC3+3, rVv+2
1301
-
1302
- ; c3 ^= a13 = ROTL32(a13, 1);
1303
- lsl rTt+0
1304
- rol rTt+1
1305
- std Y+a13+1, rTt+1
1306
- eor rC3+1, rTt+1
1307
- rol rTt+2
1308
- std Y+a13+2, rTt+2
1309
- eor rC3+2, rTt+2
1310
- rol rTt+3
1311
- std Y+a13+3, rTt+3
1312
- eor rC3+3, rTt+3
1313
- adc rTt+0, zero
1314
- std Y+a13+0, rTt+0
1315
- eor rC3+0, rTt+0
1316
-
1317
- ; Check for terminator
1318
- lpm r0, Z ; peek at the low byte of the next table entry without advancing Z
1319
- inc r0 ; 0xFF + 1 wraps to 0, so the zero flag is set exactly on the terminator
1320
- breq Xoodoo_Done
1321
- rjmp Xoodoo_RoundLoop ; presumably rjmp because the loop body is too long for a conditional branch to reach back
1322
- Xoodoo_Done: ; every lane was already written back with std inside the round loop; only the saved registers remain to be restored
1323
- pop r29 ; pops mirror the pushes at Xoodoo_Permute in reverse order
1324
- pop r28
1325
- pop r17
1326
- pop r16
1327
- pop r15
1328
- pop r14
1329
- pop r13
1330
- pop r12
1331
- pop r11
1332
- pop r10
1333
- pop r9
1334
- pop r8
1335
- pop r7
1336
- pop r6
1337
- pop r5
1338
- pop r4
1339
- pop r3
1340
- pop r2
1341
- ret