sleeping_kangaroo12 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1341 @@
1
+ ;
2
+ ; The eXtended Keccak Code Package (XKCP)
3
+ ; https://github.com/XKCP/XKCP
4
+ ;
5
+ ; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
6
+ ;
7
+ ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
8
+ ;
9
+ ; For more information, feedback or questions, please refer to the Keccak Team website:
10
+ ; https://keccak.team/
11
+ ;
12
+ ; To the extent possible under law, the implementer has waived all copyright
13
+ ; and related or neighboring rights to the source code in this file.
14
+ ; http://creativecommons.org/publicdomain/zero/1.0/
15
+ ;
16
+ ; ---
17
+ ;
18
+ ; This file implements Xoodoo in a SnP-compatible way.
19
+ ; Please refer to SnP-documentation.h for more details.
20
+ ;
21
+ ; This implementation comes with Xoodoo-SnP.h in the same folder.
22
+ ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
23
+ ;
24
+
25
+ ; INFO: Tested on ATmega1280 simulator
26
+
27
+ ; Registers used in all routines, given as numeric register indices: zero is the register stored wherever a 0 byte is needed, rpState is the low byte of the state-pointer argument (r24:r25), and rX/rY/rZ are the low bytes of the pointer pairs (the high byte is written as rX+1, rY+1, rZ+1). sp is not referenced in the visible part of the file.
28
+ #define zero 1
29
+ #define rpState 24
30
+ #define rX 26
31
+ #define rY 28
32
+ #define rZ 30
33
+ #define sp 0x3D
34
+
35
+ ;----------------------------------------------------------------------------
36
+ ;
37
+ ; void Xoodoo_StaticInitialize( void )
38
+ ;
39
+ .global Xoodoo_StaticInitialize
40
+
41
+ ;----------------------------------------------------------------------------
42
+ ;
43
+ ; void Xoodoo_Initialize(void *state)
44
+ ;
45
+ ; argument state is passed in r24:r25
46
+ ; Writes 48 zero bytes (6 iterations of 8 stores) starting at state; uses r23 as loop counter and Z as write pointer. Assumes the register named zero (r1) holds 0 on entry.
47
+ .global Xoodoo_Initialize
48
+ Xoodoo_Initialize:
49
+ movw rZ, r24 ; Z = state
50
+ ldi r23, 3*4/2 ; 6 iterations: 3*4 lanes in total, 8 bytes (2 lanes) cleared per iteration
51
+ Xoodoo_Initialize_Loop:
52
+ st z+, zero ; 8 unrolled byte stores of zero, Z post-incremented each time
53
+ st z+, zero
54
+ st z+, zero
55
+ st z+, zero
56
+ st z+, zero
57
+ st z+, zero
58
+ st z+, zero
59
+ st z+, zero
60
+ dec r23
61
+ brne Xoodoo_Initialize_Loop ; repeat until the iteration counter reaches 0
62
+ Xoodoo_StaticInitialize: ; Xoodoo_StaticInitialize does nothing: its entry point is the ret that ends Xoodoo_Initialize
63
+ ret
64
+
65
+ ;----------------------------------------------------------------------------
66
+ ; XORs one byte into the state: state[offset] ^= data.
67
+ ; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
68
+ ;
69
+ ; argument state is passed in r24:r25
70
+ ; argument data is passed in r22:r23, only LSB (r22) is used
71
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
72
+ ; Modifies r0 and Z (r30:r31).
73
+ .global Xoodoo_AddByte
74
+ Xoodoo_AddByte:
75
+ movw rZ, r24 ; Z = state
76
+ add rZ, r20 ; Z += offset (low byte) ...
77
+ adc rZ+1, zero ; ... and propagate the carry into the high byte
78
+ ld r0, Z ; r0 = state[offset]
79
+ eor r0, r22 ; r0 ^= data
80
+ st Z, r0 ; write the result back in place
81
+ ret
82
+
83
+ ;----------------------------------------------------------------------------
84
+ ; XORs length bytes from data into the state, starting at byte offset: state[offset+i] ^= data[i].
85
+ ; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
86
+ ;
87
+ ; argument state is passed in r24:r25
88
+ ; argument data is passed in r22:r23
89
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
90
+ ; argument length is passed in r18:r19, only LSB (r18) is used
91
+ ; Modifies r0, r18, r19, r21, X and Z. r21 and r19 are reused as scratch once the offset has been consumed.
92
+ .global Xoodoo_AddBytes
93
+ Xoodoo_AddBytes:
94
+ movw rZ, r24 ; Z = state
95
+ add rZ, r20 ; Z += offset (low byte) ...
96
+ adc rZ+1, zero ; ... with carry into the high byte
97
+ movw rX, r22 ; X = data
98
+ subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
99
+ brcs Xoodoo_AddBytes_Byte ; fewer than 8 bytes: go straight to the byte loop
100
+ ; do 8 bytes per iteration while at least 8 bytes remain
101
+ Xoodoo_AddBytes_Loop8:
102
+ ld r21, X+ ; r21 = next data byte
103
+ ld r0, Z ; r0 = current state byte
104
+ eor r0, r21 ; combine
105
+ st Z+, r0 ; store back and advance Z
106
+ ld r21, X+
107
+ ld r0, Z
108
+ eor r0, r21
109
+ st Z+, r0
110
+ ld r21, X+
111
+ ld r0, Z
112
+ eor r0, r21
113
+ st Z+, r0
114
+ ld r21, X+
115
+ ld r0, Z
116
+ eor r0, r21
117
+ st Z+, r0
118
+ ld r21, X+
119
+ ld r0, Z
120
+ eor r0, r21
121
+ st Z+, r0
122
+ ld r21, X+
123
+ ld r0, Z
124
+ eor r0, r21
125
+ st Z+, r0
126
+ ld r21, X+
127
+ ld r0, Z
128
+ eor r0, r21
129
+ st Z+, r0
130
+ ld r21, X+
131
+ ld r0, Z
132
+ eor r0, r21
133
+ st Z+, r0
134
+ subi r18, 8 ; 8 more bytes consumed
135
+ brcc Xoodoo_AddBytes_Loop8 ; loop while the count did not go below zero
136
+ Xoodoo_AddBytes_Byte:
137
+ ldi r19, 8
138
+ add r18, r19 ; undo the last subtraction: r18 = remaining byte count (0..7)
139
+ breq Xoodoo_AddBytes_End ; nothing left
140
+ Xoodoo_AddBytes_Loop1:
141
+ ld r21, X+ ; one byte per iteration for the tail
142
+ ld r0, Z
143
+ eor r0, r21
144
+ st Z+, r0
145
+ dec r18
146
+ brne Xoodoo_AddBytes_Loop1
147
+ Xoodoo_AddBytes_End:
148
+ ret
149
+
150
+
151
+ ;----------------------------------------------------------------------------
152
+ ; Copies length bytes from data over the state, starting at byte offset: state[offset+i] = data[i].
153
+ ; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
154
+ ;
155
+ ; argument state is passed in r24:r25
156
+ ; argument data is passed in r22:r23
157
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
158
+ ; argument length is passed in r18:r19, only LSB (r18) is used
159
+ ; Modifies r0, r18, r19, X and Z.
160
+ .global Xoodoo_OverwriteBytes
161
+ Xoodoo_OverwriteBytes:
162
+ movw rZ, r24 ; Z = state
163
+ add rZ, r20 ; Z += offset (low byte) ...
164
+ adc rZ+1, zero ; ... with carry into the high byte
165
+ movw rX, r22 ; X = data
166
+ subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
167
+ brcs Xoodoo_OverwriteBytes_Byte ; fewer than 8 bytes: go straight to the byte loop
168
+ ; do 8 bytes per iteration while at least 8 bytes remain
169
+ Xoodoo_OverwriteBytes_Loop8:
170
+ ld r0, X+ ; r0 = next data byte
171
+ st Z+, r0 ; overwrite the state byte and advance Z
172
+ ld r0, X+
173
+ st Z+, r0
174
+ ld r0, X+
175
+ st Z+, r0
176
+ ld r0, X+
177
+ st Z+, r0
178
+ ld r0, X+
179
+ st Z+, r0
180
+ ld r0, X+
181
+ st Z+, r0
182
+ ld r0, X+
183
+ st Z+, r0
184
+ ld r0, X+
185
+ st Z+, r0
186
+ subi r18, 8 ; 8 more bytes consumed
187
+ brcc Xoodoo_OverwriteBytes_Loop8 ; loop while the count did not go below zero
188
+ Xoodoo_OverwriteBytes_Byte:
189
+ ldi r19, 8
190
+ add r18, r19 ; undo the last subtraction: r18 = remaining byte count (0..7)
191
+ breq Xoodoo_OverwriteBytes_End ; nothing left
192
+ Xoodoo_OverwriteBytes_Loop1:
193
+ ld r0, X+ ; one byte per iteration for the tail
194
+ st Z+, r0
195
+ dec r18
196
+ brne Xoodoo_OverwriteBytes_Loop1
197
+ Xoodoo_OverwriteBytes_End:
198
+ ret
199
+
200
+ ;----------------------------------------------------------------------------
201
+ ; Stores byteCount copies of r1 at the start of the state. r1 is the register the other routines call zero; it is assumed to hold 0 on entry.
202
+ ; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
203
+ ;
204
+ ; argument state is passed in r24:r25
205
+ ; argument byteCount is passed in r22:r23, only LSB (r22) is used
206
+ ; Modifies r22, r23 and Z.
207
+ .global Xoodoo_OverwriteWithZeroes
208
+ Xoodoo_OverwriteWithZeroes:
209
+ movw rZ, r24 ; rZ = state
210
+ mov r23, r22
211
+ lsr r23
212
+ lsr r23
213
+ lsr r23 ; r23 = byteCount / 8 = number of full 8-byte groups
214
+ breq Xoodoo_OverwriteWithZeroes_Bytes ; no full group: only the tail bytes remain
215
+ Xoodoo_OverwriteWithZeroes_LoopLanes:
216
+ st Z+, r1 ; 8 unrolled byte stores per iteration
217
+ st Z+, r1
218
+ st Z+, r1
219
+ st Z+, r1
220
+ st Z+, r1
221
+ st Z+, r1
222
+ st Z+, r1
223
+ st Z+, r1
224
+ dec r23
225
+ brne Xoodoo_OverwriteWithZeroes_LoopLanes
226
+ Xoodoo_OverwriteWithZeroes_Bytes:
227
+ andi r22, 7 ; r22 = byteCount % 8 = remaining tail bytes
228
+ breq Xoodoo_OverwriteWithZeroes_End ; no tail
229
+ Xoodoo_OverwriteWithZeroes_LoopBytes:
230
+ st Z+, r1 ; one byte per iteration for the tail
231
+ dec r22
232
+ brne Xoodoo_OverwriteWithZeroes_LoopBytes
233
+ Xoodoo_OverwriteWithZeroes_End:
234
+ ret
235
+
236
+ ;----------------------------------------------------------------------------
237
+ ; Copies length bytes out of the state, starting at byte offset, into data: data[i] = state[offset+i].
238
+ ; void Xoodoo_ExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
239
+ ;
240
+ ; argument state is passed in r24:r25
241
+ ; argument data is passed in r22:r23
242
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
243
+ ; argument length is passed in r18:r19, only LSB (r18) is used
244
+ ; The state is only read; data is the destination buffer. Modifies r0, r18, r19, X and Z.
245
+ .global Xoodoo_ExtractBytes
246
+ Xoodoo_ExtractBytes:
247
+ movw rZ, r24 ; Z = state
248
+ add rZ, r20 ; Z += offset (low byte) ...
249
+ adc rZ+1, zero ; ... with carry into the high byte
250
+ movw rX, r22 ; X = data (destination)
251
+ subi r18, 8 ; r18 = length - 8; carry set when fewer than 8 bytes remain
252
+ brcs Xoodoo_ExtractBytes_Byte ; fewer than 8 bytes: go straight to the byte loop
253
+ ; do 8 bytes per iteration while at least 8 bytes remain
254
+ Xoodoo_ExtractBytes_Loop8:
255
+ ld r0, Z+ ; r0 = next state byte
256
+ st X+, r0 ; store it to the output buffer
257
+ ld r0, Z+
258
+ st X+, r0
259
+ ld r0, Z+
260
+ st X+, r0
261
+ ld r0, Z+
262
+ st X+, r0
263
+ ld r0, Z+
264
+ st X+, r0
265
+ ld r0, Z+
266
+ st X+, r0
267
+ ld r0, Z+
268
+ st X+, r0
269
+ ld r0, Z+
270
+ st X+, r0
271
+ subi r18, 8 ; 8 more bytes consumed
272
+ brcc Xoodoo_ExtractBytes_Loop8 ; loop while the count did not go below zero
273
+ Xoodoo_ExtractBytes_Byte:
274
+ ldi r19, 8
275
+ add r18, r19 ; undo the last subtraction: r18 = remaining byte count (0..7)
276
+ breq Xoodoo_ExtractBytes_End ; nothing left
277
+ Xoodoo_ExtractBytes_Loop1:
278
+ ld r0, Z+ ; one byte per iteration for the tail
279
+ st X+, r0
280
+ dec r18
281
+ brne Xoodoo_ExtractBytes_Loop1
282
+ Xoodoo_ExtractBytes_End:
283
+ ret
284
+
285
+ ;----------------------------------------------------------------------------
286
+ ; Writes the XOR of state bytes and input bytes to output: output[i] = input[i] ^ state[offset+i]. The state is only read.
287
+ ; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
288
+ ;
289
+ ; argument state is passed in r24:r25
290
+ ; argument input is passed in r22:r23
291
+ ; argument output is passed in r20:r21
292
+ ; argument offset is passed in r18:r19, only LSB (r18) is used
293
+ ; argument length is passed in r16:r17, only LSB (r16) is used
294
+ ; Returns immediately when the length LSB is 0. r16 and Y (r28:r29) are saved on the stack and restored; r0, r19, r21, X and Z are modified.
295
+ .global Xoodoo_ExtractAndAddBytes
296
+ Xoodoo_ExtractAndAddBytes:
297
+ tst r16
298
+ breq Xoodoo_ExtractAndAddBytes_End ; length 0: return before touching the stack
299
+ push r16 ; save the registers this routine modifies and restores
300
+ push r28
301
+ push r29
302
+ movw rZ, r24 ; Z = state
303
+ add rZ, r18 ; Z += offset (low byte) ...
304
+ adc rZ+1, zero ; ... with carry into the high byte
305
+ movw rX, r22 ; X = input
306
+ movw rY, r20 ; Y = output (r21 is free for scratch use from here on)
307
+ subi r16, 8 ; r16 = length - 8; carry set when fewer than 8 bytes remain
308
+ brcs Xoodoo_ExtractAndAddBytes_Byte ; fewer than 8 bytes: go straight to the byte loop
309
+ Xoodoo_ExtractAndAddBytes_LoopLane: ; 8 bytes per iteration
310
+ ld r21, Z+ ; r21 = next state byte
311
+ ld r0, X+ ; r0 = next input byte
312
+ eor r0, r21 ; combine
313
+ st Y+, r0 ; write to output
314
+ ld r21, Z+
315
+ ld r0, X+
316
+ eor r0, r21
317
+ st Y+, r0
318
+ ld r21, Z+
319
+ ld r0, X+
320
+ eor r0, r21
321
+ st Y+, r0
322
+ ld r21, Z+
323
+ ld r0, X+
324
+ eor r0, r21
325
+ st Y+, r0
326
+ ld r21, Z+
327
+ ld r0, X+
328
+ eor r0, r21
329
+ st Y+, r0
330
+ ld r21, Z+
331
+ ld r0, X+
332
+ eor r0, r21
333
+ st Y+, r0
334
+ ld r21, Z+
335
+ ld r0, X+
336
+ eor r0, r21
337
+ st Y+, r0
338
+ ld r21, Z+
339
+ ld r0, X+
340
+ eor r0, r21
341
+ st Y+, r0
342
+ subi r16, 8 ; 8 more bytes consumed
343
+ brcc Xoodoo_ExtractAndAddBytes_LoopLane ; loop while the count did not go below zero
344
+ Xoodoo_ExtractAndAddBytes_Byte:
345
+ ldi r19, 8
346
+ add r16, r19 ; undo the last subtraction: r16 = remaining byte count (0..7)
347
+ breq Xoodoo_ExtractAndAddBytes_Done ; nothing left
348
+ Xoodoo_ExtractAndAddBytes_Loop1:
349
+ ld r21, Z+ ; one byte per iteration for the tail
350
+ ld r0, X+
351
+ eor r0, r21
352
+ st Y+, r0
353
+ dec r16
354
+ brne Xoodoo_ExtractAndAddBytes_Loop1
355
+ Xoodoo_ExtractAndAddBytes_Done:
356
+ pop r29 ; restore in reverse push order
357
+ pop r28
358
+ pop r16
359
+ Xoodoo_ExtractAndAddBytes_End:
360
+ ret
361
+
362
+ Xoodoo_RoundConstants_12: ; table start loaded by Xoodoo_Permute_12rounds; two bytes per entry, 12 entries before the terminator
362
+ .BYTE 0x58, 0x00
363
+ .BYTE 0x38, 0x00
364
+ .BYTE 0xC0, 0x03
365
+ .BYTE 0xD0, 0x00
366
+ .BYTE 0x20, 0x01
367
+ .BYTE 0x14, 0x00
368
+ Xoodoo_RoundConstants_6: ; table start loaded by Xoodoo_Permute_6rounds (the last 6 entries)
369
+ .BYTE 0x60, 0x00
370
+ .BYTE 0x2C, 0x00
371
+ .BYTE 0x80, 0x03
372
+ .BYTE 0xF0, 0x00
373
+ .BYTE 0xA0, 0x01
374
+ .BYTE 0x12, 0x00
375
+ Xoodoo_RoundConstants_0: ; end of table; Xoodoo_Permute_Nrounds steps back 2*nrounds bytes from here
376
+ .BYTE 0xFF, 0 ; terminator
378
+
379
+ .text ; the code that follows is placed in the .text section
380
+
381
+ ; Register variables used in permutation. Each rC*/rVv/rTt name is the first of 4 consecutive registers. The aXY values are byte offsets into the state, 4 bytes apart, in the order the state is read (a00 at offset 0).
382
+ #define rC0 2 // 4 regs (2-5)
383
+ #define rC1 6 // 4 regs (6-9)
384
+ #define rC2 10 // 4 regs (10-13)
385
+ #define rC3 14 // 4 regs (14-17)
386
+ #define rVv 18 // 4 regs (18-21)
387
+ #define rTt 22 // 4 regs (22-25)
388
+ // r26-27 free
389
+ #define a00 0
390
+ #define a01 4
391
+ #define a02 8
392
+ #define a03 12
393
+ #define a10 16
394
+ #define a11 20
395
+ #define a12 24
396
+ #define a13 28
397
+ #define a20 32
398
+ #define a21 36
399
+ #define a22 40
400
+ #define a23 44
401
+
402
+ ;----------------------------------------------------------------------------
403
+ ; Points Z at the round-constant entry 2*nrounds bytes before the table terminator, then jumps to the shared Xoodoo_Permute body.
404
+ ; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
405
+ ;
406
+ ; argument state is passed in r24:r25
407
+ ; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
408
+ ; No range check is done here: the table holds 12 entries, so nrounds above 12 would place Z before Xoodoo_RoundConstants_12.
409
+ .global Xoodoo_Permute_Nrounds
410
+ Xoodoo_Permute_Nrounds:
411
+ mov r26, r22 ; r26 = nrounds
412
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_0) ; Z = address of the table terminator
413
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
414
+ lsl r26 ; r26 = 2*nrounds (each table entry is 2 bytes)
415
+ sub rZ, r26 ; Z -= 2*nrounds (low byte) ...
416
+ sbc rZ+1, zero ; ... with borrow into the high byte
417
+ rjmp Xoodoo_Permute ; continue in the shared permutation body
418
+
419
+ ;----------------------------------------------------------------------------
420
+ ;
421
+ ; void Xoodoo_Permute_6rounds( void *state )
422
+ ;
423
+ ; argument state is passed in r24:r25
424
+ ; Loads Z with the address of Xoodoo_RoundConstants_6 and jumps to the shared body Xoodoo_Permute, which returns to the caller.
425
+ .global Xoodoo_Permute_6rounds
426
+ Xoodoo_Permute_6rounds:
427
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_6) ; Z = first of the last six round constants
428
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
429
+ rjmp Xoodoo_Permute
430
+
431
+ ;----------------------------------------------------------------------------
432
+ ;
433
+ ; void Xoodoo_Permute_12rounds( void *state )
434
+ ;
435
+ ; argument state is passed in r24:r25
436
+ ; Loads Z with the address of Xoodoo_RoundConstants_12 and falls through into Xoodoo_Permute (no jump needed).
437
+ .global Xoodoo_Permute_12rounds
438
+ Xoodoo_Permute_12rounds:
439
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_12) ; Z = first entry of the round-constant table
440
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_12)
441
+ Xoodoo_Permute: ; shared body of the three entry points; expects Z to point at the first round constant to apply
442
+ push r2 ; save r2-r17, r28 and r29; they are restored in reverse order at Xoodoo_Done
443
+ push r3
444
+ push r4
445
+ push r5
446
+ push r6
447
+ push r7
448
+ push r8
449
+ push r9
450
+ push r10
451
+ push r11
452
+ push r12
453
+ push r13
454
+ push r14
455
+ push r15
456
+ push r16
457
+ push r17
458
+ push r28
459
+ push r29
460
+
461
+ ; Initial Prepare Theta: rC0..rC3 = a0i ^ a1i ^ a2i for i = 0..3 (xor of the three planes, lane by lane)
462
+ movw rY, rpState ; Y = state pointer (rY and rpState are defined outside this view)
463
+ ld rC0+0, Y+ ; a00
464
+ ld rC0+1, Y+
465
+ ld rC0+2, Y+
466
+ ld rC0+3, Y+
467
+ ld rC1+0, Y+ ; a01
468
+ ld rC1+1, Y+
469
+ ld rC1+2, Y+
470
+ ld rC1+3, Y+
471
+ ld rC2+0, Y+ ; a02
472
+ ld rC2+1, Y+
473
+ ld rC2+2, Y+
474
+ ld rC2+3, Y+
475
+ ld rC3+0, Y+ ; a03
476
+ ld rC3+1, Y+
477
+ ld rC3+2, Y+
478
+ ld rC3+3, Y+
479
+
480
+ ld r0, Y+ ; a10
481
+ eor rC0+0, r0
482
+ ld r0, Y+
483
+ eor rC0+1, r0
484
+ ld r0, Y+
485
+ eor rC0+2, r0
486
+ ld r0, Y+
487
+ eor rC0+3, r0
488
+ ld r0, Y+ ; a11
489
+ eor rC1+0, r0
490
+ ld r0, Y+
491
+ eor rC1+1, r0
492
+ ld r0, Y+
493
+ eor rC1+2, r0
494
+ ld r0, Y+
495
+ eor rC1+3, r0
496
+ ld r0, Y+ ; a12
497
+ eor rC2+0, r0
498
+ ld r0, Y+
499
+ eor rC2+1, r0
500
+ ld r0, Y+
501
+ eor rC2+2, r0
502
+ ld r0, Y+
503
+ eor rC2+3, r0
504
+ ld r0, Y+ ; a13
505
+ eor rC3+0, r0
506
+ ld r0, Y+
507
+ eor rC3+1, r0
508
+ ld r0, Y+
509
+ eor rC3+2, r0
510
+ ld r0, Y+
511
+ eor rC3+3, r0
512
+
513
+ ld r0, Y+ ; a20
514
+ eor rC0+0, r0
515
+ ld r0, Y+
516
+ eor rC0+1, r0
517
+ ld r0, Y+
518
+ eor rC0+2, r0
519
+ ld r0, Y+
520
+ eor rC0+3, r0
521
+ ld r0, Y+ ; a21
522
+ eor rC1+0, r0
523
+ ld r0, Y+
524
+ eor rC1+1, r0
525
+ ld r0, Y+
526
+ eor rC1+2, r0
527
+ ld r0, Y+
528
+ eor rC1+3, r0
529
+ ld r0, Y+ ; a22
530
+ eor rC2+0, r0
531
+ ld r0, Y+
532
+ eor rC2+1, r0
533
+ ld r0, Y+
534
+ eor rC2+2, r0
535
+ ld r0, Y+
536
+ eor rC2+3, r0
537
+ ld r0, Y+ ; a23
538
+ eor rC3+0, r0
539
+ ld r0, Y+
540
+ eor rC3+1, r0
541
+ ld r0, Y+
542
+ eor rC3+2, r0
543
+ ld r0, Y+
544
+ eor rC3+3, r0
545
+ sbiw rY, 48 ; rewind Y to the start of the state after the 48 post-incremented loads
546
+
547
+ Xoodoo_RoundLoop: ; one full round per iteration; on entry rC0..rC3 hold the column parities and Z points at this round's constant
548
+ ; Theta + Rho west
549
+ ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
550
+ mov rVv+1, rC0+0 ; rol 9
551
+ mov rVv+2, rC0+1
552
+ mov rVv+3, rC0+2
553
+ mov rVv+0, rC0+3
554
+ lsl rVv+0
555
+ rol rVv+1
556
+ rol rVv+2
557
+ rol rVv+3
558
+ adc rVv+0, zero ; wrap the bit shifted out of byte 3 back into bit 0
559
+ eor rVv+0, rC0+0
560
+ eor rVv+1, rC0+1
561
+ eor rVv+2, rC0+2
562
+ eor rVv+3, rC0+3
563
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
564
+ ror rVv+3
565
+ ror rVv+2
566
+ ror rVv+1
567
+ ror rVv
568
+ bld rVv+3, 7 ; the bit saved in T (old bit 0) becomes the new top bit
569
+ bst rVv, 0
570
+ ror rVv+3
571
+ ror rVv+2
572
+ ror rVv+1
573
+ ror rVv
574
+ bld rVv+3, 7
575
+ bst rVv, 0
576
+ ror rVv+3
577
+ ror rVv+2
578
+ ror rVv+1
579
+ ror rVv
580
+ bld rVv+3, 7
581
+ mov rC0+0, rVv+3 ; copying with the bytes shifted one position performs the rol 8
582
+ mov rC0+1, rVv+0
583
+ mov rC0+2, rVv+1
584
+ mov rC0+3, rVv+2
585
+
586
+ ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
587
+ mov rVv+1, rC1+0 ; rol 9
588
+ mov rVv+2, rC1+1
589
+ mov rVv+3, rC1+2
590
+ mov rVv+0, rC1+3
591
+ lsl rVv+0
592
+ rol rVv+1
593
+ rol rVv+2
594
+ rol rVv+3
595
+ adc rVv+0, zero
596
+ eor rVv+0, rC1+0
597
+ eor rVv+1, rC1+1
598
+ eor rVv+2, rC1+2
599
+ eor rVv+3, rC1+3
600
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
601
+ ror rVv+3
602
+ ror rVv+2
603
+ ror rVv+1
604
+ ror rVv
605
+ bld rVv+3, 7
606
+ bst rVv, 0
607
+ ror rVv+3
608
+ ror rVv+2
609
+ ror rVv+1
610
+ ror rVv
611
+ bld rVv+3, 7
612
+ bst rVv, 0
613
+ ror rVv+3
614
+ ror rVv+2
615
+ ror rVv+1
616
+ ror rVv
617
+ bld rVv+3, 7
618
+ mov rC1+0, rVv+3
619
+ mov rC1+1, rVv+0
620
+ mov rC1+2, rVv+1
621
+ mov rC1+3, rVv+2
622
+
623
+ ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
624
+ mov rVv+1, rC2+0 ; rol 9
625
+ mov rVv+2, rC2+1
626
+ mov rVv+3, rC2+2
627
+ mov rVv+0, rC2+3
628
+ lsl rVv+0
629
+ rol rVv+1
630
+ rol rVv+2
631
+ rol rVv+3
632
+ adc rVv+0, zero
633
+ eor rVv+0, rC2+0
634
+ eor rVv+1, rC2+1
635
+ eor rVv+2, rC2+2
636
+ eor rVv+3, rC2+3
637
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
638
+ ror rVv+3
639
+ ror rVv+2
640
+ ror rVv+1
641
+ ror rVv
642
+ bld rVv+3, 7
643
+ bst rVv, 0
644
+ ror rVv+3
645
+ ror rVv+2
646
+ ror rVv+1
647
+ ror rVv
648
+ bld rVv+3, 7
649
+ bst rVv, 0
650
+ ror rVv+3
651
+ ror rVv+2
652
+ ror rVv+1
653
+ ror rVv
654
+ bld rVv+3, 7
655
+ mov rC2+0, rVv+3
656
+ mov rC2+1, rVv+0
657
+ mov rC2+2, rVv+1
658
+ mov rC2+3, rVv+2
659
+
660
+ ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
661
+ mov rVv+1, rC3+0 ; rol 9
662
+ mov rVv+2, rC3+1
663
+ mov rVv+3, rC3+2
664
+ mov rVv+0, rC3+3
665
+ lsl rVv+0
666
+ rol rVv+1
667
+ rol rVv+2
668
+ rol rVv+3
669
+ adc rVv+0, zero
670
+ eor rVv+0, rC3+0
671
+ eor rVv+1, rC3+1
672
+ eor rVv+2, rC3+2
673
+ eor rVv+3, rC3+3
674
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
675
+ ror rVv+3
676
+ ror rVv+2
677
+ ror rVv+1
678
+ ror rVv
679
+ bld rVv+3, 7
680
+ bst rVv, 0
681
+ ror rVv+3
682
+ ror rVv+2
683
+ ror rVv+1
684
+ ror rVv
685
+ bld rVv+3, 7
686
+ bst rVv, 0
687
+ ror rVv+3
688
+ ror rVv+2
689
+ ror rVv+1
690
+ ror rVv
691
+ bld rVv+3, 7
692
+ mov rC3+0, rVv+3
693
+ mov rC3+1, rVv+0
694
+ mov rC3+2, rVv+1
695
+ mov rC3+3, rVv+2
696
+
697
+ ; v1 = a13;
698
+ ldd rVv+0, Y+a13+0 ; keep the old a13 in registers; its slot is overwritten next
699
+ ldd rVv+1, Y+a13+1
700
+ ldd rVv+2, Y+a13+2
701
+ ldd rVv+3, Y+a13+3
702
+
703
+ ; a13 = a12 ^ c1;
704
+ ldd r0, Y+a12+0
705
+ eor r0, rC1+0
706
+ std Y+a13+0, r0
707
+ ldd r0, Y+a12+1
708
+ eor r0, rC1+1
709
+ std Y+a13+1, r0
710
+ ldd r0, Y+a12+2
711
+ eor r0, rC1+2
712
+ std Y+a13+2, r0
713
+ ldd r0, Y+a12+3
714
+ eor r0, rC1+3
715
+ std Y+a13+3, r0
716
+
717
+ ; a12 = a11 ^ c0;
718
+ ldd r0, Y+a11+0
719
+ eor r0, rC0+0
720
+ std Y+a12+0, r0
721
+ ldd r0, Y+a11+1
722
+ eor r0, rC0+1
723
+ std Y+a12+1, r0
724
+ ldd r0, Y+a11+2
725
+ eor r0, rC0+2
726
+ std Y+a12+2, r0
727
+ ldd r0, Y+a11+3
728
+ eor r0, rC0+3
729
+ std Y+a12+3, r0
730
+
731
+ ; a11 = a10 ^ c3;
732
+ ldd r0, Y+a10+0
733
+ eor r0, rC3+0
734
+ std Y+a11+0, r0
735
+ ldd r0, Y+a10+1
736
+ eor r0, rC3+1
737
+ std Y+a11+1, r0
738
+ ldd r0, Y+a10+2
739
+ eor r0, rC3+2
740
+ std Y+a11+2, r0
741
+ ldd r0, Y+a10+3
742
+ eor r0, rC3+3
743
+ std Y+a11+3, r0
744
+
745
+ ; a10 = v1 ^ c2;
746
+ eor rVv+0, rC2+0
747
+ std Y+a10+0, rVv+0
748
+ eor rVv+1, rC2+1
749
+ std Y+a10+1, rVv+1
750
+ eor rVv+2, rC2+2
751
+ std Y+a10+2, rVv+2
752
+ eor rVv+3, rC2+3
753
+ std Y+a10+3, rVv+3
754
+
755
+ ; a20 = ROTL32(a20 ^ c3, 11);
756
+ ldd rVv+0, Y+a20+3 ; bytes are loaded one position up (rol 8); three 1-bit rotates below complete the rol 11
757
+ eor rVv+0, rC3+3
758
+ ldd rVv+1, Y+a20+0
759
+ eor rVv+1, rC3+0
760
+ ldd rVv+2, Y+a20+1
761
+ eor rVv+2, rC3+1
762
+ ldd rVv+3, Y+a20+2
763
+ eor rVv+3, rC3+2
764
+ lsl rVv+0
765
+ rol rVv+1
766
+ rol rVv+2
767
+ rol rVv+3
768
+ adc rVv+0, zero
769
+ lsl rVv+0
770
+ rol rVv+1
771
+ rol rVv+2
772
+ rol rVv+3
773
+ adc rVv+0, zero
774
+ lsl rVv+0
775
+ rol rVv+1
776
+ rol rVv+2
777
+ rol rVv+3
778
+ adc rVv+0, zero
779
+ std Y+a20+0, rVv+0
780
+ std Y+a20+1, rVv+1
781
+ std Y+a20+2, rVv+2
782
+ std Y+a20+3, rVv+3
783
+
784
+ ; a21 = ROTL32(a21 ^ c0, 11);
785
+ ldd rVv+0, Y+a21+3
786
+ eor rVv+0, rC0+3
787
+ ldd rVv+1, Y+a21+0
788
+ eor rVv+1, rC0+0
789
+ ldd rVv+2, Y+a21+1
790
+ eor rVv+2, rC0+1
791
+ ldd rVv+3, Y+a21+2
792
+ eor rVv+3, rC0+2
793
+ lsl rVv+0
794
+ rol rVv+1
795
+ rol rVv+2
796
+ rol rVv+3
797
+ adc rVv+0, zero
798
+ lsl rVv+0
799
+ rol rVv+1
800
+ rol rVv+2
801
+ rol rVv+3
802
+ adc rVv+0, zero
803
+ lsl rVv+0
804
+ rol rVv+1
805
+ rol rVv+2
806
+ rol rVv+3
807
+ adc rVv+0, zero
808
+ std Y+a21+0, rVv+0
809
+ std Y+a21+1, rVv+1
810
+ std Y+a21+2, rVv+2
811
+ std Y+a21+3, rVv+3
812
+
813
+ ; a22 = ROTL32(a22 ^ c1, 11);
814
+ ldd rVv+0, Y+a22+3
815
+ eor rVv+0, rC1+3
816
+ ldd rVv+1, Y+a22+0
817
+ eor rVv+1, rC1+0
818
+ ldd rVv+2, Y+a22+1
819
+ eor rVv+2, rC1+1
820
+ ldd rVv+3, Y+a22+2
821
+ eor rVv+3, rC1+2
822
+ lsl rVv+0
823
+ rol rVv+1
824
+ rol rVv+2
825
+ rol rVv+3
826
+ adc rVv+0, zero
827
+ lsl rVv+0
828
+ rol rVv+1
829
+ rol rVv+2
830
+ rol rVv+3
831
+ adc rVv+0, zero
832
+ lsl rVv+0
833
+ rol rVv+1
834
+ rol rVv+2
835
+ rol rVv+3
836
+ adc rVv+0, zero
837
+ std Y+a22+0, rVv+0
838
+ std Y+a22+1, rVv+1
839
+ std Y+a22+2, rVv+2
840
+ std Y+a22+3, rVv+3
841
+
842
+ ; a23 = ROTL32(a23 ^ c2, 11);
843
+ ldd rVv+0, Y+a23+3
844
+ eor rVv+0, rC2+3
845
+ ldd rVv+1, Y+a23+0
846
+ eor rVv+1, rC2+0
847
+ ldd rVv+2, Y+a23+1
848
+ eor rVv+2, rC2+1
849
+ ldd rVv+3, Y+a23+2
850
+ eor rVv+3, rC2+2
851
+ lsl rVv+0
852
+ rol rVv+1
853
+ rol rVv+2
854
+ rol rVv+3
855
+ adc rVv+0, zero
856
+ lsl rVv+0
857
+ rol rVv+1
858
+ rol rVv+2
859
+ rol rVv+3
860
+ adc rVv+0, zero
861
+ lsl rVv+0
862
+ rol rVv+1
863
+ rol rVv+2
864
+ rol rVv+3
865
+ adc rVv+0, zero
866
+ std Y+a23+0, rVv+0
867
+ std Y+a23+1, rVv+1
868
+ std Y+a23+2, rVv+2
869
+ std Y+a23+3, rVv+3
870
+
871
+ ; v1 = c3;
872
+ movw rVv+0, rC3+0
873
+ movw rVv+2, rC3+2
874
+
875
+ ; c3 = a03 ^ c2; /* a03 resides in c3 */
876
+ ldd rC3+0, Y+a03+0
877
+ eor rC3+0, rC2+0
878
+ ldd rC3+1, Y+a03+1
879
+ eor rC3+1, rC2+1
880
+ ldd rC3+2, Y+a03+2
881
+ eor rC3+2, rC2+2
882
+ ldd rC3+3, Y+a03+3
883
+ eor rC3+3, rC2+3
884
+
885
+ ; c2 = a02 ^ c1; /* a02 resides in c2 */
886
+ ldd rC2+0, Y+a02+0
887
+ eor rC2+0, rC1+0
888
+ ldd rC2+1, Y+a02+1
889
+ eor rC2+1, rC1+1
890
+ ldd rC2+2, Y+a02+2
891
+ eor rC2+2, rC1+2
892
+ ldd rC2+3, Y+a02+3
893
+ eor rC2+3, rC1+3
894
+
895
+ ; c1 = a01 ^ c0; /* a01 resides in c1 */
896
+ ldd rC1+0, Y+a01+0
897
+ eor rC1+0, rC0+0
898
+ ldd rC1+1, Y+a01+1
899
+ eor rC1+1, rC0+1
900
+ ldd rC1+2, Y+a01+2
901
+ eor rC1+2, rC0+2
902
+ ldd rC1+3, Y+a01+3
903
+ eor rC1+3, rC0+3
904
+
905
+ ; c0 = a00 ^ v1; /* a00 resides in c0 */
906
+ ldd rC0+0, Y+a00+0
907
+ eor rC0+0, rVv+0
908
+ ldd rC0+1, Y+a00+1
909
+ eor rC0+1, rVv+1
910
+ ldd rC0+2, Y+a00+2
911
+ eor rC0+2, rVv+2
912
+ ldd rC0+3, Y+a00+3
913
+ eor rC0+3, rVv+3
914
+
915
+ ; c0 ^= __rc; /* +Iota */
916
+ lpm rVv+0, Z+ ; fetch this round's 2-byte constant from program memory, advancing Z to the next entry
917
+ lpm rVv+1, Z+
918
+ eor rC0+0, rVv+0 ; only bytes 0 and 1 of c0 are affected
919
+ eor rC0+1, rVv+1
920
+
921
+ ; Chi + Rho east + Early Theta
922
+ ; a00 = c0 ^= ~a10 & a20;
923
+ ldd r0, Y+a10+0
924
+ com r0
925
+ ldd rTt+0, Y+a20+0 ; a20 in rTt
926
+ and r0, rTt+0
927
+ eor rC0+0, r0
928
+ std Y+a00+0, rC0+0
929
+ ldd r0, Y+a10+1
930
+ com r0
931
+ ldd rTt+1, Y+a20+1
932
+ and r0, rTt+1
933
+ eor rC0+1, r0
934
+ std Y+a00+1, rC0+1
935
+ ldd r0, Y+a10+2
936
+ com r0
937
+ ldd rTt+2, Y+a20+2
938
+ and r0, rTt+2
939
+ eor rC0+2, r0
940
+ std Y+a00+2, rC0+2
941
+ ldd r0, Y+a10+3
942
+ com r0
943
+ ldd rTt+3, Y+a20+3
944
+ and r0, rTt+3
945
+ eor rC0+3, r0
946
+ std Y+a00+3, rC0+3
947
+
948
+ ; a10 ^= ~a20 & c0;
949
+ com rTt+0
950
+ and rTt+0, rC0+0
951
+ ldd r0, Y+a10+0
952
+ eor rTt+0, r0 ; new a10 in rTt
953
+ std Y+a10+0, rTt+0
954
+ com rTt+1
955
+ and rTt+1, rC0+1
956
+ ldd r0, Y+a10+1
957
+ eor rTt+1, r0
958
+ std Y+a10+1, rTt+1
959
+ com rTt+2
960
+ and rTt+2, rC0+2
961
+ ldd r0, Y+a10+2
962
+ eor rTt+2, r0
963
+ std Y+a10+2, rTt+2
964
+ com rTt+3
965
+ and rTt+3, rC0+3
966
+ ldd r0, Y+a10+3
967
+ eor rTt+3, r0
968
+ std Y+a10+3, rTt+3
969
+
970
+ ; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
971
+ movw rVv+0, rTt+0 ; a10 in rVv
972
+ movw rVv+2, rTt+2
973
+ mov r0, rC0+0
974
+ com r0
975
+ and rTt+0, r0
976
+ ldd r0, Y+a20+0
977
+ eor rTt+0, r0
978
+
979
+ mov r0, rC0+1
980
+ com r0
981
+ and rTt+1, r0
982
+ ldd r0, Y+a20+1
983
+ eor rTt+1, r0
984
+
985
+ mov r0, rC0+2
986
+ com r0
987
+ and rTt+2, r0
988
+ ldd r0, Y+a20+2
989
+ eor rTt+2, r0
990
+
991
+ mov r0, rC0+3
992
+ com r0
993
+ and rTt+3, r0
994
+ ldd r0, Y+a20+3
995
+ eor rTt+3, r0
996
+ std Y+a20+0, rTt+3 ; parked in the a20 slot already rotated by 8; it is read back and written to a22 further down
997
+ std Y+a20+1, rTt+0
998
+ std Y+a20+2, rTt+1
999
+ std Y+a20+3, rTt+2
1000
+
1001
+ ; c0 ^= a10 = ROTL32(a10, 1);
1002
+ lsl rVv+0
1003
+ rol rVv+1
1004
+ std Y+a10+1, rVv+1
1005
+ eor rC0+1, rVv+1
1006
+ rol rVv+2
1007
+ std Y+a10+2, rVv+2
1008
+ eor rC0+2, rVv+2
1009
+ rol rVv+3
1010
+ std Y+a10+3, rVv+3
1011
+ eor rC0+3, rVv+3
1012
+ adc rVv+0, zero ; byte 0 is finished last because it needs the carry out of byte 3
1013
+ std Y+a10+0, rVv+0
1014
+ eor rC0+0, rVv+0
1015
+
1016
+ ; a02 = c2 ^= ~a12 & a22;
1017
+ ldd r0, Y+a12+0
1018
+ com r0
1019
+ ldd rVv+0, Y+a22+0 ; a22 in rVv
1020
+ and r0, rVv+0
1021
+ eor rC2+0, r0
1022
+ std Y+a02+0, rC2+0
1023
+ ldd r0, Y+a12+1
1024
+ com r0
1025
+ ldd rVv+1, Y+a22+1
1026
+ and r0, rVv+1
1027
+ eor rC2+1, r0
1028
+ std Y+a02+1, rC2+1
1029
+ ldd r0, Y+a12+2
1030
+ com r0
1031
+ ldd rVv+2, Y+a22+2
1032
+ and r0, rVv+2
1033
+ eor rC2+2, r0
1034
+ std Y+a02+2, rC2+2
1035
+ ldd r0, Y+a12+3
1036
+ com r0
1037
+ ldd rVv+3, Y+a22+3
1038
+ and r0, rVv+3
1039
+ eor rC2+3, r0
1040
+ std Y+a02+3, rC2+3
1041
+
1042
+ ; a12 ^= ~a22 & c2;
1043
+ mov r0, rVv+0 ; a12 in rTt
1044
+ com r0
1045
+ and r0, rC2+0
1046
+ ldd rTt+0, Y+a12+0
1047
+ eor rTt+0, r0
1048
+ std Y+a12+0, rTt+0
1049
+ mov r0, rVv+1
1050
+ com r0
1051
+ and r0, rC2+1
1052
+ ldd rTt+1, Y+a12+1
1053
+ eor rTt+1, r0
1054
+ std Y+a12+1, rTt+1
1055
+ mov r0, rVv+2
1056
+ com r0
1057
+ and r0, rC2+2
1058
+ ldd rTt+2, Y+a12+2
1059
+ eor rTt+2, r0
1060
+ std Y+a12+2, rTt+2
1061
+ mov r0, rVv+3
1062
+ com r0
1063
+ and r0, rC2+3
1064
+ ldd rTt+3, Y+a12+3
1065
+ eor rTt+3, r0
1066
+ std Y+a12+3, rTt+3
1067
+
1068
+ ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
1069
+ mov r0, rC2+0
1070
+ com r0
1071
+ and r0, rTt+0
1072
+ eor r0, rVv+0
1073
+ ldd rVv+0, Y+a20+1 ; rVv = value destined for a22 (the word parked in the a20 slot earlier), read before the slot is overwritten
1074
+ std Y+a20+1, r0
1075
+ eor rC0+1, r0
1076
+ mov r0, rC2+1
1077
+ com r0
1078
+ and r0, rTt+1
1079
+ eor r0, rVv+1
1080
+ ldd rVv+1, Y+a20+2
1081
+ std Y+a20+2, r0
1082
+ eor rC0+2, r0
1083
+ mov r0, rC2+2
1084
+ com r0
1085
+ and r0, rTt+2
1086
+ eor r0, rVv+2
1087
+ ldd rVv+2, Y+a20+3
1088
+ std Y+a20+3, r0
1089
+ eor rC0+3, r0
1090
+ mov r0, rC2+3
1091
+ com r0
1092
+ and r0, rTt+3
1093
+ eor r0, rVv+3
1094
+ ldd rVv+3, Y+a20+0
1095
+ std Y+a20+0, r0
1096
+ eor rC0+0, r0
1097
+
1098
+ ; c2 ^= a12 = ROTL32(a12, 1);
1099
+ lsl rTt+0
1100
+ rol rTt+1
1101
+ eor rC2+1, rTt+1
1102
+ std Y+a12+1, rTt+1
1103
+ rol rTt+2
1104
+ eor rC2+2, rTt+2
1105
+ std Y+a12+2, rTt+2
1106
+ rol rTt+3
1107
+ eor rC2+3, rTt+3
1108
+ std Y+a12+3, rTt+3
1109
+ adc rTt+0, zero
1110
+ eor rC2+0, rTt+0
1111
+ std Y+a12+0, rTt+0
1112
+
1113
+ ; a22 = v1;
1114
+ std Y+a22+0, rVv+3 ; rVv+n was loaded from a20+((n+1) mod 4), so this order restores the parked byte order
1115
+ std Y+a22+1, rVv+0
1116
+ std Y+a22+2, rVv+1
1117
+ std Y+a22+3, rVv+2
1118
+
1119
+ ; c2 ^= v1;
1120
+ eor rC2+0, rVv+3
1121
+ eor rC2+1, rVv+0
1122
+ eor rC2+2, rVv+1
1123
+ eor rC2+3, rVv+2
1124
+
1125
+ ; a01 = c1 ^= ~a11 & a21;
1126
+ ldd rTt+0, Y+a11+0 ;rTt holds a11
1127
+ mov r0, rTt+0
1128
+ com r0
1129
+ ldd rVv+0, Y+a21+0 ;rVv holds a21
1130
+ and r0, rVv+0
1131
+ eor rC1+0, r0
1132
+ std Y+a01+0, rC1+0
1133
+ ldd rTt+1, Y+a11+1
1134
+ mov r0, rTt+1
1135
+ com r0
1136
+ ldd rVv+1, Y+a21+1
1137
+ and r0, rVv+1
1138
+ eor rC1+1, r0
1139
+ std Y+a01+1, rC1+1
1140
+ ldd rTt+2, Y+a11+2
1141
+ mov r0, rTt+2
1142
+ com r0
1143
+ ldd rVv+2, Y+a21+2
1144
+ and r0, rVv+2
1145
+ eor rC1+2, r0
1146
+ std Y+a01+2, rC1+2
1147
+ ldd rTt+3, Y+a11+3
1148
+ mov r0, rTt+3
1149
+ com r0
1150
+ ldd rVv+3, Y+a21+3
1151
+ and r0, rVv+3
1152
+ eor rC1+3, r0
1153
+ std Y+a01+3, rC1+3
1154
+
1155
+ ; a11 ^= ~a21 & c1;
1156
+ mov r0, rVv+0
1157
+ com r0
1158
+ and r0, rC1+0
1159
+ eor rTt+0, r0
1160
+ std Y+a11+0, rTt+0
1161
+ mov r0, rVv+1
1162
+ com r0
1163
+ and r0, rC1+1
1164
+ eor rTt+1, r0
1165
+ std Y+a11+1, rTt+1
1166
+ mov r0, rVv+2
1167
+ com r0
1168
+ and r0, rC1+2
1169
+ eor rTt+2, r0
1170
+ std Y+a11+2, rTt+2
1171
+ mov r0, rVv+3
1172
+ com r0
1173
+ and r0, rC1+3
1174
+ eor rTt+3, r0
1175
+ std Y+a11+3, rTt+3
1176
+
1177
+ ; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
1178
+ mov r0, rC1+0
1179
+ com r0
1180
+ and r0, rTt+0
1181
+ eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
1182
+ mov r0, rC1+1
1183
+ com r0
1184
+ and r0, rTt+1
1185
+ eor rVv+1, r0
1186
+ mov r0, rC1+2
1187
+ com r0
1188
+ and r0, rTt+2
1189
+ eor rVv+2, r0
1190
+ mov r0, rC1+3
1191
+ com r0
1192
+ and r0, rTt+3
1193
+ eor rVv+3, r0
1194
+
1195
+ ; c1 ^= a11 = ROTL32(a11, 1);
1196
+ lsl rTt+0
1197
+ rol rTt+1
1198
+ eor rC1+1, rTt+1
1199
+ std Y+a11+1, rTt+1
1200
+ rol rTt+2
1201
+ eor rC1+2, rTt+2
1202
+ std Y+a11+2, rTt+2
1203
+ rol rTt+3
1204
+ eor rC1+3, rTt+3
1205
+ std Y+a11+3, rTt+3
1206
+ adc rTt+0, zero
1207
+ eor rC1+0, rTt+0
1208
+ std Y+a11+0, rTt+0
1209
+
1210
+ ; a03 = c3 ^= ~a13 & a23;
1211
+ ldd r0, Y+a13+0
1212
+ com r0
1213
+ ldd rTt+0, Y+a23+0 ; a23 in rTt
1214
+ and r0, rTt+0
1215
+ eor rC3+0, r0
1216
+ std Y+a03+0, rC3+0
1217
+ ldd r0, Y+a13+1
1218
+ com r0
1219
+ ldd rTt+1, Y+a23+1
1220
+ and r0, rTt+1
1221
+ eor rC3+1, r0
1222
+ std Y+a03+1, rC3+1
1223
+ ldd r0, Y+a13+2
1224
+ com r0
1225
+ ldd rTt+2, Y+a23+2
1226
+ and r0, rTt+2
1227
+ eor rC3+2, r0
1228
+ std Y+a03+2, rC3+2
1229
+ ldd r0, Y+a13+3
1230
+ com r0
1231
+ ldd rTt+3, Y+a23+3
1232
+ and r0, rTt+3
1233
+ eor rC3+3, r0
1234
+ std Y+a03+3, rC3+3
1235
+
1236
+ ; a13 ^= ~a23 & c3;
1237
+ mov r0, rTt+0
1238
+ com r0
1239
+ and r0, rC3+0
1240
+ ldd rTt+0, Y+a13+0 ; a13 in rTt
1241
+ eor rTt+0, r0
1242
+ mov r0, rTt+1
1243
+ com r0
1244
+ and r0, rC3+1
1245
+ ldd rTt+1, Y+a13+1
1246
+ eor rTt+1, r0
1247
+ mov r0, rTt+2
1248
+ com r0
1249
+ and r0, rC3+2
1250
+ ldd rTt+2, Y+a13+2
1251
+ eor rTt+2, r0
1252
+ mov r0, rTt+3
1253
+ com r0
1254
+ and r0, rC3+3
1255
+ ldd rTt+3, Y+a13+3
1256
+ eor rTt+3, r0
1257
+
1258
+ ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
1259
+ push rVv ; byte 0 of v1 is saved because its register is borrowed as scratch until the pop below
1260
+ mov r0, rC3+0
1261
+ com r0
1262
+ and r0, rTt+0
1263
+ ldd rVv, Y+a23+0
1264
+ eor r0, rVv
1265
+ eor rC1+1, r0
1266
+ std Y+a21+1, r0
1267
+ mov r0, rC3+1
1268
+ com r0
1269
+ and r0, rTt+1
1270
+ ldd rVv, Y+a23+1
1271
+ eor r0, rVv
1272
+ eor rC1+2, r0
1273
+ std Y+a21+2, r0
1274
+ mov r0, rC3+2
1275
+ com r0
1276
+ and r0, rTt+2
1277
+ ldd rVv, Y+a23+2
1278
+ eor r0, rVv
1279
+ eor rC1+3, r0
1280
+ std Y+a21+3, r0
1281
+ mov r0, rC3+3
1282
+ com r0
1283
+ and r0, rTt+3
1284
+ ldd rVv, Y+a23+3
1285
+ eor r0, rVv
1286
+ eor rC1+0, r0
1287
+ std Y+a21+0, r0
1288
+ pop rVv ; restore byte 0 of v1
1289
+
1290
+ ; a23 = v1;
1291
+ std Y+a23+0, rVv+3 ; rol8(rVv)
1292
+ std Y+a23+1, rVv+0
1293
+ std Y+a23+2, rVv+1
1294
+ std Y+a23+3, rVv+2
1295
+
1296
+ ; c3 ^= v1;
1297
+ eor rC3+0, rVv+3
1298
+ eor rC3+1, rVv+0
1299
+ eor rC3+2, rVv+1
1300
+ eor rC3+3, rVv+2
1301
+
1302
+ ; c3 ^= a13 = ROTL32(a13, 1);
1303
+ lsl rTt+0
1304
+ rol rTt+1
1305
+ std Y+a13+1, rTt+1
1306
+ eor rC3+1, rTt+1
1307
+ rol rTt+2
1308
+ std Y+a13+2, rTt+2
1309
+ eor rC3+2, rTt+2
1310
+ rol rTt+3
1311
+ std Y+a13+3, rTt+3
1312
+ eor rC3+3, rTt+3
1313
+ adc rTt+0, zero
1314
+ std Y+a13+0, rTt+0
1315
+ eor rC3+0, rTt+0
1316
+
1317
+ ; Check for terminator
1318
+ lpm r0, Z ; peek at the first byte of the next entry without advancing Z
1319
+ inc r0 ; 0xFF + 1 wraps to 0, so the zero flag is set only for the terminator
1320
+ breq Xoodoo_Done
1321
+ rjmp Xoodoo_RoundLoop ; more constants remain: run another round
1322
+ Xoodoo_Done: ; epilogue: restore the registers pushed at Xoodoo_Permute, in reverse order
1323
+ pop r29
1324
+ pop r28
1325
+ pop r17
1326
+ pop r16
1327
+ pop r15
1328
+ pop r14
1329
+ pop r13
1330
+ pop r12
1331
+ pop r11
1332
+ pop r10
1333
+ pop r9
1334
+ pop r8
1335
+ pop r7
1336
+ pop r6
1337
+ pop r5
1338
+ pop r4
1339
+ pop r3
1340
+ pop r2
1341
+ ret ; return to the caller of the Xoodoo_Permute_* entry point