sleeping_kangaroo12 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +127 -0
  3. data/ext/Rakefile +73 -0
  4. data/ext/binding/sleeping_kangaroo12.c +39 -0
  5. data/ext/config/xkcp.build +17 -0
  6. data/ext/xkcp/LICENSE +1 -0
  7. data/ext/xkcp/Makefile +15 -0
  8. data/ext/xkcp/Makefile.build +200 -0
  9. data/ext/xkcp/README.markdown +296 -0
  10. data/ext/xkcp/lib/HighLevel.build +143 -0
  11. data/ext/xkcp/lib/LowLevel.build +757 -0
  12. data/ext/xkcp/lib/common/align.h +33 -0
  13. data/ext/xkcp/lib/common/brg_endian.h +143 -0
  14. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.c +301 -0
  15. data/ext/xkcp/lib/high/KangarooTwelve/KangarooTwelve.h +97 -0
  16. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.c +81 -0
  17. data/ext/xkcp/lib/high/Keccak/FIPS202/KeccakHash.h +125 -0
  18. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.c +48 -0
  19. data/ext/xkcp/lib/high/Keccak/FIPS202/SimpleFIPS202.h +79 -0
  20. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.c +81 -0
  21. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.h +73 -0
  22. data/ext/xkcp/lib/high/Keccak/KeccakDuplex.inc +195 -0
  23. data/ext/xkcp/lib/high/Keccak/KeccakSponge.c +111 -0
  24. data/ext/xkcp/lib/high/Keccak/KeccakSponge.h +76 -0
  25. data/ext/xkcp/lib/high/Keccak/KeccakSponge.inc +314 -0
  26. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.c +61 -0
  27. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.h +67 -0
  28. data/ext/xkcp/lib/high/Keccak/PRG/KeccakPRG.inc +128 -0
  29. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.c +93 -0
  30. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.h +599 -0
  31. data/ext/xkcp/lib/high/Keccak/SP800-185/SP800-185.inc +573 -0
  32. data/ext/xkcp/lib/high/Ketje/Ketjev2.c +87 -0
  33. data/ext/xkcp/lib/high/Ketje/Ketjev2.h +88 -0
  34. data/ext/xkcp/lib/high/Ketje/Ketjev2.inc +274 -0
  35. data/ext/xkcp/lib/high/Keyak/Keyakv2.c +132 -0
  36. data/ext/xkcp/lib/high/Keyak/Keyakv2.h +217 -0
  37. data/ext/xkcp/lib/high/Keyak/Keyakv2.inc +81 -0
  38. data/ext/xkcp/lib/high/Keyak/Motorist.inc +953 -0
  39. data/ext/xkcp/lib/high/Kravatte/Kravatte.c +533 -0
  40. data/ext/xkcp/lib/high/Kravatte/Kravatte.h +115 -0
  41. data/ext/xkcp/lib/high/Kravatte/KravatteModes.c +557 -0
  42. data/ext/xkcp/lib/high/Kravatte/KravatteModes.h +247 -0
  43. data/ext/xkcp/lib/high/Xoodyak/Cyclist.h +66 -0
  44. data/ext/xkcp/lib/high/Xoodyak/Cyclist.inc +336 -0
  45. data/ext/xkcp/lib/high/Xoodyak/Xoodyak-parameters.h +26 -0
  46. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.c +55 -0
  47. data/ext/xkcp/lib/high/Xoodyak/Xoodyak.h +35 -0
  48. data/ext/xkcp/lib/high/Xoofff/Xoofff.c +634 -0
  49. data/ext/xkcp/lib/high/Xoofff/Xoofff.h +147 -0
  50. data/ext/xkcp/lib/high/Xoofff/XoofffModes.c +483 -0
  51. data/ext/xkcp/lib/high/Xoofff/XoofffModes.h +241 -0
  52. data/ext/xkcp/lib/high/common/Phases.h +25 -0
  53. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-SnP.h +41 -0
  54. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-armcc.s +1666 -0
  55. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv6m-le-gcc.s +1655 -0
  56. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-armcc.s +1268 -0
  57. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7a-le-gcc.s +1264 -0
  58. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-armcc.s +1178 -0
  59. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-inplace-32bi-armv7m-le-gcc.s +1175 -0
  60. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-armcc.s +1338 -0
  61. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u1-32bi-armv6m-le-gcc.s +1336 -0
  62. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-armcc.s +1343 -0
  63. data/ext/xkcp/lib/low/KeccakP-1600/ARM/KeccakP-1600-u2-32bi-armv6m-le-gcc.s +1339 -0
  64. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-SnP.h +42 -0
  65. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-armcc.s +823 -0
  66. data/ext/xkcp/lib/low/KeccakP-1600/ARMv7A-NEON/KeccakP-1600-armv7a-le-neon-gcc.s +831 -0
  67. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-SnP.h +31 -0
  68. data/ext/xkcp/lib/low/KeccakP-1600/ARMv8A/KeccakP-1600-armv8a-neon.s +540 -0
  69. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-SnP.h +42 -0
  70. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-compact.s +733 -0
  71. data/ext/xkcp/lib/low/KeccakP-1600/AVR8/KeccakP-1600-avr8-fast.s +1121 -0
  72. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-AVX2.s +1100 -0
  73. data/ext/xkcp/lib/low/KeccakP-1600/AVX2/KeccakP-1600-SnP.h +52 -0
  74. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-AVX512.c +623 -0
  75. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/KeccakP-1600-SnP.h +47 -0
  76. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u12/KeccakP-1600-AVX512-config.h +6 -0
  77. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/u6/KeccakP-1600-AVX512-config.h +6 -0
  78. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/C/ua/KeccakP-1600-AVX512-config.h +6 -0
  79. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-AVX512.s +1031 -0
  80. data/ext/xkcp/lib/low/KeccakP-1600/AVX512/KeccakP-1600-SnP.h +53 -0
  81. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-SnP.h +44 -0
  82. data/ext/xkcp/lib/low/KeccakP-1600/XOP/KeccakP-1600-XOP.c +476 -0
  83. data/ext/xkcp/lib/low/KeccakP-1600/XOP/u6/KeccakP-1600-XOP-config.h +6 -0
  84. data/ext/xkcp/lib/low/KeccakP-1600/XOP/ua/KeccakP-1600-XOP-config.h +6 -0
  85. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-64.macros +748 -0
  86. data/ext/xkcp/lib/low/KeccakP-1600/common/KeccakP-1600-unrolling.macros +305 -0
  87. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-SnP.h +40 -0
  88. data/ext/xkcp/lib/low/KeccakP-1600/compact/KeccakP-1600-compact64.c +420 -0
  89. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-SnP.h +43 -0
  90. data/ext/xkcp/lib/low/KeccakP-1600/plain-32bits-inplace/KeccakP-1600-inplace32BI.c +1163 -0
  91. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-SnP.h +54 -0
  92. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/KeccakP-1600-opt64.c +565 -0
  93. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcu6/KeccakP-1600-opt64-config.h +7 -0
  94. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua/KeccakP-1600-opt64-config.h +7 -0
  95. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/lcua-shld/KeccakP-1600-opt64-config.h +8 -0
  96. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/u6/KeccakP-1600-opt64-config.h +6 -0
  97. data/ext/xkcp/lib/low/KeccakP-1600/plain-64bits/ua/KeccakP-1600-opt64-config.h +6 -0
  98. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-SnP.h +44 -0
  99. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference.h +23 -0
  100. data/ext/xkcp/lib/low/KeccakP-1600/ref-32bits/KeccakP-1600-reference32BI.c +625 -0
  101. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-SnP.h +44 -0
  102. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.c +440 -0
  103. data/ext/xkcp/lib/low/KeccakP-1600/ref-64bits/KeccakP-1600-reference.h +23 -0
  104. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-SnP.h +42 -0
  105. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas.s +1196 -0
  106. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-gas_Apple.s +1124 -0
  107. data/ext/xkcp/lib/low/KeccakP-1600/x86-64/KeccakP-1600-x86-64-shld-gas.s +1196 -0
  108. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-armcc.s +1392 -0
  109. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-inplace-pl2-armv7a-neon-le-gcc.s +1394 -0
  110. data/ext/xkcp/lib/low/KeccakP-1600-times2/ARMv7A-NEON/KeccakP-1600-times2-SnP.h +42 -0
  111. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u12/SIMD512-2-config.h +7 -0
  112. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512u4/SIMD512-2-config.h +7 -0
  113. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/AVX512ufull/SIMD512-2-config.h +7 -0
  114. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SIMD512.c +850 -0
  115. data/ext/xkcp/lib/low/KeccakP-1600-times2/AVX512/KeccakP-1600-times2-SnP.h +51 -0
  116. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c +957 -0
  117. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SnP.h +49 -0
  118. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-u2/SIMD128-config.h +8 -0
  119. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/SSSE3-ua/SIMD128-config.h +8 -0
  120. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-u2/SIMD128-config.h +9 -0
  121. data/ext/xkcp/lib/low/KeccakP-1600-times2/SIMD128/XOP-ua/SIMD128-config.h +9 -0
  122. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-SnP.h +45 -0
  123. data/ext/xkcp/lib/low/KeccakP-1600-times2/fallback-on1/KeccakP-1600-times2-on1.c +37 -0
  124. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SIMD256.c +1321 -0
  125. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/KeccakP-1600-times4-SnP.h +55 -0
  126. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u12/SIMD256-config.h +7 -0
  127. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/u6/SIMD256-config.h +7 -0
  128. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX2/ua/SIMD256-config.h +7 -0
  129. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u12/SIMD512-4-config.h +7 -0
  130. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512u4/SIMD512-4-config.h +7 -0
  131. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/AVX512ufull/SIMD512-4-config.h +7 -0
  132. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SIMD512.c +881 -0
  133. data/ext/xkcp/lib/low/KeccakP-1600-times4/AVX512/KeccakP-1600-times4-SnP.h +51 -0
  134. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-SnP.h +45 -0
  135. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on1/KeccakP-1600-times4-on1.c +37 -0
  136. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-SnP.h +45 -0
  137. data/ext/xkcp/lib/low/KeccakP-1600-times4/fallback-on2/KeccakP-1600-times4-on2.c +38 -0
  138. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SIMD512.c +1615 -0
  139. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/KeccakP-1600-times8-SnP.h +57 -0
  140. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u12/SIMD512-config.h +7 -0
  141. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/u4/SIMD512-config.h +7 -0
  142. data/ext/xkcp/lib/low/KeccakP-1600-times8/AVX512/ua/SIMD512-config.h +7 -0
  143. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-SnP.h +45 -0
  144. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on1/KeccakP-1600-times8-on1.c +37 -0
  145. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-SnP.h +45 -0
  146. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on2/KeccakP-1600-times8-on2.c +38 -0
  147. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-SnP.h +45 -0
  148. data/ext/xkcp/lib/low/KeccakP-1600-times8/fallback-on4/KeccakP-1600-times8-on4.c +38 -0
  149. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-SnP.h +41 -0
  150. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-armcc.s +442 -0
  151. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv6m-le-gcc.s +446 -0
  152. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-armcc.s +419 -0
  153. data/ext/xkcp/lib/low/KeccakP-200/ARM/KeccakP-200-armv7m-le-gcc.s +427 -0
  154. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-SnP.h +41 -0
  155. data/ext/xkcp/lib/low/KeccakP-200/AVR8/KeccakP-200-avr8-fast.s +647 -0
  156. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-SnP.h +39 -0
  157. data/ext/xkcp/lib/low/KeccakP-200/compact/KeccakP-200-compact.c +190 -0
  158. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-SnP.h +43 -0
  159. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.c +412 -0
  160. data/ext/xkcp/lib/low/KeccakP-200/ref/KeccakP-200-reference.h +23 -0
  161. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-SnP.h +41 -0
  162. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-armcc.s +454 -0
  163. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv6m-le-gcc.s +458 -0
  164. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-armcc.s +455 -0
  165. data/ext/xkcp/lib/low/KeccakP-400/ARM/KeccakP-400-armv7m-le-gcc.s +458 -0
  166. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-SnP.h +41 -0
  167. data/ext/xkcp/lib/low/KeccakP-400/AVR8/KeccakP-400-avr8-fast.s +728 -0
  168. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-SnP.h +43 -0
  169. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.c +414 -0
  170. data/ext/xkcp/lib/low/KeccakP-400/ref/KeccakP-400-reference.h +23 -0
  171. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-SnP.h +42 -0
  172. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-armcc.s +527 -0
  173. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u1-armv6m-le-gcc.s +533 -0
  174. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-armcc.s +528 -0
  175. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv6m-le-gcc.s +534 -0
  176. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-armcc.s +521 -0
  177. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7a-le-gcc.s +527 -0
  178. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-armcc.s +517 -0
  179. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-u2-armv7m-le-gcc.s +523 -0
  180. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-armcc.s +550 -0
  181. data/ext/xkcp/lib/low/KeccakP-800/ARM/KeccakP-800-uf-armv7m-le-gcc.s +556 -0
  182. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-SnP.h +32 -0
  183. data/ext/xkcp/lib/low/KeccakP-800/ARMv8A/KeccakP-800-armv8a-neon.s +432 -0
  184. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-SnP.h +42 -0
  185. data/ext/xkcp/lib/low/KeccakP-800/AVR8/KeccakP-800-avr8-fast.s +929 -0
  186. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-SnP.h +40 -0
  187. data/ext/xkcp/lib/low/KeccakP-800/compact/KeccakP-800-compact.c +244 -0
  188. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-SnP.h +46 -0
  189. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32-bis.macros +184 -0
  190. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.c +454 -0
  191. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-opt32.macros +459 -0
  192. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling-bis.macros +83 -0
  193. data/ext/xkcp/lib/low/KeccakP-800/plain/KeccakP-800-unrolling.macros +88 -0
  194. data/ext/xkcp/lib/low/KeccakP-800/plain/lcu2/KeccakP-800-opt32-config.h +7 -0
  195. data/ext/xkcp/lib/low/KeccakP-800/plain/lcua/KeccakP-800-opt32-config.h +7 -0
  196. data/ext/xkcp/lib/low/KeccakP-800/plain/u2/KeccakP-800-opt32-config.h +7 -0
  197. data/ext/xkcp/lib/low/KeccakP-800/plain/ua/KeccakP-800-opt32-config.h +7 -0
  198. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-SnP.h +44 -0
  199. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.c +437 -0
  200. data/ext/xkcp/lib/low/KeccakP-800/ref/KeccakP-800-reference.h +23 -0
  201. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/Ket.h +57 -0
  202. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-armcc.s +475 -0
  203. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeJr-armv7m-le-gcc.s +480 -0
  204. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-armcc.s +590 -0
  205. data/ext/xkcp/lib/low/Ketje/OptimizedAsmARM/KetjeSr-armv7m-le-gcc.s +590 -0
  206. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.c +126 -0
  207. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.h +68 -0
  208. data/ext/xkcp/lib/low/Ketje/OptimizedLE/Ket.inc +174 -0
  209. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.c +80 -0
  210. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.h +68 -0
  211. data/ext/xkcp/lib/low/Ketje/SnP-compliant/Ket.inc +142 -0
  212. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-SnP.h +55 -0
  213. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-armcc.s +1086 -0
  214. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-u1-armv6m-le-gcc.s +1092 -0
  215. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-armcc.s +721 -0
  216. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv6-le-gcc.s +726 -0
  217. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-armcc.s +723 -0
  218. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodoo-uf-armv7m-le-gcc.s +729 -0
  219. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-armcc.s +1164 -0
  220. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-u1-armv6m-le-gcc.s +1165 -0
  221. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-armcc.s +562 -0
  222. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv6-le-gcc.s +563 -0
  223. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-armcc.s +563 -0
  224. data/ext/xkcp/lib/low/Xoodoo/ARM/Xoodyak-uf-armv7m-le-gcc.s +565 -0
  225. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-SnP.h +55 -0
  226. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-armcc.s +476 -0
  227. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodoo-uf-armv7a-neon-le-gcc.s +485 -0
  228. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-armcc.s +362 -0
  229. data/ext/xkcp/lib/low/Xoodoo/ARMv7A-NEON/Xoodyak-uf-armv7a-neon-le-gcc.s +367 -0
  230. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-SnP.h +43 -0
  231. data/ext/xkcp/lib/low/Xoodoo/AVR8/Xoodoo-avr8-u1.s +1341 -0
  232. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SIMD512.c +581 -0
  233. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodoo-SnP.h +58 -0
  234. data/ext/xkcp/lib/low/Xoodoo/AVX512/Xoodyak-full-block-SIMD512.c +332 -0
  235. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SIMD128.c +329 -0
  236. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodoo-SnP.h +53 -0
  237. data/ext/xkcp/lib/low/Xoodoo/SSE2/Xoodyak-full-block-SIMD128.c +355 -0
  238. data/ext/xkcp/lib/low/Xoodoo/Xoodoo.h +79 -0
  239. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-SnP.h +56 -0
  240. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodoo-optimized.c +399 -0
  241. data/ext/xkcp/lib/low/Xoodoo/plain/Xoodyak-full-blocks.c +127 -0
  242. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-SnP.h +43 -0
  243. data/ext/xkcp/lib/low/Xoodoo/ref/Xoodoo-reference.c +253 -0
  244. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SIMD512.c +1044 -0
  245. data/ext/xkcp/lib/low/Xoodoo-times16/AVX512/Xoodoo-times16-SnP.h +49 -0
  246. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-SnP.h +45 -0
  247. data/ext/xkcp/lib/low/Xoodoo-times16/fallback-on1/Xoodoo-times16-on1.c +37 -0
  248. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-ARMv7A.s +1587 -0
  249. data/ext/xkcp/lib/low/Xoodoo-times4/ARMv7A-NEON/Xoodoo-times4-SnP.h +48 -0
  250. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SIMD512.c +1202 -0
  251. data/ext/xkcp/lib/low/Xoodoo-times4/AVX512/Xoodoo-times4-SnP.h +48 -0
  252. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SIMD128.c +484 -0
  253. data/ext/xkcp/lib/low/Xoodoo-times4/SSSE3/Xoodoo-times4-SnP.h +44 -0
  254. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-SnP.h +45 -0
  255. data/ext/xkcp/lib/low/Xoodoo-times4/fallback-on1/Xoodoo-times4-on1.c +37 -0
  256. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SIMD256.c +939 -0
  257. data/ext/xkcp/lib/low/Xoodoo-times8/AVX2/Xoodoo-times8-SnP.h +49 -0
  258. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SIMD512.c +1216 -0
  259. data/ext/xkcp/lib/low/Xoodoo-times8/AVX512/Xoodoo-times8-SnP.h +48 -0
  260. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-SnP.h +45 -0
  261. data/ext/xkcp/lib/low/Xoodoo-times8/fallback-on1/Xoodoo-times8-on1.c +37 -0
  262. data/ext/xkcp/lib/low/common/PlSnP-Fallback.inc +290 -0
  263. data/ext/xkcp/lib/low/common/SnP-Relaned.h +141 -0
  264. data/ext/xkcp/support/Build/ExpandProducts.xsl +79 -0
  265. data/ext/xkcp/support/Build/ToGlobalMakefile.xsl +206 -0
  266. data/ext/xkcp/support/Build/ToOneTarget.xsl +89 -0
  267. data/ext/xkcp/support/Build/ToTargetConfigFile.xsl +37 -0
  268. data/ext/xkcp/support/Build/ToTargetMakefile.xsl +298 -0
  269. data/ext/xkcp/support/Build/ToVCXProj.xsl +198 -0
  270. data/ext/xkcp/support/Kernel-PMU/Kernel-pmu.md +133 -0
  271. data/ext/xkcp/support/Kernel-PMU/Makefile +8 -0
  272. data/ext/xkcp/support/Kernel-PMU/enable_arm_pmu.c +129 -0
  273. data/ext/xkcp/support/Kernel-PMU/load-module +1 -0
  274. data/ext/xkcp/util/KeccakSum/KeccakSum.c +394 -0
  275. data/ext/xkcp/util/KeccakSum/base64.c +86 -0
  276. data/ext/xkcp/util/KeccakSum/base64.h +12 -0
  277. data/lib/sleeping_kangaroo12/binding.rb +15 -0
  278. data/lib/sleeping_kangaroo12/build/loader.rb +40 -0
  279. data/lib/sleeping_kangaroo12/build/platform.rb +37 -0
  280. data/lib/sleeping_kangaroo12/build.rb +4 -0
  281. data/lib/sleeping_kangaroo12/digest.rb +103 -0
  282. data/lib/sleeping_kangaroo12/version.rb +5 -0
  283. data/lib/sleeping_kangaroo12.rb +7 -0
  284. metadata +372 -0
@@ -0,0 +1,1341 @@
1
+ ;
2
+ ; The eXtended Keccak Code Package (XKCP)
3
+ ; https://github.com/XKCP/XKCP
4
+ ;
5
+ ; The Xoodoo permutation, designed by Joan Daemen, Seth Hoffert, Gilles Van Assche and Ronny Van Keer.
6
+ ;
7
+ ; Implementation by Ronny Van Keer, hereby denoted as "the implementer".
8
+ ;
9
+ ; For more information, feedback or questions, please refer to the Keccak Team website:
10
+ ; https://keccak.team/
11
+ ;
12
+ ; To the extent possible under law, the implementer has waived all copyright
13
+ ; and related or neighboring rights to the source code in this file.
14
+ ; http://creativecommons.org/publicdomain/zero/1.0/
15
+ ;
16
+ ; ---
17
+ ;
18
+ ; This file implements Xoodoo in a SnP-compatible way.
19
+ ; Please refer to SnP-documentation.h for more details.
20
+ ;
21
+ ; This implementation comes with Xoodoo-SnP.h in the same folder.
22
+ ; Please refer to LowLevel.build for the exact list of other files it must be combined with.
23
+ ;
24
+
25
+ ; INFO: Tested on ATmega1280 simulator
26
+
27
+ ; Register numbers shared by all routines: zero = r1 (must contain 0 on entry - presumably guaranteed by the avr-gcc ABI, confirm), rpState = r24 (low half of r24:r25, the first argument), rX/rY/rZ = low registers of the X/Y/Z pointer pairs; sp = 0x3D is not referenced in the routines shown here
28
+ #define zero 1
29
+ #define rpState 24
30
+ #define rX 26
31
+ #define rY 28
32
+ #define rZ 30
33
+ #define sp 0x3D
34
+
35
+ ;----------------------------------------------------------------------------
36
+ ;
37
+ ; void Xoodoo_StaticInitialize( void )
38
+ ;
39
+ .global Xoodoo_StaticInitialize
40
+
41
+ ;----------------------------------------------------------------------------
42
+ ;
43
+ ; void Xoodoo_Initialize(void *state)
44
+ ;
45
+ ; argument state is passed in r24:r25
46
+ ;
47
+ .global Xoodoo_Initialize
48
+ Xoodoo_Initialize: ; zero-fills the 48 bytes starting at state (r24:r25); clobbers r23 and Z
49
+ movw rZ, r24 ; Z = state
50
+ ldi r23, 3*4/2 ; loop count 6: each iteration clears 8 bytes (2 lanes), 48 bytes in total
51
+ Xoodoo_Initialize_Loop:
52
+ st z+, zero ; store the zero register and post-increment Z (repeated 8 times)
53
+ st z+, zero
54
+ st z+, zero
55
+ st z+, zero
56
+ st z+, zero
57
+ st z+, zero
58
+ st z+, zero
59
+ st z+, zero
60
+ dec r23
61
+ brne Xoodoo_Initialize_Loop ; repeat until the iteration counter reaches 0
62
+ Xoodoo_StaticInitialize: ; nothing to set up: this entry point is just the ret below
63
+ ret
64
+
65
+ ;----------------------------------------------------------------------------
66
+ ;
67
+ ; void Xoodoo_AddByte(void *state, unsigned char data, unsigned int offset)
68
+ ;
69
+ ; argument state is passed in r24:r25
70
+ ; argument data is passed in r22:r23, only LSB (r22) is used
71
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
72
+ ;
73
+ .global Xoodoo_AddByte
74
+ Xoodoo_AddByte: ; state[offset] ^= data; clobbers r0 and Z
75
+ movw rZ, r24 ; Z = state
76
+ add rZ, r20 ; Z += offset (8-bit offset added to the low byte)
77
+ adc rZ+1, zero ; propagate the carry into the high byte
78
+ ld r0, Z ; r0 = current state byte
79
+ eor r0, r22 ; XOR in the data byte
80
+ st Z, r0 ; write the result back
81
+ ret
82
+
83
+ ;----------------------------------------------------------------------------
84
+ ;
85
+ ; void Xoodoo_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
86
+ ;
87
+ ; argument state is passed in r24:r25
88
+ ; argument data is passed in r22:r23
89
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
90
+ ; argument length is passed in r18:r19, only LSB (r18) is used
91
+ ;
92
+ .global Xoodoo_AddBytes
93
+ Xoodoo_AddBytes: ; state[offset+i] ^= data[i] for i in 0..length-1; clobbers r0, r18, r19, r21, X, Z
94
+ movw rZ, r24 ; Z = state
95
+ add rZ, r20 ; Z += offset (8-bit)
96
+ adc rZ+1, zero ; propagate the carry into the high byte
97
+ movw rX, r22 ; X = data
98
+ subi r18, 8 ; length -= 8; carry set means fewer than 8 bytes were left
99
+ brcs Xoodoo_AddBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
100
+ ; Unrolled loop: XOR 8 data bytes into the state per iteration
101
+ Xoodoo_AddBytes_Loop8:
102
+ ld r21, X+ ; r21 = next data byte
103
+ ld r0, Z ; r0 = current state byte
104
+ eor r0, r21 ; combine
105
+ st Z+, r0 ; store back and advance the state pointer
106
+ ld r21, X+
107
+ ld r0, Z
108
+ eor r0, r21
109
+ st Z+, r0
110
+ ld r21, X+
111
+ ld r0, Z
112
+ eor r0, r21
113
+ st Z+, r0
114
+ ld r21, X+
115
+ ld r0, Z
116
+ eor r0, r21
117
+ st Z+, r0
118
+ ld r21, X+
119
+ ld r0, Z
120
+ eor r0, r21
121
+ st Z+, r0
122
+ ld r21, X+
123
+ ld r0, Z
124
+ eor r0, r21
125
+ st Z+, r0
126
+ ld r21, X+
127
+ ld r0, Z
128
+ eor r0, r21
129
+ st Z+, r0
130
+ ld r21, X+
131
+ ld r0, Z
132
+ eor r0, r21
133
+ st Z+, r0
134
+ subi r18, 8 ; another full group of 8 available?
135
+ brcc Xoodoo_AddBytes_Loop8 ; yes (no borrow): keep looping
136
+ Xoodoo_AddBytes_Byte:
137
+ ldi r19, 8
138
+ add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
139
+ breq Xoodoo_AddBytes_End ; nothing left
140
+ Xoodoo_AddBytes_Loop1: ; tail: one byte per iteration
141
+ ld r21, X+
142
+ ld r0, Z
143
+ eor r0, r21
144
+ st Z+, r0
145
+ dec r18
146
+ brne Xoodoo_AddBytes_Loop1
147
+ Xoodoo_AddBytes_End:
148
+ ret
149
+
150
+
151
+ ;----------------------------------------------------------------------------
152
+ ;
153
+ ; void Xoodoo_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
154
+ ;
155
+ ; argument state is passed in r24:r25
156
+ ; argument data is passed in r22:r23
157
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
158
+ ; argument length is passed in r18:r19, only LSB (r18) is used
159
+ ;
160
+ .global Xoodoo_OverwriteBytes
161
+ Xoodoo_OverwriteBytes: ; state[offset+i] = data[i] for i in 0..length-1; clobbers r0, r18, r19, X, Z
161
+ movw rZ, r24 ; Z = state
162
+ add rZ, r20 ; Z += offset (8-bit)
163
+ adc rZ+1, zero ; propagate the carry into the high byte
164
+ movw rX, r22 ; X = data
165
+ subi r18, 8 ; length -= 8; carry set means fewer than 8 bytes were left
166
+ brcs Xoodoo_OverwriteBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
167
+ ; Unrolled loop: copy 8 bytes from data into the state per iteration
168
+ Xoodoo_OverwriteBytes_Loop8:
169
+ ld r0, X+ ; r0 = next data byte
170
+ st Z+, r0 ; store into the state and advance
171
+ ld r0, X+
172
+ st Z+, r0
173
+ ld r0, X+
174
+ st Z+, r0
175
+ ld r0, X+
176
+ st Z+, r0
177
+ ld r0, X+
178
+ st Z+, r0
179
+ ld r0, X+
180
+ st Z+, r0
181
+ ld r0, X+
182
+ st Z+, r0
183
+ ld r0, X+
184
+ st Z+, r0
185
+ subi r18, 8 ; another full group of 8 available?
186
+ brcc Xoodoo_OverwriteBytes_Loop8 ; yes (no borrow): keep looping
187
+ Xoodoo_OverwriteBytes_Byte:
188
+ ldi r19, 8
189
+ add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
190
+ breq Xoodoo_OverwriteBytes_End ; nothing left
191
+ Xoodoo_OverwriteBytes_Loop1: ; tail: one byte per iteration
192
+ ld r0, X+
193
+ st Z+, r0
194
+ dec r18
195
+ brne Xoodoo_OverwriteBytes_Loop1
196
+ Xoodoo_OverwriteBytes_End:
197
+ ret
199
+
200
+ ;----------------------------------------------------------------------------
201
+ ;
202
+ ; void Xoodoo_OverwriteWithZeroes(void *state, unsigned int byteCount)
203
+ ;
204
+ ; argument state is passed in r24:r25
205
+ ; argument byteCount is passed in r22:r23, only LSB (r22) is used
206
+ ;
207
+ .global Xoodoo_OverwriteWithZeroes
208
+ Xoodoo_OverwriteWithZeroes: ; writes byteCount (r22) zero bytes starting at state; clobbers r22, r23, Z
208
+ movw rZ, r24 ; rZ = state
209
+ mov r23, r22 ; r23 = byteCount
210
+ lsr r23
211
+ lsr r23
212
+ lsr r23 ; r23 = byteCount / 8 = number of full 8-byte groups
213
+ breq Xoodoo_OverwriteWithZeroes_Bytes ; no full group: only the tail remains
214
+ Xoodoo_OverwriteWithZeroes_LoopLanes:
215
+ st Z+, r1 ; r1 is the register named 'zero' elsewhere in this file; 8 stores per iteration
216
+ st Z+, r1
217
+ st Z+, r1
218
+ st Z+, r1
219
+ st Z+, r1
220
+ st Z+, r1
221
+ st Z+, r1
222
+ st Z+, r1
223
+ dec r23
224
+ brne Xoodoo_OverwriteWithZeroes_LoopLanes
225
+ Xoodoo_OverwriteWithZeroes_Bytes:
226
+ andi r22, 7 ; r22 = byteCount % 8 = leftover bytes
227
+ breq Xoodoo_OverwriteWithZeroes_End ; nothing left
228
+ Xoodoo_OverwriteWithZeroes_LoopBytes: ; tail: one byte per iteration
229
+ st Z+, r1
230
+ dec r22
231
+ brne Xoodoo_OverwriteWithZeroes_LoopBytes
232
+ Xoodoo_OverwriteWithZeroes_End:
233
+ ret
235
+
236
+ ;----------------------------------------------------------------------------
237
+ ;
238
+ ; void Xoodoo_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
239
+ ;
240
+ ; argument state is passed in r24:r25
241
+ ; argument data is passed in r22:r23
242
+ ; argument offset is passed in r20:r21, only LSB (r20) is used
243
+ ; argument length is passed in r18:r19, only LSB (r18) is used
244
+ ;
245
+ .global Xoodoo_ExtractBytes
246
+ Xoodoo_ExtractBytes: ; data[i] = state[offset+i] for i in 0..length-1 (data is written, state only read); clobbers r0, r18, r19, X, Z
246
+ movw rZ, r24 ; Z = state
247
+ add rZ, r20 ; Z += offset (8-bit)
248
+ adc rZ+1, zero ; propagate the carry into the high byte
249
+ movw rX, r22 ; X = data (destination buffer)
250
+ subi r18, 8 ; length -= 8; carry set means fewer than 8 bytes were left
251
+ brcs Xoodoo_ExtractBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
252
+ ; Unrolled loop: copy 8 bytes from the state to data per iteration
253
+ Xoodoo_ExtractBytes_Loop8:
254
+ ld r0, Z+ ; r0 = next state byte
255
+ st X+, r0 ; store into the caller's buffer and advance
256
+ ld r0, Z+
257
+ st X+, r0
258
+ ld r0, Z+
259
+ st X+, r0
260
+ ld r0, Z+
261
+ st X+, r0
262
+ ld r0, Z+
263
+ st X+, r0
264
+ ld r0, Z+
265
+ st X+, r0
266
+ ld r0, Z+
267
+ st X+, r0
268
+ ld r0, Z+
269
+ st X+, r0
270
+ subi r18, 8 ; another full group of 8 available?
271
+ brcc Xoodoo_ExtractBytes_Loop8 ; yes (no borrow): keep looping
272
+ Xoodoo_ExtractBytes_Byte:
273
+ ldi r19, 8
274
+ add r18, r19 ; undo the last subtraction: r18 = remaining bytes (0..7)
275
+ breq Xoodoo_ExtractBytes_End ; nothing left
276
+ Xoodoo_ExtractBytes_Loop1: ; tail: one byte per iteration
277
+ ld r0, Z+
278
+ st X+, r0
279
+ dec r18
280
+ brne Xoodoo_ExtractBytes_Loop1
281
+ Xoodoo_ExtractBytes_End:
282
+ ret
284
+
285
+ ;----------------------------------------------------------------------------
286
+ ;
287
+ ; void Xoodoo_ExtractAndAddBytes(void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
288
+ ;
289
+ ; argument state is passed in r24:r25
290
+ ; argument input is passed in r22:r23
291
+ ; argument output is passed in r20:r21
292
+ ; argument offset is passed in r18:r19, only LSB (r18) is used
293
+ ; argument length is passed in r16:r17, only LSB (r16) is used
294
+ ;
295
+ .global Xoodoo_ExtractAndAddBytes
296
+ Xoodoo_ExtractAndAddBytes: ; output[i] = input[i] ^ state[offset+i] for i in 0..length-1; clobbers r0, r19, r21, X, Z; r16 and Y are saved and restored
296
+ tst r16
297
+ breq Xoodoo_ExtractAndAddBytes_End ; length 0: return before anything is pushed
298
+ push r16 ; r16 is used as the byte counter below, so keep the caller's value
299
+ push r28 ; Y (r28:r29) is used as the output pointer
300
+ push r29
301
+ movw rZ, r24 ; Z = state
302
+ add rZ, r18 ; Z += offset (8-bit)
303
+ adc rZ+1, zero ; propagate the carry into the high byte
304
+ movw rX, r22 ; X = input
305
+ movw rY, r20 ; Y = output; r21 can be reused as scratch after this copy
306
+ subi r16, 8 ; length -= 8; carry set means fewer than 8 bytes were left
307
+ brcs Xoodoo_ExtractAndAddBytes_Byte ; fewer than 8 bytes: go straight to the byte-wise tail
308
+ Xoodoo_ExtractAndAddBytes_LoopLane: ; unrolled: 8 bytes per iteration
309
+ ld r21, Z+ ; r21 = next state byte
310
+ ld r0, X+ ; r0 = next input byte
311
+ eor r0, r21 ; combine
312
+ st Y+, r0 ; write to output and advance
313
+ ld r21, Z+
314
+ ld r0, X+
315
+ eor r0, r21
316
+ st Y+, r0
317
+ ld r21, Z+
318
+ ld r0, X+
319
+ eor r0, r21
320
+ st Y+, r0
321
+ ld r21, Z+
322
+ ld r0, X+
323
+ eor r0, r21
324
+ st Y+, r0
325
+ ld r21, Z+
326
+ ld r0, X+
327
+ eor r0, r21
328
+ st Y+, r0
329
+ ld r21, Z+
330
+ ld r0, X+
331
+ eor r0, r21
332
+ st Y+, r0
333
+ ld r21, Z+
334
+ ld r0, X+
335
+ eor r0, r21
336
+ st Y+, r0
337
+ ld r21, Z+
338
+ ld r0, X+
339
+ eor r0, r21
340
+ st Y+, r0
341
+ subi r16, 8 ; another full group of 8 available?
342
+ brcc Xoodoo_ExtractAndAddBytes_LoopLane ; yes (no borrow): keep looping
343
+ Xoodoo_ExtractAndAddBytes_Byte:
344
+ ldi r19, 8
345
+ add r16, r19 ; undo the last subtraction: r16 = remaining bytes (0..7)
346
+ breq Xoodoo_ExtractAndAddBytes_Done ; nothing left
347
+ Xoodoo_ExtractAndAddBytes_Loop1: ; tail: one byte per iteration
348
+ ld r21, Z+
349
+ ld r0, X+
350
+ eor r0, r21
351
+ st Y+, r0
352
+ dec r16
353
+ brne Xoodoo_ExtractAndAddBytes_Loop1
354
+ Xoodoo_ExtractAndAddBytes_Done: ; restore in reverse push order
355
+ pop r29
356
+ pop r28
357
+ pop r16
358
+ Xoodoo_ExtractAndAddBytes_End:
359
+ ret
361
+
362
+ Xoodoo_RoundConstants_12: ; entry point for 12 rounds; each round constant occupies 2 bytes, low byte first
363
+ .BYTE 0x58, 0x00
364
+ .BYTE 0x38, 0x00
365
+ .BYTE 0xC0, 0x03 ; = 0x03C0
366
+ .BYTE 0xD0, 0x00
367
+ .BYTE 0x20, 0x01 ; = 0x0120
368
+ .BYTE 0x14, 0x00
369
+ Xoodoo_RoundConstants_6: ; entry point for 6 rounds (the last 6 entries of the table)
370
+ .BYTE 0x60, 0x00
371
+ .BYTE 0x2C, 0x00
372
+ .BYTE 0x80, 0x03 ; = 0x0380
373
+ .BYTE 0xF0, 0x00
374
+ .BYTE 0xA0, 0x01 ; = 0x01A0
375
+ .BYTE 0x12, 0x00
376
+ Xoodoo_RoundConstants_0: ; end of table; Xoodoo_Permute_Nrounds counts back from this label
377
+ .BYTE 0xFF, 0 ; terminator
378
+
379
+ .text
380
+
381
+ ; Register variables used in permutation: each rXx names the first of 4 consecutive registers; aPC is the byte offset P*16 + C*4 of a 4-byte lane within the state
382
+ #define rC0 2 // 4 regs (2-5)
383
+ #define rC1 6 // 4 regs (6-9)
384
+ #define rC2 10 // 4 regs (10-13)
385
+ #define rC3 14 // 4 regs (14-17)
386
+ #define rVv 18 // 4 regs (18-21)
387
+ #define rTt 22 // 4 regs (22-25)
388
+ // r26-27 free
389
+ #define a00 0
390
+ #define a01 4
391
+ #define a02 8
392
+ #define a03 12
393
+ #define a10 16
394
+ #define a11 20
395
+ #define a12 24
396
+ #define a13 28
397
+ #define a20 32
398
+ #define a21 36
399
+ #define a22 40
400
+ #define a23 44
401
+
402
+ ;----------------------------------------------------------------------------
403
+ ;
404
+ ; void Xoodoo_Permute_Nrounds( void *state, unsigned int nrounds )
405
+ ; Selects the last nrounds entries of the round-constant table and continues in the shared body Xoodoo_Permute.
406
+ ; argument state is passed in r24:r25
407
+ ; argument nrounds is passed in r22:r23 (only LSB (r22) is used)
408
+ ; NOTE(review): nrounds is not range-checked here; the table holds 12 entries and the round loop tests the terminator only after a round, so 1..12 looks like the intended range - confirm against callers.
409
+ .global Xoodoo_Permute_Nrounds
410
+ Xoodoo_Permute_Nrounds:
411
+ mov r26, r22 ; r26 = low byte of nrounds
412
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_0) ; Z = address of the table terminator
413
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_0)
414
+ lsl r26 ; r26 = 2 * nrounds, since every table entry is two bytes
415
+ sub rZ, r26 ; Z -= 2 * nrounds (low byte)
416
+ sbc rZ+1, zero ; propagate the borrow (zero is defined outside this section; presumably a register holding 0)
417
+ rjmp Xoodoo_Permute ; continue in the shared permutation body
418
+
419
+ ;----------------------------------------------------------------------------
420
+ ;
421
+ ; void Xoodoo_Permute_6rounds( void *state )
422
+ ; Points Z at the 6-round entry of the round-constant table and continues in the shared body Xoodoo_Permute.
423
+ ; argument state is passed in r24:r25
424
+ ;
425
+ .global Xoodoo_Permute_6rounds
426
+ Xoodoo_Permute_6rounds:
427
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_6) ; Z = address of the first of the last six round constants
428
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_6)
429
+ rjmp Xoodoo_Permute ; continue in the shared permutation body
430
+
431
+ ;----------------------------------------------------------------------------
432
+ ;
433
+ ; void Xoodoo_Permute_12rounds( void *state )
434
+ ; Points Z at the start of the round-constant table; execution then falls through into Xoodoo_Permute.
435
+ ; argument state is passed in r24:r25
436
+ ;
437
+ .global Xoodoo_Permute_12rounds
438
+ Xoodoo_Permute_12rounds:
439
+ ldi rZ+0, lo8(Xoodoo_RoundConstants_12) ; Z = address of the first round constant
440
+ ldi rZ+1, hi8(Xoodoo_RoundConstants_12) ; no jump follows: the next label is the shared body
441
+ Xoodoo_Permute: ; shared body: on entry Z points at the first round constant to use
442
+ push r2 ; save r2-r17 and Y (r28:r29): all are overwritten below (rC0..rC3 and the Y pointer)
443
+ push r3
444
+ push r4
445
+ push r5
446
+ push r6
447
+ push r7
448
+ push r8
449
+ push r9
450
+ push r10
451
+ push r11
452
+ push r12
453
+ push r13
454
+ push r14
455
+ push r15
456
+ push r16
457
+ push r17
458
+ push r28
459
+ push r29
460
+
461
+ ; Initial Prepare Theta: rC0..rC3 = a0x ^ a1x ^ a2x for x = 0..3
462
+ movw rY, rpState ; Y = state pointer (rpState is defined outside this section; presumably r24 per the argument comments)
463
+ ld rC0+0, Y+ ; a00
464
+ ld rC0+1, Y+
465
+ ld rC0+2, Y+
466
+ ld rC0+3, Y+
467
+ ld rC1+0, Y+ ; a01
468
+ ld rC1+1, Y+
469
+ ld rC1+2, Y+
470
+ ld rC1+3, Y+
471
+ ld rC2+0, Y+ ; a02
472
+ ld rC2+1, Y+
473
+ ld rC2+2, Y+
474
+ ld rC2+3, Y+
475
+ ld rC3+0, Y+ ; a03
476
+ ld rC3+1, Y+
477
+ ld rC3+2, Y+
478
+ ld rC3+3, Y+
479
+
480
+ ld r0, Y+ ; a10
481
+ eor rC0+0, r0
482
+ ld r0, Y+
483
+ eor rC0+1, r0
484
+ ld r0, Y+
485
+ eor rC0+2, r0
486
+ ld r0, Y+
487
+ eor rC0+3, r0
488
+ ld r0, Y+ ; a11
489
+ eor rC1+0, r0
490
+ ld r0, Y+
491
+ eor rC1+1, r0
492
+ ld r0, Y+
493
+ eor rC1+2, r0
494
+ ld r0, Y+
495
+ eor rC1+3, r0
496
+ ld r0, Y+ ; a12
497
+ eor rC2+0, r0
498
+ ld r0, Y+
499
+ eor rC2+1, r0
500
+ ld r0, Y+
501
+ eor rC2+2, r0
502
+ ld r0, Y+
503
+ eor rC2+3, r0
504
+ ld r0, Y+ ; a13
505
+ eor rC3+0, r0
506
+ ld r0, Y+
507
+ eor rC3+1, r0
508
+ ld r0, Y+
509
+ eor rC3+2, r0
510
+ ld r0, Y+
511
+ eor rC3+3, r0
512
+
513
+ ld r0, Y+ ; a20
514
+ eor rC0+0, r0
515
+ ld r0, Y+
516
+ eor rC0+1, r0
517
+ ld r0, Y+
518
+ eor rC0+2, r0
519
+ ld r0, Y+
520
+ eor rC0+3, r0
521
+ ld r0, Y+ ; a21
522
+ eor rC1+0, r0
523
+ ld r0, Y+
524
+ eor rC1+1, r0
525
+ ld r0, Y+
526
+ eor rC1+2, r0
527
+ ld r0, Y+
528
+ eor rC1+3, r0
529
+ ld r0, Y+ ; a22
530
+ eor rC2+0, r0
531
+ ld r0, Y+
532
+ eor rC2+1, r0
533
+ ld r0, Y+
534
+ eor rC2+2, r0
535
+ ld r0, Y+
536
+ eor rC2+3, r0
537
+ ld r0, Y+ ; a23
538
+ eor rC3+0, r0
539
+ ld r0, Y+
540
+ eor rC3+1, r0
541
+ ld r0, Y+
542
+ eor rC3+2, r0
543
+ ld r0, Y+
544
+ eor rC3+3, r0
545
+ sbiw rY, 48 ; rewind Y to the start of the state (the 48 post-increment loads above advanced it)
546
+
547
+ Xoodoo_RoundLoop: ; one pass = one round; on entry rC0..rC3 hold a0x ^ a1x ^ a2x and Z points at this round's constant
548
+ ; Theta + Rho west
549
+ ; c0 = ROTL32(c0 ^ ROTL32(c0, 9), 5);
550
+ mov rVv+1, rC0+0 ; rol 9
551
+ mov rVv+2, rC0+1
552
+ mov rVv+3, rC0+2
553
+ mov rVv+0, rC0+3 ; copying each byte one position up is the rol 8 part
554
+ lsl rVv+0 ; remaining rol 1: shift the 32-bit value left through carry
555
+ rol rVv+1
556
+ rol rVv+2
557
+ rol rVv+3
558
+ adc rVv+0, zero ; wrap the bit shifted out of the top byte into bit 0 (assumes zero holds 0; defined outside this section)
559
+ eor rVv+0, rC0+0
560
+ eor rVv+1, rC0+1
561
+ eor rVv+2, rC0+2
562
+ eor rVv+3, rC0+3
563
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
564
+ ror rVv+3 ; bit 7 takes whatever is in carry here; the bld below replaces it with the saved bit 0
565
+ ror rVv+2
566
+ ror rVv+1
567
+ ror rVv
568
+ bld rVv+3, 7
569
+ bst rVv, 0
570
+ ror rVv+3
571
+ ror rVv+2
572
+ ror rVv+1
573
+ ror rVv
574
+ bld rVv+3, 7
575
+ bst rVv, 0
576
+ ror rVv+3
577
+ ror rVv+2
578
+ ror rVv+1
579
+ ror rVv
580
+ bld rVv+3, 7
581
+ mov rC0+0, rVv+3 ; write back with every byte one position up (the rol 8 part)
582
+ mov rC0+1, rVv+0
583
+ mov rC0+2, rVv+1
584
+ mov rC0+3, rVv+2
585
+
586
+ ; c1 = ROTL32(c1 ^ ROTL32(c1, 9), 5);
587
+ mov rVv+1, rC1+0 ; rol 9
588
+ mov rVv+2, rC1+1
589
+ mov rVv+3, rC1+2
590
+ mov rVv+0, rC1+3
591
+ lsl rVv+0
592
+ rol rVv+1
593
+ rol rVv+2
594
+ rol rVv+3
595
+ adc rVv+0, zero
596
+ eor rVv+0, rC1+0
597
+ eor rVv+1, rC1+1
598
+ eor rVv+2, rC1+2
599
+ eor rVv+3, rC1+3
600
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
601
+ ror rVv+3
602
+ ror rVv+2
603
+ ror rVv+1
604
+ ror rVv
605
+ bld rVv+3, 7
606
+ bst rVv, 0
607
+ ror rVv+3
608
+ ror rVv+2
609
+ ror rVv+1
610
+ ror rVv
611
+ bld rVv+3, 7
612
+ bst rVv, 0
613
+ ror rVv+3
614
+ ror rVv+2
615
+ ror rVv+1
616
+ ror rVv
617
+ bld rVv+3, 7
618
+ mov rC1+0, rVv+3
619
+ mov rC1+1, rVv+0
620
+ mov rC1+2, rVv+1
621
+ mov rC1+3, rVv+2
622
+
623
+ ; c2 = ROTL32(c2 ^ ROTL32(c2, 9), 5);
624
+ mov rVv+1, rC2+0 ; rol 9
625
+ mov rVv+2, rC2+1
626
+ mov rVv+3, rC2+2
627
+ mov rVv+0, rC2+3
628
+ lsl rVv+0
629
+ rol rVv+1
630
+ rol rVv+2
631
+ rol rVv+3
632
+ adc rVv+0, zero
633
+ eor rVv+0, rC2+0
634
+ eor rVv+1, rC2+1
635
+ eor rVv+2, rC2+2
636
+ eor rVv+3, rC2+3
637
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
638
+ ror rVv+3
639
+ ror rVv+2
640
+ ror rVv+1
641
+ ror rVv
642
+ bld rVv+3, 7
643
+ bst rVv, 0
644
+ ror rVv+3
645
+ ror rVv+2
646
+ ror rVv+1
647
+ ror rVv
648
+ bld rVv+3, 7
649
+ bst rVv, 0
650
+ ror rVv+3
651
+ ror rVv+2
652
+ ror rVv+1
653
+ ror rVv
654
+ bld rVv+3, 7
655
+ mov rC2+0, rVv+3
656
+ mov rC2+1, rVv+0
657
+ mov rC2+2, rVv+1
658
+ mov rC2+3, rVv+2
659
+
660
+ ; c3 = ROTL32(c3 ^ ROTL32(c3, 9), 5);
661
+ mov rVv+1, rC3+0 ; rol 9
662
+ mov rVv+2, rC3+1
663
+ mov rVv+3, rC3+2
664
+ mov rVv+0, rC3+3
665
+ lsl rVv+0
666
+ rol rVv+1
667
+ rol rVv+2
668
+ rol rVv+3
669
+ adc rVv+0, zero
670
+ eor rVv+0, rC3+0
671
+ eor rVv+1, rC3+1
672
+ eor rVv+2, rC3+2
673
+ eor rVv+3, rC3+3
674
+ bst rVv, 0 ; rol 5 (= ror 3 + rol 8)
675
+ ror rVv+3
676
+ ror rVv+2
677
+ ror rVv+1
678
+ ror rVv
679
+ bld rVv+3, 7
680
+ bst rVv, 0
681
+ ror rVv+3
682
+ ror rVv+2
683
+ ror rVv+1
684
+ ror rVv
685
+ bld rVv+3, 7
686
+ bst rVv, 0
687
+ ror rVv+3
688
+ ror rVv+2
689
+ ror rVv+1
690
+ ror rVv
691
+ bld rVv+3, 7
692
+ mov rC3+0, rVv+3
693
+ mov rC3+1, rVv+0
694
+ mov rC3+2, rVv+1
695
+ mov rC3+3, rVv+2
696
+
697
+ ; v1 = a13; (kept in rVv because a13 is overwritten next; consumed by "a10 = v1 ^ c2" below)
698
+ ldd rVv+0, Y+a13+0
699
+ ldd rVv+1, Y+a13+1
700
+ ldd rVv+2, Y+a13+2
701
+ ldd rVv+3, Y+a13+3
702
+
703
+ ; a13 = a12 ^ c1;
704
+ ldd r0, Y+a12+0
705
+ eor r0, rC1+0
706
+ std Y+a13+0, r0
707
+ ldd r0, Y+a12+1
708
+ eor r0, rC1+1
709
+ std Y+a13+1, r0
710
+ ldd r0, Y+a12+2
711
+ eor r0, rC1+2
712
+ std Y+a13+2, r0
713
+ ldd r0, Y+a12+3
714
+ eor r0, rC1+3
715
+ std Y+a13+3, r0
716
+
717
+ ; a12 = a11 ^ c0;
718
+ ldd r0, Y+a11+0
719
+ eor r0, rC0+0
720
+ std Y+a12+0, r0
721
+ ldd r0, Y+a11+1
722
+ eor r0, rC0+1
723
+ std Y+a12+1, r0
724
+ ldd r0, Y+a11+2
725
+ eor r0, rC0+2
726
+ std Y+a12+2, r0
727
+ ldd r0, Y+a11+3
728
+ eor r0, rC0+3
729
+ std Y+a12+3, r0
730
+
731
+ ; a11 = a10 ^ c3;
732
+ ldd r0, Y+a10+0
733
+ eor r0, rC3+0
734
+ std Y+a11+0, r0
735
+ ldd r0, Y+a10+1
736
+ eor r0, rC3+1
737
+ std Y+a11+1, r0
738
+ ldd r0, Y+a10+2
739
+ eor r0, rC3+2
740
+ std Y+a11+2, r0
741
+ ldd r0, Y+a10+3
742
+ eor r0, rC3+3
743
+ std Y+a11+3, r0
744
+
745
+ ; a10 = v1 ^ c2;
746
+ eor rVv+0, rC2+0
747
+ std Y+a10+0, rVv+0
748
+ eor rVv+1, rC2+1
749
+ std Y+a10+1, rVv+1
750
+ eor rVv+2, rC2+2
751
+ std Y+a10+2, rVv+2
752
+ eor rVv+3, rC2+3
753
+ std Y+a10+3, rVv+3
754
+
755
+ ; a20 = ROTL32(a20 ^ c3, 11); (bytes are loaded one position up = rol 8, followed by three 1-bit left rotates)
756
+ ldd rVv+0, Y+a20+3
757
+ eor rVv+0, rC3+3
758
+ ldd rVv+1, Y+a20+0
759
+ eor rVv+1, rC3+0
760
+ ldd rVv+2, Y+a20+1
761
+ eor rVv+2, rC3+1
762
+ ldd rVv+3, Y+a20+2
763
+ eor rVv+3, rC3+2
764
+ lsl rVv+0
765
+ rol rVv+1
766
+ rol rVv+2
767
+ rol rVv+3
768
+ adc rVv+0, zero
769
+ lsl rVv+0
770
+ rol rVv+1
771
+ rol rVv+2
772
+ rol rVv+3
773
+ adc rVv+0, zero
774
+ lsl rVv+0
775
+ rol rVv+1
776
+ rol rVv+2
777
+ rol rVv+3
778
+ adc rVv+0, zero
779
+ std Y+a20+0, rVv+0
780
+ std Y+a20+1, rVv+1
781
+ std Y+a20+2, rVv+2
782
+ std Y+a20+3, rVv+3
783
+
784
+ ; a21 = ROTL32(a21 ^ c0, 11);
785
+ ldd rVv+0, Y+a21+3
786
+ eor rVv+0, rC0+3
787
+ ldd rVv+1, Y+a21+0
788
+ eor rVv+1, rC0+0
789
+ ldd rVv+2, Y+a21+1
790
+ eor rVv+2, rC0+1
791
+ ldd rVv+3, Y+a21+2
792
+ eor rVv+3, rC0+2
793
+ lsl rVv+0
794
+ rol rVv+1
795
+ rol rVv+2
796
+ rol rVv+3
797
+ adc rVv+0, zero
798
+ lsl rVv+0
799
+ rol rVv+1
800
+ rol rVv+2
801
+ rol rVv+3
802
+ adc rVv+0, zero
803
+ lsl rVv+0
804
+ rol rVv+1
805
+ rol rVv+2
806
+ rol rVv+3
807
+ adc rVv+0, zero
808
+ std Y+a21+0, rVv+0
809
+ std Y+a21+1, rVv+1
810
+ std Y+a21+2, rVv+2
811
+ std Y+a21+3, rVv+3
812
+
813
+ ; a22 = ROTL32(a22 ^ c1, 11);
814
+ ldd rVv+0, Y+a22+3
815
+ eor rVv+0, rC1+3
816
+ ldd rVv+1, Y+a22+0
817
+ eor rVv+1, rC1+0
818
+ ldd rVv+2, Y+a22+1
819
+ eor rVv+2, rC1+1
820
+ ldd rVv+3, Y+a22+2
821
+ eor rVv+3, rC1+2
822
+ lsl rVv+0
823
+ rol rVv+1
824
+ rol rVv+2
825
+ rol rVv+3
826
+ adc rVv+0, zero
827
+ lsl rVv+0
828
+ rol rVv+1
829
+ rol rVv+2
830
+ rol rVv+3
831
+ adc rVv+0, zero
832
+ lsl rVv+0
833
+ rol rVv+1
834
+ rol rVv+2
835
+ rol rVv+3
836
+ adc rVv+0, zero
837
+ std Y+a22+0, rVv+0
838
+ std Y+a22+1, rVv+1
839
+ std Y+a22+2, rVv+2
840
+ std Y+a22+3, rVv+3
841
+
842
+ ; a23 = ROTL32(a23 ^ c2, 11);
843
+ ldd rVv+0, Y+a23+3
844
+ eor rVv+0, rC2+3
845
+ ldd rVv+1, Y+a23+0
846
+ eor rVv+1, rC2+0
847
+ ldd rVv+2, Y+a23+1
848
+ eor rVv+2, rC2+1
849
+ ldd rVv+3, Y+a23+2
850
+ eor rVv+3, rC2+2
851
+ lsl rVv+0
852
+ rol rVv+1
853
+ rol rVv+2
854
+ rol rVv+3
855
+ adc rVv+0, zero
856
+ lsl rVv+0
857
+ rol rVv+1
858
+ rol rVv+2
859
+ rol rVv+3
860
+ adc rVv+0, zero
861
+ lsl rVv+0
862
+ rol rVv+1
863
+ rol rVv+2
864
+ rol rVv+3
865
+ adc rVv+0, zero
866
+ std Y+a23+0, rVv+0
867
+ std Y+a23+1, rVv+1
868
+ std Y+a23+2, rVv+2
869
+ std Y+a23+3, rVv+3
870
+
871
+ ; v1 = c3; (copy kept because rC3 is reloaded first; consumed by "c0 = a00 ^ v1" below)
872
+ movw rVv+0, rC3+0
873
+ movw rVv+2, rC3+2
874
+
875
+ ; c3 = a03 ^ c2; /* a03 resides in c3 */
876
+ ldd rC3+0, Y+a03+0
877
+ eor rC3+0, rC2+0
878
+ ldd rC3+1, Y+a03+1
879
+ eor rC3+1, rC2+1
880
+ ldd rC3+2, Y+a03+2
881
+ eor rC3+2, rC2+2
882
+ ldd rC3+3, Y+a03+3
883
+ eor rC3+3, rC2+3
884
+
885
+ ; c2 = a02 ^ c1; /* a02 resides in c2 */
886
+ ldd rC2+0, Y+a02+0
887
+ eor rC2+0, rC1+0
888
+ ldd rC2+1, Y+a02+1
889
+ eor rC2+1, rC1+1
890
+ ldd rC2+2, Y+a02+2
891
+ eor rC2+2, rC1+2
892
+ ldd rC2+3, Y+a02+3
893
+ eor rC2+3, rC1+3
894
+
895
+ ; c1 = a01 ^ c0; /* a01 resides in c1 */
896
+ ldd rC1+0, Y+a01+0
897
+ eor rC1+0, rC0+0
898
+ ldd rC1+1, Y+a01+1
899
+ eor rC1+1, rC0+1
900
+ ldd rC1+2, Y+a01+2
901
+ eor rC1+2, rC0+2
902
+ ldd rC1+3, Y+a01+3
903
+ eor rC1+3, rC0+3
904
+
905
+ ; c0 = a00 ^ v1; /* a00 resides in c0 */
906
+ ldd rC0+0, Y+a00+0
907
+ eor rC0+0, rVv+0
908
+ ldd rC0+1, Y+a00+1
909
+ eor rC0+1, rVv+1
910
+ ldd rC0+2, Y+a00+2
911
+ eor rC0+2, rVv+2
912
+ ldd rC0+3, Y+a00+3
913
+ eor rC0+3, rVv+3
914
+
915
+ ; c0 ^= __rc; /* +Iota */
916
+ lpm rVv+0, Z+ ; fetch this round's 16-bit constant from program memory, low byte first; Z advances past it
917
+ lpm rVv+1, Z+
918
+ eor rC0+0, rVv+0 ; only the two low bytes of c0 are affected
919
+ eor rC0+1, rVv+1
920
+
921
+ ; Chi + Rho east + Early Theta (rC0..rC3 also accumulate a0x ^ a1x ^ a2x of the new state for the next round)
922
+ ; a00 = c0 ^= ~a10 & a20;
923
+ ldd r0, Y+a10+0
924
+ com r0
925
+ ldd rTt+0, Y+a20+0 ; a20 in rTt
926
+ and r0, rTt+0
927
+ eor rC0+0, r0
928
+ std Y+a00+0, rC0+0
929
+ ldd r0, Y+a10+1
930
+ com r0
931
+ ldd rTt+1, Y+a20+1
932
+ and r0, rTt+1
933
+ eor rC0+1, r0
934
+ std Y+a00+1, rC0+1
935
+ ldd r0, Y+a10+2
936
+ com r0
937
+ ldd rTt+2, Y+a20+2
938
+ and r0, rTt+2
939
+ eor rC0+2, r0
940
+ std Y+a00+2, rC0+2
941
+ ldd r0, Y+a10+3
942
+ com r0
943
+ ldd rTt+3, Y+a20+3
944
+ and r0, rTt+3
945
+ eor rC0+3, r0
946
+ std Y+a00+3, rC0+3
947
+
948
+ ; a10 ^= ~a20 & c0;
949
+ com rTt+0
950
+ and rTt+0, rC0+0
951
+ ldd r0, Y+a10+0
952
+ eor rTt+0, r0 ; new a10 in rTt
953
+ std Y+a10+0, rTt+0
954
+ com rTt+1
955
+ and rTt+1, rC0+1
956
+ ldd r0, Y+a10+1
957
+ eor rTt+1, r0
958
+ std Y+a10+1, rTt+1
959
+ com rTt+2
960
+ and rTt+2, rC0+2
961
+ ldd r0, Y+a10+2
962
+ eor rTt+2, r0
963
+ std Y+a10+2, rTt+2
964
+ com rTt+3
965
+ and rTt+3, rC0+3
966
+ ldd r0, Y+a10+3
967
+ eor rTt+3, r0
968
+ std Y+a10+3, rTt+3
969
+
970
+ ; v1(a20) = ROTL32(a20 ^ ~c0 & a10, 8);
971
+ movw rVv+0, rTt+0 ; a10 in rVv
972
+ movw rVv+2, rTt+2
973
+ mov r0, rC0+0
974
+ com r0
975
+ and rTt+0, r0
976
+ ldd r0, Y+a20+0
977
+ eor rTt+0, r0
978
+
979
+ mov r0, rC0+1
980
+ com r0
981
+ and rTt+1, r0
982
+ ldd r0, Y+a20+1
983
+ eor rTt+1, r0
984
+
985
+ mov r0, rC0+2
986
+ com r0
987
+ and rTt+2, r0
988
+ ldd r0, Y+a20+2
989
+ eor rTt+2, r0
990
+
991
+ mov r0, rC0+3
992
+ com r0
993
+ and rTt+3, r0
994
+ ldd r0, Y+a20+3
995
+ eor rTt+3, r0
996
+ std Y+a20+0, rTt+3 ; store with every byte one position up (rol 8); parked in the a20 slot until it is moved to a22 below
997
+ std Y+a20+1, rTt+0
998
+ std Y+a20+2, rTt+1
999
+ std Y+a20+3, rTt+2
1000
+
1001
+ ; c0 ^= a10 = ROTL32(a10, 1);
1002
+ lsl rVv+0
1003
+ rol rVv+1
1004
+ std Y+a10+1, rVv+1
1005
+ eor rC0+1, rVv+1
1006
+ rol rVv+2
1007
+ std Y+a10+2, rVv+2
1008
+ eor rC0+2, rVv+2
1009
+ rol rVv+3
1010
+ std Y+a10+3, rVv+3
1011
+ eor rC0+3, rVv+3
1012
+ adc rVv+0, zero ; byte 0 is finished last: it needs the carry out of the top byte (std/eor leave carry untouched)
1013
+ std Y+a10+0, rVv+0
1014
+ eor rC0+0, rVv+0
1015
+
1016
+ ; a02 = c2 ^= ~a12 & a22;
1017
+ ldd r0, Y+a12+0
1018
+ com r0
1019
+ ldd rVv+0, Y+a22+0 ; a22 in rVv
1020
+ and r0, rVv+0
1021
+ eor rC2+0, r0
1022
+ std Y+a02+0, rC2+0
1023
+ ldd r0, Y+a12+1
1024
+ com r0
1025
+ ldd rVv+1, Y+a22+1
1026
+ and r0, rVv+1
1027
+ eor rC2+1, r0
1028
+ std Y+a02+1, rC2+1
1029
+ ldd r0, Y+a12+2
1030
+ com r0
1031
+ ldd rVv+2, Y+a22+2
1032
+ and r0, rVv+2
1033
+ eor rC2+2, r0
1034
+ std Y+a02+2, rC2+2
1035
+ ldd r0, Y+a12+3
1036
+ com r0
1037
+ ldd rVv+3, Y+a22+3
1038
+ and r0, rVv+3
1039
+ eor rC2+3, r0
1040
+ std Y+a02+3, rC2+3
1041
+
1042
+ ; a12 ^= ~a22 & c2;
1043
+ mov r0, rVv+0 ; a12 in rTt
1044
+ com r0
1045
+ and r0, rC2+0
1046
+ ldd rTt+0, Y+a12+0
1047
+ eor rTt+0, r0
1048
+ std Y+a12+0, rTt+0
1049
+ mov r0, rVv+1
1050
+ com r0
1051
+ and r0, rC2+1
1052
+ ldd rTt+1, Y+a12+1
1053
+ eor rTt+1, r0
1054
+ std Y+a12+1, rTt+1
1055
+ mov r0, rVv+2
1056
+ com r0
1057
+ and r0, rC2+2
1058
+ ldd rTt+2, Y+a12+2
1059
+ eor rTt+2, r0
1060
+ std Y+a12+2, rTt+2
1061
+ mov r0, rVv+3
1062
+ com r0
1063
+ and r0, rC2+3
1064
+ ldd rTt+3, Y+a12+3
1065
+ eor rTt+3, r0
1066
+ std Y+a12+3, rTt+3
1067
+
1068
+ ; c0 ^= a20 = ROTL32(a22 ^ ~c2 & a12, 8);
1069
+ mov r0, rC2+0
1070
+ com r0
1071
+ and r0, rTt+0
1072
+ eor r0, rVv+0
1073
+ ldd rVv+0, Y+a20+1 ; rVv = a22 (the value parked in the a20 slot earlier is picked up here before the slot is overwritten)
1074
+ std Y+a20+1, r0
1075
+ eor rC0+1, r0
1076
+ mov r0, rC2+1
1077
+ com r0
1078
+ and r0, rTt+1
1079
+ eor r0, rVv+1
1080
+ ldd rVv+1, Y+a20+2
1081
+ std Y+a20+2, r0
1082
+ eor rC0+2, r0
1083
+ mov r0, rC2+2
1084
+ com r0
1085
+ and r0, rTt+2
1086
+ eor r0, rVv+2
1087
+ ldd rVv+2, Y+a20+3
1088
+ std Y+a20+3, r0
1089
+ eor rC0+3, r0
1090
+ mov r0, rC2+3
1091
+ com r0
1092
+ and r0, rTt+3
1093
+ eor r0, rVv+3
1094
+ ldd rVv+3, Y+a20+0
1095
+ std Y+a20+0, r0
1096
+ eor rC0+0, r0
1097
+
1098
+ ; c2 ^= a12 = ROTL32(a12, 1);
1099
+ lsl rTt+0
1100
+ rol rTt+1
1101
+ eor rC2+1, rTt+1
1102
+ std Y+a12+1, rTt+1
1103
+ rol rTt+2
1104
+ eor rC2+2, rTt+2
1105
+ std Y+a12+2, rTt+2
1106
+ rol rTt+3
1107
+ eor rC2+3, rTt+3
1108
+ std Y+a12+3, rTt+3
1109
+ adc rTt+0, zero
1110
+ eor rC2+0, rTt+0
1111
+ std Y+a12+0, rTt+0
1112
+
1113
+ ; a22 = v1;
1114
+ std Y+a22+0, rVv+3 ; rVv+3 was loaded from a20+0, rVv+0 from a20+1, ...: bytes land at matching offsets
1115
+ std Y+a22+1, rVv+0
1116
+ std Y+a22+2, rVv+1
1117
+ std Y+a22+3, rVv+2
1118
+
1119
+ ; c2 ^= v1;
1120
+ eor rC2+0, rVv+3
1121
+ eor rC2+1, rVv+0
1122
+ eor rC2+2, rVv+1
1123
+ eor rC2+3, rVv+2
1124
+
1125
+ ; a01 = c1 ^= ~a11 & a21;
1126
+ ldd rTt+0, Y+a11+0 ;rTt holds a11
1127
+ mov r0, rTt+0
1128
+ com r0
1129
+ ldd rVv+0, Y+a21+0 ;rVv holds a21
1130
+ and r0, rVv+0
1131
+ eor rC1+0, r0
1132
+ std Y+a01+0, rC1+0
1133
+ ldd rTt+1, Y+a11+1
1134
+ mov r0, rTt+1
1135
+ com r0
1136
+ ldd rVv+1, Y+a21+1
1137
+ and r0, rVv+1
1138
+ eor rC1+1, r0
1139
+ std Y+a01+1, rC1+1
1140
+ ldd rTt+2, Y+a11+2
1141
+ mov r0, rTt+2
1142
+ com r0
1143
+ ldd rVv+2, Y+a21+2
1144
+ and r0, rVv+2
1145
+ eor rC1+2, r0
1146
+ std Y+a01+2, rC1+2
1147
+ ldd rTt+3, Y+a11+3
1148
+ mov r0, rTt+3
1149
+ com r0
1150
+ ldd rVv+3, Y+a21+3
1151
+ and r0, rVv+3
1152
+ eor rC1+3, r0
1153
+ std Y+a01+3, rC1+3
1154
+
1155
+ ; a11 ^= ~a21 & c1;
1156
+ mov r0, rVv+0
1157
+ com r0
1158
+ and r0, rC1+0
1159
+ eor rTt+0, r0
1160
+ std Y+a11+0, rTt+0
1161
+ mov r0, rVv+1
1162
+ com r0
1163
+ and r0, rC1+1
1164
+ eor rTt+1, r0
1165
+ std Y+a11+1, rTt+1
1166
+ mov r0, rVv+2
1167
+ com r0
1168
+ and r0, rC1+2
1169
+ eor rTt+2, r0
1170
+ std Y+a11+2, rTt+2
1171
+ mov r0, rVv+3
1172
+ com r0
1173
+ and r0, rC1+3
1174
+ eor rTt+3, r0
1175
+ std Y+a11+3, rTt+3
1176
+
1177
+ ; v1 = ROTL32(a21 ^ ~c1 & a11, 8);
1178
+ mov r0, rC1+0
1179
+ com r0
1180
+ and r0, rTt+0
1181
+ eor rVv+0, r0 ; v1 not yet ROTL32'ed(8)
1182
+ mov r0, rC1+1
1183
+ com r0
1184
+ and r0, rTt+1
1185
+ eor rVv+1, r0
1186
+ mov r0, rC1+2
1187
+ com r0
1188
+ and r0, rTt+2
1189
+ eor rVv+2, r0
1190
+ mov r0, rC1+3
1191
+ com r0
1192
+ and r0, rTt+3
1193
+ eor rVv+3, r0
1194
+
1195
+ ; c1 ^= a11 = ROTL32(a11, 1);
1196
+ lsl rTt+0
1197
+ rol rTt+1
1198
+ eor rC1+1, rTt+1
1199
+ std Y+a11+1, rTt+1
1200
+ rol rTt+2
1201
+ eor rC1+2, rTt+2
1202
+ std Y+a11+2, rTt+2
1203
+ rol rTt+3
1204
+ eor rC1+3, rTt+3
1205
+ std Y+a11+3, rTt+3
1206
+ adc rTt+0, zero
1207
+ eor rC1+0, rTt+0
1208
+ std Y+a11+0, rTt+0
1209
+
1210
+ ; a03 = c3 ^= ~a13 & a23;
1211
+ ldd r0, Y+a13+0
1212
+ com r0
1213
+ ldd rTt+0, Y+a23+0 ; a23 in rTt
1214
+ and r0, rTt+0
1215
+ eor rC3+0, r0
1216
+ std Y+a03+0, rC3+0
1217
+ ldd r0, Y+a13+1
1218
+ com r0
1219
+ ldd rTt+1, Y+a23+1
1220
+ and r0, rTt+1
1221
+ eor rC3+1, r0
1222
+ std Y+a03+1, rC3+1
1223
+ ldd r0, Y+a13+2
1224
+ com r0
1225
+ ldd rTt+2, Y+a23+2
1226
+ and r0, rTt+2
1227
+ eor rC3+2, r0
1228
+ std Y+a03+2, rC3+2
1229
+ ldd r0, Y+a13+3
1230
+ com r0
1231
+ ldd rTt+3, Y+a23+3
1232
+ and r0, rTt+3
1233
+ eor rC3+3, r0
1234
+ std Y+a03+3, rC3+3
1235
+
1236
+ ; a13 ^= ~a23 & c3; (result stays in rTt; it is written to memory only after the rotate at the end of this phase)
1237
+ mov r0, rTt+0
1238
+ com r0
1239
+ and r0, rC3+0
1240
+ ldd rTt+0, Y+a13+0 ; a13 in rTt
1241
+ eor rTt+0, r0
1242
+ mov r0, rTt+1
1243
+ com r0
1244
+ and r0, rC3+1
1245
+ ldd rTt+1, Y+a13+1
1246
+ eor rTt+1, r0
1247
+ mov r0, rTt+2
1248
+ com r0
1249
+ and r0, rC3+2
1250
+ ldd rTt+2, Y+a13+2
1251
+ eor rTt+2, r0
1252
+ mov r0, rTt+3
1253
+ com r0
1254
+ and r0, rC3+3
1255
+ ldd rTt+3, Y+a13+3
1256
+ eor rTt+3, r0
1257
+
1258
+ ; c1 ^= a21 = ROTL32(a23 ^ ~c3 & a13, 8);
1259
+ push rVv ; rVv+0 still holds a byte of v1 but is needed as scratch for the a23 bytes below
1260
+ mov r0, rC3+0
1261
+ com r0
1262
+ and r0, rTt+0
1263
+ ldd rVv, Y+a23+0
1264
+ eor r0, rVv
1265
+ eor rC1+1, r0
1266
+ std Y+a21+1, r0
1267
+ mov r0, rC3+1
1268
+ com r0
1269
+ and r0, rTt+1
1270
+ ldd rVv, Y+a23+1
1271
+ eor r0, rVv
1272
+ eor rC1+2, r0
1273
+ std Y+a21+2, r0
1274
+ mov r0, rC3+2
1275
+ com r0
1276
+ and r0, rTt+2
1277
+ ldd rVv, Y+a23+2
1278
+ eor r0, rVv
1279
+ eor rC1+3, r0
1280
+ std Y+a21+3, r0
1281
+ mov r0, rC3+3
1282
+ com r0
1283
+ and r0, rTt+3
1284
+ ldd rVv, Y+a23+3
1285
+ eor r0, rVv
1286
+ eor rC1+0, r0
1287
+ std Y+a21+0, r0
1288
+ pop rVv ; restore the saved byte of v1
1289
+
1290
+ ; a23 = v1;
1291
+ std Y+a23+0, rVv+3 ; rol8(rVv)
1292
+ std Y+a23+1, rVv+0
1293
+ std Y+a23+2, rVv+1
1294
+ std Y+a23+3, rVv+2
1295
+
1296
+ ; c3 ^= v1;
1297
+ eor rC3+0, rVv+3
1298
+ eor rC3+1, rVv+0
1299
+ eor rC3+2, rVv+1
1300
+ eor rC3+3, rVv+2
1301
+
1302
+ ; c3 ^= a13 = ROTL32(a13, 1);
1303
+ lsl rTt+0
1304
+ rol rTt+1
1305
+ std Y+a13+1, rTt+1
1306
+ eor rC3+1, rTt+1
1307
+ rol rTt+2
1308
+ std Y+a13+2, rTt+2
1309
+ eor rC3+2, rTt+2
1310
+ rol rTt+3
1311
+ std Y+a13+3, rTt+3
1312
+ eor rC3+3, rTt+3
1313
+ adc rTt+0, zero
1314
+ std Y+a13+0, rTt+0
1315
+ eor rC3+0, rTt+0
1316
+
1317
+ ; Check for terminator (peek at the low byte of the next table entry without advancing Z)
1318
+ lpm r0, Z
1319
+ inc r0 ; 0xFF + 1 wraps to 0 and sets the zero flag; any other value leaves it clear
1320
+ breq Xoodoo_Done
1321
+ rjmp Xoodoo_RoundLoop ; another round: Z already points at its constant
1322
+ Xoodoo_Done:
1323
+ pop r29 ; restore the saved registers in reverse order of the pushes at Xoodoo_Permute
1324
+ pop r28
1325
+ pop r17
1326
+ pop r16
1327
+ pop r15
1328
+ pop r14
1329
+ pop r13
1330
+ pop r12
1331
+ pop r11
1332
+ pop r10
1333
+ pop r9
1334
+ pop r8
1335
+ pop r7
1336
+ pop r6
1337
+ pop r5
1338
+ pop r4
1339
+ pop r3
1340
+ pop r2
1341
+ ret