sha3-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +29 -0
  5. data/Rakefile +7 -0
  6. data/ext/sha3/KeccakReferenceAndOptimized/KeccakCompact.vcproj +207 -0
  7. data/ext/sha3/KeccakReferenceAndOptimized/KeccakCompact8.vcproj +207 -0
  8. data/ext/sha3/KeccakReferenceAndOptimized/KeccakInplace.vcproj +203 -0
  9. data/ext/sha3/KeccakReferenceAndOptimized/KeccakInplace32BI.vcproj +201 -0
  10. data/ext/sha3/KeccakReferenceAndOptimized/KeccakOptimized32.vcproj +267 -0
  11. data/ext/sha3/KeccakReferenceAndOptimized/KeccakOptimized64.vcproj +267 -0
  12. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReference.vcproj +243 -0
  13. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReference32BI.vcproj +243 -0
  14. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReferenceAndOptimized.sln +62 -0
  15. data/ext/sha3/KeccakReferenceAndOptimized/KeccakSimple.vcproj +203 -0
  16. data/ext/sha3/KeccakReferenceAndOptimized/KeccakSimple32BI.vcproj +201 -0
  17. data/ext/sha3/KeccakReferenceAndOptimized/Sources/AVR8-rotate64.h +27 -0
  18. data/ext/sha3/KeccakReferenceAndOptimized/Sources/AVR8-rotate64.s +285 -0
  19. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-settings.h +2 -0
  20. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-test.c +142 -0
  21. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-util.h +15 -0
  22. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-util.s +119 -0
  23. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8.c +184 -0
  24. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8.h +25 -0
  25. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact-settings.h +3 -0
  26. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact-test.c +317 -0
  27. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact.c +341 -0
  28. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact.h +50 -0
  29. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8-settings.h +2 -0
  30. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8-test.c +192 -0
  31. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8.c +375 -0
  32. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8.h +47 -0
  33. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +406 -0
  34. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-minimal-test.c +231 -0
  35. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-settings.h +3 -0
  36. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-test.c +221 -0
  37. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace.c +445 -0
  38. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv6M.s +844 -0
  39. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv7A.s +687 -0
  40. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv7M.s +687 -0
  41. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI.c +849 -0
  42. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple-settings.h +3 -0
  43. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple-test.c +221 -0
  44. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple.c +403 -0
  45. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple32BI.c +673 -0
  46. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakDuplex.c +68 -0
  47. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakDuplex.h +59 -0
  48. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-rvk.macros +555 -0
  49. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-s1.macros +1187 -0
  50. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-s2.macros +1187 -0
  51. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32.macros +26 -0
  52. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-64.macros +728 -0
  53. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-arm.c +123 -0
  54. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-armcc.s +653 -0
  55. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-armgcc.s +686 -0
  56. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8.c +163 -0
  57. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8asm-compact.s +647 -0
  58. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8asm-fast.s +934 -0
  59. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s +446 -0
  60. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-int-set.h +6 -0
  61. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-interface.h +46 -0
  62. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt32-settings.h +4 -0
  63. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt32.c +524 -0
  64. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt64-settings.h +7 -0
  65. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt64.c +504 -0
  66. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.c +300 -0
  67. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.h +20 -0
  68. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.o +0 -0
  69. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference32BI.c +371 -0
  70. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-simd128.macros +651 -0
  71. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-simd64.macros +517 -0
  72. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-unrolling.macros +124 -0
  73. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-asm.c +62 -0
  74. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-gas.s +766 -0
  75. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-shld-gas.s +766 -0
  76. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-xop.macros +573 -0
  77. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.c +81 -0
  78. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.h +70 -0
  79. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.o +0 -0
  80. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.c +266 -0
  81. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.h +76 -0
  82. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.o +0 -0
  83. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccakc1024-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +296 -0
  84. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccakc512-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +429 -0
  85. data/ext/sha3/KeccakReferenceAndOptimized/Sources/brg_endian.h +142 -0
  86. data/ext/sha3/KeccakReferenceAndOptimized/Sources/crypto_hash.h +0 -0
  87. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.c +117 -0
  88. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.h +29 -0
  89. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.o +0 -0
  90. data/ext/sha3/KeccakReferenceAndOptimized/Sources/genKAT.c +692 -0
  91. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainARM.c +88 -0
  92. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainOptimized.c +23 -0
  93. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainReference.c +381 -0
  94. data/ext/sha3/KeccakReferenceAndOptimized/Sources/timing.c +436 -0
  95. data/ext/sha3/KeccakReferenceAndOptimized/Sources/timing.h +13 -0
  96. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation-config.h +2 -0
  97. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation.c +572 -0
  98. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation.h +38 -0
  99. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/KeccakF-1600-unrolling.macros +124 -0
  100. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc256TreeD2.c +81 -0
  101. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc256TreeD2.h +18 -0
  102. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc512TreeD2.c +81 -0
  103. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc512TreeD2.h +18 -0
  104. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/crypto_hash.h +0 -0
  105. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/mainOptimized.c +112 -0
  106. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/timing-Double.c +225 -0
  107. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/timing-Double.h +20 -0
  108. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/makefile +68 -0
  109. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakCompact +0 -0
  110. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakOptimized32 +0 -0
  111. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakOptimized64 +0 -0
  112. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakReference +0 -0
  113. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakReference32BI +0 -0
  114. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakSimple +0 -0
  115. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakSimple32BI +0 -0
  116. data/ext/sha3/KeccakReferenceAndOptimized/bin/compact/Keccak-compact-test.o +0 -0
  117. data/ext/sha3/KeccakReferenceAndOptimized/bin/compact/Keccak-compact.o +0 -0
  118. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakDuplex.o +0 -0
  119. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakF-1600-opt32.o +0 -0
  120. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakNISTInterface.o +0 -0
  121. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakSponge.o +0 -0
  122. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/genKAT.o +0 -0
  123. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/mainOptimized.o +0 -0
  124. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/timing.o +0 -0
  125. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakDuplex.o +0 -0
  126. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakF-1600-opt64.o +0 -0
  127. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakNISTInterface.o +0 -0
  128. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakSponge.o +0 -0
  129. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/genKAT.o +0 -0
  130. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/mainOptimized.o +0 -0
  131. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/timing.o +0 -0
  132. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakDuplex.o +0 -0
  133. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakF-1600-reference.o +0 -0
  134. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakNISTInterface.o +0 -0
  135. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakSponge.o +0 -0
  136. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/displayIntermediateValues.o +0 -0
  137. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/genKAT.o +0 -0
  138. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/mainReference.o +0 -0
  139. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakDuplex.o +0 -0
  140. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakF-1600-reference32BI.o +0 -0
  141. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakNISTInterface.o +0 -0
  142. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakSponge.o +0 -0
  143. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/displayIntermediateValues.o +0 -0
  144. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/genKAT.o +0 -0
  145. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/mainReference.o +0 -0
  146. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple/Keccak-simple-test.o +0 -0
  147. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple/Keccak-simple.o +0 -0
  148. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple32BI/Keccak-simple-test.o +0 -0
  149. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple32BI/Keccak-simple32BI.o +0 -0
  150. data/ext/sha3/KeccakReferenceAndOptimized/compile64.bat +1 -0
  151. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccak +1 -0
  152. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc1024 +1 -0
  153. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc256 +1 -0
  154. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc256treed2 +1 -0
  155. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc448 +1 -0
  156. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc512 +1 -0
  157. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc512treed2 +1 -0
  158. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc768 +1 -0
  159. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccak.c +11 -0
  160. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc1024.c +11 -0
  161. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc256.c +11 -0
  162. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc448.c +11 -0
  163. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc512.c +11 -0
  164. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc768.c +11 -0
  165. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccak.h +1 -0
  166. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc1024.h +1 -0
  167. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc256.h +1 -0
  168. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc448.h +1 -0
  169. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc512.h +1 -0
  170. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc768.h +1 -0
  171. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/populate.py +506 -0
  172. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccak.h +2 -0
  173. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc1024.h +2 -0
  174. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc256.h +2 -0
  175. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc448.h +2 -0
  176. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc512.h +2 -0
  177. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc768.h +2 -0
  178. data/ext/sha3/KeccakReferenceAndOptimized/makefile +327 -0
  179. data/ext/sha3/Makefile +240 -0
  180. data/ext/sha3/depend +28 -0
  181. data/ext/sha3/extconf.rb +21 -0
  182. data/ext/sha3/sha3.c +95 -0
  183. data/lib/sha3-ruby.rb +27 -0
  184. data/lib/sha3-ruby/version.rb +5 -0
  185. data/sha3-ruby.gemspec +21 -0
  186. metadata +233 -0
@@ -0,0 +1,296 @@
1
+ @ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
2
+ @ Michaël Peeters and Gilles Van Assche. For more information, feedback or
3
+ @ questions, please refer to our website: http://keccak.noekeon.org/
4
+ @
5
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ @ This file was created from a .asm file
12
+ @ using the ads2gas.pl script.
13
+ .equ DO1STROUNDING, 0 @ not referenced in the visible part of this file
14
+
15
+ @ PRESERVE8
16
+ .text
17
+
18
+ @// --- defines
19
+ .equ cKeccakLaneSizeInBytes, 8 @ one state lane = 8 bytes (64 bits)
20
+ .equ cKeccakR_SizeInBytes , 576/8 @ rate = 576 bits = 72 bytes = 9 lanes per absorbed block
21
+ .equ crypto_hash_BYTES , cKeccakR_SizeInBytes @// populate.py, please set crypto_hash_BYTES
22
+
23
+ @// --- offsets in state
24
+ .equ Aba, 0*8 @ byte offsets (relative to sp) of the five lanes the code below keeps on the stack
25
+ .equ Aga, 1*8
26
+ .equ Aka, 2*8
27
+ .equ Ama, 3*8
28
+ .equ Asa, 4*8
29
+
30
+
31
+ @// --- code
32
+
33
+ .align 8 @ NOTE(review): GNU as on ARM reads this as 2^8 = 256-byte alignment; if only 8 bytes were meant, .balign 8 is the equivalent - confirm
34
+
35
+ .global KeccakF_armv7a_neon_asm @ called with bl below; not defined in the visible part of this file
36
+
37
+ @//int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
38
+ .global crypto_hash_keccakc1024_inplace_armv7a_neon
39
+
40
+ crypto_hash_keccakc1024_inplace_armv7a_neon: @ entry: r0 = out, r1 = in, r2 = input length in bytes
41
+ @ NOTE(review): the prototype above declares inlen as unsigned long long, but only r2 is read as the length - confirm inputs >= 4 GiB are never passed
42
+ push {r4-r6,lr} @ saved here, restored by the final pop {r4-r6,pc}
43
+ mov r5, r0 @ keep the output pointer in r5; r0 is reused as scratch further down
44
+ vpush {q4-q7} @ q4-q7 are overwritten below and restored before returning
45
+
46
+ @//allocate and clear state
47
+ pld [sp, #-5*8] @//preload state data
48
+ vmov.i64 q6, #0 @ d12 (low half of q6) = 0, the zero source for the five pushes
49
+ vpush {d12} @ each push reserves one zeroed 8-byte stack lane (5 pushes = offsets Aba..Asa)
50
+ vmov.i64 q7, #0
51
+ vmov.i64 q8, #0
52
+ vpush {d12}
53
+ vmov.i64 q9, #0
54
+ vmov.i64 q10, #0
55
+ vpush {d12}
56
+ vmov.i64 q11, #0
57
+ vmov.i64 q12, #0
58
+ vpush {d12}
59
+ vmov.i64 q13, #0
60
+ vmov.i64 q14, #0
61
+ vpush {d12}
62
+ vmov.i64 q15, #0 @ q6-q15 (d12-d31) are now all zero
63
+
64
+ subs r2, r2, #cKeccakR_SizeInBytes @ r2 = length - rate; carry clear means less than one full block
65
+ bcc crypto_hash_LoopEnd @ no full block: skip the loop (crypto_hash_LoopEnd adds the rate back to r2)
66
+ pld [r1] @//preload in data
67
+
68
+ @// Complete rate loop
69
+ crypto_hash_Loop: @ one full 72-byte block per iteration: 9 input lanes are XORed into d7,d12,d17,d22,d27,d8,d13,d18,d23
70
+
71
+ vldr d7, [sp, #Aba] @ d7 and d8 are the two rate lanes kept on the stack: load, XOR, store back
72
+ vldm r1!, { d0-d1 } @ input is fetched through d0-d3; r1 advances past the bytes consumed
73
+ veor.64 d7, d0
74
+ vldm r1!, { d2-d3 }
75
+ veor.64 d12, d1
76
+ vldm r1!, { d0-d1 }
77
+ veor.64 d17, d2
78
+ vldr d8, [sp, #Aga]
79
+ veor.64 d22, d3
80
+ vldm r1!, { d2-d3 }
81
+ veor.64 d27, d0
82
+ vstr d7, [sp, #Aba]
83
+ veor.64 d8, d1
84
+ vldm r1!, { d0 } @ ninth and last input lane of the block
85
+ veor.64 d13, d2
86
+ vldr d9, [sp, #Aka] @ d9-d11 are only read here, for the five-lane XOR computed below
87
+ veor.64 d18, d3
88
+ vldr d10, [sp, #Ama]
89
+ veor.64 d23, d0
90
+ vldr d11, [sp, #Asa]
91
+ vstr d8, [sp, #Aga] @ stored before the veor below overwrites d8
92
+
93
+ veor.64 q4, q5 @ q4 = d8:d9, q5 = d10:d11, so d8 ^= d10 and d9 ^= d11
94
+ veor.64 d5, d8, d9
95
+ veor.64 d5, d5, d7 @ d5 = XOR of all five stack lanes; presumably an input of KeccakF_armv7a_neon_asm - confirm against that routine
96
+
97
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): r1, r2 and r5 are used after the call, so the callee must preserve them - confirm
98
+ subs r2, r2, #cKeccakR_SizeInBytes
99
+ bcs crypto_hash_Loop @ carry set = no borrow: another full block is available
100
+ crypto_hash_LoopEnd: @ absorbs the complete 8-byte lanes of the final, partial block
101
+
102
+ adds r2, r2, #cKeccakR_SizeInBytes @ undo the last subtraction: r2 = bytes left (less than one block)
103
+ vldm sp, { d7-d11 } @get 5 lanes from stack
104
+ cmp r2, #8
105
+ blo crypto_hashd10IncompleteLane @ fewer than 8 bytes left: no complete lane to absorb
106
+
107
+ cmp r2, #4*8
108
+ bhs crypto_hash_4LanesOrMore
109
+
110
+ @1 to 3 lanes left
111
+ vld1.64 d0, [r1]!
112
+ cmp r2, #2*8
113
+ veor.64 d7, d0 @ veor leaves the flags alone, so the blo below still tests the cmp above
114
+ blo crypto_hash_FinalizeCompleteLanes
115
+ vld1.64 d0, [r1]!
116
+ cmp r2, #3*8
117
+ veor.64 d12, d0
118
+ blo crypto_hash_FinalizeCompleteLanes
119
+ vld1.64 d0, [r1]!
120
+ veor.64 d17, d0
121
+ b crypto_hash_FinalizeCompleteLanes
122
+
123
+ crypto_hash_4LanesOrMore: @ 4 to 8 complete lanes left: take the first four at once
124
+ vldm r1!, { d0-d1 }
125
+ veor.64 d7, d0
126
+ vldm r1!, { d2-d3 }
127
+ veor.64 d12, d1
128
+ veor.64 d17, d2
129
+ veor.64 d22, d3
130
+
131
+ cmp r2, #5*8 @ then one lane at a time, in the same register order as crypto_hash_Loop
132
+ blo crypto_hash_FinalizeCompleteLanes
133
+ vld1.64 d0, [r1]!
134
+ cmp r2, #6*8
135
+ veor.64 d27, d0
136
+ blo crypto_hash_FinalizeCompleteLanes
137
+ vld1.64 d0, [r1]!
138
+ cmp r2, #7*8
139
+ veor.64 d8, d0
140
+ blo crypto_hash_FinalizeCompleteLanes
141
+ vld1.64 d0, [r1]!
142
+ cmp r2, #8*8
143
+ veor.64 d13, d0
144
+ blo crypto_hash_FinalizeCompleteLanes
145
+ vld1.64 d0, [r1]! @ eighth lane; execution then falls through to crypto_hash_FinalizeCompleteLanes
146
+ veor.64 d18, d0
147
+
148
+ crypto_hash_FinalizeCompleteLanes: @ falls through: this label and the next one share the code below
149
+
150
+ @ Builds in d0 the last partial lane: the r0 leftover input bytes followed by a single 0x01 byte
151
+ crypto_hashd10IncompleteLane:
152
+ and r0, r2, #cKeccakLaneSizeInBytes-1 @ r0 = leftover bytes beyond the complete lanes (0..7)
153
+ vmov.i8 d0, #0xFF @padding
154
+ lsr r2, r2, #3 @//number of lanes left
155
+
156
+ vshr.u64 d0, #63 @padding
157
+ adr r3, crypto_hash_IncompleteLaneTable @ d0 is now 1; the handlers move that 1 above the data bytes
158
+ ldr pc, [r3, r0, LSL #2] @ jump to table entry r0 (4 bytes per entry)
159
+
160
+ crypto_hash_IncompleteLaneTable: @ label addresses stored as data words, so this is not position-independent code
161
+ .long crypto_hash_IncompleteDone @0 left
162
+ .long crypto_hash_1left
163
+ .long crypto_hash_2left
164
+ .long crypto_hash_3left
165
+ .long crypto_hash_4left
166
+ .long crypto_hash_5left
167
+ .long crypto_hash_6left
168
+ .long crypto_hash_7left
169
+
170
+ crypto_hash_1left: @ every N-left handler: shift the 1 up to byte N, then load bytes 0..N-1 from the input
171
+ vshl.u64 d0, d0, #8
172
+ vld1.8 d0[0], [r1]!
173
+ b crypto_hash_IncompleteDone
174
+
175
+ crypto_hash_2left:
176
+ vshl.u64 d0, d0, #16
177
+ vld1.16 d0[0], [r1]!
178
+ b crypto_hash_IncompleteDone
179
+
180
+ crypto_hash_3left:
181
+ vshl.u64 d0, d0, #24
182
+ vld1.16 d0[0], [r1]! @ bytes 0-1
183
+ vld1.8 d0[2], [r1]! @ byte 2
184
+ b crypto_hash_IncompleteDone
185
+
186
+ crypto_hash_4left:
187
+ vshl.u64 d0, d0, #32
188
+ vld1.32 d0[0], [r1]!
189
+ b crypto_hash_IncompleteDone
190
+
191
+ crypto_hash_5left:
192
+ vshl.u64 d0, d0, #40
193
+ vld1.32 d0[0], [r1]! @ bytes 0-3
194
+ vld1.8 d0[4], [r1]! @ byte 4
195
+ b crypto_hash_IncompleteDone
196
+
197
+ crypto_hash_6left:
198
+ vshl.u64 d0, d0, #48
199
+ vld1.32 d0[0], [r1]! @ bytes 0-3
200
+ vld1.16 d0[2], [r1]! @ halfword 2 = bytes 4-5
201
+ b crypto_hash_IncompleteDone
202
+
203
+ crypto_hash_7left: @ last handler: falls through to crypto_hash_IncompleteDone
204
+ vshl.u64 d0, d0, #56
205
+ vld1.32 d0[0], [r1]! @ bytes 0-3
206
+ vld1.16 d0[2], [r1]! @ bytes 4-5
207
+ vld1.8 d0[6], [r1]! @ byte 6
208
+
209
+
210
+ crypto_hash_IncompleteDone: @ XOR the padded partial lane d0 into the lane that follows the r2 complete lanes
211
+ adr r3, crypto_hash_xorlastLaneTable
212
+ ldr pc, [r3, r2, LSL #2] @ r2 = number of complete lanes (0..8), so entry 0 targets the first lane
213
+
214
+ crypto_hash_xorlastLaneTable: @ label addresses stored as data words, one per rate lane
215
+ .long crypto_hash_xorLane1
216
+ .long crypto_hash_xorLane2
217
+ .long crypto_hash_xorLane3
218
+ .long crypto_hash_xorLane4
219
+ .long crypto_hash_xorLane5
220
+ .long crypto_hash_xorLane6
221
+ .long crypto_hash_xorLane7
222
+ .long crypto_hash_xorLane8
223
+ .long crypto_hash_xorLane9
224
+
225
+ crypto_hash_xorLane1: @ target registers follow the absorb order d7,d12,d17,d22,d27,d8,d13,d18,d23
226
+ veor.64 d7, d0
227
+ b crypto_hash_xorLastBitOfRate
228
+
229
+ crypto_hash_xorLane2:
230
+ veor.64 d12, d0
231
+ b crypto_hash_xorLastBitOfRate
232
+
233
+ crypto_hash_xorLane3:
234
+ veor.64 d17, d0
235
+ b crypto_hash_xorLastBitOfRate
236
+
237
+ crypto_hash_xorLane4:
238
+ veor.64 d22, d0
239
+ b crypto_hash_xorLastBitOfRate
240
+
241
+ crypto_hash_xorLane5:
242
+ veor.64 d27, d0
243
+ b crypto_hash_xorLastBitOfRate
244
+
245
+ crypto_hash_xorLane6:
246
+ veor.64 d8, d0
247
+ b crypto_hash_xorLastBitOfRate
248
+
249
+ crypto_hash_xorLane7:
250
+ veor.64 d13, d0
251
+ b crypto_hash_xorLastBitOfRate
252
+
253
+ crypto_hash_xorLane8:
254
+ veor.64 d18, d0
255
+ b crypto_hash_xorLastBitOfRate
256
+
257
+ crypto_hash_xorLane9: @ last case: falls through to crypto_hash_xorLastBitOfRate
258
+ veor.64 d23, d0
259
+
260
+ crypto_hash_xorLastBitOfRate: @ final padding bit, last permutation call, output copy and return
261
+ vmov.i8 d3, #0xFF
262
+ vshl.u64 d3, d3, #63 @ all-ones shifted left by 63 leaves only bit 63 set
263
+ veor.64 d23, d23, d3 @ d23 is the ninth (last) rate lane in the absorb order
264
+
265
+ vstm sp, { d7-d8 } @put 2 lanes back on stack (others not modified)
266
+
267
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11
268
+ veor.64 d5, d8, d9
269
+ veor.64 d5, d5, d7 @ d5 = XOR of the five stack lanes, same setup as before the call in crypto_hash_Loop
270
+
271
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): r5 is used after the call, so the callee must preserve it - confirm
272
+
273
+ pld [r5] @//preload out data for write
274
+
275
+ vpop { d0-d1 } @ pops the first two stack lanes; presumably the callee wrote its results for those lanes there - confirm
276
+ vst1.64 d0, [r5]! @ output order matches the absorb order, with d0/d1 standing in for the stacked d7/d8 lanes
277
+ vst1.64 d12, [r5]!
278
+ vst1.64 d17, [r5]!
279
+ vst1.64 d22, [r5]!
280
+ vst1.64 d27, [r5]!
281
+ vst1.64 d1, [r5]!
282
+ vst1.64 d13, [r5]!
283
+ vst1.64 d18, [r5]!
284
+ .if crypto_hash_BYTES == cKeccakR_SizeInBytes
285
+ vst1.64 d23, [r5]! @ ninth lane is written only when the output size equals the rate
286
+ .endif
287
+
288
+ adds sp, sp, #3*8 @no need of last stacked lanes in output
289
+
290
+ vpop {q4-q7} @ restore the registers saved at entry
291
+ movs r0, #0 @ return value 0
292
+ pop {r4-r6,pc} @ restore r4-r6 and return
293
+
294
+ @
295
+ .align 8 @ NOTE(review): 2^8 = 256-byte alignment under GNU as for ARM - confirm this is intended
296
+
@@ -0,0 +1,429 @@
1
+ @ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
2
+ @ Michaël Peeters and Gilles Van Assche. For more information, feedback or
3
+ @ questions, please refer to our website: http://keccak.noekeon.org/
4
+ @
5
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ @ This file was created from a .asm file
12
+ @ using the ads2gas.pl script.
13
+ .equ DO1STROUNDING, 0 @ not referenced in the visible part of this file
14
+
15
+ @ PRESERVE8
16
+ .text
17
+
18
+ @// --- defines
19
+ .equ cKeccakLaneSizeInBytes, 8 @ one state lane = 8 bytes (64 bits)
20
+ .equ cKeccakR_SizeInBytes , 1088/8 @ rate = 1088 bits = 136 bytes = 17 lanes per absorbed block
21
+ .equ crypto_hash_BYTES , cKeccakR_SizeInBytes @// populate.py, please set crypto_hash_BYTES
22
+
23
+ @// --- offsets in state
24
+ .equ Aba, 0*8 @ byte offsets (relative to sp) of the five lanes the code below keeps on the stack
25
+ .equ Aga, 1*8
26
+ .equ Aka, 2*8
27
+ .equ Ama, 3*8
28
+ .equ Asa, 4*8
29
+
30
+
31
+ @// --- code
32
+
33
+ .align 8 @ NOTE(review): GNU as on ARM reads this as 2^8 = 256-byte alignment; if only 8 bytes were meant, .balign 8 is the equivalent - confirm
34
+
35
+ .global KeccakF_armv7a_neon_asm @ called with bl below; not defined in the visible part of this file
36
+
37
+ @//int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
38
+ .global crypto_hash_keccakc512_inplace_armv7a_neon
39
+
40
+ crypto_hash_keccakc512_inplace_armv7a_neon: @ entry: r0 = out, r1 = in, r2 = input length in bytes
41
+ @ NOTE(review): the prototype above declares inlen as unsigned long long, but only r2 is read as the length - confirm inputs >= 4 GiB are never passed
42
+ push {r4-r6,lr} @ saved here, restored by the final pop {r4-r6,pc}
43
+ mov r5, r0 @ keep the output pointer in r5; r0 is reused as scratch further down
44
+ vpush {q4-q7} @ q4-q7 are overwritten below and restored before returning
45
+
46
+ @//allocate and clear state
47
+ pld [sp, #-5*8] @//preload state data
48
+ vmov.i64 q6, #0 @ d12 (low half of q6) = 0, the zero source for the five pushes
49
+ vpush {d12} @ each push reserves one zeroed 8-byte stack lane (5 pushes = offsets Aba..Asa)
50
+ vmov.i64 q7, #0
51
+ vmov.i64 q8, #0
52
+ vpush {d12}
53
+ vmov.i64 q9, #0
54
+ vmov.i64 q10, #0
55
+ vpush {d12}
56
+ vmov.i64 q11, #0
57
+ vmov.i64 q12, #0
58
+ vpush {d12}
59
+ vmov.i64 q13, #0
60
+ vmov.i64 q14, #0
61
+ vpush {d12}
62
+ vmov.i64 q15, #0 @ q6-q15 (d12-d31) are now all zero
63
+
64
+ subs r2, r2, #cKeccakR_SizeInBytes @ r2 = length - rate; carry clear means less than one full block
65
+ bcc crypto_hash_LoopEnd @ no full block: skip the loop (crypto_hash_LoopEnd adds the rate back to r2)
66
+ pld [r1] @//preload in data
67
+
68
+ @// Complete rate loop
69
+ crypto_hash_Loop: @ one full 136-byte block per iteration: 17 input lanes are XORed into the state
70
+ @ lane order: d7,d12,d17,d22,d27, d8,d13,d18,d23,d28, d9,d14,d19,d24,d29, d10,d15
71
+ vldr d7, [sp, #Aba] @ first stack lane; the other four (d8-d11) are loaded further down, interleaved with the input loads
72
+ vldm r1!, { d0-d1 } @ input is fetched through d0-d3; r1 advances past the bytes consumed
73
+ veor.64 d7, d0
74
+ vldm r1!, { d2-d3 }
75
+ veor.64 d12, d1
76
+ vldm r1!, { d0-d1 }
77
+ veor.64 d17, d2
78
+ vldr d8, [sp, #Aga]
79
+ veor.64 d22, d3
80
+ vldm r1!, { d2-d3 }
81
+ veor.64 d27, d0
82
+ vstr d7, [sp, #Aba]
83
+ veor.64 d8, d1
84
+ vldm r1!, { d0-d1 }
85
+ veor.64 d13, d2
86
+ vldr d9, [sp, #Aka]
87
+ veor.64 d18, d3
88
+ vldm r1!, { d2-d3 }
89
+ veor.64 d23, d0
90
+ vldr d10, [sp, #Ama]
91
+ veor.64 d28, d1
92
+ vldm r1!, { d0-d1 }
93
+ veor.64 d9, d2
94
+ vstr d8, [sp, #Aga] @ stored before the veor q4 below overwrites d8
95
+ veor.64 d14, d3
96
+ vldm r1!, { d2-d3 }
97
+ veor.64 d19, d0
98
+ vldr d11, [sp, #Asa] @ d11 is only read here; no input lane is XORed into it
99
+ veor.64 d24, d1
100
+ vstr d9, [sp, #Aka] @ stored before the veor q4 below overwrites d9
101
+ veor.64 d29, d2
102
+ vldm r1!, { d0 } @ seventeenth and last input lane of the block
103
+ veor.64 d10, d3
104
+ veor.64 d15, d0
105
+
106
+ veor.64 q4, q5 @ q4 = d8:d9, q5 = d10:d11, so d8 ^= d10 and d9 ^= d11 (d10 itself is unchanged)
107
+ vstr d10, [sp, #Ama]
108
+ veor.64 d5, d8, d9
109
+ veor.64 d5, d5, d7 @ d5 = XOR of all five stack lanes; presumably an input of KeccakF_armv7a_neon_asm - confirm against that routine
110
+
111
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): r1, r2 and r5 are used after the call, so the callee must preserve them - confirm
112
+ subs r2, r2, #cKeccakR_SizeInBytes
113
+ bcs crypto_hash_Loop @ carry set = no borrow: another full block is available
114
+ crypto_hash_LoopEnd: @ absorbs the complete 8-byte lanes of the final, partial block
115
+
116
+ adds r2, r2, #cKeccakR_SizeInBytes @ undo the last subtraction: r2 = bytes left (less than one block)
117
+ vldm sp, { d7-d11 } @get 5 lanes from stack
118
+ cmp r2, #8
119
+ blo crypto_hashd10IncompleteLane @ fewer than 8 bytes left: no complete lane to absorb
120
+
121
+ @ Absorb last complete lanes
122
+ cmp r2, #8*8
123
+ bhs crypto_hash_8LanesOrMore
124
+
125
+ @less than 8 lanes left
126
+ cmp r2, #4*8
127
+ bhs crypto_hash_4LanesOrMore
128
+
129
+ @1 to 3 lanes left
130
+ vld1.64 d0, [r1]!
131
+ cmp r2, #2*8
132
+ veor.64 d7, d0 @ veor leaves the flags alone, so the blo below still tests the cmp above
133
+ blo crypto_hash_FinalizeCompleteLanes
134
+ vld1.64 d0, [r1]!
135
+ cmp r2, #3*8
136
+ veor.64 d12, d0
137
+ blo crypto_hash_FinalizeCompleteLanes
138
+ vld1.64 d0, [r1]!
139
+ veor.64 d17, d0
140
+ b crypto_hash_FinalizeCompleteLanes
141
+
142
+ crypto_hash_4LanesOrMore: @ 4 to 7 complete lanes left: take the first four at once
143
+ vldm r1!, { d0-d1 }
144
+ veor.64 d7, d0
145
+ vldm r1!, { d2-d3 }
146
+ veor.64 d12, d1
147
+ veor.64 d17, d2
148
+ veor.64 d22, d3
149
+
150
+ cmp r2, #5*8 @ then one lane at a time, in the same register order as crypto_hash_Loop
151
+ blo crypto_hash_FinalizeCompleteLanes
152
+ vld1.64 d0, [r1]!
153
+ cmp r2, #6*8
154
+ veor.64 d27, d0
155
+ blo crypto_hash_FinalizeCompleteLanes
156
+ vld1.64 d0, [r1]!
157
+ cmp r2, #7*8
158
+ veor.64 d8, d0
159
+ blo crypto_hash_FinalizeCompleteLanes
160
+ vld1.64 d0, [r1]!
161
+ veor.64 d13, d0
162
+ b crypto_hash_FinalizeCompleteLanes
163
+
164
+ crypto_hash_8LanesOrMore: @ 8 to 16 complete lanes left: take the first eight at once
165
+ vldm r1!, { d0-d1 }
166
+ veor.64 d7, d0
167
+ vldm r1!, { d2-d3 }
168
+ veor.64 d12, d1
169
+ vldm r1!, { d0-d1 }
170
+ veor.64 d17, d2
171
+ veor.64 d22, d3
172
+ vldm r1!, { d2-d3 }
173
+ veor.64 d27, d0
174
+ veor.64 d8, d1
175
+ veor.64 d13, d2
176
+ veor.64 d18, d3
177
+
178
+ cmp r2, #12*8
179
+ bhs crypto_hash_12LanesOrMore
180
+
181
+ @8 to 11 lanes left
182
+ cmp r2, #9*8
183
+ blo crypto_hash_FinalizeCompleteLanes
184
+ vld1.64 d0, [r1]!
185
+ cmp r2, #10*8
186
+ veor.64 d23, d0
187
+ blo crypto_hash_FinalizeCompleteLanes
188
+ vld1.64 d0, [r1]!
189
+ cmp r2, #11*8
190
+ veor.64 d28, d0
191
+ blo crypto_hash_FinalizeCompleteLanes
192
+ vld1.64 d0, [r1]!
193
+ veor.64 d9, d0
194
+ b crypto_hash_FinalizeCompleteLanes
195
+
196
+ crypto_hash_12LanesOrMore: @ lanes 9 to 12 at once (the first eight were absorbed in crypto_hash_8LanesOrMore)
197
+ vldm r1!, { d0-d1 }
198
+ veor.64 d23, d0
199
+ vldm r1!, { d2-d3 }
200
+ veor.64 d28, d1
201
+ veor.64 d9, d2
202
+ veor.64 d14, d3
203
+
204
+ @12 to 16 lanes left
205
+ cmp r2, #13*8
206
+ blo crypto_hash_FinalizeCompleteLanes
207
+ vld1.64 d0, [r1]!
208
+ cmp r2, #14*8
209
+ veor.64 d19, d0
210
+ blo crypto_hash_FinalizeCompleteLanes
211
+ vld1.64 d0, [r1]!
212
+ cmp r2, #15*8
213
+ veor.64 d24, d0
214
+ blo crypto_hash_FinalizeCompleteLanes
215
+ vld1.64 d0, [r1]!
216
+ cmp r2, #16*8
217
+ veor.64 d29, d0
218
+ blo crypto_hash_FinalizeCompleteLanes
219
+ vld1.64 d0, [r1]! @ sixteenth lane; execution then falls through to crypto_hash_FinalizeCompleteLanes
220
+ veor.64 d10, d0
221
+
222
+ crypto_hash_FinalizeCompleteLanes: @ falls through: this label and the next one share the code below
223
+
224
+ @ Builds in d0 the last partial lane: the r0 leftover input bytes followed by a single 0x01 byte
225
+ crypto_hashd10IncompleteLane:
226
+ and r0, r2, #cKeccakLaneSizeInBytes-1 @ r0 = leftover bytes beyond the complete lanes (0..7)
227
+ vmov.i8 d0, #0xFF @padding
228
+ lsr r2, r2, #3 @//number of lanes left
229
+
230
+ vshr.u64 d0, #63 @padding
231
+ adr r3, crypto_hash_IncompleteLaneTable @ d0 is now 1; the handlers move that 1 above the data bytes
232
+ ldr pc, [r3, r0, LSL #2] @ jump to table entry r0 (4 bytes per entry)
233
+
234
+ crypto_hash_IncompleteLaneTable: @ label addresses stored as data words, so this is not position-independent code
235
+ .long crypto_hash_IncompleteDone @0 left
236
+ .long crypto_hash_1left
237
+ .long crypto_hash_2left
238
+ .long crypto_hash_3left
239
+ .long crypto_hash_4left
240
+ .long crypto_hash_5left
241
+ .long crypto_hash_6left
242
+ .long crypto_hash_7left
243
+
244
+ crypto_hash_1left: @ every N-left handler: shift the 1 up to byte N, then load bytes 0..N-1 from the input
245
+ vshl.u64 d0, d0, #8
246
+ vld1.8 d0[0], [r1]!
247
+ b crypto_hash_IncompleteDone
248
+
249
+ crypto_hash_2left:
250
+ vshl.u64 d0, d0, #16
251
+ vld1.16 d0[0], [r1]!
252
+ b crypto_hash_IncompleteDone
253
+
254
+ crypto_hash_3left:
255
+ vshl.u64 d0, d0, #24
256
+ vld1.16 d0[0], [r1]! @ bytes 0-1
257
+ vld1.8 d0[2], [r1]! @ byte 2
258
+ b crypto_hash_IncompleteDone
259
+
260
+ crypto_hash_4left:
261
+ vshl.u64 d0, d0, #32
262
+ vld1.32 d0[0], [r1]!
263
+ b crypto_hash_IncompleteDone
264
+
265
+ crypto_hash_5left:
266
+ vshl.u64 d0, d0, #40
267
+ vld1.32 d0[0], [r1]! @ bytes 0-3
268
+ vld1.8 d0[4], [r1]! @ byte 4
269
+ b crypto_hash_IncompleteDone
270
+
271
+ crypto_hash_6left:
272
+ vshl.u64 d0, d0, #48
273
+ vld1.32 d0[0], [r1]! @ bytes 0-3
274
+ vld1.16 d0[2], [r1]! @ halfword 2 = bytes 4-5
275
+ b crypto_hash_IncompleteDone
276
+
277
+ crypto_hash_7left: @ last handler: falls through to crypto_hash_IncompleteDone
278
+ vshl.u64 d0, d0, #56
279
+ vld1.32 d0[0], [r1]! @ bytes 0-3
280
+ vld1.16 d0[2], [r1]! @ bytes 4-5
281
+ vld1.8 d0[6], [r1]! @ byte 6
282
+
283
+ crypto_hash_IncompleteDone: @ XOR the padded partial lane d0 into the lane that follows the r2 complete lanes
284
+ adr r3, crypto_hash_xorlastLaneTable
285
+ ldr pc, [r3, r2, LSL #2] @ r2 = number of complete lanes (0..16), so entry 0 targets the first lane
286
+
287
+ crypto_hash_xorlastLaneTable: @ label addresses stored as data words, one per rate lane
288
+ .long crypto_hash_xorLane1
289
+ .long crypto_hash_xorLane2
290
+ .long crypto_hash_xorLane3
291
+ .long crypto_hash_xorLane4
292
+ .long crypto_hash_xorLane5
293
+ .long crypto_hash_xorLane6
294
+ .long crypto_hash_xorLane7
295
+ .long crypto_hash_xorLane8
296
+ .long crypto_hash_xorLane9
297
+ .long crypto_hash_xorLane10
298
+ .long crypto_hash_xorLane11
299
+ .long crypto_hash_xorLane12
300
+ .long crypto_hash_xorLane13
301
+ .long crypto_hash_xorLane14
302
+ .long crypto_hash_xorLane15
303
+ .long crypto_hash_xorLane16
304
+ .long crypto_hash_xorLane17
305
+
306
+ crypto_hash_xorLane1: @ target registers follow the same lane order as the absorb loop (d7, d12, ... d10, d15)
307
+ veor.64 d7, d0
308
+ b crypto_hash_xorLastBitOfRate
309
+
310
+ crypto_hash_xorLane2:
311
+ veor.64 d12, d0
312
+ b crypto_hash_xorLastBitOfRate
313
+
314
+ crypto_hash_xorLane3:
315
+ veor.64 d17, d0
316
+ b crypto_hash_xorLastBitOfRate
317
+
318
+ crypto_hash_xorLane4:
319
+ veor.64 d22, d0
320
+ b crypto_hash_xorLastBitOfRate
321
+
322
+ crypto_hash_xorLane5:
323
+ veor.64 d27, d0
324
+ b crypto_hash_xorLastBitOfRate
325
+
326
+ crypto_hash_xorLane6:
327
+ veor.64 d8, d0
328
+ b crypto_hash_xorLastBitOfRate
329
+
330
+ crypto_hash_xorLane7:
331
+ veor.64 d13, d0
332
+ b crypto_hash_xorLastBitOfRate
333
+
334
+ crypto_hash_xorLane8:
335
+ veor.64 d18, d0
336
+ b crypto_hash_xorLastBitOfRate
337
+
338
+ crypto_hash_xorLane9:
339
+ veor.64 d23, d0
340
+ b crypto_hash_xorLastBitOfRate
341
+
342
+ crypto_hash_xorLane10:
343
+ veor.64 d28, d0
344
+ b crypto_hash_xorLastBitOfRate
345
+
346
+ crypto_hash_xorLane11:
347
+ veor.64 d9, d0
348
+ b crypto_hash_xorLastBitOfRate
349
+
350
+ crypto_hash_xorLane12:
351
+ veor.64 d14, d0
352
+ b crypto_hash_xorLastBitOfRate
353
+
354
+ crypto_hash_xorLane13:
355
+ veor.64 d19, d0
356
+ b crypto_hash_xorLastBitOfRate
357
+
358
+ crypto_hash_xorLane14:
359
+ veor.64 d24, d0
360
+ b crypto_hash_xorLastBitOfRate
361
+
362
+ crypto_hash_xorLane15:
363
+ veor.64 d29, d0
364
+ b crypto_hash_xorLastBitOfRate
365
+
366
+ crypto_hash_xorLane16:
367
+ veor.64 d10, d0
368
+ b crypto_hash_xorLastBitOfRate
369
+
370
+ crypto_hash_xorLane17: @ last case: falls through to crypto_hash_xorLastBitOfRate
371
+ veor.64 d15, d0
372
+
373
+
374
+ crypto_hash_xorLastBitOfRate: @ final padding bit, last permutation call, output copy and return
375
+ vmov.i8 d3, #0xFF
376
+ vshl.u64 d3, d3, #63 @ all-ones shifted left by 63 leaves only bit 63 set
377
+ veor.64 d15, d15, d3 @ d15 is the seventeenth (last) rate lane in the absorb order
378
+
379
+ vstm sp, { d7-d10 } @put 4 lanes back on stack (last one not modified)
380
+
381
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11
382
+ veor.64 d5, d8, d9
383
+ veor.64 d5, d5, d7 @ d5 = XOR of the five stack lanes, same setup as before the call in crypto_hash_Loop
384
+
385
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): r5 is used after the call, so the callee must preserve it - confirm
386
+
387
+ .if crypto_hash_BYTES == cKeccakR_SizeInBytes
388
+ pld [r5] @//preload out data for write
389
+
390
+ vpop { d0-d1 } @ pops the first two stack lanes; presumably the callee wrote its results for those lanes there - confirm
391
+ vst1.64 d0, [r5]! @ output order matches the absorb order, with d0-d3 standing in for the stacked d7-d10 lanes
392
+ vst1.64 d12, [r5]!
393
+ vst1.64 d17, [r5]!
394
+ vst1.64 d22, [r5]!
395
+ vst1.64 d27, [r5]!
396
+
397
+ vpop { d2-d3 } @ pops the third and fourth stack lanes
398
+ vst1.64 d1, [r5]!
399
+ vst1.64 d13, [r5]!
400
+ vst1.64 d18, [r5]!
401
+ vst1.64 d23, [r5]!
402
+ vst1.64 d28, [r5]!
403
+
404
+ vst1.64 d2, [r5]!
405
+ vst1.64 d14, [r5]!
406
+ vst1.64 d19, [r5]!
407
+ vst1.64 d24, [r5]!
408
+ vst1.64 d29, [r5]!
409
+
410
+ adds sp, sp, #8 @no need of last stacked lane in output
411
+ vst1.64 d3, [r5]!
412
+ vst1.64 d15, [r5]! @ seventeenth lane: 17 * 8 = 136 bytes written in total
413
+ .else
414
+ @output fixed 256 bits
415
+ vpop { d0 }
416
+ vst1.64 d0, [r5]!
417
+ vst1.64 d12, [r5]!
418
+ adds sp, sp, #4*8 @no need of other 4 stacked lanes in output
419
+ vst1.64 d17, [r5]!
420
+ vst1.64 d22, [r5]!
421
+ .endif
422
+
423
+ vpop {q4-q7} @ restore the registers saved at entry
424
+ movs r0, #0 @ return value 0
425
+ pop {r4-r6,pc} @ restore r4-r6 and return
426
+
427
+ @
428
+ .align 8 @ NOTE(review): 2^8 = 256-byte alignment under GNU as for ARM - confirm this is intended
429
+