sha3-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +29 -0
  5. data/Rakefile +7 -0
  6. data/ext/sha3/KeccakReferenceAndOptimized/KeccakCompact.vcproj +207 -0
  7. data/ext/sha3/KeccakReferenceAndOptimized/KeccakCompact8.vcproj +207 -0
  8. data/ext/sha3/KeccakReferenceAndOptimized/KeccakInplace.vcproj +203 -0
  9. data/ext/sha3/KeccakReferenceAndOptimized/KeccakInplace32BI.vcproj +201 -0
  10. data/ext/sha3/KeccakReferenceAndOptimized/KeccakOptimized32.vcproj +267 -0
  11. data/ext/sha3/KeccakReferenceAndOptimized/KeccakOptimized64.vcproj +267 -0
  12. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReference.vcproj +243 -0
  13. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReference32BI.vcproj +243 -0
  14. data/ext/sha3/KeccakReferenceAndOptimized/KeccakReferenceAndOptimized.sln +62 -0
  15. data/ext/sha3/KeccakReferenceAndOptimized/KeccakSimple.vcproj +203 -0
  16. data/ext/sha3/KeccakReferenceAndOptimized/KeccakSimple32BI.vcproj +201 -0
  17. data/ext/sha3/KeccakReferenceAndOptimized/Sources/AVR8-rotate64.h +27 -0
  18. data/ext/sha3/KeccakReferenceAndOptimized/Sources/AVR8-rotate64.s +285 -0
  19. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-settings.h +2 -0
  20. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-test.c +142 -0
  21. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-util.h +15 -0
  22. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8-util.s +119 -0
  23. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8.c +184 -0
  24. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-avr8.h +25 -0
  25. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact-settings.h +3 -0
  26. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact-test.c +317 -0
  27. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact.c +341 -0
  28. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact.h +50 -0
  29. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8-settings.h +2 -0
  30. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8-test.c +192 -0
  31. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8.c +375 -0
  32. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-compact8.h +47 -0
  33. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +406 -0
  34. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-minimal-test.c +231 -0
  35. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-settings.h +3 -0
  36. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace-test.c +221 -0
  37. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace.c +445 -0
  38. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv6M.s +844 -0
  39. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv7A.s +687 -0
  40. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI-armgcc-ARMv7M.s +687 -0
  41. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-inplace32BI.c +849 -0
  42. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple-settings.h +3 -0
  43. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple-test.c +221 -0
  44. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple.c +403 -0
  45. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccak-simple32BI.c +673 -0
  46. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakDuplex.c +68 -0
  47. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakDuplex.h +59 -0
  48. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-rvk.macros +555 -0
  49. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-s1.macros +1187 -0
  50. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32-s2.macros +1187 -0
  51. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-32.macros +26 -0
  52. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-64.macros +728 -0
  53. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-arm.c +123 -0
  54. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-armcc.s +653 -0
  55. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-armgcc.s +686 -0
  56. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8.c +163 -0
  57. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8asm-compact.s +647 -0
  58. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-avr8asm-fast.s +934 -0
  59. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s +446 -0
  60. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-int-set.h +6 -0
  61. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-interface.h +46 -0
  62. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt32-settings.h +4 -0
  63. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt32.c +524 -0
  64. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt64-settings.h +7 -0
  65. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-opt64.c +504 -0
  66. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.c +300 -0
  67. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.h +20 -0
  68. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference.o +0 -0
  69. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-reference32BI.c +371 -0
  70. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-simd128.macros +651 -0
  71. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-simd64.macros +517 -0
  72. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-unrolling.macros +124 -0
  73. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-asm.c +62 -0
  74. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-gas.s +766 -0
  75. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-x86-64-shld-gas.s +766 -0
  76. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakF-1600-xop.macros +573 -0
  77. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.c +81 -0
  78. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.h +70 -0
  79. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakNISTInterface.o +0 -0
  80. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.c +266 -0
  81. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.h +76 -0
  82. data/ext/sha3/KeccakReferenceAndOptimized/Sources/KeccakSponge.o +0 -0
  83. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccakc1024-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +296 -0
  84. data/ext/sha3/KeccakReferenceAndOptimized/Sources/Keccakc512-crypto_hash-inplace-armgcc-ARMv7A-NEON.s +429 -0
  85. data/ext/sha3/KeccakReferenceAndOptimized/Sources/brg_endian.h +142 -0
  86. data/ext/sha3/KeccakReferenceAndOptimized/Sources/crypto_hash.h +0 -0
  87. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.c +117 -0
  88. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.h +29 -0
  89. data/ext/sha3/KeccakReferenceAndOptimized/Sources/displayIntermediateValues.o +0 -0
  90. data/ext/sha3/KeccakReferenceAndOptimized/Sources/genKAT.c +692 -0
  91. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainARM.c +88 -0
  92. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainOptimized.c +23 -0
  93. data/ext/sha3/KeccakReferenceAndOptimized/Sources/mainReference.c +381 -0
  94. data/ext/sha3/KeccakReferenceAndOptimized/Sources/timing.c +436 -0
  95. data/ext/sha3/KeccakReferenceAndOptimized/Sources/timing.h +13 -0
  96. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation-config.h +2 -0
  97. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation.c +572 -0
  98. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/DoublePermutation.h +38 -0
  99. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/KeccakF-1600-unrolling.macros +124 -0
  100. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc256TreeD2.c +81 -0
  101. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc256TreeD2.h +18 -0
  102. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc512TreeD2.c +81 -0
  103. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/Keccakc512TreeD2.h +18 -0
  104. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/crypto_hash.h +0 -0
  105. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/mainOptimized.c +112 -0
  106. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/timing-Double.c +225 -0
  107. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/Sources/timing-Double.h +20 -0
  108. data/ext/sha3/KeccakReferenceAndOptimized/TreeHashing/makefile +68 -0
  109. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakCompact +0 -0
  110. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakOptimized32 +0 -0
  111. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakOptimized64 +0 -0
  112. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakReference +0 -0
  113. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakReference32BI +0 -0
  114. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakSimple +0 -0
  115. data/ext/sha3/KeccakReferenceAndOptimized/bin/KeccakSimple32BI +0 -0
  116. data/ext/sha3/KeccakReferenceAndOptimized/bin/compact/Keccak-compact-test.o +0 -0
  117. data/ext/sha3/KeccakReferenceAndOptimized/bin/compact/Keccak-compact.o +0 -0
  118. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakDuplex.o +0 -0
  119. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakF-1600-opt32.o +0 -0
  120. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakNISTInterface.o +0 -0
  121. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/KeccakSponge.o +0 -0
  122. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/genKAT.o +0 -0
  123. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/mainOptimized.o +0 -0
  124. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized32/timing.o +0 -0
  125. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakDuplex.o +0 -0
  126. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakF-1600-opt64.o +0 -0
  127. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakNISTInterface.o +0 -0
  128. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/KeccakSponge.o +0 -0
  129. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/genKAT.o +0 -0
  130. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/mainOptimized.o +0 -0
  131. data/ext/sha3/KeccakReferenceAndOptimized/bin/optimized64/timing.o +0 -0
  132. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakDuplex.o +0 -0
  133. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakF-1600-reference.o +0 -0
  134. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakNISTInterface.o +0 -0
  135. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/KeccakSponge.o +0 -0
  136. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/displayIntermediateValues.o +0 -0
  137. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/genKAT.o +0 -0
  138. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference/mainReference.o +0 -0
  139. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakDuplex.o +0 -0
  140. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakF-1600-reference32BI.o +0 -0
  141. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakNISTInterface.o +0 -0
  142. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/KeccakSponge.o +0 -0
  143. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/displayIntermediateValues.o +0 -0
  144. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/genKAT.o +0 -0
  145. data/ext/sha3/KeccakReferenceAndOptimized/bin/reference32bi/mainReference.o +0 -0
  146. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple/Keccak-simple-test.o +0 -0
  147. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple/Keccak-simple.o +0 -0
  148. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple32BI/Keccak-simple-test.o +0 -0
  149. data/ext/sha3/KeccakReferenceAndOptimized/bin/simple32BI/Keccak-simple32BI.o +0 -0
  150. data/ext/sha3/KeccakReferenceAndOptimized/compile64.bat +1 -0
  151. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccak +1 -0
  152. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc1024 +1 -0
  153. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc256 +1 -0
  154. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc256treed2 +1 -0
  155. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc448 +1 -0
  156. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc512 +1 -0
  157. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc512treed2 +1 -0
  158. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/checksum-keccakc768 +1 -0
  159. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccak.c +11 -0
  160. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc1024.c +11 -0
  161. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc256.c +11 -0
  162. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc448.c +11 -0
  163. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc512.c +11 -0
  164. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/hash-keccakc768.c +11 -0
  165. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccak.h +1 -0
  166. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc1024.h +1 -0
  167. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc256.h +1 -0
  168. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc448.h +1 -0
  169. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc512.h +1 -0
  170. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/int-set-keccakc768.h +1 -0
  171. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/populate.py +506 -0
  172. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccak.h +2 -0
  173. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc1024.h +2 -0
  174. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc256.h +2 -0
  175. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc448.h +2 -0
  176. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc512.h +2 -0
  177. data/ext/sha3/KeccakReferenceAndOptimized/eBASH/simple-keccakc768.h +2 -0
  178. data/ext/sha3/KeccakReferenceAndOptimized/makefile +327 -0
  179. data/ext/sha3/Makefile +240 -0
  180. data/ext/sha3/depend +28 -0
  181. data/ext/sha3/extconf.rb +21 -0
  182. data/ext/sha3/sha3.c +95 -0
  183. data/lib/sha3-ruby.rb +27 -0
  184. data/lib/sha3-ruby/version.rb +5 -0
  185. data/sha3-ruby.gemspec +21 -0
  186. metadata +233 -0
@@ -0,0 +1,296 @@
1
+ @ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
2
+ @ Michaël Peeters and Gilles Van Assche. For more information, feedback or
3
+ @ questions, please refer to our website: http://keccak.noekeon.org/
4
+ @
5
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ @ This file was created from a .asm file
12
+ @ using the ads2gas.pl script.
13
+ .equ DO1STROUNDING, 0 @ not referenced by any instruction in this file
14
+
15
+ @ PRESERVE8
16
+ .text
17
+
18
+ @// --- defines
19
+ .equ cKeccakLaneSizeInBytes, 8 @ one state lane = 64 bits
20
+ .equ cKeccakR_SizeInBytes , 576/8 @ rate: 576 bits = 72 bytes = 9 lanes per absorbed block
21
+ .equ crypto_hash_BYTES , cKeccakR_SizeInBytes @// populate.py, please set crypto_hash_BYTES
22
+
23
+ @// --- offsets in state (byte offsets from sp of the five lanes kept on the stack)
24
+ .equ Aba, 0*8
25
+ .equ Aga, 1*8
26
+ .equ Aka, 2*8
27
+ .equ Ama, 3*8
28
+ .equ Asa, 4*8
29
+
30
+
31
+ @// --- code
32
+
33
+ .align 8 @ NOTE(review): GNU as on ARM reads the operand as a power of two (2^8 = 256 bytes) - confirm this is intended
34
+
35
+ .global KeccakF_armv7a_neon_asm @ called with bl below; no definition appears in this file
36
+
37
+ @//int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
38
+ .global crypto_hash_keccakc1024_inplace_armv7a_neon
39
+
40
+ crypto_hash_keccakc1024_inplace_armv7a_neon: @ entry point; see the C prototype in the comment above
41
+ @ Register use visible below: r0 (copied to r5) = output pointer, r1 = input pointer, r2 = byte count. r3 is overwritten later without being read, so an upper half of inlen held there would be ignored.
42
+ push {r4-r6,lr} @ save r4-r6 and the return address (r4 and r6 are not used in this file)
43
+ mov r5, r0 @ keep the output pointer; r0 is reused as scratch later
44
+ vpush {q4-q7} @ save d8-d15, which are overwritten below
45
+
46
+ @//allocate and clear state
47
+ pld [sp, #-5*8] @//preload state data
48
+ vmov.i64 q6, #0 @ q6..q15 (d12-d31) are all set to zero in this sequence
49
+ vpush {d12} @ each push of the zeroed d12 adds one zero lane; five pushes = stack lanes Aba..Asa
50
+ vmov.i64 q7, #0
51
+ vmov.i64 q8, #0
52
+ vpush {d12}
53
+ vmov.i64 q9, #0
54
+ vmov.i64 q10, #0
55
+ vpush {d12}
56
+ vmov.i64 q11, #0
57
+ vmov.i64 q12, #0
58
+ vpush {d12}
59
+ vmov.i64 q13, #0
60
+ vmov.i64 q14, #0
61
+ vpush {d12}
62
+ vmov.i64 q15, #0
63
+
64
+ subs r2, r2, #cKeccakR_SizeInBytes @ r2 = byte count - 72; carry clear if less than one full block
65
+ bcc crypto_hash_LoopEnd @ no full block: skip the loop
66
+ pld [r1] @//preload in data
67
+
68
+ @// Complete rate loop
69
+ crypto_hash_Loop: @ runs once per full 72-byte block; r1 = input pointer, r2 = bytes left minus 72
70
+ @ XOR 9 input lanes into the state. Loads and XORs are interleaved; d0-d3 are scratch.
71
+ vldr d7, [sp, #Aba] @ d7 = stack lane Aba
72
+ vldm r1!, { d0-d1 } @ input lanes 1-2
73
+ veor.64 d7, d0 @ lane 1
74
+ vldm r1!, { d2-d3 } @ input lanes 3-4
75
+ veor.64 d12, d1 @ lane 2
76
+ vldm r1!, { d0-d1 } @ input lanes 5-6
77
+ veor.64 d17, d2 @ lane 3
78
+ vldr d8, [sp, #Aga] @ d8 = stack lane Aga
79
+ veor.64 d22, d3 @ lane 4
80
+ vldm r1!, { d2-d3 } @ input lanes 7-8
81
+ veor.64 d27, d0 @ lane 5
82
+ vstr d7, [sp, #Aba] @ store updated Aba
83
+ veor.64 d8, d1 @ lane 6
84
+ vldm r1!, { d0 } @ input lane 9
85
+ veor.64 d13, d2 @ lane 7
86
+ vldr d9, [sp, #Aka] @ Aka, Ama, Asa are read only for the XOR of stack lanes below
87
+ veor.64 d18, d3 @ lane 8
88
+ vldr d10, [sp, #Ama]
89
+ veor.64 d23, d0 @ lane 9
90
+ vldr d11, [sp, #Asa]
91
+ vstr d8, [sp, #Aga] @ store updated Aga
92
+
93
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11 (the stack copies were already written)
94
+ veor.64 d5, d8, d9
95
+ veor.64 d5, d5, d7 @ d5 = Aba ^ Aga ^ Aka ^ Ama ^ Asa
96
+ @ NOTE(review): d5 is presumably an input expected by KeccakF_armv7a_neon_asm, which is also assumed to preserve r1, r2, r5 and sp - confirm
97
+ bl KeccakF_armv7a_neon_asm
98
+ subs r2, r2, #cKeccakR_SizeInBytes @ carry stays set while another full block remains
99
+ bcs crypto_hash_Loop
100
+ crypto_hash_LoopEnd: @ fewer than 72 bytes remain
101
+ @ Absorb the remaining whole 8-byte lanes (0..8 of them); r2 keeps the byte count for the padding code.
102
+ adds r2, r2, #cKeccakR_SizeInBytes @ undo the last subtraction: r2 = bytes left (0..71)
103
+ vldm sp, { d7-d11 } @get 5 lanes from stack
104
+ cmp r2, #8
105
+ blo crypto_hashd10IncompleteLane @ no complete lane left
106
+
107
+ cmp r2, #4*8
108
+ bhs crypto_hash_4LanesOrMore
109
+
110
+ @1 to 3 lanes left
111
+ vld1.64 d0, [r1]!
112
+ cmp r2, #2*8
113
+ veor.64 d7, d0 @ lane 1; the NEON XOR leaves the cmp flags intact for the blo below
114
+ blo crypto_hash_FinalizeCompleteLanes
115
+ vld1.64 d0, [r1]!
116
+ cmp r2, #3*8
117
+ veor.64 d12, d0 @ lane 2
118
+ blo crypto_hash_FinalizeCompleteLanes
119
+ vld1.64 d0, [r1]!
120
+ veor.64 d17, d0 @ lane 3
121
+ b crypto_hash_FinalizeCompleteLanes
122
+
123
+ crypto_hash_4LanesOrMore: @ 4 to 8 complete lanes left
124
+ vldm r1!, { d0-d1 }
125
+ veor.64 d7, d0 @ lane 1
126
+ vldm r1!, { d2-d3 }
127
+ veor.64 d12, d1 @ lane 2
128
+ veor.64 d17, d2 @ lane 3
129
+ veor.64 d22, d3 @ lane 4
130
+
131
+ cmp r2, #5*8
132
+ blo crypto_hash_FinalizeCompleteLanes
133
+ vld1.64 d0, [r1]!
134
+ cmp r2, #6*8
135
+ veor.64 d27, d0 @ lane 5
136
+ blo crypto_hash_FinalizeCompleteLanes
137
+ vld1.64 d0, [r1]!
138
+ cmp r2, #7*8
139
+ veor.64 d8, d0 @ lane 6 (register copy of stack lane Aga)
140
+ blo crypto_hash_FinalizeCompleteLanes
141
+ vld1.64 d0, [r1]!
142
+ cmp r2, #8*8
143
+ veor.64 d13, d0 @ lane 7
144
+ blo crypto_hash_FinalizeCompleteLanes
145
+ vld1.64 d0, [r1]!
146
+ veor.64 d18, d0 @ lane 8
147
+
148
+ crypto_hash_FinalizeCompleteLanes: @ all complete lanes absorbed; falls through
149
+
150
+ @ Build the last, partial lane in d0: a 0x01 byte placed just above the 0..7 trailing message bytes.
151
+ crypto_hashd10IncompleteLane:
152
+ and r0, r2, #cKeccakLaneSizeInBytes-1 @ r0 = trailing byte count (r2 mod 8)
153
+ vmov.i8 d0, #0xFF @padding: d0 = all ones
154
+ lsr r2, r2, #3 @//number of lanes left (complete lanes; used later as index of the lane that receives d0)
155
+
156
+ vshr.u64 d0, #63 @padding: d0 = 1
157
+ adr r3, crypto_hash_IncompleteLaneTable
158
+ ldr pc, [r3, r0, LSL #2] @ jump through table entry r0 (entries are absolute addresses)
159
+
160
+ crypto_hash_IncompleteLaneTable:
161
+ .long crypto_hash_IncompleteDone @0 left
162
+ .long crypto_hash_1left
163
+ .long crypto_hash_2left
164
+ .long crypto_hash_3left
165
+ .long crypto_hash_4left
166
+ .long crypto_hash_5left
167
+ .long crypto_hash_6left
168
+ .long crypto_hash_7left
169
+ @ Each handler shifts the 1 up by 8 bits per trailing byte, then loads the trailing bytes into the low element(s) of d0.
170
+ crypto_hash_1left:
171
+ vshl.u64 d0, d0, #8
172
+ vld1.8 d0[0], [r1]!
173
+ b crypto_hash_IncompleteDone
174
+
175
+ crypto_hash_2left:
176
+ vshl.u64 d0, d0, #16
177
+ vld1.16 d0[0], [r1]!
178
+ b crypto_hash_IncompleteDone
179
+
180
+ crypto_hash_3left:
181
+ vshl.u64 d0, d0, #24
182
+ vld1.16 d0[0], [r1]! @ bytes 0-1
183
+ vld1.8 d0[2], [r1]! @ byte 2
184
+ b crypto_hash_IncompleteDone
185
+
186
+ crypto_hash_4left:
187
+ vshl.u64 d0, d0, #32
188
+ vld1.32 d0[0], [r1]!
189
+ b crypto_hash_IncompleteDone
190
+
191
+ crypto_hash_5left:
192
+ vshl.u64 d0, d0, #40
193
+ vld1.32 d0[0], [r1]! @ bytes 0-3
194
+ vld1.8 d0[4], [r1]! @ byte 4
195
+ b crypto_hash_IncompleteDone
196
+
197
+ crypto_hash_6left:
198
+ vshl.u64 d0, d0, #48
199
+ vld1.32 d0[0], [r1]! @ bytes 0-3
200
+ vld1.16 d0[2], [r1]! @ bytes 4-5 (16-bit element 2)
201
+ b crypto_hash_IncompleteDone
202
+
203
+ crypto_hash_7left:
204
+ vshl.u64 d0, d0, #56
205
+ vld1.32 d0[0], [r1]! @ bytes 0-3
206
+ vld1.16 d0[2], [r1]! @ bytes 4-5
207
+ vld1.8 d0[6], [r1]! @ byte 6
208
+ @ falls through to crypto_hash_IncompleteDone
209
+
210
+ crypto_hash_IncompleteDone: @ d0 = padded partial lane, r2 = number of complete lanes already absorbed (0..8)
211
+ adr r3, crypto_hash_xorlastLaneTable
212
+ ldr pc, [r3, r2, LSL #2] @ entry r2 XORs d0 into lane r2+1
213
+
214
+ crypto_hash_xorlastLaneTable:
215
+ .long crypto_hash_xorLane1
216
+ .long crypto_hash_xorLane2
217
+ .long crypto_hash_xorLane3
218
+ .long crypto_hash_xorLane4
219
+ .long crypto_hash_xorLane5
220
+ .long crypto_hash_xorLane6
221
+ .long crypto_hash_xorLane7
222
+ .long crypto_hash_xorLane8
223
+ .long crypto_hash_xorLane9
224
+ @ Lane-to-register order below matches the absorb code: d7, d12, d17, d22, d27, d8, d13, d18, d23.
225
+ crypto_hash_xorLane1:
226
+ veor.64 d7, d0
227
+ b crypto_hash_xorLastBitOfRate
228
+
229
+ crypto_hash_xorLane2:
230
+ veor.64 d12, d0
231
+ b crypto_hash_xorLastBitOfRate
232
+
233
+ crypto_hash_xorLane3:
234
+ veor.64 d17, d0
235
+ b crypto_hash_xorLastBitOfRate
236
+
237
+ crypto_hash_xorLane4:
238
+ veor.64 d22, d0
239
+ b crypto_hash_xorLastBitOfRate
240
+
241
+ crypto_hash_xorLane5:
242
+ veor.64 d27, d0
243
+ b crypto_hash_xorLastBitOfRate
244
+
245
+ crypto_hash_xorLane6:
246
+ veor.64 d8, d0
247
+ b crypto_hash_xorLastBitOfRate
248
+
249
+ crypto_hash_xorLane7:
250
+ veor.64 d13, d0
251
+ b crypto_hash_xorLastBitOfRate
252
+
253
+ crypto_hash_xorLane8:
254
+ veor.64 d18, d0
255
+ b crypto_hash_xorLastBitOfRate
256
+
257
+ crypto_hash_xorLane9:
258
+ veor.64 d23, d0
259
+ @ falls through to crypto_hash_xorLastBitOfRate
260
+ crypto_hash_xorLastBitOfRate: @ finish padding, run the last permutation, write the digest, return
261
+ vmov.i8 d3, #0xFF
262
+ vshl.u64 d3, d3, #63 @ d3 = 0x8000000000000000
263
+ veor.64 d23, d23, d3 @ flip the top bit of lane 9, the last lane of the 72-byte rate
264
+
265
+ vstm sp, { d7-d8 } @put 2 lanes back on stack (others not modified)
266
+
267
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11
268
+ veor.64 d5, d8, d9
269
+ veor.64 d5, d5, d7 @ d5 = XOR of the five stack lanes, as before the call in the block loop
270
+
271
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): assumed to preserve r5 and sp - confirm
272
+
273
+ pld [r5] @//preload out data for write
274
+ @ Output lanes in the same order they were absorbed: d0 (Aba), d12, d17, d22, d27, d1 (Aga), d13, d18 [, d23].
275
+ vpop { d0-d1 } @ pop the two lowest stack lanes (offsets Aba and Aga)
276
+ vst1.64 d0, [r5]!
277
+ vst1.64 d12, [r5]!
278
+ vst1.64 d17, [r5]!
279
+ vst1.64 d22, [r5]!
280
+ vst1.64 d27, [r5]!
281
+ vst1.64 d1, [r5]!
282
+ vst1.64 d13, [r5]!
283
+ vst1.64 d18, [r5]!
284
+ .if crypto_hash_BYTES == cKeccakR_SizeInBytes @ true as configured in this file, so 9 lanes (72 bytes) are written
285
+ vst1.64 d23, [r5]!
286
+ .endif
287
+
288
+ adds sp, sp, #3*8 @no need of last stacked lanes in output
289
+
290
+ vpop {q4-q7} @ restore d8-d15
291
+ movs r0, #0 @ return value 0
292
+ pop {r4-r6,pc} @ restore r4-r6 and return
293
+
294
+ @
295
+ .align 8
296
+
@@ -0,0 +1,429 @@
1
+ @ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
2
+ @ Michaël Peeters and Gilles Van Assche. For more information, feedback or
3
+ @ questions, please refer to our website: http://keccak.noekeon.org/
4
+ @
5
+ @ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
6
+ @
7
+ @ To the extent possible under law, the implementer has waived all copyright
8
+ @ and related or neighboring rights to the source code in this file.
9
+ @ http://creativecommons.org/publicdomain/zero/1.0/
10
+
11
+ @ This file was created from a .asm file
12
+ @ using the ads2gas.pl script.
13
+ .equ DO1STROUNDING, 0 @ not referenced by any instruction in this file
14
+
15
+ @ PRESERVE8
16
+ .text
17
+
18
+ @// --- defines
19
+ .equ cKeccakLaneSizeInBytes, 8 @ one state lane = 64 bits
20
+ .equ cKeccakR_SizeInBytes , 1088/8 @ rate: 1088 bits = 136 bytes = 17 lanes per absorbed block
21
+ .equ crypto_hash_BYTES , cKeccakR_SizeInBytes @// populate.py, please set crypto_hash_BYTES
22
+
23
+ @// --- offsets in state (byte offsets from sp of the five lanes kept on the stack)
24
+ .equ Aba, 0*8
25
+ .equ Aga, 1*8
26
+ .equ Aka, 2*8
27
+ .equ Ama, 3*8
28
+ .equ Asa, 4*8
29
+
30
+
31
+ @// --- code
32
+
33
+ .align 8 @ NOTE(review): GNU as on ARM reads the operand as a power of two (2^8 = 256 bytes) - confirm this is intended
34
+
35
+ .global KeccakF_armv7a_neon_asm @ called with bl below; no definition appears in this file
36
+
37
+ @//int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
38
+ .global crypto_hash_keccakc512_inplace_armv7a_neon
39
+
40
+ crypto_hash_keccakc512_inplace_armv7a_neon: @ entry point; see the C prototype in the comment above
41
+ @ Register use visible below: r0 (copied to r5) = output pointer, r1 = input pointer, r2 = byte count. r3 is overwritten later without being read, so an upper half of inlen held there would be ignored.
42
+ push {r4-r6,lr} @ save r4-r6 and the return address (r4 and r6 are not used in this file)
43
+ mov r5, r0 @ keep the output pointer; r0 is reused as scratch later
44
+ vpush {q4-q7} @ save d8-d15, which are overwritten below
45
+
46
+ @//allocate and clear state
47
+ pld [sp, #-5*8] @//preload state data
48
+ vmov.i64 q6, #0 @ q6..q15 (d12-d31) are all set to zero in this sequence
49
+ vpush {d12} @ each push of the zeroed d12 adds one zero lane; five pushes = stack lanes Aba..Asa
50
+ vmov.i64 q7, #0
51
+ vmov.i64 q8, #0
52
+ vpush {d12}
53
+ vmov.i64 q9, #0
54
+ vmov.i64 q10, #0
55
+ vpush {d12}
56
+ vmov.i64 q11, #0
57
+ vmov.i64 q12, #0
58
+ vpush {d12}
59
+ vmov.i64 q13, #0
60
+ vmov.i64 q14, #0
61
+ vpush {d12}
62
+ vmov.i64 q15, #0
63
+
64
+ subs r2, r2, #cKeccakR_SizeInBytes @ r2 = byte count - 136; carry clear if less than one full block
65
+ bcc crypto_hash_LoopEnd @ no full block: skip the loop
66
+ pld [r1] @//preload in data
67
+
68
+ @// Complete rate loop
69
+ crypto_hash_Loop: @ runs once per full 136-byte block; r1 = input pointer, r2 = bytes left minus 136
70
+ @ XOR 17 input lanes into the state. Loads and XORs are interleaved; d0-d3 are scratch.
71
+ vldr d7, [sp, #Aba] @ d7 = stack lane Aba (the other four stack lanes are loaded further down)
72
+ vldm r1!, { d0-d1 } @ input lanes 1-2
73
+ veor.64 d7, d0 @ lane 1
74
+ vldm r1!, { d2-d3 } @ input lanes 3-4
75
+ veor.64 d12, d1 @ lane 2
76
+ vldm r1!, { d0-d1 } @ input lanes 5-6
77
+ veor.64 d17, d2 @ lane 3
78
+ vldr d8, [sp, #Aga] @ d8 = stack lane Aga
79
+ veor.64 d22, d3 @ lane 4
80
+ vldm r1!, { d2-d3 } @ input lanes 7-8
81
+ veor.64 d27, d0 @ lane 5
82
+ vstr d7, [sp, #Aba] @ store updated Aba
83
+ veor.64 d8, d1 @ lane 6
84
+ vldm r1!, { d0-d1 } @ input lanes 9-10
85
+ veor.64 d13, d2 @ lane 7
86
+ vldr d9, [sp, #Aka] @ d9 = stack lane Aka
87
+ veor.64 d18, d3 @ lane 8
88
+ vldm r1!, { d2-d3 } @ input lanes 11-12
89
+ veor.64 d23, d0 @ lane 9
90
+ vldr d10, [sp, #Ama] @ d10 = stack lane Ama
91
+ veor.64 d28, d1 @ lane 10
92
+ vldm r1!, { d0-d1 } @ input lanes 13-14
93
+ veor.64 d9, d2 @ lane 11
94
+ vstr d8, [sp, #Aga] @ store updated Aga
95
+ veor.64 d14, d3 @ lane 12
96
+ vldm r1!, { d2-d3 } @ input lanes 15-16
97
+ veor.64 d19, d0 @ lane 13
98
+ vldr d11, [sp, #Asa] @ Asa is read only for the XOR of stack lanes below
99
+ veor.64 d24, d1 @ lane 14
100
+ vstr d9, [sp, #Aka] @ store updated Aka
101
+ veor.64 d29, d2 @ lane 15
102
+ vldm r1!, { d0 } @ input lane 17
103
+ veor.64 d10, d3 @ lane 16
104
+ veor.64 d15, d0 @ lane 17
105
+
106
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11; d10 itself is unchanged
107
+ vstr d10, [sp, #Ama] @ store updated Ama
108
+ veor.64 d5, d8, d9
109
+ veor.64 d5, d5, d7 @ d5 = Aba ^ Aga ^ Aka ^ Ama ^ Asa
110
+ @ NOTE(review): d5 is presumably an input expected by KeccakF_armv7a_neon_asm, which is also assumed to preserve r1, r2, r5 and sp - confirm
111
+ bl KeccakF_armv7a_neon_asm
112
+ subs r2, r2, #cKeccakR_SizeInBytes @ carry stays set while another full block remains
113
+ bcs crypto_hash_Loop
114
+ crypto_hash_LoopEnd: @ fewer than 136 bytes remain
115
+ @ Absorb the remaining whole 8-byte lanes (0..16 of them); r2 keeps the byte count for the padding code.
116
+ adds r2, r2, #cKeccakR_SizeInBytes @ undo the last subtraction: r2 = bytes left (0..135)
117
+ vldm sp, { d7-d11 } @get 5 lanes from stack
118
+ cmp r2, #8
119
+ blo crypto_hashd10IncompleteLane @ no complete lane left
120
+
121
+ @ Absorb last complete lanes
122
+ cmp r2, #8*8
123
+ bhs crypto_hash_8LanesOrMore
124
+
125
+ @less than 8 lanes left
126
+ cmp r2, #4*8
127
+ bhs crypto_hash_4LanesOrMore
128
+
129
+ @1 to 3 lanes left
130
+ vld1.64 d0, [r1]!
131
+ cmp r2, #2*8
132
+ veor.64 d7, d0 @ lane 1; the NEON XOR leaves the cmp flags intact for the blo below
133
+ blo crypto_hash_FinalizeCompleteLanes
134
+ vld1.64 d0, [r1]!
135
+ cmp r2, #3*8
136
+ veor.64 d12, d0 @ lane 2
137
+ blo crypto_hash_FinalizeCompleteLanes
138
+ vld1.64 d0, [r1]!
139
+ veor.64 d17, d0 @ lane 3
140
+ b crypto_hash_FinalizeCompleteLanes
141
+
142
+ crypto_hash_4LanesOrMore: @ 4 to 7 complete lanes left
143
+ vldm r1!, { d0-d1 }
144
+ veor.64 d7, d0 @ lane 1
145
+ vldm r1!, { d2-d3 }
146
+ veor.64 d12, d1 @ lane 2
147
+ veor.64 d17, d2 @ lane 3
148
+ veor.64 d22, d3 @ lane 4
149
+
150
+ cmp r2, #5*8
151
+ blo crypto_hash_FinalizeCompleteLanes
152
+ vld1.64 d0, [r1]!
153
+ cmp r2, #6*8
154
+ veor.64 d27, d0 @ lane 5
155
+ blo crypto_hash_FinalizeCompleteLanes
156
+ vld1.64 d0, [r1]!
157
+ cmp r2, #7*8
158
+ veor.64 d8, d0 @ lane 6 (register copy of stack lane Aga)
159
+ blo crypto_hash_FinalizeCompleteLanes
160
+ vld1.64 d0, [r1]!
161
+ veor.64 d13, d0 @ lane 7
162
+ b crypto_hash_FinalizeCompleteLanes
163
+
164
+ crypto_hash_8LanesOrMore: @ 8 to 16 complete lanes left: absorb lanes 1-8 unconditionally
165
+ vldm r1!, { d0-d1 }
166
+ veor.64 d7, d0 @ lane 1
167
+ vldm r1!, { d2-d3 }
168
+ veor.64 d12, d1 @ lane 2
169
+ vldm r1!, { d0-d1 }
170
+ veor.64 d17, d2 @ lane 3
171
+ veor.64 d22, d3 @ lane 4
172
+ vldm r1!, { d2-d3 }
173
+ veor.64 d27, d0 @ lane 5
174
+ veor.64 d8, d1 @ lane 6
175
+ veor.64 d13, d2 @ lane 7
176
+ veor.64 d18, d3 @ lane 8
177
+
178
+ cmp r2, #12*8
179
+ bhs crypto_hash_12LanesOrMore
180
+
181
+ @8 to 11 lanes left
182
+ cmp r2, #9*8
183
+ blo crypto_hash_FinalizeCompleteLanes
184
+ vld1.64 d0, [r1]!
185
+ cmp r2, #10*8
186
+ veor.64 d23, d0 @ lane 9
187
+ blo crypto_hash_FinalizeCompleteLanes
188
+ vld1.64 d0, [r1]!
189
+ cmp r2, #11*8
190
+ veor.64 d28, d0 @ lane 10
191
+ blo crypto_hash_FinalizeCompleteLanes
192
+ vld1.64 d0, [r1]!
193
+ veor.64 d9, d0 @ lane 11 (register copy of stack lane Aka)
194
+ b crypto_hash_FinalizeCompleteLanes
195
+
196
+ crypto_hash_12LanesOrMore: @ lanes 1-8 are already absorbed; absorb lanes 9-12 unconditionally
197
+ vldm r1!, { d0-d1 }
198
+ veor.64 d23, d0 @ lane 9
199
+ vldm r1!, { d2-d3 }
200
+ veor.64 d28, d1 @ lane 10
201
+ veor.64 d9, d2 @ lane 11
202
+ veor.64 d14, d3 @ lane 12
203
+
204
+ @12 to 16 lanes left
205
+ cmp r2, #13*8
206
+ blo crypto_hash_FinalizeCompleteLanes
207
+ vld1.64 d0, [r1]!
208
+ cmp r2, #14*8
209
+ veor.64 d19, d0 @ lane 13
210
+ blo crypto_hash_FinalizeCompleteLanes
211
+ vld1.64 d0, [r1]!
212
+ cmp r2, #15*8
213
+ veor.64 d24, d0 @ lane 14
214
+ blo crypto_hash_FinalizeCompleteLanes
215
+ vld1.64 d0, [r1]!
216
+ cmp r2, #16*8
217
+ veor.64 d29, d0 @ lane 15
218
+ blo crypto_hash_FinalizeCompleteLanes
219
+ vld1.64 d0, [r1]!
220
+ veor.64 d10, d0 @ lane 16 (register copy of stack lane Ama)
221
+
222
+ crypto_hash_FinalizeCompleteLanes: @ all complete lanes absorbed; falls through
223
+
224
+ @ Build the last, partial lane in d0: a 0x01 byte placed just above the 0..7 trailing message bytes.
225
+ crypto_hashd10IncompleteLane:
226
+ and r0, r2, #cKeccakLaneSizeInBytes-1 @ r0 = trailing byte count (r2 mod 8)
227
+ vmov.i8 d0, #0xFF @padding: d0 = all ones
228
+ lsr r2, r2, #3 @//number of lanes left (complete lanes; used later as index of the lane that receives d0)
229
+
230
+ vshr.u64 d0, #63 @padding: d0 = 1
231
+ adr r3, crypto_hash_IncompleteLaneTable
232
+ ldr pc, [r3, r0, LSL #2] @ jump through table entry r0 (entries are absolute addresses)
233
+
234
+ crypto_hash_IncompleteLaneTable:
235
+ .long crypto_hash_IncompleteDone @0 left
236
+ .long crypto_hash_1left
237
+ .long crypto_hash_2left
238
+ .long crypto_hash_3left
239
+ .long crypto_hash_4left
240
+ .long crypto_hash_5left
241
+ .long crypto_hash_6left
242
+ .long crypto_hash_7left
243
+ @ Each handler shifts the 1 up by 8 bits per trailing byte, then loads the trailing bytes into the low element(s) of d0.
244
+ crypto_hash_1left:
245
+ vshl.u64 d0, d0, #8
246
+ vld1.8 d0[0], [r1]!
247
+ b crypto_hash_IncompleteDone
248
+
249
+ crypto_hash_2left:
250
+ vshl.u64 d0, d0, #16
251
+ vld1.16 d0[0], [r1]!
252
+ b crypto_hash_IncompleteDone
253
+
254
+ crypto_hash_3left:
255
+ vshl.u64 d0, d0, #24
256
+ vld1.16 d0[0], [r1]! @ bytes 0-1
257
+ vld1.8 d0[2], [r1]! @ byte 2
258
+ b crypto_hash_IncompleteDone
259
+
260
+ crypto_hash_4left:
261
+ vshl.u64 d0, d0, #32
262
+ vld1.32 d0[0], [r1]!
263
+ b crypto_hash_IncompleteDone
264
+
265
+ crypto_hash_5left:
266
+ vshl.u64 d0, d0, #40
267
+ vld1.32 d0[0], [r1]! @ bytes 0-3
268
+ vld1.8 d0[4], [r1]! @ byte 4
269
+ b crypto_hash_IncompleteDone
270
+
271
+ crypto_hash_6left:
272
+ vshl.u64 d0, d0, #48
273
+ vld1.32 d0[0], [r1]! @ bytes 0-3
274
+ vld1.16 d0[2], [r1]! @ bytes 4-5 (16-bit element 2)
275
+ b crypto_hash_IncompleteDone
276
+
277
+ crypto_hash_7left:
278
+ vshl.u64 d0, d0, #56
279
+ vld1.32 d0[0], [r1]! @ bytes 0-3
280
+ vld1.16 d0[2], [r1]! @ bytes 4-5
281
+ vld1.8 d0[6], [r1]! @ byte 6
282
+ @ falls through to crypto_hash_IncompleteDone
283
+ crypto_hash_IncompleteDone: @ d0 = padded partial lane, r2 = number of complete lanes already absorbed (0..16)
284
+ adr r3, crypto_hash_xorlastLaneTable
285
+ ldr pc, [r3, r2, LSL #2] @ entry r2 XORs d0 into lane r2+1
286
+
287
+ crypto_hash_xorlastLaneTable:
288
+ .long crypto_hash_xorLane1
289
+ .long crypto_hash_xorLane2
290
+ .long crypto_hash_xorLane3
291
+ .long crypto_hash_xorLane4
292
+ .long crypto_hash_xorLane5
293
+ .long crypto_hash_xorLane6
294
+ .long crypto_hash_xorLane7
295
+ .long crypto_hash_xorLane8
296
+ .long crypto_hash_xorLane9
297
+ .long crypto_hash_xorLane10
298
+ .long crypto_hash_xorLane11
299
+ .long crypto_hash_xorLane12
300
+ .long crypto_hash_xorLane13
301
+ .long crypto_hash_xorLane14
302
+ .long crypto_hash_xorLane15
303
+ .long crypto_hash_xorLane16
304
+ .long crypto_hash_xorLane17
305
+ @ Lane-to-register order below matches the absorb code: d7, d12, d17, d22, d27, d8, d13, d18, d23, d28, d9, d14, d19, d24, d29, d10, d15.
306
+ crypto_hash_xorLane1:
307
+ veor.64 d7, d0
308
+ b crypto_hash_xorLastBitOfRate
309
+
310
+ crypto_hash_xorLane2:
311
+ veor.64 d12, d0
312
+ b crypto_hash_xorLastBitOfRate
313
+
314
+ crypto_hash_xorLane3:
315
+ veor.64 d17, d0
316
+ b crypto_hash_xorLastBitOfRate
317
+
318
+ crypto_hash_xorLane4:
319
+ veor.64 d22, d0
320
+ b crypto_hash_xorLastBitOfRate
321
+
322
+ crypto_hash_xorLane5:
323
+ veor.64 d27, d0
324
+ b crypto_hash_xorLastBitOfRate
325
+
326
+ crypto_hash_xorLane6:
327
+ veor.64 d8, d0
328
+ b crypto_hash_xorLastBitOfRate
329
+
330
+ crypto_hash_xorLane7:
331
+ veor.64 d13, d0
332
+ b crypto_hash_xorLastBitOfRate
333
+
334
+ crypto_hash_xorLane8:
335
+ veor.64 d18, d0
336
+ b crypto_hash_xorLastBitOfRate
337
+
338
+ crypto_hash_xorLane9:
339
+ veor.64 d23, d0
340
+ b crypto_hash_xorLastBitOfRate
341
+
342
+ crypto_hash_xorLane10:
343
+ veor.64 d28, d0
344
+ b crypto_hash_xorLastBitOfRate
345
+
346
+ crypto_hash_xorLane11:
347
+ veor.64 d9, d0
348
+ b crypto_hash_xorLastBitOfRate
349
+
350
+ crypto_hash_xorLane12:
351
+ veor.64 d14, d0
352
+ b crypto_hash_xorLastBitOfRate
353
+
354
+ crypto_hash_xorLane13:
355
+ veor.64 d19, d0
356
+ b crypto_hash_xorLastBitOfRate
357
+
358
+ crypto_hash_xorLane14:
359
+ veor.64 d24, d0
360
+ b crypto_hash_xorLastBitOfRate
361
+
362
+ crypto_hash_xorLane15:
363
+ veor.64 d29, d0
364
+ b crypto_hash_xorLastBitOfRate
365
+
366
+ crypto_hash_xorLane16:
367
+ veor.64 d10, d0
368
+ b crypto_hash_xorLastBitOfRate
369
+
370
+ crypto_hash_xorLane17:
371
+ veor.64 d15, d0
372
+ @ falls through to crypto_hash_xorLastBitOfRate
373
+
374
+ crypto_hash_xorLastBitOfRate: @ finish padding, run the last permutation, write the digest, return
375
+ vmov.i8 d3, #0xFF
376
+ vshl.u64 d3, d3, #63 @ d3 = 0x8000000000000000
377
+ veor.64 d15, d15, d3 @ flip the top bit of lane 17, the last lane of the 136-byte rate
378
+
379
+ vstm sp, { d7-d10 } @put 4 lanes back on stack (last one not modified)
380
+
381
+ veor.64 q4, q5 @ d8 ^= d10, d9 ^= d11
382
+ veor.64 d5, d8, d9
383
+ veor.64 d5, d5, d7 @ d5 = XOR of the five stack lanes, as before the call in the block loop
384
+
385
+ bl KeccakF_armv7a_neon_asm @ NOTE(review): assumed to preserve r5 and sp - confirm
386
+
387
+ .if crypto_hash_BYTES == cKeccakR_SizeInBytes @ true as configured in this file: write all 17 rate lanes (136 bytes)
388
+ pld [r5] @//preload out data for write
389
+ @ Lanes are stored in the order they were absorbed; d0-d3 receive the stack lanes Aba, Aga, Aka, Ama.
390
+ vpop { d0-d1 } @ stack lanes at offsets Aba and Aga
391
+ vst1.64 d0, [r5]!
392
+ vst1.64 d12, [r5]!
393
+ vst1.64 d17, [r5]!
394
+ vst1.64 d22, [r5]!
395
+ vst1.64 d27, [r5]!
396
+
397
+ vpop { d2-d3 } @ stack lanes at offsets Aka and Ama
398
+ vst1.64 d1, [r5]!
399
+ vst1.64 d13, [r5]!
400
+ vst1.64 d18, [r5]!
401
+ vst1.64 d23, [r5]!
402
+ vst1.64 d28, [r5]!
403
+
404
+ vst1.64 d2, [r5]!
405
+ vst1.64 d14, [r5]!
406
+ vst1.64 d19, [r5]!
407
+ vst1.64 d24, [r5]!
408
+ vst1.64 d29, [r5]!
409
+
410
+ adds sp, sp, #8 @no need of last stacked lane in output
411
+ vst1.64 d3, [r5]!
412
+ vst1.64 d15, [r5]!
413
+ .else
414
+ @output fixed 256 bits
415
+ vpop { d0 } @ stack lane at offset Aba
416
+ vst1.64 d0, [r5]!
417
+ vst1.64 d12, [r5]!
418
+ adds sp, sp, #4*8 @no need of other 4 stacked lanes in output
419
+ vst1.64 d17, [r5]!
420
+ vst1.64 d22, [r5]!
421
+ .endif
422
+
423
+ vpop {q4-q7} @ restore d8-d15
424
+ movs r0, #0 @ return value 0
425
+ pop {r4-r6,pc} @ restore r4-r6 and return
426
+
427
+ @
428
+ .align 8
429
+