ring-native 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
@@ -0,0 +1,666 @@
1
+ #!/usr/bin/env perl
2
+
3
+ # ====================================================================
4
+ # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
+ # project. The module is, however, dual licensed under OpenSSL and
6
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
7
+ # details see http://www.openssl.org/~appro/cryptogams/.
8
+ #
9
+ # Permission to use under GPL terms is granted.
10
+ # ====================================================================
11
+
12
+ # SHA512 block procedure for ARMv4. September 2007.
13
+
14
+ # This code is ~4.5 (four and a half) times faster than code generated
15
+ # by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
16
+ # Xscale PXA250 core].
17
+ #
18
+ # July 2010.
19
+ #
20
+ # Rescheduling for dual-issue pipeline resulted in 6% improvement on
21
+ # Cortex A8 core and ~40 cycles per processed byte.
22
+
23
+ # February 2011.
24
+ #
25
+ # Profiler-assisted and platform-specific optimization resulted in 7%
26
+ # improvement on Cortex A8 core and ~38 cycles per byte.
27
+
28
+ # March 2011.
29
+ #
30
+ # Add NEON implementation. On Cortex A8 it was measured to process
31
+ # one byte in 23.3 cycles or ~60% faster than integer-only code.
32
+
33
+ # August 2012.
34
+ #
35
+ # Improve NEON performance by 12% on Snapdragon S4. In absolute
36
+ # terms it's 22.6 cycles per byte, which is a disappointing result.
37
+ # Technical writers asserted that 3-way S4 pipeline can sustain
38
+ # multiple NEON instructions per cycle, but dual NEON issue could
39
+ # not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
40
+ # for further details. On a side note Cortex-A15 processes one byte in
41
+ # 16 cycles.
42
+
43
+ # Byte order [in]dependence. =========================================
44
+ #
45
+ # Originally caller was expected to maintain specific *dword* order in
46
+ # h[0-7], namely with most significant dword at *lower* address, which
47
+ # was reflected in below two parameters as 0 and 4. Now caller is
48
+ # expected to maintain native byte order for whole 64-bit values.
49
+ $hi="HI";
50
+ $lo="LO";
51
+ # ====================================================================
52
+
53
+ $flavour = shift;
54
+ if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
55
+ else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
56
+
57
+ if ($flavour && $flavour ne "void") {
58
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
59
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
60
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
61
+ die "can't locate arm-xlate.pl";
62
+
63
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
64
+ } else {
65
+ open STDOUT,">$output";
66
+ }
67
+
68
+ $ctx="r0"; # parameter block
69
+ $inp="r1";
70
+ $len="r2";
71
+
72
+ $Tlo="r3";
73
+ $Thi="r4";
74
+ $Alo="r5";
75
+ $Ahi="r6";
76
+ $Elo="r7";
77
+ $Ehi="r8";
78
+ $t0="r9";
79
+ $t1="r10";
80
+ $t2="r11";
81
+ $t3="r12";
82
+ ############ r13 is stack pointer
83
+ $Ktbl="r14";
84
+ ############ r15 is program counter
85
+
86
+ $Aoff=8*0;
87
+ $Boff=8*1;
88
+ $Coff=8*2;
89
+ $Doff=8*3;
90
+ $Eoff=8*4;
91
+ $Foff=8*5;
92
+ $Goff=8*6;
93
+ $Hoff=8*7;
94
+ $Xoff=8*8;
95
+
96
+ sub BODY_00_15() {
97
+ my $magic = shift;
98
+ $code.=<<___;
99
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
100
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
101
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
102
+ mov $t0,$Elo,lsr#14
103
+ str $Tlo,[sp,#$Xoff+0]
104
+ mov $t1,$Ehi,lsr#14
105
+ str $Thi,[sp,#$Xoff+4]
106
+ eor $t0,$t0,$Ehi,lsl#18
107
+ ldr $t2,[sp,#$Hoff+0] @ h.lo
108
+ eor $t1,$t1,$Elo,lsl#18
109
+ ldr $t3,[sp,#$Hoff+4] @ h.hi
110
+ eor $t0,$t0,$Elo,lsr#18
111
+ eor $t1,$t1,$Ehi,lsr#18
112
+ eor $t0,$t0,$Ehi,lsl#14
113
+ eor $t1,$t1,$Elo,lsl#14
114
+ eor $t0,$t0,$Ehi,lsr#9
115
+ eor $t1,$t1,$Elo,lsr#9
116
+ eor $t0,$t0,$Elo,lsl#23
117
+ eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
118
+ adds $Tlo,$Tlo,$t0
119
+ ldr $t0,[sp,#$Foff+0] @ f.lo
120
+ adc $Thi,$Thi,$t1 @ T += Sigma1(e)
121
+ ldr $t1,[sp,#$Foff+4] @ f.hi
122
+ adds $Tlo,$Tlo,$t2
123
+ ldr $t2,[sp,#$Goff+0] @ g.lo
124
+ adc $Thi,$Thi,$t3 @ T += h
125
+ ldr $t3,[sp,#$Goff+4] @ g.hi
126
+
127
+ eor $t0,$t0,$t2
128
+ str $Elo,[sp,#$Eoff+0]
129
+ eor $t1,$t1,$t3
130
+ str $Ehi,[sp,#$Eoff+4]
131
+ and $t0,$t0,$Elo
132
+ str $Alo,[sp,#$Aoff+0]
133
+ and $t1,$t1,$Ehi
134
+ str $Ahi,[sp,#$Aoff+4]
135
+ eor $t0,$t0,$t2
136
+ ldr $t2,[$Ktbl,#$lo] @ K[i].lo
137
+ eor $t1,$t1,$t3 @ Ch(e,f,g)
138
+ ldr $t3,[$Ktbl,#$hi] @ K[i].hi
139
+
140
+ adds $Tlo,$Tlo,$t0
141
+ ldr $Elo,[sp,#$Doff+0] @ d.lo
142
+ adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
143
+ ldr $Ehi,[sp,#$Doff+4] @ d.hi
144
+ adds $Tlo,$Tlo,$t2
145
+ and $t0,$t2,#0xff
146
+ adc $Thi,$Thi,$t3 @ T += K[i]
147
+ adds $Elo,$Elo,$Tlo
148
+ ldr $t2,[sp,#$Boff+0] @ b.lo
149
+ adc $Ehi,$Ehi,$Thi @ d += T
150
+ teq $t0,#$magic
151
+
152
+ ldr $t3,[sp,#$Coff+0] @ c.lo
153
+ #if __ARM_ARCH__>=7
154
+ it eq @ Thumb2 thing, sanity check in ARM
155
+ #endif
156
+ orreq $Ktbl,$Ktbl,#1
157
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
158
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
159
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
160
+ mov $t0,$Alo,lsr#28
161
+ mov $t1,$Ahi,lsr#28
162
+ eor $t0,$t0,$Ahi,lsl#4
163
+ eor $t1,$t1,$Alo,lsl#4
164
+ eor $t0,$t0,$Ahi,lsr#2
165
+ eor $t1,$t1,$Alo,lsr#2
166
+ eor $t0,$t0,$Alo,lsl#30
167
+ eor $t1,$t1,$Ahi,lsl#30
168
+ eor $t0,$t0,$Ahi,lsr#7
169
+ eor $t1,$t1,$Alo,lsr#7
170
+ eor $t0,$t0,$Alo,lsl#25
171
+ eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
172
+ adds $Tlo,$Tlo,$t0
173
+ and $t0,$Alo,$t2
174
+ adc $Thi,$Thi,$t1 @ T += Sigma0(a)
175
+
176
+ ldr $t1,[sp,#$Boff+4] @ b.hi
177
+ orr $Alo,$Alo,$t2
178
+ ldr $t2,[sp,#$Coff+4] @ c.hi
179
+ and $Alo,$Alo,$t3
180
+ and $t3,$Ahi,$t1
181
+ orr $Ahi,$Ahi,$t1
182
+ orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
183
+ and $Ahi,$Ahi,$t2
184
+ adds $Alo,$Alo,$Tlo
185
+ orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
186
+ sub sp,sp,#8
187
+ adc $Ahi,$Ahi,$Thi @ h += T
188
+ tst $Ktbl,#1
189
+ add $Ktbl,$Ktbl,#8
190
+ ___
191
+ }
192
+ $code=<<___;
193
+ #ifndef __KERNEL__
194
+ # include <openssl/arm_arch.h>
195
+ # define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
196
+ # define VFP_ABI_POP vldmia sp!,{d8-d15}
197
+ #else
198
+ # define __ARM_ARCH__ __LINUX_ARM_ARCH__
199
+ # define __ARM_MAX_ARCH__ 7
200
+ # define VFP_ABI_PUSH
201
+ # define VFP_ABI_POP
202
+ #endif
203
+
204
+ #ifdef __ARMEL__
205
+ # define LO 0
206
+ # define HI 4
207
+ # define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
208
+ #else
209
+ # define HI 0
210
+ # define LO 4
211
+ # define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
212
+ #endif
213
+
214
+ .text
215
+ #if __ARM_ARCH__<7 || defined(__APPLE__)
216
+ .code 32
217
+ #else
218
+ .syntax unified
219
+ # ifdef __thumb2__
220
+ # define adrl adr
221
+ .thumb
222
+ # else
223
+ .code 32
224
+ # endif
225
+ #endif
226
+
227
+ .type K512,%object
228
+ .align 5
229
+ K512:
230
+ WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
231
+ WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
232
+ WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
233
+ WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
234
+ WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
235
+ WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
236
+ WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
237
+ WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
238
+ WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
239
+ WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
240
+ WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
241
+ WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
242
+ WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
243
+ WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
244
+ WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
245
+ WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
246
+ WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
247
+ WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
248
+ WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
249
+ WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
250
+ WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
251
+ WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
252
+ WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
253
+ WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
254
+ WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
255
+ WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
256
+ WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
257
+ WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
258
+ WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
259
+ WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
260
+ WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
261
+ WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
262
+ WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
263
+ WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
264
+ WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
265
+ WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
266
+ WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
267
+ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
268
+ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
269
+ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
270
+ .size K512,.-K512
271
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
272
+ .LOPENSSL_armcap:
273
+ .word OPENSSL_armcap_P-.Lsha512_block_data_order
274
+ .skip 32-4
275
+ #else
276
+ .skip 32
277
+ #endif
278
+
279
+ .global sha512_block_data_order
280
+ .type sha512_block_data_order,%function
281
+ sha512_block_data_order:
282
+ .Lsha512_block_data_order:
283
+ #if __ARM_ARCH__<7
284
+ sub r3,pc,#8 @ sha512_block_data_order
285
+ #else
286
+ adr r3,sha512_block_data_order
287
+ #endif
288
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
289
+ ldr r12,.LOPENSSL_armcap
290
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
291
+ #ifdef __APPLE__
292
+ ldr r12,[r12]
293
+ #endif
294
+ tst r12,#1
295
+ bne .LNEON
296
+ #endif
297
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
298
+ stmdb sp!,{r4-r12,lr}
299
+ sub $Ktbl,r3,#672 @ K512
300
+ sub sp,sp,#9*8
301
+
302
+ ldr $Elo,[$ctx,#$Eoff+$lo]
303
+ ldr $Ehi,[$ctx,#$Eoff+$hi]
304
+ ldr $t0, [$ctx,#$Goff+$lo]
305
+ ldr $t1, [$ctx,#$Goff+$hi]
306
+ ldr $t2, [$ctx,#$Hoff+$lo]
307
+ ldr $t3, [$ctx,#$Hoff+$hi]
308
+ .Loop:
309
+ str $t0, [sp,#$Goff+0]
310
+ str $t1, [sp,#$Goff+4]
311
+ str $t2, [sp,#$Hoff+0]
312
+ str $t3, [sp,#$Hoff+4]
313
+ ldr $Alo,[$ctx,#$Aoff+$lo]
314
+ ldr $Ahi,[$ctx,#$Aoff+$hi]
315
+ ldr $Tlo,[$ctx,#$Boff+$lo]
316
+ ldr $Thi,[$ctx,#$Boff+$hi]
317
+ ldr $t0, [$ctx,#$Coff+$lo]
318
+ ldr $t1, [$ctx,#$Coff+$hi]
319
+ ldr $t2, [$ctx,#$Doff+$lo]
320
+ ldr $t3, [$ctx,#$Doff+$hi]
321
+ str $Tlo,[sp,#$Boff+0]
322
+ str $Thi,[sp,#$Boff+4]
323
+ str $t0, [sp,#$Coff+0]
324
+ str $t1, [sp,#$Coff+4]
325
+ str $t2, [sp,#$Doff+0]
326
+ str $t3, [sp,#$Doff+4]
327
+ ldr $Tlo,[$ctx,#$Foff+$lo]
328
+ ldr $Thi,[$ctx,#$Foff+$hi]
329
+ str $Tlo,[sp,#$Foff+0]
330
+ str $Thi,[sp,#$Foff+4]
331
+
332
+ .L00_15:
333
+ #if __ARM_ARCH__<7
334
+ ldrb $Tlo,[$inp,#7]
335
+ ldrb $t0, [$inp,#6]
336
+ ldrb $t1, [$inp,#5]
337
+ ldrb $t2, [$inp,#4]
338
+ ldrb $Thi,[$inp,#3]
339
+ ldrb $t3, [$inp,#2]
340
+ orr $Tlo,$Tlo,$t0,lsl#8
341
+ ldrb $t0, [$inp,#1]
342
+ orr $Tlo,$Tlo,$t1,lsl#16
343
+ ldrb $t1, [$inp],#8
344
+ orr $Tlo,$Tlo,$t2,lsl#24
345
+ orr $Thi,$Thi,$t3,lsl#8
346
+ orr $Thi,$Thi,$t0,lsl#16
347
+ orr $Thi,$Thi,$t1,lsl#24
348
+ #else
349
+ ldr $Tlo,[$inp,#4]
350
+ ldr $Thi,[$inp],#8
351
+ #ifdef __ARMEL__
352
+ rev $Tlo,$Tlo
353
+ rev $Thi,$Thi
354
+ #endif
355
+ #endif
356
+ ___
357
+ &BODY_00_15(0x94);
358
+ $code.=<<___;
359
+ tst $Ktbl,#1
360
+ beq .L00_15
361
+ ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
362
+ ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
363
+ bic $Ktbl,$Ktbl,#1
364
+ .L16_79:
365
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
366
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
367
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
368
+ mov $Tlo,$t0,lsr#1
369
+ ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
370
+ mov $Thi,$t1,lsr#1
371
+ ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
372
+ eor $Tlo,$Tlo,$t1,lsl#31
373
+ eor $Thi,$Thi,$t0,lsl#31
374
+ eor $Tlo,$Tlo,$t0,lsr#8
375
+ eor $Thi,$Thi,$t1,lsr#8
376
+ eor $Tlo,$Tlo,$t1,lsl#24
377
+ eor $Thi,$Thi,$t0,lsl#24
378
+ eor $Tlo,$Tlo,$t0,lsr#7
379
+ eor $Thi,$Thi,$t1,lsr#7
380
+ eor $Tlo,$Tlo,$t1,lsl#25
381
+
382
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
383
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
384
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
385
+ mov $t0,$t2,lsr#19
386
+ mov $t1,$t3,lsr#19
387
+ eor $t0,$t0,$t3,lsl#13
388
+ eor $t1,$t1,$t2,lsl#13
389
+ eor $t0,$t0,$t3,lsr#29
390
+ eor $t1,$t1,$t2,lsr#29
391
+ eor $t0,$t0,$t2,lsl#3
392
+ eor $t1,$t1,$t3,lsl#3
393
+ eor $t0,$t0,$t2,lsr#6
394
+ eor $t1,$t1,$t3,lsr#6
395
+ ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
396
+ eor $t0,$t0,$t3,lsl#26
397
+
398
+ ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
399
+ adds $Tlo,$Tlo,$t0
400
+ ldr $t0,[sp,#`$Xoff+8*16`+0]
401
+ adc $Thi,$Thi,$t1
402
+
403
+ ldr $t1,[sp,#`$Xoff+8*16`+4]
404
+ adds $Tlo,$Tlo,$t2
405
+ adc $Thi,$Thi,$t3
406
+ adds $Tlo,$Tlo,$t0
407
+ adc $Thi,$Thi,$t1
408
+ ___
409
+ &BODY_00_15(0x17);
410
+ $code.=<<___;
411
+ #if __ARM_ARCH__>=7
412
+ ittt eq @ Thumb2 thing, sanity check in ARM
413
+ #endif
414
+ ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
415
+ ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
416
+ beq .L16_79
417
+ bic $Ktbl,$Ktbl,#1
418
+
419
+ ldr $Tlo,[sp,#$Boff+0]
420
+ ldr $Thi,[sp,#$Boff+4]
421
+ ldr $t0, [$ctx,#$Aoff+$lo]
422
+ ldr $t1, [$ctx,#$Aoff+$hi]
423
+ ldr $t2, [$ctx,#$Boff+$lo]
424
+ ldr $t3, [$ctx,#$Boff+$hi]
425
+ adds $t0,$Alo,$t0
426
+ str $t0, [$ctx,#$Aoff+$lo]
427
+ adc $t1,$Ahi,$t1
428
+ str $t1, [$ctx,#$Aoff+$hi]
429
+ adds $t2,$Tlo,$t2
430
+ str $t2, [$ctx,#$Boff+$lo]
431
+ adc $t3,$Thi,$t3
432
+ str $t3, [$ctx,#$Boff+$hi]
433
+
434
+ ldr $Alo,[sp,#$Coff+0]
435
+ ldr $Ahi,[sp,#$Coff+4]
436
+ ldr $Tlo,[sp,#$Doff+0]
437
+ ldr $Thi,[sp,#$Doff+4]
438
+ ldr $t0, [$ctx,#$Coff+$lo]
439
+ ldr $t1, [$ctx,#$Coff+$hi]
440
+ ldr $t2, [$ctx,#$Doff+$lo]
441
+ ldr $t3, [$ctx,#$Doff+$hi]
442
+ adds $t0,$Alo,$t0
443
+ str $t0, [$ctx,#$Coff+$lo]
444
+ adc $t1,$Ahi,$t1
445
+ str $t1, [$ctx,#$Coff+$hi]
446
+ adds $t2,$Tlo,$t2
447
+ str $t2, [$ctx,#$Doff+$lo]
448
+ adc $t3,$Thi,$t3
449
+ str $t3, [$ctx,#$Doff+$hi]
450
+
451
+ ldr $Tlo,[sp,#$Foff+0]
452
+ ldr $Thi,[sp,#$Foff+4]
453
+ ldr $t0, [$ctx,#$Eoff+$lo]
454
+ ldr $t1, [$ctx,#$Eoff+$hi]
455
+ ldr $t2, [$ctx,#$Foff+$lo]
456
+ ldr $t3, [$ctx,#$Foff+$hi]
457
+ adds $Elo,$Elo,$t0
458
+ str $Elo,[$ctx,#$Eoff+$lo]
459
+ adc $Ehi,$Ehi,$t1
460
+ str $Ehi,[$ctx,#$Eoff+$hi]
461
+ adds $t2,$Tlo,$t2
462
+ str $t2, [$ctx,#$Foff+$lo]
463
+ adc $t3,$Thi,$t3
464
+ str $t3, [$ctx,#$Foff+$hi]
465
+
466
+ ldr $Alo,[sp,#$Goff+0]
467
+ ldr $Ahi,[sp,#$Goff+4]
468
+ ldr $Tlo,[sp,#$Hoff+0]
469
+ ldr $Thi,[sp,#$Hoff+4]
470
+ ldr $t0, [$ctx,#$Goff+$lo]
471
+ ldr $t1, [$ctx,#$Goff+$hi]
472
+ ldr $t2, [$ctx,#$Hoff+$lo]
473
+ ldr $t3, [$ctx,#$Hoff+$hi]
474
+ adds $t0,$Alo,$t0
475
+ str $t0, [$ctx,#$Goff+$lo]
476
+ adc $t1,$Ahi,$t1
477
+ str $t1, [$ctx,#$Goff+$hi]
478
+ adds $t2,$Tlo,$t2
479
+ str $t2, [$ctx,#$Hoff+$lo]
480
+ adc $t3,$Thi,$t3
481
+ str $t3, [$ctx,#$Hoff+$hi]
482
+
483
+ add sp,sp,#640
484
+ sub $Ktbl,$Ktbl,#640
485
+
486
+ teq $inp,$len
487
+ bne .Loop
488
+
489
+ add sp,sp,#8*9 @ destroy frame
490
+ #if __ARM_ARCH__>=5
491
+ ldmia sp!,{r4-r12,pc}
492
+ #else
493
+ ldmia sp!,{r4-r12,lr}
494
+ tst lr,#1
495
+ moveq pc,lr @ be binary compatible with V4, yet
496
+ bx lr @ interoperable with Thumb ISA:-)
497
+ #endif
498
+ .size sha512_block_data_order,.-sha512_block_data_order
499
+ ___
500
+
501
+ {
502
+ my @Sigma0=(28,34,39);
503
+ my @Sigma1=(14,18,41);
504
+ my @sigma0=(1, 8, 7);
505
+ my @sigma1=(19,61,6);
506
+
507
+ my $Ktbl="r3";
508
+ my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
509
+
510
+ my @X=map("d$_",(0..15));
511
+ my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
512
+
513
+ sub NEON_00_15() {
514
+ my $i=shift;
515
+ my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
516
+ my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps
517
+
518
+ $code.=<<___ if ($i<16 || $i&1);
519
+ vshr.u64 $t0,$e,#@Sigma1[0] @ $i
520
+ #if $i<16
521
+ vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
522
+ #endif
523
+ vshr.u64 $t1,$e,#@Sigma1[1]
524
+ #if $i>0
525
+ vadd.i64 $a,$Maj @ h+=Maj from the past
526
+ #endif
527
+ vshr.u64 $t2,$e,#@Sigma1[2]
528
+ ___
529
+ $code.=<<___;
530
+ vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
531
+ vsli.64 $t0,$e,#`64-@Sigma1[0]`
532
+ vsli.64 $t1,$e,#`64-@Sigma1[1]`
533
+ vmov $Ch,$e
534
+ vsli.64 $t2,$e,#`64-@Sigma1[2]`
535
+ #if $i<16 && defined(__ARMEL__)
536
+ vrev64.8 @X[$i],@X[$i]
537
+ #endif
538
+ veor $t1,$t0
539
+ vbsl $Ch,$f,$g @ Ch(e,f,g)
540
+ vshr.u64 $t0,$a,#@Sigma0[0]
541
+ veor $t2,$t1 @ Sigma1(e)
542
+ vadd.i64 $T1,$Ch,$h
543
+ vshr.u64 $t1,$a,#@Sigma0[1]
544
+ vsli.64 $t0,$a,#`64-@Sigma0[0]`
545
+ vadd.i64 $T1,$t2
546
+ vshr.u64 $t2,$a,#@Sigma0[2]
547
+ vadd.i64 $K,@X[$i%16]
548
+ vsli.64 $t1,$a,#`64-@Sigma0[1]`
549
+ veor $Maj,$a,$b
550
+ vsli.64 $t2,$a,#`64-@Sigma0[2]`
551
+ veor $h,$t0,$t1
552
+ vadd.i64 $T1,$K
553
+ vbsl $Maj,$c,$b @ Maj(a,b,c)
554
+ veor $h,$t2 @ Sigma0(a)
555
+ vadd.i64 $d,$T1
556
+ vadd.i64 $Maj,$T1
557
+ @ vadd.i64 $h,$Maj
558
+ ___
559
+ }
560
+
561
+ sub NEON_16_79() {
562
+ my $i=shift;
563
+
564
+ if ($i&1) { &NEON_00_15($i,@_); return; }
565
+
566
+ # 2x-vectorized, therefore runs every 2nd round
567
+ my @X=map("q$_",(0..7)); # view @X as 128-bit vector
568
+ my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps
569
+ my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15
570
+ my $e=@_[4]; # $e from NEON_00_15
571
+ $i /= 2;
572
+ $code.=<<___;
573
+ vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0]
574
+ vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1]
575
+ vadd.i64 @_[0],d30 @ h+=Maj from the past
576
+ vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2]
577
+ vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
578
+ vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1]
579
+ vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
580
+ veor $s1,$t0
581
+ vshr.u64 $t0,$s0,#@sigma0[0]
582
+ veor $s1,$t1 @ sigma1(X[i+14])
583
+ vshr.u64 $t1,$s0,#@sigma0[1]
584
+ vadd.i64 @X[$i%8],$s1
585
+ vshr.u64 $s1,$s0,#@sigma0[2]
586
+ vsli.64 $t0,$s0,#`64-@sigma0[0]`
587
+ vsli.64 $t1,$s0,#`64-@sigma0[1]`
588
+ vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9]
589
+ veor $s1,$t0
590
+ vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15
591
+ vadd.i64 @X[$i%8],$s0
592
+ vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15
593
+ veor $s1,$t1 @ sigma0(X[i+1])
594
+ vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15
595
+ vadd.i64 @X[$i%8],$s1
596
+ ___
597
+ &NEON_00_15(2*$i,@_);
598
+ }
599
+
600
+ $code.=<<___;
601
+ #if __ARM_MAX_ARCH__>=7
602
+ .arch armv7-a
603
+ .fpu neon
604
+
605
+ .global sha512_block_data_order_neon
606
+ .type sha512_block_data_order_neon,%function
607
+ .align 4
608
+ sha512_block_data_order_neon:
609
+ .LNEON:
610
+ dmb @ errata #451034 on early Cortex A8
611
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
612
+ adr $Ktbl,K512
613
+ VFP_ABI_PUSH
614
+ vldmia $ctx,{$A-$H} @ load context
615
+ .Loop_neon:
616
+ ___
617
+ for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
618
+ $code.=<<___;
619
+ mov $cnt,#4
620
+ .L16_79_neon:
621
+ subs $cnt,#1
622
+ ___
623
+ for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
624
+ $code.=<<___;
625
+ bne .L16_79_neon
626
+
627
+ vadd.i64 $A,d30 @ h+=Maj from the past
628
+ vldmia $ctx,{d24-d31} @ load context to temp
629
+ vadd.i64 q8,q12 @ vectorized accumulate
630
+ vadd.i64 q9,q13
631
+ vadd.i64 q10,q14
632
+ vadd.i64 q11,q15
633
+ vstmia $ctx,{$A-$H} @ save context
634
+ teq $inp,$len
635
+ sub $Ktbl,#640 @ rewind K512
636
+ bne .Loop_neon
637
+
638
+ VFP_ABI_POP
639
+ ret @ bx lr
640
+ .size sha512_block_data_order_neon,.-sha512_block_data_order_neon
641
+ #endif
642
+ ___
643
+ }
644
+ $code.=<<___;
645
+ .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
646
+ .align 2
647
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
648
+ .comm OPENSSL_armcap_P,4,4
649
+ .hidden OPENSSL_armcap_P
650
+ #endif
651
+ ___
652
+
653
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
654
+ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
655
+ $code =~ s/\bret\b/bx lr/gm;
656
+
657
+ open SELF,$0;
658
+ while(<SELF>) {
659
+ next if (/^#!/);
660
+ last if (!s/^#/@/ and !/^$/);
661
+ print;
662
+ }
663
+ close SELF;
664
+
665
+ print $code;
666
+ close STDOUT; # enforce flush