ring-native 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
data/vendor/ring/crypto/sha/asm/sha512-armv4.pl
@@ -0,0 +1,666 @@
+ #!/usr/bin/env perl
+
+ # ====================================================================
+ # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+ #
+ # Permission to use under GPL terms is granted.
+ # ====================================================================
+
+ # SHA512 block procedure for ARMv4. September 2007.
+
+ # This code is ~4.5 (four and a half) times faster than code generated
+ # by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+ # Xscale PXA250 core].
+ #
+ # July 2010.
+ #
+ # Rescheduling for dual-issue pipeline resulted in 6% improvement on
+ # Cortex A8 core and ~40 cycles per processed byte.
+
+ # February 2011.
+ #
+ # Profiler-assisted and platform-specific optimization resulted in 7%
+ # improvement on Coxtex A8 core and ~38 cycles per byte.
+
+ # March 2011.
+ #
+ # Add NEON implementation. On Cortex A8 it was measured to process
+ # one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+ # August 2012.
+ #
+ # Improve NEON performance by 12% on Snapdragon S4. In absolute
+ # terms it's 22.6 cycles per byte, which is disappointing result.
+ # Technical writers asserted that 3-way S4 pipeline can sustain
+ # multiple NEON instructions per cycle, but dual NEON issue could
+ # not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+ # for further details. On side note Cortex-A15 processes one byte in
+ # 16 cycles.
+
+ # Byte order [in]dependence. =========================================
+ #
+ # Originally caller was expected to maintain specific *dword* order in
+ # h[0-7], namely with most significant dword at *lower* address, which
+ # was reflected in below two parameters as 0 and 4. Now caller is
+ # expected to maintain native byte order for whole 64-bit values.
+ $hi="HI";
+ $lo="LO";
+ # ====================================================================
+
+ $flavour = shift;
+ if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+ else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
+
+ if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+ } else {
+ open STDOUT,">$output";
+ }
+
+ $ctx="r0"; # parameter block
+ $inp="r1";
+ $len="r2";
+
+ $Tlo="r3";
+ $Thi="r4";
+ $Alo="r5";
+ $Ahi="r6";
+ $Elo="r7";
+ $Ehi="r8";
+ $t0="r9";
+ $t1="r10";
+ $t2="r11";
+ $t3="r12";
+ ############ r13 is stack pointer
+ $Ktbl="r14";
+ ############ r15 is program counter
+
+ $Aoff=8*0;
+ $Boff=8*1;
+ $Coff=8*2;
+ $Doff=8*3;
+ $Eoff=8*4;
+ $Foff=8*5;
+ $Goff=8*6;
+ $Hoff=8*7;
+ $Xoff=8*8;
+
+ sub BODY_00_15() {
+ my $magic = shift;
+ $code.=<<___;
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov $t0,$Elo,lsr#14
+ str $Tlo,[sp,#$Xoff+0]
+ mov $t1,$Ehi,lsr#14
+ str $Thi,[sp,#$Xoff+4]
+ eor $t0,$t0,$Ehi,lsl#18
+ ldr $t2,[sp,#$Hoff+0] @ h.lo
+ eor $t1,$t1,$Elo,lsl#18
+ ldr $t3,[sp,#$Hoff+4] @ h.hi
+ eor $t0,$t0,$Elo,lsr#18
+ eor $t1,$t1,$Ehi,lsr#18
+ eor $t0,$t0,$Ehi,lsl#14
+ eor $t1,$t1,$Elo,lsl#14
+ eor $t0,$t0,$Ehi,lsr#9
+ eor $t1,$t1,$Elo,lsr#9
+ eor $t0,$t0,$Elo,lsl#23
+ eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
+ adds $Tlo,$Tlo,$t0
+ ldr $t0,[sp,#$Foff+0] @ f.lo
+ adc $Thi,$Thi,$t1 @ T += Sigma1(e)
+ ldr $t1,[sp,#$Foff+4] @ f.hi
+ adds $Tlo,$Tlo,$t2
+ ldr $t2,[sp,#$Goff+0] @ g.lo
+ adc $Thi,$Thi,$t3 @ T += h
+ ldr $t3,[sp,#$Goff+4] @ g.hi
+
+ eor $t0,$t0,$t2
+ str $Elo,[sp,#$Eoff+0]
+ eor $t1,$t1,$t3
+ str $Ehi,[sp,#$Eoff+4]
+ and $t0,$t0,$Elo
+ str $Alo,[sp,#$Aoff+0]
+ and $t1,$t1,$Ehi
+ str $Ahi,[sp,#$Aoff+4]
+ eor $t0,$t0,$t2
+ ldr $t2,[$Ktbl,#$lo] @ K[i].lo
+ eor $t1,$t1,$t3 @ Ch(e,f,g)
+ ldr $t3,[$Ktbl,#$hi] @ K[i].hi
+
+ adds $Tlo,$Tlo,$t0
+ ldr $Elo,[sp,#$Doff+0] @ d.lo
+ adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
+ ldr $Ehi,[sp,#$Doff+4] @ d.hi
+ adds $Tlo,$Tlo,$t2
+ and $t0,$t2,#0xff
+ adc $Thi,$Thi,$t3 @ T += K[i]
+ adds $Elo,$Elo,$Tlo
+ ldr $t2,[sp,#$Boff+0] @ b.lo
+ adc $Ehi,$Ehi,$Thi @ d += T
+ teq $t0,#$magic
+
+ ldr $t3,[sp,#$Coff+0] @ c.lo
+ #if __ARM_ARCH__>=7
+ it eq @ Thumb2 thing, sanity check in ARM
+ #endif
+ orreq $Ktbl,$Ktbl,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov $t0,$Alo,lsr#28
+ mov $t1,$Ahi,lsr#28
+ eor $t0,$t0,$Ahi,lsl#4
+ eor $t1,$t1,$Alo,lsl#4
+ eor $t0,$t0,$Ahi,lsr#2
+ eor $t1,$t1,$Alo,lsr#2
+ eor $t0,$t0,$Alo,lsl#30
+ eor $t1,$t1,$Ahi,lsl#30
+ eor $t0,$t0,$Ahi,lsr#7
+ eor $t1,$t1,$Alo,lsr#7
+ eor $t0,$t0,$Alo,lsl#25
+ eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
+ adds $Tlo,$Tlo,$t0
+ and $t0,$Alo,$t2
+ adc $Thi,$Thi,$t1 @ T += Sigma0(a)
+
+ ldr $t1,[sp,#$Boff+4] @ b.hi
+ orr $Alo,$Alo,$t2
+ ldr $t2,[sp,#$Coff+4] @ c.hi
+ and $Alo,$Alo,$t3
+ and $t3,$Ahi,$t1
+ orr $Ahi,$Ahi,$t1
+ orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
+ and $Ahi,$Ahi,$t2
+ adds $Alo,$Alo,$Tlo
+ orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc $Ahi,$Ahi,$Thi @ h += T
+ tst $Ktbl,#1
+ add $Ktbl,$Ktbl,#8
+ ___
+ }
+ $code=<<___;
+ #ifndef __KERNEL__
+ # include <openssl/arm_arch.h>
+ # define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+ # define VFP_ABI_POP vldmia sp!,{d8-d15}
+ #else
+ # define __ARM_ARCH__ __LINUX_ARM_ARCH__
+ # define __ARM_MAX_ARCH__ 7
+ # define VFP_ABI_PUSH
+ # define VFP_ABI_POP
+ #endif
+
+ #ifdef __ARMEL__
+ # define LO 0
+ # define HI 4
+ # define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
+ #else
+ # define HI 0
+ # define LO 4
+ # define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
+ #endif
+
+ .text
+ #if __ARM_ARCH__<7 || defined(__APPLE__)
+ .code 32
+ #else
+ .syntax unified
+ # ifdef __thumb2__
+ # define adrl adr
+ .thumb
+ # else
+ .code 32
+ # endif
+ #endif
+
+ .type K512,%object
+ .align 5
+ K512:
+ WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+ WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+ WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+ WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+ WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+ WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+ WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+ WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+ WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+ WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+ WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+ WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+ WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+ WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+ WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+ WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+ WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+ WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+ WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+ WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+ WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+ WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+ WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+ WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+ WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+ WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+ WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+ WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+ WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+ WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+ WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+ WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+ WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+ WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+ WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+ WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+ WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+ .size K512,.-K512
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ .LOPENSSL_armcap:
+ .word OPENSSL_armcap_P-.Lsha512_block_data_order
+ .skip 32-4
+ #else
+ .skip 32
+ #endif
+
+ .global sha512_block_data_order
+ .type sha512_block_data_order,%function
+ sha512_block_data_order:
+ .Lsha512_block_data_order:
+ #if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha512_block_data_order
+ #else
+ adr r3,sha512_block_data_order
+ #endif
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ #ifdef __APPLE__
+ ldr r12,[r12]
+ #endif
+ tst r12,#1
+ bne .LNEON
+ #endif
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
+ stmdb sp!,{r4-r12,lr}
+ sub $Ktbl,r3,#672 @ K512
+ sub sp,sp,#9*8
+
+ ldr $Elo,[$ctx,#$Eoff+$lo]
+ ldr $Ehi,[$ctx,#$Eoff+$hi]
+ ldr $t0, [$ctx,#$Goff+$lo]
+ ldr $t1, [$ctx,#$Goff+$hi]
+ ldr $t2, [$ctx,#$Hoff+$lo]
+ ldr $t3, [$ctx,#$Hoff+$hi]
+ .Loop:
+ str $t0, [sp,#$Goff+0]
+ str $t1, [sp,#$Goff+4]
+ str $t2, [sp,#$Hoff+0]
+ str $t3, [sp,#$Hoff+4]
+ ldr $Alo,[$ctx,#$Aoff+$lo]
+ ldr $Ahi,[$ctx,#$Aoff+$hi]
+ ldr $Tlo,[$ctx,#$Boff+$lo]
+ ldr $Thi,[$ctx,#$Boff+$hi]
+ ldr $t0, [$ctx,#$Coff+$lo]
+ ldr $t1, [$ctx,#$Coff+$hi]
+ ldr $t2, [$ctx,#$Doff+$lo]
+ ldr $t3, [$ctx,#$Doff+$hi]
+ str $Tlo,[sp,#$Boff+0]
+ str $Thi,[sp,#$Boff+4]
+ str $t0, [sp,#$Coff+0]
+ str $t1, [sp,#$Coff+4]
+ str $t2, [sp,#$Doff+0]
+ str $t3, [sp,#$Doff+4]
+ ldr $Tlo,[$ctx,#$Foff+$lo]
+ ldr $Thi,[$ctx,#$Foff+$hi]
+ str $Tlo,[sp,#$Foff+0]
+ str $Thi,[sp,#$Foff+4]
+
+ .L00_15:
+ #if __ARM_ARCH__<7
+ ldrb $Tlo,[$inp,#7]
+ ldrb $t0, [$inp,#6]
+ ldrb $t1, [$inp,#5]
+ ldrb $t2, [$inp,#4]
+ ldrb $Thi,[$inp,#3]
+ ldrb $t3, [$inp,#2]
+ orr $Tlo,$Tlo,$t0,lsl#8
+ ldrb $t0, [$inp,#1]
+ orr $Tlo,$Tlo,$t1,lsl#16
+ ldrb $t1, [$inp],#8
+ orr $Tlo,$Tlo,$t2,lsl#24
+ orr $Thi,$Thi,$t3,lsl#8
+ orr $Thi,$Thi,$t0,lsl#16
+ orr $Thi,$Thi,$t1,lsl#24
+ #else
+ ldr $Tlo,[$inp,#4]
+ ldr $Thi,[$inp],#8
+ #ifdef __ARMEL__
+ rev $Tlo,$Tlo
+ rev $Thi,$Thi
+ #endif
+ #endif
+ ___
+ &BODY_00_15(0x94);
+ $code.=<<___;
+ tst $Ktbl,#1
+ beq .L00_15
+ ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
+ ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
+ bic $Ktbl,$Ktbl,#1
+ .L16_79:
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
+ mov $Tlo,$t0,lsr#1
+ ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
+ mov $Thi,$t1,lsr#1
+ ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
+ eor $Tlo,$Tlo,$t1,lsl#31
+ eor $Thi,$Thi,$t0,lsl#31
+ eor $Tlo,$Tlo,$t0,lsr#8
+ eor $Thi,$Thi,$t1,lsr#8
+ eor $Tlo,$Tlo,$t1,lsl#24
+ eor $Thi,$Thi,$t0,lsl#24
+ eor $Tlo,$Tlo,$t0,lsr#7
+ eor $Thi,$Thi,$t1,lsr#7
+ eor $Tlo,$Tlo,$t1,lsl#25
+
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+ mov $t0,$t2,lsr#19
+ mov $t1,$t3,lsr#19
+ eor $t0,$t0,$t3,lsl#13
+ eor $t1,$t1,$t2,lsl#13
+ eor $t0,$t0,$t3,lsr#29
+ eor $t1,$t1,$t2,lsr#29
+ eor $t0,$t0,$t2,lsl#3
+ eor $t1,$t1,$t3,lsl#3
+ eor $t0,$t0,$t2,lsr#6
+ eor $t1,$t1,$t3,lsr#6
+ ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
+ eor $t0,$t0,$t3,lsl#26
+
+ ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
+ adds $Tlo,$Tlo,$t0
+ ldr $t0,[sp,#`$Xoff+8*16`+0]
+ adc $Thi,$Thi,$t1
+
+ ldr $t1,[sp,#`$Xoff+8*16`+4]
+ adds $Tlo,$Tlo,$t2
+ adc $Thi,$Thi,$t3
+ adds $Tlo,$Tlo,$t0
+ adc $Thi,$Thi,$t1
+ ___
+ &BODY_00_15(0x17);
+ $code.=<<___;
+ #if __ARM_ARCH__>=7
+ ittt eq @ Thumb2 thing, sanity check in ARM
+ #endif
+ ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
+ ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
+ beq .L16_79
+ bic $Ktbl,$Ktbl,#1
+
+ ldr $Tlo,[sp,#$Boff+0]
+ ldr $Thi,[sp,#$Boff+4]
+ ldr $t0, [$ctx,#$Aoff+$lo]
+ ldr $t1, [$ctx,#$Aoff+$hi]
+ ldr $t2, [$ctx,#$Boff+$lo]
+ ldr $t3, [$ctx,#$Boff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Aoff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Aoff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Boff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Boff+$hi]
+
+ ldr $Alo,[sp,#$Coff+0]
+ ldr $Ahi,[sp,#$Coff+4]
+ ldr $Tlo,[sp,#$Doff+0]
+ ldr $Thi,[sp,#$Doff+4]
+ ldr $t0, [$ctx,#$Coff+$lo]
+ ldr $t1, [$ctx,#$Coff+$hi]
+ ldr $t2, [$ctx,#$Doff+$lo]
+ ldr $t3, [$ctx,#$Doff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Coff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Coff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Doff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Doff+$hi]
+
+ ldr $Tlo,[sp,#$Foff+0]
+ ldr $Thi,[sp,#$Foff+4]
+ ldr $t0, [$ctx,#$Eoff+$lo]
+ ldr $t1, [$ctx,#$Eoff+$hi]
+ ldr $t2, [$ctx,#$Foff+$lo]
+ ldr $t3, [$ctx,#$Foff+$hi]
+ adds $Elo,$Elo,$t0
+ str $Elo,[$ctx,#$Eoff+$lo]
+ adc $Ehi,$Ehi,$t1
+ str $Ehi,[$ctx,#$Eoff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Foff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Foff+$hi]
+
+ ldr $Alo,[sp,#$Goff+0]
+ ldr $Ahi,[sp,#$Goff+4]
+ ldr $Tlo,[sp,#$Hoff+0]
+ ldr $Thi,[sp,#$Hoff+4]
+ ldr $t0, [$ctx,#$Goff+$lo]
+ ldr $t1, [$ctx,#$Goff+$hi]
+ ldr $t2, [$ctx,#$Hoff+$lo]
+ ldr $t3, [$ctx,#$Hoff+$hi]
+ adds $t0,$Alo,$t0
+ str $t0, [$ctx,#$Goff+$lo]
+ adc $t1,$Ahi,$t1
+ str $t1, [$ctx,#$Goff+$hi]
+ adds $t2,$Tlo,$t2
+ str $t2, [$ctx,#$Hoff+$lo]
+ adc $t3,$Thi,$t3
+ str $t3, [$ctx,#$Hoff+$hi]
+
+ add sp,sp,#640
+ sub $Ktbl,$Ktbl,#640
+
+ teq $inp,$len
+ bne .Loop
+
+ add sp,sp,#8*9 @ destroy frame
+ #if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+ #else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+ #endif
+ .size sha512_block_data_order,.-sha512_block_data_order
+ ___
+
+ {
+ my @Sigma0=(28,34,39);
+ my @Sigma1=(14,18,41);
+ my @sigma0=(1, 8, 7);
+ my @sigma1=(19,61,6);
+
+ my $Ktbl="r3";
+ my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
+
+ my @X=map("d$_",(0..15));
+ my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
+
+ sub NEON_00_15() {
+ my $i=shift;
+ my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
+ my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps
+
+ $code.=<<___ if ($i<16 || $i&1);
+ vshr.u64 $t0,$e,#@Sigma1[0] @ $i
+ #if $i<16
+ vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
+ #endif
+ vshr.u64 $t1,$e,#@Sigma1[1]
+ #if $i>0
+ vadd.i64 $a,$Maj @ h+=Maj from the past
+ #endif
+ vshr.u64 $t2,$e,#@Sigma1[2]
+ ___
+ $code.=<<___;
+ vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
+ vsli.64 $t0,$e,#`64-@Sigma1[0]`
+ vsli.64 $t1,$e,#`64-@Sigma1[1]`
+ vmov $Ch,$e
+ vsli.64 $t2,$e,#`64-@Sigma1[2]`
+ #if $i<16 && defined(__ARMEL__)
+ vrev64.8 @X[$i],@X[$i]
+ #endif
+ veor $t1,$t0
+ vbsl $Ch,$f,$g @ Ch(e,f,g)
+ vshr.u64 $t0,$a,#@Sigma0[0]
+ veor $t2,$t1 @ Sigma1(e)
+ vadd.i64 $T1,$Ch,$h
+ vshr.u64 $t1,$a,#@Sigma0[1]
+ vsli.64 $t0,$a,#`64-@Sigma0[0]`
+ vadd.i64 $T1,$t2
+ vshr.u64 $t2,$a,#@Sigma0[2]
+ vadd.i64 $K,@X[$i%16]
+ vsli.64 $t1,$a,#`64-@Sigma0[1]`
+ veor $Maj,$a,$b
+ vsli.64 $t2,$a,#`64-@Sigma0[2]`
+ veor $h,$t0,$t1
+ vadd.i64 $T1,$K
+ vbsl $Maj,$c,$b @ Maj(a,b,c)
+ veor $h,$t2 @ Sigma0(a)
+ vadd.i64 $d,$T1
+ vadd.i64 $Maj,$T1
+ @ vadd.i64 $h,$Maj
+ ___
+ }
+
+ sub NEON_16_79() {
+ my $i=shift;
+
+ if ($i&1) { &NEON_00_15($i,@_); return; }
+
+ # 2x-vectorized, therefore runs every 2nd round
+ my @X=map("q$_",(0..7)); # view @X as 128-bit vector
+ my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps
+ my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15
+ my $e=@_[4]; # $e from NEON_00_15
+ $i /= 2;
+ $code.=<<___;
+ vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0]
+ vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1]
+ vadd.i64 @_[0],d30 @ h+=Maj from the past
+ vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2]
+ vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
+ vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1]
+ vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
+ veor $s1,$t0
+ vshr.u64 $t0,$s0,#@sigma0[0]
+ veor $s1,$t1 @ sigma1(X[i+14])
+ vshr.u64 $t1,$s0,#@sigma0[1]
+ vadd.i64 @X[$i%8],$s1
+ vshr.u64 $s1,$s0,#@sigma0[2]
+ vsli.64 $t0,$s0,#`64-@sigma0[0]`
+ vsli.64 $t1,$s0,#`64-@sigma0[1]`
+ vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9]
+ veor $s1,$t0
+ vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15
+ vadd.i64 @X[$i%8],$s0
+ vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15
+ veor $s1,$t1 @ sigma0(X[i+1])
+ vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15
+ vadd.i64 @X[$i%8],$s1
+ ___
+ &NEON_00_15(2*$i,@_);
+ }
+
+ $code.=<<___;
+ #if __ARM_MAX_ARCH__>=7
+ .arch armv7-a
+ .fpu neon
+
+ .global sha512_block_data_order_neon
+ .type sha512_block_data_order_neon,%function
+ .align 4
+ sha512_block_data_order_neon:
+ .LNEON:
+ dmb @ errata #451034 on early Cortex A8
+ add $len,$inp,$len,lsl#7 @ len to point at the end of inp
+ adr $Ktbl,K512
+ VFP_ABI_PUSH
+ vldmia $ctx,{$A-$H} @ load context
+ .Loop_neon:
+ ___
+ for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
+ $code.=<<___;
+ mov $cnt,#4
+ .L16_79_neon:
+ subs $cnt,#1
+ ___
+ for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
+ $code.=<<___;
+ bne .L16_79_neon
+
+ vadd.i64 $A,d30 @ h+=Maj from the past
+ vldmia $ctx,{d24-d31} @ load context to temp
+ vadd.i64 q8,q12 @ vectorized accumulate
+ vadd.i64 q9,q13
+ vadd.i64 q10,q14
+ vadd.i64 q11,q15
+ vstmia $ctx,{$A-$H} @ save context
+ teq $inp,$len
+ sub $Ktbl,#640 @ rewind K512
+ bne .Loop_neon
+
+ VFP_ABI_POP
+ ret @ bx lr
+ .size sha512_block_data_order_neon,.-sha512_block_data_order_neon
+ #endif
+ ___
+ }
+ $code.=<<___;
+ .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 2
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ .comm OPENSSL_armcap_P,4,4
+ .hidden OPENSSL_armcap_P
+ #endif
+ ___
+
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+ $code =~ s/\bret\b/bx lr/gm;
+
+ open SELF,$0;
+ while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+ }
+ close SELF;
+
+ print $code;
+ close STDOUT; # enforce flush
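
Editor's note, not part of the packaged file: the "@ Sigma1(x) ... / @ LO ... / @ HI ..." comments in the perl source above describe how each 64-bit rotate of SHA-512 is decomposed into shifts on the two 32-bit halves of a register pair, which is what the eor/lsr/lsl sequences on ($Elo,$Ehi) and ($Alo,$Ahi) then implement. The following is a minimal standalone C sketch of that decomposition for Sigma1 only; the function names and the test value are ours, chosen purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Reference: Sigma1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41) on a full 64-bit word. */
static uint64_t rotr64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }
static uint64_t sigma1_ref(uint64_t x) { return rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41); }

/* Same value computed from the two 32-bit halves, mirroring the script's comment:
 *   LO: lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
 *   HI: hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
 * A rotate by n < 32 pairs each half shifted right by n with the other half
 * shifted left by 32-n; the rotate by 41 = 32+9 swaps the halves first,
 * which is why hi and lo trade places in the last term. */
static uint64_t sigma1_split(uint32_t hi, uint32_t lo) {
    uint32_t rlo = (lo >> 14 ^ hi << 18) ^ (lo >> 18 ^ hi << 14) ^ (hi >> 9 ^ lo << 23);
    uint32_t rhi = (hi >> 14 ^ lo << 18) ^ (hi >> 18 ^ lo << 14) ^ (lo >> 9 ^ hi << 23);
    return ((uint64_t)rhi << 32) | rlo;
}

int main(void) {
    uint64_t e = 0x510e527fade682d1ULL;   /* arbitrary test value (SHA-512's initial e) */
    printf("ref   = %016llx\n", (unsigned long long)sigma1_ref(e));
    printf("split = %016llx\n",
           (unsigned long long)sigma1_split((uint32_t)(e >> 32), (uint32_t)e));
    return 0;   /* both lines print the same value */
}

The same pattern, with different shift counts, covers Sigma0, sigma0, and sigma1 in the integer path of the script.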