ring-native 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
@@ -0,0 +1,735 @@
1
+ #!/usr/bin/env perl
2
+
3
+ # ====================================================================
4
+ # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
+ # project. The module is, however, dual licensed under OpenSSL and
6
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
7
+ # details see http://www.openssl.org/~appro/cryptogams/.
8
+ #
9
+ # Permission to use under GPL terms is granted.
10
+ # ====================================================================
11
+
12
+ # SHA256 block procedure for ARMv4. May 2007.
13
+
14
+ # Performance is ~2x better than gcc 3.4 generated code and in "abso-
15
+ # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16
+ # byte [on single-issue Xscale PXA250 core].
17
+
18
+ # July 2010.
19
+ #
20
+ # Rescheduling for dual-issue pipeline resulted in 22% improvement on
21
+ # Cortex A8 core and ~20 cycles per processed byte.
22
+
23
+ # February 2011.
24
+ #
25
+ # Profiler-assisted and platform-specific optimization resulted in 16%
26
+ # improvement on Cortex A8 core and ~15.4 cycles per processed byte.
27
+
28
+ # September 2013.
29
+ #
30
+ # Add NEON implementation. On Cortex A8 it was measured to process one
31
+ # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32
+ # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33
+ # code (meaning that latter performs sub-optimally, nothing was done
34
+ # about it).
35
+
36
+ # May 2014.
37
+ #
38
+ # Add ARMv8 code path performing at 2.0 cpb on Apple A7.
39
+
40
# Parse perlasm-style arguments: either a single "output.S" filename, or a
# flavour token (e.g. "linux32", "ios32", "void") followed by the output file.
$flavour = shift;
if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Locate the arm-xlate.pl post-processor next to this script or in the
    # shared perlasm directory, then pipe our output through it.
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Unchecked open would silently discard every line of generated assembly
    # on failure, so fail loudly instead.
    open STDOUT,"| \"$^X\" $xlate $flavour $output"
        or die "can't pipe through $xlate: $!";
} else {
    # 3-arg open with an error check; the original 2-arg ">$output" form was
    # unchecked and open to mode-injection via the filename.
    open STDOUT,">",$output or die "can't open $output: $!";
}
54
+
55
# Register allocation for the integer-only code path.  The first four
# registers double as scratch temporaries ($t0/$t4/$t1/$t3) once the
# incoming arguments have been saved to the stack.
$ctx="r0";	$t0="r0";	# SHA256_CTX pointer / temp
$inp="r1";	$t4="r1";	# input pointer / temp
$len="r2";	$t1="r2";	# block count / temp
$T1="r3";	$t3="r3";
$A="r4";			# working variables a..h live in r4-r11
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
@V=($A,$B,$C,$D,$E,$F,$G,$H);	# rotated between rounds instead of moving data
$t2="r12";
$Ktbl="r14";			# pointer into the K256 constant table

# SHA-256 rotation/shift amounts (FIPS 180-4): Sigma0/Sigma1 are the
# big-sigma rotations used in each round, sigma0/sigma1 the message-schedule
# rotations plus a final logical shift.
@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
76
# Emit the assembly for one SHA-256 round.  For rounds 0..15 it also emits
# the message-load prologue (word fetch + byte-swap, or byte-wise assembly
# on pre-ARMv7 where unaligned ldr is unsafe).  Rounds >=16 reach the shared
# tail below via BODY_16_XX.  Note the software-pipelining trick visible in
# the "from the past" comments: each round finishes the previous round's
# Maj(a,b,c) addition, and $t2/$t3 are swapped at the end to carry a^b /
# (b^c)&(a^b) into the next round.
sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

# Message-word load, only for the first 16 rounds.
$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
	@ ldr	$t1,[$inp],#4			@ $i
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
# ifndef __ARMEB__
	rev	$t1,$t1
# endif
#else
	@ ldrb	$t1,[$inp,#3]			@ $i
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	ldrb	$t2,[$inp,#2]
	ldrb	$t0,[$inp,#1]
	orr	$t1,$t1,$t2,lsl#8
	ldrb	$t2,[$inp],#4
	orr	$t1,$t1,$t0,lsl#16
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	orr	$t1,$t1,$t2,lsl#24
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
#endif
___
# Shared round body: h += X[i] + K[i] + Sigma1(e) + Ch(e,f,g); d += h;
# Maj(a,b,c) is computed but its addition into h is deferred (see above).
# The $i==31 check sniffs the K256 terminator word to detect the last round.
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	add	$h,$h,$t1			@ h+=X[i]
	str	$t1,[sp,#`$i%16`*4]
	eor	$t1,$f,$g
	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
	and	$t1,$t1,$e
	add	$h,$h,$t2			@ h+=K256[i]
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
	add	$h,$h,$t1			@ h+=Ch(e,f,g)
#if $i==31
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2			@ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4			@ prefetch
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t2,$a,$b			@ a^b, b^c in next round
#else
	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
	eor	$t2,$a,$b			@ a^b, b^c in next round
	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
#endif
	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
	add	$d,$d,$h			@ d+=h
	eor	$t3,$t3,$b			@ Maj(a,b,c)
	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
___
	# Swap the temp registers so the next round picks up this round's
	# deferred Maj/xor state in the expected register.
	($t2,$t3)=($t3,$t2);
}
143
+
144
# Emit one round for i >= 16: first expand the message schedule in place
# (X[i] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2]), kept in the
# 16-word circular buffer on the stack), then fall through to the shared
# round tail in BODY_00_15.  $t1/$t4 arrive preloaded by the previous round
# ("from future BODY_16_xx" loads), hence the commented-out ldr reminders.
sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
	@ ldr	$t4,[sp,#`($i+14)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	mov	$t2,$t4,ror#$sigma1[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t2,$t2,$t4,ror#$sigma1[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	ldr	$t1,[sp,#`($i+0)%16`*4]
	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
	ldr	$t4,[sp,#`($i+9)%16`*4]

	add	$t2,$t2,$t0
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
	add	$t1,$t1,$t2
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	add	$t1,$t1,$t4			@ X[i]
___
	&BODY_00_15(@_);
}
168
+
169
# File prologue, the K256 constant table, and the integer-only
# sha256_block_data_order entry point.  At runtime the entry point probes
# OPENSSL_armcap_P and branches to the ARMv8 (.LARMv8) or NEON (.LNEON)
# implementations when available.  Comments may only live outside the
# heredocs here -- anything inside would be emitted into the .S file.
$code=<<___;
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
.syntax unified
# if defined(__thumb2__) && !defined(__APPLE__)
#  define adrl adr
.thumb
# else
.code	32
# endif
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,sha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t3,$B,$C		@ magic
	eor	$t2,$t2,$t2
___
# Fully unroll rounds 0..15, then rounds 16..31 which loop as .Lrounds_16_xx
# until the K256 terminator is seen; @V is rotated so the round bodies
# always see (a..h) in the right registers.
for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Epilogue: fold the working variables back into the context, advance the
# input pointer and loop over remaining blocks, then unwind the frame.
$code.=<<___;
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	$t3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
	ldr	$t0,[$t3,#0]
	ldr	$t1,[$t3,#4]
	ldr	$t2,[$t3,#8]
	add	$A,$A,$t0
	ldr	$t0,[$t3,#12]
	add	$B,$B,$t1
	ldr	$t1,[$t3,#16]
	add	$C,$C,$t2
	ldr	$t2,[$t3,#20]
	add	$D,$D,$t0
	ldr	$t0,[$t3,#24]
	add	$E,$E,$t1
	ldr	$t1,[$t3,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
___
296
+ ######################################################################
297
+ # NEON stuff
298
+ #
299
+ {{{
300
# NEON-path lexicals: the 16-word message schedule lives in quad registers
# q0-q3 (@X), q8-q11 hold K+X working values, d24/d25 are narrow scratch.
# $Xfer reuses scalar register $t4 as the cursor into the on-stack X buffer.
my @X=map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
my $Xfer=$t4;
my $j=0;		# global round counter used by body_00_15's thunks
305
# Map a NEON quad-register name "qN" to the double register aliasing its
# low half ("d<2N>") or high half ("d<2N+1>"); yield "" for anything that
# is not a q-register name.
sub Dlo() {
	my $reg = shift;
	return "" unless $reg =~ m|q([1]?[0-9])|;
	return "d" . ($1 * 2);
}
sub Dhi() {
	my $reg = shift;
	return "" unless $reg =~ m|q([1]?[0-9])|;
	return "d" . ($1 * 2 + 1);
}
307
+
308
# Catch-all for undefined sub calls such as &vshr_u32(...): turn the called
# name into a NEON mnemonic (strip the package prefix, map '_' to '.') and
# append "\tvshr.u32\targs" to $code.  The last argument gets a '#' prefix
# when it is numeric ($arg*1 eq $arg holds only for numeric strings), i.e.
# it is an immediate operand rather than a register.
sub AUTOLOAD()		# thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
  $arg = "#$arg" if ($arg*1 eq $arg);
  $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
314
+
315
# Emit one 4-word step of the NEON message-schedule expansion, interleaved
# with 4 rounds of scalar code.  $body yields a round's worth of instruction
# thunks (see body_00_15); NEON instructions are emitted via AUTOLOAD and
# scalar thunks are drained between them via eval(shift(@insns)) -- the
# interleaving order is the scheduling and must not be rearranged.
sub Xupdate()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);	# 4 rounds of scalar thunks
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T2,$T0,$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T1,$T0,$sigma0[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T2,$T0,32-$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T3,$T0,$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T2);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T3,$T0,32-$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);		# sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);		# sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 while($#insns>=2) { eval(shift(@insns)); }
	&vst1_32	("{$T0}","[$Xfer,:128]!");	# stash K[i]+X[i] for scalar rounds
	 eval(shift(@insns));
	 eval(shift(@insns));

	push(@X,shift(@X));		# "rotate" X[]
}
415
+
416
# Emit 4 rounds for the last 16 rounds of a block: no more schedule
# expansion is needed, so just byte-swap the next block's preloaded words,
# add the round constants, and stash K[i]+X[i] for the scalar rounds --
# again interleaved with 4 rounds of scalar thunks from $body.
sub Xpreload()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);	# 4 rounds of scalar thunks
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vrev32_8	(@X[0],@X[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 foreach (@insns) { eval; }	# remaining instructions
	&vst1_32	("{$T0}","[$Xfer,:128]!");

	push(@X,shift(@X));		# "rotate" X[]
}
442
+
443
# Return one round of the scalar SHA-256 body as a list of stringified
# instruction thunks; Xupdate/Xpreload eval them one at a time between NEON
# instructions.  The trailing thunk bumps the global round counter $j,
# rotates @V, and swaps $t2/$t3 exactly as BODY_00_15 does -- keep the
# element order intact, it encodes the data dependencies.
sub body_00_15 () {
	(
	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
	'&eor	($t1,$f,$g)',
	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
	'&and	($t1,$t1,$e)',
	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
	'&add	($d,$d,$h)',			# d+=h
	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
	)
}
468
+
469
# NEON entry point sha256_block_data_order_neon (.LNEON): build an aligned
# on-stack X[16] buffer (original sp saved at [sp,#76]; ctx/inp/end saved at
# [sp,#64..72]), preload and byte-swap the first block, then run the
# 48-round schedule loop (.L_00_48, 4 x Xupdate) followed by the final 16
# rounds (4 x Xpreload) and the context accumulation.  Comments may only
# live outside the heredocs; inside they would leak into the .S output.
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	4
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	$H,sp,#16*4+16
	adrl	$Ktbl,K256
	bic	$H,$H,#15		@ align for 128-bit stores
	mov	$t2,sp
	mov	sp,$H			@ alloca
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp

	vld1.8		{@X[0]},[$inp]!
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	vld1.32		{$T0},[$Ktbl,:128]!
	vld1.32		{$T1},[$Ktbl,:128]!
	vld1.32		{$T2},[$Ktbl,:128]!
	vld1.32		{$T3},[$Ktbl,:128]!
	vrev32.8	@X[0],@X[0]		@ yes, even on
	str		$ctx,[sp,#64]
	vrev32.8	@X[1],@X[1]		@ big-endian
	str		$inp,[sp,#68]
	mov		$Xfer,sp
	vrev32.8	@X[2],@X[2]
	str		$len,[sp,#72]
	vrev32.8	@X[3],@X[3]
	str		$t2,[sp,#76]		@ save original sp
	vadd.i32	$T0,$T0,@X[0]
	vadd.i32	$T1,$T1,@X[1]
	vst1.32		{$T0},[$Xfer,:128]!
	vadd.i32	$T2,$T2,@X[2]
	vst1.32		{$T1},[$Xfer,:128]!
	vadd.i32	$T3,$T3,@X[3]
	vst1.32		{$T2},[$Xfer,:128]!
	vst1.32		{$T3},[$Xfer,:128]!

	ldmia		$ctx,{$A-$H}
	sub		$Xfer,$Xfer,#64
	ldr		$t1,[sp,#0]
	eor		$t2,$t2,$t2
	eor		$t3,$B,$C
	b		.L_00_48

.align	4
.L_00_48:
___
	# 16 rounds per trip: 4 x (4-round schedule step + scalar rounds).
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
$code.=<<___;
	teq	$t1,#0				@ check for K256 terminator
	ldr	$t1,[sp,#0]
	sub	$Xfer,$Xfer,#64
	bne	.L_00_48

	ldr		$inp,[sp,#68]
	ldr		$t0,[sp,#72]
	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
	teq		$inp,$t0
	it		eq
	subeq		$inp,$inp,#64		@ avoid SEGV
	vld1.8		{@X[0]},[$inp]!		@ load next input block
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	it		ne
	strne		$inp,[sp,#68]
	mov		$Xfer,sp
___
	# Final 16 rounds: no more schedule expansion, just K+X staging.
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
$code.=<<___;
	ldr	$t0,[$t1,#0]
	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
	ldr	$t2,[$t1,#4]
	ldr	$t3,[$t1,#8]
	ldr	$t4,[$t1,#12]
	add	$A,$A,$t0			@ accumulate
	ldr	$t0,[$t1,#16]
	add	$B,$B,$t2
	ldr	$t2,[$t1,#20]
	add	$C,$C,$t3
	ldr	$t3,[$t1,#24]
	add	$D,$D,$t4
	ldr	$t4,[$t1,#28]
	add	$E,$E,$t0
	str	$A,[$t1],#4
	add	$F,$F,$t2
	str	$B,[$t1],#4
	add	$G,$G,$t3
	str	$C,[$t1],#4
	add	$H,$H,$t4
	str	$D,[$t1],#4
	stmia	$t1,{$E-$H}

	ittte	ne
	movne	$Xfer,sp
	ldrne	$t1,[sp,#0]
	eorne	$t2,$t2,$t2
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	$t3,$B,$C
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
589
+ }}}
590
+ ######################################################################
591
+ # ARMv8 stuff
592
+ #
593
+ {{{
594
+ my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
595
+ my @MSG=map("q$_",(8..11));
596
+ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
597
+ my $Ktbl="r3";
598
+
599
+ $code.=<<___;
600
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
601
+
602
+ # if defined(__thumb2__) && !defined(__APPLE__)
603
+ # define INST(a,b,c,d) .byte c,d|0xc,a,b
604
+ # else
605
+ # define INST(a,b,c,d) .byte a,b,c,d
606
+ # endif
607
+
608
+ .type sha256_block_data_order_armv8,%function
609
+ .align 5
610
+ sha256_block_data_order_armv8:
611
+ .LARMv8:
612
+ vld1.32 {$ABCD,$EFGH},[$ctx]
613
+ # ifdef __APPLE__
614
+ sub $Ktbl,$Ktbl,#256+32
615
+ # elif defined(__thumb2__)
616
+ adr $Ktbl,.LARMv8
617
+ sub $Ktbl,$Ktbl,#.LARMv8-K256
618
+ # else
619
+ adrl $Ktbl,K256
620
+ # endif
621
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
622
+
623
+ .Loop_v8:
624
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
625
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
626
+ vld1.32 {$W0},[$Ktbl]!
627
+ vrev32.8 @MSG[0],@MSG[0]
628
+ vrev32.8 @MSG[1],@MSG[1]
629
+ vrev32.8 @MSG[2],@MSG[2]
630
+ vrev32.8 @MSG[3],@MSG[3]
631
+ vmov $ABCD_SAVE,$ABCD @ offload
632
+ vmov $EFGH_SAVE,$EFGH
633
+ teq $inp,$len
634
+ ___
635
+ for($i=0;$i<12;$i++) {
636
+ $code.=<<___;
637
+ vld1.32 {$W1},[$Ktbl]!
638
+ vadd.i32 $W0,$W0,@MSG[0]
639
+ sha256su0 @MSG[0],@MSG[1]
640
+ vmov $abcd,$ABCD
641
+ sha256h $ABCD,$EFGH,$W0
642
+ sha256h2 $EFGH,$abcd,$W0
643
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
644
+ ___
645
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
646
+ }
647
+ $code.=<<___;
648
+ vld1.32 {$W1},[$Ktbl]!
649
+ vadd.i32 $W0,$W0,@MSG[0]
650
+ vmov $abcd,$ABCD
651
+ sha256h $ABCD,$EFGH,$W0
652
+ sha256h2 $EFGH,$abcd,$W0
653
+
654
+ vld1.32 {$W0},[$Ktbl]!
655
+ vadd.i32 $W1,$W1,@MSG[1]
656
+ vmov $abcd,$ABCD
657
+ sha256h $ABCD,$EFGH,$W1
658
+ sha256h2 $EFGH,$abcd,$W1
659
+
660
+ vld1.32 {$W1},[$Ktbl]
661
+ vadd.i32 $W0,$W0,@MSG[2]
662
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
663
+ vmov $abcd,$ABCD
664
+ sha256h $ABCD,$EFGH,$W0
665
+ sha256h2 $EFGH,$abcd,$W0
666
+
667
+ vadd.i32 $W1,$W1,@MSG[3]
668
+ vmov $abcd,$ABCD
669
+ sha256h $ABCD,$EFGH,$W1
670
+ sha256h2 $EFGH,$abcd,$W1
671
+
672
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
673
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
674
+ it ne
675
+ bne .Loop_v8
676
+
677
+ vst1.32 {$ABCD,$EFGH},[$ctx]
678
+
679
+ ret @ bx lr
680
+ .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
681
+ #endif
682
+ ___
683
+ }}}
684
+ $code.=<<___;
685
+ .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
686
+ .align 2
687
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
688
+ .comm OPENSSL_armcap_P,4,4
689
+ .hidden OPENSSL_armcap_P
690
+ #endif
691
+ ___
692
+
693
+ open SELF,$0;
694
+ while(<SELF>) {
695
+ next if (/^#!/);
696
+ last if (!s/^#/@/ and !/^$/);
697
+ print;
698
+ }
699
+ close SELF;
700
+
701
+ { my %opcode = (
702
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
703
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
704
+
705
+ sub unsha256 {
706
+ my ($mnemonic,$arg)=@_;
707
+
708
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
709
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
710
+ |(($2&7)<<17)|(($2&8)<<4)
711
+ |(($3&7)<<1) |(($3&8)<<2);
712
+ # since ARMv7 instructions are always encoded little-endian.
713
+ # correct solution is to use .inst directive, but older
714
+ # assemblers don't implement it:-(
715
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
716
+ $word&0xff,($word>>8)&0xff,
717
+ ($word>>16)&0xff,($word>>24)&0xff,
718
+ $mnemonic,$arg;
719
+ }
720
+ }
721
+ }
722
+
723
+ foreach (split($/,$code)) {
724
+
725
+ s/\`([^\`]*)\`/eval $1/geo;
726
+
727
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
728
+
729
+ s/\bret\b/bx lr/go or
730
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
731
+
732
+ print $_,"\n";
733
+ }
734
+
735
+ close STDOUT; # enforce flush