ring-native 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
data/vendor/ring/crypto/sha/asm/sha256-armv4.pl
@@ -0,0 +1,735 @@
1
+ #!/usr/bin/env perl
2
+
3
+ # ====================================================================
4
+ # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
+ # project. The module is, however, dual licensed under OpenSSL and
6
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
7
+ # details see http://www.openssl.org/~appro/cryptogams/.
8
+ #
9
+ # Permission to use under GPL terms is granted.
10
+ # ====================================================================
11
+
12
+ # SHA256 block procedure for ARMv4. May 2007.
13
+
14
+ # Performance is ~2x better than gcc 3.4 generated code and in "abso-
15
+ # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16
+ # byte [on single-issue Xscale PXA250 core].
17
+
18
+ # July 2010.
19
+ #
20
+ # Rescheduling for dual-issue pipeline resulted in 22% improvement on
21
+ # Cortex A8 core and ~20 cycles per processed byte.
22
+
23
+ # February 2011.
24
+ #
25
+ # Profiler-assisted and platform-specific optimization resulted in 16%
26
+ # improvement on Cortex A8 core and ~15.4 cycles per processed byte.
27
+
28
+ # September 2013.
29
+ #
30
+ # Add NEON implementation. On Cortex A8 it was measured to process one
31
+ # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32
+ # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33
+ # code (meaning that latter performs sub-optimally, nothing was done
34
+ # about it).
35
+
36
+ # May 2014.
37
+ #
38
+ # Add ARMv8 code path performing at 2.0 cpb on Apple A7.
39
+
40
+ $flavour = shift;
41
+ if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
42
+ else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
43
+
44
+ if ($flavour && $flavour ne "void") {
45
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
46
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
47
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
48
+ die "can't locate arm-xlate.pl";
49
+
50
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
51
+ } else {
52
+ open STDOUT,">$output";
53
+ }
54
+
55
+ $ctx="r0"; $t0="r0";
56
+ $inp="r1"; $t4="r1";
57
+ $len="r2"; $t1="r2";
58
+ $T1="r3"; $t3="r3";
59
+ $A="r4";
60
+ $B="r5";
61
+ $C="r6";
62
+ $D="r7";
63
+ $E="r8";
64
+ $F="r9";
65
+ $G="r10";
66
+ $H="r11";
67
+ @V=($A,$B,$C,$D,$E,$F,$G,$H);
68
+ $t2="r12";
69
+ $Ktbl="r14";
70
+
71
+ @Sigma0=( 2,13,22);
72
+ @Sigma1=( 6,11,25);
73
+ @sigma0=( 7,18, 3);
74
+ @sigma1=(17,19,10);
75
+
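For orientation: the four arrays above are the rotation/shift counts of the SHA-256 bit functions from FIPS 180-4, which the generator splices into the assembly below. A minimal C reference sketch of the same functions:

    #include <stdint.h>

    /* FIPS 180-4 SHA-256 bit functions with the rotation/shift counts above;
     * a reference sketch for reading the generated assembly, not generator code. */
    static inline uint32_t rotr(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

    static inline uint32_t Sigma0(uint32_t x) { return rotr(x,  2) ^ rotr(x, 13) ^ rotr(x, 22); }
    static inline uint32_t Sigma1(uint32_t x) { return rotr(x,  6) ^ rotr(x, 11) ^ rotr(x, 25); }
    static inline uint32_t sigma0(uint32_t x) { return rotr(x,  7) ^ rotr(x, 18) ^ (x >>  3); }
    static inline uint32_t sigma1(uint32_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10); }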
76
+ sub BODY_00_15 {
77
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
78
+
79
+ $code.=<<___ if ($i<16);
80
+ #if __ARM_ARCH__>=7
81
+ @ ldr $t1,[$inp],#4 @ $i
82
+ # if $i==15
83
+ str $inp,[sp,#17*4] @ make room for $t4
84
+ # endif
85
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
86
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
87
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
88
+ # ifndef __ARMEB__
89
+ rev $t1,$t1
90
+ # endif
91
+ #else
92
+ @ ldrb $t1,[$inp,#3] @ $i
93
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
94
+ ldrb $t2,[$inp,#2]
95
+ ldrb $t0,[$inp,#1]
96
+ orr $t1,$t1,$t2,lsl#8
97
+ ldrb $t2,[$inp],#4
98
+ orr $t1,$t1,$t0,lsl#16
99
+ # if $i==15
100
+ str $inp,[sp,#17*4] @ make room for $t4
101
+ # endif
102
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
103
+ orr $t1,$t1,$t2,lsl#24
104
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
105
+ #endif
106
+ ___
107
+ $code.=<<___;
108
+ ldr $t2,[$Ktbl],#4 @ *K256++
109
+ add $h,$h,$t1 @ h+=X[i]
110
+ str $t1,[sp,#`$i%16`*4]
111
+ eor $t1,$f,$g
112
+ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
113
+ and $t1,$t1,$e
114
+ add $h,$h,$t2 @ h+=K256[i]
115
+ eor $t1,$t1,$g @ Ch(e,f,g)
116
+ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
117
+ add $h,$h,$t1 @ h+=Ch(e,f,g)
118
+ #if $i==31
119
+ and $t2,$t2,#0xff
120
+ cmp $t2,#0xf2 @ done?
121
+ #endif
122
+ #if $i<15
123
+ # if __ARM_ARCH__>=7
124
+ ldr $t1,[$inp],#4 @ prefetch
125
+ # else
126
+ ldrb $t1,[$inp,#3]
127
+ # endif
128
+ eor $t2,$a,$b @ a^b, b^c in next round
129
+ #else
130
+ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
131
+ eor $t2,$a,$b @ a^b, b^c in next round
132
+ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
133
+ #endif
134
+ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
135
+ and $t3,$t3,$t2 @ (b^c)&=(a^b)
136
+ add $d,$d,$h @ d+=h
137
+ eor $t3,$t3,$b @ Maj(a,b,c)
138
+ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
139
+ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
140
+ ___
141
+ ($t2,$t3)=($t3,$t2);
142
+ }
143
+
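BODY_00_15 above unrolls one SHA-256 compression round into ARM instructions, computing Ch as ((f^g)&e)^g and Maj via the ((b^c)&(a^b))^b trick carried in $t3 across rounds. The scalar operation it performs, written out plainly (a sketch reusing rotr/Sigma0/Sigma1 from the previous block):

    /* One SHA-256 compression round: the operation BODY_00_15 emits. */
    static void sha256_round(uint32_t s[8], uint32_t Xi, uint32_t Ki)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
        uint32_t T1 = h + Sigma1(e) + ((e & f) ^ (~e & g)) + Ki + Xi;  /* Ch(e,f,g) */
        uint32_t T2 = Sigma0(a) + ((a & b) ^ (a & c) ^ (b & c));       /* Maj(a,b,c) */
        s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;
        s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;
    }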
144
+ sub BODY_16_XX {
145
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
146
+
147
+ $code.=<<___;
148
+ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
149
+ @ ldr $t4,[sp,#`($i+14)%16`*4]
150
+ mov $t0,$t1,ror#$sigma0[0]
151
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
152
+ mov $t2,$t4,ror#$sigma1[0]
153
+ eor $t0,$t0,$t1,ror#$sigma0[1]
154
+ eor $t2,$t2,$t4,ror#$sigma1[1]
155
+ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
156
+ ldr $t1,[sp,#`($i+0)%16`*4]
157
+ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
158
+ ldr $t4,[sp,#`($i+9)%16`*4]
159
+
160
+ add $t2,$t2,$t0
161
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
162
+ add $t1,$t1,$t2
163
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
164
+ add $t1,$t1,$t4 @ X[i]
165
+ ___
166
+ &BODY_00_15(@_);
167
+ }
168
+
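BODY_16_XX extends the message schedule in a 16-word circular buffer kept on the stack, then falls through to BODY_00_15 for the round itself. The equivalent schedule step in C (sigma0/sigma1 as sketched earlier):

    /* W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],
     * computed in place over the 16-entry circular buffer. */
    static uint32_t sha256_schedule(uint32_t X[16], unsigned t)   /* t >= 16 */
    {
        X[t % 16] += sigma1(X[(t + 14) % 16]) + X[(t + 9) % 16] + sigma0(X[(t + 1) % 16]);
        return X[t % 16];
    }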
169
+ $code=<<___;
170
+ #ifndef __KERNEL__
171
+ # include <openssl/arm_arch.h>
172
+ #else
173
+ # define __ARM_ARCH__ __LINUX_ARM_ARCH__
174
+ # define __ARM_MAX_ARCH__ 7
175
+ #endif
176
+
177
+ .text
178
+ #if __ARM_ARCH__<7
179
+ .code 32
180
+ #else
181
+ .syntax unified
182
+ # if defined(__thumb2__) && !defined(__APPLE__)
183
+ # define adrl adr
184
+ .thumb
185
+ # else
186
+ .code 32
187
+ # endif
188
+ #endif
189
+
190
+ .type K256,%object
191
+ .align 5
192
+ K256:
193
+ .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
194
+ .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
195
+ .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
196
+ .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
197
+ .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
198
+ .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
199
+ .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
200
+ .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
201
+ .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
202
+ .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
203
+ .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
204
+ .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
205
+ .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
206
+ .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
207
+ .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
208
+ .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
209
+ .size K256,.-K256
210
+ .word 0 @ terminator
211
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
212
+ .LOPENSSL_armcap:
213
+ .word OPENSSL_armcap_P-.Lsha256_block_data_order
214
+ #endif
215
+ .align 5
216
+
217
+ .global sha256_block_data_order
218
+ .type sha256_block_data_order,%function
219
+ sha256_block_data_order:
220
+ .Lsha256_block_data_order:
221
+ #if __ARM_ARCH__<7
222
+ sub r3,pc,#8 @ sha256_block_data_order
223
+ #else
224
+ adr r3,sha256_block_data_order
225
+ #endif
226
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
227
+ ldr r12,.LOPENSSL_armcap
228
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
229
+ #ifdef __APPLE__
230
+ ldr r12,[r12]
231
+ #endif
232
+ tst r12,#ARMV8_SHA256
233
+ bne .LARMv8
234
+ tst r12,#ARMV7_NEON
235
+ bne .LNEON
236
+ #endif
237
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
238
+ stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
239
+ ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
240
+ sub $Ktbl,r3,#256+32 @ K256
241
+ sub sp,sp,#16*4 @ alloca(X[16])
242
+ .Loop:
243
+ # if __ARM_ARCH__>=7
244
+ ldr $t1,[$inp],#4
245
+ # else
246
+ ldrb $t1,[$inp,#3]
247
+ # endif
248
+ eor $t3,$B,$C @ magic
249
+ eor $t2,$t2,$t2
250
+ ___
251
+ for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
252
+ $code.=".Lrounds_16_xx:\n";
253
+ for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
254
+ $code.=<<___;
255
+ #if __ARM_ARCH__>=7
256
+ ite eq @ Thumb2 thing, sanity check in ARM
257
+ #endif
258
+ ldreq $t3,[sp,#16*4] @ pull ctx
259
+ bne .Lrounds_16_xx
260
+
261
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
262
+ ldr $t0,[$t3,#0]
263
+ ldr $t1,[$t3,#4]
264
+ ldr $t2,[$t3,#8]
265
+ add $A,$A,$t0
266
+ ldr $t0,[$t3,#12]
267
+ add $B,$B,$t1
268
+ ldr $t1,[$t3,#16]
269
+ add $C,$C,$t2
270
+ ldr $t2,[$t3,#20]
271
+ add $D,$D,$t0
272
+ ldr $t0,[$t3,#24]
273
+ add $E,$E,$t1
274
+ ldr $t1,[$t3,#28]
275
+ add $F,$F,$t2
276
+ ldr $inp,[sp,#17*4] @ pull inp
277
+ ldr $t2,[sp,#18*4] @ pull inp+len
278
+ add $G,$G,$t0
279
+ add $H,$H,$t1
280
+ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
281
+ cmp $inp,$t2
282
+ sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
283
+ bne .Loop
284
+
285
+ add sp,sp,#`16+3`*4 @ destroy frame
286
+ #if __ARM_ARCH__>=5
287
+ ldmia sp!,{r4-r11,pc}
288
+ #else
289
+ ldmia sp!,{r4-r11,lr}
290
+ tst lr,#1
291
+ moveq pc,lr @ be binary compatible with V4, yet
292
+ bx lr @ interoperable with Thumb ISA:-)
293
+ #endif
294
+ .size sha256_block_data_order,.-sha256_block_data_order
295
+ ___
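The K256 table emitted in the block above holds the 64 SHA-256 round constants, i.e. the first 32 bits of the fractional parts of the cube roots of the first 64 primes. A quick C check of the first eight, assuming double precision is adequate for these values:

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    /* K256[i] = high 32 bits of frac(cbrt(i-th prime)); spot-check the table. */
    int main(void) {
        static const unsigned primes[8] = {2, 3, 5, 7, 11, 13, 17, 19};
        for (int i = 0; i < 8; i++) {
            double r = cbrt((double)primes[i]);
            uint32_t k = (uint32_t)((r - floor(r)) * 4294967296.0);
            printf("0x%08x\n", (unsigned)k);  /* expect 0x428a2f98, 0x71374491, ... */
        }
        return 0;
    }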
296
+ ######################################################################
297
+ # NEON stuff
298
+ #
299
+ {{{
300
+ my @X=map("q$_",(0..3));
301
+ my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
302
+ my $Xfer=$t4;
303
+ my $j=0;
304
+
305
+ sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
306
+ sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
307
+
308
+ sub AUTOLOAD() # thunk [simplified] x86-style perlasm
309
+ { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
310
+ my $arg = pop;
311
+ $arg = "#$arg" if ($arg*1 eq $arg);
312
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
313
+ }
314
+
315
+ sub Xupdate()
316
+ { use integer;
317
+ my $body = shift;
318
+ my @insns = (&$body,&$body,&$body,&$body);
319
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
320
+
321
+ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
322
+ eval(shift(@insns));
323
+ eval(shift(@insns));
324
+ eval(shift(@insns));
325
+ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
326
+ eval(shift(@insns));
327
+ eval(shift(@insns));
328
+ eval(shift(@insns));
329
+ &vshr_u32 ($T2,$T0,$sigma0[0]);
330
+ eval(shift(@insns));
331
+ eval(shift(@insns));
332
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
333
+ eval(shift(@insns));
334
+ eval(shift(@insns));
335
+ &vshr_u32 ($T1,$T0,$sigma0[2]);
336
+ eval(shift(@insns));
337
+ eval(shift(@insns));
338
+ &vsli_32 ($T2,$T0,32-$sigma0[0]);
339
+ eval(shift(@insns));
340
+ eval(shift(@insns));
341
+ &vshr_u32 ($T3,$T0,$sigma0[1]);
342
+ eval(shift(@insns));
343
+ eval(shift(@insns));
344
+ &veor ($T1,$T1,$T2);
345
+ eval(shift(@insns));
346
+ eval(shift(@insns));
347
+ &vsli_32 ($T3,$T0,32-$sigma0[1]);
348
+ eval(shift(@insns));
349
+ eval(shift(@insns));
350
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
351
+ eval(shift(@insns));
352
+ eval(shift(@insns));
353
+ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
354
+ eval(shift(@insns));
355
+ eval(shift(@insns));
356
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
357
+ eval(shift(@insns));
358
+ eval(shift(@insns));
359
+ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
360
+ eval(shift(@insns));
361
+ eval(shift(@insns));
362
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
363
+ eval(shift(@insns));
364
+ eval(shift(@insns));
365
+ &veor ($T5,$T5,$T4);
366
+ eval(shift(@insns));
367
+ eval(shift(@insns));
368
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
369
+ eval(shift(@insns));
370
+ eval(shift(@insns));
371
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
372
+ eval(shift(@insns));
373
+ eval(shift(@insns));
374
+ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
375
+ eval(shift(@insns));
376
+ eval(shift(@insns));
377
+ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
378
+ eval(shift(@insns));
379
+ eval(shift(@insns));
380
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
381
+ eval(shift(@insns));
382
+ eval(shift(@insns));
383
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
384
+ eval(shift(@insns));
385
+ eval(shift(@insns));
386
+ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
387
+ eval(shift(@insns));
388
+ eval(shift(@insns));
389
+ &veor ($T5,$T5,$T4);
390
+ eval(shift(@insns));
391
+ eval(shift(@insns));
392
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
393
+ eval(shift(@insns));
394
+ eval(shift(@insns));
395
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
396
+ eval(shift(@insns));
397
+ eval(shift(@insns));
398
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
399
+ eval(shift(@insns));
400
+ eval(shift(@insns));
401
+ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
402
+ eval(shift(@insns));
403
+ eval(shift(@insns));
404
+ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
405
+ eval(shift(@insns));
406
+ eval(shift(@insns));
407
+ &vadd_i32 ($T0,$T0,@X[0]);
408
+ while($#insns>=2) { eval(shift(@insns)); }
409
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
410
+ eval(shift(@insns));
411
+ eval(shift(@insns));
412
+
413
+ push(@X,shift(@X)); # "rotate" X[]
414
+ }
415
+
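Each Xupdate call advances four schedule words (one q register) while interleaving the scalar round instructions supplied through $body. The same arithmetic in scalar form (sigma0/sigma1 as above); note why the NEON code splits the sigma1 half into two d-register steps:

    /* What one Xupdate call computes, scalar sketch: four schedule words per call.
     * The NEON version must handle words base+2/base+3 in a second d-register
     * half, because they depend on the freshly written base+0/base+1. */
    static void xupdate4(uint32_t X[16], int base)   /* base is 0, 4, 8 or 12 */
    {
        for (int j = 0; j < 4; j++) {
            int i = base + j;
            X[i] += sigma0(X[(i + 1) % 16]) + X[(i + 9) % 16] + sigma1(X[(i + 14) % 16]);
        }
    }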
416
+ sub Xpreload()
417
+ { use integer;
418
+ my $body = shift;
419
+ my @insns = (&$body,&$body,&$body,&$body);
420
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
421
+
422
+ eval(shift(@insns));
423
+ eval(shift(@insns));
424
+ eval(shift(@insns));
425
+ eval(shift(@insns));
426
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
427
+ eval(shift(@insns));
428
+ eval(shift(@insns));
429
+ eval(shift(@insns));
430
+ eval(shift(@insns));
431
+ &vrev32_8 (@X[0],@X[0]);
432
+ eval(shift(@insns));
433
+ eval(shift(@insns));
434
+ eval(shift(@insns));
435
+ eval(shift(@insns));
436
+ &vadd_i32 ($T0,$T0,@X[0]);
437
+ foreach (@insns) { eval; } # remaining instructions
438
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
439
+
440
+ push(@X,shift(@X)); # "rotate" X[]
441
+ }
442
+
443
+ sub body_00_15 () {
444
+ (
445
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
446
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
447
+ '&eor ($t1,$f,$g)',
448
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
449
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
450
+ '&and ($t1,$t1,$e)',
451
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
452
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
453
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
454
+ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
455
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
456
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
457
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
458
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
459
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
460
+ '&ldr ($t1,"[sp,#64]") if ($j==31)',
461
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
462
+ '&add ($d,$d,$h)', # d+=h
463
+ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
464
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
465
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
466
+ )
467
+ }
468
+
469
+ $code.=<<___;
470
+ #if __ARM_MAX_ARCH__>=7
471
+ .arch armv7-a
472
+ .fpu neon
473
+
474
+ .global sha256_block_data_order_neon
475
+ .type sha256_block_data_order_neon,%function
476
+ .align 4
477
+ sha256_block_data_order_neon:
478
+ .LNEON:
479
+ stmdb sp!,{r4-r12,lr}
480
+
481
+ sub $H,sp,#16*4+16
482
+ adrl $Ktbl,K256
483
+ bic $H,$H,#15 @ align for 128-bit stores
484
+ mov $t2,sp
485
+ mov sp,$H @ alloca
486
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
487
+
488
+ vld1.8 {@X[0]},[$inp]!
489
+ vld1.8 {@X[1]},[$inp]!
490
+ vld1.8 {@X[2]},[$inp]!
491
+ vld1.8 {@X[3]},[$inp]!
492
+ vld1.32 {$T0},[$Ktbl,:128]!
493
+ vld1.32 {$T1},[$Ktbl,:128]!
494
+ vld1.32 {$T2},[$Ktbl,:128]!
495
+ vld1.32 {$T3},[$Ktbl,:128]!
496
+ vrev32.8 @X[0],@X[0] @ yes, even on
497
+ str $ctx,[sp,#64]
498
+ vrev32.8 @X[1],@X[1] @ big-endian
499
+ str $inp,[sp,#68]
500
+ mov $Xfer,sp
501
+ vrev32.8 @X[2],@X[2]
502
+ str $len,[sp,#72]
503
+ vrev32.8 @X[3],@X[3]
504
+ str $t2,[sp,#76] @ save original sp
505
+ vadd.i32 $T0,$T0,@X[0]
506
+ vadd.i32 $T1,$T1,@X[1]
507
+ vst1.32 {$T0},[$Xfer,:128]!
508
+ vadd.i32 $T2,$T2,@X[2]
509
+ vst1.32 {$T1},[$Xfer,:128]!
510
+ vadd.i32 $T3,$T3,@X[3]
511
+ vst1.32 {$T2},[$Xfer,:128]!
512
+ vst1.32 {$T3},[$Xfer,:128]!
513
+
514
+ ldmia $ctx,{$A-$H}
515
+ sub $Xfer,$Xfer,#64
516
+ ldr $t1,[sp,#0]
517
+ eor $t2,$t2,$t2
518
+ eor $t3,$B,$C
519
+ b .L_00_48
520
+
521
+ .align 4
522
+ .L_00_48:
523
+ ___
524
+ &Xupdate(\&body_00_15);
525
+ &Xupdate(\&body_00_15);
526
+ &Xupdate(\&body_00_15);
527
+ &Xupdate(\&body_00_15);
528
+ $code.=<<___;
529
+ teq $t1,#0 @ check for K256 terminator
530
+ ldr $t1,[sp,#0]
531
+ sub $Xfer,$Xfer,#64
532
+ bne .L_00_48
533
+
534
+ ldr $inp,[sp,#68]
535
+ ldr $t0,[sp,#72]
536
+ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
537
+ teq $inp,$t0
538
+ it eq
539
+ subeq $inp,$inp,#64 @ avoid SEGV
540
+ vld1.8 {@X[0]},[$inp]! @ load next input block
541
+ vld1.8 {@X[1]},[$inp]!
542
+ vld1.8 {@X[2]},[$inp]!
543
+ vld1.8 {@X[3]},[$inp]!
544
+ it ne
545
+ strne $inp,[sp,#68]
546
+ mov $Xfer,sp
547
+ ___
548
+ &Xpreload(\&body_00_15);
549
+ &Xpreload(\&body_00_15);
550
+ &Xpreload(\&body_00_15);
551
+ &Xpreload(\&body_00_15);
552
+ $code.=<<___;
553
+ ldr $t0,[$t1,#0]
554
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
555
+ ldr $t2,[$t1,#4]
556
+ ldr $t3,[$t1,#8]
557
+ ldr $t4,[$t1,#12]
558
+ add $A,$A,$t0 @ accumulate
559
+ ldr $t0,[$t1,#16]
560
+ add $B,$B,$t2
561
+ ldr $t2,[$t1,#20]
562
+ add $C,$C,$t3
563
+ ldr $t3,[$t1,#24]
564
+ add $D,$D,$t4
565
+ ldr $t4,[$t1,#28]
566
+ add $E,$E,$t0
567
+ str $A,[$t1],#4
568
+ add $F,$F,$t2
569
+ str $B,[$t1],#4
570
+ add $G,$G,$t3
571
+ str $C,[$t1],#4
572
+ add $H,$H,$t4
573
+ str $D,[$t1],#4
574
+ stmia $t1,{$E-$H}
575
+
576
+ ittte ne
577
+ movne $Xfer,sp
578
+ ldrne $t1,[sp,#0]
579
+ eorne $t2,$t2,$t2
580
+ ldreq sp,[sp,#76] @ restore original sp
581
+ itt ne
582
+ eorne $t3,$B,$C
583
+ bne .L_00_48
584
+
585
+ ldmia sp!,{r4-r12,pc}
586
+ .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
587
+ #endif
588
+ ___
589
+ }}}
590
+ ######################################################################
591
+ # ARMv8 stuff
592
+ #
593
+ {{{
594
+ my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
595
+ my @MSG=map("q$_",(8..11));
596
+ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
597
+ my $Ktbl="r3";
598
+
599
+ $code.=<<___;
600
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
601
+
602
+ # if defined(__thumb2__) && !defined(__APPLE__)
603
+ # define INST(a,b,c,d) .byte c,d|0xc,a,b
604
+ # else
605
+ # define INST(a,b,c,d) .byte a,b,c,d
606
+ # endif
607
+
608
+ .type sha256_block_data_order_armv8,%function
609
+ .align 5
610
+ sha256_block_data_order_armv8:
611
+ .LARMv8:
612
+ vld1.32 {$ABCD,$EFGH},[$ctx]
613
+ # ifdef __APPLE__
614
+ sub $Ktbl,$Ktbl,#256+32
615
+ # elif defined(__thumb2__)
616
+ adr $Ktbl,.LARMv8
617
+ sub $Ktbl,$Ktbl,#.LARMv8-K256
618
+ # else
619
+ adrl $Ktbl,K256
620
+ # endif
621
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
622
+
623
+ .Loop_v8:
624
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
625
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
626
+ vld1.32 {$W0},[$Ktbl]!
627
+ vrev32.8 @MSG[0],@MSG[0]
628
+ vrev32.8 @MSG[1],@MSG[1]
629
+ vrev32.8 @MSG[2],@MSG[2]
630
+ vrev32.8 @MSG[3],@MSG[3]
631
+ vmov $ABCD_SAVE,$ABCD @ offload
632
+ vmov $EFGH_SAVE,$EFGH
633
+ teq $inp,$len
634
+ ___
635
+ for($i=0;$i<12;$i++) {
636
+ $code.=<<___;
637
+ vld1.32 {$W1},[$Ktbl]!
638
+ vadd.i32 $W0,$W0,@MSG[0]
639
+ sha256su0 @MSG[0],@MSG[1]
640
+ vmov $abcd,$ABCD
641
+ sha256h $ABCD,$EFGH,$W0
642
+ sha256h2 $EFGH,$abcd,$W0
643
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
644
+ ___
645
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
646
+ }
647
+ $code.=<<___;
648
+ vld1.32 {$W1},[$Ktbl]!
649
+ vadd.i32 $W0,$W0,@MSG[0]
650
+ vmov $abcd,$ABCD
651
+ sha256h $ABCD,$EFGH,$W0
652
+ sha256h2 $EFGH,$abcd,$W0
653
+
654
+ vld1.32 {$W0},[$Ktbl]!
655
+ vadd.i32 $W1,$W1,@MSG[1]
656
+ vmov $abcd,$ABCD
657
+ sha256h $ABCD,$EFGH,$W1
658
+ sha256h2 $EFGH,$abcd,$W1
659
+
660
+ vld1.32 {$W1},[$Ktbl]
661
+ vadd.i32 $W0,$W0,@MSG[2]
662
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
663
+ vmov $abcd,$ABCD
664
+ sha256h $ABCD,$EFGH,$W0
665
+ sha256h2 $EFGH,$abcd,$W0
666
+
667
+ vadd.i32 $W1,$W1,@MSG[3]
668
+ vmov $abcd,$ABCD
669
+ sha256h $ABCD,$EFGH,$W1
670
+ sha256h2 $EFGH,$abcd,$W1
671
+
672
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
673
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
674
+ it ne
675
+ bne .Loop_v8
676
+
677
+ vst1.32 {$ABCD,$EFGH},[$ctx]
678
+
679
+ ret @ bx lr
680
+ .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
681
+ #endif
682
+ ___
683
+ }}}
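The .Loop_v8 body above drives the ARMv8 Crypto Extensions: sha256h/sha256h2 perform four rounds on the ABCD and EFGH halves of the state while sha256su0/sha256su1 update the message schedule. One such quad-round step, sketched with the ACLE intrinsics from arm_neon.h that correspond to those instructions (data flow only, assuming a target built with the sha2 crypto extension):

    #include <arm_neon.h>

    /* One quad-round step of the .Loop_v8 body, intrinsic form. */
    static void quad_round(uint32x4_t *abcd, uint32x4_t *efgh, uint32x4_t *msg0,
                           uint32x4_t msg1, uint32x4_t msg2, uint32x4_t msg3,
                           uint32x4_t k)
    {
        uint32x4_t wk   = vaddq_u32(*msg0, k);         /* W + K for these four rounds */
        uint32x4_t save = *abcd;                       /* the "vmov $abcd,$ABCD" copy  */
        *msg0 = vsha256su0q_u32(*msg0, msg1);          /* start schedule update        */
        *abcd = vsha256hq_u32(*abcd, *efgh, wk);       /* four rounds, ABCD half       */
        *efgh = vsha256h2q_u32(*efgh, save, wk);       /* four rounds, EFGH half       */
        *msg0 = vsha256su1q_u32(*msg0, msg2, msg3);    /* finish schedule update       */
    }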
684
+ $code.=<<___;
685
+ .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
686
+ .align 2
687
+ #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
688
+ .comm OPENSSL_armcap_P,4,4
689
+ .hidden OPENSSL_armcap_P
690
+ #endif
691
+ ___
692
+
693
+ open SELF,$0;
694
+ while(<SELF>) {
695
+ next if (/^#!/);
696
+ last if (!s/^#/@/ and !/^$/);
697
+ print;
698
+ }
699
+ close SELF;
700
+
701
+ { my %opcode = (
702
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
703
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
704
+
705
+ sub unsha256 {
706
+ my ($mnemonic,$arg)=@_;
707
+
708
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
709
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
710
+ |(($2&7)<<17)|(($2&8)<<4)
711
+ |(($3&7)<<1) |(($3&8)<<2);
712
+ # since ARMv7 instructions are always encoded little-endian.
713
+ # correct solution is to use .inst directive, but older
714
+ # assemblers don't implement it:-(
715
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
716
+ $word&0xff,($word>>8)&0xff,
717
+ ($word>>16)&0xff,($word>>24)&0xff,
718
+ $mnemonic,$arg;
719
+ }
720
+ }
721
+ }
722
+
723
+ foreach (split($/,$code)) {
724
+
725
+ s/\`([^\`]*)\`/eval $1/geo;
726
+
727
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
728
+
729
+ s/\bret\b/bx lr/go or
730
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
731
+
732
+ print $_,"\n";
733
+ }
734
+
735
+ close STDOUT; # enforce flush