ring-native 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
- # project. Rights for redistribution and usage in source and binary
6
- # forms are granted according to the OpenSSL license.
7
- # ====================================================================
8
-
9
- push @ARGV, 'sha256';
10
-
11
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
12
- push(@INC,"${dir}");
13
-
14
- do "sha-x86_64.pl" or die "failed to run sha-armv8.pl: $@";
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
- # project. Rights for redistribution and usage in source and binary
6
- # forms are granted according to the OpenSSL license.
7
- # ====================================================================
8
-
9
- push @ARGV, 'sha256';
10
-
11
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
12
- push(@INC,"${dir}");
13
-
14
- do "sha-x86_64.pl" or die "failed to run sha-x86_64.pl: $@";
@@ -1,911 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
- # project. The module is, however, dual licensed under OpenSSL and
6
- # CRYPTOGAMS licenses depending on where you obtain it. For further
7
- # details see http://www.openssl.org/~appro/cryptogams/.
8
- # ====================================================================
9
- #
10
- # SHA512 block transform for x86. September 2007.
11
- #
12
- # May 2013.
13
- #
14
- # Add SSSE3 code path, 20-25% improvement [over original SSE2 code].
15
- #
16
- # Performance in clock cycles per processed byte (less is better):
17
- #
18
- # gcc icc x86 asm SIMD(*) x86_64(**)
19
- # Pentium 100 97 61 - -
20
- # PIII 75 77 56 - -
21
- # P4 116 95 82 34.6 30.8
22
- # AMD K8 54 55 36 20.7 9.57
23
- # Core2 66 57 40 15.9 9.97
24
- # Westmere 70 - 38 12.2 9.58
25
- # Sandy Bridge 58 - 35 11.9 11.2
26
- # Ivy Bridge 50 - 33 11.5 8.17
27
- # Haswell 46 - 29 11.3 7.66
28
- # Bulldozer 121 - 50 14.0 13.5
29
- # VIA Nano 91 - 52 33 14.7
30
- # Atom 126 - 68 48(***) 14.7
31
- # Silvermont 97 - 58 42(***) 17.5
32
- #
33
- # (*) whichever best applicable.
34
- # (**) x86_64 assembler performance is presented for reference
35
- # purposes, the results are for integer-only code.
36
- # (***) paddq is incredibly slow on Atom.
37
- #
38
- # IALU code-path is optimized for elder Pentiums. On vanilla Pentium
39
- # performance improvement over compiler generated code reaches ~60%,
40
- # while on PIII - ~35%. On newer µ-archs improvement varies from 15%
41
- # to 50%, but it's less important as they are expected to execute SSE2
42
- # code-path, which is commonly ~2-3x faster [than compiler generated
43
- # code]. SSE2 code-path is as fast as original sha512-sse2.pl, even
44
- # though it does not use 128-bit operations. The latter means that
45
- # SSE2-aware kernel is no longer required to execute the code. Another
46
- # difference is that new code optimizes amount of writes, but at the
47
- # cost of increased data cache "footprint" by 1/2KB.
48
-
49
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
50
- push(@INC,"${dir}","${dir}../../perlasm");
51
- require "x86asm.pl";
52
-
53
- &asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
54
-
55
- $sse2=0;
56
- for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
57
-
58
- &external_label("OPENSSL_ia32cap_P") if ($sse2);
59
-
60
- $Tlo=&DWP(0,"esp"); $Thi=&DWP(4,"esp");
61
- $Alo=&DWP(8,"esp"); $Ahi=&DWP(8+4,"esp");
62
- $Blo=&DWP(16,"esp"); $Bhi=&DWP(16+4,"esp");
63
- $Clo=&DWP(24,"esp"); $Chi=&DWP(24+4,"esp");
64
- $Dlo=&DWP(32,"esp"); $Dhi=&DWP(32+4,"esp");
65
- $Elo=&DWP(40,"esp"); $Ehi=&DWP(40+4,"esp");
66
- $Flo=&DWP(48,"esp"); $Fhi=&DWP(48+4,"esp");
67
- $Glo=&DWP(56,"esp"); $Ghi=&DWP(56+4,"esp");
68
- $Hlo=&DWP(64,"esp"); $Hhi=&DWP(64+4,"esp");
69
- $K512="ebp";
70
-
71
- $Asse2=&QWP(0,"esp");
72
- $Bsse2=&QWP(8,"esp");
73
- $Csse2=&QWP(16,"esp");
74
- $Dsse2=&QWP(24,"esp");
75
- $Esse2=&QWP(32,"esp");
76
- $Fsse2=&QWP(40,"esp");
77
- $Gsse2=&QWP(48,"esp");
78
- $Hsse2=&QWP(56,"esp");
79
-
80
- $A="mm0"; # B-D and
81
- $E="mm4"; # F-H are commonly loaded to respectively mm1-mm3 and
82
- # mm5-mm7, but it's done on an on-demand basis...
83
- $BxC="mm2"; # ... except for B^C
84
-
85
- sub BODY_00_15_sse2 {
86
- my $phase=shift;
87
-
88
- #&movq ("mm5",$Fsse2); # load f
89
- #&movq ("mm6",$Gsse2); # load g
90
-
91
- &movq ("mm1",$E); # %mm1 is sliding right
92
- &pxor ("mm5","mm6"); # f^=g
93
- &psrlq ("mm1",14);
94
- &movq ($Esse2,$E); # modulo-scheduled save e
95
- &pand ("mm5",$E); # f&=e
96
- &psllq ($E,23); # $E is sliding left
97
- &movq ($A,"mm3") if ($phase<2);
98
- &movq (&QWP(8*9,"esp"),"mm7") # save X[i]
99
- &movq ("mm3","mm1"); # %mm3 is T1
100
- &psrlq ("mm1",4);
101
- &pxor ("mm5","mm6"); # Ch(e,f,g)
102
- &pxor ("mm3",$E);
103
- &psllq ($E,23);
104
- &pxor ("mm3","mm1");
105
- &movq ($Asse2,$A); # modulo-scheduled save a
106
- &paddq ("mm7","mm5"); # X[i]+=Ch(e,f,g)
107
- &pxor ("mm3",$E);
108
- &psrlq ("mm1",23);
109
- &paddq ("mm7",$Hsse2); # X[i]+=h
110
- &pxor ("mm3","mm1");
111
- &psllq ($E,4);
112
- &paddq ("mm7",QWP(0,$K512)); # X[i]+=K512[i]
113
- &pxor ("mm3",$E); # T1=Sigma1_512(e)
114
-
115
- &movq ($E,$Dsse2); # e = load d, e in next round
116
- &paddq ("mm3","mm7"); # T1+=X[i]
117
- &movq ("mm5",$A); # %mm5 is sliding right
118
- &psrlq ("mm5",28);
119
- &paddq ($E,"mm3"); # d += T1
120
- &movq ("mm6",$A); # %mm6 is sliding left
121
- &movq ("mm7","mm5");
122
- &psllq ("mm6",25);
123
- &movq ("mm1",$Bsse2); # load b
124
- &psrlq ("mm5",6);
125
- &pxor ("mm7","mm6");
126
- &sub ("esp",8);
127
- &psllq ("mm6",5);
128
- &pxor ("mm7","mm5");
129
- &pxor ($A,"mm1"); # a^b, b^c in next round
130
- &psrlq ("mm5",5);
131
- &pxor ("mm7","mm6");
132
- &pand ($BxC,$A); # (b^c)&(a^b)
133
- &psllq ("mm6",6);
134
- &pxor ("mm7","mm5");
135
- &pxor ($BxC,"mm1"); # [h=]Maj(a,b,c)
136
- &pxor ("mm6","mm7"); # Sigma0_512(a)
137
- &movq ("mm7",&QWP(8*(9+16-1),"esp")) if ($phase!=0); # pre-fetch
138
- &movq ("mm5",$Fsse2) if ($phase==0); # load f
139
-
140
- if ($phase>1) {
141
- &paddq ($BxC,"mm6"); # h+=Sigma0(a)
142
- &add ($K512,8);
143
- #&paddq ($BxC,"mm3"); # h+=T1
144
-
145
- ($A,$BxC) = ($BxC,$A); # rotate registers
146
- } else {
147
- &paddq ("mm3",$BxC); # T1+=Maj(a,b,c)
148
- &movq ($BxC,$A);
149
- &add ($K512,8);
150
- &paddq ("mm3","mm6"); # T1+=Sigma0(a)
151
- &movq ("mm6",$Gsse2) if ($phase==0); # load g
152
- #&movq ($A,"mm3"); # h=T1
153
- }
154
- }
155
-
156
- sub BODY_00_15_x86 {
157
- #define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
158
- # LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
159
- # HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
160
- &mov ("ecx",$Elo);
161
- &mov ("edx",$Ehi);
162
- &mov ("esi","ecx");
163
-
164
- &shr ("ecx",9); # lo>>9
165
- &mov ("edi","edx");
166
- &shr ("edx",9); # hi>>9
167
- &mov ("ebx","ecx");
168
- &shl ("esi",14); # lo<<14
169
- &mov ("eax","edx");
170
- &shl ("edi",14); # hi<<14
171
- &xor ("ebx","esi");
172
-
173
- &shr ("ecx",14-9); # lo>>14
174
- &xor ("eax","edi");
175
- &shr ("edx",14-9); # hi>>14
176
- &xor ("eax","ecx");
177
- &shl ("esi",18-14); # lo<<18
178
- &xor ("ebx","edx");
179
- &shl ("edi",18-14); # hi<<18
180
- &xor ("ebx","esi");
181
-
182
- &shr ("ecx",18-14); # lo>>18
183
- &xor ("eax","edi");
184
- &shr ("edx",18-14); # hi>>18
185
- &xor ("eax","ecx");
186
- &shl ("esi",23-18); # lo<<23
187
- &xor ("ebx","edx");
188
- &shl ("edi",23-18); # hi<<23
189
- &xor ("eax","esi");
190
- &xor ("ebx","edi"); # T1 = Sigma1(e)
191
-
192
- &mov ("ecx",$Flo);
193
- &mov ("edx",$Fhi);
194
- &mov ("esi",$Glo);
195
- &mov ("edi",$Ghi);
196
- &add ("eax",$Hlo);
197
- &adc ("ebx",$Hhi); # T1 += h
198
- &xor ("ecx","esi");
199
- &xor ("edx","edi");
200
- &and ("ecx",$Elo);
201
- &and ("edx",$Ehi);
202
- &add ("eax",&DWP(8*(9+15)+0,"esp"));
203
- &adc ("ebx",&DWP(8*(9+15)+4,"esp")); # T1 += X[0]
204
- &xor ("ecx","esi");
205
- &xor ("edx","edi"); # Ch(e,f,g) = (f^g)&e)^g
206
-
207
- &mov ("esi",&DWP(0,$K512));
208
- &mov ("edi",&DWP(4,$K512)); # K[i]
209
- &add ("eax","ecx");
210
- &adc ("ebx","edx"); # T1 += Ch(e,f,g)
211
- &mov ("ecx",$Dlo);
212
- &mov ("edx",$Dhi);
213
- &add ("eax","esi");
214
- &adc ("ebx","edi"); # T1 += K[i]
215
- &mov ($Tlo,"eax");
216
- &mov ($Thi,"ebx"); # put T1 away
217
- &add ("eax","ecx");
218
- &adc ("ebx","edx"); # d += T1
219
-
220
- #define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
221
- # LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
222
- # HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
223
- &mov ("ecx",$Alo);
224
- &mov ("edx",$Ahi);
225
- &mov ($Dlo,"eax");
226
- &mov ($Dhi,"ebx");
227
- &mov ("esi","ecx");
228
-
229
- &shr ("ecx",2); # lo>>2
230
- &mov ("edi","edx");
231
- &shr ("edx",2); # hi>>2
232
- &mov ("ebx","ecx");
233
- &shl ("esi",4); # lo<<4
234
- &mov ("eax","edx");
235
- &shl ("edi",4); # hi<<4
236
- &xor ("ebx","esi");
237
-
238
- &shr ("ecx",7-2); # lo>>7
239
- &xor ("eax","edi");
240
- &shr ("edx",7-2); # hi>>7
241
- &xor ("ebx","ecx");
242
- &shl ("esi",25-4); # lo<<25
243
- &xor ("eax","edx");
244
- &shl ("edi",25-4); # hi<<25
245
- &xor ("eax","esi");
246
-
247
- &shr ("ecx",28-7); # lo>>28
248
- &xor ("ebx","edi");
249
- &shr ("edx",28-7); # hi>>28
250
- &xor ("eax","ecx");
251
- &shl ("esi",30-25); # lo<<30
252
- &xor ("ebx","edx");
253
- &shl ("edi",30-25); # hi<<30
254
- &xor ("eax","esi");
255
- &xor ("ebx","edi"); # Sigma0(a)
256
-
257
- &mov ("ecx",$Alo);
258
- &mov ("edx",$Ahi);
259
- &mov ("esi",$Blo);
260
- &mov ("edi",$Bhi);
261
- &add ("eax",$Tlo);
262
- &adc ("ebx",$Thi); # T1 = Sigma0(a)+T1
263
- &or ("ecx","esi");
264
- &or ("edx","edi");
265
- &and ("ecx",$Clo);
266
- &and ("edx",$Chi);
267
- &and ("esi",$Alo);
268
- &and ("edi",$Ahi);
269
- &or ("ecx","esi");
270
- &or ("edx","edi"); # Maj(a,b,c) = ((a|b)&c)|(a&b)
271
-
272
- &add ("eax","ecx");
273
- &adc ("ebx","edx"); # T1 += Maj(a,b,c)
274
- &mov ($Tlo,"eax");
275
- &mov ($Thi,"ebx");
276
-
277
- &mov (&LB("edx"),&BP(0,$K512)); # pre-fetch LSB of *K
278
- &sub ("esp",8);
279
- &lea ($K512,&DWP(8,$K512)); # K++
280
- }
281
-
282
-
283
- &function_begin("sha512_block_data_order");
284
- &mov ("esi",wparam(0)); # ctx
285
- &mov ("edi",wparam(1)); # inp
286
- &mov ("eax",wparam(2)); # num
287
- &mov ("ebx","esp"); # saved sp
288
-
289
- &call (&label("pic_point")); # make it PIC!
290
- &set_label("pic_point");
291
- &blindpop($K512);
292
- &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
293
-
294
- &sub ("esp",16);
295
- &and ("esp",-64);
296
-
297
- &shl ("eax",7);
298
- &add ("eax","edi");
299
- &mov (&DWP(0,"esp"),"esi"); # ctx
300
- &mov (&DWP(4,"esp"),"edi"); # inp
301
- &mov (&DWP(8,"esp"),"eax"); # inp+num*128
302
- &mov (&DWP(12,"esp"),"ebx"); # saved sp
303
-
304
- if ($sse2) {
305
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
306
- &mov ("ecx",&DWP(0,"edx"));
307
- &test ("ecx",1<<26);
308
- &jz (&label("loop_x86"));
309
-
310
- &mov ("edx",&DWP(4,"edx"));
311
-
312
- # load ctx->h[0-7]
313
- &movq ($A,&QWP(0,"esi"));
314
- &and ("ecx",1<<24); # XMM registers availability
315
- &movq ("mm1",&QWP(8,"esi"));
316
- &and ("edx",1<<9); # SSSE3 bit
317
- &movq ($BxC,&QWP(16,"esi"));
318
- &or ("ecx","edx");
319
- &movq ("mm3",&QWP(24,"esi"));
320
- &movq ($E,&QWP(32,"esi"));
321
- &movq ("mm5",&QWP(40,"esi"));
322
- &movq ("mm6",&QWP(48,"esi"));
323
- &movq ("mm7",&QWP(56,"esi"));
324
- &cmp ("ecx",1<<24|1<<9);
325
- &je (&label("SSSE3"));
326
- &sub ("esp",8*10);
327
- &jmp (&label("loop_sse2"));
328
-
329
- &set_label("loop_sse2",16);
330
- #&movq ($Asse2,$A);
331
- &movq ($Bsse2,"mm1");
332
- &movq ($Csse2,$BxC);
333
- &movq ($Dsse2,"mm3");
334
- #&movq ($Esse2,$E);
335
- &movq ($Fsse2,"mm5");
336
- &movq ($Gsse2,"mm6");
337
- &pxor ($BxC,"mm1"); # magic
338
- &movq ($Hsse2,"mm7");
339
- &movq ("mm3",$A); # magic
340
-
341
- &mov ("eax",&DWP(0,"edi"));
342
- &mov ("ebx",&DWP(4,"edi"));
343
- &add ("edi",8);
344
- &mov ("edx",15); # counter
345
- &bswap ("eax");
346
- &bswap ("ebx");
347
- &jmp (&label("00_14_sse2"));
348
-
349
- &set_label("00_14_sse2",16);
350
- &movd ("mm1","eax");
351
- &mov ("eax",&DWP(0,"edi"));
352
- &movd ("mm7","ebx");
353
- &mov ("ebx",&DWP(4,"edi"));
354
- &add ("edi",8);
355
- &bswap ("eax");
356
- &bswap ("ebx");
357
- &punpckldq("mm7","mm1");
358
-
359
- &BODY_00_15_sse2();
360
-
361
- &dec ("edx");
362
- &jnz (&label("00_14_sse2"));
363
-
364
- &movd ("mm1","eax");
365
- &movd ("mm7","ebx");
366
- &punpckldq("mm7","mm1");
367
-
368
- &BODY_00_15_sse2(1);
369
-
370
- &pxor ($A,$A); # A is in %mm3
371
- &mov ("edx",32); # counter
372
- &jmp (&label("16_79_sse2"));
373
-
374
- &set_label("16_79_sse2",16);
375
- for ($j=0;$j<2;$j++) { # 2x unroll
376
- #&movq ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15
377
- &movq ("mm5",&QWP(8*(9+16-14),"esp"));
378
- &movq ("mm1","mm7");
379
- &psrlq ("mm7",1);
380
- &movq ("mm6","mm5");
381
- &psrlq ("mm5",6);
382
- &psllq ("mm1",56);
383
- &paddq ($A,"mm3"); # from BODY_00_15
384
- &movq ("mm3","mm7");
385
- &psrlq ("mm7",7-1);
386
- &pxor ("mm3","mm1");
387
- &psllq ("mm1",63-56);
388
- &pxor ("mm3","mm7");
389
- &psrlq ("mm7",8-7);
390
- &pxor ("mm3","mm1");
391
- &movq ("mm1","mm5");
392
- &psrlq ("mm5",19-6);
393
- &pxor ("mm7","mm3"); # sigma0
394
-
395
- &psllq ("mm6",3);
396
- &pxor ("mm1","mm5");
397
- &paddq ("mm7",&QWP(8*(9+16),"esp"));
398
- &pxor ("mm1","mm6");
399
- &psrlq ("mm5",61-19);
400
- &paddq ("mm7",&QWP(8*(9+16-9),"esp"));
401
- &pxor ("mm1","mm5");
402
- &psllq ("mm6",45-3);
403
- &movq ("mm5",$Fsse2); # load f
404
- &pxor ("mm1","mm6"); # sigma1
405
- &movq ("mm6",$Gsse2); # load g
406
-
407
- &paddq ("mm7","mm1"); # X[i]
408
- #&movq (&QWP(8*9,"esp"),"mm7"); # moved to BODY_00_15
409
-
410
- &BODY_00_15_sse2(2);
411
- }
412
- &dec ("edx");
413
- &jnz (&label("16_79_sse2"));
414
-
415
- #&movq ($A,$Asse2);
416
- &paddq ($A,"mm3"); # from BODY_00_15
417
- &movq ("mm1",$Bsse2);
418
- #&movq ($BxC,$Csse2);
419
- &movq ("mm3",$Dsse2);
420
- #&movq ($E,$Esse2);
421
- &movq ("mm5",$Fsse2);
422
- &movq ("mm6",$Gsse2);
423
- &movq ("mm7",$Hsse2);
424
-
425
- &pxor ($BxC,"mm1"); # de-magic
426
- &paddq ($A,&QWP(0,"esi"));
427
- &paddq ("mm1",&QWP(8,"esi"));
428
- &paddq ($BxC,&QWP(16,"esi"));
429
- &paddq ("mm3",&QWP(24,"esi"));
430
- &paddq ($E,&QWP(32,"esi"));
431
- &paddq ("mm5",&QWP(40,"esi"));
432
- &paddq ("mm6",&QWP(48,"esi"));
433
- &paddq ("mm7",&QWP(56,"esi"));
434
-
435
- &mov ("eax",8*80);
436
- &movq (&QWP(0,"esi"),$A);
437
- &movq (&QWP(8,"esi"),"mm1");
438
- &movq (&QWP(16,"esi"),$BxC);
439
- &movq (&QWP(24,"esi"),"mm3");
440
- &movq (&QWP(32,"esi"),$E);
441
- &movq (&QWP(40,"esi"),"mm5");
442
- &movq (&QWP(48,"esi"),"mm6");
443
- &movq (&QWP(56,"esi"),"mm7");
444
-
445
- &lea ("esp",&DWP(0,"esp","eax")); # destroy frame
446
- &sub ($K512,"eax"); # rewind K
447
-
448
- &cmp ("edi",&DWP(8*10+8,"esp")); # are we done yet?
449
- &jb (&label("loop_sse2"));
450
-
451
- &mov ("esp",&DWP(8*10+12,"esp")); # restore sp
452
- &emms ();
453
- &function_end_A();
454
-
455
- &set_label("SSSE3",32);
456
- { my ($cnt,$frame)=("ecx","edx");
457
- my @X=map("xmm$_",(0..7));
458
- my $j;
459
- my $i=0;
460
-
461
- &lea ($frame,&DWP(-64,"esp"));
462
- &sub ("esp",256);
463
-
464
- # fixed stack frame layout
465
- #
466
- # +0 A B C D E F G H # backing store
467
- # +64 X[0]+K[i] .. X[15]+K[i] # XMM->MM xfer area
468
- # +192 # XMM off-load ring buffer
469
- # +256 # saved parameters
470
-
471
- &movdqa (@X[1],&QWP(80*8,$K512)); # byte swap mask
472
- &movdqu (@X[0],&QWP(0,"edi"));
473
- &pshufb (@X[0],@X[1]);
474
- for ($j=0;$j<8;$j++) {
475
- &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]) if ($j>4); # off-load
476
- &movdqa (@X[3],&QWP(16*($j%8),$K512));
477
- &movdqa (@X[2],@X[1]) if ($j<7); # perpetuate byte swap mask
478
- &movdqu (@X[1],&QWP(16*($j+1),"edi")) if ($j<7); # next input
479
- &movdqa (@X[1],&QWP(16*(($j+1)%4),$frame)) if ($j==7);# restore @X[0]
480
- &paddq (@X[3],@X[0]);
481
- &pshufb (@X[1],@X[2]) if ($j<7);
482
- &movdqa (&QWP(16*($j%8)-128,$frame),@X[3]); # xfer X[i]+K[i]
483
-
484
- push(@X,shift(@X)); # rotate(@X)
485
- }
486
- #&jmp (&label("loop_ssse3"));
487
- &nop ();
488
-
489
- &set_label("loop_ssse3",32); # top of the per-block loop of the SSSE3 path
490
- &movdqa (@X[2],&QWP(16*(($j+1)%4),$frame)); # pre-restore @X[1]
491
- &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]); # off-load @X[3]
492
- &lea ($K512,&DWP(16*8,$K512)); # step past the 16 constants consumed by the 8-pass load loop
493
-
494
- #&movq ($Asse2,$A); # off-load A-H
495
- &movq ($Bsse2,"mm1");
496
- &mov ("ebx","edi"); # ebx = current block, used as the "same block" fallback below
497
- &movq ($Csse2,$BxC);
498
- &lea ("edi",&DWP(128,"edi")); # advance input
499
- &movq ($Dsse2,"mm3");
500
- &cmp ("edi","eax"); # advanced input pointer vs eax (presumably the end of input; eax is set outside this view)
501
- #&movq ($Esse2,$E);
502
- &movq ($Fsse2,"mm5");
503
- &cmovb ("ebx","edi"); # if edi is below eax, ebx = next block; otherwise it keeps the current one
504
- &movq ($Gsse2,"mm6");
505
- &mov ("ecx",4); # loop counter: 4 passes over the 00_47_ssse3 body
506
- &pxor ($BxC,"mm1"); # magic
507
- &movq ($Hsse2,"mm7");
508
- &pxor ("mm3","mm3"); # magic
509
-
510
- &jmp (&label("00_47_ssse3"));
511
-
512
- sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2; returns one round as a list of code strings that callers eval one at a time
513
- (
514
- '&movq ("mm1",$E)', # %mm1 is sliding right
515
- '&movq ("mm7",&QWP(((-8*$i)%128)-128,$frame))',# X[i]+K[i]
516
- '&pxor ("mm5","mm6")', # f^=g
517
- '&psrlq ("mm1",14)', # mm1 = e>>14
518
- '&movq (&QWP(8*($i+4)%64,"esp"),$E)', # modulo-scheduled save e
519
- '&pand ("mm5",$E)', # f&=e
520
- '&psllq ($E,23)', # $E is sliding left
521
- '&paddq ($A,"mm3")', # [h+=Maj(a,b,c)]
522
- '&movq ("mm3","mm1")', # %mm3 is T1
523
- '&psrlq("mm1",4)', # mm1 = e>>18
524
- '&pxor ("mm5","mm6")', # Ch(e,f,g)
525
- '&pxor ("mm3",$E)', # T1 ^= e<<23
526
- '&psllq($E,23)', # $E = e<<46
527
- '&pxor ("mm3","mm1")', # T1 ^= e>>18
528
- '&movq (&QWP(8*$i%64,"esp"),$A)', # modulo-scheduled save a
529
- '&paddq("mm7","mm5")', # X[i]+=Ch(e,f,g)
530
- '&pxor ("mm3",$E)', # T1 ^= e<<46
531
- '&psrlq("mm1",23)', # mm1 = e>>41
532
- '&paddq("mm7",&QWP(8*($i+7)%64,"esp"))', # X[i]+=h
533
- '&pxor ("mm3","mm1")', # T1 ^= e>>41
534
- '&psllq($E,4)', # $E = e<<50
535
- '&pxor ("mm3",$E)', # T1=Sigma1_512(e)
536
-
537
- '&movq ($E,&QWP(8*($i+3)%64,"esp"))', # e = load d, e in next round
538
- '&paddq ("mm3","mm7")', # T1+=X[i]
539
- '&movq ("mm5",$A)', # %mm5 is sliding right
540
- '&psrlq("mm5",28)', # mm5 = a>>28
541
- '&paddq ($E,"mm3")', # d += T1
542
- '&movq ("mm6",$A)', # %mm6 is sliding left
543
- '&movq ("mm7","mm5")', # mm7 accumulates Sigma0, starting from a>>28
544
- '&psllq("mm6",25)', # mm6 = a<<25
545
- '&movq ("mm1",&QWP(8*($i+1)%64,"esp"))', # load b
546
- '&psrlq("mm5",6)', # mm5 = a>>34
547
- '&pxor ("mm7","mm6")', # ^= a<<25
548
- '&psllq("mm6",5)', # mm6 = a<<30
549
- '&pxor ("mm7","mm5")', # ^= a>>34
550
- '&pxor ($A,"mm1")', # a^b, b^c in next round
551
- '&psrlq("mm5",5)', # mm5 = a>>39
552
- '&pxor ("mm7","mm6")', # ^= a<<30
553
- '&pand ($BxC,$A)', # (b^c)&(a^b)
554
- '&psllq("mm6",6)', # mm6 = a<<36
555
- '&pxor ("mm7","mm5")', # ^= a>>39
556
- '&pxor ($BxC,"mm1")', # [h=]Maj(a,b,c)
557
- '&pxor ("mm6","mm7")', # Sigma0_512(a)
558
- '&movq ("mm5",&QWP(8*($i+5-1)%64,"esp"))', # pre-load f
559
- '&paddq ($BxC,"mm6")', # h+=Sigma0(a)
560
- '&movq ("mm6",&QWP(8*($i+6-1)%64,"esp"))', # pre-load g
561
-
562
- '($A,$BxC) = ($BxC,$A); $i--;' # generator-side only: swap the two register names and step the round index
563
- );
564
- }
565
-
566
- &set_label("00_47_ssse3",32); # runtime loop target; executed 4 times via the ecx counter
567
-
568
- for(;$j<16;$j++) { # $j continues from 8: eight chunks, each scheduling two X values and interleaving two rounds
569
- my ($t0,$t2,$t1)=@X[2..4]; # scratch registers (note the order: $t2 is @X[3], $t1 is @X[4])
570
- my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3()); # code strings for two rounds
571
-
572
- &movdqa ($t2,@X[5]);
573
- &movdqa (@X[1],$t0); # restore @X[1]
574
- &palignr ($t0,@X[0],8); # X[1..2]
575
- &movdqa (&QWP(16*($j%4),$frame),@X[4]); # off-load @X[4]
576
- &palignr ($t2,@X[4],8); # X[9..10]
577
-
578
- &movdqa ($t1,$t0);
579
- &psrlq ($t0,7); # x>>7
580
- &paddq (@X[0],$t2); # X[0..1] += X[9..10]
581
- &movdqa ($t2,$t1);
582
- &psrlq ($t1,1); # x>>1
583
- &psllq ($t2,64-8); # x<<56
584
- &pxor ($t0,$t1);
585
- &psrlq ($t1,8-1); # x>>8
586
- &pxor ($t0,$t2);
587
- &psllq ($t2,8-1); # x<<63
588
- &pxor ($t0,$t1);
589
- &movdqa ($t1,@X[7]);
590
- &pxor ($t0,$t2); # sigma0(X[1..2])
591
- &movdqa ($t2,@X[7]);
592
- &psrlq ($t1,6); # x>>6
593
- &paddq (@X[0],$t0); # X[0..1] += sigma0(X[1..2])
594
-
595
- &movdqa ($t0,@X[7]);
596
- &psrlq ($t2,19); # x>>19
597
- &psllq ($t0,64-61); # x<<3
598
- &pxor ($t1,$t2);
599
- &psrlq ($t2,61-19); # x>>61
600
- &pxor ($t1,$t0);
601
- &psllq ($t0,61-19); # x<<45
602
- &pxor ($t1,$t2);
603
- &movdqa ($t2,&QWP(16*(($j+2)%4),$frame));# pre-restore @X[1]
604
- &pxor ($t1,$t0); # sigma1(X[14..15])
605
- &movdqa ($t0,&QWP(16*($j%8),$K512)); # two K constants for this chunk
606
- eval(shift(@insns)); # interleave one round instruction
607
- &paddq (@X[0],$t1); # X[0..1] += sigma1(X[14..15])
608
- eval(shift(@insns));
609
- eval(shift(@insns));
610
- eval(shift(@insns));
611
- eval(shift(@insns));
612
- &paddq ($t0,@X[0]); # K + freshly scheduled X
613
- foreach(@insns) { eval; } # emit the rest of the two rounds
614
- &movdqa (&QWP(16*($j%8)-128,$frame),$t0);# xfer X[i]+K[i]
615
-
616
- push(@X,shift(@X)); # rotate(@X)
617
- }
618
- &lea ($K512,&DWP(16*8,$K512)); # next 16 constants
619
- &dec ("ecx");
620
- &jnz (&label("00_47_ssse3"));
621
-
622
- &movdqa (@X[1],&QWP(0,$K512)); # byte swap mask ($K512 has advanced 5*16*8 = 80*8 bytes, i.e. to the end of the table)
623
- &lea ($K512,&DWP(-80*8,$K512)); # rewind
624
- &movdqu (@X[0],&QWP(0,"ebx")); # first 16 bytes of the block selected by the cmovb at the loop head
625
- &pshufb (@X[0],@X[1]);
626
-
627
- for ($j=0;$j<8;$j++) { # load next or same block
628
- my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3()); # two more rounds per chunk, no message scheduling here
629
-
630
- &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]) if ($j>4); # off-load
631
- &movdqa (@X[3],&QWP(16*($j%8),$K512)); # two K constants for this chunk
632
- &movdqa (@X[2],@X[1]) if ($j<7); # perpetuate byte swap mask
633
- &movdqu (@X[1],&QWP(16*($j+1),"ebx")) if ($j<7); # next input
634
- &movdqa (@X[1],&QWP(16*(($j+1)%4),$frame)) if ($j==7);# restore @X[0]
635
- &paddq (@X[3],@X[0]); # K + X for this chunk
636
- &pshufb (@X[1],@X[2]) if ($j<7); # byte-swap the chunk just loaded
637
- foreach(@insns) { eval; } # emit both rounds before the xfer slot is overwritten
638
- &movdqa (&QWP(16*($j%8)-128,$frame),@X[3]);# xfer X[i]+K[i]
639
-
640
- push(@X,shift(@X)); # rotate(@X)
641
- }
642
-
643
- #&movq ($A,$Asse2); # load A-H (commented-out loads: those values are not reloaded from the frame here)
644
- &movq ("mm1",$Bsse2);
645
- &paddq ($A,"mm3"); # from BODY_00_15
646
- #&movq ($BxC,$Csse2);
647
- &movq ("mm3",$Dsse2);
648
- #&movq ($E,$Esse2);
649
- #&movq ("mm5",$Fsse2);
650
- #&movq ("mm6",$Gsse2);
651
- &movq ("mm7",$Hsse2);
652
-
653
- &pxor ($BxC,"mm1"); # de-magic
654
- &paddq ($A,&QWP(0,"esi")); # add the eight qwords at 0..56(esi) to the working registers
655
- &paddq ("mm1",&QWP(8,"esi"));
656
- &paddq ($BxC,&QWP(16,"esi"));
657
- &paddq ("mm3",&QWP(24,"esi"));
658
- &paddq ($E,&QWP(32,"esi"));
659
- &paddq ("mm5",&QWP(40,"esi"));
660
- &paddq ("mm6",&QWP(48,"esi"));
661
- &paddq ("mm7",&QWP(56,"esi"));
662
-
663
- &movq (&QWP(0,"esi"),$A); # write the sums back to 0..56(esi)
664
- &movq (&QWP(8,"esi"),"mm1");
665
- &movq (&QWP(16,"esi"),$BxC);
666
- &movq (&QWP(24,"esi"),"mm3");
667
- &movq (&QWP(32,"esi"),$E);
668
- &movq (&QWP(40,"esi"),"mm5");
669
- &movq (&QWP(48,"esi"),"mm6");
670
- &movq (&QWP(56,"esi"),"mm7");
671
-
672
- &cmp ("edi","eax"); # are we done yet? (terminated with ';' so it is not parsed as "&cmp(...) & jb(...)")
673
- &jb (&label("loop_ssse3"));
674
-
675
- &mov ("esp",&DWP(64+12,$frame)); # restore sp
676
- &emms (); # leave MMX state before returning
677
- }
678
- &function_end_A();
679
- }
680
- &set_label("loop_x86",16); # top of the per-block loop of the integer-only path
681
- # copy input block to stack reversing byte and qword order
682
- for ($i=0;$i<8;$i++) { # 16 input bytes (two qwords) per pass
683
- &mov ("eax",&DWP($i*16+0,"edi"));
684
- &mov ("ebx",&DWP($i*16+4,"edi"));
685
- &mov ("ecx",&DWP($i*16+8,"edi"));
686
- &mov ("edx",&DWP($i*16+12,"edi"));
687
- &bswap ("eax");
688
- &bswap ("ebx");
689
- &bswap ("ecx");
690
- &bswap ("edx");
691
- &push ("eax"); # pushed first, so the dword from offset +0 ends up at the highest address
692
- &push ("ebx");
693
- &push ("ecx");
694
- &push ("edx");
695
- }
696
- &add ("edi",128); # advance input by the 128 bytes just copied
697
- &sub ("esp",9*8); # place for T,A,B,C,D,E,F,G,H
698
- &mov (&DWP(8*(9+16)+4,"esp"),"edi"); # store the advanced input pointer above the 9+16 qwords of this frame
699
-
700
- # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
701
- &lea ("edi",&DWP(8,"esp")); # destination: the A slot, one qword above T
702
- &mov ("ecx",16); # 16 dwords = eight 64-bit values
703
- &data_word(0xA5F3F689); # rep movsd (raw opcode bytes; on little-endian the trailing F3 A5 pair is the rep movsd)
704
-
705
- &set_label("00_15_x86",16);
706
- &BODY_00_15_x86(); # one round; defined outside this view
707
-
708
- &cmp (&LB("edx"),0x94); # 0x94 is the low byte of the 16th K512 entry; presumably edx holds the low dword of the K just used
709
- &jne (&label("00_15_x86"));
710
-
711
- &set_label("16_79_x86",16); # schedule one new X value, then run one round; repeated until the last K is seen
712
- #define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
713
- # LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
714
- # HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
715
- &mov ("ecx",&DWP(8*(9+15+16-1)+0,"esp")); # lo dword of X[-15]
716
- &mov ("edx",&DWP(8*(9+15+16-1)+4,"esp")); # hi dword of X[-15]
717
- &mov ("esi","ecx"); # esi: copy of lo, used for the left shifts
718
-
719
- &shr ("ecx",1); # lo>>1
720
- &mov ("edi","edx"); # edi: copy of hi, used for the left shifts
721
- &shr ("edx",1); # hi>>1
722
- &mov ("eax","ecx"); # eax accumulates the LO result
723
- &shl ("esi",24); # lo<<24
724
- &mov ("ebx","edx"); # ebx accumulates the HI result
725
- &shl ("edi",24); # hi<<24
726
- &xor ("ebx","esi");
727
-
728
- &shr ("ecx",7-1); # lo>>7
729
- &xor ("eax","edi");
730
- &shr ("edx",7-1); # hi>>7
731
- &xor ("eax","ecx");
732
- &shl ("esi",31-24); # lo<<31
733
- &xor ("ebx","edx");
734
- &shl ("edi",25-24); # hi<<25
735
- &xor ("ebx","esi");
736
-
737
- &shr ("ecx",8-7); # lo>>8
738
- &xor ("eax","edi");
739
- &shr ("edx",8-7); # hi>>8
740
- &xor ("eax","ecx");
741
- &shl ("edi",31-25); # hi<<31
742
- &xor ("ebx","edx");
743
- &xor ("eax","edi"); # T1 = sigma0(X[-15])
744
-
745
- &mov (&DWP(0,"esp"),"eax");
746
- &mov (&DWP(4,"esp"),"ebx"); # put T1 away
747
-
748
- #define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
749
- # LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
750
- # HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
751
- &mov ("ecx",&DWP(8*(9+15+16-14)+0,"esp")); # lo dword of X[-2]
752
- &mov ("edx",&DWP(8*(9+15+16-14)+4,"esp")); # hi dword of X[-2]
753
- &mov ("esi","ecx");
754
-
755
- &shr ("ecx",6); # lo>>6
756
- &mov ("edi","edx");
757
- &shr ("edx",6); # hi>>6
758
- &mov ("eax","ecx");
759
- &shl ("esi",3); # lo<<3
760
- &mov ("ebx","edx");
761
- &shl ("edi",3); # hi<<3
762
- &xor ("eax","esi");
763
-
764
- &shr ("ecx",19-6); # lo>>19
765
- &xor ("ebx","edi");
766
- &shr ("edx",19-6); # hi>>19
767
- &xor ("eax","ecx");
768
- &shl ("esi",13-3); # lo<<13
769
- &xor ("ebx","edx");
770
- &shl ("edi",13-3); # hi<<13
771
- &xor ("ebx","esi");
772
-
773
- &shr ("ecx",29-19); # lo>>29
774
- &xor ("eax","edi");
775
- &shr ("edx",29-19); # hi>>29
776
- &xor ("ebx","ecx");
777
- &shl ("edi",26-13); # hi<<26
778
- &xor ("eax","edx");
779
- &xor ("eax","edi"); # sigma1(X[-2])
780
-
781
- &mov ("ecx",&DWP(8*(9+15+16)+0,"esp")); # lo dword of X[-16]
782
- &mov ("edx",&DWP(8*(9+15+16)+4,"esp")); # hi dword of X[-16]
783
- &add ("eax",&DWP(0,"esp"));
784
- &adc ("ebx",&DWP(4,"esp")); # T1 = sigma1(X[-2])+T1
785
- &mov ("esi",&DWP(8*(9+15+16-9)+0,"esp")); # lo dword of X[-7]
786
- &mov ("edi",&DWP(8*(9+15+16-9)+4,"esp")); # hi dword of X[-7]
787
- &add ("eax","ecx");
788
- &adc ("ebx","edx"); # T1 += X[-16]
789
- &add ("eax","esi");
790
- &adc ("ebx","edi"); # T1 += X[-7]
791
- &mov (&DWP(8*(9+15)+0,"esp"),"eax");
792
- &mov (&DWP(8*(9+15)+4,"esp"),"ebx"); # save X[0]
793
-
794
- &BODY_00_15_x86(); # one round; defined outside this view
795
-
796
- &cmp (&LB("edx"),0x17); # 0x17 is the low byte of the last K512 entry; presumably edx holds the low dword of the K just used
797
- &jne (&label("16_79_x86"));
798
-
799
- &mov ("esi",&DWP(8*(9+16+80)+0,"esp"));# ctx
800
- &mov ("edi",&DWP(8*(9+16+80)+4,"esp"));# inp
801
- for($i=0;$i<4;$i++) { # two 64-bit values per pass, each added as a 32-bit add/adc pair
802
- &mov ("eax",&DWP($i*16+0,"esi"));
803
- &mov ("ebx",&DWP($i*16+4,"esi"));
804
- &mov ("ecx",&DWP($i*16+8,"esi"));
805
- &mov ("edx",&DWP($i*16+12,"esi"));
806
- &add ("eax",&DWP(8+($i*16)+0,"esp")); # working values start at 8(esp), above the T slot
807
- &adc ("ebx",&DWP(8+($i*16)+4,"esp"));
808
- &mov (&DWP($i*16+0,"esi"),"eax");
809
- &mov (&DWP($i*16+4,"esi"),"ebx");
810
- &add ("ecx",&DWP(8+($i*16)+8,"esp"));
811
- &adc ("edx",&DWP(8+($i*16)+12,"esp"));
812
- &mov (&DWP($i*16+8,"esi"),"ecx");
813
- &mov (&DWP($i*16+12,"esi"),"edx");
814
- }
815
- &add ("esp",8*(9+16+80)); # destroy frame
816
- &sub ($K512,8*80); # rewind K
817
-
818
- &cmp ("edi",&DWP(8,"esp")); # are we done yet? (compares inp with the value at 8(esp), presumably the end pointer stored by the prologue)
819
- &jb (&label("loop_x86"));
820
-
821
- &mov ("esp",&DWP(12,"esp")); # restore sp
822
- &function_end_A();
823
-
824
- &set_label("K512",64); # Yes! I keep it in the code segment!
825
- &data_word(0xd728ae22,0x428a2f98); # u64, low dword listed first: K[0] = 0x428a2f98d728ae22
826
- &data_word(0x23ef65cd,0x71374491); # u64
827
- &data_word(0xec4d3b2f,0xb5c0fbcf); # u64
828
- &data_word(0x8189dbbc,0xe9b5dba5); # u64
829
- &data_word(0xf348b538,0x3956c25b); # u64
830
- &data_word(0xb605d019,0x59f111f1); # u64
831
- &data_word(0xaf194f9b,0x923f82a4); # u64
832
- &data_word(0xda6d8118,0xab1c5ed5); # u64
833
- &data_word(0xa3030242,0xd807aa98); # u64
834
- &data_word(0x45706fbe,0x12835b01); # u64
835
- &data_word(0x4ee4b28c,0x243185be); # u64
836
- &data_word(0xd5ffb4e2,0x550c7dc3); # u64
837
- &data_word(0xf27b896f,0x72be5d74); # u64
838
- &data_word(0x3b1696b1,0x80deb1fe); # u64
839
- &data_word(0x25c71235,0x9bdc06a7); # u64
840
- &data_word(0xcf692694,0xc19bf174); # u64, 16th entry: its low byte 0x94 is what the 00_15_x86 loop test compares against
841
- &data_word(0x9ef14ad2,0xe49b69c1); # u64
842
- &data_word(0x384f25e3,0xefbe4786); # u64
843
- &data_word(0x8b8cd5b5,0x0fc19dc6); # u64
844
- &data_word(0x77ac9c65,0x240ca1cc); # u64
845
- &data_word(0x592b0275,0x2de92c6f); # u64
846
- &data_word(0x6ea6e483,0x4a7484aa); # u64
847
- &data_word(0xbd41fbd4,0x5cb0a9dc); # u64
848
- &data_word(0x831153b5,0x76f988da); # u64
849
- &data_word(0xee66dfab,0x983e5152); # u64
850
- &data_word(0x2db43210,0xa831c66d); # u64
851
- &data_word(0x98fb213f,0xb00327c8); # u64
852
- &data_word(0xbeef0ee4,0xbf597fc7); # u64
853
- &data_word(0x3da88fc2,0xc6e00bf3); # u64
854
- &data_word(0x930aa725,0xd5a79147); # u64
855
- &data_word(0xe003826f,0x06ca6351); # u64
856
- &data_word(0x0a0e6e70,0x14292967); # u64
857
- &data_word(0x46d22ffc,0x27b70a85); # u64
858
- &data_word(0x5c26c926,0x2e1b2138); # u64
859
- &data_word(0x5ac42aed,0x4d2c6dfc); # u64
860
- &data_word(0x9d95b3df,0x53380d13); # u64
861
- &data_word(0x8baf63de,0x650a7354); # u64
862
- &data_word(0x3c77b2a8,0x766a0abb); # u64
863
- &data_word(0x47edaee6,0x81c2c92e); # u64
864
- &data_word(0x1482353b,0x92722c85); # u64
865
- &data_word(0x4cf10364,0xa2bfe8a1); # u64
866
- &data_word(0xbc423001,0xa81a664b); # u64
867
- &data_word(0xd0f89791,0xc24b8b70); # u64
868
- &data_word(0x0654be30,0xc76c51a3); # u64
869
- &data_word(0xd6ef5218,0xd192e819); # u64
870
- &data_word(0x5565a910,0xd6990624); # u64
871
- &data_word(0x5771202a,0xf40e3585); # u64
872
- &data_word(0x32bbd1b8,0x106aa070); # u64
873
- &data_word(0xb8d2d0c8,0x19a4c116); # u64
874
- &data_word(0x5141ab53,0x1e376c08); # u64
875
- &data_word(0xdf8eeb99,0x2748774c); # u64
876
- &data_word(0xe19b48a8,0x34b0bcb5); # u64
877
- &data_word(0xc5c95a63,0x391c0cb3); # u64
878
- &data_word(0xe3418acb,0x4ed8aa4a); # u64
879
- &data_word(0x7763e373,0x5b9cca4f); # u64
880
- &data_word(0xd6b2b8a3,0x682e6ff3); # u64
881
- &data_word(0x5defb2fc,0x748f82ee); # u64
882
- &data_word(0x43172f60,0x78a5636f); # u64
883
- &data_word(0xa1f0ab72,0x84c87814); # u64
884
- &data_word(0x1a6439ec,0x8cc70208); # u64
885
- &data_word(0x23631e28,0x90befffa); # u64
886
- &data_word(0xde82bde9,0xa4506ceb); # u64
887
- &data_word(0xb2c67915,0xbef9a3f7); # u64
888
- &data_word(0xe372532b,0xc67178f2); # u64
889
- &data_word(0xea26619c,0xca273ece); # u64
890
- &data_word(0x21c0c207,0xd186b8c7); # u64
891
- &data_word(0xcde0eb1e,0xeada7dd6); # u64
892
- &data_word(0xee6ed178,0xf57d4f7f); # u64
893
- &data_word(0x72176fba,0x06f067aa); # u64
894
- &data_word(0xa2c898a6,0x0a637dc5); # u64
895
- &data_word(0xbef90dae,0x113f9804); # u64
896
- &data_word(0x131c471b,0x1b710b35); # u64
897
- &data_word(0x23047d84,0x28db77f5); # u64
898
- &data_word(0x40c72493,0x32caab7b); # u64
899
- &data_word(0x15c9bebc,0x3c9ebe0a); # u64
900
- &data_word(0x9c100d4c,0x431d67c4); # u64
901
- &data_word(0xcb3e42b6,0x4cc5d4be); # u64
902
- &data_word(0xfc657e2a,0x597f299c); # u64
903
- &data_word(0x3ad6faec,0x5fcb6fab); # u64
904
- &data_word(0x4a475817,0x6c44198c); # u64, 80th and last entry: its low byte 0x17 is what the 16_79_x86 loop test compares against
905
-
906
- &data_word(0x04050607,0x00010203); # byte swap (sits at offset 80*8 from K512, where the SSSE3 path loads it)
907
- &data_word(0x0c0d0e0f,0x08090a0b); # mask (as a pshufb control it reverses the bytes within each 64-bit half)
908
- &function_end_B("sha512_block_data_order");
909
- &asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
910
-
911
- &asm_finish();