ring-native 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (267) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,592 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
- # project. The module is, however, dual licensed under OpenSSL and
6
- # CRYPTOGAMS licenses depending on where you obtain it. For further
7
- # details see http://www.openssl.org/~appro/cryptogams/.
8
- # ====================================================================
9
-
10
- # October 2005
11
- #
12
- # This is a "teaser" code, as it can be improved in several ways...
13
- # First of all non-SSE2 path should be implemented (yes, for now it
14
- # performs Montgomery multiplication/convolution only on SSE2-capable
15
- # CPUs such as P4, others fall down to original code). Then inner loop
16
- # can be unrolled and modulo-scheduled to improve ILP and possibly
17
- # moved to 128-bit XMM register bank (though it would require input
18
- # rearrangement and/or increase bus bandwidth utilization). Dedicated
19
- # squaring procedure should give further performance improvement...
20
- # Yet, for being draft, the code improves rsa512 *sign* benchmark by
21
- # 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
22
-
23
- # December 2006
24
- #
25
- # Modulo-scheduling SSE2 loops results in further 15-20% improvement.
26
- # Integer-only code [being equipped with dedicated squaring procedure]
27
- # gives ~40% on rsa512 sign benchmark...
28
-
29
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
30
- push(@INC,"${dir}","${dir}../../perlasm");
31
- require "x86asm.pl";
32
-
33
- &asm_init($ARGV[0],$0);
34
-
35
- $sse2=0;
36
- for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
37
-
38
- &external_label("OPENSSL_ia32cap_P") if ($sse2);
39
-
40
- &function_begin("bn_mul_mont");
41
-
42
- $i="edx";
43
- $j="ecx";
44
- $ap="esi"; $tp="esi"; # overlapping variables!!!
45
- $rp="edi"; $bp="edi"; # overlapping variables!!!
46
- $np="ebp";
47
- $num="ebx";
48
-
49
- $_num=&DWP(4*0,"esp"); # stack top layout
50
- $_rp=&DWP(4*1,"esp");
51
- $_ap=&DWP(4*2,"esp");
52
- $_bp=&DWP(4*3,"esp");
53
- $_np=&DWP(4*4,"esp");
54
- $_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
55
- $_sp=&DWP(4*6,"esp");
56
- $_bpend=&DWP(4*7,"esp");
57
- $frame=32; # size of above frame rounded up to 16n
58
-
59
- &xor ("eax","eax");
60
- &mov ("edi",&wparam(5)); # int num
61
- &cmp ("edi",4);
62
- &jl (&label("just_leave"));
63
-
64
- &lea ("esi",&wparam(0)); # put aside pointer to argument block
65
- &lea ("edx",&wparam(1)); # load ap
66
- &mov ("ebp","esp"); # saved stack pointer!
67
- &add ("edi",2); # extra two words on top of tp
68
- &neg ("edi");
69
- &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
70
- &neg ("edi");
71
-
72
- # minimize cache contention by arranging 2K window between stack
73
- # pointer and ap argument [np is also position sensitive vector,
74
- # but it's assumed to be near ap, as it's allocated at ~same
75
- # time].
76
- &mov ("eax","esp");
77
- &sub ("eax","edx");
78
- &and ("eax",2047);
79
- &sub ("esp","eax"); # this aligns sp and ap modulo 2048
80
-
81
- &xor ("edx","esp");
82
- &and ("edx",2048);
83
- &xor ("edx",2048);
84
- &sub ("esp","edx"); # this splits them apart modulo 4096
85
-
86
- &and ("esp",-64); # align to cache line
87
-
88
- ################################# load argument block...
89
- &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
90
- &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
91
- &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
92
- &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
93
- &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
94
- #&mov ("edi",&DWP(5*4,"esi"));# int num
95
-
96
- &mov ("esi",&DWP(0,"esi")); # pull n0[0]
97
- &mov ($_rp,"eax"); # ... save a copy of argument block
98
- &mov ($_ap,"ebx");
99
- &mov ($_bp,"ecx");
100
- &mov ($_np,"edx");
101
- &mov ($_n0,"esi");
102
- &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
103
- #&mov ($_num,$num); # redundant as $num is not reused
104
- &mov ($_sp,"ebp"); # saved stack pointer!
105
-
106
- if($sse2) {
107
- $acc0="mm0"; # mmx register bank layout
108
- $acc1="mm1";
109
- $car0="mm2";
110
- $car1="mm3";
111
- $mul0="mm4";
112
- $mul1="mm5";
113
- $temp="mm6";
114
- $mask="mm7";
115
-
116
- &picmeup("eax","OPENSSL_ia32cap_P");
117
- &bt (&DWP(0,"eax"),26);
118
- &jnc (&label("non_sse2"));
119
-
120
- &mov ("eax",-1);
121
- &movd ($mask,"eax"); # mask 32 lower bits
122
-
123
- &mov ($ap,$_ap); # load input pointers
124
- &mov ($bp,$_bp);
125
- &mov ($np,$_np);
126
-
127
- &xor ($i,$i); # i=0
128
- &xor ($j,$j); # j=0
129
-
130
- &movd ($mul0,&DWP(0,$bp)); # bp[0]
131
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
132
- &movd ($car1,&DWP(0,$np)); # np[0]
133
-
134
- &pmuludq($mul1,$mul0); # ap[0]*bp[0]
135
- &movq ($car0,$mul1);
136
- &movq ($acc0,$mul1); # I wish movd worked for
137
- &pand ($acc0,$mask); # inter-register transfers
138
-
139
- &pmuludq($mul1,$_n0q); # *=n0
140
-
141
- &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
142
- &paddq ($car1,$acc0);
143
-
144
- &movd ($acc1,&DWP(4,$np)); # np[1]
145
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
146
-
147
- &psrlq ($car0,32);
148
- &psrlq ($car1,32);
149
-
150
- &inc ($j); # j++
151
- &set_label("1st",16);
152
- &pmuludq($acc0,$mul0); # ap[j]*bp[0]
153
- &pmuludq($acc1,$mul1); # np[j]*m1
154
- &paddq ($car0,$acc0); # +=c0
155
- &paddq ($car1,$acc1); # +=c1
156
-
157
- &movq ($acc0,$car0);
158
- &pand ($acc0,$mask);
159
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
160
- &paddq ($car1,$acc0); # +=ap[j]*bp[0];
161
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
162
- &psrlq ($car0,32);
163
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
164
- &psrlq ($car1,32);
165
-
166
- &lea ($j,&DWP(1,$j));
167
- &cmp ($j,$num);
168
- &jl (&label("1st"));
169
-
170
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
171
- &pmuludq($acc1,$mul1); # np[num-1]*m1
172
- &paddq ($car0,$acc0); # +=c0
173
- &paddq ($car1,$acc1); # +=c1
174
-
175
- &movq ($acc0,$car0);
176
- &pand ($acc0,$mask);
177
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
178
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
179
-
180
- &psrlq ($car0,32);
181
- &psrlq ($car1,32);
182
-
183
- &paddq ($car1,$car0);
184
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
185
-
186
- &inc ($i); # i++
187
- &set_label("outer");
188
- &xor ($j,$j); # j=0
189
-
190
- &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
191
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
192
- &movd ($temp,&DWP($frame,"esp")); # tp[0]
193
- &movd ($car1,&DWP(0,$np)); # np[0]
194
- &pmuludq($mul1,$mul0); # ap[0]*bp[i]
195
-
196
- &paddq ($mul1,$temp); # +=tp[0]
197
- &movq ($acc0,$mul1);
198
- &movq ($car0,$mul1);
199
- &pand ($acc0,$mask);
200
-
201
- &pmuludq($mul1,$_n0q); # *=n0
202
-
203
- &pmuludq($car1,$mul1);
204
- &paddq ($car1,$acc0);
205
-
206
- &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
207
- &movd ($acc1,&DWP(4,$np)); # np[1]
208
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
209
-
210
- &psrlq ($car0,32);
211
- &psrlq ($car1,32);
212
- &paddq ($car0,$temp); # +=tp[1]
213
-
214
- &inc ($j); # j++
215
- &dec ($num);
216
- &set_label("inner");
217
- &pmuludq($acc0,$mul0); # ap[j]*bp[i]
218
- &pmuludq($acc1,$mul1); # np[j]*m1
219
- &paddq ($car0,$acc0); # +=c0
220
- &paddq ($car1,$acc1); # +=c1
221
-
222
- &movq ($acc0,$car0);
223
- &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
224
- &pand ($acc0,$mask);
225
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
226
- &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
227
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
228
- &psrlq ($car0,32);
229
- &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
230
- &psrlq ($car1,32);
231
- &paddq ($car0,$temp); # +=tp[j+1]
232
-
233
- &dec ($num);
234
- &lea ($j,&DWP(1,$j)); # j++
235
- &jnz (&label("inner"));
236
-
237
- &mov ($num,$j);
238
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
239
- &pmuludq($acc1,$mul1); # np[num-1]*m1
240
- &paddq ($car0,$acc0); # +=c0
241
- &paddq ($car1,$acc1); # +=c1
242
-
243
- &movq ($acc0,$car0);
244
- &pand ($acc0,$mask);
245
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
246
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
247
- &psrlq ($car0,32);
248
- &psrlq ($car1,32);
249
-
250
- &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
251
- &paddq ($car1,$car0);
252
- &paddq ($car1,$temp);
253
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
254
-
255
- &lea ($i,&DWP(1,$i)); # i++
256
- &cmp ($i,$num);
257
- &jle (&label("outer"));
258
-
259
- &emms (); # done with mmx bank
260
- &jmp (&label("common_tail"));
261
-
262
- &set_label("non_sse2",16);
263
- }
264
-
265
- if (0) {
266
- &mov ("esp",$_sp);
267
- &xor ("eax","eax"); # signal "not fast enough [yet]"
268
- &jmp (&label("just_leave"));
269
- # While the below code provides competitive performance for
270
- # all key lengths on modern Intel cores, it's still more
271
- # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
272
- # means compared to the original integer-only assembler.
273
- # 512-bit RSA sign is better by ~40%, but that's about all
274
- # one can say about all CPUs...
275
- } else {
276
- $inp="esi"; # integer path uses these registers differently
277
- $word="edi";
278
- $carry="ebp";
279
-
280
- &mov ($inp,$_ap);
281
- &lea ($carry,&DWP(1,$num));
282
- &mov ($word,$_bp);
283
- &xor ($j,$j); # j=0
284
- &mov ("edx",$inp);
285
- &and ($carry,1); # see if num is even
286
- &sub ("edx",$word); # see if ap==bp
287
- &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
288
- &or ($carry,"edx");
289
- &mov ($word,&DWP(0,$word)); # bp[0]
290
- &jz (&label("bn_sqr_mont"));
291
- &mov ($_bpend,"eax");
292
- &mov ("eax",&DWP(0,$inp));
293
- &xor ("edx","edx");
294
-
295
- &set_label("mull",16);
296
- &mov ($carry,"edx");
297
- &mul ($word); # ap[j]*bp[0]
298
- &add ($carry,"eax");
299
- &lea ($j,&DWP(1,$j));
300
- &adc ("edx",0);
301
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
302
- &cmp ($j,$num);
303
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
304
- &jl (&label("mull"));
305
-
306
- &mov ($carry,"edx");
307
- &mul ($word); # ap[num-1]*bp[0]
308
- &mov ($word,$_n0);
309
- &add ("eax",$carry);
310
- &mov ($inp,$_np);
311
- &adc ("edx",0);
312
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
313
-
314
- &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
315
- &xor ($j,$j);
316
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
317
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
318
-
319
- &mov ("eax",&DWP(0,$inp)); # np[0]
320
- &mul ($word); # np[0]*m
321
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
322
- &mov ("eax",&DWP(4,$inp)); # np[1]
323
- &adc ("edx",0);
324
- &inc ($j);
325
-
326
- &jmp (&label("2ndmadd"));
327
-
328
- &set_label("1stmadd",16);
329
- &mov ($carry,"edx");
330
- &mul ($word); # ap[j]*bp[i]
331
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
332
- &lea ($j,&DWP(1,$j));
333
- &adc ("edx",0);
334
- &add ($carry,"eax");
335
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
336
- &adc ("edx",0);
337
- &cmp ($j,$num);
338
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
339
- &jl (&label("1stmadd"));
340
-
341
- &mov ($carry,"edx");
342
- &mul ($word); # ap[num-1]*bp[i]
343
- &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
344
- &mov ($word,$_n0);
345
- &adc ("edx",0);
346
- &mov ($inp,$_np);
347
- &add ($carry,"eax");
348
- &adc ("edx",0);
349
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
350
-
351
- &xor ($j,$j);
352
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
353
- &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
354
- &adc ($j,0);
355
- &mov ("eax",&DWP(0,$inp)); # np[0]
356
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
357
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
358
-
359
- &mul ($word); # np[0]*m
360
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
361
- &mov ("eax",&DWP(4,$inp)); # np[1]
362
- &adc ("edx",0);
363
- &mov ($j,1);
364
-
365
- &set_label("2ndmadd",16);
366
- &mov ($carry,"edx");
367
- &mul ($word); # np[j]*m
368
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
369
- &lea ($j,&DWP(1,$j));
370
- &adc ("edx",0);
371
- &add ($carry,"eax");
372
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
373
- &adc ("edx",0);
374
- &cmp ($j,$num);
375
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
376
- &jl (&label("2ndmadd"));
377
-
378
- &mov ($carry,"edx");
379
- &mul ($word); # np[j]*m
380
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
381
- &adc ("edx",0);
382
- &add ($carry,"eax");
383
- &adc ("edx",0);
384
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
385
-
386
- &xor ("eax","eax");
387
- &mov ($j,$_bp); # &bp[i]
388
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
389
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
390
- &lea ($j,&DWP(4,$j));
391
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
392
- &cmp ($j,$_bpend);
393
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
394
- &je (&label("common_tail"));
395
-
396
- &mov ($word,&DWP(0,$j)); # bp[i+1]
397
- &mov ($inp,$_ap);
398
- &mov ($_bp,$j); # &bp[++i]
399
- &xor ($j,$j);
400
- &xor ("edx","edx");
401
- &mov ("eax",&DWP(0,$inp));
402
- &jmp (&label("1stmadd"));
403
-
404
- &set_label("bn_sqr_mont",16);
405
- $sbit=$num;
406
- &mov ($_num,$num);
407
- &mov ($_bp,$j); # i=0
408
-
409
- &mov ("eax",$word); # ap[0]
410
- &mul ($word); # ap[0]*ap[0]
411
- &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
412
- &mov ($sbit,"edx");
413
- &shr ("edx",1);
414
- &and ($sbit,1);
415
- &inc ($j);
416
- &set_label("sqr",16);
417
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
418
- &mov ($carry,"edx");
419
- &mul ($word); # ap[j]*ap[0]
420
- &add ("eax",$carry);
421
- &lea ($j,&DWP(1,$j));
422
- &adc ("edx",0);
423
- &lea ($carry,&DWP(0,$sbit,"eax",2));
424
- &shr ("eax",31);
425
- &cmp ($j,$_num);
426
- &mov ($sbit,"eax");
427
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
428
- &jl (&label("sqr"));
429
-
430
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
431
- &mov ($carry,"edx");
432
- &mul ($word); # ap[num-1]*ap[0]
433
- &add ("eax",$carry);
434
- &mov ($word,$_n0);
435
- &adc ("edx",0);
436
- &mov ($inp,$_np);
437
- &lea ($carry,&DWP(0,$sbit,"eax",2));
438
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
439
- &shr ("eax",31);
440
- &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
441
-
442
- &lea ($carry,&DWP(0,"eax","edx",2));
443
- &mov ("eax",&DWP(0,$inp)); # np[0]
444
- &shr ("edx",31);
445
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
446
- &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
447
-
448
- &mul ($word); # np[0]*m
449
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
450
- &mov ($num,$j);
451
- &adc ("edx",0);
452
- &mov ("eax",&DWP(4,$inp)); # np[1]
453
- &mov ($j,1);
454
-
455
- &set_label("3rdmadd",16);
456
- &mov ($carry,"edx");
457
- &mul ($word); # np[j]*m
458
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
459
- &adc ("edx",0);
460
- &add ($carry,"eax");
461
- &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
462
- &adc ("edx",0);
463
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
464
-
465
- &mov ($carry,"edx");
466
- &mul ($word); # np[j+1]*m
467
- &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
468
- &lea ($j,&DWP(2,$j));
469
- &adc ("edx",0);
470
- &add ($carry,"eax");
471
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
472
- &adc ("edx",0);
473
- &cmp ($j,$num);
474
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
475
- &jl (&label("3rdmadd"));
476
-
477
- &mov ($carry,"edx");
478
- &mul ($word); # np[j]*m
479
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
480
- &adc ("edx",0);
481
- &add ($carry,"eax");
482
- &adc ("edx",0);
483
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
484
-
485
- &mov ($j,$_bp); # i
486
- &xor ("eax","eax");
487
- &mov ($inp,$_ap);
488
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
489
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
490
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
491
- &cmp ($j,$num);
492
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
493
- &je (&label("common_tail"));
494
-
495
- &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
496
- &lea ($j,&DWP(1,$j));
497
- &mov ("eax",$word);
498
- &mov ($_bp,$j); # ++i
499
- &mul ($word); # ap[i]*ap[i]
500
- &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
501
- &adc ("edx",0);
502
- &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
503
- &xor ($carry,$carry);
504
- &cmp ($j,$num);
505
- &lea ($j,&DWP(1,$j));
506
- &je (&label("sqrlast"));
507
-
508
- &mov ($sbit,"edx"); # zaps $num
509
- &shr ("edx",1);
510
- &and ($sbit,1);
511
- &set_label("sqradd",16);
512
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
513
- &mov ($carry,"edx");
514
- &mul ($word); # ap[j]*ap[i]
515
- &add ("eax",$carry);
516
- &lea ($carry,&DWP(0,"eax","eax"));
517
- &adc ("edx",0);
518
- &shr ("eax",31);
519
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
520
- &lea ($j,&DWP(1,$j));
521
- &adc ("eax",0);
522
- &add ($carry,$sbit);
523
- &adc ("eax",0);
524
- &cmp ($j,$_num);
525
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
526
- &mov ($sbit,"eax");
527
- &jle (&label("sqradd"));
528
-
529
- &mov ($carry,"edx");
530
- &add ("edx","edx");
531
- &shr ($carry,31);
532
- &add ("edx",$sbit);
533
- &adc ($carry,0);
534
- &set_label("sqrlast");
535
- &mov ($word,$_n0);
536
- &mov ($inp,$_np);
537
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
538
-
539
- &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
540
- &mov ("eax",&DWP(0,$inp)); # np[0]
541
- &adc ($carry,0);
542
- &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
543
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
544
-
545
- &mul ($word); # np[0]*m
546
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
547
- &lea ($num,&DWP(-1,$j));
548
- &adc ("edx",0);
549
- &mov ($j,1);
550
- &mov ("eax",&DWP(4,$inp)); # np[1]
551
-
552
- &jmp (&label("3rdmadd"));
553
- }
554
-
555
- &set_label("common_tail",16);
556
- &mov ($np,$_np); # load modulus pointer
557
- &mov ($rp,$_rp); # load result pointer
558
- &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
559
-
560
- &mov ("eax",&DWP(0,$tp)); # tp[0]
561
- &mov ($j,$num); # j=num-1
562
- &xor ($i,$i); # i=0 and clear CF!
563
-
564
- &set_label("sub",16);
565
- &sbb ("eax",&DWP(0,$np,$i,4));
566
- &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
567
- &dec ($j); # doesn't affect CF!
568
- &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
569
- &lea ($i,&DWP(1,$i)); # i++
570
- &jge (&label("sub"));
571
-
572
- &sbb ("eax",0); # handle upmost overflow bit
573
-
574
- &set_label("copy",16); # copy or in-place refresh
575
- &mov ("edx",&DWP(0,$tp,$num,4));
576
- &mov ($np,&DWP(0,$rp,$num,4));
577
- &xor ("edx",$np); # conditional select
578
- &and ("edx","eax");
579
- &xor ("edx",$np);
580
- &mov (&DWP(0,$tp,$num,4),$j) # zap temporary vector
581
- &mov (&DWP(0,$rp,$num,4),"edx"); # rp[i]=tp[i]
582
- &dec ($num);
583
- &jge (&label("copy"));
584
-
585
- &mov ("esp",$_sp); # pull saved stack pointer
586
- &mov ("eax",1);
587
- &set_label("just_leave");
588
- &function_end("bn_mul_mont");
589
-
590
- &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
591
-
592
- &asm_finish();