ring-native 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
@@ -0,0 +1,599 @@
1
+ #include <openssl/bn.h>
2
+
3
+ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && !defined(OPENSSL_WINDOWS)
4
+
5
+ #include "../internal.h"
6
+
7
+ /* x86_64 BIGNUM accelerator version 0.1, December 2002.
8
+ *
9
+ * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
10
+ * project.
11
+ *
12
+ * Rights for redistribution and usage in source and binary forms are
13
+ * granted according to the OpenSSL license. Warranty of any kind is
14
+ * disclaimed.
15
+ *
16
+ * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
17
+ * versions, like 1.0...
18
+ * A. Well, that's because this code is basically a quick-n-dirty
19
+ * proof-of-concept hack. As you can see it's implemented with
20
+ * inline assembler, which means that you're bound to GCC and that
21
+ * there might be enough room for further improvement.
22
+ *
23
+ * Q. Why inline assembler?
24
+ * A. x86_64 has its own ABI, which I'm not familiar with. This is
25
+ * why I decided to let the compiler take care of subroutine
26
+ * prologue/epilogue as well as register allocation. For reference,
27
+ * Win64 implements a different ABI for AMD64 than Linux does.
28
+ *
29
+ * Q. How much faster does it get?
30
+ * A. 'apps/openssl speed rsa dsa' output with no-asm:
31
+ *
32
+ * sign verify sign/s verify/s
33
+ * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
34
+ * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
35
+ * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
36
+ * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
37
+ * sign verify sign/s verify/s
38
+ * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
39
+ * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
40
+ * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
41
+ *
42
+ * 'apps/openssl speed rsa dsa' output with this module:
43
+ *
44
+ * sign verify sign/s verify/s
45
+ * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
46
+ * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
47
+ * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
48
+ * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
49
+ * sign verify sign/s verify/s
50
+ * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
51
+ * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
52
+ * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
53
+ *
54
+ * For reference, the IA-32 assembler implementation performs
55
+ * very much like 64-bit code compiled with no-asm on the same
56
+ * machine.
57
+ */
58
+
59
+ /* TODO(davidben): Get this file working on Windows x64. */
60
+
61
+ #undef mul
62
+ #undef mul_add
63
+
64
+ #define asm __asm__
65
+
66
+ /*
67
+ * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
68
+ * "g"(0) lets the compiler decide where it
69
+ * wants to keep the value of zero;
70
+ */
71
/* mul_add(r, a, word, carry): computes r + a * word + carry as a two-word
 * value, stores the low word back into |r| and leaves the high word in
 * |carry|. |r| is updated in memory ("+m") and |a| is read from memory ("m");
 * |carry| must be a modifiable BN_ULONG lvalue. */
#define mul_add(r, a, word, carry)                    \
  do {                                                \
    register BN_ULONG prod_hi, prod_lo;               \
    /* prod_hi:prod_lo = word * a */                  \
    __asm__("mulq %3"                                 \
            : "=a"(prod_lo), "=d"(prod_hi)            \
            : "a"(word), "m"(a)                       \
            : "cc");                                  \
    /* carry += prod_lo, rippling into prod_hi */     \
    __asm__("addq %2,%0; adcq %3,%1"                  \
            : "+r"(carry), "+d"(prod_hi)              \
            : "a"(prod_lo), "g"(0)                    \
            : "cc");                                  \
    /* r += carry, rippling into prod_hi */           \
    __asm__("addq %2,%0; adcq %3,%1"                  \
            : "+m"(r), "+d"(prod_hi)                  \
            : "r"(carry), "g"(0)                      \
            : "cc");                                  \
    carry = prod_hi;                                  \
  } while (0)
85
+
86
/* mul(r, a, word, carry): computes a * word + carry as a two-word value,
 * stores the low word in |r| and leaves the high word in |carry|.
 *
 * The multiplicand uses the "rm" constraint: mulq only accepts a register or
 * memory operand, whereas "g" would also let the compiler pick an immediate,
 * which cannot be assembled. */
#define mul(r, a, word, carry)                        \
  do {                                                \
    register BN_ULONG high, low;                      \
    /* high:low = word * a */                         \
    __asm__("mulq %3"                                 \
            : "=a"(low), "=d"(high)                   \
            : "a"(word), "rm"(a)                      \
            : "cc");                                  \
    /* carry += low, rippling into high */            \
    __asm__("addq %2,%0; adcq %3,%1"                  \
            : "+r"(carry), "+d"(high)                 \
            : "a"(low), "g"(0)                        \
            : "cc");                                  \
    (r) = carry;                                      \
    carry = high;                                     \
  } while (0)
96
#undef sqr
/* sqr(r0, r1, a): r1:r0 = a * a (r0 receives the low word, r1 the high word).
 * Wrapped in do/while(0) with no trailing semicolon so that `sqr(...);` is
 * exactly one statement and is safe in unbraced if/else bodies. */
#define sqr(r0, r1, a)                                       \
  do {                                                       \
    __asm__("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc"); \
  } while (0)
98
+
99
+ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
100
+ BN_ULONG w) {
101
+ BN_ULONG c1 = 0;
102
+
103
+ if (num <= 0) {
104
+ return (c1);
105
+ }
106
+
107
+ while (num & ~3) {
108
+ mul_add(rp[0], ap[0], w, c1);
109
+ mul_add(rp[1], ap[1], w, c1);
110
+ mul_add(rp[2], ap[2], w, c1);
111
+ mul_add(rp[3], ap[3], w, c1);
112
+ ap += 4;
113
+ rp += 4;
114
+ num -= 4;
115
+ }
116
+ if (num) {
117
+ mul_add(rp[0], ap[0], w, c1);
118
+ if (--num == 0) {
119
+ return c1;
120
+ }
121
+ mul_add(rp[1], ap[1], w, c1);
122
+ if (--num == 0) {
123
+ return c1;
124
+ }
125
+ mul_add(rp[2], ap[2], w, c1);
126
+ return c1;
127
+ }
128
+
129
+ return c1;
130
+ }
131
+
132
+ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
133
+ BN_ULONG c1 = 0;
134
+
135
+ if (num <= 0) {
136
+ return c1;
137
+ }
138
+
139
+ while (num & ~3) {
140
+ mul(rp[0], ap[0], w, c1);
141
+ mul(rp[1], ap[1], w, c1);
142
+ mul(rp[2], ap[2], w, c1);
143
+ mul(rp[3], ap[3], w, c1);
144
+ ap += 4;
145
+ rp += 4;
146
+ num -= 4;
147
+ }
148
+ if (num) {
149
+ mul(rp[0], ap[0], w, c1);
150
+ if (--num == 0) {
151
+ return c1;
152
+ }
153
+ mul(rp[1], ap[1], w, c1);
154
+ if (--num == 0) {
155
+ return c1;
156
+ }
157
+ mul(rp[2], ap[2], w, c1);
158
+ }
159
+ return c1;
160
+ }
161
+
162
+ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) {
163
+ if (n <= 0) {
164
+ return;
165
+ }
166
+
167
+ while (n & ~3) {
168
+ sqr(r[0], r[1], a[0]);
169
+ sqr(r[2], r[3], a[1]);
170
+ sqr(r[4], r[5], a[2]);
171
+ sqr(r[6], r[7], a[3]);
172
+ a += 4;
173
+ r += 8;
174
+ n -= 4;
175
+ }
176
+ if (n) {
177
+ sqr(r[0], r[1], a[0]);
178
+ if (--n == 0) {
179
+ return;
180
+ }
181
+ sqr(r[2], r[3], a[1]);
182
+ if (--n == 0) {
183
+ return;
184
+ }
185
+ sqr(r[4], r[5], a[2]);
186
+ }
187
+ }
188
+
189
+ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
190
+ BN_ULONG ret, waste;
191
+
192
+ asm("divq %4" : "=a"(ret), "=d"(waste) : "a"(l), "d"(h), "g"(d) : "cc");
193
+
194
+ return ret;
195
+ }
196
+
197
+ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
198
+ int n) {
199
+ BN_ULONG ret;
200
+ size_t i = 0;
201
+
202
+ if (n <= 0) {
203
+ return 0;
204
+ }
205
+
206
+ asm volatile (
207
+ " subq %0,%0 \n" /* clear carry */
208
+ " jmp 1f \n"
209
+ ".p2align 4 \n"
210
+ "1: movq (%4,%2,8),%0 \n"
211
+ " adcq (%5,%2,8),%0 \n"
212
+ " movq %0,(%3,%2,8) \n"
213
+ " lea 1(%2),%2 \n"
214
+ " loop 1b \n"
215
+ " sbbq %0,%0 \n"
216
+ : "=&r"(ret), "+c"(n), "+r"(i)
217
+ : "r"(rp), "r"(ap), "r"(bp)
218
+ : "cc", "memory");
219
+
220
+ return ret & 1;
221
+ }
222
+
223
+ #ifndef SIMICS
224
+ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
225
+ int n) {
226
+ BN_ULONG ret;
227
+ size_t i = 0;
228
+
229
+ if (n <= 0) {
230
+ return 0;
231
+ }
232
+
233
+ asm volatile (
234
+ " subq %0,%0 \n" /* clear borrow */
235
+ " jmp 1f \n"
236
+ ".p2align 4 \n"
237
+ "1: movq (%4,%2,8),%0 \n"
238
+ " sbbq (%5,%2,8),%0 \n"
239
+ " movq %0,(%3,%2,8) \n"
240
+ " lea 1(%2),%2 \n"
241
+ " loop 1b \n"
242
+ " sbbq %0,%0 \n"
243
+ : "=&r"(ret), "+c"(n), "+r"(i)
244
+ : "r"(rp), "r"(ap), "r"(bp)
245
+ : "cc", "memory");
246
+
247
+ return ret & 1;
248
+ }
249
+ #else
250
+ /* Simics 1.4<7 has buggy sbbq:-( */
251
+ #define BN_MASK2 0xffffffffffffffffL
252
+ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) {
253
+ BN_ULONG t1, t2;
254
+ int c = 0;
255
+
256
+ if (n <= 0) {
257
+ return (BN_ULONG)0;
258
+ }
259
+
260
+ for (;;) {
261
+ t1 = a[0];
262
+ t2 = b[0];
263
+ r[0] = (t1 - t2 - c) & BN_MASK2;
264
+ if (t1 != t2) {
265
+ c = (t1 < t2);
266
+ }
267
+ if (--n <= 0) {
268
+ break;
269
+ }
270
+
271
+ t1 = a[1];
272
+ t2 = b[1];
273
+ r[1] = (t1 - t2 - c) & BN_MASK2;
274
+ if (t1 != t2) {
275
+ c = (t1 < t2);
276
+ }
277
+ if (--n <= 0) {
278
+ break;
279
+ }
280
+
281
+ t1 = a[2];
282
+ t2 = b[2];
283
+ r[2] = (t1 - t2 - c) & BN_MASK2;
284
+ if (t1 != t2) {
285
+ c = (t1 < t2);
286
+ }
287
+ if (--n <= 0) {
288
+ break;
289
+ }
290
+
291
+ t1 = a[3];
292
+ t2 = b[3];
293
+ r[3] = (t1 - t2 - c) & BN_MASK2;
294
+ if (t1 != t2) {
295
+ c = (t1 < t2);
296
+ }
297
+ if (--n <= 0) {
298
+ break;
299
+ }
300
+
301
+ a += 4;
302
+ b += 4;
303
+ r += 4;
304
+ }
305
+ return c;
306
+ }
307
+ #endif
308
+
309
+ /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
310
+ /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
311
+ /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
312
+ /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
313
+ */
314
+
315
+ /* Keep in mind that carrying into high part of multiplication result can not
316
+ * overflow, because it cannot be all-ones. */
317
/* mul_add_c(a, b, c0, c1, c2): adds the two-word product a * b into the
 * three-word accumulator (c2, c1, c0), c0 being the least significant word.
 * |b| is read as a memory operand; c0, c1 and c2 must be modifiable lvalues. */
#define mul_add_c(a, b, c0, c1, c2)                   \
  do {                                                \
    BN_ULONG prod_lo, prod_hi;                        \
    /* prod_hi:prod_lo = a * b */                     \
    __asm__("mulq %3"                                 \
            : "=a"(prod_lo), "=d"(prod_hi)            \
            : "a"(a), "m"(b)                          \
            : "cc");                                  \
    /* (c2, c1, c0) += prod_hi:prod_lo */             \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"      \
            : "+r"(c0), "+r"(c1), "+r"(c2)            \
            : "r"(prod_lo), "r"(prod_hi), "g"(0)      \
            : "cc");                                  \
  } while (0)
326
+
327
/* sqr_add_c(a, i, c0, c1, c2): adds the two-word square a[i] * a[i] into the
 * three-word accumulator (c2, c1, c0), c0 being the least significant word.
 * c0, c1 and c2 must be modifiable lvalues. */
#define sqr_add_c(a, i, c0, c1, c2)                   \
  do {                                                \
    BN_ULONG prod_lo, prod_hi;                        \
    /* prod_hi:prod_lo = a[i] * a[i] */               \
    __asm__("mulq %2"                                 \
            : "=a"(prod_lo), "=d"(prod_hi)            \
            : "a"(a[i])                               \
            : "cc");                                  \
    /* (c2, c1, c0) += prod_hi:prod_lo */             \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"      \
            : "+r"(c0), "+r"(c1), "+r"(c2)            \
            : "r"(prod_lo), "r"(prod_hi), "g"(0)      \
            : "cc");                                  \
  } while (0)
336
+
337
/* mul_add_c2(a, b, c0, c1, c2): adds 2 * a * b into the three-word
 * accumulator (c2, c1, c0), c0 being the least significant word. The product
 * is computed once and then added twice. |b| is read as a memory operand;
 * c0, c1 and c2 must be modifiable lvalues. */
#define mul_add_c2(a, b, c0, c1, c2)                  \
  do {                                                \
    BN_ULONG prod_lo, prod_hi;                        \
    /* prod_hi:prod_lo = a * b */                     \
    __asm__("mulq %3"                                 \
            : "=a"(prod_lo), "=d"(prod_hi)            \
            : "a"(a), "m"(b)                          \
            : "cc");                                  \
    /* first addition of the product */               \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"      \
            : "+r"(c0), "+r"(c1), "+r"(c2)            \
            : "r"(prod_lo), "r"(prod_hi), "g"(0)      \
            : "cc");                                  \
    /* second addition of the same product */         \
    __asm__("addq %3,%0; adcq %4,%1; adcq %5,%2"      \
            : "+r"(c0), "+r"(c1), "+r"(c2)            \
            : "r"(prod_lo), "r"(prod_hi), "g"(0)      \
            : "cc");                                  \
  } while (0)

/* sqr_add_c2(a, i, j, c0, c1, c2): adds the doubled cross product
 * 2 * a[i] * a[j] into the accumulator (c2, c1, c0). */
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
352
+
353
+ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
354
+ BN_ULONG c1, c2, c3;
355
+
356
+ c1 = 0;
357
+ c2 = 0;
358
+ c3 = 0;
359
+ mul_add_c(a[0], b[0], c1, c2, c3);
360
+ r[0] = c1;
361
+ c1 = 0;
362
+ mul_add_c(a[0], b[1], c2, c3, c1);
363
+ mul_add_c(a[1], b[0], c2, c3, c1);
364
+ r[1] = c2;
365
+ c2 = 0;
366
+ mul_add_c(a[2], b[0], c3, c1, c2);
367
+ mul_add_c(a[1], b[1], c3, c1, c2);
368
+ mul_add_c(a[0], b[2], c3, c1, c2);
369
+ r[2] = c3;
370
+ c3 = 0;
371
+ mul_add_c(a[0], b[3], c1, c2, c3);
372
+ mul_add_c(a[1], b[2], c1, c2, c3);
373
+ mul_add_c(a[2], b[1], c1, c2, c3);
374
+ mul_add_c(a[3], b[0], c1, c2, c3);
375
+ r[3] = c1;
376
+ c1 = 0;
377
+ mul_add_c(a[4], b[0], c2, c3, c1);
378
+ mul_add_c(a[3], b[1], c2, c3, c1);
379
+ mul_add_c(a[2], b[2], c2, c3, c1);
380
+ mul_add_c(a[1], b[3], c2, c3, c1);
381
+ mul_add_c(a[0], b[4], c2, c3, c1);
382
+ r[4] = c2;
383
+ c2 = 0;
384
+ mul_add_c(a[0], b[5], c3, c1, c2);
385
+ mul_add_c(a[1], b[4], c3, c1, c2);
386
+ mul_add_c(a[2], b[3], c3, c1, c2);
387
+ mul_add_c(a[3], b[2], c3, c1, c2);
388
+ mul_add_c(a[4], b[1], c3, c1, c2);
389
+ mul_add_c(a[5], b[0], c3, c1, c2);
390
+ r[5] = c3;
391
+ c3 = 0;
392
+ mul_add_c(a[6], b[0], c1, c2, c3);
393
+ mul_add_c(a[5], b[1], c1, c2, c3);
394
+ mul_add_c(a[4], b[2], c1, c2, c3);
395
+ mul_add_c(a[3], b[3], c1, c2, c3);
396
+ mul_add_c(a[2], b[4], c1, c2, c3);
397
+ mul_add_c(a[1], b[5], c1, c2, c3);
398
+ mul_add_c(a[0], b[6], c1, c2, c3);
399
+ r[6] = c1;
400
+ c1 = 0;
401
+ mul_add_c(a[0], b[7], c2, c3, c1);
402
+ mul_add_c(a[1], b[6], c2, c3, c1);
403
+ mul_add_c(a[2], b[5], c2, c3, c1);
404
+ mul_add_c(a[3], b[4], c2, c3, c1);
405
+ mul_add_c(a[4], b[3], c2, c3, c1);
406
+ mul_add_c(a[5], b[2], c2, c3, c1);
407
+ mul_add_c(a[6], b[1], c2, c3, c1);
408
+ mul_add_c(a[7], b[0], c2, c3, c1);
409
+ r[7] = c2;
410
+ c2 = 0;
411
+ mul_add_c(a[7], b[1], c3, c1, c2);
412
+ mul_add_c(a[6], b[2], c3, c1, c2);
413
+ mul_add_c(a[5], b[3], c3, c1, c2);
414
+ mul_add_c(a[4], b[4], c3, c1, c2);
415
+ mul_add_c(a[3], b[5], c3, c1, c2);
416
+ mul_add_c(a[2], b[6], c3, c1, c2);
417
+ mul_add_c(a[1], b[7], c3, c1, c2);
418
+ r[8] = c3;
419
+ c3 = 0;
420
+ mul_add_c(a[2], b[7], c1, c2, c3);
421
+ mul_add_c(a[3], b[6], c1, c2, c3);
422
+ mul_add_c(a[4], b[5], c1, c2, c3);
423
+ mul_add_c(a[5], b[4], c1, c2, c3);
424
+ mul_add_c(a[6], b[3], c1, c2, c3);
425
+ mul_add_c(a[7], b[2], c1, c2, c3);
426
+ r[9] = c1;
427
+ c1 = 0;
428
+ mul_add_c(a[7], b[3], c2, c3, c1);
429
+ mul_add_c(a[6], b[4], c2, c3, c1);
430
+ mul_add_c(a[5], b[5], c2, c3, c1);
431
+ mul_add_c(a[4], b[6], c2, c3, c1);
432
+ mul_add_c(a[3], b[7], c2, c3, c1);
433
+ r[10] = c2;
434
+ c2 = 0;
435
+ mul_add_c(a[4], b[7], c3, c1, c2);
436
+ mul_add_c(a[5], b[6], c3, c1, c2);
437
+ mul_add_c(a[6], b[5], c3, c1, c2);
438
+ mul_add_c(a[7], b[4], c3, c1, c2);
439
+ r[11] = c3;
440
+ c3 = 0;
441
+ mul_add_c(a[7], b[5], c1, c2, c3);
442
+ mul_add_c(a[6], b[6], c1, c2, c3);
443
+ mul_add_c(a[5], b[7], c1, c2, c3);
444
+ r[12] = c1;
445
+ c1 = 0;
446
+ mul_add_c(a[6], b[7], c2, c3, c1);
447
+ mul_add_c(a[7], b[6], c2, c3, c1);
448
+ r[13] = c2;
449
+ c2 = 0;
450
+ mul_add_c(a[7], b[7], c3, c1, c2);
451
+ r[14] = c3;
452
+ r[15] = c1;
453
+ }
454
+
455
+ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
456
+ BN_ULONG c1, c2, c3;
457
+
458
+ c1 = 0;
459
+ c2 = 0;
460
+ c3 = 0;
461
+ mul_add_c(a[0], b[0], c1, c2, c3);
462
+ r[0] = c1;
463
+ c1 = 0;
464
+ mul_add_c(a[0], b[1], c2, c3, c1);
465
+ mul_add_c(a[1], b[0], c2, c3, c1);
466
+ r[1] = c2;
467
+ c2 = 0;
468
+ mul_add_c(a[2], b[0], c3, c1, c2);
469
+ mul_add_c(a[1], b[1], c3, c1, c2);
470
+ mul_add_c(a[0], b[2], c3, c1, c2);
471
+ r[2] = c3;
472
+ c3 = 0;
473
+ mul_add_c(a[0], b[3], c1, c2, c3);
474
+ mul_add_c(a[1], b[2], c1, c2, c3);
475
+ mul_add_c(a[2], b[1], c1, c2, c3);
476
+ mul_add_c(a[3], b[0], c1, c2, c3);
477
+ r[3] = c1;
478
+ c1 = 0;
479
+ mul_add_c(a[3], b[1], c2, c3, c1);
480
+ mul_add_c(a[2], b[2], c2, c3, c1);
481
+ mul_add_c(a[1], b[3], c2, c3, c1);
482
+ r[4] = c2;
483
+ c2 = 0;
484
+ mul_add_c(a[2], b[3], c3, c1, c2);
485
+ mul_add_c(a[3], b[2], c3, c1, c2);
486
+ r[5] = c3;
487
+ c3 = 0;
488
+ mul_add_c(a[3], b[3], c1, c2, c3);
489
+ r[6] = c1;
490
+ r[7] = c2;
491
+ }
492
+
493
+ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) {
494
+ BN_ULONG c1, c2, c3;
495
+
496
+ c1 = 0;
497
+ c2 = 0;
498
+ c3 = 0;
499
+ sqr_add_c(a, 0, c1, c2, c3);
500
+ r[0] = c1;
501
+ c1 = 0;
502
+ sqr_add_c2(a, 1, 0, c2, c3, c1);
503
+ r[1] = c2;
504
+ c2 = 0;
505
+ sqr_add_c(a, 1, c3, c1, c2);
506
+ sqr_add_c2(a, 2, 0, c3, c1, c2);
507
+ r[2] = c3;
508
+ c3 = 0;
509
+ sqr_add_c2(a, 3, 0, c1, c2, c3);
510
+ sqr_add_c2(a, 2, 1, c1, c2, c3);
511
+ r[3] = c1;
512
+ c1 = 0;
513
+ sqr_add_c(a, 2, c2, c3, c1);
514
+ sqr_add_c2(a, 3, 1, c2, c3, c1);
515
+ sqr_add_c2(a, 4, 0, c2, c3, c1);
516
+ r[4] = c2;
517
+ c2 = 0;
518
+ sqr_add_c2(a, 5, 0, c3, c1, c2);
519
+ sqr_add_c2(a, 4, 1, c3, c1, c2);
520
+ sqr_add_c2(a, 3, 2, c3, c1, c2);
521
+ r[5] = c3;
522
+ c3 = 0;
523
+ sqr_add_c(a, 3, c1, c2, c3);
524
+ sqr_add_c2(a, 4, 2, c1, c2, c3);
525
+ sqr_add_c2(a, 5, 1, c1, c2, c3);
526
+ sqr_add_c2(a, 6, 0, c1, c2, c3);
527
+ r[6] = c1;
528
+ c1 = 0;
529
+ sqr_add_c2(a, 7, 0, c2, c3, c1);
530
+ sqr_add_c2(a, 6, 1, c2, c3, c1);
531
+ sqr_add_c2(a, 5, 2, c2, c3, c1);
532
+ sqr_add_c2(a, 4, 3, c2, c3, c1);
533
+ r[7] = c2;
534
+ c2 = 0;
535
+ sqr_add_c(a, 4, c3, c1, c2);
536
+ sqr_add_c2(a, 5, 3, c3, c1, c2);
537
+ sqr_add_c2(a, 6, 2, c3, c1, c2);
538
+ sqr_add_c2(a, 7, 1, c3, c1, c2);
539
+ r[8] = c3;
540
+ c3 = 0;
541
+ sqr_add_c2(a, 7, 2, c1, c2, c3);
542
+ sqr_add_c2(a, 6, 3, c1, c2, c3);
543
+ sqr_add_c2(a, 5, 4, c1, c2, c3);
544
+ r[9] = c1;
545
+ c1 = 0;
546
+ sqr_add_c(a, 5, c2, c3, c1);
547
+ sqr_add_c2(a, 6, 4, c2, c3, c1);
548
+ sqr_add_c2(a, 7, 3, c2, c3, c1);
549
+ r[10] = c2;
550
+ c2 = 0;
551
+ sqr_add_c2(a, 7, 4, c3, c1, c2);
552
+ sqr_add_c2(a, 6, 5, c3, c1, c2);
553
+ r[11] = c3;
554
+ c3 = 0;
555
+ sqr_add_c(a, 6, c1, c2, c3);
556
+ sqr_add_c2(a, 7, 5, c1, c2, c3);
557
+ r[12] = c1;
558
+ c1 = 0;
559
+ sqr_add_c2(a, 7, 6, c2, c3, c1);
560
+ r[13] = c2;
561
+ c2 = 0;
562
+ sqr_add_c(a, 7, c3, c1, c2);
563
+ r[14] = c3;
564
+ r[15] = c1;
565
+ }
566
+
567
+ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
568
+ BN_ULONG c1, c2, c3;
569
+
570
+ c1 = 0;
571
+ c2 = 0;
572
+ c3 = 0;
573
+ sqr_add_c(a, 0, c1, c2, c3);
574
+ r[0] = c1;
575
+ c1 = 0;
576
+ sqr_add_c2(a, 1, 0, c2, c3, c1);
577
+ r[1] = c2;
578
+ c2 = 0;
579
+ sqr_add_c(a, 1, c3, c1, c2);
580
+ sqr_add_c2(a, 2, 0, c3, c1, c2);
581
+ r[2] = c3;
582
+ c3 = 0;
583
+ sqr_add_c2(a, 3, 0, c1, c2, c3);
584
+ sqr_add_c2(a, 2, 1, c1, c2, c3);
585
+ r[3] = c1;
586
+ c1 = 0;
587
+ sqr_add_c(a, 2, c2, c3, c1);
588
+ sqr_add_c2(a, 3, 1, c2, c3, c1);
589
+ r[4] = c2;
590
+ c2 = 0;
591
+ sqr_add_c2(a, 3, 2, c3, c1, c2);
592
+ r[5] = c3;
593
+ c3 = 0;
594
+ sqr_add_c(a, 3, c1, c2, c3);
595
+ r[6] = c1;
596
+ r[7] = c2;
597
+ }
598
+
599
+ #endif /* !NO_ASM && X86_64 && !WINDOWS */