aws-crt 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (581) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/VERSION +1 -1
  4. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/auth.h +1 -0
  5. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/aws_imds_client.h +5 -0
  6. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/credentials.h +5 -0
  7. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/private/aws_signing.h +1 -0
  8. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/private/credentials_utils.h +2 -0
  9. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/signing_config.h +1 -0
  10. data/aws-crt-ffi/crt/aws-c-auth/source/auth.c +3 -1
  11. data/aws-crt-ffi/crt/aws-c-auth/source/aws_imds_client.c +146 -63
  12. data/aws-crt-ffi/crt/aws-c-auth/source/aws_signing.c +41 -19
  13. data/aws-crt-ffi/crt/aws-c-auth/source/credentials_provider_imds.c +1 -0
  14. data/aws-crt-ffi/crt/aws-c-auth/source/credentials_utils.c +1 -0
  15. data/aws-crt-ffi/crt/aws-c-auth/source/signable_http_request.c +2 -1
  16. data/aws-crt-ffi/crt/aws-c-auth/source/signing_config.c +25 -0
  17. data/aws-crt-ffi/crt/aws-c-auth/tests/CMakeLists.txt +3 -0
  18. data/aws-crt-ffi/crt/aws-c-auth/tests/aws_imds_client_test.c +197 -31
  19. data/aws-crt-ffi/crt/aws-c-auth/tests/credentials_provider_imds_tests.c +16 -18
  20. data/aws-crt-ffi/crt/aws-c-auth/tests/sigv4_signing_tests.c +3 -1
  21. data/aws-crt-ffi/crt/aws-c-cal/include/aws/cal/private/opensslcrypto_common.h +22 -0
  22. data/aws-crt-ffi/crt/aws-c-cal/source/darwin/commoncrypto_aes.c +46 -17
  23. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_aes.c +1 -0
  24. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_platform_init.c +7 -0
  25. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_rsa.c +59 -2
  26. data/aws-crt-ffi/crt/aws-c-cal/source/unix/opensslcrypto_ecc.c +1 -0
  27. data/aws-crt-ffi/crt/aws-c-common/CMakeLists.txt +13 -1
  28. data/aws-crt-ffi/crt/aws-c-common/THIRD-PARTY-LICENSES.txt +28 -7
  29. data/aws-crt-ffi/crt/aws-c-common/bin/system_info/CMakeLists.txt +18 -0
  30. data/aws-crt-ffi/crt/aws-c-common/bin/system_info/print_system_info.c +48 -0
  31. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/allocator.h +23 -0
  32. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/byte_buf.h +12 -0
  33. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/cross_process_lock.h +35 -0
  34. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/hash_table.h +1 -0
  35. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/priority_queue.h +24 -0
  36. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/private/system_info_priv.h +37 -0
  37. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/system_info.h +47 -0
  38. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/system_resource_util.h +30 -0
  39. data/aws-crt-ffi/crt/aws-c-common/include/aws/testing/aws_test_harness.h +3 -2
  40. data/aws-crt-ffi/crt/aws-c-common/source/allocator.c +64 -13
  41. data/aws-crt-ffi/crt/aws-c-common/source/android/logging.c +14 -0
  42. data/aws-crt-ffi/crt/aws-c-common/source/common.c +3 -3
  43. data/aws-crt-ffi/crt/aws-c-common/source/file.c +96 -35
  44. data/aws-crt-ffi/crt/aws-c-common/source/linux/system_info.c +24 -0
  45. data/aws-crt-ffi/crt/aws-c-common/source/memtrace.c +10 -3
  46. data/aws-crt-ffi/crt/aws-c-common/source/platform_fallback_stubs/system_info.c +21 -0
  47. data/aws-crt-ffi/crt/aws-c-common/source/posix/cross_process_lock.c +141 -0
  48. data/aws-crt-ffi/crt/aws-c-common/source/posix/system_info.c +1 -1
  49. data/aws-crt-ffi/crt/aws-c-common/source/posix/system_resource_utils.c +32 -0
  50. data/aws-crt-ffi/crt/aws-c-common/source/priority_queue.c +24 -0
  51. data/aws-crt-ffi/crt/aws-c-common/source/system_info.c +80 -0
  52. data/aws-crt-ffi/crt/aws-c-common/source/task_scheduler.c +2 -2
  53. data/aws-crt-ffi/crt/aws-c-common/source/windows/cross_process_lock.c +93 -0
  54. data/aws-crt-ffi/crt/aws-c-common/source/windows/system_resource_utils.c +31 -0
  55. data/aws-crt-ffi/crt/aws-c-common/tests/CMakeLists.txt +16 -0
  56. data/aws-crt-ffi/crt/aws-c-common/tests/alloc_test.c +83 -22
  57. data/aws-crt-ffi/crt/aws-c-common/tests/cross_process_lock_tests.c +116 -0
  58. data/aws-crt-ffi/crt/aws-c-common/tests/file_test.c +103 -0
  59. data/aws-crt-ffi/crt/aws-c-common/tests/priority_queue_test.c +36 -0
  60. data/aws-crt-ffi/crt/aws-c-common/tests/system_info_tests.c +19 -0
  61. data/aws-crt-ffi/crt/aws-c-common/tests/system_resource_util_test.c +37 -0
  62. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/connection.h +9 -0
  63. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/http.h +1 -0
  64. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/connection_impl.h +5 -4
  65. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/connection_manager_system_vtable.h +10 -18
  66. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/proxy_impl.h +5 -1
  67. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/request_response_impl.h +5 -0
  68. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/request_response.h +10 -0
  69. data/aws-crt-ffi/crt/aws-c-http/source/connection.c +5 -2
  70. data/aws-crt-ffi/crt/aws-c-http/source/connection_manager.c +22 -21
  71. data/aws-crt-ffi/crt/aws-c-http/source/h1_connection.c +102 -17
  72. data/aws-crt-ffi/crt/aws-c-http/source/h1_stream.c +1 -0
  73. data/aws-crt-ffi/crt/aws-c-http/source/http.c +3 -0
  74. data/aws-crt-ffi/crt/aws-c-http/source/proxy_connection.c +2 -2
  75. data/aws-crt-ffi/crt/aws-c-http/tests/CMakeLists.txt +2 -0
  76. data/aws-crt-ffi/crt/aws-c-http/tests/test_connection_manager.c +18 -18
  77. data/aws-crt-ffi/crt/aws-c-http/tests/test_h1_client.c +111 -1
  78. data/aws-crt-ffi/crt/aws-c-http/tests/test_proxy.c +2 -2
  79. data/aws-crt-ffi/crt/aws-c-http/tests/test_stream_manager.c +2 -2
  80. data/aws-crt-ffi/crt/aws-c-io/include/aws/io/retry_strategy.h +1 -1
  81. data/aws-crt-ffi/crt/aws-c-io/source/exponential_backoff_retry_strategy.c +1 -1
  82. data/aws-crt-ffi/crt/aws-c-io/source/pkcs11_tls_op_handler.c +2 -4
  83. data/aws-crt-ffi/crt/aws-lc/CMakeLists.txt +16 -8
  84. data/aws-crt-ffi/crt/aws-lc/cmake/go.cmake +6 -0
  85. data/aws-crt-ffi/crt/aws-lc/crypto/CMakeLists.txt +6 -9
  86. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/a_time.c +34 -1
  87. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/a_utctm.c +4 -1
  88. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/asn1_test.cc +41 -0
  89. data/aws-crt-ffi/crt/aws-lc/crypto/bio/bio_mem.c +6 -7
  90. data/aws-crt-ffi/crt/aws-lc/crypto/bio/bio_test.cc +152 -16
  91. data/aws-crt-ffi/crt/aws-lc/crypto/bio/connect.c +6 -12
  92. data/aws-crt-ffi/crt/aws-lc/crypto/bio/fd.c +2 -2
  93. data/aws-crt-ffi/crt/aws-lc/crypto/bio/file.c +20 -8
  94. data/aws-crt-ffi/crt/aws-lc/crypto/bio/socket.c +2 -2
  95. data/aws-crt-ffi/crt/aws-lc/crypto/bio/socket_helper.c +2 -2
  96. data/aws-crt-ffi/crt/aws-lc/crypto/blake2/blake2.c +11 -1
  97. data/aws-crt-ffi/crt/aws-lc/crypto/bytestring/cbb.c +13 -3
  98. data/aws-crt-ffi/crt/aws-lc/crypto/bytestring/cbs.c +9 -0
  99. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/asm/chacha-armv8.pl +1 -1
  100. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/chacha.c +49 -8
  101. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/chacha_test.cc +110 -0
  102. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/internal.h +8 -1
  103. data/aws-crt-ffi/crt/aws-lc/crypto/compiler_test.cc +4 -1
  104. data/aws-crt-ffi/crt/aws-lc/crypto/conf/conf_test.cc +1 -0
  105. data/aws-crt-ffi/crt/aws-lc/crypto/crypto_test.cc +9 -0
  106. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/curve25519.c +189 -108
  107. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/curve25519_nohw.c +78 -6
  108. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/ed25519_test.cc +9 -0
  109. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/internal.h +24 -10
  110. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/spake25519.c +4 -4
  111. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/x25519_test.cc +80 -11
  112. data/aws-crt-ffi/crt/aws-lc/crypto/decrepit/evp/evp_do_all.c +2 -0
  113. data/aws-crt-ffi/crt/aws-lc/crypto/digest_extra/digest_extra.c +8 -0
  114. data/aws-crt-ffi/crt/aws-lc/crypto/digest_extra/digest_test.cc +110 -45
  115. data/aws-crt-ffi/crt/aws-lc/crypto/dsa/dsa_test.cc +8 -2
  116. data/aws-crt-ffi/crt/aws-lc/crypto/dsa/internal.h +18 -0
  117. data/aws-crt-ffi/crt/aws-lc/crypto/dynamic_loading_test.c +8 -5
  118. data/aws-crt-ffi/crt/aws-lc/crypto/ec_extra/ec_derive.c +4 -3
  119. data/aws-crt-ffi/crt/aws-lc/crypto/ec_extra/hash_to_curve.c +6 -18
  120. data/aws-crt-ffi/crt/aws-lc/crypto/endian_test.cc +308 -0
  121. data/aws-crt-ffi/crt/aws-lc/crypto/err/ssl.errordata +2 -0
  122. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_extra_test.cc +2 -0
  123. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_test.cc +11 -1
  124. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_tests.txt +25 -0
  125. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_ec_asn1.c +1 -1
  126. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_kem.c +2 -2
  127. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_rsa_asn1.c +1 -0
  128. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/print.c +7 -6
  129. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/scrypt.c +13 -1
  130. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/CMakeLists.txt +13 -4
  131. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/aes/aes_nohw.c +18 -6
  132. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bcm.c +12 -4
  133. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bn_assert_test.cc +77 -0
  134. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bn_test.cc +30 -0
  135. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bytes.c +112 -22
  136. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/div.c +12 -5
  137. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/exponentiation.c +54 -1
  138. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/gcd.c +5 -6
  139. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/internal.h +37 -15
  140. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/montgomery.c +4 -11
  141. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/montgomery_inv.c +51 -15
  142. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/cipher/aead.c +2 -2
  143. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/digest.c +29 -6
  144. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/digests.c +89 -0
  145. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/internal.h +4 -0
  146. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec.c +19 -36
  147. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_key.c +3 -3
  148. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_montgomery.c +9 -7
  149. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_test.cc +33 -9
  150. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/internal.h +17 -12
  151. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p224-64.c +5 -8
  152. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p256-nistz.c +8 -8
  153. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p256.c +9 -8
  154. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p384.c +33 -16
  155. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p521.c +14 -6
  156. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/scalar.c +26 -24
  157. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/simple_mul.c +8 -5
  158. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/wnaf.c +3 -3
  159. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ecdsa/ecdsa.c +9 -3
  160. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/evp/evp.c +43 -12
  161. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/evp/p_ec.c +4 -3
  162. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/hmac/hmac.c +3 -1
  163. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/modes/xts.c +26 -3
  164. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/cpu_jitter_test.cc +1 -1
  165. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/internal.h +20 -11
  166. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/rand.c +10 -10
  167. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/urandom.c +2 -2
  168. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/internal.h +59 -0
  169. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/padding.c +9 -3
  170. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/rsa.c +7 -0
  171. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/rsa_impl.c +51 -60
  172. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/service_indicator/service_indicator.c +5 -2
  173. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/service_indicator/service_indicator_test.cc +205 -5
  174. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/asm/sha1-armv8.pl +1 -1
  175. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/asm/sha512-armv8.pl +1 -1
  176. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/internal.h +8 -0
  177. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha3.c +37 -15
  178. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha3_test.cc +115 -110
  179. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha512.c +55 -1
  180. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sshkdf/sshkdf.c +2 -2
  181. data/aws-crt-ffi/crt/aws-lc/crypto/hmac_extra/hmac_test.cc +12 -0
  182. data/aws-crt-ffi/crt/aws-lc/crypto/hmac_extra/hmac_tests.txt +10 -0
  183. data/aws-crt-ffi/crt/aws-lc/crypto/hrss/asm/poly_rq_mul.S +2 -6
  184. data/aws-crt-ffi/crt/aws-lc/crypto/impl_dispatch_test.cc +9 -1
  185. data/aws-crt-ffi/crt/aws-lc/crypto/internal.h +90 -8
  186. data/aws-crt-ffi/crt/aws-lc/crypto/kem/kem.c +28 -27
  187. data/aws-crt-ffi/crt/aws-lc/crypto/kyber/kem_kyber.h +14 -0
  188. data/aws-crt-ffi/crt/aws-lc/crypto/obj/obj_dat.h +52 -2
  189. data/aws-crt-ffi/crt/aws-lc/crypto/obj/obj_mac.num +5 -0
  190. data/aws-crt-ffi/crt/aws-lc/crypto/obj/objects.txt +7 -0
  191. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/arm-xlate.pl +3 -14
  192. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/ppc-xlate.pl +1 -5
  193. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/x86_64-xlate.pl +4 -15
  194. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/x86asm.pl +4 -13
  195. data/aws-crt-ffi/crt/aws-lc/crypto/poly1305/poly1305_arm_asm.S +3 -13
  196. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/deterministic.c +4 -3
  197. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/fuchsia.c +4 -4
  198. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/rand_test.cc +0 -63
  199. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/windows.c +41 -19
  200. data/aws-crt-ffi/crt/aws-lc/crypto/rsa_extra/rsa_test.cc +3 -3
  201. data/aws-crt-ffi/crt/aws-lc/crypto/siphash/siphash.c +12 -5
  202. data/aws-crt-ffi/crt/aws-lc/crypto/siphash/siphash_test.cc +5 -5
  203. data/aws-crt-ffi/crt/aws-lc/crypto/stack/stack.c +68 -46
  204. data/aws-crt-ffi/crt/aws-lc/crypto/trust_token/pmbtoken.c +4 -4
  205. data/aws-crt-ffi/crt/aws-lc/crypto/trust_token/voprf.c +2 -2
  206. data/aws-crt-ffi/crt/aws-lc/crypto/x509/by_dir.c +0 -6
  207. data/aws-crt-ffi/crt/aws-lc/crypto/x509/internal.h +4 -1
  208. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_lu.c +33 -9
  209. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_test.cc +87 -0
  210. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_trs.c +1 -1
  211. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_vfy.c +35 -13
  212. data/aws-crt-ffi/crt/aws-lc/crypto/x509v3/v3_lib.c +2 -0
  213. data/aws-crt-ffi/crt/aws-lc/crypto/x509v3/v3_purp.c +4 -6
  214. data/aws-crt-ffi/crt/aws-lc/generated-src/crypto_test_data.cc +179 -151
  215. data/aws-crt-ffi/crt/aws-lc/generated-src/err_data.c +353 -349
  216. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  217. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  218. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  219. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  220. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  221. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  222. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/bn-armv8.S +4 -14
  223. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +4 -14
  224. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  225. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  226. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  227. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  228. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  229. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  230. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  231. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  232. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  233. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/test/trampoline-armv8.S +4 -14
  234. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/chacha/chacha-armv4.S +3 -13
  235. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S +3 -13
  236. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/armv4-mont.S +3 -13
  237. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/bsaes-armv7.S +3 -13
  238. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/ghash-armv4.S +3 -13
  239. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/ghashv8-armx.S +3 -13
  240. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha1-armv4-large.S +3 -13
  241. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S +3 -13
  242. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha512-armv4.S +3 -13
  243. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/vpaes-armv7.S +3 -13
  244. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/test/trampoline-armv4.S +3 -13
  245. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  246. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  247. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  248. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  249. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  250. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  251. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/bn-armv8.S +3 -13
  252. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +3 -13
  253. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  254. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  255. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  256. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  257. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  258. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  259. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  260. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  261. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  262. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/test/trampoline-armv8.S +3 -13
  263. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/chacha/chacha-armv4.S +3 -13
  264. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S +3 -13
  265. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/armv4-mont.S +3 -13
  266. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/bsaes-armv7.S +3 -13
  267. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/ghash-armv4.S +3 -13
  268. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/ghashv8-armx.S +3 -13
  269. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha1-armv4-large.S +3 -13
  270. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S +3 -13
  271. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha512-armv4.S +3 -13
  272. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/vpaes-armv7.S +3 -13
  273. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/test/trampoline-armv4.S +3 -13
  274. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S +1 -5
  275. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S +1 -5
  276. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/test/trampoline-ppc.S +1 -5
  277. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/chacha/chacha-x86.S +3 -12
  278. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S +3 -12
  279. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/bn-586.S +4 -13
  280. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/co-586.S +4 -13
  281. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S +3 -12
  282. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/ghash-x86.S +3 -12
  283. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/md5-586.S +4 -13
  284. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S +4 -13
  285. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S +3 -12
  286. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha512-586.S +3 -12
  287. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/vpaes-x86.S +3 -12
  288. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/x86-mont.S +3 -12
  289. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/test/trampoline-x86.S +3 -12
  290. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S +2 -11
  291. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +2 -11
  292. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aesni-sha1-x86_64.S +2 -11
  293. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aesni-sha256-x86_64.S +2 -11
  294. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +2 -11
  295. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-gcm-avx512.S +2 -11
  296. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +2 -11
  297. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S +2 -11
  298. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +2 -11
  299. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +2 -11
  300. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S +2 -11
  301. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/md5-x86_64.S +2 -11
  302. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +2 -11
  303. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +2 -11
  304. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S +2 -11
  305. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S +2 -11
  306. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S +2 -11
  307. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S +2 -11
  308. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S +2 -11
  309. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S +2 -11
  310. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont.S +2 -11
  311. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +2 -11
  312. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/test/trampoline-x86_64.S +2 -11
  313. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/chacha/chacha-x86.S +3 -12
  314. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S +3 -12
  315. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/bn-586.S +3 -12
  316. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/co-586.S +3 -12
  317. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S +3 -12
  318. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/ghash-x86.S +3 -12
  319. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/md5-586.S +3 -12
  320. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S +3 -12
  321. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S +3 -12
  322. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha512-586.S +3 -12
  323. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/vpaes-x86.S +3 -12
  324. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/x86-mont.S +3 -12
  325. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/test/trampoline-x86.S +3 -12
  326. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S +2 -11
  327. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +2 -11
  328. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aesni-sha1-x86_64.S +2 -11
  329. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aesni-sha256-x86_64.S +2 -11
  330. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +2 -11
  331. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-gcm-avx512.S +2 -11
  332. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +2 -11
  333. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S +2 -11
  334. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +2 -11
  335. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +2 -11
  336. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S +2 -11
  337. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/md5-x86_64.S +2 -11
  338. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +2 -11
  339. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +2 -11
  340. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S +2 -11
  341. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S +2 -11
  342. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S +2 -11
  343. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S +2 -11
  344. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S +2 -11
  345. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S +2 -11
  346. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont.S +2 -11
  347. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +2 -11
  348. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/test/trampoline-x86_64.S +2 -11
  349. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  350. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  351. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  352. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  353. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  354. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  355. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/bn-armv8.S +4 -14
  356. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +4 -14
  357. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  358. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  359. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  360. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  361. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  362. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  363. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  364. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  365. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  366. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/test/trampoline-armv8.S +4 -14
  367. data/aws-crt-ffi/crt/aws-lc/go.mod +4 -4
  368. data/aws-crt-ffi/crt/aws-lc/go.sum +8 -10
  369. data/aws-crt-ffi/crt/aws-lc/include/openssl/aead.h +2 -2
  370. data/aws-crt-ffi/crt/aws-lc/include/openssl/arm_arch.h +4 -119
  371. data/aws-crt-ffi/crt/aws-lc/include/openssl/asm_base.h +185 -0
  372. data/aws-crt-ffi/crt/aws-lc/include/openssl/asn1.h +5 -0
  373. data/aws-crt-ffi/crt/aws-lc/include/openssl/base.h +31 -134
  374. data/aws-crt-ffi/crt/aws-lc/include/openssl/bio.h +30 -18
  375. data/aws-crt-ffi/crt/aws-lc/include/openssl/bn.h +0 -2
  376. data/aws-crt-ffi/crt/aws-lc/include/openssl/chacha.h +6 -0
  377. data/aws-crt-ffi/crt/aws-lc/include/openssl/cipher.h +2 -2
  378. data/aws-crt-ffi/crt/aws-lc/include/openssl/digest.h +9 -6
  379. data/aws-crt-ffi/crt/aws-lc/include/openssl/dsa.h +0 -21
  380. data/aws-crt-ffi/crt/aws-lc/include/openssl/ec.h +1 -1
  381. data/aws-crt-ffi/crt/aws-lc/include/openssl/err.h +1 -1
  382. data/aws-crt-ffi/crt/aws-lc/include/openssl/evp.h +8 -5
  383. data/aws-crt-ffi/crt/aws-lc/include/openssl/nid.h +21 -0
  384. data/aws-crt-ffi/crt/aws-lc/include/openssl/rsa.h +1 -65
  385. data/aws-crt-ffi/crt/aws-lc/include/openssl/sha.h +22 -1
  386. data/aws-crt-ffi/crt/aws-lc/include/openssl/ssl.h +121 -13
  387. data/aws-crt-ffi/crt/aws-lc/include/openssl/stack.h +229 -208
  388. data/aws-crt-ffi/crt/aws-lc/include/openssl/target.h +166 -0
  389. data/aws-crt-ffi/crt/aws-lc/include/openssl/x509.h +30 -10
  390. data/aws-crt-ffi/crt/aws-lc/include/openssl/x509v3.h +6 -4
  391. data/aws-crt-ffi/crt/aws-lc/sources.cmake +2 -0
  392. data/aws-crt-ffi/crt/aws-lc/ssl/extensions.cc +12 -7
  393. data/aws-crt-ffi/crt/aws-lc/ssl/handshake_server.cc +28 -18
  394. data/aws-crt-ffi/crt/aws-lc/ssl/internal.h +41 -6
  395. data/aws-crt-ffi/crt/aws-lc/ssl/s3_both.cc +9 -17
  396. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_cipher.cc +13 -5
  397. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_key_share.cc +542 -2
  398. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_lib.cc +35 -0
  399. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_test.cc +1847 -14
  400. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_x509.cc +128 -0
  401. data/aws-crt-ffi/crt/aws-lc/ssl/test/PORTING.md +10 -7
  402. data/aws-crt-ffi/crt/aws-lc/ssl/test/bssl_shim.cc +133 -77
  403. data/aws-crt-ffi/crt/aws-lc/ssl/test/handshake_util.cc +3 -3
  404. data/aws-crt-ffi/crt/aws-lc/ssl/test/handshaker.cc +4 -0
  405. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_client.go +6 -2
  406. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_messages.go +894 -1042
  407. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_server.go +24 -23
  408. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/prf.go +6 -5
  409. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/runner.go +56 -55
  410. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/shim_dispatcher.go +188 -0
  411. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/ticket.go +37 -39
  412. data/aws-crt-ffi/crt/aws-lc/ssl/test/test_config.cc +59 -24
  413. data/aws-crt-ffi/crt/aws-lc/ssl/test/test_config.h +3 -2
  414. data/aws-crt-ffi/crt/aws-lc/ssl/tls13_server.cc +10 -11
  415. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/app.py +4 -4
  416. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/{aws_lc_mac_arm_ci_stack.py → aws_lc_ec2_test_framework_ci_stack.py} +13 -29
  417. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml +43 -0
  418. data/aws-crt-ffi/crt/aws-lc/tests/ci/common_posix_setup.sh +10 -0
  419. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-aarch/amazonlinux-2023_base/Dockerfile +5 -1
  420. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-aarch/ubuntu-22.04_base/Dockerfile +19 -3
  421. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/amazonlinux-2_gcc-7x-intel-sde/Dockerfile +5 -4
  422. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/build_images.sh +1 -0
  423. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/push_images.sh +2 -1
  424. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-20.04_clang-10x_formal-verification/create_image.sh +1 -1
  425. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-22.04_base/Dockerfile +1 -0
  426. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-22.04_clang-14x-sde/Dockerfile +42 -0
  427. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/windows/vs2017/Dockerfile +14 -0
  428. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/windows/windows_base/Dockerfile +3 -0
  429. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/README.md +12 -0
  430. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/nginx_patch/aws-lc-nginx.patch +68 -23
  431. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/run_crt_integration.sh +27 -0
  432. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/run_monit_integration.sh +56 -0
  433. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/sslproxy_patch/aws-lc-sslproxy.patch +2 -2
  434. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_ec2_test_framework.sh +135 -0
  435. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_fips_tests.sh +14 -2
  436. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_tests_with_sde.sh +4 -1
  437. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_tests_with_sde_asan.sh +14 -0
  438. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_windows_tests.bat +39 -3
  439. data/aws-crt-ffi/crt/aws-lc/third_party/fiat/README.md +21 -6
  440. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S +284 -0
  441. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S +210 -0
  442. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_mod_n25519.S +186 -0
  443. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_neg_p25519.S +65 -0
  444. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519.S +1043 -352
  445. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_alt.S +1043 -352
  446. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_byte.S +1043 -352
  447. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_byte_alt.S +1043 -352
  448. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base.S +1042 -352
  449. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_alt.S +1042 -352
  450. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_byte.S +1042 -352
  451. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_byte_alt.S +1043 -354
  452. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_decode.S +700 -0
  453. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_decode_alt.S +563 -0
  454. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_encode.S +131 -0
  455. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmulbase.S +9626 -0
  456. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmulbase_alt.S +9468 -0
  457. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmuldouble.S +3157 -0
  458. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmuldouble_alt.S +2941 -0
  459. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/p384/Makefile +1 -1
  460. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/p521/Makefile +1 -1
  461. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +34 -0
  462. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_madd_n25519.S +219 -0
  463. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_madd_n25519_alt.S +245 -0
  464. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_mod_n25519.S +228 -0
  465. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_neg_p25519.S +86 -0
  466. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519.S +1350 -407
  467. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519_alt.S +1350 -407
  468. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519base.S +1344 -400
  469. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519base_alt.S +1348 -402
  470. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_decode.S +670 -0
  471. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_decode_alt.S +751 -0
  472. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_encode.S +81 -0
  473. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmulbase.S +9910 -0
  474. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmulbase_alt.S +9986 -0
  475. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmuldouble.S +3619 -0
  476. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S +3736 -0
  477. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_224_test.json +1978 -0
  478. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_224_test.txt +1403 -0
  479. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_256_test.json +1993 -0
  480. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_256_test.txt +1416 -0
  481. data/aws-crt-ffi/crt/aws-lc/tool/digest.cc +4 -0
  482. data/aws-crt-ffi/crt/aws-lc/tool/internal.h +1 -0
  483. data/aws-crt-ffi/crt/aws-lc/tool/speed.cc +53 -6
  484. data/aws-crt-ffi/crt/aws-lc/util/all_tests.go +43 -12
  485. data/aws-crt-ffi/crt/aws-lc/util/all_tests.json +13 -5
  486. data/aws-crt-ffi/crt/aws-lc/util/bot/DEPS +4 -4
  487. data/aws-crt-ffi/crt/aws-lc/util/bot/update_clang.py +8 -2
  488. data/aws-crt-ffi/crt/aws-lc/util/codecov-ci.sh +82 -0
  489. data/aws-crt-ffi/crt/aws-lc/util/convert_wycheproof/convert_wycheproof.go +7 -5
  490. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/ACVP.md +7 -0
  491. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/hash.go +24 -9
  492. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/rsa.go +3 -4
  493. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/subprocess.go +15 -10
  494. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/HMAC-SHA2-512-224.bz2 +0 -0
  495. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHA2-512-224.bz2 +0 -0
  496. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHAKE-128.bz2 +0 -0
  497. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHAKE-256.bz2 +0 -0
  498. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/sha512-224-tests.json +1 -0
  499. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/shake-128-tests.json +1 -0
  500. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/shake-256-tests.json +1 -0
  501. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/tests.json +1 -0
  502. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/HMAC-SHA2-512-224.bz2 +0 -0
  503. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHA2-512-224.bz2 +0 -0
  504. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHAKE-128.bz2 +0 -0
  505. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHAKE-256.bz2 +0 -0
  506. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/modulewrapper/main.cc +4 -0
  507. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/modulewrapper/modulewrapper.cc +144 -1
  508. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/delocate.go +9 -3
  509. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/testdata/aarch64-Basic/in.s +4 -0
  510. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/testdata/aarch64-Basic/out.s +11 -0
  511. data/aws-crt-ffi/crt/aws-lc/util/fipstools/inject_hash/inject_hash.go +13 -4
  512. data/aws-crt-ffi/crt/aws-lc/util/fipstools/test-break-kat.sh +2 -0
  513. data/aws-crt-ffi/crt/aws-lc/util/testconfig/testconfig.go +2 -1
  514. data/aws-crt-ffi/crt/s2n/api/s2n.h +9 -5
  515. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/handshake.rs +9 -6
  516. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/resumption.rs +14 -14
  517. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/throughput.rs +9 -6
  518. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/harness.rs +106 -102
  519. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/openssl.rs +24 -20
  520. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/rustls.rs +28 -24
  521. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/s2n_tls.rs +52 -50
  522. data/aws-crt-ffi/crt/s2n/bindings/rust/generate/Cargo.toml +1 -0
  523. data/aws-crt-ffi/crt/s2n/bindings/rust/integration/Cargo.toml +3 -0
  524. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls/Cargo.toml +2 -2
  525. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls/src/connection.rs +9 -0
  526. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls-sys/templates/Cargo.template +2 -1
  527. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls-tokio/Cargo.toml +2 -2
  528. data/aws-crt-ffi/crt/s2n/tests/cbmc/sources/make_common_datastructures.c +9 -2
  529. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_client_cert_verify_recv_test.c +1 -1
  530. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c +1 -1
  531. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_tls13_cert_verify_recv_test.c +1 -1
  532. data/aws-crt-ffi/crt/s2n/tests/integrationv2/test_version_negotiation.py +4 -4
  533. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_auth_selection_test.c +19 -9
  534. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_auth_handshake_test.c +3 -3
  535. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_cert_verify_test.c +1 -1
  536. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_hello_recv_test.c +1 -1
  537. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_hello_test.c +4 -4
  538. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_signature_algorithms_extension_test.c +4 -5
  539. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_connection_protocol_versions_test.c +390 -0
  540. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_connection_test.c +8 -4
  541. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_handshake_test.c +2 -1
  542. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_quic_support_io_test.c +106 -0
  543. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_security_policies_test.c +6 -2
  544. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_self_talk_offload_signing_test.c +3 -3
  545. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_self_talk_session_resumption_test.c +135 -0
  546. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_server_new_session_ticket_test.c +32 -0
  547. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_server_signature_algorithms_extension_test.c +1 -1
  548. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_signature_algorithms_test.c +307 -283
  549. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_tls13_cert_request_test.c +1 -1
  550. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_tls13_cert_verify_test.c +18 -17
  551. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_x509_validator_test.c +125 -0
  552. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_signature_algorithms.c +8 -1
  553. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_supported_versions.c +43 -11
  554. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_supported_versions.h +3 -0
  555. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_server_signature_algorithms.c +8 -1
  556. data/aws-crt-ffi/crt/s2n/tls/s2n_auth_selection.c +4 -2
  557. data/aws-crt-ffi/crt/s2n/tls/s2n_client_cert_verify.c +7 -10
  558. data/aws-crt-ffi/crt/s2n/tls/s2n_client_hello.c +2 -2
  559. data/aws-crt-ffi/crt/s2n/tls/s2n_connection.c +75 -14
  560. data/aws-crt-ffi/crt/s2n/tls/s2n_handshake.h +2 -2
  561. data/aws-crt-ffi/crt/s2n/tls/s2n_post_handshake.c +1 -1
  562. data/aws-crt-ffi/crt/s2n/tls/s2n_post_handshake.h +1 -0
  563. data/aws-crt-ffi/crt/s2n/tls/s2n_quic_support.c +29 -0
  564. data/aws-crt-ffi/crt/s2n/tls/s2n_quic_support.h +5 -0
  565. data/aws-crt-ffi/crt/s2n/tls/s2n_security_policies.c +40 -0
  566. data/aws-crt-ffi/crt/s2n/tls/s2n_security_policies.h +4 -0
  567. data/aws-crt-ffi/crt/s2n/tls/s2n_server_cert_request.c +1 -1
  568. data/aws-crt-ffi/crt/s2n/tls/s2n_server_hello.c +0 -3
  569. data/aws-crt-ffi/crt/s2n/tls/s2n_server_key_exchange.c +8 -9
  570. data/aws-crt-ffi/crt/s2n/tls/s2n_server_new_session_ticket.c +8 -0
  571. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_algorithms.c +111 -72
  572. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_algorithms.h +11 -9
  573. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_scheme.c +9 -0
  574. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_scheme.h +2 -0
  575. data/aws-crt-ffi/crt/s2n/tls/s2n_tls13_certificate_verify.c +12 -18
  576. data/aws-crt-ffi/crt/s2n/tls/s2n_x509_validator.c +7 -7
  577. data/aws-crt-ffi/src/api.h +1 -0
  578. data/lib/aws-crt/native.rb +1 -1
  579. metadata +68 -5
  580. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/ssm/m1_tests_ssm_document.yaml +0 -34
  581. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_m1_ec2_instance.sh +0 -96
@@ -0,0 +1,3619 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0 OR ISC
3
+
4
+ // ----------------------------------------------------------------------------
5
+ // Double scalar multiplication for edwards25519, fresh and base point
6
+ // Input scalar[4], point[8], bscalar[4]; output res[8]
7
+ //
8
+ // extern void edwards25519_scalarmuldouble
9
+ // (uint64_t res[static 8],uint64_t scalar[static 4],
10
+ // uint64_t point[static 8],uint64_t bscalar[static 4]);
11
+ //
12
+ // Given scalar = n, point = P and bscalar = m, returns in res
13
+ // the point (X,Y) = n * P + m * B where B = (...,4/5) is
14
+ // the standard basepoint for the edwards25519 (Ed25519) curve.
15
+ //
16
+ // Both 256-bit coordinates of the input point P are implicitly
17
+ // reduced modulo 2^255-19 if they are not already in reduced form,
18
+ // but the conventional usage is that they *are* already reduced.
19
+ // The scalars can be arbitrary 256-bit numbers but may also be
20
+ // considered as implicitly reduced modulo the group order.
21
+ //
22
+ // Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point, RCX = bscalar
23
+ // Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point, R9 = bscalar
24
+ // ----------------------------------------------------------------------------
25
+ #include "_internal_s2n_bignum.h"
26
+
27
+
28
+ S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble)
29
+ S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble)
30
+ .text
31
+
32
+ // Size of individual field elements in bytes (four 64-bit words)
33
+
34
+ #define NUMSIZE 32
35
+
36
+ // Pointer-offset pairs for result and temporaries on stack with some aliasing.
37
+ // Both "resx" and "resy" assume the "res" pointer has been preloaded into %rbp.
38
+
39
+ #define resx (0*NUMSIZE)(%rbp)
40
+ #define resy (1*NUMSIZE)(%rbp)
41
+
42
+ #define scalar (0*NUMSIZE)(%rsp)
43
+ #define bscalar (1*NUMSIZE)(%rsp)
44
+
45
+ #define tabent (2*NUMSIZE)(%rsp) /* 4 field-element slots before btabent */
46
+ #define btabent (6*NUMSIZE)(%rsp) /* 3 field-element slots before acc */
47
+
48
+ #define acc (9*NUMSIZE)(%rsp) /* 4 field-element slots before tab */
49
+
50
+ #define tab (13*NUMSIZE)(%rsp) /* 32 field-element slots before bf */
51
+
52
+ // Additional variables kept on the stack (four 8-byte slots filling the final NUMSIZE bytes of the frame)
53
+
54
+ #define bf 45*NUMSIZE(%rsp)
55
+ #define cf 45*NUMSIZE+8(%rsp)
56
+ #define i 45*NUMSIZE+16(%rsp)
57
+ #define res 45*NUMSIZE+24(%rsp)
58
+
59
+ // Total size to reserve on the stack (excluding local subroutines)
60
+
61
+ #define NSPACE (46*NUMSIZE) /* 46 * 32 = 1472 bytes */
62
+
63
+ // Syntactic variants to make x86_att forms easier to generate
64
+ // These are the bare byte offsets of the %rsp-relative names above, for use in forms such as BSCALAR+8(%rsp).
65
+ #define SCALAR (0*NUMSIZE)
66
+ #define BSCALAR (1*NUMSIZE)
67
+ #define TABENT (2*NUMSIZE)
68
+ #define BTABENT (6*NUMSIZE)
69
+ #define ACC (9*NUMSIZE)
70
+ #define TAB (13*NUMSIZE)
71
+
72
+ // Sub-references used in local subroutines with local stack
73
+ // x_N, y_N, z_N, w_N: four consecutive field elements at %rdi (N=0), %rsi (N=1) and %rbp (N=2)
74
+ #define x_0 0(%rdi)
75
+ #define y_0 NUMSIZE(%rdi)
76
+ #define z_0 (2*NUMSIZE)(%rdi)
77
+ #define w_0 (3*NUMSIZE)(%rdi)
78
+
79
+ #define x_1 0(%rsi)
80
+ #define y_1 NUMSIZE(%rsi)
81
+ #define z_1 (2*NUMSIZE)(%rsi)
82
+ #define w_1 (3*NUMSIZE)(%rsi)
83
+
84
+ #define x_2 0(%rbp)
85
+ #define y_2 NUMSIZE(%rbp)
86
+ #define z_2 (2*NUMSIZE)(%rbp)
87
+ #define w_2 (3*NUMSIZE)(%rbp)
88
+ // t0..t5: six consecutive field-element slots starting at %rsp
89
+ #define t0 (0*NUMSIZE)(%rsp)
90
+ #define t1 (1*NUMSIZE)(%rsp)
91
+ #define t2 (2*NUMSIZE)(%rsp)
92
+ #define t3 (3*NUMSIZE)(%rsp)
93
+ #define t4 (4*NUMSIZE)(%rsp)
94
+ #define t5 (5*NUMSIZE)(%rsp)
95
+
96
+ // Macro wrapping up the basic field multiplication, only trivially
97
+ // different from a pure function call to bignum_mul_p25519.
98
+ // Stores P1 * P2 mod p_25519 to P0 (4 words each, all memory operands); uses rax, rbx, rcx, rdx, r8-r15 and the flags.
99
+ #define mul_p25519(P0,P1,P2) \
100
+ xorl %ecx, %ecx ; /* row 0: [r12..r8] := P1 * P2[0] */ \
101
+ movq P2, %rdx ; \
102
+ mulxq P1, %r8, %r9 ; \
103
+ mulxq 0x8+P1, %rax, %r10 ; \
104
+ addq %rax, %r9 ; \
105
+ mulxq 0x10+P1, %rax, %r11 ; \
106
+ adcq %rax, %r10 ; \
107
+ mulxq 0x18+P1, %rax, %r12 ; \
108
+ adcq %rax, %r11 ; \
109
+ adcq %rcx, %r12 ; \
110
+ xorl %ecx, %ecx ; /* row 1: add P1 * P2[1] at word 1; adcx and adox run two carry chains (CF and OF) */ \
111
+ movq 0x8+P2, %rdx ; \
112
+ mulxq P1, %rax, %rbx ; \
113
+ adcxq %rax, %r9 ; \
114
+ adoxq %rbx, %r10 ; \
115
+ mulxq 0x8+P1, %rax, %rbx ; \
116
+ adcxq %rax, %r10 ; \
117
+ adoxq %rbx, %r11 ; \
118
+ mulxq 0x10+P1, %rax, %rbx ; \
119
+ adcxq %rax, %r11 ; \
120
+ adoxq %rbx, %r12 ; \
121
+ mulxq 0x18+P1, %rax, %r13 ; \
122
+ adcxq %rax, %r12 ; \
123
+ adoxq %rcx, %r13 ; \
124
+ adcxq %rcx, %r13 ; \
125
+ xorl %ecx, %ecx ; /* row 2: add P1 * P2[2] at word 2 */ \
126
+ movq 0x10+P2, %rdx ; \
127
+ mulxq P1, %rax, %rbx ; \
128
+ adcxq %rax, %r10 ; \
129
+ adoxq %rbx, %r11 ; \
130
+ mulxq 0x8+P1, %rax, %rbx ; \
131
+ adcxq %rax, %r11 ; \
132
+ adoxq %rbx, %r12 ; \
133
+ mulxq 0x10+P1, %rax, %rbx ; \
134
+ adcxq %rax, %r12 ; \
135
+ adoxq %rbx, %r13 ; \
136
+ mulxq 0x18+P1, %rax, %r14 ; \
137
+ adcxq %rax, %r13 ; \
138
+ adoxq %rcx, %r14 ; \
139
+ adcxq %rcx, %r14 ; \
140
+ xorl %ecx, %ecx ; /* row 3: add P1 * P2[3] at word 3, completing the 8-word product [r15..r8] */ \
141
+ movq 0x18+P2, %rdx ; \
142
+ mulxq P1, %rax, %rbx ; \
143
+ adcxq %rax, %r11 ; \
144
+ adoxq %rbx, %r12 ; \
145
+ mulxq 0x8+P1, %rax, %rbx ; \
146
+ adcxq %rax, %r12 ; \
147
+ adoxq %rbx, %r13 ; \
148
+ mulxq 0x10+P1, %rax, %rbx ; \
149
+ adcxq %rax, %r13 ; \
150
+ adoxq %rbx, %r14 ; \
151
+ mulxq 0x18+P1, %rax, %r15 ; \
152
+ adcxq %rax, %r14 ; \
153
+ adoxq %rcx, %r15 ; \
154
+ adcxq %rcx, %r15 ; \
155
+ movl $0x26, %edx ; /* fold: [r12;r11..r8] := [r11..r8] + 38 * [r15..r12], since 2^256 = 38 mod 2 * p_25519 */ \
156
+ xorl %ecx, %ecx ; \
157
+ mulxq %r12, %rax, %rbx ; \
158
+ adcxq %rax, %r8 ; \
159
+ adoxq %rbx, %r9 ; \
160
+ mulxq %r13, %rax, %rbx ; \
161
+ adcxq %rax, %r9 ; \
162
+ adoxq %rbx, %r10 ; \
163
+ mulxq %r14, %rax, %rbx ; \
164
+ adcxq %rax, %r10 ; \
165
+ adoxq %rbx, %r11 ; \
166
+ mulxq %r15, %rax, %r12 ; \
167
+ adcxq %rax, %r11 ; \
168
+ adoxq %rcx, %r12 ; \
169
+ adcxq %rcx, %r12 ; \
170
+ shldq $0x1, %r11, %r12 ; /* r12 := floor([r12;r11..r8] / 2^255) */ \
171
+ movl $0x13, %edx ; \
172
+ incq %r12; /* quotient estimate q is one more than that */ \
173
+ bts $63, %r11 ; /* force bit 255 so the addition below carries out exactly when no correction is needed */ \
174
+ mulxq %r12, %rax, %rbx ; /* rbx:rax := 19 * q */ \
175
+ addq %rax, %r8 ; \
176
+ adcq %rbx, %r9 ; \
177
+ adcq %rcx, %r10 ; \
178
+ adcq %rcx, %r11 ; \
179
+ sbbq %rax, %rax ; /* rax := all ones if that addition carried out, else 0 */ \
180
+ notq %rax; \
181
+ andq %rdx, %rax ; /* rax := 19 if there was no carry, else 0 */ \
182
+ subq %rax, %r8 ; /* correction: take that 19 back off */ \
183
+ sbbq %rcx, %r9 ; \
184
+ sbbq %rcx, %r10 ; \
185
+ sbbq %rcx, %r11 ; \
186
+ btr $63, %r11 ; /* clear bit 255 */ \
187
+ movq %r8, P0 ; /* store the 4-word result */ \
188
+ movq %r9, 0x8+P0 ; \
189
+ movq %r10, 0x10+P0 ; \
190
+ movq %r11, 0x18+P0
191
+
192
+ // A version of multiplication that only guarantees output < 2 * p_25519.
193
+ // This basically skips the +1 and final correction in quotient estimation.
194
+ // Stores the result for P1 * P2 to P0 (4 words each, all memory operands); uses rax, rbx, rcx, rdx, r8-r15 and the flags.
195
+ #define mul_4(P0,P1,P2) \
196
+ xorl %ecx, %ecx ; /* row 0: [r12..r8] := P1 * P2[0] */ \
197
+ movq P2, %rdx ; \
198
+ mulxq P1, %r8, %r9 ; \
199
+ mulxq 0x8+P1, %rax, %r10 ; \
200
+ addq %rax, %r9 ; \
201
+ mulxq 0x10+P1, %rax, %r11 ; \
202
+ adcq %rax, %r10 ; \
203
+ mulxq 0x18+P1, %rax, %r12 ; \
204
+ adcq %rax, %r11 ; \
205
+ adcq %rcx, %r12 ; \
206
+ xorl %ecx, %ecx ; /* row 1: add P1 * P2[1] at word 1; adcx and adox run two carry chains (CF and OF) */ \
207
+ movq 0x8+P2, %rdx ; \
208
+ mulxq P1, %rax, %rbx ; \
209
+ adcxq %rax, %r9 ; \
210
+ adoxq %rbx, %r10 ; \
211
+ mulxq 0x8+P1, %rax, %rbx ; \
212
+ adcxq %rax, %r10 ; \
213
+ adoxq %rbx, %r11 ; \
214
+ mulxq 0x10+P1, %rax, %rbx ; \
215
+ adcxq %rax, %r11 ; \
216
+ adoxq %rbx, %r12 ; \
217
+ mulxq 0x18+P1, %rax, %r13 ; \
218
+ adcxq %rax, %r12 ; \
219
+ adoxq %rcx, %r13 ; \
220
+ adcxq %rcx, %r13 ; \
221
+ xorl %ecx, %ecx ; /* row 2: add P1 * P2[2] at word 2 */ \
222
+ movq 0x10+P2, %rdx ; \
223
+ mulxq P1, %rax, %rbx ; \
224
+ adcxq %rax, %r10 ; \
225
+ adoxq %rbx, %r11 ; \
226
+ mulxq 0x8+P1, %rax, %rbx ; \
227
+ adcxq %rax, %r11 ; \
228
+ adoxq %rbx, %r12 ; \
229
+ mulxq 0x10+P1, %rax, %rbx ; \
230
+ adcxq %rax, %r12 ; \
231
+ adoxq %rbx, %r13 ; \
232
+ mulxq 0x18+P1, %rax, %r14 ; \
233
+ adcxq %rax, %r13 ; \
234
+ adoxq %rcx, %r14 ; \
235
+ adcxq %rcx, %r14 ; \
236
+ xorl %ecx, %ecx ; /* row 3: add P1 * P2[3] at word 3, completing the 8-word product [r15..r8] */ \
237
+ movq 0x18+P2, %rdx ; \
238
+ mulxq P1, %rax, %rbx ; \
239
+ adcxq %rax, %r11 ; \
240
+ adoxq %rbx, %r12 ; \
241
+ mulxq 0x8+P1, %rax, %rbx ; \
242
+ adcxq %rax, %r12 ; \
243
+ adoxq %rbx, %r13 ; \
244
+ mulxq 0x10+P1, %rax, %rbx ; \
245
+ adcxq %rax, %r13 ; \
246
+ adoxq %rbx, %r14 ; \
247
+ mulxq 0x18+P1, %rax, %r15 ; \
248
+ adcxq %rax, %r14 ; \
249
+ adoxq %rcx, %r15 ; \
250
+ adcxq %rcx, %r15 ; \
251
+ movl $0x26, %edx ; /* fold: [r12;r11..r8] := [r11..r8] + 38 * [r15..r12], since 2^256 = 38 mod 2 * p_25519 */ \
252
+ xorl %ecx, %ecx ; \
253
+ mulxq %r12, %rax, %rbx ; \
254
+ adcxq %rax, %r8 ; \
255
+ adoxq %rbx, %r9 ; \
256
+ mulxq %r13, %rax, %rbx ; \
257
+ adcxq %rax, %r9 ; \
258
+ adoxq %rbx, %r10 ; \
259
+ mulxq %r14, %rax, %rbx ; \
260
+ adcxq %rax, %r10 ; \
261
+ adoxq %rbx, %r11 ; \
262
+ mulxq %r15, %rax, %r12 ; \
263
+ adcxq %rax, %r11 ; \
264
+ adoxq %rcx, %r12 ; \
265
+ adcxq %rcx, %r12 ; \
266
+ shldq $0x1, %r11, %r12 ; /* r12 := floor([r12;r11..r8] / 2^255) */ \
267
+ btr $0x3f, %r11 ; /* keep only the low 255 bits in [r11..r8] */ \
268
+ movl $0x13, %edx ; \
269
+ imulq %r12, %rdx ; /* rdx := 19 * r12 */ \
270
+ addq %rdx, %r8 ; /* add it in; no final correction is applied */ \
271
+ adcq %rcx, %r9 ; \
272
+ adcq %rcx, %r10 ; \
273
+ adcq %rcx, %r11 ; \
274
+ movq %r8, P0 ; /* store the 4-word result */ \
275
+ movq %r9, 0x8+P0 ; \
276
+ movq %r10, 0x10+P0 ; \
277
+ movq %r11, 0x18+P0
278
+
279
+ // Squaring just giving a result < 2 * p_25519, which is done by
280
+ // basically skipping the +1 in the quotient estimate and the final
281
+ // optional correction.
282
+ // Stores the result for P1 * P1 to P0 (4 words each, memory operands); uses rax, rbx, rcx, rdx, r8-r15 and the flags.
283
+ #define sqr_4(P0,P1) \
284
+ movq P1, %rdx ; \
285
+ mulxq %rdx, %r8, %r15 ; /* r15:r8 := P1[0]^2 */ \
286
+ mulxq 0x8+P1, %r9, %r10 ; /* r10:r9 := P1[0] * P1[1] */ \
287
+ mulxq 0x18+P1, %r11, %r12 ; /* r12:r11 := P1[0] * P1[3] */ \
288
+ movq 0x10+P1, %rdx ; \
289
+ mulxq 0x18+P1, %r13, %r14 ; /* r14:r13 := P1[2] * P1[3] */ \
290
+ xorl %ebx, %ebx ; /* rbx = 0 and CF = OF = 0 for the two carry chains */ \
291
+ mulxq P1, %rax, %rcx ; /* add the remaining cross products P1[2]*P1[0], P1[2]*P1[1], P1[3]*P1[1] */ \
292
+ adcxq %rax, %r10 ; \
293
+ adoxq %rcx, %r11 ; \
294
+ mulxq 0x8+P1, %rax, %rcx ; \
295
+ adcxq %rax, %r11 ; \
296
+ adoxq %rcx, %r12 ; \
297
+ movq 0x18+P1, %rdx ; \
298
+ mulxq 0x8+P1, %rax, %rcx ; \
299
+ adcxq %rax, %r12 ; \
300
+ adoxq %rcx, %r13 ; \
301
+ adcxq %rbx, %r13 ; \
302
+ adoxq %rbx, %r14 ; \
303
+ adcq %rbx, %r14 ; \
304
+ xorl %ebx, %ebx ; /* double the cross-product sum [r14..r9] (adcx) while adding the squares P1[k]^2 (adox) */ \
305
+ adcxq %r9, %r9 ; \
306
+ adoxq %r15, %r9 ; \
307
+ movq 0x8+P1, %rdx ; \
308
+ mulxq %rdx, %rax, %rdx ; \
309
+ adcxq %r10, %r10 ; \
310
+ adoxq %rax, %r10 ; \
311
+ adcxq %r11, %r11 ; \
312
+ adoxq %rdx, %r11 ; \
313
+ movq 0x10+P1, %rdx ; \
314
+ mulxq %rdx, %rax, %rdx ; \
315
+ adcxq %r12, %r12 ; \
316
+ adoxq %rax, %r12 ; \
317
+ adcxq %r13, %r13 ; \
318
+ adoxq %rdx, %r13 ; \
319
+ movq 0x18+P1, %rdx ; \
320
+ mulxq %rdx, %rax, %r15 ; \
321
+ adcxq %r14, %r14 ; \
322
+ adoxq %rax, %r14 ; \
323
+ adcxq %rbx, %r15 ; \
324
+ adoxq %rbx, %r15 ; \
325
+ movl $0x26, %edx ; /* fold: [r12;r11..r8] := [r11..r8] + 38 * [r15..r12], since 2^256 = 38 mod 2 * p_25519 */ \
326
+ xorl %ebx, %ebx ; \
327
+ mulxq %r12, %rax, %rcx ; \
328
+ adcxq %rax, %r8 ; \
329
+ adoxq %rcx, %r9 ; \
330
+ mulxq %r13, %rax, %rcx ; \
331
+ adcxq %rax, %r9 ; \
332
+ adoxq %rcx, %r10 ; \
333
+ mulxq %r14, %rax, %rcx ; \
334
+ adcxq %rax, %r10 ; \
335
+ adoxq %rcx, %r11 ; \
336
+ mulxq %r15, %rax, %r12 ; \
337
+ adcxq %rax, %r11 ; \
338
+ adoxq %rbx, %r12 ; \
339
+ adcxq %rbx, %r12 ; \
340
+ shldq $0x1, %r11, %r12 ; /* r12 := floor([r12;r11..r8] / 2^255) */ \
341
+ btr $0x3f, %r11 ; /* keep only the low 255 bits in [r11..r8] */ \
342
+ movl $0x13, %edx ; \
343
+ imulq %r12, %rdx ; /* rdx := 19 * r12 */ \
344
+ addq %rdx, %r8 ; /* add it in; no final correction is applied */ \
345
+ adcq %rbx, %r9 ; \
346
+ adcq %rbx, %r10 ; \
347
+ adcq %rbx, %r11 ; \
348
+ movq %r8, P0 ; /* store the 4-word result */ \
349
+ movq %r9, 0x8+P0 ; \
350
+ movq %r10, 0x10+P0 ; \
351
+ movq %r11, 0x18+P0
352
+
353
+ // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38
354
+ // Stores P1 - P2 to P0, subtracting a further 38 when the 4-word subtraction borrows; uses rax, rbx, rcx, r8-r10 and the flags.
355
+ #define sub_twice4(P0,P1,P2) \
356
+ movq P1, %r8 ; \
357
+ xorl %ebx, %ebx ; /* rbx = 0, used as the zero operand below */ \
358
+ subq P2, %r8 ; \
359
+ movq 8+P1, %r9 ; \
360
+ sbbq 8+P2, %r9 ; \
361
+ movl $38, %ecx ; /* mov leaves the borrow chain's flags untouched */ \
362
+ movq 16+P1, %r10 ; \
363
+ sbbq 16+P2, %r10 ; \
364
+ movq 24+P1, %rax ; \
365
+ sbbq 24+P2, %rax ; \
366
+ cmovncq %rbx, %rcx ; /* rcx := 38 if the subtraction borrowed, else 0 */ \
367
+ subq %rcx, %r8 ; \
368
+ sbbq %rbx, %r9 ; \
369
+ sbbq %rbx, %r10 ; \
370
+ sbbq %rbx, %rax ; \
371
+ movq %r8, P0 ; \
372
+ movq %r9, 8+P0 ; \
373
+ movq %r10, 16+P0 ; \
374
+ movq %rax, 24+P0
375
+
376
+ // Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38.
377
+ // This only ensures that the result fits in 4 digits, not that it is reduced
378
+ // even w.r.t. the double modulus. The result is always correct modulo 2 * p_25519
379
+ // provided the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided
380
+ // at least one of them is reduced w.r.t. the double modulus.
381
+ // add_twice4 stores P1 + P2 to P0, adding a further 38 when the 4-word sum carries out; uses rax, rcx, r8-r11 and the flags.
382
+ #define add_twice4(P0,P1,P2) \
383
+ movq P1, %r8 ; \
384
+ xorl %ecx, %ecx ; /* rcx = 0, used as the zero operand below */ \
385
+ addq P2, %r8 ; \
386
+ movq 0x8+P1, %r9 ; \
387
+ adcq 0x8+P2, %r9 ; \
388
+ movq 0x10+P1, %r10 ; \
389
+ adcq 0x10+P2, %r10 ; \
390
+ movq 0x18+P1, %r11 ; \
391
+ adcq 0x18+P2, %r11 ; \
392
+ movl $38, %eax ; \
393
+ cmovncq %rcx, %rax ; /* rax := 38 if the addition carried out, else 0 */ \
394
+ addq %rax, %r8 ; \
395
+ adcq %rcx, %r9 ; \
396
+ adcq %rcx, %r10 ; \
397
+ adcq %rcx, %r11 ; \
398
+ movq %r8, P0 ; \
399
+ movq %r9, 0x8+P0 ; \
400
+ movq %r10, 0x10+P0 ; \
401
+ movq %r11, 0x18+P0
402
+ // double_twice4 stores P1 + P1 to P0, adding a further 38 when the 4-word sum carries out; the result fits in 4 words but is not necessarily reduced. Uses rax, rcx, r8-r11 and the flags.
403
+ #define double_twice4(P0,P1) \
404
+ movq P1, %r8 ; \
405
+ xorl %ecx, %ecx ; /* rcx = 0, used as the zero operand below */ \
406
+ addq %r8, %r8 ; \
407
+ movq 0x8+P1, %r9 ; \
408
+ adcq %r9, %r9 ; \
409
+ movq 0x10+P1, %r10 ; \
410
+ adcq %r10, %r10 ; \
411
+ movq 0x18+P1, %r11 ; \
412
+ adcq %r11, %r11 ; \
413
+ movl $38, %eax ; \
414
+ cmovncq %rcx, %rax ; /* rax := 38 if the doubling carried out, else 0 */ \
415
+ addq %rax, %r8 ; \
416
+ adcq %rcx, %r9 ; \
417
+ adcq %rcx, %r10 ; \
418
+ adcq %rcx, %r11 ; \
419
+ movq %r8, P0 ; \
420
+ movq %r9, 0x8+P0 ; \
421
+ movq %r10, 0x10+P0 ; \
422
+ movq %r11, 0x18+P0
423
+
424
+ // Load the constant k_25519 = 2 * d_25519 using immediate operations
425
+ // Stores the four 64-bit words to P0, least significant word first, passing each through %rax.
426
+ #define load_k25519(P0) \
427
+ movq $0xebd69b9426b2f159, %rax ; \
428
+ movq %rax, P0 ; \
429
+ movq $0x00e0149a8283b156, %rax ; \
430
+ movq %rax, 8+P0 ; \
431
+ movq $0x198e80f2eef3d130, %rax ; \
432
+ movq %rax, 16+P0 ; \
433
+ movq $0x2406d9dc56dffce7, %rax ; \
434
+ movq %rax, 24+P0
435
+
436
+ S2N_BN_SYMBOL(edwards25519_scalarmuldouble):
437
+
438
+ // In this case the Windows form literally makes a subroutine call.
439
+ // This avoids hassle arising from keeping code and data together.
440
+
441
+ #if WINDOWS_ABI
442
+ pushq %rdi
443
+ pushq %rsi
444
+ movq %rcx, %rdi
445
+ movq %rdx, %rsi
446
+ movq %r8, %rdx
447
+ movq %r9, %rcx
448
+ callq edwards25519_scalarmuldouble_standard
449
+ popq %rsi
450
+ popq %rdi
451
+ ret
452
+
453
+ edwards25519_scalarmuldouble_standard:
454
+ #endif
455
+
456
+ // Save registers, make room for temps, preserve input arguments.
457
+
458
+ pushq %rbx
459
+ pushq %rbp
460
+ pushq %r12
461
+ pushq %r13
462
+ pushq %r14
463
+ pushq %r15
464
+ subq $NSPACE, %rsp
465
+
466
+ // Move the output pointer to a stable place
467
+
468
+ movq %rdi, res
469
+
470
+ // Copy scalars while recoding all 4-bit nybbles except the top
471
+ // one (bits 252..255) into signed 4-bit digits. This is essentially
472
+ // done just by adding the recoding constant 0x0888..888, after
473
+ // which all digits except the first have an implicit bias of -8,
474
+ // so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7.
475
+ // (We could literally create 2s complement signed nybbles by
476
+ // XORing with the same constant 0x0888..888 afterwards, but it
477
+ // doesn't seem to make the end usage any simpler.)
478
+ //
479
+ // In order to ensure that the unrecoded top nybble (bits 252..255)
480
+ // does not become > 8 as a result of carries lower down from the
481
+ // recoding, we first (conceptually) subtract 8 * group_order iff
482
+ // the top digit of the scalar is > 2^63. In the implementation the
483
+ // reduction and recoding are combined by optionally using the
484
+ // modified recoding constant 0x0888...888 + (2^256 - 8 * group_order).
485
+
486
+ movq (%rcx), %r8
487
+ movq 8(%rcx), %r9
488
+ movq 16(%rcx), %r10
489
+ movq 24(%rcx), %r11
490
+ movq $0xc7f56fb5a0d9e920, %r12
491
+ movq $0xe190b99370cba1d5, %r13
492
+ movq $0x8888888888888887, %r14
493
+ movq $0x8888888888888888, %r15
494
+ movq $0x8000000000000000, %rax
495
+ movq $0x0888888888888888, %rbx
496
+ cmpq %r11, %rax
497
+ cmovncq %r15, %r12
498
+ cmovncq %r15, %r13
499
+ cmovncq %r15, %r14
500
+ cmovncq %rbx, %r15
501
+ addq %r12, %r8
502
+ adcq %r13, %r9
503
+ adcq %r14, %r10
504
+ adcq %r15, %r11
505
+ movq %r8, BSCALAR(%rsp)
506
+ movq %r9, BSCALAR+8(%rsp)
507
+ movq %r10, BSCALAR+16(%rsp)
508
+ movq %r11, BSCALAR+24(%rsp)
509
+
510
+ movq (%rsi), %r8
511
+ movq 8(%rsi), %r9
512
+ movq 16(%rsi), %r10
513
+ movq 24(%rsi), %r11
514
+ movq $0xc7f56fb5a0d9e920, %r12
515
+ movq $0xe190b99370cba1d5, %r13
516
+ movq $0x8888888888888887, %r14
517
+ movq $0x8888888888888888, %r15
518
+ movq $0x8000000000000000, %rax
519
+ movq $0x0888888888888888, %rbx
520
+ cmpq %r11, %rax
521
+ cmovncq %r15, %r12
522
+ cmovncq %r15, %r13
523
+ cmovncq %r15, %r14
524
+ cmovncq %rbx, %r15
525
+ addq %r12, %r8
526
+ adcq %r13, %r9
527
+ adcq %r14, %r10
528
+ adcq %r15, %r11
529
+ movq %r8, SCALAR(%rsp)
530
+ movq %r9, SCALAR+8(%rsp)
531
+ movq %r10, SCALAR+16(%rsp)
532
+ movq %r11, SCALAR+24(%rsp)
533
+
534
+ // Create table of multiples 1..8 of the general input point at "tab".
535
+ // Reduce the input coordinates x and y modulo 2^256 - 38 first, for the
536
+ // sake of definiteness; this is the reduction that will be maintained.
537
+ // We could slightly optimize the additions because we know the input
538
+ // point is affine (so Z = 1), but it doesn't seem worth the complication.
539
+
540
+ movl $38, %eax
541
+ movq (%rdx), %r8
542
+ xorl %ebx, %ebx
543
+ movq 8(%rdx), %r9
544
+ xorl %ecx, %ecx
545
+ movq 16(%rdx), %r10
546
+ xorl %esi, %esi
547
+ movq 24(%rdx), %r11
548
+ addq %r8, %rax
549
+ adcq %r9, %rbx
550
+ adcq %r10, %rcx
551
+ adcq %r11, %rsi
552
+ cmovncq %r8, %rax
553
+ movq %rax, TAB(%rsp)
554
+ cmovncq %r9, %rbx
555
+ movq %rbx, TAB+8(%rsp)
556
+ cmovncq %r10, %rcx
557
+ movq %rcx, TAB+16(%rsp)
558
+ cmovncq %r11, %rsi
559
+ movq %rsi, TAB+24(%rsp)
560
+
561
+ movl $38, %eax
562
+ movq 32(%rdx), %r8
563
+ xorl %ebx, %ebx
564
+ movq 40(%rdx), %r9
565
+ xorl %ecx, %ecx
566
+ movq 48(%rdx), %r10
567
+ xorl %esi, %esi
568
+ movq 56(%rdx), %r11
569
+ addq %r8, %rax
570
+ adcq %r9, %rbx
571
+ adcq %r10, %rcx
572
+ adcq %r11, %rsi
573
+ cmovncq %r8, %rax
574
+ movq %rax, TAB+32(%rsp)
575
+ cmovncq %r9, %rbx
576
+ movq %rbx, TAB+40(%rsp)
577
+ cmovncq %r10, %rcx
578
+ movq %rcx, TAB+48(%rsp)
579
+ cmovncq %r11, %rsi
580
+ movq %rsi, TAB+56(%rsp)
581
+
582
+ movl $1, %eax
583
+ movq %rax, TAB+64(%rsp)
584
+ xorl %eax, %eax
585
+ movq %rax, TAB+72(%rsp)
586
+ movq %rax, TAB+80(%rsp)
587
+ movq %rax, TAB+88(%rsp)
588
+
589
+ leaq TAB+96(%rsp), %rdi
590
+ leaq TAB(%rsp), %rsi
591
+ leaq TAB+32(%rsp), %rbp
592
+ mul_4(x_0,x_1,x_2)
593
+
594
+ // Multiple 2
595
+
596
+ leaq TAB+1*128(%rsp), %rdi
597
+ leaq TAB(%rsp), %rsi
598
+ callq edwards25519_scalarmuldouble_epdouble
599
+
600
+ // Multiple 3
601
+
602
+ leaq TAB+2*128(%rsp), %rdi
603
+ leaq TAB(%rsp), %rsi
604
+ leaq TAB+1*128(%rsp), %rbp
605
+ callq edwards25519_scalarmuldouble_epadd
606
+
607
+ // Multiple 4
608
+
609
+ leaq TAB+3*128(%rsp), %rdi
610
+ leaq TAB+1*128(%rsp), %rsi
611
+ callq edwards25519_scalarmuldouble_epdouble
612
+
613
+ // Multiple 5
614
+
615
+ leaq TAB+4*128(%rsp), %rdi
616
+ leaq TAB(%rsp), %rsi
617
+ leaq TAB+3*128(%rsp), %rbp
618
+ callq edwards25519_scalarmuldouble_epadd
619
+
620
+ // Multiple 6
621
+
622
+ leaq TAB+5*128(%rsp), %rdi
623
+ leaq TAB+2*128(%rsp), %rsi
624
+ callq edwards25519_scalarmuldouble_epdouble
625
+
626
+ // Multiple 7
627
+
628
+ leaq TAB+6*128(%rsp), %rdi
629
+ leaq TAB(%rsp), %rsi
630
+ leaq TAB+5*128(%rsp), %rbp
631
+ callq edwards25519_scalarmuldouble_epadd
632
+
633
+ // Multiple 8
634
+
635
+ leaq TAB+7*128(%rsp), %rdi
636
+ leaq TAB+3*128(%rsp), %rsi
637
+ callq edwards25519_scalarmuldouble_epdouble
638
+
639
+ // Handle the initialization, starting the loop counter at i = 252
640
+ // and initializing acc to the sum of the table entries for the
641
+ // top nybbles of the scalars (the ones with no implicit -8 bias).
642
+
643
+ movq $252, %rax
644
+ movq %rax, i
645
+
646
+ // Index for btable entry...
647
+
648
+ movq BSCALAR+24(%rsp), %rax
649
+ shrq $60, %rax
650
+ movq %rax, bf
651
+
652
+ // ...and constant-time indexing based on that index
653
+
654
+ movl $1, %eax
655
+ xorl %ebx, %ebx
656
+ xorl %ecx, %ecx
657
+ xorl %edx, %edx
658
+ movl $1, %r8d
659
+ xorl %r9d, %r9d
660
+ xorl %r10d, %r10d
661
+ xorl %r11d, %r11d
662
+ xorl %r12d, %r12d
663
+ xorl %r13d, %r13d
664
+ xorl %r14d, %r14d
665
+ xorl %r15d, %r15d
666
+
667
+ leaq edwards25519_scalarmuldouble_table(%rip), %rbp
668
+
669
+ cmpq $1, bf
670
+ movq (%rbp), %rsi
671
+ cmovzq %rsi, %rax
672
+ movq 8(%rbp), %rsi
673
+ cmovzq %rsi, %rbx
674
+ movq 16(%rbp), %rsi
675
+ cmovzq %rsi, %rcx
676
+ movq 24(%rbp), %rsi
677
+ cmovzq %rsi, %rdx
678
+ movq 32(%rbp), %rsi
679
+ cmovzq %rsi, %r8
680
+ movq 40(%rbp), %rsi
681
+ cmovzq %rsi, %r9
682
+ movq 48(%rbp), %rsi
683
+ cmovzq %rsi, %r10
684
+ movq 56(%rbp), %rsi
685
+ cmovzq %rsi, %r11
686
+ movq 64(%rbp), %rsi
687
+ cmovzq %rsi, %r12
688
+ movq 72(%rbp), %rsi
689
+ cmovzq %rsi, %r13
690
+ movq 80(%rbp), %rsi
691
+ cmovzq %rsi, %r14
692
+ movq 88(%rbp), %rsi
693
+ cmovzq %rsi, %r15
694
+ addq $96, %rbp
695
+
696
+ cmpq $2, bf
697
+ movq (%rbp), %rsi
698
+ cmovzq %rsi, %rax
699
+ movq 8(%rbp), %rsi
700
+ cmovzq %rsi, %rbx
701
+ movq 16(%rbp), %rsi
702
+ cmovzq %rsi, %rcx
703
+ movq 24(%rbp), %rsi
704
+ cmovzq %rsi, %rdx
705
+ movq 32(%rbp), %rsi
706
+ cmovzq %rsi, %r8
707
+ movq 40(%rbp), %rsi
708
+ cmovzq %rsi, %r9
709
+ movq 48(%rbp), %rsi
710
+ cmovzq %rsi, %r10
711
+ movq 56(%rbp), %rsi
712
+ cmovzq %rsi, %r11
713
+ movq 64(%rbp), %rsi
714
+ cmovzq %rsi, %r12
715
+ movq 72(%rbp), %rsi
716
+ cmovzq %rsi, %r13
717
+ movq 80(%rbp), %rsi
718
+ cmovzq %rsi, %r14
719
+ movq 88(%rbp), %rsi
720
+ cmovzq %rsi, %r15
721
+ addq $96, %rbp
722
+
723
+ cmpq $3, bf
724
+ movq (%rbp), %rsi
725
+ cmovzq %rsi, %rax
726
+ movq 8(%rbp), %rsi
727
+ cmovzq %rsi, %rbx
728
+ movq 16(%rbp), %rsi
729
+ cmovzq %rsi, %rcx
730
+ movq 24(%rbp), %rsi
731
+ cmovzq %rsi, %rdx
732
+ movq 32(%rbp), %rsi
733
+ cmovzq %rsi, %r8
734
+ movq 40(%rbp), %rsi
735
+ cmovzq %rsi, %r9
736
+ movq 48(%rbp), %rsi
737
+ cmovzq %rsi, %r10
738
+ movq 56(%rbp), %rsi
739
+ cmovzq %rsi, %r11
740
+ movq 64(%rbp), %rsi
741
+ cmovzq %rsi, %r12
742
+ movq 72(%rbp), %rsi
743
+ cmovzq %rsi, %r13
744
+ movq 80(%rbp), %rsi
745
+ cmovzq %rsi, %r14
746
+ movq 88(%rbp), %rsi
747
+ cmovzq %rsi, %r15
748
+ addq $96, %rbp
749
+
750
+ cmpq $4, bf
751
+ movq (%rbp), %rsi
752
+ cmovzq %rsi, %rax
753
+ movq 8(%rbp), %rsi
754
+ cmovzq %rsi, %rbx
755
+ movq 16(%rbp), %rsi
756
+ cmovzq %rsi, %rcx
757
+ movq 24(%rbp), %rsi
758
+ cmovzq %rsi, %rdx
759
+ movq 32(%rbp), %rsi
760
+ cmovzq %rsi, %r8
761
+ movq 40(%rbp), %rsi
762
+ cmovzq %rsi, %r9
763
+ movq 48(%rbp), %rsi
764
+ cmovzq %rsi, %r10
765
+ movq 56(%rbp), %rsi
766
+ cmovzq %rsi, %r11
767
+ movq 64(%rbp), %rsi
768
+ cmovzq %rsi, %r12
769
+ movq 72(%rbp), %rsi
770
+ cmovzq %rsi, %r13
771
+ movq 80(%rbp), %rsi
772
+ cmovzq %rsi, %r14
773
+ movq 88(%rbp), %rsi
774
+ cmovzq %rsi, %r15
775
+ addq $96, %rbp
776
+
777
+ cmpq $5, bf
778
+ movq (%rbp), %rsi
779
+ cmovzq %rsi, %rax
780
+ movq 8(%rbp), %rsi
781
+ cmovzq %rsi, %rbx
782
+ movq 16(%rbp), %rsi
783
+ cmovzq %rsi, %rcx
784
+ movq 24(%rbp), %rsi
785
+ cmovzq %rsi, %rdx
786
+ movq 32(%rbp), %rsi
787
+ cmovzq %rsi, %r8
788
+ movq 40(%rbp), %rsi
789
+ cmovzq %rsi, %r9
790
+ movq 48(%rbp), %rsi
791
+ cmovzq %rsi, %r10
792
+ movq 56(%rbp), %rsi
793
+ cmovzq %rsi, %r11
794
+ movq 64(%rbp), %rsi
795
+ cmovzq %rsi, %r12
796
+ movq 72(%rbp), %rsi
797
+ cmovzq %rsi, %r13
798
+ movq 80(%rbp), %rsi
799
+ cmovzq %rsi, %r14
800
+ movq 88(%rbp), %rsi
801
+ cmovzq %rsi, %r15
802
+ addq $96, %rbp
803
+
804
+ cmpq $6, bf
805
+ movq (%rbp), %rsi
806
+ cmovzq %rsi, %rax
807
+ movq 8(%rbp), %rsi
808
+ cmovzq %rsi, %rbx
809
+ movq 16(%rbp), %rsi
810
+ cmovzq %rsi, %rcx
811
+ movq 24(%rbp), %rsi
812
+ cmovzq %rsi, %rdx
813
+ movq 32(%rbp), %rsi
814
+ cmovzq %rsi, %r8
815
+ movq 40(%rbp), %rsi
816
+ cmovzq %rsi, %r9
817
+ movq 48(%rbp), %rsi
818
+ cmovzq %rsi, %r10
819
+ movq 56(%rbp), %rsi
820
+ cmovzq %rsi, %r11
821
+ movq 64(%rbp), %rsi
822
+ cmovzq %rsi, %r12
823
+ movq 72(%rbp), %rsi
824
+ cmovzq %rsi, %r13
825
+ movq 80(%rbp), %rsi
826
+ cmovzq %rsi, %r14
827
+ movq 88(%rbp), %rsi
828
+ cmovzq %rsi, %r15
829
+ addq $96, %rbp
830
+
831
+ cmpq $7, bf
832
+ movq (%rbp), %rsi
833
+ cmovzq %rsi, %rax
834
+ movq 8(%rbp), %rsi
835
+ cmovzq %rsi, %rbx
836
+ movq 16(%rbp), %rsi
837
+ cmovzq %rsi, %rcx
838
+ movq 24(%rbp), %rsi
839
+ cmovzq %rsi, %rdx
840
+ movq 32(%rbp), %rsi
841
+ cmovzq %rsi, %r8
842
+ movq 40(%rbp), %rsi
843
+ cmovzq %rsi, %r9
844
+ movq 48(%rbp), %rsi
845
+ cmovzq %rsi, %r10
846
+ movq 56(%rbp), %rsi
847
+ cmovzq %rsi, %r11
848
+ movq 64(%rbp), %rsi
849
+ cmovzq %rsi, %r12
850
+ movq 72(%rbp), %rsi
851
+ cmovzq %rsi, %r13
852
+ movq 80(%rbp), %rsi
853
+ cmovzq %rsi, %r14
854
+ movq 88(%rbp), %rsi
855
+ cmovzq %rsi, %r15
856
+ addq $96, %rbp
857
+
858
+ cmpq $8, bf
859
+ movq (%rbp), %rsi
860
+ cmovzq %rsi, %rax
861
+ movq 8(%rbp), %rsi
862
+ cmovzq %rsi, %rbx
863
+ movq 16(%rbp), %rsi
864
+ cmovzq %rsi, %rcx
865
+ movq 24(%rbp), %rsi
866
+ cmovzq %rsi, %rdx
867
+ movq 32(%rbp), %rsi
868
+ cmovzq %rsi, %r8
869
+ movq 40(%rbp), %rsi
870
+ cmovzq %rsi, %r9
871
+ movq 48(%rbp), %rsi
872
+ cmovzq %rsi, %r10
873
+ movq 56(%rbp), %rsi
874
+ cmovzq %rsi, %r11
875
+ movq 64(%rbp), %rsi
876
+ cmovzq %rsi, %r12
877
+ movq 72(%rbp), %rsi
878
+ cmovzq %rsi, %r13
879
+ movq 80(%rbp), %rsi
880
+ cmovzq %rsi, %r14
881
+ movq 88(%rbp), %rsi
882
+ cmovzq %rsi, %r15
883
+
884
+ movq %rax, BTABENT(%rsp)
885
+ movq %rbx, BTABENT+8(%rsp)
886
+ movq %rcx, BTABENT+16(%rsp)
887
+ movq %rdx, BTABENT+24(%rsp)
888
+ movq %r8, BTABENT+32(%rsp)
889
+ movq %r9, BTABENT+40(%rsp)
890
+ movq %r10, BTABENT+48(%rsp)
891
+ movq %r11, BTABENT+56(%rsp)
892
+ movq %r12, BTABENT+64(%rsp)
893
+ movq %r13, BTABENT+72(%rsp)
894
+ movq %r14, BTABENT+80(%rsp)
895
+ movq %r15, BTABENT+88(%rsp)
896
+
897
+ // Index for table entry...
898
+
899
+ movq SCALAR+24(%rsp), %rax
900
+ shrq $60, %rax
901
+ movq %rax, bf
902
+
903
+ // ...and constant-time indexing based on that index.
904
+ // Do the Y and Z fields first, to save on registers...
905
+
906
+ movl $1, %eax
907
+ xorl %ebx, %ebx
908
+ xorl %ecx, %ecx
909
+ xorl %edx, %edx
910
+ movl $1, %r8d
911
+ xorl %r9d, %r9d
912
+ xorl %r10d, %r10d
913
+ xorl %r11d, %r11d
914
+
915
+ leaq TAB+32(%rsp), %rbp
916
+
917
+ cmpq $1, bf
918
+ movq (%rbp), %rsi
919
+ cmovzq %rsi, %rax
920
+ movq 8(%rbp), %rsi
921
+ cmovzq %rsi, %rbx
922
+ movq 16(%rbp), %rsi
923
+ cmovzq %rsi, %rcx
924
+ movq 24(%rbp), %rsi
925
+ cmovzq %rsi, %rdx
926
+ movq 32(%rbp), %rsi
927
+ cmovzq %rsi, %r8
928
+ movq 40(%rbp), %rsi
929
+ cmovzq %rsi, %r9
930
+ movq 48(%rbp), %rsi
931
+ cmovzq %rsi, %r10
932
+ movq 56(%rbp), %rsi
933
+ cmovzq %rsi, %r11
934
+ addq $128, %rbp
935
+
936
+ cmpq $2, bf
937
+ movq (%rbp), %rsi
938
+ cmovzq %rsi, %rax
939
+ movq 8(%rbp), %rsi
940
+ cmovzq %rsi, %rbx
941
+ movq 16(%rbp), %rsi
942
+ cmovzq %rsi, %rcx
943
+ movq 24(%rbp), %rsi
944
+ cmovzq %rsi, %rdx
945
+ movq 32(%rbp), %rsi
946
+ cmovzq %rsi, %r8
947
+ movq 40(%rbp), %rsi
948
+ cmovzq %rsi, %r9
949
+ movq 48(%rbp), %rsi
950
+ cmovzq %rsi, %r10
951
+ movq 56(%rbp), %rsi
952
+ cmovzq %rsi, %r11
953
+ addq $128, %rbp
954
+
955
+ cmpq $3, bf
956
+ movq (%rbp), %rsi
957
+ cmovzq %rsi, %rax
958
+ movq 8(%rbp), %rsi
959
+ cmovzq %rsi, %rbx
960
+ movq 16(%rbp), %rsi
961
+ cmovzq %rsi, %rcx
962
+ movq 24(%rbp), %rsi
963
+ cmovzq %rsi, %rdx
964
+ movq 32(%rbp), %rsi
965
+ cmovzq %rsi, %r8
966
+ movq 40(%rbp), %rsi
967
+ cmovzq %rsi, %r9
968
+ movq 48(%rbp), %rsi
969
+ cmovzq %rsi, %r10
970
+ movq 56(%rbp), %rsi
971
+ cmovzq %rsi, %r11
972
+ addq $128, %rbp
973
+
974
+ cmpq $4, bf
975
+ movq (%rbp), %rsi
976
+ cmovzq %rsi, %rax
977
+ movq 8(%rbp), %rsi
978
+ cmovzq %rsi, %rbx
979
+ movq 16(%rbp), %rsi
980
+ cmovzq %rsi, %rcx
981
+ movq 24(%rbp), %rsi
982
+ cmovzq %rsi, %rdx
983
+ movq 32(%rbp), %rsi
984
+ cmovzq %rsi, %r8
985
+ movq 40(%rbp), %rsi
986
+ cmovzq %rsi, %r9
987
+ movq 48(%rbp), %rsi
988
+ cmovzq %rsi, %r10
989
+ movq 56(%rbp), %rsi
990
+ cmovzq %rsi, %r11
991
+ addq $128, %rbp
992
+
993
+ cmpq $5, bf
994
+ movq (%rbp), %rsi
995
+ cmovzq %rsi, %rax
996
+ movq 8(%rbp), %rsi
997
+ cmovzq %rsi, %rbx
998
+ movq 16(%rbp), %rsi
999
+ cmovzq %rsi, %rcx
1000
+ movq 24(%rbp), %rsi
1001
+ cmovzq %rsi, %rdx
1002
+ movq 32(%rbp), %rsi
1003
+ cmovzq %rsi, %r8
1004
+ movq 40(%rbp), %rsi
1005
+ cmovzq %rsi, %r9
1006
+ movq 48(%rbp), %rsi
1007
+ cmovzq %rsi, %r10
1008
+ movq 56(%rbp), %rsi
1009
+ cmovzq %rsi, %r11
1010
+ addq $128, %rbp
1011
+
1012
+ cmpq $6, bf
1013
+ movq (%rbp), %rsi
1014
+ cmovzq %rsi, %rax
1015
+ movq 8(%rbp), %rsi
1016
+ cmovzq %rsi, %rbx
1017
+ movq 16(%rbp), %rsi
1018
+ cmovzq %rsi, %rcx
1019
+ movq 24(%rbp), %rsi
1020
+ cmovzq %rsi, %rdx
1021
+ movq 32(%rbp), %rsi
1022
+ cmovzq %rsi, %r8
1023
+ movq 40(%rbp), %rsi
1024
+ cmovzq %rsi, %r9
1025
+ movq 48(%rbp), %rsi
1026
+ cmovzq %rsi, %r10
1027
+ movq 56(%rbp), %rsi
1028
+ cmovzq %rsi, %r11
1029
+ addq $128, %rbp
1030
+
1031
+ cmpq $7, bf
1032
+ movq (%rbp), %rsi
1033
+ cmovzq %rsi, %rax
1034
+ movq 8(%rbp), %rsi
1035
+ cmovzq %rsi, %rbx
1036
+ movq 16(%rbp), %rsi
1037
+ cmovzq %rsi, %rcx
1038
+ movq 24(%rbp), %rsi
1039
+ cmovzq %rsi, %rdx
1040
+ movq 32(%rbp), %rsi
1041
+ cmovzq %rsi, %r8
1042
+ movq 40(%rbp), %rsi
1043
+ cmovzq %rsi, %r9
1044
+ movq 48(%rbp), %rsi
1045
+ cmovzq %rsi, %r10
1046
+ movq 56(%rbp), %rsi
1047
+ cmovzq %rsi, %r11
1048
+ addq $128, %rbp
1049
+
1050
+ cmpq $8, bf
1051
+ movq (%rbp), %rsi
1052
+ cmovzq %rsi, %rax
1053
+ movq 8(%rbp), %rsi
1054
+ cmovzq %rsi, %rbx
1055
+ movq 16(%rbp), %rsi
1056
+ cmovzq %rsi, %rcx
1057
+ movq 24(%rbp), %rsi
1058
+ cmovzq %rsi, %rdx
1059
+ movq 32(%rbp), %rsi
1060
+ cmovzq %rsi, %r8
1061
+ movq 40(%rbp), %rsi
1062
+ cmovzq %rsi, %r9
1063
+ movq 48(%rbp), %rsi
1064
+ cmovzq %rsi, %r10
1065
+ movq 56(%rbp), %rsi
1066
+ cmovzq %rsi, %r11
1067
+
1068
+ movq %rax, TABENT+32(%rsp)
1069
+ movq %rbx, TABENT+40(%rsp)
1070
+ movq %rcx, TABENT+48(%rsp)
1071
+ movq %rdx, TABENT+56(%rsp)
1072
+ movq %r8, TABENT+64(%rsp)
1073
+ movq %r9, TABENT+72(%rsp)
1074
+ movq %r10, TABENT+80(%rsp)
1075
+ movq %r11, TABENT+88(%rsp)
1076
+
1077
+ // ...followed by the X and W fields
1078
+
1079
+ leaq TAB(%rsp), %rbp
1080
+
1081
+ xorl %eax, %eax
1082
+ xorl %ebx, %ebx
1083
+ xorl %ecx, %ecx
1084
+ xorl %edx, %edx
1085
+ xorl %r8d, %r8d
1086
+ xorl %r9d, %r9d
1087
+ xorl %r10d, %r10d
1088
+ xorl %r11d, %r11d
1089
+
1090
+ cmpq $1, bf
1091
+ movq (%rbp), %rsi
1092
+ cmovzq %rsi, %rax
1093
+ movq 8(%rbp), %rsi
1094
+ cmovzq %rsi, %rbx
1095
+ movq 16(%rbp), %rsi
1096
+ cmovzq %rsi, %rcx
1097
+ movq 24(%rbp), %rsi
1098
+ cmovzq %rsi, %rdx
1099
+ movq 96(%rbp), %rsi
1100
+ cmovzq %rsi, %r8
1101
+ movq 104(%rbp), %rsi
1102
+ cmovzq %rsi, %r9
1103
+ movq 112(%rbp), %rsi
1104
+ cmovzq %rsi, %r10
1105
+ movq 120(%rbp), %rsi
1106
+ cmovzq %rsi, %r11
1107
+ addq $128, %rbp
1108
+
1109
+ cmpq $2, bf
1110
+ movq (%rbp), %rsi
1111
+ cmovzq %rsi, %rax
1112
+ movq 8(%rbp), %rsi
1113
+ cmovzq %rsi, %rbx
1114
+ movq 16(%rbp), %rsi
1115
+ cmovzq %rsi, %rcx
1116
+ movq 24(%rbp), %rsi
1117
+ cmovzq %rsi, %rdx
1118
+ movq 96(%rbp), %rsi
1119
+ cmovzq %rsi, %r8
1120
+ movq 104(%rbp), %rsi
1121
+ cmovzq %rsi, %r9
1122
+ movq 112(%rbp), %rsi
1123
+ cmovzq %rsi, %r10
1124
+ movq 120(%rbp), %rsi
1125
+ cmovzq %rsi, %r11
1126
+ addq $128, %rbp
1127
+
1128
+ cmpq $3, bf
1129
+ movq (%rbp), %rsi
1130
+ cmovzq %rsi, %rax
1131
+ movq 8(%rbp), %rsi
1132
+ cmovzq %rsi, %rbx
1133
+ movq 16(%rbp), %rsi
1134
+ cmovzq %rsi, %rcx
1135
+ movq 24(%rbp), %rsi
1136
+ cmovzq %rsi, %rdx
1137
+ movq 96(%rbp), %rsi
1138
+ cmovzq %rsi, %r8
1139
+ movq 104(%rbp), %rsi
1140
+ cmovzq %rsi, %r9
1141
+ movq 112(%rbp), %rsi
1142
+ cmovzq %rsi, %r10
1143
+ movq 120(%rbp), %rsi
1144
+ cmovzq %rsi, %r11
1145
+ addq $128, %rbp
1146
+
1147
+ cmpq $4, bf
1148
+ movq (%rbp), %rsi
1149
+ cmovzq %rsi, %rax
1150
+ movq 8(%rbp), %rsi
1151
+ cmovzq %rsi, %rbx
1152
+ movq 16(%rbp), %rsi
1153
+ cmovzq %rsi, %rcx
1154
+ movq 24(%rbp), %rsi
1155
+ cmovzq %rsi, %rdx
1156
+ movq 96(%rbp), %rsi
1157
+ cmovzq %rsi, %r8
1158
+ movq 104(%rbp), %rsi
1159
+ cmovzq %rsi, %r9
1160
+ movq 112(%rbp), %rsi
1161
+ cmovzq %rsi, %r10
1162
+ movq 120(%rbp), %rsi
1163
+ cmovzq %rsi, %r11
1164
+ addq $128, %rbp
1165
+
1166
+ cmpq $5, bf
1167
+ movq (%rbp), %rsi
1168
+ cmovzq %rsi, %rax
1169
+ movq 8(%rbp), %rsi
1170
+ cmovzq %rsi, %rbx
1171
+ movq 16(%rbp), %rsi
1172
+ cmovzq %rsi, %rcx
1173
+ movq 24(%rbp), %rsi
1174
+ cmovzq %rsi, %rdx
1175
+ movq 96(%rbp), %rsi
1176
+ cmovzq %rsi, %r8
1177
+ movq 104(%rbp), %rsi
1178
+ cmovzq %rsi, %r9
1179
+ movq 112(%rbp), %rsi
1180
+ cmovzq %rsi, %r10
1181
+ movq 120(%rbp), %rsi
1182
+ cmovzq %rsi, %r11
1183
+ addq $128, %rbp
1184
+
1185
+ cmpq $6, bf
1186
+ movq (%rbp), %rsi
1187
+ cmovzq %rsi, %rax
1188
+ movq 8(%rbp), %rsi
1189
+ cmovzq %rsi, %rbx
1190
+ movq 16(%rbp), %rsi
1191
+ cmovzq %rsi, %rcx
1192
+ movq 24(%rbp), %rsi
1193
+ cmovzq %rsi, %rdx
1194
+ movq 96(%rbp), %rsi
1195
+ cmovzq %rsi, %r8
1196
+ movq 104(%rbp), %rsi
1197
+ cmovzq %rsi, %r9
1198
+ movq 112(%rbp), %rsi
1199
+ cmovzq %rsi, %r10
1200
+ movq 120(%rbp), %rsi
1201
+ cmovzq %rsi, %r11
1202
+ addq $128, %rbp
1203
+
1204
+ cmpq $7, bf
1205
+ movq (%rbp), %rsi
1206
+ cmovzq %rsi, %rax
1207
+ movq 8(%rbp), %rsi
1208
+ cmovzq %rsi, %rbx
1209
+ movq 16(%rbp), %rsi
1210
+ cmovzq %rsi, %rcx
1211
+ movq 24(%rbp), %rsi
1212
+ cmovzq %rsi, %rdx
1213
+ movq 96(%rbp), %rsi
1214
+ cmovzq %rsi, %r8
1215
+ movq 104(%rbp), %rsi
1216
+ cmovzq %rsi, %r9
1217
+ movq 112(%rbp), %rsi
1218
+ cmovzq %rsi, %r10
1219
+ movq 120(%rbp), %rsi
1220
+ cmovzq %rsi, %r11
1221
+ addq $128, %rbp
1222
+
1223
+ cmpq $8, bf
1224
+ movq (%rbp), %rsi
1225
+ cmovzq %rsi, %rax
1226
+ movq 8(%rbp), %rsi
1227
+ cmovzq %rsi, %rbx
1228
+ movq 16(%rbp), %rsi
1229
+ cmovzq %rsi, %rcx
1230
+ movq 24(%rbp), %rsi
1231
+ cmovzq %rsi, %rdx
1232
+ movq 96(%rbp), %rsi
1233
+ cmovzq %rsi, %r8
1234
+ movq 104(%rbp), %rsi
1235
+ cmovzq %rsi, %r9
1236
+ movq 112(%rbp), %rsi
1237
+ cmovzq %rsi, %r10
1238
+ movq 120(%rbp), %rsi
1239
+ cmovzq %rsi, %r11
1240
+
1241
+ movq %rax, TABENT(%rsp)
1242
+ movq %rbx, TABENT+8(%rsp)
1243
+ movq %rcx, TABENT+16(%rsp)
1244
+ movq %rdx, TABENT+24(%rsp)
1245
+ movq %r8, TABENT+96(%rsp)
1246
+ movq %r9, TABENT+104(%rsp)
1247
+ movq %r10, TABENT+112(%rsp)
1248
+ movq %r11, TABENT+120(%rsp)
1249
+
1250
+ // Add those elements to initialize the accumulator for bit position 252
1251
+
1252
+ leaq ACC(%rsp), %rdi
1253
+ leaq TABENT(%rsp), %rsi
1254
+ leaq BTABENT(%rsp), %rbp
1255
+ callq edwards25519_scalarmuldouble_pepadd
1256
+
1257
+ // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint
1258
+ // Start with i = 252 for bits 248..251 and go down four at a time to 3..0
1259
+
1260
+ edwards25519_scalarmuldouble_loop:
1261
+
1262
+ movq i, %rax
1263
+ subq $4, %rax
1264
+ movq %rax, i
1265
+
1266
+ // Double to acc' = 2 * acc
1267
+
1268
+ leaq ACC(%rsp), %rdi
1269
+ leaq ACC(%rsp), %rsi
1270
+ callq edwards25519_scalarmuldouble_pdouble
1271
+
1272
+ // Get btable entry, first getting the adjusted bitfield...
1273
+
1274
+ movq i, %rax
1275
+ movq %rax, %rcx
1276
+ shrq $6, %rax
1277
+ movq 32(%rsp,%rax,8), %rax
1278
+ shrq %cl, %rax
1279
+ andq $15, %rax
1280
+
1281
+ subq $8, %rax
1282
+ sbbq %rcx, %rcx
1283
+ xorq %rcx, %rax
1284
+ subq %rcx, %rax
1285
+ movq %rcx, cf
1286
+ movq %rax, bf
1287
+
1288
+ // ... then doing constant-time lookup with the appropriate index...
1289
+
1290
+ movl $1, %eax
1291
+ xorl %ebx, %ebx
1292
+ xorl %ecx, %ecx
1293
+ xorl %edx, %edx
1294
+ movl $1, %r8d
1295
+ xorl %r9d, %r9d
1296
+ xorl %r10d, %r10d
1297
+ xorl %r11d, %r11d
1298
+ xorl %r12d, %r12d
1299
+ xorl %r13d, %r13d
1300
+ xorl %r14d, %r14d
1301
+ xorl %r15d, %r15d
1302
+
1303
+ leaq edwards25519_scalarmuldouble_table(%rip), %rbp
1304
+
1305
+ cmpq $1, bf
1306
+ movq (%rbp), %rsi
1307
+ cmovzq %rsi, %rax
1308
+ movq 8(%rbp), %rsi
1309
+ cmovzq %rsi, %rbx
1310
+ movq 16(%rbp), %rsi
1311
+ cmovzq %rsi, %rcx
1312
+ movq 24(%rbp), %rsi
1313
+ cmovzq %rsi, %rdx
1314
+ movq 32(%rbp), %rsi
1315
+ cmovzq %rsi, %r8
1316
+ movq 40(%rbp), %rsi
1317
+ cmovzq %rsi, %r9
1318
+ movq 48(%rbp), %rsi
1319
+ cmovzq %rsi, %r10
1320
+ movq 56(%rbp), %rsi
1321
+ cmovzq %rsi, %r11
1322
+ movq 64(%rbp), %rsi
1323
+ cmovzq %rsi, %r12
1324
+ movq 72(%rbp), %rsi
1325
+ cmovzq %rsi, %r13
1326
+ movq 80(%rbp), %rsi
1327
+ cmovzq %rsi, %r14
1328
+ movq 88(%rbp), %rsi
1329
+ cmovzq %rsi, %r15
1330
+ addq $96, %rbp
1331
+
1332
+ cmpq $2, bf
1333
+ movq (%rbp), %rsi
1334
+ cmovzq %rsi, %rax
1335
+ movq 8(%rbp), %rsi
1336
+ cmovzq %rsi, %rbx
1337
+ movq 16(%rbp), %rsi
1338
+ cmovzq %rsi, %rcx
1339
+ movq 24(%rbp), %rsi
1340
+ cmovzq %rsi, %rdx
1341
+ movq 32(%rbp), %rsi
1342
+ cmovzq %rsi, %r8
1343
+ movq 40(%rbp), %rsi
1344
+ cmovzq %rsi, %r9
1345
+ movq 48(%rbp), %rsi
1346
+ cmovzq %rsi, %r10
1347
+ movq 56(%rbp), %rsi
1348
+ cmovzq %rsi, %r11
1349
+ movq 64(%rbp), %rsi
1350
+ cmovzq %rsi, %r12
1351
+ movq 72(%rbp), %rsi
1352
+ cmovzq %rsi, %r13
1353
+ movq 80(%rbp), %rsi
1354
+ cmovzq %rsi, %r14
1355
+ movq 88(%rbp), %rsi
1356
+ cmovzq %rsi, %r15
1357
+ addq $96, %rbp
1358
+
1359
+ cmpq $3, bf
1360
+ movq (%rbp), %rsi
1361
+ cmovzq %rsi, %rax
1362
+ movq 8(%rbp), %rsi
1363
+ cmovzq %rsi, %rbx
1364
+ movq 16(%rbp), %rsi
1365
+ cmovzq %rsi, %rcx
1366
+ movq 24(%rbp), %rsi
1367
+ cmovzq %rsi, %rdx
1368
+ movq 32(%rbp), %rsi
1369
+ cmovzq %rsi, %r8
1370
+ movq 40(%rbp), %rsi
1371
+ cmovzq %rsi, %r9
1372
+ movq 48(%rbp), %rsi
1373
+ cmovzq %rsi, %r10
1374
+ movq 56(%rbp), %rsi
1375
+ cmovzq %rsi, %r11
1376
+ movq 64(%rbp), %rsi
1377
+ cmovzq %rsi, %r12
1378
+ movq 72(%rbp), %rsi
1379
+ cmovzq %rsi, %r13
1380
+ movq 80(%rbp), %rsi
1381
+ cmovzq %rsi, %r14
1382
+ movq 88(%rbp), %rsi
1383
+ cmovzq %rsi, %r15
1384
+ addq $96, %rbp
1385
+
1386
+ cmpq $4, bf
1387
+ movq (%rbp), %rsi
1388
+ cmovzq %rsi, %rax
1389
+ movq 8(%rbp), %rsi
1390
+ cmovzq %rsi, %rbx
1391
+ movq 16(%rbp), %rsi
1392
+ cmovzq %rsi, %rcx
1393
+ movq 24(%rbp), %rsi
1394
+ cmovzq %rsi, %rdx
1395
+ movq 32(%rbp), %rsi
1396
+ cmovzq %rsi, %r8
1397
+ movq 40(%rbp), %rsi
1398
+ cmovzq %rsi, %r9
1399
+ movq 48(%rbp), %rsi
1400
+ cmovzq %rsi, %r10
1401
+ movq 56(%rbp), %rsi
1402
+ cmovzq %rsi, %r11
1403
+ movq 64(%rbp), %rsi
1404
+ cmovzq %rsi, %r12
1405
+ movq 72(%rbp), %rsi
1406
+ cmovzq %rsi, %r13
1407
+ movq 80(%rbp), %rsi
1408
+ cmovzq %rsi, %r14
1409
+ movq 88(%rbp), %rsi
1410
+ cmovzq %rsi, %r15
1411
+ addq $96, %rbp
1412
+
1413
+ cmpq $5, bf
1414
+ movq (%rbp), %rsi
1415
+ cmovzq %rsi, %rax
1416
+ movq 8(%rbp), %rsi
1417
+ cmovzq %rsi, %rbx
1418
+ movq 16(%rbp), %rsi
1419
+ cmovzq %rsi, %rcx
1420
+ movq 24(%rbp), %rsi
1421
+ cmovzq %rsi, %rdx
1422
+ movq 32(%rbp), %rsi
1423
+ cmovzq %rsi, %r8
1424
+ movq 40(%rbp), %rsi
1425
+ cmovzq %rsi, %r9
1426
+ movq 48(%rbp), %rsi
1427
+ cmovzq %rsi, %r10
1428
+ movq 56(%rbp), %rsi
1429
+ cmovzq %rsi, %r11
1430
+ movq 64(%rbp), %rsi
1431
+ cmovzq %rsi, %r12
1432
+ movq 72(%rbp), %rsi
1433
+ cmovzq %rsi, %r13
1434
+ movq 80(%rbp), %rsi
1435
+ cmovzq %rsi, %r14
1436
+ movq 88(%rbp), %rsi
1437
+ cmovzq %rsi, %r15
1438
+ addq $96, %rbp
1439
+
1440
+ cmpq $6, bf
1441
+ movq (%rbp), %rsi
1442
+ cmovzq %rsi, %rax
1443
+ movq 8(%rbp), %rsi
1444
+ cmovzq %rsi, %rbx
1445
+ movq 16(%rbp), %rsi
1446
+ cmovzq %rsi, %rcx
1447
+ movq 24(%rbp), %rsi
1448
+ cmovzq %rsi, %rdx
1449
+ movq 32(%rbp), %rsi
1450
+ cmovzq %rsi, %r8
1451
+ movq 40(%rbp), %rsi
1452
+ cmovzq %rsi, %r9
1453
+ movq 48(%rbp), %rsi
1454
+ cmovzq %rsi, %r10
1455
+ movq 56(%rbp), %rsi
1456
+ cmovzq %rsi, %r11
1457
+ movq 64(%rbp), %rsi
1458
+ cmovzq %rsi, %r12
1459
+ movq 72(%rbp), %rsi
1460
+ cmovzq %rsi, %r13
1461
+ movq 80(%rbp), %rsi
1462
+ cmovzq %rsi, %r14
1463
+ movq 88(%rbp), %rsi
1464
+ cmovzq %rsi, %r15
1465
+ addq $96, %rbp
1466
+
1467
+ cmpq $7, bf
1468
+ movq (%rbp), %rsi
1469
+ cmovzq %rsi, %rax
1470
+ movq 8(%rbp), %rsi
1471
+ cmovzq %rsi, %rbx
1472
+ movq 16(%rbp), %rsi
1473
+ cmovzq %rsi, %rcx
1474
+ movq 24(%rbp), %rsi
1475
+ cmovzq %rsi, %rdx
1476
+ movq 32(%rbp), %rsi
1477
+ cmovzq %rsi, %r8
1478
+ movq 40(%rbp), %rsi
1479
+ cmovzq %rsi, %r9
1480
+ movq 48(%rbp), %rsi
1481
+ cmovzq %rsi, %r10
1482
+ movq 56(%rbp), %rsi
1483
+ cmovzq %rsi, %r11
1484
+ movq 64(%rbp), %rsi
1485
+ cmovzq %rsi, %r12
1486
+ movq 72(%rbp), %rsi
1487
+ cmovzq %rsi, %r13
1488
+ movq 80(%rbp), %rsi
1489
+ cmovzq %rsi, %r14
1490
+ movq 88(%rbp), %rsi
1491
+ cmovzq %rsi, %r15
1492
+ addq $96, %rbp
1493
+
1494
+ cmpq $8, bf
1495
+ movq (%rbp), %rsi
1496
+ cmovzq %rsi, %rax
1497
+ movq 8(%rbp), %rsi
1498
+ cmovzq %rsi, %rbx
1499
+ movq 16(%rbp), %rsi
1500
+ cmovzq %rsi, %rcx
1501
+ movq 24(%rbp), %rsi
1502
+ cmovzq %rsi, %rdx
1503
+ movq 32(%rbp), %rsi
1504
+ cmovzq %rsi, %r8
1505
+ movq 40(%rbp), %rsi
1506
+ cmovzq %rsi, %r9
1507
+ movq 48(%rbp), %rsi
1508
+ cmovzq %rsi, %r10
1509
+ movq 56(%rbp), %rsi
1510
+ cmovzq %rsi, %r11
1511
+ movq 64(%rbp), %rsi
1512
+ cmovzq %rsi, %r12
1513
+ movq 72(%rbp), %rsi
1514
+ cmovzq %rsi, %r13
1515
+ movq 80(%rbp), %rsi
1516
+ cmovzq %rsi, %r14
1517
+ movq 88(%rbp), %rsi
1518
+ cmovzq %rsi, %r15
1519
+
1520
+ // ... then optionally negating before storing. The table entry
1521
+ // is in precomputed form and we currently have
1522
+ //
1523
+ // [%rdx;%rcx;%rbx;%rax] = y - x
1524
+ // [%r11;%r10;%r9;%r8] = x + y
1525
+ // [%r15;%r14;%r13;%r12] = 2 * d * x * y
1526
+ //
1527
+ // Negation for Edwards curves is -(x,y) = (-x,y), which in this modified
1528
+ // form amounts to swapping the first two fields and negating the third.
1529
+ // The negation does not always fully reduce even mod 2^256-38 in the zero
1530
+ // case, instead giving -0 = 2^256-38. But that is fine since the result is
1531
+ // always fed to a multiplication inside the "pepadd" function below that
1532
+ // handles any 256-bit input.
1533
+
1534
+ movq cf, %rdi
1535
+ testq %rdi, %rdi
1536
+
1537
+ movq %rax, %rsi
1538
+ cmovnzq %r8, %rsi
1539
+ cmovnzq %rax, %r8
1540
+ movq %rsi, BTABENT(%rsp)
1541
+ movq %r8, BTABENT+32(%rsp)
1542
+
1543
+ movq %rbx, %rsi
1544
+ cmovnzq %r9, %rsi
1545
+ cmovnzq %rbx, %r9
1546
+ movq %rsi, BTABENT+8(%rsp)
1547
+ movq %r9, BTABENT+40(%rsp)
1548
+
1549
+ movq %rcx, %rsi
1550
+ cmovnzq %r10, %rsi
1551
+ cmovnzq %rcx, %r10
1552
+ movq %rsi, BTABENT+16(%rsp)
1553
+ movq %r10, BTABENT+48(%rsp)
1554
+
1555
+ movq %rdx, %rsi
1556
+ cmovnzq %r11, %rsi
1557
+ cmovnzq %rdx, %r11
1558
+ movq %rsi, BTABENT+24(%rsp)
1559
+ movq %r11, BTABENT+56(%rsp)
1560
+
1561
+ xorq %rdi, %r12
1562
+ xorq %rdi, %r13
1563
+ xorq %rdi, %r14
1564
+ xorq %rdi, %r15
1565
+ andq $37, %rdi
1566
+ subq %rdi, %r12
1567
+ sbbq $0, %r13
1568
+ sbbq $0, %r14
1569
+ sbbq $0, %r15
1570
+ movq %r12, BTABENT+64(%rsp)
1571
+ movq %r13, BTABENT+72(%rsp)
1572
+ movq %r14, BTABENT+80(%rsp)
1573
+ movq %r15, BTABENT+88(%rsp)
1574
+
1575
+ // Get table entry, first getting the adjusted bitfield...
1576
+
1577
+ movq i, %rax
1578
+ movq %rax, %rcx
1579
+ shrq $6, %rax
1580
+ movq (%rsp,%rax,8), %rax
1581
+ shrq %cl, %rax
1582
+ andq $15, %rax
1583
+
1584
+ subq $8, %rax
1585
+ sbbq %rcx, %rcx
1586
+ xorq %rcx, %rax
1587
+ subq %rcx, %rax
1588
+ movq %rcx, cf
1589
+ movq %rax, bf
1590
+
1591
+ // ...and constant-time indexing based on that index
1592
+ // Do the Y and Z fields first, to save on registers
1593
+ // and store them back (they don't need any modification)
1594
+
1595
+ movl $1, %eax
1596
+ xorl %ebx, %ebx
1597
+ xorl %ecx, %ecx
1598
+ xorl %edx, %edx
1599
+ movl $1, %r8d
1600
+ xorl %r9d, %r9d
1601
+ xorl %r10d, %r10d
1602
+ xorl %r11d, %r11d
1603
+
1604
+ leaq TAB+32(%rsp), %rbp
1605
+
1606
+ cmpq $1, bf
1607
+ movq (%rbp), %rsi
1608
+ cmovzq %rsi, %rax
1609
+ movq 8(%rbp), %rsi
1610
+ cmovzq %rsi, %rbx
1611
+ movq 16(%rbp), %rsi
1612
+ cmovzq %rsi, %rcx
1613
+ movq 24(%rbp), %rsi
1614
+ cmovzq %rsi, %rdx
1615
+ movq 32(%rbp), %rsi
1616
+ cmovzq %rsi, %r8
1617
+ movq 40(%rbp), %rsi
1618
+ cmovzq %rsi, %r9
1619
+ movq 48(%rbp), %rsi
1620
+ cmovzq %rsi, %r10
1621
+ movq 56(%rbp), %rsi
1622
+ cmovzq %rsi, %r11
1623
+ addq $128, %rbp
1624
+
1625
+ cmpq $2, bf
1626
+ movq (%rbp), %rsi
1627
+ cmovzq %rsi, %rax
1628
+ movq 8(%rbp), %rsi
1629
+ cmovzq %rsi, %rbx
1630
+ movq 16(%rbp), %rsi
1631
+ cmovzq %rsi, %rcx
1632
+ movq 24(%rbp), %rsi
1633
+ cmovzq %rsi, %rdx
1634
+ movq 32(%rbp), %rsi
1635
+ cmovzq %rsi, %r8
1636
+ movq 40(%rbp), %rsi
1637
+ cmovzq %rsi, %r9
1638
+ movq 48(%rbp), %rsi
1639
+ cmovzq %rsi, %r10
1640
+ movq 56(%rbp), %rsi
1641
+ cmovzq %rsi, %r11
1642
+ addq $128, %rbp
1643
+
1644
+ cmpq $3, bf
1645
+ movq (%rbp), %rsi
1646
+ cmovzq %rsi, %rax
1647
+ movq 8(%rbp), %rsi
1648
+ cmovzq %rsi, %rbx
1649
+ movq 16(%rbp), %rsi
1650
+ cmovzq %rsi, %rcx
1651
+ movq 24(%rbp), %rsi
1652
+ cmovzq %rsi, %rdx
1653
+ movq 32(%rbp), %rsi
1654
+ cmovzq %rsi, %r8
1655
+ movq 40(%rbp), %rsi
1656
+ cmovzq %rsi, %r9
1657
+ movq 48(%rbp), %rsi
1658
+ cmovzq %rsi, %r10
1659
+ movq 56(%rbp), %rsi
1660
+ cmovzq %rsi, %r11
1661
+ addq $128, %rbp
1662
+
1663
+ cmpq $4, bf
1664
+ movq (%rbp), %rsi
1665
+ cmovzq %rsi, %rax
1666
+ movq 8(%rbp), %rsi
1667
+ cmovzq %rsi, %rbx
1668
+ movq 16(%rbp), %rsi
1669
+ cmovzq %rsi, %rcx
1670
+ movq 24(%rbp), %rsi
1671
+ cmovzq %rsi, %rdx
1672
+ movq 32(%rbp), %rsi
1673
+ cmovzq %rsi, %r8
1674
+ movq 40(%rbp), %rsi
1675
+ cmovzq %rsi, %r9
1676
+ movq 48(%rbp), %rsi
1677
+ cmovzq %rsi, %r10
1678
+ movq 56(%rbp), %rsi
1679
+ cmovzq %rsi, %r11
1680
+ addq $128, %rbp
1681
+
1682
+ cmpq $5, bf
1683
+ movq (%rbp), %rsi
1684
+ cmovzq %rsi, %rax
1685
+ movq 8(%rbp), %rsi
1686
+ cmovzq %rsi, %rbx
1687
+ movq 16(%rbp), %rsi
1688
+ cmovzq %rsi, %rcx
1689
+ movq 24(%rbp), %rsi
1690
+ cmovzq %rsi, %rdx
1691
+ movq 32(%rbp), %rsi
1692
+ cmovzq %rsi, %r8
1693
+ movq 40(%rbp), %rsi
1694
+ cmovzq %rsi, %r9
1695
+ movq 48(%rbp), %rsi
1696
+ cmovzq %rsi, %r10
1697
+ movq 56(%rbp), %rsi
1698
+ cmovzq %rsi, %r11
1699
+ addq $128, %rbp
1700
+
1701
+ cmpq $6, bf
1702
+ movq (%rbp), %rsi
1703
+ cmovzq %rsi, %rax
1704
+ movq 8(%rbp), %rsi
1705
+ cmovzq %rsi, %rbx
1706
+ movq 16(%rbp), %rsi
1707
+ cmovzq %rsi, %rcx
1708
+ movq 24(%rbp), %rsi
1709
+ cmovzq %rsi, %rdx
1710
+ movq 32(%rbp), %rsi
1711
+ cmovzq %rsi, %r8
1712
+ movq 40(%rbp), %rsi
1713
+ cmovzq %rsi, %r9
1714
+ movq 48(%rbp), %rsi
1715
+ cmovzq %rsi, %r10
1716
+ movq 56(%rbp), %rsi
1717
+ cmovzq %rsi, %r11
1718
+ addq $128, %rbp
1719
+
1720
+ cmpq $7, bf
1721
+ movq (%rbp), %rsi
1722
+ cmovzq %rsi, %rax
1723
+ movq 8(%rbp), %rsi
1724
+ cmovzq %rsi, %rbx
1725
+ movq 16(%rbp), %rsi
1726
+ cmovzq %rsi, %rcx
1727
+ movq 24(%rbp), %rsi
1728
+ cmovzq %rsi, %rdx
1729
+ movq 32(%rbp), %rsi
1730
+ cmovzq %rsi, %r8
1731
+ movq 40(%rbp), %rsi
1732
+ cmovzq %rsi, %r9
1733
+ movq 48(%rbp), %rsi
1734
+ cmovzq %rsi, %r10
1735
+ movq 56(%rbp), %rsi
1736
+ cmovzq %rsi, %r11
1737
+ addq $128, %rbp
1738
+
1739
+ cmpq $8, bf
1740
+ movq (%rbp), %rsi
1741
+ cmovzq %rsi, %rax
1742
+ movq 8(%rbp), %rsi
1743
+ cmovzq %rsi, %rbx
1744
+ movq 16(%rbp), %rsi
1745
+ cmovzq %rsi, %rcx
1746
+ movq 24(%rbp), %rsi
1747
+ cmovzq %rsi, %rdx
1748
+ movq 32(%rbp), %rsi
1749
+ cmovzq %rsi, %r8
1750
+ movq 40(%rbp), %rsi
1751
+ cmovzq %rsi, %r9
1752
+ movq 48(%rbp), %rsi
1753
+ cmovzq %rsi, %r10
1754
+ movq 56(%rbp), %rsi
1755
+ cmovzq %rsi, %r11
1756
+
1757
+ movq %rax, TABENT+32(%rsp)
1758
+ movq %rbx, TABENT+40(%rsp)
1759
+ movq %rcx, TABENT+48(%rsp)
1760
+ movq %rdx, TABENT+56(%rsp)
1761
+ movq %r8, TABENT+64(%rsp)
1762
+ movq %r9, TABENT+72(%rsp)
1763
+ movq %r10, TABENT+80(%rsp)
1764
+ movq %r11, TABENT+88(%rsp)
1765
+
1766
+ // Now do the X and W fields...
1767
+
1768
+ leaq TAB(%rsp), %rbp
1769
+
1770
+ xorl %eax, %eax
1771
+ xorl %ebx, %ebx
1772
+ xorl %ecx, %ecx
1773
+ xorl %edx, %edx
1774
+ xorl %r8d, %r8d
1775
+ xorl %r9d, %r9d
1776
+ xorl %r10d, %r10d
1777
+ xorl %r11d, %r11d
1778
+
1779
+ cmpq $1, bf
1780
+ movq (%rbp), %rsi
1781
+ cmovzq %rsi, %rax
1782
+ movq 8(%rbp), %rsi
1783
+ cmovzq %rsi, %rbx
1784
+ movq 16(%rbp), %rsi
1785
+ cmovzq %rsi, %rcx
1786
+ movq 24(%rbp), %rsi
1787
+ cmovzq %rsi, %rdx
1788
+ movq 96(%rbp), %rsi
1789
+ cmovzq %rsi, %r8
1790
+ movq 104(%rbp), %rsi
1791
+ cmovzq %rsi, %r9
1792
+ movq 112(%rbp), %rsi
1793
+ cmovzq %rsi, %r10
1794
+ movq 120(%rbp), %rsi
1795
+ cmovzq %rsi, %r11
1796
+ addq $128, %rbp
1797
+
1798
+ cmpq $2, bf
1799
+ movq (%rbp), %rsi
1800
+ cmovzq %rsi, %rax
1801
+ movq 8(%rbp), %rsi
1802
+ cmovzq %rsi, %rbx
1803
+ movq 16(%rbp), %rsi
1804
+ cmovzq %rsi, %rcx
1805
+ movq 24(%rbp), %rsi
1806
+ cmovzq %rsi, %rdx
1807
+ movq 96(%rbp), %rsi
1808
+ cmovzq %rsi, %r8
1809
+ movq 104(%rbp), %rsi
1810
+ cmovzq %rsi, %r9
1811
+ movq 112(%rbp), %rsi
1812
+ cmovzq %rsi, %r10
1813
+ movq 120(%rbp), %rsi
1814
+ cmovzq %rsi, %r11
1815
+ addq $128, %rbp
1816
+
1817
+ cmpq $3, bf
1818
+ movq (%rbp), %rsi
1819
+ cmovzq %rsi, %rax
1820
+ movq 8(%rbp), %rsi
1821
+ cmovzq %rsi, %rbx
1822
+ movq 16(%rbp), %rsi
1823
+ cmovzq %rsi, %rcx
1824
+ movq 24(%rbp), %rsi
1825
+ cmovzq %rsi, %rdx
1826
+ movq 96(%rbp), %rsi
1827
+ cmovzq %rsi, %r8
1828
+ movq 104(%rbp), %rsi
1829
+ cmovzq %rsi, %r9
1830
+ movq 112(%rbp), %rsi
1831
+ cmovzq %rsi, %r10
1832
+ movq 120(%rbp), %rsi
1833
+ cmovzq %rsi, %r11
1834
+ addq $128, %rbp
1835
+
1836
+ cmpq $4, bf
1837
+ movq (%rbp), %rsi
1838
+ cmovzq %rsi, %rax
1839
+ movq 8(%rbp), %rsi
1840
+ cmovzq %rsi, %rbx
1841
+ movq 16(%rbp), %rsi
1842
+ cmovzq %rsi, %rcx
1843
+ movq 24(%rbp), %rsi
1844
+ cmovzq %rsi, %rdx
1845
+ movq 96(%rbp), %rsi
1846
+ cmovzq %rsi, %r8
1847
+ movq 104(%rbp), %rsi
1848
+ cmovzq %rsi, %r9
1849
+ movq 112(%rbp), %rsi
1850
+ cmovzq %rsi, %r10
1851
+ movq 120(%rbp), %rsi
1852
+ cmovzq %rsi, %r11
1853
+ addq $128, %rbp
1854
+
1855
+ cmpq $5, bf
1856
+ movq (%rbp), %rsi
1857
+ cmovzq %rsi, %rax
1858
+ movq 8(%rbp), %rsi
1859
+ cmovzq %rsi, %rbx
1860
+ movq 16(%rbp), %rsi
1861
+ cmovzq %rsi, %rcx
1862
+ movq 24(%rbp), %rsi
1863
+ cmovzq %rsi, %rdx
1864
+ movq 96(%rbp), %rsi
1865
+ cmovzq %rsi, %r8
1866
+ movq 104(%rbp), %rsi
1867
+ cmovzq %rsi, %r9
1868
+ movq 112(%rbp), %rsi
1869
+ cmovzq %rsi, %r10
1870
+ movq 120(%rbp), %rsi
1871
+ cmovzq %rsi, %r11
1872
+ addq $128, %rbp
1873
+
1874
+ cmpq $6, bf
1875
+ movq (%rbp), %rsi
1876
+ cmovzq %rsi, %rax
1877
+ movq 8(%rbp), %rsi
1878
+ cmovzq %rsi, %rbx
1879
+ movq 16(%rbp), %rsi
1880
+ cmovzq %rsi, %rcx
1881
+ movq 24(%rbp), %rsi
1882
+ cmovzq %rsi, %rdx
1883
+ movq 96(%rbp), %rsi
1884
+ cmovzq %rsi, %r8
1885
+ movq 104(%rbp), %rsi
1886
+ cmovzq %rsi, %r9
1887
+ movq 112(%rbp), %rsi
1888
+ cmovzq %rsi, %r10
1889
+ movq 120(%rbp), %rsi
1890
+ cmovzq %rsi, %r11
1891
+ addq $128, %rbp
1892
+
1893
+ cmpq $7, bf
1894
+ movq (%rbp), %rsi
1895
+ cmovzq %rsi, %rax
1896
+ movq 8(%rbp), %rsi
1897
+ cmovzq %rsi, %rbx
1898
+ movq 16(%rbp), %rsi
1899
+ cmovzq %rsi, %rcx
1900
+ movq 24(%rbp), %rsi
1901
+ cmovzq %rsi, %rdx
1902
+ movq 96(%rbp), %rsi
1903
+ cmovzq %rsi, %r8
1904
+ movq 104(%rbp), %rsi
1905
+ cmovzq %rsi, %r9
1906
+ movq 112(%rbp), %rsi
1907
+ cmovzq %rsi, %r10
1908
+ movq 120(%rbp), %rsi
1909
+ cmovzq %rsi, %r11
1910
+ addq $128, %rbp
1911
+
1912
+ cmpq $8, bf
1913
+ movq (%rbp), %rsi
1914
+ cmovzq %rsi, %rax
1915
+ movq 8(%rbp), %rsi
1916
+ cmovzq %rsi, %rbx
1917
+ movq 16(%rbp), %rsi
1918
+ cmovzq %rsi, %rcx
1919
+ movq 24(%rbp), %rsi
1920
+ cmovzq %rsi, %rdx
1921
+ movq 96(%rbp), %rsi
1922
+ cmovzq %rsi, %r8
1923
+ movq 104(%rbp), %rsi
1924
+ cmovzq %rsi, %r9
1925
+ movq 112(%rbp), %rsi
1926
+ cmovzq %rsi, %r10
1927
+ movq 120(%rbp), %rsi
1928
+ cmovzq %rsi, %r11
1929
+
1930
+ // ... then optionally negate before storing the X and W fields. This
1931
+ // time the table entry is extended-projective, and is here:
1932
+ //
1933
+ // [%rdx;%rcx;%rbx;%rax] = X
1934
+ // [tabent+32] = Y
1935
+ // [tabent+64] = Z
1936
+ // [%r11;%r10;%r9;%r8] = W
1937
+ //
1938
+ // This time we just need to negate the X and the W fields.
1939
+ // The crude way negation is done can result in values of X or W
1940
+ // (when initially zero before negation) being exactly equal to
1941
+ // 2^256-38, but the "pepadd" function handles that correctly.
1942
+
1943
+ movq cf, %rdi
1944
+
1945
+ xorq %rdi, %rax
1946
+ xorq %rdi, %rbx
1947
+ xorq %rdi, %rcx
1948
+ xorq %rdi, %rdx
1949
+
1950
+ xorq %rdi, %r8
1951
+ xorq %rdi, %r9
1952
+ xorq %rdi, %r10
1953
+ xorq %rdi, %r11
1954
+
1955
+ andq $37, %rdi
1956
+
1957
+ subq %rdi, %rax
1958
+ sbbq $0, %rbx
1959
+ sbbq $0, %rcx
1960
+ sbbq $0, %rdx
1961
+
1962
+ movq %rax, TABENT(%rsp)
1963
+ movq %rbx, TABENT+8(%rsp)
1964
+ movq %rcx, TABENT+16(%rsp)
1965
+ movq %rdx, TABENT+24(%rsp)
1966
+
1967
+ subq %rdi, %r8
1968
+ sbbq $0, %r9
1969
+ sbbq $0, %r10
1970
+ sbbq $0, %r11
1971
+
1972
+ movq %r8, TABENT+96(%rsp)
1973
+ movq %r9, TABENT+104(%rsp)
1974
+ movq %r10, TABENT+112(%rsp)
1975
+ movq %r11, TABENT+120(%rsp)
1976
+
1977
+ // Double to acc' = 4 * acc
1978
+
1979
+ leaq ACC(%rsp), %rdi
1980
+ leaq ACC(%rsp), %rsi
1981
+ callq edwards25519_scalarmuldouble_pdouble
1982
+
1983
+ // Add tabent := tabent + btabent
1984
+
1985
+ leaq TABENT(%rsp), %rdi
1986
+ leaq TABENT(%rsp), %rsi
1987
+ leaq BTABENT(%rsp), %rbp
1988
+ callq edwards25519_scalarmuldouble_pepadd
1989
+
1990
+ // Double to acc' = 8 * acc
1991
+
1992
+ leaq ACC(%rsp), %rdi
1993
+ leaq ACC(%rsp), %rsi
1994
+ callq edwards25519_scalarmuldouble_pdouble
1995
+
1996
+ // Double to acc' = 16 * acc
1997
+
1998
+ leaq ACC(%rsp), %rdi
1999
+ leaq ACC(%rsp), %rsi
2000
+ callq edwards25519_scalarmuldouble_epdouble
2001
+
2002
+ // Add table entry, acc := acc + tabent
2003
+
2004
+ leaq ACC(%rsp), %rdi
2005
+ leaq ACC(%rsp), %rsi
2006
+ leaq TABENT(%rsp), %rbp
2007
+ callq edwards25519_scalarmuldouble_epadd
2008
+
2009
+ // Loop down
2010
+
2011
+ movq i, %rax
2012
+ testq %rax, %rax
2013
+ jnz edwards25519_scalarmuldouble_loop
2014
+
2015
+ // Prepare to call the modular inverse function to get tab = 1/z
2016
+
2017
+ leaq TAB(%rsp), %rdi
2018
+ leaq ACC+64(%rsp), %rsi
2019
+
2020
+ // Inline copy of bignum_inv_p25519, identical except for stripping out
2021
+ // the prologue and epilogue saving and restoring registers and making
2022
+ // and reclaiming room on the stack. For more details and explanations see
2023
+ // "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for
2024
+ // its own temporaries is 208 bytes, so it has no effect on variables
2025
+ // that are needed in the rest of our computation here: res, tab and acc.
2026
+
2027
+ movq %rdi, 0xc0(%rsp)
2028
+ xorl %eax, %eax
2029
+ leaq -0x13(%rax), %rcx
2030
+ notq %rax
2031
+ movq %rcx, (%rsp)
2032
+ movq %rax, 0x8(%rsp)
2033
+ movq %rax, 0x10(%rsp)
2034
+ btr $0x3f, %rax
2035
+ movq %rax, 0x18(%rsp)
2036
+ movq (%rsi), %rdx
2037
+ movq 0x8(%rsi), %rcx
2038
+ movq 0x10(%rsi), %r8
2039
+ movq 0x18(%rsi), %r9
2040
+ movl $0x1, %eax
2041
+ xorl %r10d, %r10d
2042
+ bts $0x3f, %r9
2043
+ adcq %r10, %rax
2044
+ imulq $0x13, %rax, %rax
2045
+ addq %rax, %rdx
2046
+ adcq %r10, %rcx
2047
+ adcq %r10, %r8
2048
+ adcq %r10, %r9
2049
+ movl $0x13, %eax
2050
+ cmovbq %r10, %rax
2051
+ subq %rax, %rdx
2052
+ sbbq %r10, %rcx
2053
+ sbbq %r10, %r8
2054
+ sbbq %r10, %r9
2055
+ btr $0x3f, %r9
2056
+ movq %rdx, 0x20(%rsp)
2057
+ movq %rcx, 0x28(%rsp)
2058
+ movq %r8, 0x30(%rsp)
2059
+ movq %r9, 0x38(%rsp)
2060
+ xorl %eax, %eax
2061
+ movq %rax, 0x40(%rsp)
2062
+ movq %rax, 0x48(%rsp)
2063
+ movq %rax, 0x50(%rsp)
2064
+ movq %rax, 0x58(%rsp)
2065
+ movabsq $0xa0f99e2375022099, %rax
2066
+ movq %rax, 0x60(%rsp)
2067
+ movabsq $0xa8c68f3f1d132595, %rax
2068
+ movq %rax, 0x68(%rsp)
2069
+ movabsq $0x6c6c893805ac5242, %rax
2070
+ movq %rax, 0x70(%rsp)
2071
+ movabsq $0x276508b241770615, %rax
2072
+ movq %rax, 0x78(%rsp)
2073
+ movq $0xa, 0x90(%rsp)
2074
+ movq $0x1, 0x98(%rsp)
2075
+ jmp edwards25519_scalarmuldouble_midloop
2076
+ edwards25519_scalarmuldouble_inverseloop:
2077
+ movq %r8, %r9
2078
+ sarq $0x3f, %r9
2079
+ xorq %r9, %r8
2080
+ subq %r9, %r8
2081
+ movq %r10, %r11
2082
+ sarq $0x3f, %r11
2083
+ xorq %r11, %r10
2084
+ subq %r11, %r10
2085
+ movq %r12, %r13
2086
+ sarq $0x3f, %r13
2087
+ xorq %r13, %r12
2088
+ subq %r13, %r12
2089
+ movq %r14, %r15
2090
+ sarq $0x3f, %r15
2091
+ xorq %r15, %r14
2092
+ subq %r15, %r14
2093
+ movq %r8, %rax
2094
+ andq %r9, %rax
2095
+ movq %r10, %rdi
2096
+ andq %r11, %rdi
2097
+ addq %rax, %rdi
2098
+ movq %rdi, 0x80(%rsp)
2099
+ movq %r12, %rax
2100
+ andq %r13, %rax
2101
+ movq %r14, %rsi
2102
+ andq %r15, %rsi
2103
+ addq %rax, %rsi
2104
+ movq %rsi, 0x88(%rsp)
2105
+ xorl %ebx, %ebx
2106
+ movq (%rsp), %rax
2107
+ xorq %r9, %rax
2108
+ mulq %r8
2109
+ addq %rax, %rdi
2110
+ adcq %rdx, %rbx
2111
+ movq 0x20(%rsp), %rax
2112
+ xorq %r11, %rax
2113
+ mulq %r10
2114
+ addq %rax, %rdi
2115
+ adcq %rdx, %rbx
2116
+ xorl %ebp, %ebp
2117
+ movq (%rsp), %rax
2118
+ xorq %r13, %rax
2119
+ mulq %r12
2120
+ addq %rax, %rsi
2121
+ adcq %rdx, %rbp
2122
+ movq 0x20(%rsp), %rax
2123
+ xorq %r15, %rax
2124
+ mulq %r14
2125
+ addq %rax, %rsi
2126
+ adcq %rdx, %rbp
2127
+ xorl %ecx, %ecx
2128
+ movq 0x8(%rsp), %rax
2129
+ xorq %r9, %rax
2130
+ mulq %r8
2131
+ addq %rax, %rbx
2132
+ adcq %rdx, %rcx
2133
+ movq 0x28(%rsp), %rax
2134
+ xorq %r11, %rax
2135
+ mulq %r10
2136
+ addq %rax, %rbx
2137
+ adcq %rdx, %rcx
2138
+ shrdq $0x3b, %rbx, %rdi
2139
+ movq %rdi, (%rsp)
2140
+ xorl %edi, %edi
2141
+ movq 0x8(%rsp), %rax
2142
+ xorq %r13, %rax
2143
+ mulq %r12
2144
+ addq %rax, %rbp
2145
+ adcq %rdx, %rdi
2146
+ movq 0x28(%rsp), %rax
2147
+ xorq %r15, %rax
2148
+ mulq %r14
2149
+ addq %rax, %rbp
2150
+ adcq %rdx, %rdi
2151
+ shrdq $0x3b, %rbp, %rsi
2152
+ movq %rsi, 0x20(%rsp)
2153
+ xorl %esi, %esi
2154
+ movq 0x10(%rsp), %rax
2155
+ xorq %r9, %rax
2156
+ mulq %r8
2157
+ addq %rax, %rcx
2158
+ adcq %rdx, %rsi
2159
+ movq 0x30(%rsp), %rax
2160
+ xorq %r11, %rax
2161
+ mulq %r10
2162
+ addq %rax, %rcx
2163
+ adcq %rdx, %rsi
2164
+ shrdq $0x3b, %rcx, %rbx
2165
+ movq %rbx, 0x8(%rsp)
2166
+ xorl %ebx, %ebx
2167
+ movq 0x10(%rsp), %rax
2168
+ xorq %r13, %rax
2169
+ mulq %r12
2170
+ addq %rax, %rdi
2171
+ adcq %rdx, %rbx
2172
+ movq 0x30(%rsp), %rax
2173
+ xorq %r15, %rax
2174
+ mulq %r14
2175
+ addq %rax, %rdi
2176
+ adcq %rdx, %rbx
2177
+ shrdq $0x3b, %rdi, %rbp
2178
+ movq %rbp, 0x28(%rsp)
2179
+ movq 0x18(%rsp), %rax
2180
+ xorq %r9, %rax
2181
+ movq %rax, %rbp
2182
+ sarq $0x3f, %rbp
2183
+ andq %r8, %rbp
2184
+ negq %rbp
2185
+ mulq %r8
2186
+ addq %rax, %rsi
2187
+ adcq %rdx, %rbp
2188
+ movq 0x38(%rsp), %rax
2189
+ xorq %r11, %rax
2190
+ movq %rax, %rdx
2191
+ sarq $0x3f, %rdx
2192
+ andq %r10, %rdx
2193
+ subq %rdx, %rbp
2194
+ mulq %r10
2195
+ addq %rax, %rsi
2196
+ adcq %rdx, %rbp
2197
+ shrdq $0x3b, %rsi, %rcx
2198
+ movq %rcx, 0x10(%rsp)
2199
+ shrdq $0x3b, %rbp, %rsi
2200
+ movq 0x18(%rsp), %rax
2201
+ movq %rsi, 0x18(%rsp)
2202
+ xorq %r13, %rax
2203
+ movq %rax, %rsi
2204
+ sarq $0x3f, %rsi
2205
+ andq %r12, %rsi
2206
+ negq %rsi
2207
+ mulq %r12
2208
+ addq %rax, %rbx
2209
+ adcq %rdx, %rsi
2210
+ movq 0x38(%rsp), %rax
2211
+ xorq %r15, %rax
2212
+ movq %rax, %rdx
2213
+ sarq $0x3f, %rdx
2214
+ andq %r14, %rdx
2215
+ subq %rdx, %rsi
2216
+ mulq %r14
2217
+ addq %rax, %rbx
2218
+ adcq %rdx, %rsi
2219
+ shrdq $0x3b, %rbx, %rdi
2220
+ movq %rdi, 0x30(%rsp)
2221
+ shrdq $0x3b, %rsi, %rbx
2222
+ movq %rbx, 0x38(%rsp)
2223
+ movq 0x80(%rsp), %rbx
2224
+ movq 0x88(%rsp), %rbp
2225
+ xorl %ecx, %ecx
2226
+ movq 0x40(%rsp), %rax
2227
+ xorq %r9, %rax
2228
+ mulq %r8
2229
+ addq %rax, %rbx
2230
+ adcq %rdx, %rcx
2231
+ movq 0x60(%rsp), %rax
2232
+ xorq %r11, %rax
2233
+ mulq %r10
2234
+ addq %rax, %rbx
2235
+ adcq %rdx, %rcx
2236
+ xorl %esi, %esi
2237
+ movq 0x40(%rsp), %rax
2238
+ xorq %r13, %rax
2239
+ mulq %r12
2240
+ movq %rbx, 0x40(%rsp)
2241
+ addq %rax, %rbp
2242
+ adcq %rdx, %rsi
2243
+ movq 0x60(%rsp), %rax
2244
+ xorq %r15, %rax
2245
+ mulq %r14
2246
+ addq %rax, %rbp
2247
+ adcq %rdx, %rsi
2248
+ movq %rbp, 0x60(%rsp)
2249
+ xorl %ebx, %ebx
2250
+ movq 0x48(%rsp), %rax
2251
+ xorq %r9, %rax
2252
+ mulq %r8
2253
+ addq %rax, %rcx
2254
+ adcq %rdx, %rbx
2255
+ movq 0x68(%rsp), %rax
2256
+ xorq %r11, %rax
2257
+ mulq %r10
2258
+ addq %rax, %rcx
2259
+ adcq %rdx, %rbx
2260
+ xorl %ebp, %ebp
2261
+ movq 0x48(%rsp), %rax
2262
+ xorq %r13, %rax
2263
+ mulq %r12
2264
+ movq %rcx, 0x48(%rsp)
2265
+ addq %rax, %rsi
2266
+ adcq %rdx, %rbp
2267
+ movq 0x68(%rsp), %rax
2268
+ xorq %r15, %rax
2269
+ mulq %r14
2270
+ addq %rax, %rsi
2271
+ adcq %rdx, %rbp
2272
+ movq %rsi, 0x68(%rsp)
2273
+ xorl %ecx, %ecx
2274
+ movq 0x50(%rsp), %rax
2275
+ xorq %r9, %rax
2276
+ mulq %r8
2277
+ addq %rax, %rbx
2278
+ adcq %rdx, %rcx
2279
+ movq 0x70(%rsp), %rax
2280
+ xorq %r11, %rax
2281
+ mulq %r10
2282
+ addq %rax, %rbx
2283
+ adcq %rdx, %rcx
2284
+ xorl %esi, %esi
2285
+ movq 0x50(%rsp), %rax
2286
+ xorq %r13, %rax
2287
+ mulq %r12
2288
+ movq %rbx, 0x50(%rsp)
2289
+ addq %rax, %rbp
2290
+ adcq %rdx, %rsi
2291
+ movq 0x70(%rsp), %rax
2292
+ xorq %r15, %rax
2293
+ mulq %r14
2294
+ addq %rax, %rbp
2295
+ adcq %rdx, %rsi
2296
+ movq %rbp, 0x70(%rsp)
2297
+ movq 0x58(%rsp), %rax
2298
+ xorq %r9, %rax
2299
+ movq %r9, %rbx
2300
+ andq %r8, %rbx
2301
+ negq %rbx
2302
+ mulq %r8
2303
+ addq %rax, %rcx
2304
+ adcq %rdx, %rbx
2305
+ movq 0x78(%rsp), %rax
2306
+ xorq %r11, %rax
2307
+ movq %r11, %rdx
2308
+ andq %r10, %rdx
2309
+ subq %rdx, %rbx
2310
+ mulq %r10
2311
+ addq %rax, %rcx
2312
+ adcq %rbx, %rdx
2313
+ movq %rdx, %rbx
2314
+ shldq $0x1, %rcx, %rdx
2315
+ sarq $0x3f, %rbx
2316
+ addq %rbx, %rdx
2317
+ movl $0x13, %eax
2318
+ imulq %rdx
2319
+ movq 0x40(%rsp), %r8
2320
+ addq %rax, %r8
2321
+ movq %r8, 0x40(%rsp)
2322
+ movq 0x48(%rsp), %r8
2323
+ adcq %rdx, %r8
2324
+ movq %r8, 0x48(%rsp)
2325
+ movq 0x50(%rsp), %r8
2326
+ adcq %rbx, %r8
2327
+ movq %r8, 0x50(%rsp)
2328
+ adcq %rbx, %rcx
2329
+ shlq $0x3f, %rax
2330
+ addq %rax, %rcx
2331
+ movq 0x58(%rsp), %rax
2332
+ movq %rcx, 0x58(%rsp)
2333
+ xorq %r13, %rax
2334
+ movq %r13, %rcx
2335
+ andq %r12, %rcx
2336
+ negq %rcx
2337
+ mulq %r12
2338
+ addq %rax, %rsi
2339
+ adcq %rdx, %rcx
2340
+ movq 0x78(%rsp), %rax
2341
+ xorq %r15, %rax
2342
+ movq %r15, %rdx
2343
+ andq %r14, %rdx
2344
+ subq %rdx, %rcx
2345
+ mulq %r14
2346
+ addq %rax, %rsi
2347
+ adcq %rcx, %rdx
2348
+ movq %rdx, %rcx
2349
+ shldq $0x1, %rsi, %rdx
2350
+ sarq $0x3f, %rcx
2351
+ movl $0x13, %eax
2352
+ addq %rcx, %rdx
2353
+ imulq %rdx
2354
+ movq 0x60(%rsp), %r8
2355
+ addq %rax, %r8
2356
+ movq %r8, 0x60(%rsp)
2357
+ movq 0x68(%rsp), %r8
2358
+ adcq %rdx, %r8
2359
+ movq %r8, 0x68(%rsp)
2360
+ movq 0x70(%rsp), %r8
2361
+ adcq %rcx, %r8
2362
+ movq %r8, 0x70(%rsp)
2363
+ adcq %rcx, %rsi
2364
+ shlq $0x3f, %rax
2365
+ addq %rax, %rsi
2366
+ movq %rsi, 0x78(%rsp)
2367
+ edwards25519_scalarmuldouble_midloop:
2368
+ movq 0x98(%rsp), %rsi
2369
+ movq (%rsp), %rdx
2370
+ movq 0x20(%rsp), %rcx
2371
+ movq %rdx, %rbx
2372
+ andq $0xfffff, %rbx
2373
+ movabsq $0xfffffe0000000000, %rax
2374
+ orq %rax, %rbx
2375
+ andq $0xfffff, %rcx
2376
+ movabsq $0xc000000000000000, %rax
2377
+ orq %rax, %rcx
2378
+ movq $0xfffffffffffffffe, %rax
2379
+ xorl %ebp, %ebp
2380
+ movl $0x2, %edx
2381
+ movq %rbx, %rdi
2382
+ movq %rax, %r8
2383
+ testq %rsi, %rsi
2384
+ cmovs %rbp, %r8
2385
+ testq $0x1, %rcx
2386
+ cmoveq %rbp, %r8
2387
+ cmoveq %rbp, %rdi
2388
+ xorq %r8, %rdi
2389
+ xorq %r8, %rsi
2390
+ btq $0x3f, %r8
2391
+ cmovbq %rcx, %rbx
2392
+ movq %rax, %r8
2393
+ subq %rax, %rsi
2394
+ leaq (%rcx,%rdi), %rcx
2395
+ cmovs %rbp, %r8
2396
+ movq %rbx, %rdi
2397
+ testq %rdx, %rcx
2398
+ cmoveq %rbp, %r8
2399
+ cmoveq %rbp, %rdi
2400
+ sarq $1, %rcx
2401
+ xorq %r8, %rdi
2402
+ xorq %r8, %rsi
2403
+ btq $0x3f, %r8
2404
+ cmovbq %rcx, %rbx
2405
+ movq %rax, %r8
2406
+ subq %rax, %rsi
2407
+ leaq (%rcx,%rdi), %rcx
2408
+ cmovs %rbp, %r8
2409
+ movq %rbx, %rdi
2410
+ testq %rdx, %rcx
2411
+ cmoveq %rbp, %r8
2412
+ cmoveq %rbp, %rdi
2413
+ sarq $1, %rcx
2414
+ xorq %r8, %rdi
2415
+ xorq %r8, %rsi
2416
+ btq $0x3f, %r8
2417
+ cmovbq %rcx, %rbx
2418
+ movq %rax, %r8
2419
+ subq %rax, %rsi
2420
+ leaq (%rcx,%rdi), %rcx
2421
+ cmovs %rbp, %r8
2422
+ movq %rbx, %rdi
2423
+ testq %rdx, %rcx
2424
+ cmoveq %rbp, %r8
2425
+ cmoveq %rbp, %rdi
2426
+ sarq $1, %rcx
2427
+ xorq %r8, %rdi
2428
+ xorq %r8, %rsi
2429
+ btq $0x3f, %r8
2430
+ cmovbq %rcx, %rbx
2431
+ movq %rax, %r8
2432
+ subq %rax, %rsi
2433
+ leaq (%rcx,%rdi), %rcx
2434
+ cmovs %rbp, %r8
2435
+ movq %rbx, %rdi
2436
+ testq %rdx, %rcx
2437
+ cmoveq %rbp, %r8
2438
+ cmoveq %rbp, %rdi
2439
+ sarq $1, %rcx
2440
+ xorq %r8, %rdi
2441
+ xorq %r8, %rsi
2442
+ btq $0x3f, %r8
2443
+ cmovbq %rcx, %rbx
2444
+ movq %rax, %r8
2445
+ subq %rax, %rsi
2446
+ leaq (%rcx,%rdi), %rcx
2447
+ cmovs %rbp, %r8
2448
+ movq %rbx, %rdi
2449
+ testq %rdx, %rcx
2450
+ cmoveq %rbp, %r8
2451
+ cmoveq %rbp, %rdi
2452
+ sarq $1, %rcx
2453
+ xorq %r8, %rdi
2454
+ xorq %r8, %rsi
2455
+ btq $0x3f, %r8
2456
+ cmovbq %rcx, %rbx
2457
+ movq %rax, %r8
2458
+ subq %rax, %rsi
2459
+ leaq (%rcx,%rdi), %rcx
2460
+ cmovs %rbp, %r8
2461
+ movq %rbx, %rdi
2462
+ testq %rdx, %rcx
2463
+ cmoveq %rbp, %r8
2464
+ cmoveq %rbp, %rdi
2465
+ sarq $1, %rcx
2466
+ xorq %r8, %rdi
2467
+ xorq %r8, %rsi
2468
+ btq $0x3f, %r8
2469
+ cmovbq %rcx, %rbx
2470
+ movq %rax, %r8
2471
+ subq %rax, %rsi
2472
+ leaq (%rcx,%rdi), %rcx
2473
+ cmovs %rbp, %r8
2474
+ movq %rbx, %rdi
2475
+ testq %rdx, %rcx
2476
+ cmoveq %rbp, %r8
2477
+ cmoveq %rbp, %rdi
2478
+ sarq $1, %rcx
2479
+ xorq %r8, %rdi
2480
+ xorq %r8, %rsi
2481
+ btq $0x3f, %r8
2482
+ cmovbq %rcx, %rbx
2483
+ movq %rax, %r8
2484
+ subq %rax, %rsi
2485
+ leaq (%rcx,%rdi), %rcx
2486
+ cmovs %rbp, %r8
2487
+ movq %rbx, %rdi
2488
+ testq %rdx, %rcx
2489
+ cmoveq %rbp, %r8
2490
+ cmoveq %rbp, %rdi
2491
+ sarq $1, %rcx
2492
+ xorq %r8, %rdi
2493
+ xorq %r8, %rsi
2494
+ btq $0x3f, %r8
2495
+ cmovbq %rcx, %rbx
2496
+ movq %rax, %r8
2497
+ subq %rax, %rsi
2498
+ leaq (%rcx,%rdi), %rcx
2499
+ cmovs %rbp, %r8
2500
+ movq %rbx, %rdi
2501
+ testq %rdx, %rcx
2502
+ cmoveq %rbp, %r8
2503
+ cmoveq %rbp, %rdi
2504
+ sarq $1, %rcx
2505
+ xorq %r8, %rdi
2506
+ xorq %r8, %rsi
2507
+ btq $0x3f, %r8
2508
+ cmovbq %rcx, %rbx
2509
+ movq %rax, %r8
2510
+ subq %rax, %rsi
2511
+ leaq (%rcx,%rdi), %rcx
2512
+ cmovs %rbp, %r8
2513
+ movq %rbx, %rdi
2514
+ testq %rdx, %rcx
2515
+ cmoveq %rbp, %r8
2516
+ cmoveq %rbp, %rdi
2517
+ sarq $1, %rcx
2518
+ xorq %r8, %rdi
2519
+ xorq %r8, %rsi
2520
+ btq $0x3f, %r8
2521
+ cmovbq %rcx, %rbx
2522
+ movq %rax, %r8
2523
+ subq %rax, %rsi
2524
+ leaq (%rcx,%rdi), %rcx
2525
+ cmovs %rbp, %r8
2526
+ movq %rbx, %rdi
2527
+ testq %rdx, %rcx
2528
+ cmoveq %rbp, %r8
2529
+ cmoveq %rbp, %rdi
2530
+ sarq $1, %rcx
2531
+ xorq %r8, %rdi
2532
+ xorq %r8, %rsi
2533
+ btq $0x3f, %r8
2534
+ cmovbq %rcx, %rbx
2535
+ movq %rax, %r8
2536
+ subq %rax, %rsi
2537
+ leaq (%rcx,%rdi), %rcx
2538
+ cmovs %rbp, %r8
2539
+ movq %rbx, %rdi
2540
+ testq %rdx, %rcx
2541
+ cmoveq %rbp, %r8
2542
+ cmoveq %rbp, %rdi
2543
+ sarq $1, %rcx
2544
+ xorq %r8, %rdi
2545
+ xorq %r8, %rsi
2546
+ btq $0x3f, %r8
2547
+ cmovbq %rcx, %rbx
2548
+ movq %rax, %r8
2549
+ subq %rax, %rsi
2550
+ leaq (%rcx,%rdi), %rcx
2551
+ cmovs %rbp, %r8
2552
+ movq %rbx, %rdi
2553
+ testq %rdx, %rcx
2554
+ cmoveq %rbp, %r8
2555
+ cmoveq %rbp, %rdi
2556
+ sarq $1, %rcx
2557
+ xorq %r8, %rdi
2558
+ xorq %r8, %rsi
2559
+ btq $0x3f, %r8
2560
+ cmovbq %rcx, %rbx
2561
+ movq %rax, %r8
2562
+ subq %rax, %rsi
2563
+ leaq (%rcx,%rdi), %rcx
2564
+ cmovs %rbp, %r8
2565
+ movq %rbx, %rdi
2566
+ testq %rdx, %rcx
2567
+ cmoveq %rbp, %r8
2568
+ cmoveq %rbp, %rdi
2569
+ sarq $1, %rcx
2570
+ xorq %r8, %rdi
2571
+ xorq %r8, %rsi
2572
+ btq $0x3f, %r8
2573
+ cmovbq %rcx, %rbx
2574
+ movq %rax, %r8
2575
+ subq %rax, %rsi
2576
+ leaq (%rcx,%rdi), %rcx
2577
+ cmovs %rbp, %r8
2578
+ movq %rbx, %rdi
2579
+ testq %rdx, %rcx
2580
+ cmoveq %rbp, %r8
2581
+ cmoveq %rbp, %rdi
2582
+ sarq $1, %rcx
2583
+ xorq %r8, %rdi
2584
+ xorq %r8, %rsi
2585
+ btq $0x3f, %r8
2586
+ cmovbq %rcx, %rbx
2587
+ movq %rax, %r8
2588
+ subq %rax, %rsi
2589
+ leaq (%rcx,%rdi), %rcx
2590
+ cmovs %rbp, %r8
2591
+ movq %rbx, %rdi
2592
+ testq %rdx, %rcx
2593
+ cmoveq %rbp, %r8
2594
+ cmoveq %rbp, %rdi
2595
+ sarq $1, %rcx
2596
+ xorq %r8, %rdi
2597
+ xorq %r8, %rsi
2598
+ btq $0x3f, %r8
2599
+ cmovbq %rcx, %rbx
2600
+ movq %rax, %r8
2601
+ subq %rax, %rsi
2602
+ leaq (%rcx,%rdi), %rcx
2603
+ cmovs %rbp, %r8
2604
+ movq %rbx, %rdi
2605
+ testq %rdx, %rcx
2606
+ cmoveq %rbp, %r8
2607
+ cmoveq %rbp, %rdi
2608
+ sarq $1, %rcx
2609
+ xorq %r8, %rdi
2610
+ xorq %r8, %rsi
2611
+ btq $0x3f, %r8
2612
+ cmovbq %rcx, %rbx
2613
+ movq %rax, %r8
2614
+ subq %rax, %rsi
2615
+ leaq (%rcx,%rdi), %rcx
2616
+ cmovs %rbp, %r8
2617
+ movq %rbx, %rdi
2618
+ testq %rdx, %rcx
2619
+ cmoveq %rbp, %r8
2620
+ cmoveq %rbp, %rdi
2621
+ sarq $1, %rcx
2622
+ xorq %r8, %rdi
2623
+ xorq %r8, %rsi
2624
+ btq $0x3f, %r8
2625
+ cmovbq %rcx, %rbx
2626
+ movq %rax, %r8
2627
+ subq %rax, %rsi
2628
+ leaq (%rcx,%rdi), %rcx
2629
+ cmovs %rbp, %r8
2630
+ movq %rbx, %rdi
2631
+ testq %rdx, %rcx
2632
+ cmoveq %rbp, %r8
2633
+ cmoveq %rbp, %rdi
2634
+ sarq $1, %rcx
2635
+ xorq %r8, %rdi
2636
+ xorq %r8, %rsi
2637
+ btq $0x3f, %r8
2638
+ cmovbq %rcx, %rbx
2639
+ movq %rax, %r8
2640
+ subq %rax, %rsi
2641
+ leaq (%rcx,%rdi), %rcx
2642
+ sarq $1, %rcx
2643
+ movl $0x100000, %eax
2644
+ leaq (%rbx,%rax), %rdx
2645
+ leaq (%rcx,%rax), %rdi
2646
+ shlq $0x16, %rdx
2647
+ shlq $0x16, %rdi
2648
+ sarq $0x2b, %rdx
2649
+ sarq $0x2b, %rdi
2650
+ movabsq $0x20000100000, %rax
2651
+ leaq (%rbx,%rax), %rbx
2652
+ leaq (%rcx,%rax), %rcx
2653
+ sarq $0x2a, %rbx
2654
+ sarq $0x2a, %rcx
2655
+ movq %rdx, 0xa0(%rsp)
2656
+ movq %rbx, 0xa8(%rsp)
2657
+ movq %rdi, 0xb0(%rsp)
2658
+ movq %rcx, 0xb8(%rsp)
2659
+ movq (%rsp), %r12
2660
+ imulq %r12, %rdi
2661
+ imulq %rdx, %r12
2662
+ movq 0x20(%rsp), %r13
2663
+ imulq %r13, %rbx
2664
+ imulq %rcx, %r13
2665
+ addq %rbx, %r12
2666
+ addq %rdi, %r13
2667
+ sarq $0x14, %r12
2668
+ sarq $0x14, %r13
2669
+ movq %r12, %rbx
2670
+ andq $0xfffff, %rbx
2671
+ movabsq $0xfffffe0000000000, %rax
2672
+ orq %rax, %rbx
2673
+ movq %r13, %rcx
2674
+ andq $0xfffff, %rcx
2675
+ movabsq $0xc000000000000000, %rax
2676
+ orq %rax, %rcx
2677
+ movq $0xfffffffffffffffe, %rax
2678
+ movl $0x2, %edx
2679
+ movq %rbx, %rdi
2680
+ movq %rax, %r8
2681
+ testq %rsi, %rsi
2682
+ cmovs %rbp, %r8
2683
+ testq $0x1, %rcx
2684
+ cmoveq %rbp, %r8
2685
+ cmoveq %rbp, %rdi
2686
+ xorq %r8, %rdi
2687
+ xorq %r8, %rsi
2688
+ btq $0x3f, %r8
2689
+ cmovbq %rcx, %rbx
2690
+ movq %rax, %r8
2691
+ subq %rax, %rsi
2692
+ leaq (%rcx,%rdi), %rcx
2693
+ cmovs %rbp, %r8
2694
+ movq %rbx, %rdi
2695
+ testq %rdx, %rcx
2696
+ cmoveq %rbp, %r8
2697
+ cmoveq %rbp, %rdi
2698
+ sarq $1, %rcx
2699
+ xorq %r8, %rdi
2700
+ xorq %r8, %rsi
2701
+ btq $0x3f, %r8
2702
+ cmovbq %rcx, %rbx
2703
+ movq %rax, %r8
2704
+ subq %rax, %rsi
2705
+ leaq (%rcx,%rdi), %rcx
2706
+ cmovs %rbp, %r8
2707
+ movq %rbx, %rdi
2708
+ testq %rdx, %rcx
2709
+ cmoveq %rbp, %r8
2710
+ cmoveq %rbp, %rdi
2711
+ sarq $1, %rcx
2712
+ xorq %r8, %rdi
2713
+ xorq %r8, %rsi
2714
+ btq $0x3f, %r8
2715
+ cmovbq %rcx, %rbx
2716
+ movq %rax, %r8
2717
+ subq %rax, %rsi
2718
+ leaq (%rcx,%rdi), %rcx
2719
+ cmovs %rbp, %r8
2720
+ movq %rbx, %rdi
2721
+ testq %rdx, %rcx
2722
+ cmoveq %rbp, %r8
2723
+ cmoveq %rbp, %rdi
2724
+ sarq $1, %rcx
2725
+ xorq %r8, %rdi
2726
+ xorq %r8, %rsi
2727
+ btq $0x3f, %r8
2728
+ cmovbq %rcx, %rbx
2729
+ movq %rax, %r8
2730
+ subq %rax, %rsi
2731
+ leaq (%rcx,%rdi), %rcx
2732
+ cmovs %rbp, %r8
2733
+ movq %rbx, %rdi
2734
+ testq %rdx, %rcx
2735
+ cmoveq %rbp, %r8
2736
+ cmoveq %rbp, %rdi
2737
+ sarq $1, %rcx
2738
+ xorq %r8, %rdi
2739
+ xorq %r8, %rsi
2740
+ btq $0x3f, %r8
2741
+ cmovbq %rcx, %rbx
2742
+ movq %rax, %r8
2743
+ subq %rax, %rsi
2744
+ leaq (%rcx,%rdi), %rcx
2745
+ cmovs %rbp, %r8
2746
+ movq %rbx, %rdi
2747
+ testq %rdx, %rcx
2748
+ cmoveq %rbp, %r8
2749
+ cmoveq %rbp, %rdi
2750
+ sarq $1, %rcx
2751
+ xorq %r8, %rdi
2752
+ xorq %r8, %rsi
2753
+ btq $0x3f, %r8
2754
+ cmovbq %rcx, %rbx
2755
+ movq %rax, %r8
2756
+ subq %rax, %rsi
2757
+ leaq (%rcx,%rdi), %rcx
2758
+ cmovs %rbp, %r8
2759
+ movq %rbx, %rdi
2760
+ testq %rdx, %rcx
2761
+ cmoveq %rbp, %r8
2762
+ cmoveq %rbp, %rdi
2763
+ sarq $1, %rcx
2764
+ xorq %r8, %rdi
2765
+ xorq %r8, %rsi
2766
+ btq $0x3f, %r8
2767
+ cmovbq %rcx, %rbx
2768
+ movq %rax, %r8
2769
+ subq %rax, %rsi
2770
+ leaq (%rcx,%rdi), %rcx
2771
+ cmovs %rbp, %r8
2772
+ movq %rbx, %rdi
2773
+ testq %rdx, %rcx
2774
+ cmoveq %rbp, %r8
2775
+ cmoveq %rbp, %rdi
2776
+ sarq $1, %rcx
2777
+ xorq %r8, %rdi
2778
+ xorq %r8, %rsi
2779
+ btq $0x3f, %r8
2780
+ cmovbq %rcx, %rbx
2781
+ movq %rax, %r8
2782
+ subq %rax, %rsi
2783
+ leaq (%rcx,%rdi), %rcx
2784
+ cmovs %rbp, %r8
2785
+ movq %rbx, %rdi
2786
+ testq %rdx, %rcx
2787
+ cmoveq %rbp, %r8
2788
+ cmoveq %rbp, %rdi
2789
+ sarq $1, %rcx
2790
+ xorq %r8, %rdi
2791
+ xorq %r8, %rsi
2792
+ btq $0x3f, %r8
2793
+ cmovbq %rcx, %rbx
2794
+ movq %rax, %r8
2795
+ subq %rax, %rsi
2796
+ leaq (%rcx,%rdi), %rcx
2797
+ cmovs %rbp, %r8
2798
+ movq %rbx, %rdi
2799
+ testq %rdx, %rcx
2800
+ cmoveq %rbp, %r8
2801
+ cmoveq %rbp, %rdi
2802
+ sarq $1, %rcx
2803
+ xorq %r8, %rdi
2804
+ xorq %r8, %rsi
2805
+ btq $0x3f, %r8
2806
+ cmovbq %rcx, %rbx
2807
+ movq %rax, %r8
2808
+ subq %rax, %rsi
2809
+ leaq (%rcx,%rdi), %rcx
2810
+ cmovs %rbp, %r8
2811
+ movq %rbx, %rdi
2812
+ testq %rdx, %rcx
2813
+ cmoveq %rbp, %r8
2814
+ cmoveq %rbp, %rdi
2815
+ sarq $1, %rcx
2816
+ xorq %r8, %rdi
2817
+ xorq %r8, %rsi
2818
+ btq $0x3f, %r8
2819
+ cmovbq %rcx, %rbx
2820
+ movq %rax, %r8
2821
+ subq %rax, %rsi
2822
+ leaq (%rcx,%rdi), %rcx
2823
+ cmovs %rbp, %r8
2824
+ movq %rbx, %rdi
2825
+ testq %rdx, %rcx
2826
+ cmoveq %rbp, %r8
2827
+ cmoveq %rbp, %rdi
2828
+ sarq $1, %rcx
2829
+ xorq %r8, %rdi
2830
+ xorq %r8, %rsi
2831
+ btq $0x3f, %r8
2832
+ cmovbq %rcx, %rbx
2833
+ movq %rax, %r8
2834
+ subq %rax, %rsi
2835
+ leaq (%rcx,%rdi), %rcx
2836
+ cmovs %rbp, %r8
2837
+ movq %rbx, %rdi
2838
+ testq %rdx, %rcx
2839
+ cmoveq %rbp, %r8
2840
+ cmoveq %rbp, %rdi
2841
+ sarq $1, %rcx
2842
+ xorq %r8, %rdi
2843
+ xorq %r8, %rsi
2844
+ btq $0x3f, %r8
2845
+ cmovbq %rcx, %rbx
2846
+ movq %rax, %r8
2847
+ subq %rax, %rsi
2848
+ leaq (%rcx,%rdi), %rcx
2849
+ cmovs %rbp, %r8
2850
+ movq %rbx, %rdi
2851
+ testq %rdx, %rcx
2852
+ cmoveq %rbp, %r8
2853
+ cmoveq %rbp, %rdi
2854
+ sarq $1, %rcx
2855
+ xorq %r8, %rdi
2856
+ xorq %r8, %rsi
2857
+ btq $0x3f, %r8
2858
+ cmovbq %rcx, %rbx
2859
+ movq %rax, %r8
2860
+ subq %rax, %rsi
2861
+ leaq (%rcx,%rdi), %rcx
2862
+ cmovs %rbp, %r8
2863
+ movq %rbx, %rdi
2864
+ testq %rdx, %rcx
2865
+ cmoveq %rbp, %r8
2866
+ cmoveq %rbp, %rdi
2867
+ sarq $1, %rcx
2868
+ xorq %r8, %rdi
2869
+ xorq %r8, %rsi
2870
+ btq $0x3f, %r8
2871
+ cmovbq %rcx, %rbx
2872
+ movq %rax, %r8
2873
+ subq %rax, %rsi
2874
+ leaq (%rcx,%rdi), %rcx
2875
+ cmovs %rbp, %r8
2876
+ movq %rbx, %rdi
2877
+ testq %rdx, %rcx
2878
+ cmoveq %rbp, %r8
2879
+ cmoveq %rbp, %rdi
2880
+ sarq $1, %rcx
2881
+ xorq %r8, %rdi
2882
+ xorq %r8, %rsi
2883
+ btq $0x3f, %r8
2884
+ cmovbq %rcx, %rbx
2885
+ movq %rax, %r8
2886
+ subq %rax, %rsi
2887
+ leaq (%rcx,%rdi), %rcx
2888
+ cmovs %rbp, %r8
2889
+ movq %rbx, %rdi
2890
+ testq %rdx, %rcx
2891
+ cmoveq %rbp, %r8
2892
+ cmoveq %rbp, %rdi
2893
+ sarq $1, %rcx
2894
+ xorq %r8, %rdi
2895
+ xorq %r8, %rsi
2896
+ btq $0x3f, %r8
2897
+ cmovbq %rcx, %rbx
2898
+ movq %rax, %r8
2899
+ subq %rax, %rsi
2900
+ leaq (%rcx,%rdi), %rcx
2901
+ cmovs %rbp, %r8
2902
+ movq %rbx, %rdi
2903
+ testq %rdx, %rcx
2904
+ cmoveq %rbp, %r8
2905
+ cmoveq %rbp, %rdi
2906
+ sarq $1, %rcx
2907
+ xorq %r8, %rdi
2908
+ xorq %r8, %rsi
2909
+ btq $0x3f, %r8
2910
+ cmovbq %rcx, %rbx
2911
+ movq %rax, %r8
2912
+ subq %rax, %rsi
2913
+ leaq (%rcx,%rdi), %rcx
2914
+ cmovs %rbp, %r8
2915
+ movq %rbx, %rdi
2916
+ testq %rdx, %rcx
2917
+ cmoveq %rbp, %r8
2918
+ cmoveq %rbp, %rdi
2919
+ sarq $1, %rcx
2920
+ xorq %r8, %rdi
2921
+ xorq %r8, %rsi
2922
+ btq $0x3f, %r8
2923
+ cmovbq %rcx, %rbx
2924
+ movq %rax, %r8
2925
+ subq %rax, %rsi
2926
+ leaq (%rcx,%rdi), %rcx
2927
+ cmovs %rbp, %r8
2928
+ movq %rbx, %rdi
2929
+ testq %rdx, %rcx
2930
+ cmoveq %rbp, %r8
2931
+ cmoveq %rbp, %rdi
2932
+ sarq $1, %rcx
2933
+ xorq %r8, %rdi
2934
+ xorq %r8, %rsi
2935
+ btq $0x3f, %r8
2936
+ cmovbq %rcx, %rbx
2937
+ movq %rax, %r8
2938
+ subq %rax, %rsi
2939
+ leaq (%rcx,%rdi), %rcx
2940
+ sarq $1, %rcx
2941
+ movl $0x100000, %eax
2942
+ leaq (%rbx,%rax), %r8
2943
+ leaq (%rcx,%rax), %r10
2944
+ shlq $0x16, %r8
2945
+ shlq $0x16, %r10
2946
+ sarq $0x2b, %r8
2947
+ sarq $0x2b, %r10
2948
+ movabsq $0x20000100000, %rax
2949
+ leaq (%rbx,%rax), %r15
2950
+ leaq (%rcx,%rax), %r11
2951
+ sarq $0x2a, %r15
2952
+ sarq $0x2a, %r11
2953
+ movq %r13, %rbx
2954
+ movq %r12, %rcx
2955
+ imulq %r8, %r12
2956
+ imulq %r15, %rbx
2957
+ addq %rbx, %r12
2958
+ imulq %r11, %r13
2959
+ imulq %r10, %rcx
2960
+ addq %rcx, %r13
2961
+ sarq $0x14, %r12
2962
+ sarq $0x14, %r13
2963
+ movq %r12, %rbx
2964
+ andq $0xfffff, %rbx
2965
+ movabsq $0xfffffe0000000000, %rax
2966
+ orq %rax, %rbx
2967
+ movq %r13, %rcx
2968
+ andq $0xfffff, %rcx
2969
+ movabsq $0xc000000000000000, %rax
2970
+ orq %rax, %rcx
2971
+ movq 0xa0(%rsp), %rax
2972
+ imulq %r8, %rax
2973
+ movq 0xb0(%rsp), %rdx
2974
+ imulq %r15, %rdx
2975
+ imulq 0xa8(%rsp), %r8
2976
+ imulq 0xb8(%rsp), %r15
2977
+ addq %r8, %r15
2978
+ leaq (%rax,%rdx), %r9
2979
+ movq 0xa0(%rsp), %rax
2980
+ imulq %r10, %rax
2981
+ movq 0xb0(%rsp), %rdx
2982
+ imulq %r11, %rdx
2983
+ imulq 0xa8(%rsp), %r10
2984
+ imulq 0xb8(%rsp), %r11
2985
+ addq %r10, %r11
2986
+ leaq (%rax,%rdx), %r13
2987
+ movq $0xfffffffffffffffe, %rax
2988
+ movl $0x2, %edx
2989
+ movq %rbx, %rdi
2990
+ movq %rax, %r8
2991
+ testq %rsi, %rsi
2992
+ cmovs %rbp, %r8
2993
+ testq $0x1, %rcx
2994
+ cmoveq %rbp, %r8
2995
+ cmoveq %rbp, %rdi
2996
+ xorq %r8, %rdi
2997
+ xorq %r8, %rsi
2998
+ btq $0x3f, %r8
2999
+ cmovbq %rcx, %rbx
3000
+ movq %rax, %r8
3001
+ subq %rax, %rsi
3002
+ leaq (%rcx,%rdi), %rcx
3003
+ cmovs %rbp, %r8
3004
+ movq %rbx, %rdi
3005
+ testq %rdx, %rcx
3006
+ cmoveq %rbp, %r8
3007
+ cmoveq %rbp, %rdi
3008
+ sarq $1, %rcx
3009
+ xorq %r8, %rdi
3010
+ xorq %r8, %rsi
3011
+ btq $0x3f, %r8
3012
+ cmovbq %rcx, %rbx
3013
+ movq %rax, %r8
3014
+ subq %rax, %rsi
3015
+ leaq (%rcx,%rdi), %rcx
3016
+ cmovs %rbp, %r8
3017
+ movq %rbx, %rdi
3018
+ testq %rdx, %rcx
3019
+ cmoveq %rbp, %r8
3020
+ cmoveq %rbp, %rdi
3021
+ sarq $1, %rcx
3022
+ xorq %r8, %rdi
3023
+ xorq %r8, %rsi
3024
+ btq $0x3f, %r8
3025
+ cmovbq %rcx, %rbx
3026
+ movq %rax, %r8
3027
+ subq %rax, %rsi
3028
+ leaq (%rcx,%rdi), %rcx
3029
+ cmovs %rbp, %r8
3030
+ movq %rbx, %rdi
3031
+ testq %rdx, %rcx
3032
+ cmoveq %rbp, %r8
3033
+ cmoveq %rbp, %rdi
3034
+ sarq $1, %rcx
3035
+ xorq %r8, %rdi
3036
+ xorq %r8, %rsi
3037
+ btq $0x3f, %r8
3038
+ cmovbq %rcx, %rbx
3039
+ movq %rax, %r8
3040
+ subq %rax, %rsi
3041
+ leaq (%rcx,%rdi), %rcx
3042
+ cmovs %rbp, %r8
3043
+ movq %rbx, %rdi
3044
+ testq %rdx, %rcx
3045
+ cmoveq %rbp, %r8
3046
+ cmoveq %rbp, %rdi
3047
+ sarq $1, %rcx
3048
+ xorq %r8, %rdi
3049
+ xorq %r8, %rsi
3050
+ btq $0x3f, %r8
3051
+ cmovbq %rcx, %rbx
3052
+ movq %rax, %r8
3053
+ subq %rax, %rsi
3054
+ leaq (%rcx,%rdi), %rcx
3055
+ cmovs %rbp, %r8
3056
+ movq %rbx, %rdi
3057
+ testq %rdx, %rcx
3058
+ cmoveq %rbp, %r8
3059
+ cmoveq %rbp, %rdi
3060
+ sarq $1, %rcx
3061
+ xorq %r8, %rdi
3062
+ xorq %r8, %rsi
3063
+ btq $0x3f, %r8
3064
+ cmovbq %rcx, %rbx
3065
+ movq %rax, %r8
3066
+ subq %rax, %rsi
3067
+ leaq (%rcx,%rdi), %rcx
3068
+ cmovs %rbp, %r8
3069
+ movq %rbx, %rdi
3070
+ testq %rdx, %rcx
3071
+ cmoveq %rbp, %r8
3072
+ cmoveq %rbp, %rdi
3073
+ sarq $1, %rcx
3074
+ xorq %r8, %rdi
3075
+ xorq %r8, %rsi
3076
+ btq $0x3f, %r8
3077
+ cmovbq %rcx, %rbx
3078
+ movq %rax, %r8
3079
+ subq %rax, %rsi
3080
+ leaq (%rcx,%rdi), %rcx
3081
+ cmovs %rbp, %r8
3082
+ movq %rbx, %rdi
3083
+ testq %rdx, %rcx
3084
+ cmoveq %rbp, %r8
3085
+ cmoveq %rbp, %rdi
3086
+ sarq $1, %rcx
3087
+ xorq %r8, %rdi
3088
+ xorq %r8, %rsi
3089
+ btq $0x3f, %r8
3090
+ cmovbq %rcx, %rbx
3091
+ movq %rax, %r8
3092
+ subq %rax, %rsi
3093
+ leaq (%rcx,%rdi), %rcx
3094
+ cmovs %rbp, %r8
3095
+ movq %rbx, %rdi
3096
+ testq %rdx, %rcx
3097
+ cmoveq %rbp, %r8
3098
+ cmoveq %rbp, %rdi
3099
+ sarq $1, %rcx
3100
+ xorq %r8, %rdi
3101
+ xorq %r8, %rsi
3102
+ btq $0x3f, %r8
3103
+ cmovbq %rcx, %rbx
3104
+ movq %rax, %r8
3105
+ subq %rax, %rsi
3106
+ leaq (%rcx,%rdi), %rcx
3107
+ cmovs %rbp, %r8
3108
+ movq %rbx, %rdi
3109
+ testq %rdx, %rcx
3110
+ cmoveq %rbp, %r8
3111
+ cmoveq %rbp, %rdi
3112
+ sarq $1, %rcx
3113
+ xorq %r8, %rdi
3114
+ xorq %r8, %rsi
3115
+ btq $0x3f, %r8
3116
+ cmovbq %rcx, %rbx
3117
+ movq %rax, %r8
3118
+ subq %rax, %rsi
3119
+ leaq (%rcx,%rdi), %rcx
3120
+ cmovs %rbp, %r8
3121
+ movq %rbx, %rdi
3122
+ testq %rdx, %rcx
3123
+ cmoveq %rbp, %r8
3124
+ cmoveq %rbp, %rdi
3125
+ sarq $1, %rcx
3126
+ xorq %r8, %rdi
3127
+ xorq %r8, %rsi
3128
+ btq $0x3f, %r8
3129
+ cmovbq %rcx, %rbx
3130
+ movq %rax, %r8
3131
+ subq %rax, %rsi
3132
+ leaq (%rcx,%rdi), %rcx
3133
+ cmovs %rbp, %r8
3134
+ movq %rbx, %rdi
3135
+ testq %rdx, %rcx
3136
+ cmoveq %rbp, %r8
3137
+ cmoveq %rbp, %rdi
3138
+ sarq $1, %rcx
3139
+ xorq %r8, %rdi
3140
+ xorq %r8, %rsi
3141
+ btq $0x3f, %r8
3142
+ cmovbq %rcx, %rbx
3143
+ movq %rax, %r8
3144
+ subq %rax, %rsi
3145
+ leaq (%rcx,%rdi), %rcx
3146
+ cmovs %rbp, %r8
3147
+ movq %rbx, %rdi
3148
+ testq %rdx, %rcx
3149
+ cmoveq %rbp, %r8
3150
+ cmoveq %rbp, %rdi
3151
+ sarq $1, %rcx
3152
+ xorq %r8, %rdi
3153
+ xorq %r8, %rsi
3154
+ btq $0x3f, %r8
3155
+ cmovbq %rcx, %rbx
3156
+ movq %rax, %r8
3157
+ subq %rax, %rsi
3158
+ leaq (%rcx,%rdi), %rcx
3159
+ cmovs %rbp, %r8
3160
+ movq %rbx, %rdi
3161
+ testq %rdx, %rcx
3162
+ cmoveq %rbp, %r8
3163
+ cmoveq %rbp, %rdi
3164
+ sarq $1, %rcx
3165
+ xorq %r8, %rdi
3166
+ xorq %r8, %rsi
3167
+ btq $0x3f, %r8
3168
+ cmovbq %rcx, %rbx
3169
+ movq %rax, %r8
3170
+ subq %rax, %rsi
3171
+ leaq (%rcx,%rdi), %rcx
3172
+ cmovs %rbp, %r8
3173
+ movq %rbx, %rdi
3174
+ testq %rdx, %rcx
3175
+ cmoveq %rbp, %r8
3176
+ cmoveq %rbp, %rdi
3177
+ sarq $1, %rcx
3178
+ xorq %r8, %rdi
3179
+ xorq %r8, %rsi
3180
+ btq $0x3f, %r8
3181
+ cmovbq %rcx, %rbx
3182
+ movq %rax, %r8
3183
+ subq %rax, %rsi
3184
+ leaq (%rcx,%rdi), %rcx
3185
+ cmovs %rbp, %r8
3186
+ movq %rbx, %rdi
3187
+ testq %rdx, %rcx
3188
+ cmoveq %rbp, %r8
3189
+ cmoveq %rbp, %rdi
3190
+ sarq $1, %rcx
3191
+ xorq %r8, %rdi
3192
+ xorq %r8, %rsi
3193
+ btq $0x3f, %r8
3194
+ cmovbq %rcx, %rbx
3195
+ movq %rax, %r8
3196
+ subq %rax, %rsi
3197
+ leaq (%rcx,%rdi), %rcx
3198
+ cmovs %rbp, %r8
3199
+ movq %rbx, %rdi
3200
+ testq %rdx, %rcx
3201
+ cmoveq %rbp, %r8
3202
+ cmoveq %rbp, %rdi
3203
+ sarq $1, %rcx
3204
+ xorq %r8, %rdi
3205
+ xorq %r8, %rsi
3206
+ btq $0x3f, %r8
3207
+ cmovbq %rcx, %rbx
3208
+ movq %rax, %r8
3209
+ subq %rax, %rsi
3210
+ leaq (%rcx,%rdi), %rcx
3211
+ cmovs %rbp, %r8
3212
+ movq %rbx, %rdi
3213
+ testq %rdx, %rcx
3214
+ cmoveq %rbp, %r8
3215
+ cmoveq %rbp, %rdi
3216
+ sarq $1, %rcx
3217
+ xorq %r8, %rdi
3218
+ xorq %r8, %rsi
3219
+ btq $0x3f, %r8
3220
+ cmovbq %rcx, %rbx
3221
+ movq %rax, %r8
3222
+ subq %rax, %rsi
3223
+ leaq (%rcx,%rdi), %rcx
3224
+ cmovs %rbp, %r8
3225
+ movq %rbx, %rdi
3226
+ testq %rdx, %rcx
3227
+ cmoveq %rbp, %r8
3228
+ cmoveq %rbp, %rdi
3229
+ sarq $1, %rcx
3230
+ xorq %r8, %rdi
3231
+ xorq %r8, %rsi
3232
+ btq $0x3f, %r8
3233
+ cmovbq %rcx, %rbx
3234
+ movq %rax, %r8
3235
+ subq %rax, %rsi
3236
+ leaq (%rcx,%rdi), %rcx
3237
+ sarq $1, %rcx
3238
+ movl $0x100000, %eax
3239
+ leaq (%rbx,%rax), %r8
3240
+ leaq (%rcx,%rax), %r12
3241
+ shlq $0x15, %r8
3242
+ shlq $0x15, %r12
3243
+ sarq $0x2b, %r8
3244
+ sarq $0x2b, %r12
3245
+ movabsq $0x20000100000, %rax
3246
+ leaq (%rbx,%rax), %r10
3247
+ leaq (%rcx,%rax), %r14
3248
+ sarq $0x2b, %r10
3249
+ sarq $0x2b, %r14
3250
+ movq %r9, %rax
3251
+ imulq %r8, %rax
3252
+ movq %r13, %rdx
3253
+ imulq %r10, %rdx
3254
+ imulq %r15, %r8
3255
+ imulq %r11, %r10
3256
+ addq %r8, %r10
3257
+ leaq (%rax,%rdx), %r8
3258
+ movq %r9, %rax
3259
+ imulq %r12, %rax
3260
+ movq %r13, %rdx
3261
+ imulq %r14, %rdx
3262
+ imulq %r15, %r12
3263
+ imulq %r11, %r14
3264
+ addq %r12, %r14
3265
+ leaq (%rax,%rdx), %r12
3266
+ movq %rsi, 0x98(%rsp)
3267
+ decq 0x90(%rsp)
3268
+ jne edwards25519_scalarmuldouble_inverseloop
3269
+ movq (%rsp), %rax
3270
+ movq 0x20(%rsp), %rcx
3271
+ imulq %r8, %rax
3272
+ imulq %r10, %rcx
3273
+ addq %rcx, %rax
3274
+ sarq $0x3f, %rax
3275
+ movq %r8, %r9
3276
+ sarq $0x3f, %r9
3277
+ xorq %r9, %r8
3278
+ subq %r9, %r8
3279
+ xorq %rax, %r9
3280
+ movq %r10, %r11
3281
+ sarq $0x3f, %r11
3282
+ xorq %r11, %r10
3283
+ subq %r11, %r10
3284
+ xorq %rax, %r11
3285
+ movq %r12, %r13
3286
+ sarq $0x3f, %r13
3287
+ xorq %r13, %r12
3288
+ subq %r13, %r12
3289
+ xorq %rax, %r13
3290
+ movq %r14, %r15
3291
+ sarq $0x3f, %r15
3292
+ xorq %r15, %r14
3293
+ subq %r15, %r14
3294
+ xorq %rax, %r15
3295
+ movq %r8, %rax
3296
+ andq %r9, %rax
3297
+ movq %r10, %r12
3298
+ andq %r11, %r12
3299
+ addq %rax, %r12
3300
+ xorl %r13d, %r13d
3301
+ movq 0x40(%rsp), %rax
3302
+ xorq %r9, %rax
3303
+ mulq %r8
3304
+ addq %rax, %r12
3305
+ adcq %rdx, %r13
3306
+ movq 0x60(%rsp), %rax
3307
+ xorq %r11, %rax
3308
+ mulq %r10
3309
+ addq %rax, %r12
3310
+ adcq %rdx, %r13
3311
+ xorl %r14d, %r14d
3312
+ movq 0x48(%rsp), %rax
3313
+ xorq %r9, %rax
3314
+ mulq %r8
3315
+ addq %rax, %r13
3316
+ adcq %rdx, %r14
3317
+ movq 0x68(%rsp), %rax
3318
+ xorq %r11, %rax
3319
+ mulq %r10
3320
+ addq %rax, %r13
3321
+ adcq %rdx, %r14
3322
+ xorl %r15d, %r15d
3323
+ movq 0x50(%rsp), %rax
3324
+ xorq %r9, %rax
3325
+ mulq %r8
3326
+ addq %rax, %r14
3327
+ adcq %rdx, %r15
3328
+ movq 0x70(%rsp), %rax
3329
+ xorq %r11, %rax
3330
+ mulq %r10
3331
+ addq %rax, %r14
3332
+ adcq %rdx, %r15
3333
+ movq 0x58(%rsp), %rax
3334
+ xorq %r9, %rax
3335
+ andq %r8, %r9
3336
+ negq %r9
3337
+ mulq %r8
3338
+ addq %rax, %r15
3339
+ adcq %rdx, %r9
3340
+ movq 0x78(%rsp), %rax
3341
+ xorq %r11, %rax
3342
+ movq %r11, %rdx
3343
+ andq %r10, %rdx
3344
+ subq %rdx, %r9
3345
+ mulq %r10
3346
+ addq %rax, %r15
3347
+ adcq %rdx, %r9
3348
+ movq %r9, %rax
3349
+ shldq $0x1, %r15, %rax
3350
+ sarq $0x3f, %r9
3351
+ movl $0x13, %ebx
3352
+ leaq 0x1(%rax,%r9,1), %rax
3353
+ imulq %rbx
3354
+ xorl %ebp, %ebp
3355
+ addq %rax, %r12
3356
+ adcq %rdx, %r13
3357
+ adcq %r9, %r14
3358
+ adcq %r9, %r15
3359
+ shlq $0x3f, %rax
3360
+ addq %rax, %r15
3361
+ cmovns %rbp, %rbx
3362
+ subq %rbx, %r12
3363
+ sbbq %rbp, %r13
3364
+ sbbq %rbp, %r14
3365
+ sbbq %rbp, %r15
3366
+ btr $0x3f, %r15
3367
+ movq 0xc0(%rsp), %rdi
3368
+ movq %r12, (%rdi)
3369
+ movq %r13, 0x8(%rdi)
3370
+ movq %r14, 0x10(%rdi)
3371
+ movq %r15, 0x18(%rdi)
3372
+
3373
+ // Store result
3374
+
3375
+ movq res, %rdi
3376
+ leaq ACC(%rsp), %rsi
3377
+ leaq TAB(%rsp), %rbp
3378
+ mul_p25519(x_0,x_1,x_2)
3379
+
3380
+ movq res, %rdi
3381
+ addq $32, %rdi
3382
+ leaq ACC+32(%rsp), %rsi
3383
+ leaq TAB(%rsp), %rbp
3384
+ mul_p25519(x_0,x_1,x_2)
3385
+
3386
+ // Restore stack and registers
3387
+
3388
+ addq $NSPACE, %rsp
3389
+
3390
+ popq %r15
3391
+ popq %r14
3392
+ popq %r13
3393
+ popq %r12
3394
+ popq %rbp
3395
+ popq %rbx
3396
+ ret
3397
+
3398
+ // ****************************************************************************
3399
+ // Localized versions of subroutines.
3400
+ // These are close to the standalone functions "edwards25519_epdouble" etc.,
3401
+ // but are only maintaining reduction modulo 2^256 - 38, not 2^255 - 19.
3402
+ // ****************************************************************************
3403
+
3404
+ edwards25519_scalarmuldouble_epdouble: // Local doubling step: reads x_1,y_1,z_1 and writes x_0,y_0,z_0 plus w_0
3405
+ sub $(5*NUMSIZE), %rsp // open 5*NUMSIZE bytes of stack; presumably backs t0..t4 (macros defined outside this chunk - confirm)
3406
+ add_twice4(t0,x_1,y_1) // t0 = x_1 + y_1 (notes read macro names literally, destination first - confirm against macro defs)
3407
+ sqr_4(t1,z_1) // t1 = z_1^2
3408
+ sqr_4(t2,x_1) // t2 = x_1^2
3409
+ sqr_4(t3,y_1) // t3 = y_1^2; last read of the inputs x_1,y_1,z_1
3410
+ double_twice4(t1,t1) // t1 = 2 * z_1^2 (in place)
3411
+ sqr_4(t0,t0) // t0 = (x_1 + y_1)^2 (in place)
3412
+ add_twice4(t4,t2,t3) // t4 = x_1^2 + y_1^2
3413
+ sub_twice4(t2,t2,t3) // t2 = x_1^2 - y_1^2 (overwrites t2, so must follow the t4 sum)
3414
+ add_twice4(t3,t1,t2) // t3 = 2 * z_1^2 + (x_1^2 - y_1^2)
3415
+ sub_twice4(t1,t4,t0) // t1 = (x_1^2 + y_1^2) - (x_1 + y_1)^2, i.e. -2 * x_1 * y_1
3416
+ mul_4(y_0,t2,t4) // y_0 = t2 * t4; outputs are written only from here on
3417
+ mul_4(z_0,t3,t2) // z_0 = t3 * t2
3418
+ mul_4(w_0,t1,t4) // w_0 = t1 * t4 (the extra output that the pdouble variant omits)
3419
+ mul_4(x_0,t1,t3) // x_0 = t1 * t3
3420
+ add $(5*NUMSIZE), %rsp // release the temporaries; must match the sub at entry
3421
+ ret
3422
+
3423
+ edwards25519_scalarmuldouble_pdouble: // Local doubling step: reads x_1,y_1,z_1 and writes x_0,y_0,z_0 only (no w_0)
3424
+ sub $(5*NUMSIZE), %rsp // open 5*NUMSIZE bytes of stack; presumably backs t0..t4 (macros defined outside this chunk - confirm)
3425
+ add_twice4(t0,x_1,y_1) // t0 = x_1 + y_1 (notes read macro names literally, destination first - confirm against macro defs)
3426
+ sqr_4(t1,z_1) // t1 = z_1^2
3427
+ sqr_4(t2,x_1) // t2 = x_1^2
3428
+ sqr_4(t3,y_1) // t3 = y_1^2; last read of the inputs x_1,y_1,z_1
3429
+ double_twice4(t1,t1) // t1 = 2 * z_1^2 (in place)
3430
+ sqr_4(t0,t0) // t0 = (x_1 + y_1)^2 (in place)
3431
+ add_twice4(t4,t2,t3) // t4 = x_1^2 + y_1^2
3432
+ sub_twice4(t2,t2,t3) // t2 = x_1^2 - y_1^2 (overwrites t2, so must follow the t4 sum)
3433
+ add_twice4(t3,t1,t2) // t3 = 2 * z_1^2 + (x_1^2 - y_1^2)
3434
+ sub_twice4(t1,t4,t0) // t1 = (x_1^2 + y_1^2) - (x_1 + y_1)^2, i.e. -2 * x_1 * y_1
3435
+ mul_4(y_0,t2,t4) // y_0 = t2 * t4; outputs are written only from here on
3436
+ mul_4(z_0,t3,t2) // z_0 = t3 * t2
3437
+ mul_4(x_0,t1,t3) // x_0 = t1 * t3
3438
+ add $(5*NUMSIZE), %rsp // release the temporaries; must match the sub at entry
3439
+ ret
3440
+
3441
+ edwards25519_scalarmuldouble_epadd: // Local addition step: reads x,y,z,w of operands 1 and 2, writes x_0,y_0,z_0,w_0
3442
+ sub $(6*NUMSIZE), %rsp // open 6*NUMSIZE bytes of stack; presumably backs t0..t5 (macros defined outside this chunk - confirm)
3443
+ mul_4(t0,w_1,w_2) // t0 = w_1 * w_2 (notes read macro names literally, destination first - confirm against macro defs)
3444
+ sub_twice4(t1,y_1,x_1) // t1 = y_1 - x_1
3445
+ sub_twice4(t2,y_2,x_2) // t2 = y_2 - x_2
3446
+ add_twice4(t3,y_1,x_1) // t3 = y_1 + x_1
3447
+ add_twice4(t4,y_2,x_2) // t4 = y_2 + x_2
3448
+ double_twice4(t5,z_2) // t5 = 2 * z_2
3449
+ mul_4(t1,t1,t2) // t1 = (y_1 - x_1) * (y_2 - x_2); t2 is free for reuse after this
3450
+ mul_4(t3,t3,t4) // t3 = (y_1 + x_1) * (y_2 + x_2); t4 is free for reuse after this
3451
+ load_k25519(t2) // t2 = constant supplied by load_k25519 (value not visible here; presumably the curve's 2*d - confirm)
3452
+ mul_4(t2,t2,t0) // t2 = constant * w_1 * w_2
3453
+ mul_4(t4,z_1,t5) // t4 = 2 * z_1 * z_2; last read of an input operand
3454
+ sub_twice4(t0,t3,t1) // t0 = t3 - t1
3455
+ add_twice4(t5,t3,t1) // t5 = t3 + t1 (reads t1 before the next line overwrites it)
3456
+ sub_twice4(t1,t4,t2) // t1 = t4 - t2
3457
+ add_twice4(t3,t4,t2) // t3 = t4 + t2
3458
+ mul_4(w_0,t0,t5) // w_0 = t0 * t5; outputs are written only from here on
3459
+ mul_4(x_0,t0,t1) // x_0 = t0 * t1
3460
+ mul_4(y_0,t3,t5) // y_0 = t3 * t5
3461
+ mul_4(z_0,t1,t3) // z_0 = t1 * t3
3462
+ add $(6*NUMSIZE), %rsp // release the temporaries; must match the sub at entry
3463
+ ret
3464
+
3465
+ edwards25519_scalarmuldouble_pepadd: // Local addition step: reads x_1,y_1,z_1,w_1 and x_2,y_2,z_2 (no w_2), writes x_0,y_0,z_0,w_0
3466
+ sub $(6*NUMSIZE), %rsp // open 6*NUMSIZE bytes of stack; presumably backs t0..t5 (macros defined outside this chunk - confirm)
3467
+ double_twice4(t0,z_1); // t0 = 2 * z_1 (notes read macro names literally, destination first - confirm against macro defs)
3468
+ sub_twice4(t1,y_1,x_1); // t1 = y_1 - x_1
3469
+ add_twice4(t2,y_1,x_1); // t2 = y_1 + x_1
3470
+ mul_4(t3,w_1,z_2); // t3 = w_1 * z_2
3471
+ mul_4(t1,t1,x_2); // t1 = (y_1 - x_1) * x_2 - NOTE(review): operand 2 looks like a precomputed triple, not plain coordinates; confirm with caller
3472
+ mul_4(t2,t2,y_2); // t2 = (y_1 + x_1) * y_2; last read of an input operand
3473
+ sub_twice4(t4,t0,t3); // t4 = t0 - t3
3474
+ add_twice4(t0,t0,t3); // t0 = t0 + t3 (in place, after t4 has consumed the old t0)
3475
+ sub_twice4(t5,t2,t1); // t5 = t2 - t1
3476
+ add_twice4(t1,t2,t1); // t1 = t2 + t1 (in place, after t5 has consumed the old t1)
3477
+ mul_4(z_0,t4,t0); // z_0 = t4 * t0; outputs are written only from here on
3478
+ mul_4(x_0,t5,t4); // x_0 = t5 * t4
3479
+ mul_4(y_0,t0,t1); // y_0 = t0 * t1
3480
+ mul_4(w_0,t5,t1); // w_0 = t5 * t1
3481
+ add $(6*NUMSIZE), %rsp // release the temporaries; must match the sub at entry
3482
+ ret
3483
+
3484
+ // ****************************************************************************
3485
+ // The precomputed data (all read-only). This is currently part of the same
3486
+ // text section, which gives position-independent code with simple PC-relative
3487
+ // addressing. However it could be put in a separate section via something like
3488
+ //
3489
+ // .section .rodata
3490
+ // ****************************************************************************
3491
+
3492
+ // Precomputed table of multiples of the generator for edwards25519,
3492
+ // all stored as precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
3494
+
3495
+ edwards25519_scalarmuldouble_table: // 8 entries (1*G .. 8*G); each entry is 12 quads = 3 groups of 4 64-bit words
3496
+
3497
+ // 1 * G
3498
+
3499
+ .quad 0x9d103905d740913e // group 1 of 3 (y-x, per the triple order stated for this table); words are least-significant first
3500
+ .quad 0xfd399f05d140beb3
3501
+ .quad 0xa5c18434688f8a09
3502
+ .quad 0x44fd2f9298f81267
3503
+ .quad 0x2fbc93c6f58c3b85 // group 2 of 3 (x+y)
3504
+ .quad 0xcf932dc6fb8c0e19
3505
+ .quad 0x270b4898643d42c2
3506
+ .quad 0x07cf9d3a33d4ba65
3507
+ .quad 0xabc91205877aaa68 // group 3 of 3 (2*d*x*y); every later entry repeats this 4+4+4 layout
3508
+ .quad 0x26d9e823ccaac49e
3509
+ .quad 0x5a1b7dcbdd43598c
3510
+ .quad 0x6f117b689f0c65a8
3511
+
3512
+ // 2 * G
3513
+
3514
+ .quad 0x8a99a56042b4d5a8
3515
+ .quad 0x8f2b810c4e60acf6
3516
+ .quad 0xe09e236bb16e37aa
3517
+ .quad 0x6bb595a669c92555
3518
+ .quad 0x9224e7fc933c71d7
3519
+ .quad 0x9f469d967a0ff5b5
3520
+ .quad 0x5aa69a65e1d60702
3521
+ .quad 0x590c063fa87d2e2e
3522
+ .quad 0x43faa8b3a59b7a5f
3523
+ .quad 0x36c16bdd5d9acf78
3524
+ .quad 0x500fa0840b3d6a31
3525
+ .quad 0x701af5b13ea50b73
3526
+
3527
+ // 3 * G
3528
+
3529
+ .quad 0x56611fe8a4fcd265
3530
+ .quad 0x3bd353fde5c1ba7d
3531
+ .quad 0x8131f31a214bd6bd
3532
+ .quad 0x2ab91587555bda62
3533
+ .quad 0xaf25b0a84cee9730
3534
+ .quad 0x025a8430e8864b8a
3535
+ .quad 0xc11b50029f016732
3536
+ .quad 0x7a164e1b9a80f8f4
3537
+ .quad 0x14ae933f0dd0d889
3538
+ .quad 0x589423221c35da62
3539
+ .quad 0xd170e5458cf2db4c
3540
+ .quad 0x5a2826af12b9b4c6
3541
+
3542
+ // 4 * G
3543
+
3544
+ .quad 0x95fe050a056818bf
3545
+ .quad 0x327e89715660faa9
3546
+ .quad 0xc3e8e3cd06a05073
3547
+ .quad 0x27933f4c7445a49a
3548
+ .quad 0x287351b98efc099f
3549
+ .quad 0x6765c6f47dfd2538
3550
+ .quad 0xca348d3dfb0a9265
3551
+ .quad 0x680e910321e58727
3552
+ .quad 0x5a13fbe9c476ff09
3553
+ .quad 0x6e9e39457b5cc172
3554
+ .quad 0x5ddbdcf9102b4494
3555
+ .quad 0x7f9d0cbf63553e2b
3556
+
3557
+ // 5 * G
3558
+
3559
+ .quad 0x7f9182c3a447d6ba
3560
+ .quad 0xd50014d14b2729b7
3561
+ .quad 0xe33cf11cb864a087
3562
+ .quad 0x154a7e73eb1b55f3
3563
+ .quad 0xa212bc4408a5bb33
3564
+ .quad 0x8d5048c3c75eed02
3565
+ .quad 0xdd1beb0c5abfec44
3566
+ .quad 0x2945ccf146e206eb
3567
+ .quad 0xbcbbdbf1812a8285
3568
+ .quad 0x270e0807d0bdd1fc
3569
+ .quad 0xb41b670b1bbda72d
3570
+ .quad 0x43aabe696b3bb69a
3571
+
3572
+ // 6 * G
3573
+
3574
+ .quad 0x499806b67b7d8ca4
3575
+ .quad 0x575be28427d22739
3576
+ .quad 0xbb085ce7204553b9
3577
+ .quad 0x38b64c41ae417884
3578
+ .quad 0x3a0ceeeb77157131
3579
+ .quad 0x9b27158900c8af88
3580
+ .quad 0x8065b668da59a736
3581
+ .quad 0x51e57bb6a2cc38bd
3582
+ .quad 0x85ac326702ea4b71
3583
+ .quad 0xbe70e00341a1bb01
3584
+ .quad 0x53e4a24b083bc144
3585
+ .quad 0x10b8e91a9f0d61e3
3586
+
3587
+ // 7 * G
3588
+
3589
+ .quad 0xba6f2c9aaa3221b1
3590
+ .quad 0x6ca021533bba23a7
3591
+ .quad 0x9dea764f92192c3a
3592
+ .quad 0x1d6edd5d2e5317e0
3593
+ .quad 0x6b1a5cd0944ea3bf
3594
+ .quad 0x7470353ab39dc0d2
3595
+ .quad 0x71b2528228542e49
3596
+ .quad 0x461bea69283c927e
3597
+ .quad 0xf1836dc801b8b3a2
3598
+ .quad 0xb3035f47053ea49a
3599
+ .quad 0x529c41ba5877adf3
3600
+ .quad 0x7a9fbb1c6a0f90a7
3601
+
3602
+ // 8 * G
3603
+
3604
+ .quad 0xe2a75dedf39234d9
3605
+ .quad 0x963d7680e1b558f9
3606
+ .quad 0x2c2741ac6e3c23fb
3607
+ .quad 0x3a9024a1320e01c3
3608
+ .quad 0x59b7596604dd3e8f
3609
+ .quad 0x6cb30377e288702c
3610
+ .quad 0xb1339c665ed9c323
3611
+ .quad 0x0915e76061bce52f
3612
+ .quad 0xe7c1f5d9c9a2911a
3613
+ .quad 0xb8a371788bcca7d7
3614
+ .quad 0x636412190eb62a32
3615
+ .quad 0x26907c5c2ecc4e95
3616
+
3617
+ #if defined(__linux__) && defined(__ELF__)
3618
+ .section .note.GNU-stack, "", %progbits
3619
+ #endif