aws-crt 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (581) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/VERSION +1 -1
  4. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/auth.h +1 -0
  5. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/aws_imds_client.h +5 -0
  6. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/credentials.h +5 -0
  7. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/private/aws_signing.h +1 -0
  8. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/private/credentials_utils.h +2 -0
  9. data/aws-crt-ffi/crt/aws-c-auth/include/aws/auth/signing_config.h +1 -0
  10. data/aws-crt-ffi/crt/aws-c-auth/source/auth.c +3 -1
  11. data/aws-crt-ffi/crt/aws-c-auth/source/aws_imds_client.c +146 -63
  12. data/aws-crt-ffi/crt/aws-c-auth/source/aws_signing.c +41 -19
  13. data/aws-crt-ffi/crt/aws-c-auth/source/credentials_provider_imds.c +1 -0
  14. data/aws-crt-ffi/crt/aws-c-auth/source/credentials_utils.c +1 -0
  15. data/aws-crt-ffi/crt/aws-c-auth/source/signable_http_request.c +2 -1
  16. data/aws-crt-ffi/crt/aws-c-auth/source/signing_config.c +25 -0
  17. data/aws-crt-ffi/crt/aws-c-auth/tests/CMakeLists.txt +3 -0
  18. data/aws-crt-ffi/crt/aws-c-auth/tests/aws_imds_client_test.c +197 -31
  19. data/aws-crt-ffi/crt/aws-c-auth/tests/credentials_provider_imds_tests.c +16 -18
  20. data/aws-crt-ffi/crt/aws-c-auth/tests/sigv4_signing_tests.c +3 -1
  21. data/aws-crt-ffi/crt/aws-c-cal/include/aws/cal/private/opensslcrypto_common.h +22 -0
  22. data/aws-crt-ffi/crt/aws-c-cal/source/darwin/commoncrypto_aes.c +46 -17
  23. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_aes.c +1 -0
  24. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_platform_init.c +7 -0
  25. data/aws-crt-ffi/crt/aws-c-cal/source/unix/openssl_rsa.c +59 -2
  26. data/aws-crt-ffi/crt/aws-c-cal/source/unix/opensslcrypto_ecc.c +1 -0
  27. data/aws-crt-ffi/crt/aws-c-common/CMakeLists.txt +13 -1
  28. data/aws-crt-ffi/crt/aws-c-common/THIRD-PARTY-LICENSES.txt +28 -7
  29. data/aws-crt-ffi/crt/aws-c-common/bin/system_info/CMakeLists.txt +18 -0
  30. data/aws-crt-ffi/crt/aws-c-common/bin/system_info/print_system_info.c +48 -0
  31. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/allocator.h +23 -0
  32. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/byte_buf.h +12 -0
  33. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/cross_process_lock.h +35 -0
  34. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/hash_table.h +1 -0
  35. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/priority_queue.h +24 -0
  36. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/private/system_info_priv.h +37 -0
  37. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/system_info.h +47 -0
  38. data/aws-crt-ffi/crt/aws-c-common/include/aws/common/system_resource_util.h +30 -0
  39. data/aws-crt-ffi/crt/aws-c-common/include/aws/testing/aws_test_harness.h +3 -2
  40. data/aws-crt-ffi/crt/aws-c-common/source/allocator.c +64 -13
  41. data/aws-crt-ffi/crt/aws-c-common/source/android/logging.c +14 -0
  42. data/aws-crt-ffi/crt/aws-c-common/source/common.c +3 -3
  43. data/aws-crt-ffi/crt/aws-c-common/source/file.c +96 -35
  44. data/aws-crt-ffi/crt/aws-c-common/source/linux/system_info.c +24 -0
  45. data/aws-crt-ffi/crt/aws-c-common/source/memtrace.c +10 -3
  46. data/aws-crt-ffi/crt/aws-c-common/source/platform_fallback_stubs/system_info.c +21 -0
  47. data/aws-crt-ffi/crt/aws-c-common/source/posix/cross_process_lock.c +141 -0
  48. data/aws-crt-ffi/crt/aws-c-common/source/posix/system_info.c +1 -1
  49. data/aws-crt-ffi/crt/aws-c-common/source/posix/system_resource_utils.c +32 -0
  50. data/aws-crt-ffi/crt/aws-c-common/source/priority_queue.c +24 -0
  51. data/aws-crt-ffi/crt/aws-c-common/source/system_info.c +80 -0
  52. data/aws-crt-ffi/crt/aws-c-common/source/task_scheduler.c +2 -2
  53. data/aws-crt-ffi/crt/aws-c-common/source/windows/cross_process_lock.c +93 -0
  54. data/aws-crt-ffi/crt/aws-c-common/source/windows/system_resource_utils.c +31 -0
  55. data/aws-crt-ffi/crt/aws-c-common/tests/CMakeLists.txt +16 -0
  56. data/aws-crt-ffi/crt/aws-c-common/tests/alloc_test.c +83 -22
  57. data/aws-crt-ffi/crt/aws-c-common/tests/cross_process_lock_tests.c +116 -0
  58. data/aws-crt-ffi/crt/aws-c-common/tests/file_test.c +103 -0
  59. data/aws-crt-ffi/crt/aws-c-common/tests/priority_queue_test.c +36 -0
  60. data/aws-crt-ffi/crt/aws-c-common/tests/system_info_tests.c +19 -0
  61. data/aws-crt-ffi/crt/aws-c-common/tests/system_resource_util_test.c +37 -0
  62. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/connection.h +9 -0
  63. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/http.h +1 -0
  64. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/connection_impl.h +5 -4
  65. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/connection_manager_system_vtable.h +10 -18
  66. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/proxy_impl.h +5 -1
  67. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/private/request_response_impl.h +5 -0
  68. data/aws-crt-ffi/crt/aws-c-http/include/aws/http/request_response.h +10 -0
  69. data/aws-crt-ffi/crt/aws-c-http/source/connection.c +5 -2
  70. data/aws-crt-ffi/crt/aws-c-http/source/connection_manager.c +22 -21
  71. data/aws-crt-ffi/crt/aws-c-http/source/h1_connection.c +102 -17
  72. data/aws-crt-ffi/crt/aws-c-http/source/h1_stream.c +1 -0
  73. data/aws-crt-ffi/crt/aws-c-http/source/http.c +3 -0
  74. data/aws-crt-ffi/crt/aws-c-http/source/proxy_connection.c +2 -2
  75. data/aws-crt-ffi/crt/aws-c-http/tests/CMakeLists.txt +2 -0
  76. data/aws-crt-ffi/crt/aws-c-http/tests/test_connection_manager.c +18 -18
  77. data/aws-crt-ffi/crt/aws-c-http/tests/test_h1_client.c +111 -1
  78. data/aws-crt-ffi/crt/aws-c-http/tests/test_proxy.c +2 -2
  79. data/aws-crt-ffi/crt/aws-c-http/tests/test_stream_manager.c +2 -2
  80. data/aws-crt-ffi/crt/aws-c-io/include/aws/io/retry_strategy.h +1 -1
  81. data/aws-crt-ffi/crt/aws-c-io/source/exponential_backoff_retry_strategy.c +1 -1
  82. data/aws-crt-ffi/crt/aws-c-io/source/pkcs11_tls_op_handler.c +2 -4
  83. data/aws-crt-ffi/crt/aws-lc/CMakeLists.txt +16 -8
  84. data/aws-crt-ffi/crt/aws-lc/cmake/go.cmake +6 -0
  85. data/aws-crt-ffi/crt/aws-lc/crypto/CMakeLists.txt +6 -9
  86. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/a_time.c +34 -1
  87. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/a_utctm.c +4 -1
  88. data/aws-crt-ffi/crt/aws-lc/crypto/asn1/asn1_test.cc +41 -0
  89. data/aws-crt-ffi/crt/aws-lc/crypto/bio/bio_mem.c +6 -7
  90. data/aws-crt-ffi/crt/aws-lc/crypto/bio/bio_test.cc +152 -16
  91. data/aws-crt-ffi/crt/aws-lc/crypto/bio/connect.c +6 -12
  92. data/aws-crt-ffi/crt/aws-lc/crypto/bio/fd.c +2 -2
  93. data/aws-crt-ffi/crt/aws-lc/crypto/bio/file.c +20 -8
  94. data/aws-crt-ffi/crt/aws-lc/crypto/bio/socket.c +2 -2
  95. data/aws-crt-ffi/crt/aws-lc/crypto/bio/socket_helper.c +2 -2
  96. data/aws-crt-ffi/crt/aws-lc/crypto/blake2/blake2.c +11 -1
  97. data/aws-crt-ffi/crt/aws-lc/crypto/bytestring/cbb.c +13 -3
  98. data/aws-crt-ffi/crt/aws-lc/crypto/bytestring/cbs.c +9 -0
  99. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/asm/chacha-armv8.pl +1 -1
  100. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/chacha.c +49 -8
  101. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/chacha_test.cc +110 -0
  102. data/aws-crt-ffi/crt/aws-lc/crypto/chacha/internal.h +8 -1
  103. data/aws-crt-ffi/crt/aws-lc/crypto/compiler_test.cc +4 -1
  104. data/aws-crt-ffi/crt/aws-lc/crypto/conf/conf_test.cc +1 -0
  105. data/aws-crt-ffi/crt/aws-lc/crypto/crypto_test.cc +9 -0
  106. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/curve25519.c +189 -108
  107. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/curve25519_nohw.c +78 -6
  108. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/ed25519_test.cc +9 -0
  109. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/internal.h +24 -10
  110. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/spake25519.c +4 -4
  111. data/aws-crt-ffi/crt/aws-lc/crypto/curve25519/x25519_test.cc +80 -11
  112. data/aws-crt-ffi/crt/aws-lc/crypto/decrepit/evp/evp_do_all.c +2 -0
  113. data/aws-crt-ffi/crt/aws-lc/crypto/digest_extra/digest_extra.c +8 -0
  114. data/aws-crt-ffi/crt/aws-lc/crypto/digest_extra/digest_test.cc +110 -45
  115. data/aws-crt-ffi/crt/aws-lc/crypto/dsa/dsa_test.cc +8 -2
  116. data/aws-crt-ffi/crt/aws-lc/crypto/dsa/internal.h +18 -0
  117. data/aws-crt-ffi/crt/aws-lc/crypto/dynamic_loading_test.c +8 -5
  118. data/aws-crt-ffi/crt/aws-lc/crypto/ec_extra/ec_derive.c +4 -3
  119. data/aws-crt-ffi/crt/aws-lc/crypto/ec_extra/hash_to_curve.c +6 -18
  120. data/aws-crt-ffi/crt/aws-lc/crypto/endian_test.cc +308 -0
  121. data/aws-crt-ffi/crt/aws-lc/crypto/err/ssl.errordata +2 -0
  122. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_extra_test.cc +2 -0
  123. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_test.cc +11 -1
  124. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/evp_tests.txt +25 -0
  125. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_ec_asn1.c +1 -1
  126. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_kem.c +2 -2
  127. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/p_rsa_asn1.c +1 -0
  128. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/print.c +7 -6
  129. data/aws-crt-ffi/crt/aws-lc/crypto/evp_extra/scrypt.c +13 -1
  130. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/CMakeLists.txt +13 -4
  131. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/aes/aes_nohw.c +18 -6
  132. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bcm.c +12 -4
  133. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bn_assert_test.cc +77 -0
  134. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bn_test.cc +30 -0
  135. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/bytes.c +112 -22
  136. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/div.c +12 -5
  137. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/exponentiation.c +54 -1
  138. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/gcd.c +5 -6
  139. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/internal.h +37 -15
  140. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/montgomery.c +4 -11
  141. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/bn/montgomery_inv.c +51 -15
  142. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/cipher/aead.c +2 -2
  143. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/digest.c +29 -6
  144. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/digests.c +89 -0
  145. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/digest/internal.h +4 -0
  146. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec.c +19 -36
  147. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_key.c +3 -3
  148. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_montgomery.c +9 -7
  149. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/ec_test.cc +33 -9
  150. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/internal.h +17 -12
  151. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p224-64.c +5 -8
  152. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p256-nistz.c +8 -8
  153. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p256.c +9 -8
  154. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p384.c +33 -16
  155. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/p521.c +14 -6
  156. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/scalar.c +26 -24
  157. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/simple_mul.c +8 -5
  158. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ec/wnaf.c +3 -3
  159. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/ecdsa/ecdsa.c +9 -3
  160. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/evp/evp.c +43 -12
  161. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/evp/p_ec.c +4 -3
  162. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/hmac/hmac.c +3 -1
  163. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/modes/xts.c +26 -3
  164. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/cpu_jitter_test.cc +1 -1
  165. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/internal.h +20 -11
  166. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/rand.c +10 -10
  167. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rand/urandom.c +2 -2
  168. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/internal.h +59 -0
  169. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/padding.c +9 -3
  170. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/rsa.c +7 -0
  171. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/rsa/rsa_impl.c +51 -60
  172. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/service_indicator/service_indicator.c +5 -2
  173. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/service_indicator/service_indicator_test.cc +205 -5
  174. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/asm/sha1-armv8.pl +1 -1
  175. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/asm/sha512-armv8.pl +1 -1
  176. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/internal.h +8 -0
  177. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha3.c +37 -15
  178. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha3_test.cc +115 -110
  179. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sha/sha512.c +55 -1
  180. data/aws-crt-ffi/crt/aws-lc/crypto/fipsmodule/sshkdf/sshkdf.c +2 -2
  181. data/aws-crt-ffi/crt/aws-lc/crypto/hmac_extra/hmac_test.cc +12 -0
  182. data/aws-crt-ffi/crt/aws-lc/crypto/hmac_extra/hmac_tests.txt +10 -0
  183. data/aws-crt-ffi/crt/aws-lc/crypto/hrss/asm/poly_rq_mul.S +2 -6
  184. data/aws-crt-ffi/crt/aws-lc/crypto/impl_dispatch_test.cc +9 -1
  185. data/aws-crt-ffi/crt/aws-lc/crypto/internal.h +90 -8
  186. data/aws-crt-ffi/crt/aws-lc/crypto/kem/kem.c +28 -27
  187. data/aws-crt-ffi/crt/aws-lc/crypto/kyber/kem_kyber.h +14 -0
  188. data/aws-crt-ffi/crt/aws-lc/crypto/obj/obj_dat.h +52 -2
  189. data/aws-crt-ffi/crt/aws-lc/crypto/obj/obj_mac.num +5 -0
  190. data/aws-crt-ffi/crt/aws-lc/crypto/obj/objects.txt +7 -0
  191. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/arm-xlate.pl +3 -14
  192. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/ppc-xlate.pl +1 -5
  193. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/x86_64-xlate.pl +4 -15
  194. data/aws-crt-ffi/crt/aws-lc/crypto/perlasm/x86asm.pl +4 -13
  195. data/aws-crt-ffi/crt/aws-lc/crypto/poly1305/poly1305_arm_asm.S +3 -13
  196. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/deterministic.c +4 -3
  197. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/fuchsia.c +4 -4
  198. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/rand_test.cc +0 -63
  199. data/aws-crt-ffi/crt/aws-lc/crypto/rand_extra/windows.c +41 -19
  200. data/aws-crt-ffi/crt/aws-lc/crypto/rsa_extra/rsa_test.cc +3 -3
  201. data/aws-crt-ffi/crt/aws-lc/crypto/siphash/siphash.c +12 -5
  202. data/aws-crt-ffi/crt/aws-lc/crypto/siphash/siphash_test.cc +5 -5
  203. data/aws-crt-ffi/crt/aws-lc/crypto/stack/stack.c +68 -46
  204. data/aws-crt-ffi/crt/aws-lc/crypto/trust_token/pmbtoken.c +4 -4
  205. data/aws-crt-ffi/crt/aws-lc/crypto/trust_token/voprf.c +2 -2
  206. data/aws-crt-ffi/crt/aws-lc/crypto/x509/by_dir.c +0 -6
  207. data/aws-crt-ffi/crt/aws-lc/crypto/x509/internal.h +4 -1
  208. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_lu.c +33 -9
  209. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_test.cc +87 -0
  210. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_trs.c +1 -1
  211. data/aws-crt-ffi/crt/aws-lc/crypto/x509/x509_vfy.c +35 -13
  212. data/aws-crt-ffi/crt/aws-lc/crypto/x509v3/v3_lib.c +2 -0
  213. data/aws-crt-ffi/crt/aws-lc/crypto/x509v3/v3_purp.c +4 -6
  214. data/aws-crt-ffi/crt/aws-lc/generated-src/crypto_test_data.cc +179 -151
  215. data/aws-crt-ffi/crt/aws-lc/generated-src/err_data.c +353 -349
  216. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  217. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  218. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  219. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  220. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  221. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  222. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/bn-armv8.S +4 -14
  223. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +4 -14
  224. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  225. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  226. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  227. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  228. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  229. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  230. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  231. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  232. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  233. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-aarch64/crypto/test/trampoline-armv8.S +4 -14
  234. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/chacha/chacha-armv4.S +3 -13
  235. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S +3 -13
  236. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/armv4-mont.S +3 -13
  237. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/bsaes-armv7.S +3 -13
  238. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/ghash-armv4.S +3 -13
  239. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/ghashv8-armx.S +3 -13
  240. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha1-armv4-large.S +3 -13
  241. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S +3 -13
  242. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/sha512-armv4.S +3 -13
  243. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/fipsmodule/vpaes-armv7.S +3 -13
  244. data/aws-crt-ffi/crt/aws-lc/generated-src/ios-arm/crypto/test/trampoline-armv4.S +3 -13
  245. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  246. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  247. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  248. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  249. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  250. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  251. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/bn-armv8.S +3 -13
  252. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +3 -13
  253. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  254. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  255. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  256. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  257. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  258. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  259. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  260. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  261. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  262. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-aarch64/crypto/test/trampoline-armv8.S +3 -13
  263. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/chacha/chacha-armv4.S +3 -13
  264. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S +3 -13
  265. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/armv4-mont.S +3 -13
  266. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/bsaes-armv7.S +3 -13
  267. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/ghash-armv4.S +3 -13
  268. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/ghashv8-armx.S +3 -13
  269. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha1-armv4-large.S +3 -13
  270. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S +3 -13
  271. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/sha512-armv4.S +3 -13
  272. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/fipsmodule/vpaes-armv7.S +3 -13
  273. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-arm/crypto/test/trampoline-armv4.S +3 -13
  274. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S +1 -5
  275. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S +1 -5
  276. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-ppc64le/crypto/test/trampoline-ppc.S +1 -5
  277. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/chacha/chacha-x86.S +3 -12
  278. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S +3 -12
  279. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/bn-586.S +4 -13
  280. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/co-586.S +4 -13
  281. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S +3 -12
  282. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/ghash-x86.S +3 -12
  283. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/md5-586.S +4 -13
  284. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S +4 -13
  285. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S +3 -12
  286. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/sha512-586.S +3 -12
  287. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/vpaes-x86.S +3 -12
  288. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/fipsmodule/x86-mont.S +3 -12
  289. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86/crypto/test/trampoline-x86.S +3 -12
  290. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S +2 -11
  291. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +2 -11
  292. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aesni-sha1-x86_64.S +2 -11
  293. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/aesni-sha256-x86_64.S +2 -11
  294. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +2 -11
  295. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-gcm-avx512.S +2 -11
  296. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +2 -11
  297. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-x86_64.S +2 -11
  298. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +2 -11
  299. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +2 -11
  300. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/ghash-x86_64.S +2 -11
  301. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/md5-x86_64.S +2 -11
  302. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +2 -11
  303. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +2 -11
  304. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S +2 -11
  305. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/rsaz-avx2.S +2 -11
  306. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S +2 -11
  307. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S +2 -11
  308. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S +2 -11
  309. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S +2 -11
  310. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont.S +2 -11
  311. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/fipsmodule/x86_64-mont5.S +2 -11
  312. data/aws-crt-ffi/crt/aws-lc/generated-src/linux-x86_64/crypto/test/trampoline-x86_64.S +2 -11
  313. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/chacha/chacha-x86.S +3 -12
  314. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/aesni-x86.S +3 -12
  315. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/bn-586.S +3 -12
  316. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/co-586.S +3 -12
  317. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S +3 -12
  318. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/ghash-x86.S +3 -12
  319. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/md5-586.S +3 -12
  320. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S +3 -12
  321. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S +3 -12
  322. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/sha512-586.S +3 -12
  323. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/vpaes-x86.S +3 -12
  324. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/fipsmodule/x86-mont.S +3 -12
  325. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86/crypto/test/trampoline-x86.S +3 -12
  326. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S +2 -11
  327. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +2 -11
  328. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aesni-sha1-x86_64.S +2 -11
  329. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/aesni-sha256-x86_64.S +2 -11
  330. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +2 -11
  331. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-gcm-avx512.S +2 -11
  332. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +2 -11
  333. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S +2 -11
  334. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +2 -11
  335. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +2 -11
  336. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S +2 -11
  337. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/md5-x86_64.S +2 -11
  338. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +2 -11
  339. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +2 -11
  340. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S +2 -11
  341. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S +2 -11
  342. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S +2 -11
  343. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S +2 -11
  344. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S +2 -11
  345. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S +2 -11
  346. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont.S +2 -11
  347. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +2 -11
  348. data/aws-crt-ffi/crt/aws-lc/generated-src/mac-x86_64/crypto/test/trampoline-x86_64.S +2 -11
  349. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/chacha/chacha-armv8.S +4 -14
  350. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/cipher_extra/chacha20_poly1305_armv8.S +4 -14
  351. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-armx.S +3 -13
  352. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8-unroll8.S +3 -13
  353. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/aesv8-gcm-armv8.S +3 -13
  354. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/armv8-mont.S +4 -14
  355. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/bn-armv8.S +4 -14
  356. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +4 -14
  357. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/ghashv8-armx.S +3 -13
  358. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/keccak1600-armv8.S +3 -13
  359. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/md5-armv8.S +3 -13
  360. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/p256-armv8-asm.S +4 -14
  361. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/p256_beeu-armv8-asm.S +4 -14
  362. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S +4 -14
  363. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S +4 -14
  364. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S +4 -14
  365. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/fipsmodule/vpaes-armv8.S +3 -13
  366. data/aws-crt-ffi/crt/aws-lc/generated-src/win-aarch64/crypto/test/trampoline-armv8.S +4 -14
  367. data/aws-crt-ffi/crt/aws-lc/go.mod +4 -4
  368. data/aws-crt-ffi/crt/aws-lc/go.sum +8 -10
  369. data/aws-crt-ffi/crt/aws-lc/include/openssl/aead.h +2 -2
  370. data/aws-crt-ffi/crt/aws-lc/include/openssl/arm_arch.h +4 -119
  371. data/aws-crt-ffi/crt/aws-lc/include/openssl/asm_base.h +185 -0
  372. data/aws-crt-ffi/crt/aws-lc/include/openssl/asn1.h +5 -0
  373. data/aws-crt-ffi/crt/aws-lc/include/openssl/base.h +31 -134
  374. data/aws-crt-ffi/crt/aws-lc/include/openssl/bio.h +30 -18
  375. data/aws-crt-ffi/crt/aws-lc/include/openssl/bn.h +0 -2
  376. data/aws-crt-ffi/crt/aws-lc/include/openssl/chacha.h +6 -0
  377. data/aws-crt-ffi/crt/aws-lc/include/openssl/cipher.h +2 -2
  378. data/aws-crt-ffi/crt/aws-lc/include/openssl/digest.h +9 -6
  379. data/aws-crt-ffi/crt/aws-lc/include/openssl/dsa.h +0 -21
  380. data/aws-crt-ffi/crt/aws-lc/include/openssl/ec.h +1 -1
  381. data/aws-crt-ffi/crt/aws-lc/include/openssl/err.h +1 -1
  382. data/aws-crt-ffi/crt/aws-lc/include/openssl/evp.h +8 -5
  383. data/aws-crt-ffi/crt/aws-lc/include/openssl/nid.h +21 -0
  384. data/aws-crt-ffi/crt/aws-lc/include/openssl/rsa.h +1 -65
  385. data/aws-crt-ffi/crt/aws-lc/include/openssl/sha.h +22 -1
  386. data/aws-crt-ffi/crt/aws-lc/include/openssl/ssl.h +121 -13
  387. data/aws-crt-ffi/crt/aws-lc/include/openssl/stack.h +229 -208
  388. data/aws-crt-ffi/crt/aws-lc/include/openssl/target.h +166 -0
  389. data/aws-crt-ffi/crt/aws-lc/include/openssl/x509.h +30 -10
  390. data/aws-crt-ffi/crt/aws-lc/include/openssl/x509v3.h +6 -4
  391. data/aws-crt-ffi/crt/aws-lc/sources.cmake +2 -0
  392. data/aws-crt-ffi/crt/aws-lc/ssl/extensions.cc +12 -7
  393. data/aws-crt-ffi/crt/aws-lc/ssl/handshake_server.cc +28 -18
  394. data/aws-crt-ffi/crt/aws-lc/ssl/internal.h +41 -6
  395. data/aws-crt-ffi/crt/aws-lc/ssl/s3_both.cc +9 -17
  396. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_cipher.cc +13 -5
  397. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_key_share.cc +542 -2
  398. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_lib.cc +35 -0
  399. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_test.cc +1847 -14
  400. data/aws-crt-ffi/crt/aws-lc/ssl/ssl_x509.cc +128 -0
  401. data/aws-crt-ffi/crt/aws-lc/ssl/test/PORTING.md +10 -7
  402. data/aws-crt-ffi/crt/aws-lc/ssl/test/bssl_shim.cc +133 -77
  403. data/aws-crt-ffi/crt/aws-lc/ssl/test/handshake_util.cc +3 -3
  404. data/aws-crt-ffi/crt/aws-lc/ssl/test/handshaker.cc +4 -0
  405. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_client.go +6 -2
  406. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_messages.go +894 -1042
  407. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/handshake_server.go +24 -23
  408. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/prf.go +6 -5
  409. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/runner.go +56 -55
  410. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/shim_dispatcher.go +188 -0
  411. data/aws-crt-ffi/crt/aws-lc/ssl/test/runner/ticket.go +37 -39
  412. data/aws-crt-ffi/crt/aws-lc/ssl/test/test_config.cc +59 -24
  413. data/aws-crt-ffi/crt/aws-lc/ssl/test/test_config.h +3 -2
  414. data/aws-crt-ffi/crt/aws-lc/ssl/tls13_server.cc +10 -11
  415. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/app.py +4 -4
  416. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/{aws_lc_mac_arm_ci_stack.py → aws_lc_ec2_test_framework_ci_stack.py} +13 -29
  417. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/ssm/general_test_run_ssm_document.yaml +43 -0
  418. data/aws-crt-ffi/crt/aws-lc/tests/ci/common_posix_setup.sh +10 -0
  419. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-aarch/amazonlinux-2023_base/Dockerfile +5 -1
  420. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-aarch/ubuntu-22.04_base/Dockerfile +19 -3
  421. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/amazonlinux-2_gcc-7x-intel-sde/Dockerfile +5 -4
  422. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/build_images.sh +1 -0
  423. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/push_images.sh +2 -1
  424. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-20.04_clang-10x_formal-verification/create_image.sh +1 -1
  425. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-22.04_base/Dockerfile +1 -0
  426. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/linux-x86/ubuntu-22.04_clang-14x-sde/Dockerfile +42 -0
  427. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/windows/vs2017/Dockerfile +14 -0
  428. data/aws-crt-ffi/crt/aws-lc/tests/ci/docker_images/windows/windows_base/Dockerfile +3 -0
  429. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/README.md +12 -0
  430. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/nginx_patch/aws-lc-nginx.patch +68 -23
  431. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/run_crt_integration.sh +27 -0
  432. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/run_monit_integration.sh +56 -0
  433. data/aws-crt-ffi/crt/aws-lc/tests/ci/integration/sslproxy_patch/aws-lc-sslproxy.patch +2 -2
  434. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_ec2_test_framework.sh +135 -0
  435. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_fips_tests.sh +14 -2
  436. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_tests_with_sde.sh +4 -1
  437. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_tests_with_sde_asan.sh +14 -0
  438. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_windows_tests.bat +39 -3
  439. data/aws-crt-ffi/crt/aws-lc/third_party/fiat/README.md +21 -6
  440. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S +284 -0
  441. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S +210 -0
  442. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_mod_n25519.S +186 -0
  443. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/bignum_neg_p25519.S +65 -0
  444. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519.S +1043 -352
  445. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_alt.S +1043 -352
  446. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_byte.S +1043 -352
  447. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519_byte_alt.S +1043 -352
  448. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base.S +1042 -352
  449. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_alt.S +1042 -352
  450. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_byte.S +1042 -352
  451. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/curve25519_x25519base_byte_alt.S +1043 -354
  452. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_decode.S +700 -0
  453. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_decode_alt.S +563 -0
  454. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_encode.S +131 -0
  455. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmulbase.S +9626 -0
  456. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmulbase_alt.S +9468 -0
  457. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmuldouble.S +3157 -0
  458. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/curve25519/edwards25519_scalarmuldouble_alt.S +2941 -0
  459. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/p384/Makefile +1 -1
  460. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/arm/p521/Makefile +1 -1
  461. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +34 -0
  462. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_madd_n25519.S +219 -0
  463. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_madd_n25519_alt.S +245 -0
  464. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_mod_n25519.S +228 -0
  465. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/bignum_neg_p25519.S +86 -0
  466. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519.S +1350 -407
  467. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519_alt.S +1350 -407
  468. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519base.S +1344 -400
  469. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/curve25519_x25519base_alt.S +1348 -402
  470. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_decode.S +670 -0
  471. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_decode_alt.S +751 -0
  472. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_encode.S +81 -0
  473. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmulbase.S +9910 -0
  474. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmulbase_alt.S +9986 -0
  475. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmuldouble.S +3619 -0
  476. data/aws-crt-ffi/crt/aws-lc/third_party/s2n-bignum/x86_att/curve25519/edwards25519_scalarmuldouble_alt.S +3736 -0
  477. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_224_test.json +1978 -0
  478. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_224_test.txt +1403 -0
  479. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_256_test.json +1993 -0
  480. data/aws-crt-ffi/crt/aws-lc/third_party/wycheproof_testvectors/hmac_sha512_256_test.txt +1416 -0
  481. data/aws-crt-ffi/crt/aws-lc/tool/digest.cc +4 -0
  482. data/aws-crt-ffi/crt/aws-lc/tool/internal.h +1 -0
  483. data/aws-crt-ffi/crt/aws-lc/tool/speed.cc +53 -6
  484. data/aws-crt-ffi/crt/aws-lc/util/all_tests.go +43 -12
  485. data/aws-crt-ffi/crt/aws-lc/util/all_tests.json +13 -5
  486. data/aws-crt-ffi/crt/aws-lc/util/bot/DEPS +4 -4
  487. data/aws-crt-ffi/crt/aws-lc/util/bot/update_clang.py +8 -2
  488. data/aws-crt-ffi/crt/aws-lc/util/codecov-ci.sh +82 -0
  489. data/aws-crt-ffi/crt/aws-lc/util/convert_wycheproof/convert_wycheproof.go +7 -5
  490. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/ACVP.md +7 -0
  491. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/hash.go +24 -9
  492. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/rsa.go +3 -4
  493. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/subprocess/subprocess.go +15 -10
  494. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/HMAC-SHA2-512-224.bz2 +0 -0
  495. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHA2-512-224.bz2 +0 -0
  496. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHAKE-128.bz2 +0 -0
  497. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/expected/SHAKE-256.bz2 +0 -0
  498. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/sha512-224-tests.json +1 -0
  499. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/shake-128-tests.json +1 -0
  500. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/sha-tests/shake-256-tests.json +1 -0
  501. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/tests.json +1 -0
  502. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/HMAC-SHA2-512-224.bz2 +0 -0
  503. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHA2-512-224.bz2 +0 -0
  504. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHAKE-128.bz2 +0 -0
  505. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/acvptool/test/vectors/SHAKE-256.bz2 +0 -0
  506. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/modulewrapper/main.cc +4 -0
  507. data/aws-crt-ffi/crt/aws-lc/util/fipstools/acvp/modulewrapper/modulewrapper.cc +144 -1
  508. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/delocate.go +9 -3
  509. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/testdata/aarch64-Basic/in.s +4 -0
  510. data/aws-crt-ffi/crt/aws-lc/util/fipstools/delocate/testdata/aarch64-Basic/out.s +11 -0
  511. data/aws-crt-ffi/crt/aws-lc/util/fipstools/inject_hash/inject_hash.go +13 -4
  512. data/aws-crt-ffi/crt/aws-lc/util/fipstools/test-break-kat.sh +2 -0
  513. data/aws-crt-ffi/crt/aws-lc/util/testconfig/testconfig.go +2 -1
  514. data/aws-crt-ffi/crt/s2n/api/s2n.h +9 -5
  515. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/handshake.rs +9 -6
  516. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/resumption.rs +14 -14
  517. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/benches/throughput.rs +9 -6
  518. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/harness.rs +106 -102
  519. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/openssl.rs +24 -20
  520. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/rustls.rs +28 -24
  521. data/aws-crt-ffi/crt/s2n/bindings/rust/bench/src/s2n_tls.rs +52 -50
  522. data/aws-crt-ffi/crt/s2n/bindings/rust/generate/Cargo.toml +1 -0
  523. data/aws-crt-ffi/crt/s2n/bindings/rust/integration/Cargo.toml +3 -0
  524. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls/Cargo.toml +2 -2
  525. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls/src/connection.rs +9 -0
  526. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls-sys/templates/Cargo.template +2 -1
  527. data/aws-crt-ffi/crt/s2n/bindings/rust/s2n-tls-tokio/Cargo.toml +2 -2
  528. data/aws-crt-ffi/crt/s2n/tests/cbmc/sources/make_common_datastructures.c +9 -2
  529. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_client_cert_verify_recv_test.c +1 -1
  530. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c +1 -1
  531. data/aws-crt-ffi/crt/s2n/tests/fuzz/s2n_tls13_cert_verify_recv_test.c +1 -1
  532. data/aws-crt-ffi/crt/s2n/tests/integrationv2/test_version_negotiation.py +4 -4
  533. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_auth_selection_test.c +19 -9
  534. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_auth_handshake_test.c +3 -3
  535. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_cert_verify_test.c +1 -1
  536. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_hello_recv_test.c +1 -1
  537. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_hello_test.c +4 -4
  538. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_client_signature_algorithms_extension_test.c +4 -5
  539. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_connection_protocol_versions_test.c +390 -0
  540. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_connection_test.c +8 -4
  541. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_handshake_test.c +2 -1
  542. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_quic_support_io_test.c +106 -0
  543. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_security_policies_test.c +6 -2
  544. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_self_talk_offload_signing_test.c +3 -3
  545. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_self_talk_session_resumption_test.c +135 -0
  546. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_server_new_session_ticket_test.c +32 -0
  547. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_server_signature_algorithms_extension_test.c +1 -1
  548. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_signature_algorithms_test.c +307 -283
  549. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_tls13_cert_request_test.c +1 -1
  550. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_tls13_cert_verify_test.c +18 -17
  551. data/aws-crt-ffi/crt/s2n/tests/unit/s2n_x509_validator_test.c +125 -0
  552. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_signature_algorithms.c +8 -1
  553. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_supported_versions.c +43 -11
  554. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_client_supported_versions.h +3 -0
  555. data/aws-crt-ffi/crt/s2n/tls/extensions/s2n_server_signature_algorithms.c +8 -1
  556. data/aws-crt-ffi/crt/s2n/tls/s2n_auth_selection.c +4 -2
  557. data/aws-crt-ffi/crt/s2n/tls/s2n_client_cert_verify.c +7 -10
  558. data/aws-crt-ffi/crt/s2n/tls/s2n_client_hello.c +2 -2
  559. data/aws-crt-ffi/crt/s2n/tls/s2n_connection.c +75 -14
  560. data/aws-crt-ffi/crt/s2n/tls/s2n_handshake.h +2 -2
  561. data/aws-crt-ffi/crt/s2n/tls/s2n_post_handshake.c +1 -1
  562. data/aws-crt-ffi/crt/s2n/tls/s2n_post_handshake.h +1 -0
  563. data/aws-crt-ffi/crt/s2n/tls/s2n_quic_support.c +29 -0
  564. data/aws-crt-ffi/crt/s2n/tls/s2n_quic_support.h +5 -0
  565. data/aws-crt-ffi/crt/s2n/tls/s2n_security_policies.c +40 -0
  566. data/aws-crt-ffi/crt/s2n/tls/s2n_security_policies.h +4 -0
  567. data/aws-crt-ffi/crt/s2n/tls/s2n_server_cert_request.c +1 -1
  568. data/aws-crt-ffi/crt/s2n/tls/s2n_server_hello.c +0 -3
  569. data/aws-crt-ffi/crt/s2n/tls/s2n_server_key_exchange.c +8 -9
  570. data/aws-crt-ffi/crt/s2n/tls/s2n_server_new_session_ticket.c +8 -0
  571. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_algorithms.c +111 -72
  572. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_algorithms.h +11 -9
  573. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_scheme.c +9 -0
  574. data/aws-crt-ffi/crt/s2n/tls/s2n_signature_scheme.h +2 -0
  575. data/aws-crt-ffi/crt/s2n/tls/s2n_tls13_certificate_verify.c +12 -18
  576. data/aws-crt-ffi/crt/s2n/tls/s2n_x509_validator.c +7 -7
  577. data/aws-crt-ffi/src/api.h +1 -0
  578. data/lib/aws-crt/native.rb +1 -1
  579. metadata +68 -5
  580. data/aws-crt-ffi/crt/aws-lc/tests/ci/cdk/cdk/ssm/m1_tests_ssm_document.yaml +0 -34
  581. data/aws-crt-ffi/crt/aws-lc/tests/ci/run_m1_ec2_instance.sh +0 -96
@@ -0,0 +1,3736 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0 OR ISC
3
+
4
+ // ----------------------------------------------------------------------------
5
+ // Double scalar multiplication for edwards25519, fresh and base point
6
+ // Input scalar[4], point[8], bscalar[4]; output res[8]
7
+ //
8
+ // extern void edwards25519_scalarmuldouble_alt
9
+ // (uint64_t res[static 8],uint64_t scalar[static 4],
10
+ // uint64_t point[static 8],uint64_t bscalar[static 4]);
11
+ //
12
+ // Given scalar = n, point = P and bscalar = m, returns in res
13
+ // the point (X,Y) = n * P + m * B where B = (...,4/5) is
14
+ // the standard basepoint for the edwards25519 (Ed25519) curve.
15
+ //
16
+ // Both 256-bit coordinates of the input point P are implicitly
17
+ // reduced modulo 2^255-19 if they are not already in reduced form,
18
+ // but the conventional usage is that they *are* already reduced.
19
+ // The scalars can be arbitrary 256-bit numbers but may also be
20
+ // considered as implicitly reduced modulo the group order.
21
+ //
22
+ // Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point, RCX = bscalar
23
+ // Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point, R9 = bscalar
24
+ // ----------------------------------------------------------------------------
25
+ #include "_internal_s2n_bignum.h"
26
+
27
+
28
+ S2N_BN_SYM_VISIBILITY_DIRECTIVE(edwards25519_scalarmuldouble_alt)
29
+ S2N_BN_SYM_PRIVACY_DIRECTIVE(edwards25519_scalarmuldouble_alt)
30
+ .text
31
+
32
+ // Size in bytes of an individual field element (four 64-bit words)
33
+
34
+ #define NUMSIZE 32
35
+
36
+ // Pointer-offset pairs for result and temporaries on stack with some aliasing.
37
+ // Both "resx" and "resy" assume the "res" pointer has been preloaded into %rbp.
38
+
39
+ #define resx (0*NUMSIZE)(%rbp)
40
+ #define resy (1*NUMSIZE)(%rbp)
41
+
42
+ #define scalar (0*NUMSIZE)(%rsp)
43
+ #define bscalar (1*NUMSIZE)(%rsp)
44
+
45
+ #define tabent (2*NUMSIZE)(%rsp)
46
+ #define btabent (6*NUMSIZE)(%rsp)
47
+
48
+ #define acc (9*NUMSIZE)(%rsp)
49
+
50
+ #define tab (13*NUMSIZE)(%rsp)
51
+
52
+ // Additional variables kept on the stack: four consecutive 8-byte slots from 45*NUMSIZE
53
+
54
+ #define bf 45*NUMSIZE(%rsp)
55
+ #define cf 45*NUMSIZE+8(%rsp)
56
+ #define i 45*NUMSIZE+16(%rsp)
57
+ #define res 45*NUMSIZE+24(%rsp)
58
+
59
+ // Total size to reserve on the stack (excluding local subroutines); the four 8-byte slots
60
+ // above fill exactly one NUMSIZE, so the reserved area ends at 46*NUMSIZE.
61
+ #define NSPACE (46*NUMSIZE)
62
+
63
+ // Syntactic variants to make x86_att forms easier to generate
64
+ // Each is the bare offset of the same-named lowercase %rsp operand, without the base register.
65
+ #define SCALAR (0*NUMSIZE)
66
+ #define BSCALAR (1*NUMSIZE)
67
+ #define TABENT (2*NUMSIZE)
68
+ #define BTABENT (6*NUMSIZE)
69
+ #define ACC (9*NUMSIZE)
70
+ #define TAB (13*NUMSIZE)
71
+
72
+ // Sub-references used in local subroutines with local stack
73
+ // x/y/z/w_k are four consecutive field elements based at %rdi (k=0), %rsi (k=1) or %rbp (k=2).
74
+ #define x_0 0(%rdi)
75
+ #define y_0 NUMSIZE(%rdi)
76
+ #define z_0 (2*NUMSIZE)(%rdi)
77
+ #define w_0 (3*NUMSIZE)(%rdi)
78
+
79
+ #define x_1 0(%rsi)
80
+ #define y_1 NUMSIZE(%rsi)
81
+ #define z_1 (2*NUMSIZE)(%rsi)
82
+ #define w_1 (3*NUMSIZE)(%rsi)
83
+
84
+ #define x_2 0(%rbp)
85
+ #define y_2 NUMSIZE(%rbp)
86
+ #define z_2 (2*NUMSIZE)(%rbp)
87
+ #define w_2 (3*NUMSIZE)(%rbp)
88
+ // t0..t5 are six consecutive field-element slots starting at %rsp.
89
+ #define t0 (0*NUMSIZE)(%rsp)
90
+ #define t1 (1*NUMSIZE)(%rsp)
91
+ #define t2 (2*NUMSIZE)(%rsp)
92
+ #define t3 (3*NUMSIZE)(%rsp)
93
+ #define t4 (4*NUMSIZE)(%rsp)
94
+ #define t5 (5*NUMSIZE)(%rsp)
95
+
96
+ // Macro wrapping up the basic field multiplication P0 := P1 * P2 (mod p_25519), only trivially
97
+ // different from a pure function call to bignum_mul_p25519_alt. P0, P1, P2 are 4-word memory operands.
98
+ // Writes %rax, %rcx, %rdx, %rsi, %r8-%r15; %rsi changes after P1/P2 are read but before P0 is stored.
99
+ #define mul_p25519(P0,P1,P2) \
100
+ movq P1, %rax ; /* 4x4-word schoolbook product: low word in %r8 up to high word in %r15 */ \
101
+ mulq P2; \
102
+ movq %rax, %r8 ; \
103
+ movq %rdx, %r9 ; \
104
+ xorq %r10, %r10 ; \
105
+ xorq %r11, %r11 ; \
106
+ movq P1, %rax ; \
107
+ mulq 0x8+P2; \
108
+ addq %rax, %r9 ; \
109
+ adcq %rdx, %r10 ; \
110
+ movq 0x8+P1, %rax ; \
111
+ mulq P2; \
112
+ addq %rax, %r9 ; \
113
+ adcq %rdx, %r10 ; \
114
+ adcq $0x0, %r11 ; \
115
+ xorq %r12, %r12 ; \
116
+ movq P1, %rax ; \
117
+ mulq 0x10+P2; \
118
+ addq %rax, %r10 ; \
119
+ adcq %rdx, %r11 ; \
120
+ adcq %r12, %r12 ; \
121
+ movq 0x8+P1, %rax ; \
122
+ mulq 0x8+P2; \
123
+ addq %rax, %r10 ; \
124
+ adcq %rdx, %r11 ; \
125
+ adcq $0x0, %r12 ; \
126
+ movq 0x10+P1, %rax ; \
127
+ mulq P2; \
128
+ addq %rax, %r10 ; \
129
+ adcq %rdx, %r11 ; \
130
+ adcq $0x0, %r12 ; \
131
+ xorq %r13, %r13 ; \
132
+ movq P1, %rax ; \
133
+ mulq 0x18+P2; \
134
+ addq %rax, %r11 ; \
135
+ adcq %rdx, %r12 ; \
136
+ adcq %r13, %r13 ; \
137
+ movq 0x8+P1, %rax ; \
138
+ mulq 0x10+P2; \
139
+ addq %rax, %r11 ; \
140
+ adcq %rdx, %r12 ; \
141
+ adcq $0x0, %r13 ; \
142
+ movq 0x10+P1, %rax ; \
143
+ mulq 0x8+P2; \
144
+ addq %rax, %r11 ; \
145
+ adcq %rdx, %r12 ; \
146
+ adcq $0x0, %r13 ; \
147
+ movq 0x18+P1, %rax ; \
148
+ mulq P2; \
149
+ addq %rax, %r11 ; \
150
+ adcq %rdx, %r12 ; \
151
+ adcq $0x0, %r13 ; \
152
+ xorq %r14, %r14 ; \
153
+ movq 0x8+P1, %rax ; \
154
+ mulq 0x18+P2; \
155
+ addq %rax, %r12 ; \
156
+ adcq %rdx, %r13 ; \
157
+ adcq %r14, %r14 ; \
158
+ movq 0x10+P1, %rax ; \
159
+ mulq 0x10+P2; \
160
+ addq %rax, %r12 ; \
161
+ adcq %rdx, %r13 ; \
162
+ adcq $0x0, %r14 ; \
163
+ movq 0x18+P1, %rax ; \
164
+ mulq 0x8+P2; \
165
+ addq %rax, %r12 ; \
166
+ adcq %rdx, %r13 ; \
167
+ adcq $0x0, %r14 ; \
168
+ xorq %r15, %r15 ; \
169
+ movq 0x10+P1, %rax ; \
170
+ mulq 0x18+P2; \
171
+ addq %rax, %r13 ; \
172
+ adcq %rdx, %r14 ; \
173
+ adcq %r15, %r15 ; \
174
+ movq 0x18+P1, %rax ; \
175
+ mulq 0x10+P2; \
176
+ addq %rax, %r13 ; \
177
+ adcq %rdx, %r14 ; \
178
+ adcq $0x0, %r15 ; \
179
+ movq 0x18+P1, %rax ; \
180
+ mulq 0x18+P2; \
181
+ addq %rax, %r14 ; \
182
+ adcq %rdx, %r15 ; \
183
+ movl $0x26, %esi ; /* 0x26 = 38: fold %r12..%r15 into %r8..%r11, as 2^256 == 38 (mod p_25519) */ \
184
+ movq %r12, %rax ; \
185
+ mulq %rsi; \
186
+ addq %rax, %r8 ; \
187
+ adcq %rdx, %r9 ; \
188
+ sbbq %rcx, %rcx ; \
189
+ movq %r13, %rax ; \
190
+ mulq %rsi; \
191
+ subq %rcx, %rdx ; /* %rcx is 0 or -1 from the sbbq, so this adds the pending carry */ \
192
+ addq %rax, %r9 ; \
193
+ adcq %rdx, %r10 ; \
194
+ sbbq %rcx, %rcx ; \
195
+ movq %r14, %rax ; \
196
+ mulq %rsi; \
197
+ subq %rcx, %rdx ; \
198
+ addq %rax, %r10 ; \
199
+ adcq %rdx, %r11 ; \
200
+ sbbq %rcx, %rcx ; \
201
+ movq %r15, %rax ; \
202
+ mulq %rsi; \
203
+ subq %rcx, %rdx ; \
204
+ xorq %rcx, %rcx ; \
205
+ addq %rax, %r11 ; \
206
+ movq %rdx, %r12 ; \
207
+ adcq %rcx, %r12 ; \
208
+ shldq $0x1, %r11, %r12 ; /* %r12 := bits 255 and up of the 5-word value: the quotient estimate */ \
209
+ leaq 0x1(%r12), %rax ; /* estimate + 1 */ \
210
+ movl $0x13, %esi ; /* 0x13 = 19, and p_25519 = 2^255 - 19 */ \
211
+ bts $63, %r11 ; \
212
+ imulq %rsi, %rax ; \
213
+ addq %rax, %r8 ; \
214
+ adcq %rcx, %r9 ; \
215
+ adcq %rcx, %r10 ; \
216
+ adcq %rcx, %r11 ; \
217
+ sbbq %rax, %rax ; \
218
+ notq %rax; \
219
+ andq %rsi, %rax ; /* %rax := 19 if the add above did not carry out, else 0 */ \
220
+ subq %rax, %r8 ; \
221
+ sbbq %rcx, %r9 ; \
222
+ sbbq %rcx, %r10 ; \
223
+ sbbq %rcx, %r11 ; \
224
+ btr $63, %r11 ; /* clear bit 255 again */ \
225
+ movq %r8, P0 ; \
226
+ movq %r9, 0x8+P0 ; \
227
+ movq %r10, 0x10+P0 ; \
228
+ movq %r11, 0x18+P0
229
+
230
+ // A version of multiplication, P0 := P1 * P2 (mod p_25519), that only guarantees output < 2 * p_25519.
231
+ // This basically skips the +1 and final correction in quotient estimation.
232
+ // Keeps the constant 38 in %rbx rather than %rsi; writes %rax, %rbx, %rcx, %rdx and %r8-%r15.
233
+ #define mul_4(P0,P1,P2) \
234
+ movq P1, %rax ; /* 4x4-word schoolbook product: low word in %r8 up to high word in %r15 */ \
235
+ mulq P2; \
236
+ movq %rax, %r8 ; \
237
+ movq %rdx, %r9 ; \
238
+ xorq %r10, %r10 ; \
239
+ xorq %r11, %r11 ; \
240
+ movq P1, %rax ; \
241
+ mulq 0x8+P2; \
242
+ addq %rax, %r9 ; \
243
+ adcq %rdx, %r10 ; \
244
+ movq 0x8+P1, %rax ; \
245
+ mulq P2; \
246
+ addq %rax, %r9 ; \
247
+ adcq %rdx, %r10 ; \
248
+ adcq $0x0, %r11 ; \
249
+ xorq %r12, %r12 ; \
250
+ movq P1, %rax ; \
251
+ mulq 0x10+P2; \
252
+ addq %rax, %r10 ; \
253
+ adcq %rdx, %r11 ; \
254
+ adcq %r12, %r12 ; \
255
+ movq 0x8+P1, %rax ; \
256
+ mulq 0x8+P2; \
257
+ addq %rax, %r10 ; \
258
+ adcq %rdx, %r11 ; \
259
+ adcq $0x0, %r12 ; \
260
+ movq 0x10+P1, %rax ; \
261
+ mulq P2; \
262
+ addq %rax, %r10 ; \
263
+ adcq %rdx, %r11 ; \
264
+ adcq $0x0, %r12 ; \
265
+ xorq %r13, %r13 ; \
266
+ movq P1, %rax ; \
267
+ mulq 0x18+P2; \
268
+ addq %rax, %r11 ; \
269
+ adcq %rdx, %r12 ; \
270
+ adcq %r13, %r13 ; \
271
+ movq 0x8+P1, %rax ; \
272
+ mulq 0x10+P2; \
273
+ addq %rax, %r11 ; \
274
+ adcq %rdx, %r12 ; \
275
+ adcq $0x0, %r13 ; \
276
+ movq 0x10+P1, %rax ; \
277
+ mulq 0x8+P2; \
278
+ addq %rax, %r11 ; \
279
+ adcq %rdx, %r12 ; \
280
+ adcq $0x0, %r13 ; \
281
+ movq 0x18+P1, %rax ; \
282
+ mulq P2; \
283
+ addq %rax, %r11 ; \
284
+ adcq %rdx, %r12 ; \
285
+ adcq $0x0, %r13 ; \
286
+ xorq %r14, %r14 ; \
287
+ movq 0x8+P1, %rax ; \
288
+ mulq 0x18+P2; \
289
+ addq %rax, %r12 ; \
290
+ adcq %rdx, %r13 ; \
291
+ adcq %r14, %r14 ; \
292
+ movq 0x10+P1, %rax ; \
293
+ mulq 0x10+P2; \
294
+ addq %rax, %r12 ; \
295
+ adcq %rdx, %r13 ; \
296
+ adcq $0x0, %r14 ; \
297
+ movq 0x18+P1, %rax ; \
298
+ mulq 0x8+P2; \
299
+ addq %rax, %r12 ; \
300
+ adcq %rdx, %r13 ; \
301
+ adcq $0x0, %r14 ; \
302
+ xorq %r15, %r15 ; \
303
+ movq 0x10+P1, %rax ; \
304
+ mulq 0x18+P2; \
305
+ addq %rax, %r13 ; \
306
+ adcq %rdx, %r14 ; \
307
+ adcq %r15, %r15 ; \
308
+ movq 0x18+P1, %rax ; \
309
+ mulq 0x10+P2; \
310
+ addq %rax, %r13 ; \
311
+ adcq %rdx, %r14 ; \
312
+ adcq $0x0, %r15 ; \
313
+ movq 0x18+P1, %rax ; \
314
+ mulq 0x18+P2; \
315
+ addq %rax, %r14 ; \
316
+ adcq %rdx, %r15 ; \
317
+ movl $0x26, %ebx ; /* 0x26 = 38: fold %r12..%r15 into %r8..%r11, as 2^256 == 38 (mod p_25519) */ \
318
+ movq %r12, %rax ; \
319
+ mulq %rbx; \
320
+ addq %rax, %r8 ; \
321
+ adcq %rdx, %r9 ; \
322
+ sbbq %rcx, %rcx ; \
323
+ movq %r13, %rax ; \
324
+ mulq %rbx; \
325
+ subq %rcx, %rdx ; /* %rcx is 0 or -1 from the sbbq, so this adds the pending carry */ \
326
+ addq %rax, %r9 ; \
327
+ adcq %rdx, %r10 ; \
328
+ sbbq %rcx, %rcx ; \
329
+ movq %r14, %rax ; \
330
+ mulq %rbx; \
331
+ subq %rcx, %rdx ; \
332
+ addq %rax, %r10 ; \
333
+ adcq %rdx, %r11 ; \
334
+ sbbq %rcx, %rcx ; \
335
+ movq %r15, %rax ; \
336
+ mulq %rbx; \
337
+ subq %rcx, %rdx ; \
338
+ xorq %rcx, %rcx ; \
339
+ addq %rax, %r11 ; \
340
+ movq %rdx, %r12 ; \
341
+ adcq %rcx, %r12 ; \
342
+ shldq $0x1, %r11, %r12 ; /* %r12 := bits 255 and up of the 5-word value: the quotient estimate */ \
343
+ btr $0x3f, %r11 ; /* clear bit 255, which is now held in %r12 */ \
344
+ movl $0x13, %edx ; \
345
+ imulq %r12, %rdx ; /* 19 * estimate, with no +1 and no correction afterwards */ \
346
+ addq %rdx, %r8 ; \
347
+ adcq %rcx, %r9 ; \
348
+ adcq %rcx, %r10 ; \
349
+ adcq %rcx, %r11 ; \
350
+ movq %r8, P0 ; \
351
+ movq %r9, 0x8+P0 ; \
352
+ movq %r10, 0x10+P0 ; \
353
+ movq %r11, 0x18+P0
354
+
355
+ // Squaring P0 := P1^2 (mod p_25519) just giving a result < 2 * p_25519, which is done by
356
+ // basically skipping the +1 in the quotient estimate and the final
357
+ // optional correction.
358
+ // Writes %rax, %rbx, %rcx, %rdx and %r8-%r15.
359
+ #define sqr_4(P0,P1) \
360
+ movq P1, %rax ; /* 8-word square accumulated in %r8 (low) up to %r15 (high) */ \
361
+ mulq %rax; \
362
+ movq %rax, %r8 ; \
363
+ movq %rdx, %r9 ; \
364
+ xorq %r10, %r10 ; \
365
+ xorq %r11, %r11 ; \
366
+ movq P1, %rax ; \
367
+ mulq 0x8+P1; \
368
+ addq %rax, %rax ; /* products of two different words are doubled in %rdx:%rax first */ \
369
+ adcq %rdx, %rdx ; \
370
+ adcq $0x0, %r11 ; \
371
+ addq %rax, %r9 ; \
372
+ adcq %rdx, %r10 ; \
373
+ adcq $0x0, %r11 ; \
374
+ xorq %r12, %r12 ; \
375
+ movq 0x8+P1, %rax ; \
376
+ mulq %rax; \
377
+ addq %rax, %r10 ; \
378
+ adcq %rdx, %r11 ; \
379
+ adcq $0x0, %r12 ; \
380
+ movq P1, %rax ; \
381
+ mulq 0x10+P1; \
382
+ addq %rax, %rax ; \
383
+ adcq %rdx, %rdx ; \
384
+ adcq $0x0, %r12 ; \
385
+ addq %rax, %r10 ; \
386
+ adcq %rdx, %r11 ; \
387
+ adcq $0x0, %r12 ; \
388
+ xorq %r13, %r13 ; \
389
+ movq P1, %rax ; \
390
+ mulq 0x18+P1; \
391
+ addq %rax, %rax ; \
392
+ adcq %rdx, %rdx ; \
393
+ adcq $0x0, %r13 ; \
394
+ addq %rax, %r11 ; \
395
+ adcq %rdx, %r12 ; \
396
+ adcq $0x0, %r13 ; \
397
+ movq 0x8+P1, %rax ; \
398
+ mulq 0x10+P1; \
399
+ addq %rax, %rax ; \
400
+ adcq %rdx, %rdx ; \
401
+ adcq $0x0, %r13 ; \
402
+ addq %rax, %r11 ; \
403
+ adcq %rdx, %r12 ; \
404
+ adcq $0x0, %r13 ; \
405
+ xorq %r14, %r14 ; \
406
+ movq 0x8+P1, %rax ; \
407
+ mulq 0x18+P1; \
408
+ addq %rax, %rax ; \
409
+ adcq %rdx, %rdx ; \
410
+ adcq $0x0, %r14 ; \
411
+ addq %rax, %r12 ; \
412
+ adcq %rdx, %r13 ; \
413
+ adcq $0x0, %r14 ; \
414
+ movq 0x10+P1, %rax ; \
415
+ mulq %rax; \
416
+ addq %rax, %r12 ; \
417
+ adcq %rdx, %r13 ; \
418
+ adcq $0x0, %r14 ; \
419
+ xorq %r15, %r15 ; \
420
+ movq 0x10+P1, %rax ; \
421
+ mulq 0x18+P1; \
422
+ addq %rax, %rax ; \
423
+ adcq %rdx, %rdx ; \
424
+ adcq $0x0, %r15 ; \
425
+ addq %rax, %r13 ; \
426
+ adcq %rdx, %r14 ; \
427
+ adcq $0x0, %r15 ; \
428
+ movq 0x18+P1, %rax ; \
429
+ mulq %rax; \
430
+ addq %rax, %r14 ; \
431
+ adcq %rdx, %r15 ; \
432
+ movl $0x26, %ebx ; /* 0x26 = 38: fold %r12..%r15 into %r8..%r11, as 2^256 == 38 (mod p_25519) */ \
433
+ movq %r12, %rax ; \
434
+ mulq %rbx; \
435
+ addq %rax, %r8 ; \
436
+ adcq %rdx, %r9 ; \
437
+ sbbq %rcx, %rcx ; \
438
+ movq %r13, %rax ; \
439
+ mulq %rbx; \
440
+ subq %rcx, %rdx ; /* %rcx is 0 or -1 from the sbbq, so this adds the pending carry */ \
441
+ addq %rax, %r9 ; \
442
+ adcq %rdx, %r10 ; \
443
+ sbbq %rcx, %rcx ; \
444
+ movq %r14, %rax ; \
445
+ mulq %rbx; \
446
+ subq %rcx, %rdx ; \
447
+ addq %rax, %r10 ; \
448
+ adcq %rdx, %r11 ; \
449
+ sbbq %rcx, %rcx ; \
450
+ movq %r15, %rax ; \
451
+ mulq %rbx; \
452
+ subq %rcx, %rdx ; \
453
+ xorq %rcx, %rcx ; \
454
+ addq %rax, %r11 ; \
455
+ movq %rdx, %r12 ; \
456
+ adcq %rcx, %r12 ; \
457
+ shldq $0x1, %r11, %r12 ; /* %r12 := bits 255 and up of the 5-word value: the quotient estimate */ \
458
+ btr $0x3f, %r11 ; /* clear bit 255, which is now held in %r12 */ \
459
+ movl $0x13, %edx ; \
460
+ imulq %r12, %rdx ; /* 19 * estimate, with no +1 and no correction afterwards */ \
461
+ addq %rdx, %r8 ; \
462
+ adcq %rcx, %r9 ; \
463
+ adcq %rcx, %r10 ; \
464
+ adcq %rcx, %r11 ; \
465
+ movq %r8, P0 ; \
466
+ movq %r9, 0x8+P0 ; \
467
+ movq %r10, 0x10+P0 ; \
468
+ movq %r11, 0x18+P0
469
+
470
+ // Modular subtraction with double modulus 2 * p_25519 = 2^256 - 38:
471
+ // P0 := P1 - P2 over 4 words, then 38 more is subtracted if that borrowed. Writes %rax, %rbx, %rcx, %r8-%r10.
472
+ #define sub_twice4(P0,P1,P2) \
473
+ movq P1, %r8 ; \
474
+ xorl %ebx, %ebx ; /* %rbx := 0, done before the borrow chain starts */ \
475
+ subq P2, %r8 ; \
476
+ movq 8+P1, %r9 ; \
477
+ sbbq 8+P2, %r9 ; \
478
+ movl $38, %ecx ; /* mov does not touch flags, so the borrow chain continues */ \
479
+ movq 16+P1, %r10 ; \
480
+ sbbq 16+P2, %r10 ; \
481
+ movq 24+P1, %rax ; \
482
+ sbbq 24+P2, %rax ; \
483
+ cmovncq %rbx, %rcx ; /* no borrow: the correction is 0 instead of 38 */ \
484
+ subq %rcx, %r8 ; \
485
+ sbbq %rbx, %r9 ; \
486
+ sbbq %rbx, %r10 ; \
487
+ sbbq %rbx, %rax ; \
488
+ movq %r8, P0 ; \
489
+ movq %r9, 8+P0 ; \
490
+ movq %r10, 16+P0 ; \
491
+ movq %rax, 24+P0
492
+
493
+ // Modular addition and doubling with double modulus 2 * p_25519 = 2^256 - 38.
494
+ // This only ensures that the result fits in 4 digits, not that it is reduced
495
+ // even w.r.t. double modulus. The result is always correct modulo 2 * p_25519 provided
496
+ // the sum of the inputs is < 2^256 + 2^256 - 38, so in particular provided
497
+ // at least one of them is reduced double modulo.
498
+ // add_twice4 sets P0 := P1 + P2 and adds 38 if the 4-word sum carried out. Writes %rax, %rcx, %r8-%r11.
499
+ #define add_twice4(P0,P1,P2) \
500
+ movq P1, %r8 ; \
501
+ xorl %ecx, %ecx ; \
502
+ addq P2, %r8 ; \
503
+ movq 0x8+P1, %r9 ; \
504
+ adcq 0x8+P2, %r9 ; \
505
+ movq 0x10+P1, %r10 ; \
506
+ adcq 0x10+P2, %r10 ; \
507
+ movq 0x18+P1, %r11 ; \
508
+ adcq 0x18+P2, %r11 ; \
509
+ movl $38, %eax ; /* mov does not touch flags, so the carry from the adc chain survives */ \
510
+ cmovncq %rcx, %rax ; /* no carry: the correction is 0 (%rcx) instead of 38 */ \
511
+ addq %rax, %r8 ; \
512
+ adcq %rcx, %r9 ; \
513
+ adcq %rcx, %r10 ; \
514
+ adcq %rcx, %r11 ; \
515
+ movq %r8, P0 ; \
516
+ movq %r9, 0x8+P0 ; \
517
+ movq %r10, 0x10+P0 ; \
518
+ movq %r11, 0x18+P0
519
+
520
+ #define double_twice4(P0,P1) /* P0 := 2 * P1, adding 38 if the doubling carried out of 4 words */ \
521
+ movq P1, %r8 ; \
522
+ xorl %ecx, %ecx ; /* %rcx := 0, used as the zero addend and the no-carry correction */ \
523
+ addq %r8, %r8 ; \
524
+ movq 0x8+P1, %r9 ; \
525
+ adcq %r9, %r9 ; \
526
+ movq 0x10+P1, %r10 ; \
527
+ adcq %r10, %r10 ; \
528
+ movq 0x18+P1, %r11 ; \
529
+ adcq %r11, %r11 ; \
530
+ movl $38, %eax ; /* mov does not touch flags, so the carry from the adc chain survives */ \
531
+ cmovncq %rcx, %rax ; /* no carry: the correction is 0 instead of 38 */ \
532
+ addq %rax, %r8 ; \
533
+ adcq %rcx, %r9 ; \
534
+ adcq %rcx, %r10 ; \
535
+ adcq %rcx, %r11 ; \
536
+ movq %r8, P0 ; \
537
+ movq %r9, 0x8+P0 ; \
538
+ movq %r10, 0x10+P0 ; \
539
+ movq %r11, 0x18+P0
540
+
541
+ // Load the constant k_25519 = 2 * d_25519 using immediate operations
542
+ // Each 64-bit word goes through %rax (the only register written) to offsets 0, 8, 16, 24 of P0.
543
+ #define load_k25519(P0) \
544
+ movq $0xebd69b9426b2f159, %rax ; \
545
+ movq %rax, P0 ; \
546
+ movq $0x00e0149a8283b156, %rax ; \
547
+ movq %rax, 8+P0 ; \
548
+ movq $0x198e80f2eef3d130, %rax ; \
549
+ movq %rax, 16+P0 ; \
550
+ movq $0x2406d9dc56dffce7, %rax ; \
551
+ movq %rax, 24+P0
552
+
553
+ S2N_BN_SYMBOL(edwards25519_scalarmuldouble_alt):
554
+
555
+ // In this case the Windows form literally makes a subroutine call.
556
+ // This avoids hassle arising from keeping code and data together.
557
+
558
+ #if WINDOWS_ABI
559
+ pushq %rdi
560
+ pushq %rsi
561
+ movq %rcx, %rdi
562
+ movq %rdx, %rsi
563
+ movq %r8, %rdx
564
+ movq %r9, %rcx
565
+ callq edwards25519_scalarmuldouble_alt_standard
566
+ popq %rsi
567
+ popq %rdi
568
+ ret
569
+
570
+ edwards25519_scalarmuldouble_alt_standard:
571
+ #endif
572
+
573
+ // Save registers, make room for temps, preserve input arguments.
574
+
575
+ pushq %rbx
576
+ pushq %rbp
577
+ pushq %r12
578
+ pushq %r13
579
+ pushq %r14
580
+ pushq %r15
581
+ subq $NSPACE, %rsp
582
+
583
+ // Move the output pointer to a stable place
584
+
585
+ movq %rdi, res
586
+
587
+ // Copy scalars while recoding all 4-bit nybbles except the top
588
+ // one (bits 252..255) into signed 4-bit digits. This is essentially
589
+ // done just by adding the recoding constant 0x0888..888, after
590
+ // which all digits except the first have an implicit bias of -8,
591
+ // so 0 -> -8, 1 -> -7, ... 7 -> -1, 8 -> 0, 9 -> 1, ... 15 -> 7.
592
+ // (We could literally create 2s complement signed nybbles by
593
+ // XORing with the same constant 0x0888..888 afterwards, but it
594
+ // doesn't seem to make the end usage any simpler.)
595
+ //
596
+ // In order to ensure that the unrecoded top nybble (bits 252..255)
597
+ // does not become > 8 as a result of carries lower down from the
598
+ // recoding, we first (conceptually) subtract the group order iff
599
+ // the top digit of the scalar is > 2^63. In the implementation the
600
+ // reduction and recoding are combined by optionally using the
601
+ // modified recoding constant 0x0888...888 + (2^256 - group_order).
602
+
603
+ movq (%rcx), %r8
604
+ movq 8(%rcx), %r9
605
+ movq 16(%rcx), %r10
606
+ movq 24(%rcx), %r11
607
+ movq $0xc7f56fb5a0d9e920, %r12
608
+ movq $0xe190b99370cba1d5, %r13
609
+ movq $0x8888888888888887, %r14
610
+ movq $0x8888888888888888, %r15
611
+ movq $0x8000000000000000, %rax
612
+ movq $0x0888888888888888, %rbx
613
+ cmpq %r11, %rax
614
+ cmovncq %r15, %r12
615
+ cmovncq %r15, %r13
616
+ cmovncq %r15, %r14
617
+ cmovncq %rbx, %r15
618
+ addq %r12, %r8
619
+ adcq %r13, %r9
620
+ adcq %r14, %r10
621
+ adcq %r15, %r11
622
+ movq %r8, BSCALAR(%rsp)
623
+ movq %r9, BSCALAR+8(%rsp)
624
+ movq %r10, BSCALAR+16(%rsp)
625
+ movq %r11, BSCALAR+24(%rsp)
626
+
627
+ movq (%rsi), %r8
628
+ movq 8(%rsi), %r9
629
+ movq 16(%rsi), %r10
630
+ movq 24(%rsi), %r11
631
+ movq $0xc7f56fb5a0d9e920, %r12
632
+ movq $0xe190b99370cba1d5, %r13
633
+ movq $0x8888888888888887, %r14
634
+ movq $0x8888888888888888, %r15
635
+ movq $0x8000000000000000, %rax
636
+ movq $0x0888888888888888, %rbx
637
+ cmpq %r11, %rax
638
+ cmovncq %r15, %r12
639
+ cmovncq %r15, %r13
640
+ cmovncq %r15, %r14
641
+ cmovncq %rbx, %r15
642
+ addq %r12, %r8
643
+ adcq %r13, %r9
644
+ adcq %r14, %r10
645
+ adcq %r15, %r11
646
+ movq %r8, SCALAR(%rsp)
647
+ movq %r9, SCALAR+8(%rsp)
648
+ movq %r10, SCALAR+16(%rsp)
649
+ movq %r11, SCALAR+24(%rsp)
650
+
651
+ // Create table of multiples 1..8 of the general input point at "tab".
652
+ // Reduce the input coordinates x and y modulo 2^256 - 38 first, for the
653
+ // sake of definiteness; this is the reduction that will be maintained.
654
+ // We could slightly optimize the additions because we know the input
655
+ // point is affine (so Z = 1), but it doesn't seem worth the complication.
656
+
657
+ movl $38, %eax
658
+ movq (%rdx), %r8
659
+ xorl %ebx, %ebx
660
+ movq 8(%rdx), %r9
661
+ xorl %ecx, %ecx
662
+ movq 16(%rdx), %r10
663
+ xorl %esi, %esi
664
+ movq 24(%rdx), %r11
665
+ addq %r8, %rax
666
+ adcq %r9, %rbx
667
+ adcq %r10, %rcx
668
+ adcq %r11, %rsi
669
+ cmovncq %r8, %rax
670
+ movq %rax, TAB(%rsp)
671
+ cmovncq %r9, %rbx
672
+ movq %rbx, TAB+8(%rsp)
673
+ cmovncq %r10, %rcx
674
+ movq %rcx, TAB+16(%rsp)
675
+ cmovncq %r11, %rsi
676
+ movq %rsi, TAB+24(%rsp)
677
+
678
+ movl $38, %eax
679
+ movq 32(%rdx), %r8
680
+ xorl %ebx, %ebx
681
+ movq 40(%rdx), %r9
682
+ xorl %ecx, %ecx
683
+ movq 48(%rdx), %r10
684
+ xorl %esi, %esi
685
+ movq 56(%rdx), %r11
686
+ addq %r8, %rax
687
+ adcq %r9, %rbx
688
+ adcq %r10, %rcx
689
+ adcq %r11, %rsi
690
+ cmovncq %r8, %rax
691
+ movq %rax, TAB+32(%rsp)
692
+ cmovncq %r9, %rbx
693
+ movq %rbx, TAB+40(%rsp)
694
+ cmovncq %r10, %rcx
695
+ movq %rcx, TAB+48(%rsp)
696
+ cmovncq %r11, %rsi
697
+ movq %rsi, TAB+56(%rsp)
698
+
699
+ movl $1, %eax
700
+ movq %rax, TAB+64(%rsp)
701
+ xorl %eax, %eax
702
+ movq %rax, TAB+72(%rsp)
703
+ movq %rax, TAB+80(%rsp)
704
+ movq %rax, TAB+88(%rsp)
705
+
706
+ leaq TAB+96(%rsp), %rdi
707
+ leaq TAB(%rsp), %rsi
708
+ leaq TAB+32(%rsp), %rbp
709
+ mul_4(x_0,x_1,x_2)
710
+
711
+ // Multiple 2
712
+
713
+ leaq TAB+1*128(%rsp), %rdi
714
+ leaq TAB(%rsp), %rsi
715
+ callq edwards25519_scalarmuldouble_alt_epdouble
716
+
717
+ // Multiple 3
718
+
719
+ leaq TAB+2*128(%rsp), %rdi
720
+ leaq TAB(%rsp), %rsi
721
+ leaq TAB+1*128(%rsp), %rbp
722
+ callq edwards25519_scalarmuldouble_alt_epadd
723
+
724
+ // Multiple 4
725
+
726
+ leaq TAB+3*128(%rsp), %rdi
727
+ leaq TAB+1*128(%rsp), %rsi
728
+ callq edwards25519_scalarmuldouble_alt_epdouble
729
+
730
+ // Multiple 5
731
+
732
+ leaq TAB+4*128(%rsp), %rdi
733
+ leaq TAB(%rsp), %rsi
734
+ leaq TAB+3*128(%rsp), %rbp
735
+ callq edwards25519_scalarmuldouble_alt_epadd
736
+
737
+ // Multiple 6
738
+
739
+ leaq TAB+5*128(%rsp), %rdi
740
+ leaq TAB+2*128(%rsp), %rsi
741
+ callq edwards25519_scalarmuldouble_alt_epdouble
742
+
743
+ // Multiple 7
744
+
745
+ leaq TAB+6*128(%rsp), %rdi
746
+ leaq TAB(%rsp), %rsi
747
+ leaq TAB+5*128(%rsp), %rbp
748
+ callq edwards25519_scalarmuldouble_alt_epadd
749
+
750
+ // Multiple 8
751
+
752
+ leaq TAB+7*128(%rsp), %rdi
753
+ leaq TAB+3*128(%rsp), %rsi
754
+ callq edwards25519_scalarmuldouble_alt_epdouble
755
+
756
+ // Handle the initialization, starting the loop counter at i = 252
757
+ // and initializing acc to the sum of the table entries for the
758
+ // top nybbles of the scalars (the ones with no implicit -8 bias).
759
+
760
+ movq $252, %rax
761
+ movq %rax, i
762
+
763
+ // Index for btable entry...
764
+
765
+ movq BSCALAR+24(%rsp), %rax
766
+ shrq $60, %rax
767
+ movq %rax, bf
768
+
769
+ // ...and constant-time indexing based on that index
770
+
771
+ movl $1, %eax
772
+ xorl %ebx, %ebx
773
+ xorl %ecx, %ecx
774
+ xorl %edx, %edx
775
+ movl $1, %r8d
776
+ xorl %r9d, %r9d
777
+ xorl %r10d, %r10d
778
+ xorl %r11d, %r11d
779
+ xorl %r12d, %r12d
780
+ xorl %r13d, %r13d
781
+ xorl %r14d, %r14d
782
+ xorl %r15d, %r15d
783
+
784
+ leaq edwards25519_scalarmuldouble_alt_table(%rip), %rbp
785
+
786
+ cmpq $1, bf
787
+ movq (%rbp), %rsi
788
+ cmovzq %rsi, %rax
789
+ movq 8(%rbp), %rsi
790
+ cmovzq %rsi, %rbx
791
+ movq 16(%rbp), %rsi
792
+ cmovzq %rsi, %rcx
793
+ movq 24(%rbp), %rsi
794
+ cmovzq %rsi, %rdx
795
+ movq 32(%rbp), %rsi
796
+ cmovzq %rsi, %r8
797
+ movq 40(%rbp), %rsi
798
+ cmovzq %rsi, %r9
799
+ movq 48(%rbp), %rsi
800
+ cmovzq %rsi, %r10
801
+ movq 56(%rbp), %rsi
802
+ cmovzq %rsi, %r11
803
+ movq 64(%rbp), %rsi
804
+ cmovzq %rsi, %r12
805
+ movq 72(%rbp), %rsi
806
+ cmovzq %rsi, %r13
807
+ movq 80(%rbp), %rsi
808
+ cmovzq %rsi, %r14
809
+ movq 88(%rbp), %rsi
810
+ cmovzq %rsi, %r15
811
+ addq $96, %rbp
812
+
813
+ cmpq $2, bf
814
+ movq (%rbp), %rsi
815
+ cmovzq %rsi, %rax
816
+ movq 8(%rbp), %rsi
817
+ cmovzq %rsi, %rbx
818
+ movq 16(%rbp), %rsi
819
+ cmovzq %rsi, %rcx
820
+ movq 24(%rbp), %rsi
821
+ cmovzq %rsi, %rdx
822
+ movq 32(%rbp), %rsi
823
+ cmovzq %rsi, %r8
824
+ movq 40(%rbp), %rsi
825
+ cmovzq %rsi, %r9
826
+ movq 48(%rbp), %rsi
827
+ cmovzq %rsi, %r10
828
+ movq 56(%rbp), %rsi
829
+ cmovzq %rsi, %r11
830
+ movq 64(%rbp), %rsi
831
+ cmovzq %rsi, %r12
832
+ movq 72(%rbp), %rsi
833
+ cmovzq %rsi, %r13
834
+ movq 80(%rbp), %rsi
835
+ cmovzq %rsi, %r14
836
+ movq 88(%rbp), %rsi
837
+ cmovzq %rsi, %r15
838
+ addq $96, %rbp
839
+
840
+ cmpq $3, bf
841
+ movq (%rbp), %rsi
842
+ cmovzq %rsi, %rax
843
+ movq 8(%rbp), %rsi
844
+ cmovzq %rsi, %rbx
845
+ movq 16(%rbp), %rsi
846
+ cmovzq %rsi, %rcx
847
+ movq 24(%rbp), %rsi
848
+ cmovzq %rsi, %rdx
849
+ movq 32(%rbp), %rsi
850
+ cmovzq %rsi, %r8
851
+ movq 40(%rbp), %rsi
852
+ cmovzq %rsi, %r9
853
+ movq 48(%rbp), %rsi
854
+ cmovzq %rsi, %r10
855
+ movq 56(%rbp), %rsi
856
+ cmovzq %rsi, %r11
857
+ movq 64(%rbp), %rsi
858
+ cmovzq %rsi, %r12
859
+ movq 72(%rbp), %rsi
860
+ cmovzq %rsi, %r13
861
+ movq 80(%rbp), %rsi
862
+ cmovzq %rsi, %r14
863
+ movq 88(%rbp), %rsi
864
+ cmovzq %rsi, %r15
865
+ addq $96, %rbp
866
+
867
+ cmpq $4, bf
868
+ movq (%rbp), %rsi
869
+ cmovzq %rsi, %rax
870
+ movq 8(%rbp), %rsi
871
+ cmovzq %rsi, %rbx
872
+ movq 16(%rbp), %rsi
873
+ cmovzq %rsi, %rcx
874
+ movq 24(%rbp), %rsi
875
+ cmovzq %rsi, %rdx
876
+ movq 32(%rbp), %rsi
877
+ cmovzq %rsi, %r8
878
+ movq 40(%rbp), %rsi
879
+ cmovzq %rsi, %r9
880
+ movq 48(%rbp), %rsi
881
+ cmovzq %rsi, %r10
882
+ movq 56(%rbp), %rsi
883
+ cmovzq %rsi, %r11
884
+ movq 64(%rbp), %rsi
885
+ cmovzq %rsi, %r12
886
+ movq 72(%rbp), %rsi
887
+ cmovzq %rsi, %r13
888
+ movq 80(%rbp), %rsi
889
+ cmovzq %rsi, %r14
890
+ movq 88(%rbp), %rsi
891
+ cmovzq %rsi, %r15
892
+ addq $96, %rbp
893
+
894
+ cmpq $5, bf
895
+ movq (%rbp), %rsi
896
+ cmovzq %rsi, %rax
897
+ movq 8(%rbp), %rsi
898
+ cmovzq %rsi, %rbx
899
+ movq 16(%rbp), %rsi
900
+ cmovzq %rsi, %rcx
901
+ movq 24(%rbp), %rsi
902
+ cmovzq %rsi, %rdx
903
+ movq 32(%rbp), %rsi
904
+ cmovzq %rsi, %r8
905
+ movq 40(%rbp), %rsi
906
+ cmovzq %rsi, %r9
907
+ movq 48(%rbp), %rsi
908
+ cmovzq %rsi, %r10
909
+ movq 56(%rbp), %rsi
910
+ cmovzq %rsi, %r11
911
+ movq 64(%rbp), %rsi
912
+ cmovzq %rsi, %r12
913
+ movq 72(%rbp), %rsi
914
+ cmovzq %rsi, %r13
915
+ movq 80(%rbp), %rsi
916
+ cmovzq %rsi, %r14
917
+ movq 88(%rbp), %rsi
918
+ cmovzq %rsi, %r15
919
+ addq $96, %rbp
920
+
921
+ cmpq $6, bf
922
+ movq (%rbp), %rsi
923
+ cmovzq %rsi, %rax
924
+ movq 8(%rbp), %rsi
925
+ cmovzq %rsi, %rbx
926
+ movq 16(%rbp), %rsi
927
+ cmovzq %rsi, %rcx
928
+ movq 24(%rbp), %rsi
929
+ cmovzq %rsi, %rdx
930
+ movq 32(%rbp), %rsi
931
+ cmovzq %rsi, %r8
932
+ movq 40(%rbp), %rsi
933
+ cmovzq %rsi, %r9
934
+ movq 48(%rbp), %rsi
935
+ cmovzq %rsi, %r10
936
+ movq 56(%rbp), %rsi
937
+ cmovzq %rsi, %r11
938
+ movq 64(%rbp), %rsi
939
+ cmovzq %rsi, %r12
940
+ movq 72(%rbp), %rsi
941
+ cmovzq %rsi, %r13
942
+ movq 80(%rbp), %rsi
943
+ cmovzq %rsi, %r14
944
+ movq 88(%rbp), %rsi
945
+ cmovzq %rsi, %r15
946
+ addq $96, %rbp
947
+
948
+ cmpq $7, bf
949
+ movq (%rbp), %rsi
950
+ cmovzq %rsi, %rax
951
+ movq 8(%rbp), %rsi
952
+ cmovzq %rsi, %rbx
953
+ movq 16(%rbp), %rsi
954
+ cmovzq %rsi, %rcx
955
+ movq 24(%rbp), %rsi
956
+ cmovzq %rsi, %rdx
957
+ movq 32(%rbp), %rsi
958
+ cmovzq %rsi, %r8
959
+ movq 40(%rbp), %rsi
960
+ cmovzq %rsi, %r9
961
+ movq 48(%rbp), %rsi
962
+ cmovzq %rsi, %r10
963
+ movq 56(%rbp), %rsi
964
+ cmovzq %rsi, %r11
965
+ movq 64(%rbp), %rsi
966
+ cmovzq %rsi, %r12
967
+ movq 72(%rbp), %rsi
968
+ cmovzq %rsi, %r13
969
+ movq 80(%rbp), %rsi
970
+ cmovzq %rsi, %r14
971
+ movq 88(%rbp), %rsi
972
+ cmovzq %rsi, %r15
973
+ addq $96, %rbp
974
+
975
+ cmpq $8, bf
976
+ movq (%rbp), %rsi
977
+ cmovzq %rsi, %rax
978
+ movq 8(%rbp), %rsi
979
+ cmovzq %rsi, %rbx
980
+ movq 16(%rbp), %rsi
981
+ cmovzq %rsi, %rcx
982
+ movq 24(%rbp), %rsi
983
+ cmovzq %rsi, %rdx
984
+ movq 32(%rbp), %rsi
985
+ cmovzq %rsi, %r8
986
+ movq 40(%rbp), %rsi
987
+ cmovzq %rsi, %r9
988
+ movq 48(%rbp), %rsi
989
+ cmovzq %rsi, %r10
990
+ movq 56(%rbp), %rsi
991
+ cmovzq %rsi, %r11
992
+ movq 64(%rbp), %rsi
993
+ cmovzq %rsi, %r12
994
+ movq 72(%rbp), %rsi
995
+ cmovzq %rsi, %r13
996
+ movq 80(%rbp), %rsi
997
+ cmovzq %rsi, %r14
998
+ movq 88(%rbp), %rsi
999
+ cmovzq %rsi, %r15
1000
+
1001
+ movq %rax, BTABENT(%rsp)
1002
+ movq %rbx, BTABENT+8(%rsp)
1003
+ movq %rcx, BTABENT+16(%rsp)
1004
+ movq %rdx, BTABENT+24(%rsp)
1005
+ movq %r8, BTABENT+32(%rsp)
1006
+ movq %r9, BTABENT+40(%rsp)
1007
+ movq %r10, BTABENT+48(%rsp)
1008
+ movq %r11, BTABENT+56(%rsp)
1009
+ movq %r12, BTABENT+64(%rsp)
1010
+ movq %r13, BTABENT+72(%rsp)
1011
+ movq %r14, BTABENT+80(%rsp)
1012
+ movq %r15, BTABENT+88(%rsp)
1013
+
1014
+ // Index for table entry...
1015
+
1016
+ movq SCALAR+24(%rsp), %rax
1017
+ shrq $60, %rax
1018
+ movq %rax, bf
1019
+
1020
+ // ...and constant-time indexing based on that index.
1021
+ // Do the Y and Z fields first, to save on registers...
1022
+
1023
+ movl $1, %eax
1024
+ xorl %ebx, %ebx
1025
+ xorl %ecx, %ecx
1026
+ xorl %edx, %edx
1027
+ movl $1, %r8d
1028
+ xorl %r9d, %r9d
1029
+ xorl %r10d, %r10d
1030
+ xorl %r11d, %r11d
1031
+
1032
+ leaq TAB+32(%rsp), %rbp
1033
+
1034
+ cmpq $1, bf
1035
+ movq (%rbp), %rsi
1036
+ cmovzq %rsi, %rax
1037
+ movq 8(%rbp), %rsi
1038
+ cmovzq %rsi, %rbx
1039
+ movq 16(%rbp), %rsi
1040
+ cmovzq %rsi, %rcx
1041
+ movq 24(%rbp), %rsi
1042
+ cmovzq %rsi, %rdx
1043
+ movq 32(%rbp), %rsi
1044
+ cmovzq %rsi, %r8
1045
+ movq 40(%rbp), %rsi
1046
+ cmovzq %rsi, %r9
1047
+ movq 48(%rbp), %rsi
1048
+ cmovzq %rsi, %r10
1049
+ movq 56(%rbp), %rsi
1050
+ cmovzq %rsi, %r11
1051
+ addq $128, %rbp
1052
+
1053
+ cmpq $2, bf
1054
+ movq (%rbp), %rsi
1055
+ cmovzq %rsi, %rax
1056
+ movq 8(%rbp), %rsi
1057
+ cmovzq %rsi, %rbx
1058
+ movq 16(%rbp), %rsi
1059
+ cmovzq %rsi, %rcx
1060
+ movq 24(%rbp), %rsi
1061
+ cmovzq %rsi, %rdx
1062
+ movq 32(%rbp), %rsi
1063
+ cmovzq %rsi, %r8
1064
+ movq 40(%rbp), %rsi
1065
+ cmovzq %rsi, %r9
1066
+ movq 48(%rbp), %rsi
1067
+ cmovzq %rsi, %r10
1068
+ movq 56(%rbp), %rsi
1069
+ cmovzq %rsi, %r11
1070
+ addq $128, %rbp
1071
+
1072
+ cmpq $3, bf
1073
+ movq (%rbp), %rsi
1074
+ cmovzq %rsi, %rax
1075
+ movq 8(%rbp), %rsi
1076
+ cmovzq %rsi, %rbx
1077
+ movq 16(%rbp), %rsi
1078
+ cmovzq %rsi, %rcx
1079
+ movq 24(%rbp), %rsi
1080
+ cmovzq %rsi, %rdx
1081
+ movq 32(%rbp), %rsi
1082
+ cmovzq %rsi, %r8
1083
+ movq 40(%rbp), %rsi
1084
+ cmovzq %rsi, %r9
1085
+ movq 48(%rbp), %rsi
1086
+ cmovzq %rsi, %r10
1087
+ movq 56(%rbp), %rsi
1088
+ cmovzq %rsi, %r11
1089
+ addq $128, %rbp
1090
+
1091
+ cmpq $4, bf
1092
+ movq (%rbp), %rsi
1093
+ cmovzq %rsi, %rax
1094
+ movq 8(%rbp), %rsi
1095
+ cmovzq %rsi, %rbx
1096
+ movq 16(%rbp), %rsi
1097
+ cmovzq %rsi, %rcx
1098
+ movq 24(%rbp), %rsi
1099
+ cmovzq %rsi, %rdx
1100
+ movq 32(%rbp), %rsi
1101
+ cmovzq %rsi, %r8
1102
+ movq 40(%rbp), %rsi
1103
+ cmovzq %rsi, %r9
1104
+ movq 48(%rbp), %rsi
1105
+ cmovzq %rsi, %r10
1106
+ movq 56(%rbp), %rsi
1107
+ cmovzq %rsi, %r11
1108
+ addq $128, %rbp
1109
+
1110
+ cmpq $5, bf
1111
+ movq (%rbp), %rsi
1112
+ cmovzq %rsi, %rax
1113
+ movq 8(%rbp), %rsi
1114
+ cmovzq %rsi, %rbx
1115
+ movq 16(%rbp), %rsi
1116
+ cmovzq %rsi, %rcx
1117
+ movq 24(%rbp), %rsi
1118
+ cmovzq %rsi, %rdx
1119
+ movq 32(%rbp), %rsi
1120
+ cmovzq %rsi, %r8
1121
+ movq 40(%rbp), %rsi
1122
+ cmovzq %rsi, %r9
1123
+ movq 48(%rbp), %rsi
1124
+ cmovzq %rsi, %r10
1125
+ movq 56(%rbp), %rsi
1126
+ cmovzq %rsi, %r11
1127
+ addq $128, %rbp
1128
+
1129
+ cmpq $6, bf
1130
+ movq (%rbp), %rsi
1131
+ cmovzq %rsi, %rax
1132
+ movq 8(%rbp), %rsi
1133
+ cmovzq %rsi, %rbx
1134
+ movq 16(%rbp), %rsi
1135
+ cmovzq %rsi, %rcx
1136
+ movq 24(%rbp), %rsi
1137
+ cmovzq %rsi, %rdx
1138
+ movq 32(%rbp), %rsi
1139
+ cmovzq %rsi, %r8
1140
+ movq 40(%rbp), %rsi
1141
+ cmovzq %rsi, %r9
1142
+ movq 48(%rbp), %rsi
1143
+ cmovzq %rsi, %r10
1144
+ movq 56(%rbp), %rsi
1145
+ cmovzq %rsi, %r11
1146
+ addq $128, %rbp
1147
+
1148
+ cmpq $7, bf
1149
+ movq (%rbp), %rsi
1150
+ cmovzq %rsi, %rax
1151
+ movq 8(%rbp), %rsi
1152
+ cmovzq %rsi, %rbx
1153
+ movq 16(%rbp), %rsi
1154
+ cmovzq %rsi, %rcx
1155
+ movq 24(%rbp), %rsi
1156
+ cmovzq %rsi, %rdx
1157
+ movq 32(%rbp), %rsi
1158
+ cmovzq %rsi, %r8
1159
+ movq 40(%rbp), %rsi
1160
+ cmovzq %rsi, %r9
1161
+ movq 48(%rbp), %rsi
1162
+ cmovzq %rsi, %r10
1163
+ movq 56(%rbp), %rsi
1164
+ cmovzq %rsi, %r11
1165
+ addq $128, %rbp
1166
+
1167
+ cmpq $8, bf
1168
+ movq (%rbp), %rsi
1169
+ cmovzq %rsi, %rax
1170
+ movq 8(%rbp), %rsi
1171
+ cmovzq %rsi, %rbx
1172
+ movq 16(%rbp), %rsi
1173
+ cmovzq %rsi, %rcx
1174
+ movq 24(%rbp), %rsi
1175
+ cmovzq %rsi, %rdx
1176
+ movq 32(%rbp), %rsi
1177
+ cmovzq %rsi, %r8
1178
+ movq 40(%rbp), %rsi
1179
+ cmovzq %rsi, %r9
1180
+ movq 48(%rbp), %rsi
1181
+ cmovzq %rsi, %r10
1182
+ movq 56(%rbp), %rsi
1183
+ cmovzq %rsi, %r11
1184
+
1185
+ movq %rax, TABENT+32(%rsp)
1186
+ movq %rbx, TABENT+40(%rsp)
1187
+ movq %rcx, TABENT+48(%rsp)
1188
+ movq %rdx, TABENT+56(%rsp)
1189
+ movq %r8, TABENT+64(%rsp)
1190
+ movq %r9, TABENT+72(%rsp)
1191
+ movq %r10, TABENT+80(%rsp)
1192
+ movq %r11, TABENT+88(%rsp)
1193
+
1194
+ // ...followed by the X and W fields
1195
+
1196
+ leaq TAB(%rsp), %rbp
1197
+
1198
+ xorl %eax, %eax
1199
+ xorl %ebx, %ebx
1200
+ xorl %ecx, %ecx
1201
+ xorl %edx, %edx
1202
+ xorl %r8d, %r8d
1203
+ xorl %r9d, %r9d
1204
+ xorl %r10d, %r10d
1205
+ xorl %r11d, %r11d
1206
+
1207
+ cmpq $1, bf
1208
+ movq (%rbp), %rsi
1209
+ cmovzq %rsi, %rax
1210
+ movq 8(%rbp), %rsi
1211
+ cmovzq %rsi, %rbx
1212
+ movq 16(%rbp), %rsi
1213
+ cmovzq %rsi, %rcx
1214
+ movq 24(%rbp), %rsi
1215
+ cmovzq %rsi, %rdx
1216
+ movq 96(%rbp), %rsi
1217
+ cmovzq %rsi, %r8
1218
+ movq 104(%rbp), %rsi
1219
+ cmovzq %rsi, %r9
1220
+ movq 112(%rbp), %rsi
1221
+ cmovzq %rsi, %r10
1222
+ movq 120(%rbp), %rsi
1223
+ cmovzq %rsi, %r11
1224
+ addq $128, %rbp
1225
+
1226
+ cmpq $2, bf
1227
+ movq (%rbp), %rsi
1228
+ cmovzq %rsi, %rax
1229
+ movq 8(%rbp), %rsi
1230
+ cmovzq %rsi, %rbx
1231
+ movq 16(%rbp), %rsi
1232
+ cmovzq %rsi, %rcx
1233
+ movq 24(%rbp), %rsi
1234
+ cmovzq %rsi, %rdx
1235
+ movq 96(%rbp), %rsi
1236
+ cmovzq %rsi, %r8
1237
+ movq 104(%rbp), %rsi
1238
+ cmovzq %rsi, %r9
1239
+ movq 112(%rbp), %rsi
1240
+ cmovzq %rsi, %r10
1241
+ movq 120(%rbp), %rsi
1242
+ cmovzq %rsi, %r11
1243
+ addq $128, %rbp
1244
+
1245
+ cmpq $3, bf
1246
+ movq (%rbp), %rsi
1247
+ cmovzq %rsi, %rax
1248
+ movq 8(%rbp), %rsi
1249
+ cmovzq %rsi, %rbx
1250
+ movq 16(%rbp), %rsi
1251
+ cmovzq %rsi, %rcx
1252
+ movq 24(%rbp), %rsi
1253
+ cmovzq %rsi, %rdx
1254
+ movq 96(%rbp), %rsi
1255
+ cmovzq %rsi, %r8
1256
+ movq 104(%rbp), %rsi
1257
+ cmovzq %rsi, %r9
1258
+ movq 112(%rbp), %rsi
1259
+ cmovzq %rsi, %r10
1260
+ movq 120(%rbp), %rsi
1261
+ cmovzq %rsi, %r11
1262
+ addq $128, %rbp
1263
+
1264
+ cmpq $4, bf
1265
+ movq (%rbp), %rsi
1266
+ cmovzq %rsi, %rax
1267
+ movq 8(%rbp), %rsi
1268
+ cmovzq %rsi, %rbx
1269
+ movq 16(%rbp), %rsi
1270
+ cmovzq %rsi, %rcx
1271
+ movq 24(%rbp), %rsi
1272
+ cmovzq %rsi, %rdx
1273
+ movq 96(%rbp), %rsi
1274
+ cmovzq %rsi, %r8
1275
+ movq 104(%rbp), %rsi
1276
+ cmovzq %rsi, %r9
1277
+ movq 112(%rbp), %rsi
1278
+ cmovzq %rsi, %r10
1279
+ movq 120(%rbp), %rsi
1280
+ cmovzq %rsi, %r11
1281
+ addq $128, %rbp
1282
+
1283
+ cmpq $5, bf
1284
+ movq (%rbp), %rsi
1285
+ cmovzq %rsi, %rax
1286
+ movq 8(%rbp), %rsi
1287
+ cmovzq %rsi, %rbx
1288
+ movq 16(%rbp), %rsi
1289
+ cmovzq %rsi, %rcx
1290
+ movq 24(%rbp), %rsi
1291
+ cmovzq %rsi, %rdx
1292
+ movq 96(%rbp), %rsi
1293
+ cmovzq %rsi, %r8
1294
+ movq 104(%rbp), %rsi
1295
+ cmovzq %rsi, %r9
1296
+ movq 112(%rbp), %rsi
1297
+ cmovzq %rsi, %r10
1298
+ movq 120(%rbp), %rsi
1299
+ cmovzq %rsi, %r11
1300
+ addq $128, %rbp
1301
+
1302
+ cmpq $6, bf
1303
+ movq (%rbp), %rsi
1304
+ cmovzq %rsi, %rax
1305
+ movq 8(%rbp), %rsi
1306
+ cmovzq %rsi, %rbx
1307
+ movq 16(%rbp), %rsi
1308
+ cmovzq %rsi, %rcx
1309
+ movq 24(%rbp), %rsi
1310
+ cmovzq %rsi, %rdx
1311
+ movq 96(%rbp), %rsi
1312
+ cmovzq %rsi, %r8
1313
+ movq 104(%rbp), %rsi
1314
+ cmovzq %rsi, %r9
1315
+ movq 112(%rbp), %rsi
1316
+ cmovzq %rsi, %r10
1317
+ movq 120(%rbp), %rsi
1318
+ cmovzq %rsi, %r11
1319
+ addq $128, %rbp
1320
+
1321
+ cmpq $7, bf
1322
+ movq (%rbp), %rsi
1323
+ cmovzq %rsi, %rax
1324
+ movq 8(%rbp), %rsi
1325
+ cmovzq %rsi, %rbx
1326
+ movq 16(%rbp), %rsi
1327
+ cmovzq %rsi, %rcx
1328
+ movq 24(%rbp), %rsi
1329
+ cmovzq %rsi, %rdx
1330
+ movq 96(%rbp), %rsi
1331
+ cmovzq %rsi, %r8
1332
+ movq 104(%rbp), %rsi
1333
+ cmovzq %rsi, %r9
1334
+ movq 112(%rbp), %rsi
1335
+ cmovzq %rsi, %r10
1336
+ movq 120(%rbp), %rsi
1337
+ cmovzq %rsi, %r11
1338
+ addq $128, %rbp
1339
+
1340
+ cmpq $8, bf
1341
+ movq (%rbp), %rsi
1342
+ cmovzq %rsi, %rax
1343
+ movq 8(%rbp), %rsi
1344
+ cmovzq %rsi, %rbx
1345
+ movq 16(%rbp), %rsi
1346
+ cmovzq %rsi, %rcx
1347
+ movq 24(%rbp), %rsi
1348
+ cmovzq %rsi, %rdx
1349
+ movq 96(%rbp), %rsi
1350
+ cmovzq %rsi, %r8
1351
+ movq 104(%rbp), %rsi
1352
+ cmovzq %rsi, %r9
1353
+ movq 112(%rbp), %rsi
1354
+ cmovzq %rsi, %r10
1355
+ movq 120(%rbp), %rsi
1356
+ cmovzq %rsi, %r11
1357
+
1358
+ movq %rax, TABENT(%rsp)
1359
+ movq %rbx, TABENT+8(%rsp)
1360
+ movq %rcx, TABENT+16(%rsp)
1361
+ movq %rdx, TABENT+24(%rsp)
1362
+ movq %r8, TABENT+96(%rsp)
1363
+ movq %r9, TABENT+104(%rsp)
1364
+ movq %r10, TABENT+112(%rsp)
1365
+ movq %r11, TABENT+120(%rsp)
1366
+
1367
+ // Add those elements to initialize the accumulator for bit position 252
1368
+
1369
+ leaq ACC(%rsp), %rdi
1370
+ leaq TABENT(%rsp), %rsi
1371
+ leaq BTABENT(%rsp), %rbp
1372
+ callq edwards25519_scalarmuldouble_alt_pepadd
1373
+
1374
+ // Main loop with acc = [scalar/2^i] * point + [bscalar/2^i] * basepoint
1375
+ // Start with i = 252 for bits 248..251 and go down four at a time to 3..0
1376
+
1377
+ edwards25519_scalarmuldouble_alt_loop:
1378
+
1379
+ movq i, %rax
1380
+ subq $4, %rax
1381
+ movq %rax, i
1382
+
1383
+ // Double to acc' = 2 * acc
1384
+
1385
+ leaq ACC(%rsp), %rdi
1386
+ leaq ACC(%rsp), %rsi
1387
+ callq edwards25519_scalarmuldouble_alt_pdouble
1388
+
1389
+ // Get btable entry, first getting the adjusted bitfield...
1390
+
1391
+ movq i, %rax
1392
+ movq %rax, %rcx
1393
+ shrq $6, %rax
1394
+ movq 32(%rsp,%rax,8), %rax
1395
+ shrq %cl, %rax
1396
+ andq $15, %rax
1397
+
1398
+ subq $8, %rax
1399
+ sbbq %rcx, %rcx
1400
+ xorq %rcx, %rax
1401
+ subq %rcx, %rax
1402
+ movq %rcx, cf
1403
+ movq %rax, bf
1404
+
1405
+ // ... then doing constant-time lookup with the appropriate index...
1406
+
1407
+ movl $1, %eax
1408
+ xorl %ebx, %ebx
1409
+ xorl %ecx, %ecx
1410
+ xorl %edx, %edx
1411
+ movl $1, %r8d
1412
+ xorl %r9d, %r9d
1413
+ xorl %r10d, %r10d
1414
+ xorl %r11d, %r11d
1415
+ xorl %r12d, %r12d
1416
+ xorl %r13d, %r13d
1417
+ xorl %r14d, %r14d
1418
+ xorl %r15d, %r15d
1419
+
1420
+ leaq edwards25519_scalarmuldouble_alt_table(%rip), %rbp
1421
+
1422
+ cmpq $1, bf
1423
+ movq (%rbp), %rsi
1424
+ cmovzq %rsi, %rax
1425
+ movq 8(%rbp), %rsi
1426
+ cmovzq %rsi, %rbx
1427
+ movq 16(%rbp), %rsi
1428
+ cmovzq %rsi, %rcx
1429
+ movq 24(%rbp), %rsi
1430
+ cmovzq %rsi, %rdx
1431
+ movq 32(%rbp), %rsi
1432
+ cmovzq %rsi, %r8
1433
+ movq 40(%rbp), %rsi
1434
+ cmovzq %rsi, %r9
1435
+ movq 48(%rbp), %rsi
1436
+ cmovzq %rsi, %r10
1437
+ movq 56(%rbp), %rsi
1438
+ cmovzq %rsi, %r11
1439
+ movq 64(%rbp), %rsi
1440
+ cmovzq %rsi, %r12
1441
+ movq 72(%rbp), %rsi
1442
+ cmovzq %rsi, %r13
1443
+ movq 80(%rbp), %rsi
1444
+ cmovzq %rsi, %r14
1445
+ movq 88(%rbp), %rsi
1446
+ cmovzq %rsi, %r15
1447
+ addq $96, %rbp
1448
+
1449
+ cmpq $2, bf
1450
+ movq (%rbp), %rsi
1451
+ cmovzq %rsi, %rax
1452
+ movq 8(%rbp), %rsi
1453
+ cmovzq %rsi, %rbx
1454
+ movq 16(%rbp), %rsi
1455
+ cmovzq %rsi, %rcx
1456
+ movq 24(%rbp), %rsi
1457
+ cmovzq %rsi, %rdx
1458
+ movq 32(%rbp), %rsi
1459
+ cmovzq %rsi, %r8
1460
+ movq 40(%rbp), %rsi
1461
+ cmovzq %rsi, %r9
1462
+ movq 48(%rbp), %rsi
1463
+ cmovzq %rsi, %r10
1464
+ movq 56(%rbp), %rsi
1465
+ cmovzq %rsi, %r11
1466
+ movq 64(%rbp), %rsi
1467
+ cmovzq %rsi, %r12
1468
+ movq 72(%rbp), %rsi
1469
+ cmovzq %rsi, %r13
1470
+ movq 80(%rbp), %rsi
1471
+ cmovzq %rsi, %r14
1472
+ movq 88(%rbp), %rsi
1473
+ cmovzq %rsi, %r15
1474
+ addq $96, %rbp
1475
+
1476
+ cmpq $3, bf
1477
+ movq (%rbp), %rsi
1478
+ cmovzq %rsi, %rax
1479
+ movq 8(%rbp), %rsi
1480
+ cmovzq %rsi, %rbx
1481
+ movq 16(%rbp), %rsi
1482
+ cmovzq %rsi, %rcx
1483
+ movq 24(%rbp), %rsi
1484
+ cmovzq %rsi, %rdx
1485
+ movq 32(%rbp), %rsi
1486
+ cmovzq %rsi, %r8
1487
+ movq 40(%rbp), %rsi
1488
+ cmovzq %rsi, %r9
1489
+ movq 48(%rbp), %rsi
1490
+ cmovzq %rsi, %r10
1491
+ movq 56(%rbp), %rsi
1492
+ cmovzq %rsi, %r11
1493
+ movq 64(%rbp), %rsi
1494
+ cmovzq %rsi, %r12
1495
+ movq 72(%rbp), %rsi
1496
+ cmovzq %rsi, %r13
1497
+ movq 80(%rbp), %rsi
1498
+ cmovzq %rsi, %r14
1499
+ movq 88(%rbp), %rsi
1500
+ cmovzq %rsi, %r15
1501
+ addq $96, %rbp
1502
+
1503
+ cmpq $4, bf
1504
+ movq (%rbp), %rsi
1505
+ cmovzq %rsi, %rax
1506
+ movq 8(%rbp), %rsi
1507
+ cmovzq %rsi, %rbx
1508
+ movq 16(%rbp), %rsi
1509
+ cmovzq %rsi, %rcx
1510
+ movq 24(%rbp), %rsi
1511
+ cmovzq %rsi, %rdx
1512
+ movq 32(%rbp), %rsi
1513
+ cmovzq %rsi, %r8
1514
+ movq 40(%rbp), %rsi
1515
+ cmovzq %rsi, %r9
1516
+ movq 48(%rbp), %rsi
1517
+ cmovzq %rsi, %r10
1518
+ movq 56(%rbp), %rsi
1519
+ cmovzq %rsi, %r11
1520
+ movq 64(%rbp), %rsi
1521
+ cmovzq %rsi, %r12
1522
+ movq 72(%rbp), %rsi
1523
+ cmovzq %rsi, %r13
1524
+ movq 80(%rbp), %rsi
1525
+ cmovzq %rsi, %r14
1526
+ movq 88(%rbp), %rsi
1527
+ cmovzq %rsi, %r15
1528
+ addq $96, %rbp
1529
+
1530
+ cmpq $5, bf
1531
+ movq (%rbp), %rsi
1532
+ cmovzq %rsi, %rax
1533
+ movq 8(%rbp), %rsi
1534
+ cmovzq %rsi, %rbx
1535
+ movq 16(%rbp), %rsi
1536
+ cmovzq %rsi, %rcx
1537
+ movq 24(%rbp), %rsi
1538
+ cmovzq %rsi, %rdx
1539
+ movq 32(%rbp), %rsi
1540
+ cmovzq %rsi, %r8
1541
+ movq 40(%rbp), %rsi
1542
+ cmovzq %rsi, %r9
1543
+ movq 48(%rbp), %rsi
1544
+ cmovzq %rsi, %r10
1545
+ movq 56(%rbp), %rsi
1546
+ cmovzq %rsi, %r11
1547
+ movq 64(%rbp), %rsi
1548
+ cmovzq %rsi, %r12
1549
+ movq 72(%rbp), %rsi
1550
+ cmovzq %rsi, %r13
1551
+ movq 80(%rbp), %rsi
1552
+ cmovzq %rsi, %r14
1553
+ movq 88(%rbp), %rsi
1554
+ cmovzq %rsi, %r15
1555
+ addq $96, %rbp
1556
+
1557
+ cmpq $6, bf
1558
+ movq (%rbp), %rsi
1559
+ cmovzq %rsi, %rax
1560
+ movq 8(%rbp), %rsi
1561
+ cmovzq %rsi, %rbx
1562
+ movq 16(%rbp), %rsi
1563
+ cmovzq %rsi, %rcx
1564
+ movq 24(%rbp), %rsi
1565
+ cmovzq %rsi, %rdx
1566
+ movq 32(%rbp), %rsi
1567
+ cmovzq %rsi, %r8
1568
+ movq 40(%rbp), %rsi
1569
+ cmovzq %rsi, %r9
1570
+ movq 48(%rbp), %rsi
1571
+ cmovzq %rsi, %r10
1572
+ movq 56(%rbp), %rsi
1573
+ cmovzq %rsi, %r11
1574
+ movq 64(%rbp), %rsi
1575
+ cmovzq %rsi, %r12
1576
+ movq 72(%rbp), %rsi
1577
+ cmovzq %rsi, %r13
1578
+ movq 80(%rbp), %rsi
1579
+ cmovzq %rsi, %r14
1580
+ movq 88(%rbp), %rsi
1581
+ cmovzq %rsi, %r15
1582
+ addq $96, %rbp
1583
+
1584
+ cmpq $7, bf
1585
+ movq (%rbp), %rsi
1586
+ cmovzq %rsi, %rax
1587
+ movq 8(%rbp), %rsi
1588
+ cmovzq %rsi, %rbx
1589
+ movq 16(%rbp), %rsi
1590
+ cmovzq %rsi, %rcx
1591
+ movq 24(%rbp), %rsi
1592
+ cmovzq %rsi, %rdx
1593
+ movq 32(%rbp), %rsi
1594
+ cmovzq %rsi, %r8
1595
+ movq 40(%rbp), %rsi
1596
+ cmovzq %rsi, %r9
1597
+ movq 48(%rbp), %rsi
1598
+ cmovzq %rsi, %r10
1599
+ movq 56(%rbp), %rsi
1600
+ cmovzq %rsi, %r11
1601
+ movq 64(%rbp), %rsi
1602
+ cmovzq %rsi, %r12
1603
+ movq 72(%rbp), %rsi
1604
+ cmovzq %rsi, %r13
1605
+ movq 80(%rbp), %rsi
1606
+ cmovzq %rsi, %r14
1607
+ movq 88(%rbp), %rsi
1608
+ cmovzq %rsi, %r15
1609
+ addq $96, %rbp
1610
+
1611
+ cmpq $8, bf
1612
+ movq (%rbp), %rsi
1613
+ cmovzq %rsi, %rax
1614
+ movq 8(%rbp), %rsi
1615
+ cmovzq %rsi, %rbx
1616
+ movq 16(%rbp), %rsi
1617
+ cmovzq %rsi, %rcx
1618
+ movq 24(%rbp), %rsi
1619
+ cmovzq %rsi, %rdx
1620
+ movq 32(%rbp), %rsi
1621
+ cmovzq %rsi, %r8
1622
+ movq 40(%rbp), %rsi
1623
+ cmovzq %rsi, %r9
1624
+ movq 48(%rbp), %rsi
1625
+ cmovzq %rsi, %r10
1626
+ movq 56(%rbp), %rsi
1627
+ cmovzq %rsi, %r11
1628
+ movq 64(%rbp), %rsi
1629
+ cmovzq %rsi, %r12
1630
+ movq 72(%rbp), %rsi
1631
+ cmovzq %rsi, %r13
1632
+ movq 80(%rbp), %rsi
1633
+ cmovzq %rsi, %r14
1634
+ movq 88(%rbp), %rsi
1635
+ cmovzq %rsi, %r15
1636
+
1637
+ // ... then optionally negating before storing. The table entry
1638
+ // is in precomputed form and we currently have
1639
+ //
1640
+ // [%rdx;%rcx;%rbx;%rax] = y - x
1641
+ // [%r11;%r10;%r9;%r8] = x + y
1642
+ // [%r15;%r14;%r13;%r12] = 2 * d * x * y
1643
+ //
1644
+ // Negation for Edwards curves is -(x,y) = (-x,y), which in this modified
1645
+ // form amounts to swapping the first two fields and negating the third.
1646
+ // The negation does not always fully reduce even mod 2^256-38 in the zero
1647
+ // case, instead giving -0 = 2^256-38. But that is fine since the result is
1648
+ // always fed to a multiplication inside the "pepadd" function below that
1649
+ // handles any 256-bit input.
1650
+
1651
+ movq cf, %rdi
1652
+ testq %rdi, %rdi
1653
+
1654
+ movq %rax, %rsi
1655
+ cmovnzq %r8, %rsi
1656
+ cmovnzq %rax, %r8
1657
+ movq %rsi, BTABENT(%rsp)
1658
+ movq %r8, BTABENT+32(%rsp)
1659
+
1660
+ movq %rbx, %rsi
1661
+ cmovnzq %r9, %rsi
1662
+ cmovnzq %rbx, %r9
1663
+ movq %rsi, BTABENT+8(%rsp)
1664
+ movq %r9, BTABENT+40(%rsp)
1665
+
1666
+ movq %rcx, %rsi
1667
+ cmovnzq %r10, %rsi
1668
+ cmovnzq %rcx, %r10
1669
+ movq %rsi, BTABENT+16(%rsp)
1670
+ movq %r10, BTABENT+48(%rsp)
1671
+
1672
+ movq %rdx, %rsi
1673
+ cmovnzq %r11, %rsi
1674
+ cmovnzq %rdx, %r11
1675
+ movq %rsi, BTABENT+24(%rsp)
1676
+ movq %r11, BTABENT+56(%rsp)
1677
+
1678
+ xorq %rdi, %r12
1679
+ xorq %rdi, %r13
1680
+ xorq %rdi, %r14
1681
+ xorq %rdi, %r15
1682
+ andq $37, %rdi
1683
+ subq %rdi, %r12
1684
+ sbbq $0, %r13
1685
+ sbbq $0, %r14
1686
+ sbbq $0, %r15
1687
+ movq %r12, BTABENT+64(%rsp)
1688
+ movq %r13, BTABENT+72(%rsp)
1689
+ movq %r14, BTABENT+80(%rsp)
1690
+ movq %r15, BTABENT+88(%rsp)
1691
+
1692
+ // Get table entry, first getting the adjusted bitfield...
1693
+
1694
+ movq i, %rax
1695
+ movq %rax, %rcx
1696
+ shrq $6, %rax
1697
+ movq (%rsp,%rax,8), %rax
1698
+ shrq %cl, %rax
1699
+ andq $15, %rax
1700
+
1701
+ subq $8, %rax
1702
+ sbbq %rcx, %rcx
1703
+ xorq %rcx, %rax
1704
+ subq %rcx, %rax
1705
+ movq %rcx, cf
1706
+ movq %rax, bf
1707
+
1708
+ // ...and constant-time indexing based on that index
1709
+ // Do the Y and Z fields first, to save on registers
1710
+ // and store them back (they don't need any modification)
1711
+
1712
+ movl $1, %eax
1713
+ xorl %ebx, %ebx
1714
+ xorl %ecx, %ecx
1715
+ xorl %edx, %edx
1716
+ movl $1, %r8d
1717
+ xorl %r9d, %r9d
1718
+ xorl %r10d, %r10d
1719
+ xorl %r11d, %r11d
1720
+
1721
+ leaq TAB+32(%rsp), %rbp
1722
+
1723
+ cmpq $1, bf
1724
+ movq (%rbp), %rsi
1725
+ cmovzq %rsi, %rax
1726
+ movq 8(%rbp), %rsi
1727
+ cmovzq %rsi, %rbx
1728
+ movq 16(%rbp), %rsi
1729
+ cmovzq %rsi, %rcx
1730
+ movq 24(%rbp), %rsi
1731
+ cmovzq %rsi, %rdx
1732
+ movq 32(%rbp), %rsi
1733
+ cmovzq %rsi, %r8
1734
+ movq 40(%rbp), %rsi
1735
+ cmovzq %rsi, %r9
1736
+ movq 48(%rbp), %rsi
1737
+ cmovzq %rsi, %r10
1738
+ movq 56(%rbp), %rsi
1739
+ cmovzq %rsi, %r11
1740
+ addq $128, %rbp
1741
+
1742
+ cmpq $2, bf
1743
+ movq (%rbp), %rsi
1744
+ cmovzq %rsi, %rax
1745
+ movq 8(%rbp), %rsi
1746
+ cmovzq %rsi, %rbx
1747
+ movq 16(%rbp), %rsi
1748
+ cmovzq %rsi, %rcx
1749
+ movq 24(%rbp), %rsi
1750
+ cmovzq %rsi, %rdx
1751
+ movq 32(%rbp), %rsi
1752
+ cmovzq %rsi, %r8
1753
+ movq 40(%rbp), %rsi
1754
+ cmovzq %rsi, %r9
1755
+ movq 48(%rbp), %rsi
1756
+ cmovzq %rsi, %r10
1757
+ movq 56(%rbp), %rsi
1758
+ cmovzq %rsi, %r11
1759
+ addq $128, %rbp
1760
+
1761
+ cmpq $3, bf
1762
+ movq (%rbp), %rsi
1763
+ cmovzq %rsi, %rax
1764
+ movq 8(%rbp), %rsi
1765
+ cmovzq %rsi, %rbx
1766
+ movq 16(%rbp), %rsi
1767
+ cmovzq %rsi, %rcx
1768
+ movq 24(%rbp), %rsi
1769
+ cmovzq %rsi, %rdx
1770
+ movq 32(%rbp), %rsi
1771
+ cmovzq %rsi, %r8
1772
+ movq 40(%rbp), %rsi
1773
+ cmovzq %rsi, %r9
1774
+ movq 48(%rbp), %rsi
1775
+ cmovzq %rsi, %r10
1776
+ movq 56(%rbp), %rsi
1777
+ cmovzq %rsi, %r11
1778
+ addq $128, %rbp
1779
+
1780
+ cmpq $4, bf
1781
+ movq (%rbp), %rsi
1782
+ cmovzq %rsi, %rax
1783
+ movq 8(%rbp), %rsi
1784
+ cmovzq %rsi, %rbx
1785
+ movq 16(%rbp), %rsi
1786
+ cmovzq %rsi, %rcx
1787
+ movq 24(%rbp), %rsi
1788
+ cmovzq %rsi, %rdx
1789
+ movq 32(%rbp), %rsi
1790
+ cmovzq %rsi, %r8
1791
+ movq 40(%rbp), %rsi
1792
+ cmovzq %rsi, %r9
1793
+ movq 48(%rbp), %rsi
1794
+ cmovzq %rsi, %r10
1795
+ movq 56(%rbp), %rsi
1796
+ cmovzq %rsi, %r11
1797
+ addq $128, %rbp
1798
+
1799
+ cmpq $5, bf
1800
+ movq (%rbp), %rsi
1801
+ cmovzq %rsi, %rax
1802
+ movq 8(%rbp), %rsi
1803
+ cmovzq %rsi, %rbx
1804
+ movq 16(%rbp), %rsi
1805
+ cmovzq %rsi, %rcx
1806
+ movq 24(%rbp), %rsi
1807
+ cmovzq %rsi, %rdx
1808
+ movq 32(%rbp), %rsi
1809
+ cmovzq %rsi, %r8
1810
+ movq 40(%rbp), %rsi
1811
+ cmovzq %rsi, %r9
1812
+ movq 48(%rbp), %rsi
1813
+ cmovzq %rsi, %r10
1814
+ movq 56(%rbp), %rsi
1815
+ cmovzq %rsi, %r11
1816
+ addq $128, %rbp
1817
+
1818
+ cmpq $6, bf
1819
+ movq (%rbp), %rsi
1820
+ cmovzq %rsi, %rax
1821
+ movq 8(%rbp), %rsi
1822
+ cmovzq %rsi, %rbx
1823
+ movq 16(%rbp), %rsi
1824
+ cmovzq %rsi, %rcx
1825
+ movq 24(%rbp), %rsi
1826
+ cmovzq %rsi, %rdx
1827
+ movq 32(%rbp), %rsi
1828
+ cmovzq %rsi, %r8
1829
+ movq 40(%rbp), %rsi
1830
+ cmovzq %rsi, %r9
1831
+ movq 48(%rbp), %rsi
1832
+ cmovzq %rsi, %r10
1833
+ movq 56(%rbp), %rsi
1834
+ cmovzq %rsi, %r11
1835
+ addq $128, %rbp
1836
+
1837
+ cmpq $7, bf
1838
+ movq (%rbp), %rsi
1839
+ cmovzq %rsi, %rax
1840
+ movq 8(%rbp), %rsi
1841
+ cmovzq %rsi, %rbx
1842
+ movq 16(%rbp), %rsi
1843
+ cmovzq %rsi, %rcx
1844
+ movq 24(%rbp), %rsi
1845
+ cmovzq %rsi, %rdx
1846
+ movq 32(%rbp), %rsi
1847
+ cmovzq %rsi, %r8
1848
+ movq 40(%rbp), %rsi
1849
+ cmovzq %rsi, %r9
1850
+ movq 48(%rbp), %rsi
1851
+ cmovzq %rsi, %r10
1852
+ movq 56(%rbp), %rsi
1853
+ cmovzq %rsi, %r11
1854
+ addq $128, %rbp
1855
+
1856
+ cmpq $8, bf
1857
+ movq (%rbp), %rsi
1858
+ cmovzq %rsi, %rax
1859
+ movq 8(%rbp), %rsi
1860
+ cmovzq %rsi, %rbx
1861
+ movq 16(%rbp), %rsi
1862
+ cmovzq %rsi, %rcx
1863
+ movq 24(%rbp), %rsi
1864
+ cmovzq %rsi, %rdx
1865
+ movq 32(%rbp), %rsi
1866
+ cmovzq %rsi, %r8
1867
+ movq 40(%rbp), %rsi
1868
+ cmovzq %rsi, %r9
1869
+ movq 48(%rbp), %rsi
1870
+ cmovzq %rsi, %r10
1871
+ movq 56(%rbp), %rsi
1872
+ cmovzq %rsi, %r11
1873
+
1874
+ movq %rax, TABENT+32(%rsp)
1875
+ movq %rbx, TABENT+40(%rsp)
1876
+ movq %rcx, TABENT+48(%rsp)
1877
+ movq %rdx, TABENT+56(%rsp)
1878
+ movq %r8, TABENT+64(%rsp)
1879
+ movq %r9, TABENT+72(%rsp)
1880
+ movq %r10, TABENT+80(%rsp)
1881
+ movq %r11, TABENT+88(%rsp)
1882
+
1883
+ // Now do the X and W fields...
1884
+
1885
+ leaq TAB(%rsp), %rbp
1886
+
1887
+ xorl %eax, %eax
1888
+ xorl %ebx, %ebx
1889
+ xorl %ecx, %ecx
1890
+ xorl %edx, %edx
1891
+ xorl %r8d, %r8d
1892
+ xorl %r9d, %r9d
1893
+ xorl %r10d, %r10d
1894
+ xorl %r11d, %r11d
1895
+
1896
+ cmpq $1, bf
1897
+ movq (%rbp), %rsi
1898
+ cmovzq %rsi, %rax
1899
+ movq 8(%rbp), %rsi
1900
+ cmovzq %rsi, %rbx
1901
+ movq 16(%rbp), %rsi
1902
+ cmovzq %rsi, %rcx
1903
+ movq 24(%rbp), %rsi
1904
+ cmovzq %rsi, %rdx
1905
+ movq 96(%rbp), %rsi
1906
+ cmovzq %rsi, %r8
1907
+ movq 104(%rbp), %rsi
1908
+ cmovzq %rsi, %r9
1909
+ movq 112(%rbp), %rsi
1910
+ cmovzq %rsi, %r10
1911
+ movq 120(%rbp), %rsi
1912
+ cmovzq %rsi, %r11
1913
+ addq $128, %rbp
1914
+
1915
+ cmpq $2, bf
1916
+ movq (%rbp), %rsi
1917
+ cmovzq %rsi, %rax
1918
+ movq 8(%rbp), %rsi
1919
+ cmovzq %rsi, %rbx
1920
+ movq 16(%rbp), %rsi
1921
+ cmovzq %rsi, %rcx
1922
+ movq 24(%rbp), %rsi
1923
+ cmovzq %rsi, %rdx
1924
+ movq 96(%rbp), %rsi
1925
+ cmovzq %rsi, %r8
1926
+ movq 104(%rbp), %rsi
1927
+ cmovzq %rsi, %r9
1928
+ movq 112(%rbp), %rsi
1929
+ cmovzq %rsi, %r10
1930
+ movq 120(%rbp), %rsi
1931
+ cmovzq %rsi, %r11
1932
+ addq $128, %rbp
1933
+
1934
+ cmpq $3, bf
1935
+ movq (%rbp), %rsi
1936
+ cmovzq %rsi, %rax
1937
+ movq 8(%rbp), %rsi
1938
+ cmovzq %rsi, %rbx
1939
+ movq 16(%rbp), %rsi
1940
+ cmovzq %rsi, %rcx
1941
+ movq 24(%rbp), %rsi
1942
+ cmovzq %rsi, %rdx
1943
+ movq 96(%rbp), %rsi
1944
+ cmovzq %rsi, %r8
1945
+ movq 104(%rbp), %rsi
1946
+ cmovzq %rsi, %r9
1947
+ movq 112(%rbp), %rsi
1948
+ cmovzq %rsi, %r10
1949
+ movq 120(%rbp), %rsi
1950
+ cmovzq %rsi, %r11
1951
+ addq $128, %rbp
1952
+
1953
+ cmpq $4, bf
1954
+ movq (%rbp), %rsi
1955
+ cmovzq %rsi, %rax
1956
+ movq 8(%rbp), %rsi
1957
+ cmovzq %rsi, %rbx
1958
+ movq 16(%rbp), %rsi
1959
+ cmovzq %rsi, %rcx
1960
+ movq 24(%rbp), %rsi
1961
+ cmovzq %rsi, %rdx
1962
+ movq 96(%rbp), %rsi
1963
+ cmovzq %rsi, %r8
1964
+ movq 104(%rbp), %rsi
1965
+ cmovzq %rsi, %r9
1966
+ movq 112(%rbp), %rsi
1967
+ cmovzq %rsi, %r10
1968
+ movq 120(%rbp), %rsi
1969
+ cmovzq %rsi, %r11
1970
+ addq $128, %rbp
1971
+
1972
+ cmpq $5, bf
1973
+ movq (%rbp), %rsi
1974
+ cmovzq %rsi, %rax
1975
+ movq 8(%rbp), %rsi
1976
+ cmovzq %rsi, %rbx
1977
+ movq 16(%rbp), %rsi
1978
+ cmovzq %rsi, %rcx
1979
+ movq 24(%rbp), %rsi
1980
+ cmovzq %rsi, %rdx
1981
+ movq 96(%rbp), %rsi
1982
+ cmovzq %rsi, %r8
1983
+ movq 104(%rbp), %rsi
1984
+ cmovzq %rsi, %r9
1985
+ movq 112(%rbp), %rsi
1986
+ cmovzq %rsi, %r10
1987
+ movq 120(%rbp), %rsi
1988
+ cmovzq %rsi, %r11
1989
+ addq $128, %rbp
1990
+
1991
+ cmpq $6, bf
1992
+ movq (%rbp), %rsi
1993
+ cmovzq %rsi, %rax
1994
+ movq 8(%rbp), %rsi
1995
+ cmovzq %rsi, %rbx
1996
+ movq 16(%rbp), %rsi
1997
+ cmovzq %rsi, %rcx
1998
+ movq 24(%rbp), %rsi
1999
+ cmovzq %rsi, %rdx
2000
+ movq 96(%rbp), %rsi
2001
+ cmovzq %rsi, %r8
2002
+ movq 104(%rbp), %rsi
2003
+ cmovzq %rsi, %r9
2004
+ movq 112(%rbp), %rsi
2005
+ cmovzq %rsi, %r10
2006
+ movq 120(%rbp), %rsi
2007
+ cmovzq %rsi, %r11
2008
+ addq $128, %rbp
2009
+
2010
+ cmpq $7, bf
2011
+ movq (%rbp), %rsi
2012
+ cmovzq %rsi, %rax
2013
+ movq 8(%rbp), %rsi
2014
+ cmovzq %rsi, %rbx
2015
+ movq 16(%rbp), %rsi
2016
+ cmovzq %rsi, %rcx
2017
+ movq 24(%rbp), %rsi
2018
+ cmovzq %rsi, %rdx
2019
+ movq 96(%rbp), %rsi
2020
+ cmovzq %rsi, %r8
2021
+ movq 104(%rbp), %rsi
2022
+ cmovzq %rsi, %r9
2023
+ movq 112(%rbp), %rsi
2024
+ cmovzq %rsi, %r10
2025
+ movq 120(%rbp), %rsi
2026
+ cmovzq %rsi, %r11
2027
+ addq $128, %rbp
2028
+
2029
+ cmpq $8, bf
2030
+ movq (%rbp), %rsi
2031
+ cmovzq %rsi, %rax
2032
+ movq 8(%rbp), %rsi
2033
+ cmovzq %rsi, %rbx
2034
+ movq 16(%rbp), %rsi
2035
+ cmovzq %rsi, %rcx
2036
+ movq 24(%rbp), %rsi
2037
+ cmovzq %rsi, %rdx
2038
+ movq 96(%rbp), %rsi
2039
+ cmovzq %rsi, %r8
2040
+ movq 104(%rbp), %rsi
2041
+ cmovzq %rsi, %r9
2042
+ movq 112(%rbp), %rsi
2043
+ cmovzq %rsi, %r10
2044
+ movq 120(%rbp), %rsi
2045
+ cmovzq %rsi, %r11
2046
+
2047
+ // ... then optionally negate before storing the X and W fields. This
2048
+ // time the table entry is extended-projective, and is here:
2049
+ //
2050
+ // [%rdx;%rcx;%rbx;%rax] = X
2051
+ // [tabent+32] = Y
2052
+ // [tabent+64] = Z
2053
+ // [%r11;%r10;%r9;%r8] = W
2054
+ //
2055
+ // This time we just need to negate the X and the W fields.
2056
+ // The crude way negation is done can result in values of X or W
2057
+ // (when initially zero before negation) being exactly equal to
2058
+ // 2^256-38, but the "pepadd" function handles that correctly.
2059
+
2060
+ movq cf, %rdi
2061
+
2062
+ xorq %rdi, %rax
2063
+ xorq %rdi, %rbx
2064
+ xorq %rdi, %rcx
2065
+ xorq %rdi, %rdx
2066
+
2067
+ xorq %rdi, %r8
2068
+ xorq %rdi, %r9
2069
+ xorq %rdi, %r10
2070
+ xorq %rdi, %r11
2071
+
2072
+ andq $37, %rdi
2073
+
2074
+ subq %rdi, %rax
2075
+ sbbq $0, %rbx
2076
+ sbbq $0, %rcx
2077
+ sbbq $0, %rdx
2078
+
2079
+ movq %rax, TABENT(%rsp)
2080
+ movq %rbx, TABENT+8(%rsp)
2081
+ movq %rcx, TABENT+16(%rsp)
2082
+ movq %rdx, TABENT+24(%rsp)
2083
+
2084
+ subq %rdi, %r8
2085
+ sbbq $0, %r9
2086
+ sbbq $0, %r10
2087
+ sbbq $0, %r11
2088
+
2089
+ movq %r8, TABENT+96(%rsp)
2090
+ movq %r9, TABENT+104(%rsp)
2091
+ movq %r10, TABENT+112(%rsp)
2092
+ movq %r11, TABENT+120(%rsp)
2093
+
2094
+ // Double to acc' = 4 * acc
2095
+
2096
+ leaq ACC(%rsp), %rdi
2097
+ leaq ACC(%rsp), %rsi
2098
+ callq edwards25519_scalarmuldouble_alt_pdouble
2099
+
2100
+ // Add tabent := tabent + btabent
2101
+
2102
+ leaq TABENT(%rsp), %rdi
2103
+ leaq TABENT(%rsp), %rsi
2104
+ leaq BTABENT(%rsp), %rbp
2105
+ callq edwards25519_scalarmuldouble_alt_pepadd
2106
+
2107
+ // Double to acc' = 8 * acc
2108
+
2109
+ leaq ACC(%rsp), %rdi
2110
+ leaq ACC(%rsp), %rsi
2111
+ callq edwards25519_scalarmuldouble_alt_pdouble
2112
+
2113
+ // Double to acc' = 16 * acc
2114
+
2115
+ leaq ACC(%rsp), %rdi
2116
+ leaq ACC(%rsp), %rsi
2117
+ callq edwards25519_scalarmuldouble_alt_epdouble
2118
+
2119
+ // Add table entry, acc := acc + tabent
2120
+
2121
+ leaq ACC(%rsp), %rdi
2122
+ leaq ACC(%rsp), %rsi
2123
+ leaq TABENT(%rsp), %rbp
2124
+ callq edwards25519_scalarmuldouble_alt_epadd
2125
+
2126
+ // Loop down
2127
+
2128
+ movq i, %rax
2129
+ testq %rax, %rax
2130
+ jnz edwards25519_scalarmuldouble_alt_loop
2131
+
2132
+ // Prepare to call the modular inverse function to get tab = 1/z
2133
+
2134
+ leaq TAB(%rsp), %rdi
2135
+ leaq ACC+64(%rsp), %rsi
2136
+
2137
+ // Inline copy of bignum_inv_p25519, identical except for stripping out
2138
+ // the prologue and epilogue saving and restoring registers and making
2139
+ // and reclaiming room on the stack. For more details and explanations see
2140
+ // "x86/curve25519/bignum_inv_p25519.S". Note that the stack it uses for
2141
+ // its own temporaries is 208 bytes, so it has no effect on variables
2142
+ // that are needed in the rest of our computation here: res, tab and acc.
2143
+
2144
+ movq %rdi, 0xc0(%rsp)
2145
+ xorl %eax, %eax
2146
+ leaq -0x13(%rax), %rcx
2147
+ notq %rax
2148
+ movq %rcx, (%rsp)
2149
+ movq %rax, 0x8(%rsp)
2150
+ movq %rax, 0x10(%rsp)
2151
+ btr $0x3f, %rax
2152
+ movq %rax, 0x18(%rsp)
2153
+ movq (%rsi), %rdx
2154
+ movq 0x8(%rsi), %rcx
2155
+ movq 0x10(%rsi), %r8
2156
+ movq 0x18(%rsi), %r9
2157
+ movl $0x1, %eax
2158
+ xorl %r10d, %r10d
2159
+ bts $0x3f, %r9
2160
+ adcq %r10, %rax
2161
+ imulq $0x13, %rax, %rax
2162
+ addq %rax, %rdx
2163
+ adcq %r10, %rcx
2164
+ adcq %r10, %r8
2165
+ adcq %r10, %r9
2166
+ movl $0x13, %eax
2167
+ cmovbq %r10, %rax
2168
+ subq %rax, %rdx
2169
+ sbbq %r10, %rcx
2170
+ sbbq %r10, %r8
2171
+ sbbq %r10, %r9
2172
+ btr $0x3f, %r9
2173
+ movq %rdx, 0x20(%rsp)
2174
+ movq %rcx, 0x28(%rsp)
2175
+ movq %r8, 0x30(%rsp)
2176
+ movq %r9, 0x38(%rsp)
2177
+ xorl %eax, %eax
2178
+ movq %rax, 0x40(%rsp)
2179
+ movq %rax, 0x48(%rsp)
2180
+ movq %rax, 0x50(%rsp)
2181
+ movq %rax, 0x58(%rsp)
2182
+ movabsq $0xa0f99e2375022099, %rax
2183
+ movq %rax, 0x60(%rsp)
2184
+ movabsq $0xa8c68f3f1d132595, %rax
2185
+ movq %rax, 0x68(%rsp)
2186
+ movabsq $0x6c6c893805ac5242, %rax
2187
+ movq %rax, 0x70(%rsp)
2188
+ movabsq $0x276508b241770615, %rax
2189
+ movq %rax, 0x78(%rsp)
2190
+ movq $0xa, 0x90(%rsp)
2191
+ movq $0x1, 0x98(%rsp)
2192
+ jmp edwards25519_scalarmuldouble_alt_midloop
2193
+ edwards25519_scalarmuldouble_alt_inverseloop:
2194
+ movq %r8, %r9
2195
+ sarq $0x3f, %r9
2196
+ xorq %r9, %r8
2197
+ subq %r9, %r8
2198
+ movq %r10, %r11
2199
+ sarq $0x3f, %r11
2200
+ xorq %r11, %r10
2201
+ subq %r11, %r10
2202
+ movq %r12, %r13
2203
+ sarq $0x3f, %r13
2204
+ xorq %r13, %r12
2205
+ subq %r13, %r12
2206
+ movq %r14, %r15
2207
+ sarq $0x3f, %r15
2208
+ xorq %r15, %r14
2209
+ subq %r15, %r14
2210
+ movq %r8, %rax
2211
+ andq %r9, %rax
2212
+ movq %r10, %rdi
2213
+ andq %r11, %rdi
2214
+ addq %rax, %rdi
2215
+ movq %rdi, 0x80(%rsp)
2216
+ movq %r12, %rax
2217
+ andq %r13, %rax
2218
+ movq %r14, %rsi
2219
+ andq %r15, %rsi
2220
+ addq %rax, %rsi
2221
+ movq %rsi, 0x88(%rsp)
2222
+ xorl %ebx, %ebx
2223
+ movq (%rsp), %rax
2224
+ xorq %r9, %rax
2225
+ mulq %r8
2226
+ addq %rax, %rdi
2227
+ adcq %rdx, %rbx
2228
+ movq 0x20(%rsp), %rax
2229
+ xorq %r11, %rax
2230
+ mulq %r10
2231
+ addq %rax, %rdi
2232
+ adcq %rdx, %rbx
2233
+ xorl %ebp, %ebp
2234
+ movq (%rsp), %rax
2235
+ xorq %r13, %rax
2236
+ mulq %r12
2237
+ addq %rax, %rsi
2238
+ adcq %rdx, %rbp
2239
+ movq 0x20(%rsp), %rax
2240
+ xorq %r15, %rax
2241
+ mulq %r14
2242
+ addq %rax, %rsi
2243
+ adcq %rdx, %rbp
2244
+ xorl %ecx, %ecx
2245
+ movq 0x8(%rsp), %rax
2246
+ xorq %r9, %rax
2247
+ mulq %r8
2248
+ addq %rax, %rbx
2249
+ adcq %rdx, %rcx
2250
+ movq 0x28(%rsp), %rax
2251
+ xorq %r11, %rax
2252
+ mulq %r10
2253
+ addq %rax, %rbx
2254
+ adcq %rdx, %rcx
2255
+ shrdq $0x3b, %rbx, %rdi
2256
+ movq %rdi, (%rsp)
2257
+ xorl %edi, %edi
2258
+ movq 0x8(%rsp), %rax
2259
+ xorq %r13, %rax
2260
+ mulq %r12
2261
+ addq %rax, %rbp
2262
+ adcq %rdx, %rdi
2263
+ movq 0x28(%rsp), %rax
2264
+ xorq %r15, %rax
2265
+ mulq %r14
2266
+ addq %rax, %rbp
2267
+ adcq %rdx, %rdi
2268
+ shrdq $0x3b, %rbp, %rsi
2269
+ movq %rsi, 0x20(%rsp)
2270
+ xorl %esi, %esi
2271
+ movq 0x10(%rsp), %rax
2272
+ xorq %r9, %rax
2273
+ mulq %r8
2274
+ addq %rax, %rcx
2275
+ adcq %rdx, %rsi
2276
+ movq 0x30(%rsp), %rax
2277
+ xorq %r11, %rax
2278
+ mulq %r10
2279
+ addq %rax, %rcx
2280
+ adcq %rdx, %rsi
2281
+ shrdq $0x3b, %rcx, %rbx
2282
+ movq %rbx, 0x8(%rsp)
2283
+ xorl %ebx, %ebx
2284
+ movq 0x10(%rsp), %rax
2285
+ xorq %r13, %rax
2286
+ mulq %r12
2287
+ addq %rax, %rdi
2288
+ adcq %rdx, %rbx
2289
+ movq 0x30(%rsp), %rax
2290
+ xorq %r15, %rax
2291
+ mulq %r14
2292
+ addq %rax, %rdi
2293
+ adcq %rdx, %rbx
2294
+ shrdq $0x3b, %rdi, %rbp
2295
+ movq %rbp, 0x28(%rsp)
2296
+ movq 0x18(%rsp), %rax
2297
+ xorq %r9, %rax
2298
+ movq %rax, %rbp
2299
+ sarq $0x3f, %rbp
2300
+ andq %r8, %rbp
2301
+ negq %rbp
2302
+ mulq %r8
2303
+ addq %rax, %rsi
2304
+ adcq %rdx, %rbp
2305
+ movq 0x38(%rsp), %rax
2306
+ xorq %r11, %rax
2307
+ movq %rax, %rdx
2308
+ sarq $0x3f, %rdx
2309
+ andq %r10, %rdx
2310
+ subq %rdx, %rbp
2311
+ mulq %r10
2312
+ addq %rax, %rsi
2313
+ adcq %rdx, %rbp
2314
+ shrdq $0x3b, %rsi, %rcx
2315
+ movq %rcx, 0x10(%rsp)
2316
+ shrdq $0x3b, %rbp, %rsi
2317
+ movq 0x18(%rsp), %rax
2318
+ movq %rsi, 0x18(%rsp)
2319
+ xorq %r13, %rax
2320
+ movq %rax, %rsi
2321
+ sarq $0x3f, %rsi
2322
+ andq %r12, %rsi
2323
+ negq %rsi
2324
+ mulq %r12
2325
+ addq %rax, %rbx
2326
+ adcq %rdx, %rsi
2327
+ movq 0x38(%rsp), %rax
2328
+ xorq %r15, %rax
2329
+ movq %rax, %rdx
2330
+ sarq $0x3f, %rdx
2331
+ andq %r14, %rdx
2332
+ subq %rdx, %rsi
2333
+ mulq %r14
2334
+ addq %rax, %rbx
2335
+ adcq %rdx, %rsi
2336
+ shrdq $0x3b, %rbx, %rdi
2337
+ movq %rdi, 0x30(%rsp)
2338
+ shrdq $0x3b, %rsi, %rbx
2339
+ movq %rbx, 0x38(%rsp)
2340
+ movq 0x80(%rsp), %rbx
2341
+ movq 0x88(%rsp), %rbp
2342
+ xorl %ecx, %ecx
2343
+ movq 0x40(%rsp), %rax
2344
+ xorq %r9, %rax
2345
+ mulq %r8
2346
+ addq %rax, %rbx
2347
+ adcq %rdx, %rcx
2348
+ movq 0x60(%rsp), %rax
2349
+ xorq %r11, %rax
2350
+ mulq %r10
2351
+ addq %rax, %rbx
2352
+ adcq %rdx, %rcx
2353
+ xorl %esi, %esi
2354
+ movq 0x40(%rsp), %rax
2355
+ xorq %r13, %rax
2356
+ mulq %r12
2357
+ movq %rbx, 0x40(%rsp)
2358
+ addq %rax, %rbp
2359
+ adcq %rdx, %rsi
2360
+ movq 0x60(%rsp), %rax
2361
+ xorq %r15, %rax
2362
+ mulq %r14
2363
+ addq %rax, %rbp
2364
+ adcq %rdx, %rsi
2365
+ movq %rbp, 0x60(%rsp)
2366
+ xorl %ebx, %ebx
2367
+ movq 0x48(%rsp), %rax
2368
+ xorq %r9, %rax
2369
+ mulq %r8
2370
+ addq %rax, %rcx
2371
+ adcq %rdx, %rbx
2372
+ movq 0x68(%rsp), %rax
2373
+ xorq %r11, %rax
2374
+ mulq %r10
2375
+ addq %rax, %rcx
2376
+ adcq %rdx, %rbx
2377
+ xorl %ebp, %ebp
2378
+ movq 0x48(%rsp), %rax
2379
+ xorq %r13, %rax
2380
+ mulq %r12
2381
+ movq %rcx, 0x48(%rsp)
2382
+ addq %rax, %rsi
2383
+ adcq %rdx, %rbp
2384
+ movq 0x68(%rsp), %rax
2385
+ xorq %r15, %rax
2386
+ mulq %r14
2387
+ addq %rax, %rsi
2388
+ adcq %rdx, %rbp
2389
+ movq %rsi, 0x68(%rsp)
2390
+ xorl %ecx, %ecx
2391
+ movq 0x50(%rsp), %rax
2392
+ xorq %r9, %rax
2393
+ mulq %r8
2394
+ addq %rax, %rbx
2395
+ adcq %rdx, %rcx
2396
+ movq 0x70(%rsp), %rax
2397
+ xorq %r11, %rax
2398
+ mulq %r10
2399
+ addq %rax, %rbx
2400
+ adcq %rdx, %rcx
2401
+ xorl %esi, %esi
2402
+ movq 0x50(%rsp), %rax
2403
+ xorq %r13, %rax
2404
+ mulq %r12
2405
+ movq %rbx, 0x50(%rsp)
2406
+ addq %rax, %rbp
2407
+ adcq %rdx, %rsi
2408
+ movq 0x70(%rsp), %rax
2409
+ xorq %r15, %rax
2410
+ mulq %r14
2411
+ addq %rax, %rbp
2412
+ adcq %rdx, %rsi
2413
+ movq %rbp, 0x70(%rsp)
2414
+ movq 0x58(%rsp), %rax
2415
+ xorq %r9, %rax
2416
+ movq %r9, %rbx
2417
+ andq %r8, %rbx
2418
+ negq %rbx
2419
+ mulq %r8
2420
+ addq %rax, %rcx
2421
+ adcq %rdx, %rbx
2422
+ movq 0x78(%rsp), %rax
2423
+ xorq %r11, %rax
2424
+ movq %r11, %rdx
2425
+ andq %r10, %rdx
2426
+ subq %rdx, %rbx
2427
+ mulq %r10
2428
+ addq %rax, %rcx
2429
+ adcq %rbx, %rdx
2430
+ movq %rdx, %rbx
2431
+ shldq $0x1, %rcx, %rdx
2432
+ sarq $0x3f, %rbx
2433
+ addq %rbx, %rdx
2434
+ movl $0x13, %eax
2435
+ imulq %rdx
2436
+ movq 0x40(%rsp), %r8
2437
+ addq %rax, %r8
2438
+ movq %r8, 0x40(%rsp)
2439
+ movq 0x48(%rsp), %r8
2440
+ adcq %rdx, %r8
2441
+ movq %r8, 0x48(%rsp)
2442
+ movq 0x50(%rsp), %r8
2443
+ adcq %rbx, %r8
2444
+ movq %r8, 0x50(%rsp)
2445
+ adcq %rbx, %rcx
2446
+ shlq $0x3f, %rax
2447
+ addq %rax, %rcx
2448
+ movq 0x58(%rsp), %rax
2449
+ movq %rcx, 0x58(%rsp)
2450
+ xorq %r13, %rax
2451
+ movq %r13, %rcx
2452
+ andq %r12, %rcx
2453
+ negq %rcx
2454
+ mulq %r12
2455
+ addq %rax, %rsi
2456
+ adcq %rdx, %rcx
2457
+ movq 0x78(%rsp), %rax
2458
+ xorq %r15, %rax
2459
+ movq %r15, %rdx
2460
+ andq %r14, %rdx
2461
+ subq %rdx, %rcx
2462
+ mulq %r14
2463
+ addq %rax, %rsi
2464
+ adcq %rcx, %rdx
2465
+ movq %rdx, %rcx
2466
+ shldq $0x1, %rsi, %rdx
2467
+ sarq $0x3f, %rcx
2468
+ movl $0x13, %eax
2469
+ addq %rcx, %rdx
2470
+ imulq %rdx
2471
+ movq 0x60(%rsp), %r8
2472
+ addq %rax, %r8
2473
+ movq %r8, 0x60(%rsp)
2474
+ movq 0x68(%rsp), %r8
2475
+ adcq %rdx, %r8
2476
+ movq %r8, 0x68(%rsp)
2477
+ movq 0x70(%rsp), %r8
2478
+ adcq %rcx, %r8
2479
+ movq %r8, 0x70(%rsp)
2480
+ adcq %rcx, %rsi
2481
+ shlq $0x3f, %rax
2482
+ addq %rax, %rsi
2483
+ movq %rsi, 0x78(%rsp)
2484
+ edwards25519_scalarmuldouble_alt_midloop:
2485
+ movq 0x98(%rsp), %rsi
2486
+ movq (%rsp), %rdx
2487
+ movq 0x20(%rsp), %rcx
2488
+ movq %rdx, %rbx
2489
+ andq $0xfffff, %rbx
2490
+ movabsq $0xfffffe0000000000, %rax
2491
+ orq %rax, %rbx
2492
+ andq $0xfffff, %rcx
2493
+ movabsq $0xc000000000000000, %rax
2494
+ orq %rax, %rcx
2495
+ movq $0xfffffffffffffffe, %rax
2496
+ xorl %ebp, %ebp
2497
+ movl $0x2, %edx
2498
+ movq %rbx, %rdi
2499
+ movq %rax, %r8
2500
+ testq %rsi, %rsi
2501
+ cmovs %rbp, %r8
2502
+ testq $0x1, %rcx
2503
+ cmoveq %rbp, %r8
2504
+ cmoveq %rbp, %rdi
2505
+ xorq %r8, %rdi
2506
+ xorq %r8, %rsi
2507
+ btq $0x3f, %r8
2508
+ cmovbq %rcx, %rbx
2509
+ movq %rax, %r8
2510
+ subq %rax, %rsi
2511
+ leaq (%rcx,%rdi), %rcx
2512
+ cmovs %rbp, %r8
2513
+ movq %rbx, %rdi
2514
+ testq %rdx, %rcx
2515
+ cmoveq %rbp, %r8
2516
+ cmoveq %rbp, %rdi
2517
+ sarq $1, %rcx
2518
+ xorq %r8, %rdi
2519
+ xorq %r8, %rsi
2520
+ btq $0x3f, %r8
2521
+ cmovbq %rcx, %rbx
2522
+ movq %rax, %r8
2523
+ subq %rax, %rsi
2524
+ leaq (%rcx,%rdi), %rcx
2525
+ cmovs %rbp, %r8
2526
+ movq %rbx, %rdi
2527
+ testq %rdx, %rcx
2528
+ cmoveq %rbp, %r8
2529
+ cmoveq %rbp, %rdi
2530
+ sarq $1, %rcx
2531
+ xorq %r8, %rdi
2532
+ xorq %r8, %rsi
2533
+ btq $0x3f, %r8
2534
+ cmovbq %rcx, %rbx
2535
+ movq %rax, %r8
2536
+ subq %rax, %rsi
2537
+ leaq (%rcx,%rdi), %rcx
2538
+ cmovs %rbp, %r8
2539
+ movq %rbx, %rdi
2540
+ testq %rdx, %rcx
2541
+ cmoveq %rbp, %r8
2542
+ cmoveq %rbp, %rdi
2543
+ sarq $1, %rcx
2544
+ xorq %r8, %rdi
2545
+ xorq %r8, %rsi
2546
+ btq $0x3f, %r8
2547
+ cmovbq %rcx, %rbx
2548
+ movq %rax, %r8
2549
+ subq %rax, %rsi
2550
+ leaq (%rcx,%rdi), %rcx
2551
+ cmovs %rbp, %r8
2552
+ movq %rbx, %rdi
2553
+ testq %rdx, %rcx
2554
+ cmoveq %rbp, %r8
2555
+ cmoveq %rbp, %rdi
2556
+ sarq $1, %rcx
2557
+ xorq %r8, %rdi
2558
+ xorq %r8, %rsi
2559
+ btq $0x3f, %r8
2560
+ cmovbq %rcx, %rbx
2561
+ movq %rax, %r8
2562
+ subq %rax, %rsi
2563
+ leaq (%rcx,%rdi), %rcx
2564
+ cmovs %rbp, %r8
2565
+ movq %rbx, %rdi
2566
+ testq %rdx, %rcx
2567
+ cmoveq %rbp, %r8
2568
+ cmoveq %rbp, %rdi
2569
+ sarq $1, %rcx
2570
+ xorq %r8, %rdi
2571
+ xorq %r8, %rsi
2572
+ btq $0x3f, %r8
2573
+ cmovbq %rcx, %rbx
2574
+ movq %rax, %r8
2575
+ subq %rax, %rsi
2576
+ leaq (%rcx,%rdi), %rcx
2577
+ cmovs %rbp, %r8
2578
+ movq %rbx, %rdi
2579
+ testq %rdx, %rcx
2580
+ cmoveq %rbp, %r8
2581
+ cmoveq %rbp, %rdi
2582
+ sarq $1, %rcx
2583
+ xorq %r8, %rdi
2584
+ xorq %r8, %rsi
2585
+ btq $0x3f, %r8
2586
+ cmovbq %rcx, %rbx
2587
+ movq %rax, %r8
2588
+ subq %rax, %rsi
2589
+ leaq (%rcx,%rdi), %rcx
2590
+ cmovs %rbp, %r8
2591
+ movq %rbx, %rdi
2592
+ testq %rdx, %rcx
2593
+ cmoveq %rbp, %r8
2594
+ cmoveq %rbp, %rdi
2595
+ sarq $1, %rcx
2596
+ xorq %r8, %rdi
2597
+ xorq %r8, %rsi
2598
+ btq $0x3f, %r8
2599
+ cmovbq %rcx, %rbx
2600
+ movq %rax, %r8
2601
+ subq %rax, %rsi
2602
+ leaq (%rcx,%rdi), %rcx
2603
+ cmovs %rbp, %r8
2604
+ movq %rbx, %rdi
2605
+ testq %rdx, %rcx
2606
+ cmoveq %rbp, %r8
2607
+ cmoveq %rbp, %rdi
2608
+ sarq $1, %rcx
2609
+ xorq %r8, %rdi
2610
+ xorq %r8, %rsi
2611
+ btq $0x3f, %r8
2612
+ cmovbq %rcx, %rbx
2613
+ movq %rax, %r8
2614
+ subq %rax, %rsi
2615
+ leaq (%rcx,%rdi), %rcx
2616
+ cmovs %rbp, %r8
2617
+ movq %rbx, %rdi
2618
+ testq %rdx, %rcx
2619
+ cmoveq %rbp, %r8
2620
+ cmoveq %rbp, %rdi
2621
+ sarq $1, %rcx
2622
+ xorq %r8, %rdi
2623
+ xorq %r8, %rsi
2624
+ btq $0x3f, %r8
2625
+ cmovbq %rcx, %rbx
2626
+ movq %rax, %r8
2627
+ subq %rax, %rsi
2628
+ leaq (%rcx,%rdi), %rcx
2629
+ cmovs %rbp, %r8
2630
+ movq %rbx, %rdi
2631
+ testq %rdx, %rcx
2632
+ cmoveq %rbp, %r8
2633
+ cmoveq %rbp, %rdi
2634
+ sarq $1, %rcx
2635
+ xorq %r8, %rdi
2636
+ xorq %r8, %rsi
2637
+ btq $0x3f, %r8
2638
+ cmovbq %rcx, %rbx
2639
+ movq %rax, %r8
2640
+ subq %rax, %rsi
2641
+ leaq (%rcx,%rdi), %rcx
2642
+ cmovs %rbp, %r8
2643
+ movq %rbx, %rdi
2644
+ testq %rdx, %rcx
2645
+ cmoveq %rbp, %r8
2646
+ cmoveq %rbp, %rdi
2647
+ sarq $1, %rcx
2648
+ xorq %r8, %rdi
2649
+ xorq %r8, %rsi
2650
+ btq $0x3f, %r8
2651
+ cmovbq %rcx, %rbx
2652
+ movq %rax, %r8
2653
+ subq %rax, %rsi
2654
+ leaq (%rcx,%rdi), %rcx
2655
+ cmovs %rbp, %r8
2656
+ movq %rbx, %rdi
2657
+ testq %rdx, %rcx
2658
+ cmoveq %rbp, %r8
2659
+ cmoveq %rbp, %rdi
2660
+ sarq $1, %rcx
2661
+ xorq %r8, %rdi
2662
+ xorq %r8, %rsi
2663
+ btq $0x3f, %r8
2664
+ cmovbq %rcx, %rbx
2665
+ movq %rax, %r8
2666
+ subq %rax, %rsi
2667
+ leaq (%rcx,%rdi), %rcx
2668
+ cmovs %rbp, %r8
2669
+ movq %rbx, %rdi
2670
+ testq %rdx, %rcx
2671
+ cmoveq %rbp, %r8
2672
+ cmoveq %rbp, %rdi
2673
+ sarq $1, %rcx
2674
+ xorq %r8, %rdi
2675
+ xorq %r8, %rsi
2676
+ btq $0x3f, %r8
2677
+ cmovbq %rcx, %rbx
2678
+ movq %rax, %r8
2679
+ subq %rax, %rsi
2680
+ leaq (%rcx,%rdi), %rcx
2681
+ cmovs %rbp, %r8
2682
+ movq %rbx, %rdi
2683
+ testq %rdx, %rcx
2684
+ cmoveq %rbp, %r8
2685
+ cmoveq %rbp, %rdi
2686
+ sarq $1, %rcx
2687
+ xorq %r8, %rdi
2688
+ xorq %r8, %rsi
2689
+ btq $0x3f, %r8
2690
+ cmovbq %rcx, %rbx
2691
+ movq %rax, %r8
2692
+ subq %rax, %rsi
2693
+ leaq (%rcx,%rdi), %rcx
2694
+ cmovs %rbp, %r8
2695
+ movq %rbx, %rdi
2696
+ testq %rdx, %rcx
2697
+ cmoveq %rbp, %r8
2698
+ cmoveq %rbp, %rdi
2699
+ sarq $1, %rcx
2700
+ xorq %r8, %rdi
2701
+ xorq %r8, %rsi
2702
+ btq $0x3f, %r8
2703
+ cmovbq %rcx, %rbx
2704
+ movq %rax, %r8
2705
+ subq %rax, %rsi
2706
+ leaq (%rcx,%rdi), %rcx
2707
+ cmovs %rbp, %r8
2708
+ movq %rbx, %rdi
2709
+ testq %rdx, %rcx
2710
+ cmoveq %rbp, %r8
2711
+ cmoveq %rbp, %rdi
2712
+ sarq $1, %rcx
2713
+ xorq %r8, %rdi
2714
+ xorq %r8, %rsi
2715
+ btq $0x3f, %r8
2716
+ cmovbq %rcx, %rbx
2717
+ movq %rax, %r8
2718
+ subq %rax, %rsi
2719
+ leaq (%rcx,%rdi), %rcx
2720
+ cmovs %rbp, %r8
2721
+ movq %rbx, %rdi
2722
+ testq %rdx, %rcx
2723
+ cmoveq %rbp, %r8
2724
+ cmoveq %rbp, %rdi
2725
+ sarq $1, %rcx
2726
+ xorq %r8, %rdi
2727
+ xorq %r8, %rsi
2728
+ btq $0x3f, %r8
2729
+ cmovbq %rcx, %rbx
2730
+ movq %rax, %r8
2731
+ subq %rax, %rsi
2732
+ leaq (%rcx,%rdi), %rcx
2733
+ cmovs %rbp, %r8
2734
+ movq %rbx, %rdi
2735
+ testq %rdx, %rcx
2736
+ cmoveq %rbp, %r8
2737
+ cmoveq %rbp, %rdi
2738
+ sarq $1, %rcx
2739
+ xorq %r8, %rdi
2740
+ xorq %r8, %rsi
2741
+ btq $0x3f, %r8
2742
+ cmovbq %rcx, %rbx
2743
+ movq %rax, %r8
2744
+ subq %rax, %rsi
2745
+ leaq (%rcx,%rdi), %rcx
2746
+ cmovs %rbp, %r8
2747
+ movq %rbx, %rdi
2748
+ testq %rdx, %rcx
2749
+ cmoveq %rbp, %r8
2750
+ cmoveq %rbp, %rdi
2751
+ sarq $1, %rcx
2752
+ xorq %r8, %rdi
2753
+ xorq %r8, %rsi
2754
+ btq $0x3f, %r8
2755
+ cmovbq %rcx, %rbx
2756
+ movq %rax, %r8
2757
+ subq %rax, %rsi
2758
+ leaq (%rcx,%rdi), %rcx
2759
+ sarq $1, %rcx
2760
+ movl $0x100000, %eax
2761
+ leaq (%rbx,%rax), %rdx
2762
+ leaq (%rcx,%rax), %rdi
2763
+ shlq $0x16, %rdx
2764
+ shlq $0x16, %rdi
2765
+ sarq $0x2b, %rdx
2766
+ sarq $0x2b, %rdi
2767
+ movabsq $0x20000100000, %rax
2768
+ leaq (%rbx,%rax), %rbx
2769
+ leaq (%rcx,%rax), %rcx
2770
+ sarq $0x2a, %rbx
2771
+ sarq $0x2a, %rcx
2772
+ movq %rdx, 0xa0(%rsp)
2773
+ movq %rbx, 0xa8(%rsp)
2774
+ movq %rdi, 0xb0(%rsp)
2775
+ movq %rcx, 0xb8(%rsp)
2776
+ movq (%rsp), %r12
2777
+ imulq %r12, %rdi
2778
+ imulq %rdx, %r12
2779
+ movq 0x20(%rsp), %r13
2780
+ imulq %r13, %rbx
2781
+ imulq %rcx, %r13
2782
+ addq %rbx, %r12
2783
+ addq %rdi, %r13
2784
+ sarq $0x14, %r12
2785
+ sarq $0x14, %r13
2786
+ movq %r12, %rbx
2787
+ andq $0xfffff, %rbx
2788
+ movabsq $0xfffffe0000000000, %rax
2789
+ orq %rax, %rbx
2790
+ movq %r13, %rcx
2791
+ andq $0xfffff, %rcx
2792
+ movabsq $0xc000000000000000, %rax
2793
+ orq %rax, %rcx
2794
+ movq $0xfffffffffffffffe, %rax
2795
+ movl $0x2, %edx
2796
+ movq %rbx, %rdi
2797
+ movq %rax, %r8
2798
+ testq %rsi, %rsi
2799
+ cmovs %rbp, %r8
2800
+ testq $0x1, %rcx
2801
+ cmoveq %rbp, %r8
2802
+ cmoveq %rbp, %rdi
2803
+ xorq %r8, %rdi
2804
+ xorq %r8, %rsi
2805
+ btq $0x3f, %r8
2806
+ cmovbq %rcx, %rbx
2807
+ movq %rax, %r8
2808
+ subq %rax, %rsi
2809
+ leaq (%rcx,%rdi), %rcx
2810
+ cmovs %rbp, %r8
2811
+ movq %rbx, %rdi
2812
+ testq %rdx, %rcx
2813
+ cmoveq %rbp, %r8
2814
+ cmoveq %rbp, %rdi
2815
+ sarq $1, %rcx
2816
+ xorq %r8, %rdi
2817
+ xorq %r8, %rsi
2818
+ btq $0x3f, %r8
2819
+ cmovbq %rcx, %rbx
2820
+ movq %rax, %r8
2821
+ subq %rax, %rsi
2822
+ leaq (%rcx,%rdi), %rcx
2823
+ cmovs %rbp, %r8
2824
+ movq %rbx, %rdi
2825
+ testq %rdx, %rcx
2826
+ cmoveq %rbp, %r8
2827
+ cmoveq %rbp, %rdi
2828
+ sarq $1, %rcx
2829
+ xorq %r8, %rdi
2830
+ xorq %r8, %rsi
2831
+ btq $0x3f, %r8
2832
+ cmovbq %rcx, %rbx
2833
+ movq %rax, %r8
2834
+ subq %rax, %rsi
2835
+ leaq (%rcx,%rdi), %rcx
2836
+ cmovs %rbp, %r8
2837
+ movq %rbx, %rdi
2838
+ testq %rdx, %rcx
2839
+ cmoveq %rbp, %r8
2840
+ cmoveq %rbp, %rdi
2841
+ sarq $1, %rcx
2842
+ xorq %r8, %rdi
2843
+ xorq %r8, %rsi
2844
+ btq $0x3f, %r8
2845
+ cmovbq %rcx, %rbx
2846
+ movq %rax, %r8
2847
+ subq %rax, %rsi
2848
+ leaq (%rcx,%rdi), %rcx
2849
+ cmovs %rbp, %r8
2850
+ movq %rbx, %rdi
2851
+ testq %rdx, %rcx
2852
+ cmoveq %rbp, %r8
2853
+ cmoveq %rbp, %rdi
2854
+ sarq $1, %rcx
2855
+ xorq %r8, %rdi
2856
+ xorq %r8, %rsi
2857
+ btq $0x3f, %r8
2858
+ cmovbq %rcx, %rbx
2859
+ movq %rax, %r8
2860
+ subq %rax, %rsi
2861
+ leaq (%rcx,%rdi), %rcx
2862
+ cmovs %rbp, %r8
2863
+ movq %rbx, %rdi
2864
+ testq %rdx, %rcx
2865
+ cmoveq %rbp, %r8
2866
+ cmoveq %rbp, %rdi
2867
+ sarq $1, %rcx
2868
+ xorq %r8, %rdi
2869
+ xorq %r8, %rsi
2870
+ btq $0x3f, %r8
2871
+ cmovbq %rcx, %rbx
2872
+ movq %rax, %r8
2873
+ subq %rax, %rsi
2874
+ leaq (%rcx,%rdi), %rcx
2875
+ cmovs %rbp, %r8
2876
+ movq %rbx, %rdi
2877
+ testq %rdx, %rcx
2878
+ cmoveq %rbp, %r8
2879
+ cmoveq %rbp, %rdi
2880
+ sarq $1, %rcx
2881
+ xorq %r8, %rdi
2882
+ xorq %r8, %rsi
2883
+ btq $0x3f, %r8
2884
+ cmovbq %rcx, %rbx
2885
+ movq %rax, %r8
2886
+ subq %rax, %rsi
2887
+ leaq (%rcx,%rdi), %rcx
2888
+ cmovs %rbp, %r8
2889
+ movq %rbx, %rdi
2890
+ testq %rdx, %rcx
2891
+ cmoveq %rbp, %r8
2892
+ cmoveq %rbp, %rdi
2893
+ sarq $1, %rcx
2894
+ xorq %r8, %rdi
2895
+ xorq %r8, %rsi
2896
+ btq $0x3f, %r8
2897
+ cmovbq %rcx, %rbx
2898
+ movq %rax, %r8
2899
+ subq %rax, %rsi
2900
+ leaq (%rcx,%rdi), %rcx
2901
+ cmovs %rbp, %r8
2902
+ movq %rbx, %rdi
2903
+ testq %rdx, %rcx
2904
+ cmoveq %rbp, %r8
2905
+ cmoveq %rbp, %rdi
2906
+ sarq $1, %rcx
2907
+ xorq %r8, %rdi
2908
+ xorq %r8, %rsi
2909
+ btq $0x3f, %r8
2910
+ cmovbq %rcx, %rbx
2911
+ movq %rax, %r8
2912
+ subq %rax, %rsi
2913
+ leaq (%rcx,%rdi), %rcx
2914
+ cmovs %rbp, %r8
2915
+ movq %rbx, %rdi
2916
+ testq %rdx, %rcx
2917
+ cmoveq %rbp, %r8
2918
+ cmoveq %rbp, %rdi
2919
+ sarq $1, %rcx
2920
+ xorq %r8, %rdi
2921
+ xorq %r8, %rsi
2922
+ btq $0x3f, %r8
2923
+ cmovbq %rcx, %rbx
2924
+ movq %rax, %r8
2925
+ subq %rax, %rsi
2926
+ leaq (%rcx,%rdi), %rcx
2927
+ cmovs %rbp, %r8
2928
+ movq %rbx, %rdi
2929
+ testq %rdx, %rcx
2930
+ cmoveq %rbp, %r8
2931
+ cmoveq %rbp, %rdi
2932
+ sarq $1, %rcx
2933
+ xorq %r8, %rdi
2934
+ xorq %r8, %rsi
2935
+ btq $0x3f, %r8
2936
+ cmovbq %rcx, %rbx
2937
+ movq %rax, %r8
2938
+ subq %rax, %rsi
2939
+ leaq (%rcx,%rdi), %rcx
2940
+ cmovs %rbp, %r8
2941
+ movq %rbx, %rdi
2942
+ testq %rdx, %rcx
2943
+ cmoveq %rbp, %r8
2944
+ cmoveq %rbp, %rdi
2945
+ sarq $1, %rcx
2946
+ xorq %r8, %rdi
2947
+ xorq %r8, %rsi
2948
+ btq $0x3f, %r8
2949
+ cmovbq %rcx, %rbx
2950
+ movq %rax, %r8
2951
+ subq %rax, %rsi
2952
+ leaq (%rcx,%rdi), %rcx
2953
+ cmovs %rbp, %r8
2954
+ movq %rbx, %rdi
2955
+ testq %rdx, %rcx
2956
+ cmoveq %rbp, %r8
2957
+ cmoveq %rbp, %rdi
2958
+ sarq $1, %rcx
2959
+ xorq %r8, %rdi
2960
+ xorq %r8, %rsi
2961
+ btq $0x3f, %r8
2962
+ cmovbq %rcx, %rbx
2963
+ movq %rax, %r8
2964
+ subq %rax, %rsi
2965
+ leaq (%rcx,%rdi), %rcx
2966
+ cmovs %rbp, %r8
2967
+ movq %rbx, %rdi
2968
+ testq %rdx, %rcx
2969
+ cmoveq %rbp, %r8
2970
+ cmoveq %rbp, %rdi
2971
+ sarq $1, %rcx
2972
+ xorq %r8, %rdi
2973
+ xorq %r8, %rsi
2974
+ btq $0x3f, %r8
2975
+ cmovbq %rcx, %rbx
2976
+ movq %rax, %r8
2977
+ subq %rax, %rsi
2978
+ leaq (%rcx,%rdi), %rcx
2979
+ cmovs %rbp, %r8
2980
+ movq %rbx, %rdi
2981
+ testq %rdx, %rcx
2982
+ cmoveq %rbp, %r8
2983
+ cmoveq %rbp, %rdi
2984
+ sarq $1, %rcx
2985
+ xorq %r8, %rdi
2986
+ xorq %r8, %rsi
2987
+ btq $0x3f, %r8
2988
+ cmovbq %rcx, %rbx
2989
+ movq %rax, %r8
2990
+ subq %rax, %rsi
2991
+ leaq (%rcx,%rdi), %rcx
2992
+ cmovs %rbp, %r8
2993
+ movq %rbx, %rdi
2994
+ testq %rdx, %rcx
2995
+ cmoveq %rbp, %r8
2996
+ cmoveq %rbp, %rdi
2997
+ sarq $1, %rcx
2998
+ xorq %r8, %rdi
2999
+ xorq %r8, %rsi
3000
+ btq $0x3f, %r8
3001
+ cmovbq %rcx, %rbx
3002
+ movq %rax, %r8
3003
+ subq %rax, %rsi
3004
+ leaq (%rcx,%rdi), %rcx
3005
+ cmovs %rbp, %r8
3006
+ movq %rbx, %rdi
3007
+ testq %rdx, %rcx
3008
+ cmoveq %rbp, %r8
3009
+ cmoveq %rbp, %rdi
3010
+ sarq $1, %rcx
3011
+ xorq %r8, %rdi
3012
+ xorq %r8, %rsi
3013
+ btq $0x3f, %r8
3014
+ cmovbq %rcx, %rbx
3015
+ movq %rax, %r8
3016
+ subq %rax, %rsi
3017
+ leaq (%rcx,%rdi), %rcx
3018
+ cmovs %rbp, %r8
3019
+ movq %rbx, %rdi
3020
+ testq %rdx, %rcx
3021
+ cmoveq %rbp, %r8
3022
+ cmoveq %rbp, %rdi
3023
+ sarq $1, %rcx
3024
+ xorq %r8, %rdi
3025
+ xorq %r8, %rsi
3026
+ btq $0x3f, %r8
3027
+ cmovbq %rcx, %rbx
3028
+ movq %rax, %r8
3029
+ subq %rax, %rsi
3030
+ leaq (%rcx,%rdi), %rcx
3031
+ cmovs %rbp, %r8
3032
+ movq %rbx, %rdi
3033
+ testq %rdx, %rcx
3034
+ cmoveq %rbp, %r8
3035
+ cmoveq %rbp, %rdi
3036
+ sarq $1, %rcx
3037
+ xorq %r8, %rdi
3038
+ xorq %r8, %rsi
3039
+ btq $0x3f, %r8
3040
+ cmovbq %rcx, %rbx
3041
+ movq %rax, %r8
3042
+ subq %rax, %rsi
3043
+ leaq (%rcx,%rdi), %rcx
3044
+ cmovs %rbp, %r8
3045
+ movq %rbx, %rdi
3046
+ testq %rdx, %rcx
3047
+ cmoveq %rbp, %r8
3048
+ cmoveq %rbp, %rdi
3049
+ sarq $1, %rcx
3050
+ xorq %r8, %rdi
3051
+ xorq %r8, %rsi
3052
+ btq $0x3f, %r8
3053
+ cmovbq %rcx, %rbx
3054
+ movq %rax, %r8
3055
+ subq %rax, %rsi
3056
+ leaq (%rcx,%rdi), %rcx
3057
+ sarq $1, %rcx
3058
+ movl $0x100000, %eax
3059
+ leaq (%rbx,%rax), %r8
3060
+ leaq (%rcx,%rax), %r10
3061
+ shlq $0x16, %r8
3062
+ shlq $0x16, %r10
3063
+ sarq $0x2b, %r8
3064
+ sarq $0x2b, %r10
3065
+ movabsq $0x20000100000, %rax
3066
+ leaq (%rbx,%rax), %r15
3067
+ leaq (%rcx,%rax), %r11
3068
+ sarq $0x2a, %r15
3069
+ sarq $0x2a, %r11
3070
+ movq %r13, %rbx
3071
+ movq %r12, %rcx
3072
+ imulq %r8, %r12
3073
+ imulq %r15, %rbx
3074
+ addq %rbx, %r12
3075
+ imulq %r11, %r13
3076
+ imulq %r10, %rcx
3077
+ addq %rcx, %r13
3078
+ sarq $0x14, %r12
3079
+ sarq $0x14, %r13
3080
+ movq %r12, %rbx
3081
+ andq $0xfffff, %rbx
3082
+ movabsq $0xfffffe0000000000, %rax
3083
+ orq %rax, %rbx
3084
+ movq %r13, %rcx
3085
+ andq $0xfffff, %rcx
3086
+ movabsq $0xc000000000000000, %rax
3087
+ orq %rax, %rcx
3088
+ movq 0xa0(%rsp), %rax
3089
+ imulq %r8, %rax
3090
+ movq 0xb0(%rsp), %rdx
3091
+ imulq %r15, %rdx
3092
+ imulq 0xa8(%rsp), %r8
3093
+ imulq 0xb8(%rsp), %r15
3094
+ addq %r8, %r15
3095
+ leaq (%rax,%rdx), %r9
3096
+ movq 0xa0(%rsp), %rax
3097
+ imulq %r10, %rax
3098
+ movq 0xb0(%rsp), %rdx
3099
+ imulq %r11, %rdx
3100
+ imulq 0xa8(%rsp), %r10
3101
+ imulq 0xb8(%rsp), %r11
3102
+ addq %r10, %r11
3103
+ leaq (%rax,%rdx), %r13
3104
+ movq $0xfffffffffffffffe, %rax
3105
+ movl $0x2, %edx
3106
+ movq %rbx, %rdi
3107
+ movq %rax, %r8
3108
+ testq %rsi, %rsi
3109
+ cmovs %rbp, %r8
3110
+ testq $0x1, %rcx
3111
+ cmoveq %rbp, %r8
3112
+ cmoveq %rbp, %rdi
3113
+ xorq %r8, %rdi
3114
+ xorq %r8, %rsi
3115
+ btq $0x3f, %r8
3116
+ cmovbq %rcx, %rbx
3117
+ movq %rax, %r8
3118
+ subq %rax, %rsi
3119
+ leaq (%rcx,%rdi), %rcx
3120
+ cmovs %rbp, %r8
3121
+ movq %rbx, %rdi
3122
+ testq %rdx, %rcx
3123
+ cmoveq %rbp, %r8
3124
+ cmoveq %rbp, %rdi
3125
+ sarq $1, %rcx
3126
+ xorq %r8, %rdi
3127
+ xorq %r8, %rsi
3128
+ btq $0x3f, %r8
3129
+ cmovbq %rcx, %rbx
3130
+ movq %rax, %r8
3131
+ subq %rax, %rsi
3132
+ leaq (%rcx,%rdi), %rcx
3133
+ cmovs %rbp, %r8
3134
+ movq %rbx, %rdi
3135
+ testq %rdx, %rcx
3136
+ cmoveq %rbp, %r8
3137
+ cmoveq %rbp, %rdi
3138
+ sarq $1, %rcx
3139
+ xorq %r8, %rdi
3140
+ xorq %r8, %rsi
3141
+ btq $0x3f, %r8
3142
+ cmovbq %rcx, %rbx
3143
+ movq %rax, %r8
3144
+ subq %rax, %rsi
3145
+ leaq (%rcx,%rdi), %rcx
3146
+ cmovs %rbp, %r8
3147
+ movq %rbx, %rdi
3148
+ testq %rdx, %rcx
3149
+ cmoveq %rbp, %r8
3150
+ cmoveq %rbp, %rdi
3151
+ sarq $1, %rcx
3152
+ xorq %r8, %rdi
3153
+ xorq %r8, %rsi
3154
+ btq $0x3f, %r8
3155
+ cmovbq %rcx, %rbx
3156
+ movq %rax, %r8
3157
+ subq %rax, %rsi
3158
+ leaq (%rcx,%rdi), %rcx
3159
+ cmovs %rbp, %r8
3160
+ movq %rbx, %rdi
3161
+ testq %rdx, %rcx
3162
+ cmoveq %rbp, %r8
3163
+ cmoveq %rbp, %rdi
3164
+ sarq $1, %rcx
3165
+ xorq %r8, %rdi
3166
+ xorq %r8, %rsi
3167
+ btq $0x3f, %r8
3168
+ cmovbq %rcx, %rbx
3169
+ movq %rax, %r8
3170
+ subq %rax, %rsi
3171
+ leaq (%rcx,%rdi), %rcx
3172
+ cmovs %rbp, %r8
3173
+ movq %rbx, %rdi
3174
+ testq %rdx, %rcx
3175
+ cmoveq %rbp, %r8
3176
+ cmoveq %rbp, %rdi
3177
+ sarq $1, %rcx
3178
+ xorq %r8, %rdi
3179
+ xorq %r8, %rsi
3180
+ btq $0x3f, %r8
3181
+ cmovbq %rcx, %rbx
3182
+ movq %rax, %r8
3183
+ subq %rax, %rsi
3184
+ leaq (%rcx,%rdi), %rcx
3185
+ cmovs %rbp, %r8
3186
+ movq %rbx, %rdi
3187
+ testq %rdx, %rcx
3188
+ cmoveq %rbp, %r8
3189
+ cmoveq %rbp, %rdi
3190
+ sarq $1, %rcx
3191
+ xorq %r8, %rdi
3192
+ xorq %r8, %rsi
3193
+ btq $0x3f, %r8
3194
+ cmovbq %rcx, %rbx
3195
+ movq %rax, %r8
3196
+ subq %rax, %rsi
3197
+ leaq (%rcx,%rdi), %rcx
3198
+ cmovs %rbp, %r8
3199
+ movq %rbx, %rdi
3200
+ testq %rdx, %rcx
3201
+ cmoveq %rbp, %r8
3202
+ cmoveq %rbp, %rdi
3203
+ sarq $1, %rcx
3204
+ xorq %r8, %rdi
3205
+ xorq %r8, %rsi
3206
+ btq $0x3f, %r8
3207
+ cmovbq %rcx, %rbx
3208
+ movq %rax, %r8
3209
+ subq %rax, %rsi
3210
+ leaq (%rcx,%rdi), %rcx
3211
+ cmovs %rbp, %r8
3212
+ movq %rbx, %rdi
3213
+ testq %rdx, %rcx
3214
+ cmoveq %rbp, %r8
3215
+ cmoveq %rbp, %rdi
3216
+ sarq $1, %rcx
3217
+ xorq %r8, %rdi
3218
+ xorq %r8, %rsi
3219
+ btq $0x3f, %r8
3220
+ cmovbq %rcx, %rbx
3221
+ movq %rax, %r8
3222
+ subq %rax, %rsi
3223
+ leaq (%rcx,%rdi), %rcx
3224
+ cmovs %rbp, %r8
3225
+ movq %rbx, %rdi
3226
+ testq %rdx, %rcx
3227
+ cmoveq %rbp, %r8
3228
+ cmoveq %rbp, %rdi
3229
+ sarq $1, %rcx
3230
+ xorq %r8, %rdi
3231
+ xorq %r8, %rsi
3232
+ btq $0x3f, %r8
3233
+ cmovbq %rcx, %rbx
3234
+ movq %rax, %r8
3235
+ subq %rax, %rsi
3236
+ leaq (%rcx,%rdi), %rcx
3237
+ cmovs %rbp, %r8
3238
+ movq %rbx, %rdi
3239
+ testq %rdx, %rcx
3240
+ cmoveq %rbp, %r8
3241
+ cmoveq %rbp, %rdi
3242
+ sarq $1, %rcx
3243
+ xorq %r8, %rdi
3244
+ xorq %r8, %rsi
3245
+ btq $0x3f, %r8
3246
+ cmovbq %rcx, %rbx
3247
+ movq %rax, %r8
3248
+ subq %rax, %rsi
3249
+ leaq (%rcx,%rdi), %rcx
3250
+ cmovs %rbp, %r8
3251
+ movq %rbx, %rdi
3252
+ testq %rdx, %rcx
3253
+ cmoveq %rbp, %r8
3254
+ cmoveq %rbp, %rdi
3255
+ sarq $1, %rcx
3256
+ xorq %r8, %rdi
3257
+ xorq %r8, %rsi
3258
+ btq $0x3f, %r8
3259
+ cmovbq %rcx, %rbx
3260
+ movq %rax, %r8
3261
+ subq %rax, %rsi
3262
+ leaq (%rcx,%rdi), %rcx
3263
+ cmovs %rbp, %r8
3264
+ movq %rbx, %rdi
3265
+ testq %rdx, %rcx
3266
+ cmoveq %rbp, %r8
3267
+ cmoveq %rbp, %rdi
3268
+ sarq $1, %rcx
3269
+ xorq %r8, %rdi
3270
+ xorq %r8, %rsi
3271
+ btq $0x3f, %r8
3272
+ cmovbq %rcx, %rbx
3273
+ movq %rax, %r8
3274
+ subq %rax, %rsi
3275
+ leaq (%rcx,%rdi), %rcx
3276
+ cmovs %rbp, %r8
3277
+ movq %rbx, %rdi
3278
+ testq %rdx, %rcx
3279
+ cmoveq %rbp, %r8
3280
+ cmoveq %rbp, %rdi
3281
+ sarq $1, %rcx
3282
+ xorq %r8, %rdi
3283
+ xorq %r8, %rsi
3284
+ btq $0x3f, %r8
3285
+ cmovbq %rcx, %rbx
3286
+ movq %rax, %r8
3287
+ subq %rax, %rsi
3288
+ leaq (%rcx,%rdi), %rcx
3289
+ cmovs %rbp, %r8
3290
+ movq %rbx, %rdi
3291
+ testq %rdx, %rcx
3292
+ cmoveq %rbp, %r8
3293
+ cmoveq %rbp, %rdi
3294
+ sarq $1, %rcx
3295
+ xorq %r8, %rdi
3296
+ xorq %r8, %rsi
3297
+ btq $0x3f, %r8
3298
+ cmovbq %rcx, %rbx
3299
+ movq %rax, %r8
3300
+ subq %rax, %rsi
3301
+ leaq (%rcx,%rdi), %rcx
3302
+ cmovs %rbp, %r8
3303
+ movq %rbx, %rdi
3304
+ testq %rdx, %rcx
3305
+ cmoveq %rbp, %r8
3306
+ cmoveq %rbp, %rdi
3307
+ sarq $1, %rcx
3308
+ xorq %r8, %rdi
3309
+ xorq %r8, %rsi
3310
+ btq $0x3f, %r8
3311
+ cmovbq %rcx, %rbx
3312
+ movq %rax, %r8
3313
+ subq %rax, %rsi
3314
+ leaq (%rcx,%rdi), %rcx
3315
+ cmovs %rbp, %r8
3316
+ movq %rbx, %rdi
3317
+ testq %rdx, %rcx
3318
+ cmoveq %rbp, %r8
3319
+ cmoveq %rbp, %rdi
3320
+ sarq $1, %rcx
3321
+ xorq %r8, %rdi
3322
+ xorq %r8, %rsi
3323
+ btq $0x3f, %r8
3324
+ cmovbq %rcx, %rbx
3325
+ movq %rax, %r8
3326
+ subq %rax, %rsi
3327
+ leaq (%rcx,%rdi), %rcx
3328
+ cmovs %rbp, %r8
3329
+ movq %rbx, %rdi
3330
+ testq %rdx, %rcx
3331
+ cmoveq %rbp, %r8
3332
+ cmoveq %rbp, %rdi
3333
+ sarq $1, %rcx
3334
+ xorq %r8, %rdi
3335
+ xorq %r8, %rsi
3336
+ btq $0x3f, %r8
3337
+ cmovbq %rcx, %rbx
3338
+ movq %rax, %r8
3339
+ subq %rax, %rsi
3340
+ leaq (%rcx,%rdi), %rcx
3341
+ cmovs %rbp, %r8
3342
+ movq %rbx, %rdi
3343
+ testq %rdx, %rcx
3344
+ cmoveq %rbp, %r8
3345
+ cmoveq %rbp, %rdi
3346
+ sarq $1, %rcx
3347
+ xorq %r8, %rdi
3348
+ xorq %r8, %rsi
3349
+ btq $0x3f, %r8
3350
+ cmovbq %rcx, %rbx
3351
+ movq %rax, %r8
3352
+ subq %rax, %rsi
3353
+ leaq (%rcx,%rdi), %rcx
3354
+ sarq $1, %rcx
3355
+ movl $0x100000, %eax
3356
+ leaq (%rbx,%rax), %r8
3357
+ leaq (%rcx,%rax), %r12
3358
+ shlq $0x15, %r8
3359
+ shlq $0x15, %r12
3360
+ sarq $0x2b, %r8
3361
+ sarq $0x2b, %r12
3362
+ movabsq $0x20000100000, %rax
3363
+ leaq (%rbx,%rax), %r10
3364
+ leaq (%rcx,%rax), %r14
3365
+ sarq $0x2b, %r10
3366
+ sarq $0x2b, %r14
3367
+ movq %r9, %rax
3368
+ imulq %r8, %rax
3369
+ movq %r13, %rdx
3370
+ imulq %r10, %rdx
3371
+ imulq %r15, %r8
3372
+ imulq %r11, %r10
3373
+ addq %r8, %r10
3374
+ leaq (%rax,%rdx), %r8
3375
+ movq %r9, %rax
3376
+ imulq %r12, %rax
3377
+ movq %r13, %rdx
3378
+ imulq %r14, %rdx
3379
+ imulq %r15, %r12
3380
+ imulq %r11, %r14
3381
+ addq %r12, %r14
3382
+ leaq (%rax,%rdx), %r12
3383
+ movq %rsi, 0x98(%rsp)
3384
+ decq 0x90(%rsp)
3385
+ jne edwards25519_scalarmuldouble_alt_inverseloop
3386
+ movq (%rsp), %rax
3387
+ movq 0x20(%rsp), %rcx
3388
+ imulq %r8, %rax
3389
+ imulq %r10, %rcx
3390
+ addq %rcx, %rax
3391
+ sarq $0x3f, %rax
3392
+ movq %r8, %r9
3393
+ sarq $0x3f, %r9
3394
+ xorq %r9, %r8
3395
+ subq %r9, %r8
3396
+ xorq %rax, %r9
3397
+ movq %r10, %r11
3398
+ sarq $0x3f, %r11
3399
+ xorq %r11, %r10
3400
+ subq %r11, %r10
3401
+ xorq %rax, %r11
3402
+ movq %r12, %r13
3403
+ sarq $0x3f, %r13
3404
+ xorq %r13, %r12
3405
+ subq %r13, %r12
3406
+ xorq %rax, %r13
3407
+ movq %r14, %r15
3408
+ sarq $0x3f, %r15
3409
+ xorq %r15, %r14
3410
+ subq %r15, %r14
3411
+ xorq %rax, %r15
3412
+ movq %r8, %rax
3413
+ andq %r9, %rax
3414
+ movq %r10, %r12
3415
+ andq %r11, %r12
3416
+ addq %rax, %r12
3417
+ xorl %r13d, %r13d
3418
+ movq 0x40(%rsp), %rax
3419
+ xorq %r9, %rax
3420
+ mulq %r8
3421
+ addq %rax, %r12
3422
+ adcq %rdx, %r13
3423
+ movq 0x60(%rsp), %rax
3424
+ xorq %r11, %rax
3425
+ mulq %r10
3426
+ addq %rax, %r12
3427
+ adcq %rdx, %r13
3428
+ xorl %r14d, %r14d
3429
+ movq 0x48(%rsp), %rax
3430
+ xorq %r9, %rax
3431
+ mulq %r8
3432
+ addq %rax, %r13
3433
+ adcq %rdx, %r14
3434
+ movq 0x68(%rsp), %rax
3435
+ xorq %r11, %rax
3436
+ mulq %r10
3437
+ addq %rax, %r13
3438
+ adcq %rdx, %r14
3439
+ xorl %r15d, %r15d
3440
+ movq 0x50(%rsp), %rax
3441
+ xorq %r9, %rax
3442
+ mulq %r8
3443
+ addq %rax, %r14
3444
+ adcq %rdx, %r15
3445
+ movq 0x70(%rsp), %rax
3446
+ xorq %r11, %rax
3447
+ mulq %r10
3448
+ addq %rax, %r14
3449
+ adcq %rdx, %r15
3450
+ movq 0x58(%rsp), %rax
3451
+ xorq %r9, %rax
3452
+ andq %r8, %r9
3453
+ negq %r9
3454
+ mulq %r8
3455
+ addq %rax, %r15
3456
+ adcq %rdx, %r9
3457
+ movq 0x78(%rsp), %rax
3458
+ xorq %r11, %rax
3459
+ movq %r11, %rdx
3460
+ andq %r10, %rdx
3461
+ subq %rdx, %r9
3462
+ mulq %r10
3463
+ addq %rax, %r15
3464
+ adcq %rdx, %r9
3465
+ movq %r9, %rax
3466
+ shldq $0x1, %r15, %rax
3467
+ sarq $0x3f, %r9
3468
+ movl $0x13, %ebx
3469
+ leaq 0x1(%rax,%r9,1), %rax
3470
+ imulq %rbx
3471
+ xorl %ebp, %ebp
3472
+ addq %rax, %r12
3473
+ adcq %rdx, %r13
3474
+ adcq %r9, %r14
3475
+ adcq %r9, %r15
3476
+ shlq $0x3f, %rax
3477
+ addq %rax, %r15
3478
+ cmovns %rbp, %rbx
3479
+ subq %rbx, %r12
3480
+ sbbq %rbp, %r13
3481
+ sbbq %rbp, %r14
3482
+ sbbq %rbp, %r15
3483
+ btr $0x3f, %r15
3484
+ movq 0xc0(%rsp), %rdi
3485
+ movq %r12, (%rdi)
3486
+ movq %r13, 0x8(%rdi)
3487
+ movq %r14, 0x10(%rdi)
3488
+ movq %r15, 0x18(%rdi)
3489
+
3490
+ // Store result
3491
+
3492
+ movq res, %rdi
3493
+ leaq ACC(%rsp), %rsi
3494
+ leaq TAB(%rsp), %rbp
3495
+ mul_p25519(x_0,x_1,x_2)
3496
+
3497
+ movq res, %rdi
3498
+ addq $32, %rdi
3499
+ leaq ACC+32(%rsp), %rsi
3500
+ leaq TAB(%rsp), %rbp
3501
+ mul_p25519(x_0,x_1,x_2)
3502
+
3503
+ // Restore stack and registers
3504
+
3505
+ addq $NSPACE, %rsp
3506
+
3507
+ popq %r15
3508
+ popq %r14
3509
+ popq %r13
3510
+ popq %r12
3511
+ popq %rbp
3512
+ popq %rbx
3513
+ ret
3514
+
3515
+ // ****************************************************************************
3516
+ // Localized versions of subroutines.
3517
+ // These are close to the standalone functions "edwards25519_epdouble" etc.,
3518
+ // but only maintain reduction modulo 2^256 - 38 = 2 * (2^255 - 19), not 2^255 - 19.
3519
+ // ****************************************************************************
3520
+
3521
+ edwards25519_scalarmuldouble_alt_epdouble: // Local doubling subroutine; the section comment relates it to standalone "edwards25519_epdouble". Formulas below assume macro(dst,a,b) operand order -- TODO confirm against the macro definitions.
3522
+ sub $(5*NUMSIZE), %rsp // reserve 5*NUMSIZE bytes of stack (presumably backing t0..t4 -- confirm)
3523
+ add_twice4(t0,x_1,y_1) // t0 = x_1 + y_1
3524
+ sqr_4(t1,z_1) // t1 = z_1^2
3525
+ sqr_4(t2,x_1) // t2 = x_1^2
3526
+ sqr_4(t3,y_1) // t3 = y_1^2
3527
+ double_twice4(t1,t1) // t1 = 2 * z_1^2
3528
+ sqr_4(t0,t0) // t0 = (x_1 + y_1)^2
3529
+ add_twice4(t4,t2,t3) // t4 = x_1^2 + y_1^2
3530
+ sub_twice4(t2,t2,t3) // t2 = x_1^2 - y_1^2
3531
+ add_twice4(t3,t1,t2) // t3 = 2 * z_1^2 + (x_1^2 - y_1^2)
3532
+ sub_twice4(t1,t4,t0) // t1 = (x_1^2 + y_1^2) - (x_1 + y_1)^2
3533
+ mul_4(y_0,t2,t4) // y_0 = t2 * t4
3534
+ mul_4(z_0,t3,t2) // z_0 = t3 * t2
3535
+ mul_4(w_0,t1,t4) // w_0 = t1 * t4 (this product is absent from the _pdouble variant)
3536
+ mul_4(x_0,t1,t3) // x_0 = t1 * t3
3537
+ add $(5*NUMSIZE), %rsp // release the stack space reserved on entry
3538
+ ret
3539
+
3540
+ edwards25519_scalarmuldouble_alt_pdouble: // Local doubling subroutine: same macro sequence as _epdouble except that no w_0 product is computed. Formulas below assume macro(dst,a,b) operand order -- TODO confirm against the macro definitions.
3541
+ sub $(5*NUMSIZE), %rsp // reserve 5*NUMSIZE bytes of stack (presumably backing t0..t4 -- confirm)
3542
+ add_twice4(t0,x_1,y_1) // t0 = x_1 + y_1
3543
+ sqr_4(t1,z_1) // t1 = z_1^2
3544
+ sqr_4(t2,x_1) // t2 = x_1^2
3545
+ sqr_4(t3,y_1) // t3 = y_1^2
3546
+ double_twice4(t1,t1) // t1 = 2 * z_1^2
3547
+ sqr_4(t0,t0) // t0 = (x_1 + y_1)^2
3548
+ add_twice4(t4,t2,t3) // t4 = x_1^2 + y_1^2
3549
+ sub_twice4(t2,t2,t3) // t2 = x_1^2 - y_1^2
3550
+ add_twice4(t3,t1,t2) // t3 = 2 * z_1^2 + (x_1^2 - y_1^2)
3551
+ sub_twice4(t1,t4,t0) // t1 = (x_1^2 + y_1^2) - (x_1 + y_1)^2
3552
+ mul_4(y_0,t2,t4) // y_0 = t2 * t4
3553
+ mul_4(z_0,t3,t2) // z_0 = t3 * t2
3554
+ mul_4(x_0,t1,t3) // x_0 = t1 * t3
3555
+ add $(5*NUMSIZE), %rsp // release the stack space reserved on entry
3556
+ ret
3557
+
3558
+ edwards25519_scalarmuldouble_alt_epadd: // Local addition subroutine combining operand 1 (x_1,y_1,z_1,w_1) with operand 2 (x_2,y_2,z_2,w_2) into (x_0,y_0,z_0,w_0). Formulas below assume macro(dst,a,b) operand order -- TODO confirm against the macro definitions.
3559
+ sub $(6*NUMSIZE), %rsp // reserve 6*NUMSIZE bytes of stack (presumably backing t0..t5 -- confirm)
3560
+ mul_4(t0,w_1,w_2) // t0 = w_1 * w_2
3561
+ sub_twice4(t1,y_1,x_1) // t1 = y_1 - x_1
3562
+ sub_twice4(t2,y_2,x_2) // t2 = y_2 - x_2
3563
+ add_twice4(t3,y_1,x_1) // t3 = y_1 + x_1
3564
+ add_twice4(t4,y_2,x_2) // t4 = y_2 + x_2
3565
+ double_twice4(t5,z_2) // t5 = 2 * z_2
3566
+ mul_4(t1,t1,t2) // t1 = (y_1 - x_1) * (y_2 - x_2)
3567
+ mul_4(t3,t3,t4) // t3 = (y_1 + x_1) * (y_2 + x_2)
3568
+ load_k25519(t2) // NOTE(review): presumably sets t2 to a fixed curve constant -- confirm against the macro definition
3569
+ mul_4(t2,t2,t0) // t2 = constant * w_1 * w_2
3570
+ mul_4(t4,z_1,t5) // t4 = 2 * z_1 * z_2
3571
+ sub_twice4(t0,t3,t1) // t0 = t3 - t1
3572
+ add_twice4(t5,t3,t1) // t5 = t3 + t1
3573
+ sub_twice4(t1,t4,t2) // t1 = t4 - t2
3574
+ add_twice4(t3,t4,t2) // t3 = t4 + t2
3575
+ mul_4(w_0,t0,t5) // w_0 = t0 * t5
3576
+ mul_4(x_0,t0,t1) // x_0 = t0 * t1
3577
+ mul_4(y_0,t3,t5) // y_0 = t3 * t5
3578
+ mul_4(z_0,t1,t3) // z_0 = t1 * t3
3579
+ add $(6*NUMSIZE), %rsp // release the stack space reserved on entry
3580
+ ret
3581
+
3582
+ edwards25519_scalarmuldouble_alt_pepadd: // Local addition subroutine; unlike _epadd it uses only x_2, y_2, z_2 of operand 2 (presumably a precomputed triple in the table's (y-x,x+y,2*d*x*y) form -- confirm against callers). Formulas below assume macro(dst,a,b) operand order -- TODO confirm.
3583
+ sub $(6*NUMSIZE), %rsp // reserve 6*NUMSIZE bytes of stack (presumably backing t0..t5 -- confirm)
3584
+ double_twice4(t0,z_1); // t0 = 2 * z_1
3585
+ sub_twice4(t1,y_1,x_1); // t1 = y_1 - x_1
3586
+ add_twice4(t2,y_1,x_1); // t2 = y_1 + x_1
3587
+ mul_4(t3,w_1,z_2); // t3 = w_1 * z_2
3588
+ mul_4(t1,t1,x_2); // t1 = (y_1 - x_1) * x_2
3589
+ mul_4(t2,t2,y_2); // t2 = (y_1 + x_1) * y_2
3590
+ sub_twice4(t4,t0,t3); // t4 = t0 - t3
3591
+ add_twice4(t0,t0,t3); // t0 = t0 + t3
3592
+ sub_twice4(t5,t2,t1); // t5 = t2 - t1
3593
+ add_twice4(t1,t2,t1); // t1 = t2 + t1
3594
+ mul_4(z_0,t4,t0); // z_0 = t4 * t0
3595
+ mul_4(x_0,t5,t4); // x_0 = t5 * t4
3596
+ mul_4(y_0,t0,t1); // y_0 = t0 * t1
3597
+ mul_4(w_0,t5,t1); // w_0 = t5 * t1
3598
+ add $(6*NUMSIZE), %rsp // release the stack space reserved on entry
3599
+ ret
3600
+
3601
+ // ****************************************************************************
3602
+ // The precomputed data (all read-only). This is currently part of the same
3603
+ // text section, which gives position-independent code with simple PC-relative
3604
+ // addressing. However it could be put in a separate section via something like
3605
+ //
3606
+ // .section .rodata
3607
+ // ****************************************************************************
3608
+
3609
+ // Precomputed table of multiples of the generator for edwards25519,
3610
+ // all stored as precomputed extended-projective (y-x,x+y,2*d*x*y) triples.
3611
+
3612
+ edwards25519_scalarmuldouble_alt_table:
3613
+
3614
+ // 1 * G
3615
+
3616
+ .quad 0x9d103905d740913e
3617
+ .quad 0xfd399f05d140beb3
3618
+ .quad 0xa5c18434688f8a09
3619
+ .quad 0x44fd2f9298f81267
3620
+ .quad 0x2fbc93c6f58c3b85
3621
+ .quad 0xcf932dc6fb8c0e19
3622
+ .quad 0x270b4898643d42c2
3623
+ .quad 0x07cf9d3a33d4ba65
3624
+ .quad 0xabc91205877aaa68
3625
+ .quad 0x26d9e823ccaac49e
3626
+ .quad 0x5a1b7dcbdd43598c
3627
+ .quad 0x6f117b689f0c65a8
3628
+
3629
+ // 2 * G
3630
+
3631
+ .quad 0x8a99a56042b4d5a8
3632
+ .quad 0x8f2b810c4e60acf6
3633
+ .quad 0xe09e236bb16e37aa
3634
+ .quad 0x6bb595a669c92555
3635
+ .quad 0x9224e7fc933c71d7
3636
+ .quad 0x9f469d967a0ff5b5
3637
+ .quad 0x5aa69a65e1d60702
3638
+ .quad 0x590c063fa87d2e2e
3639
+ .quad 0x43faa8b3a59b7a5f
3640
+ .quad 0x36c16bdd5d9acf78
3641
+ .quad 0x500fa0840b3d6a31
3642
+ .quad 0x701af5b13ea50b73
3643
+
3644
+ // 3 * G
3645
+
3646
+ .quad 0x56611fe8a4fcd265
3647
+ .quad 0x3bd353fde5c1ba7d
3648
+ .quad 0x8131f31a214bd6bd
3649
+ .quad 0x2ab91587555bda62
3650
+ .quad 0xaf25b0a84cee9730
3651
+ .quad 0x025a8430e8864b8a
3652
+ .quad 0xc11b50029f016732
3653
+ .quad 0x7a164e1b9a80f8f4
3654
+ .quad 0x14ae933f0dd0d889
3655
+ .quad 0x589423221c35da62
3656
+ .quad 0xd170e5458cf2db4c
3657
+ .quad 0x5a2826af12b9b4c6
3658
+
3659
+ // 4 * G
3660
+
3661
+ .quad 0x95fe050a056818bf
3662
+ .quad 0x327e89715660faa9
3663
+ .quad 0xc3e8e3cd06a05073
3664
+ .quad 0x27933f4c7445a49a
3665
+ .quad 0x287351b98efc099f
3666
+ .quad 0x6765c6f47dfd2538
3667
+ .quad 0xca348d3dfb0a9265
3668
+ .quad 0x680e910321e58727
3669
+ .quad 0x5a13fbe9c476ff09
3670
+ .quad 0x6e9e39457b5cc172
3671
+ .quad 0x5ddbdcf9102b4494
3672
+ .quad 0x7f9d0cbf63553e2b
3673
+
3674
+ // 5 * G
3675
+
3676
+ .quad 0x7f9182c3a447d6ba
3677
+ .quad 0xd50014d14b2729b7
3678
+ .quad 0xe33cf11cb864a087
3679
+ .quad 0x154a7e73eb1b55f3
3680
+ .quad 0xa212bc4408a5bb33
3681
+ .quad 0x8d5048c3c75eed02
3682
+ .quad 0xdd1beb0c5abfec44
3683
+ .quad 0x2945ccf146e206eb
3684
+ .quad 0xbcbbdbf1812a8285
3685
+ .quad 0x270e0807d0bdd1fc
3686
+ .quad 0xb41b670b1bbda72d
3687
+ .quad 0x43aabe696b3bb69a
3688
+
3689
+ // 6 * G
3690
+
3691
+ .quad 0x499806b67b7d8ca4
3692
+ .quad 0x575be28427d22739
3693
+ .quad 0xbb085ce7204553b9
3694
+ .quad 0x38b64c41ae417884
3695
+ .quad 0x3a0ceeeb77157131
3696
+ .quad 0x9b27158900c8af88
3697
+ .quad 0x8065b668da59a736
3698
+ .quad 0x51e57bb6a2cc38bd
3699
+ .quad 0x85ac326702ea4b71
3700
+ .quad 0xbe70e00341a1bb01
3701
+ .quad 0x53e4a24b083bc144
3702
+ .quad 0x10b8e91a9f0d61e3
3703
+
3704
+ // 7 * G
3705
+
3706
+ .quad 0xba6f2c9aaa3221b1
3707
+ .quad 0x6ca021533bba23a7
3708
+ .quad 0x9dea764f92192c3a
3709
+ .quad 0x1d6edd5d2e5317e0
3710
+ .quad 0x6b1a5cd0944ea3bf
3711
+ .quad 0x7470353ab39dc0d2
3712
+ .quad 0x71b2528228542e49
3713
+ .quad 0x461bea69283c927e
3714
+ .quad 0xf1836dc801b8b3a2
3715
+ .quad 0xb3035f47053ea49a
3716
+ .quad 0x529c41ba5877adf3
3717
+ .quad 0x7a9fbb1c6a0f90a7
3718
+
3719
+ // 8 * G
3720
+
3721
+ .quad 0xe2a75dedf39234d9
3722
+ .quad 0x963d7680e1b558f9
3723
+ .quad 0x2c2741ac6e3c23fb
3724
+ .quad 0x3a9024a1320e01c3
3725
+ .quad 0x59b7596604dd3e8f
3726
+ .quad 0x6cb30377e288702c
3727
+ .quad 0xb1339c665ed9c323
3728
+ .quad 0x0915e76061bce52f
3729
+ .quad 0xe7c1f5d9c9a2911a
3730
+ .quad 0xb8a371788bcca7d7
3731
+ .quad 0x636412190eb62a32
3732
+ .quad 0x26907c5c2ecc4e95
3733
+
3734
+ #if defined(__linux__) && defined(__ELF__)
3735
+ .section .note.GNU-stack, "", %progbits // empty marker section; by GNU toolchain convention it signals that this object does not need an executable stack
3736
+ #endif