rbnacl-libsodium 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +23 -0
  3. data/CHANGES.md +5 -0
  4. data/Gemfile +5 -2
  5. data/Rakefile +5 -0
  6. data/ext/rbnacl/extconf.rb +2 -1
  7. data/lib/rbnacl/libsodium.rb +8 -2
  8. data/lib/rbnacl/libsodium/version.rb +1 -1
  9. data/vendor/libsodium/AUTHORS +14 -0
  10. data/vendor/libsodium/ChangeLog +26 -0
  11. data/vendor/libsodium/LICENSE +1 -1
  12. data/vendor/libsodium/Makefile.am +1 -0
  13. data/vendor/libsodium/Makefile.in +9 -0
  14. data/vendor/libsodium/README.markdown +7 -0
  15. data/vendor/libsodium/aclocal.m4 +1 -0
  16. data/vendor/libsodium/appveyor.yml +25 -0
  17. data/vendor/libsodium/autom4te.cache/output.1 +640 -126
  18. data/vendor/libsodium/autom4te.cache/output.6 +19049 -0
  19. data/vendor/libsodium/autom4te.cache/requests +1151 -914
  20. data/vendor/libsodium/autom4te.cache/traces.1 +472 -426
  21. data/vendor/libsodium/autom4te.cache/traces.6 +3193 -0
  22. data/vendor/libsodium/builds/msvc/version.h +2 -2
  23. data/vendor/libsodium/builds/msvc/vs2010/libsodium.sln +50 -79
  24. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +20 -8
  25. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +208 -166
  26. data/vendor/libsodium/builds/msvc/vs2012/libsodium.sln +50 -79
  27. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +20 -8
  28. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +206 -164
  29. data/vendor/libsodium/builds/msvc/vs2013/libsodium.sln +52 -81
  30. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +20 -8
  31. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +206 -164
  32. data/vendor/libsodium/builds/msvc/vs2015/libsodium.sln +52 -81
  33. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +20 -8
  34. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +206 -164
  35. data/vendor/libsodium/configure +639 -125
  36. data/vendor/libsodium/configure.ac +94 -16
  37. data/vendor/libsodium/dist-build/Makefile.in +9 -0
  38. data/vendor/libsodium/dist-build/emscripten-symbols.def +370 -0
  39. data/vendor/libsodium/dist-build/emscripten.sh +9 -3
  40. data/vendor/libsodium/dist-build/generate-emscripten-symbols.sh +43 -0
  41. data/vendor/libsodium/libsodium-uninstalled.pc.in +1 -1
  42. data/vendor/libsodium/libsodium.pc.in +1 -1
  43. data/vendor/libsodium/libsodium.vcxproj +70 -66
  44. data/vendor/libsodium/libsodium.vcxproj.filters +204 -192
  45. data/vendor/libsodium/m4/ax_valgrind_check.m4 +190 -0
  46. data/vendor/libsodium/msvc-scripts/Makefile.in +9 -0
  47. data/vendor/libsodium/msvc-scripts/process.bat +2 -2
  48. data/vendor/libsodium/src/Makefile.in +9 -0
  49. data/vendor/libsodium/src/libsodium/Makefile.am +31 -6
  50. data/vendor/libsodium/src/libsodium/Makefile.in +238 -42
  51. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +234 -38
  52. data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +208 -118
  53. data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_seal.c +2 -2
  54. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +1 -4
  55. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +1799 -1790
  56. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.h +39 -39
  57. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.c +86 -0
  58. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.h +28 -0
  59. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +38 -46
  60. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +47 -55
  61. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +47 -55
  62. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +47 -55
  63. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/generichash_blake2_api.c +7 -0
  64. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +0 -89
  65. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +50 -141
  66. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.c +45 -0
  67. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.h +123 -0
  68. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +3 -2
  69. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +2 -2
  70. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/{blake2b-round.h → blake2b-compress-sse41.h} +2 -28
  71. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +2 -4
  72. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.h +97 -0
  73. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-avx2.h +339 -0
  74. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +0 -2
  75. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +0 -2
  76. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +29 -18
  77. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +4 -43
  78. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +3 -32
  79. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -20
  80. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +22 -41
  81. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +12 -39
  82. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +2 -4
  83. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +1 -20
  84. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.c +570 -0
  85. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.h +198 -0
  86. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.c +444 -0
  87. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.h +32 -0
  88. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +229 -0
  89. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +222 -0
  90. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-impl.h +40 -0
  91. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.c +238 -0
  92. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.h +251 -0
  93. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.c +80 -0
  94. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.h +8 -0
  95. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ref.h +38 -0
  96. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ssse3.h +117 -0
  97. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2i.c +164 -0
  98. data/vendor/libsodium/src/libsodium/crypto_pwhash/crypto_pwhash.c +106 -0
  99. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c +1 -1
  100. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +4 -4
  101. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c +186 -186
  102. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +2 -2
  103. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c +3 -2
  104. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c +33 -33
  105. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c +253 -254
  106. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +16 -17
  107. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.h +1 -0
  108. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +11 -11
  109. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.h +1 -0
  110. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts_namespace.h +1 -1
  111. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h +3 -2
  112. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h +5 -3
  113. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_invert.c +41 -41
  114. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S +10 -2
  115. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_namespace.h +1 -1
  116. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_nsquare.S +4 -0
  117. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S +4 -0
  118. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe_frombytes_sandy2x.c +31 -32
  119. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S +4 -0
  120. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h +1 -1
  121. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S +4 -0
  122. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.h +1 -1
  123. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base_namespace.h +1 -1
  124. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_namespace.h +1 -1
  125. data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +2 -6
  126. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +8 -28
  127. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +75 -0
  128. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +6 -6
  129. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common.h +1 -18
  130. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c +20 -20
  131. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/types.h +4 -4
  132. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +6 -6
  133. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +56 -77
  134. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.h +1 -0
  135. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +1 -0
  136. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +2 -8
  137. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +2 -8
  138. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +2 -8
  139. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +2 -8
  140. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +2 -8
  141. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +2 -8
  142. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +1 -5
  143. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +1 -5
  144. data/vendor/libsodium/src/libsodium/include/Makefile.am +3 -0
  145. data/vendor/libsodium/src/libsodium/include/Makefile.in +19 -8
  146. data/vendor/libsodium/src/libsodium/include/sodium.h +3 -0
  147. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +50 -0
  148. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +94 -22
  149. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +6 -6
  150. data/vendor/libsodium/src/libsodium/include/sodium/crypto_core_hchacha20.h +35 -0
  151. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +3 -0
  152. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash.h +89 -0
  153. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2i.h +86 -0
  154. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xsalsa20poly1305.h +6 -6
  155. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_edwards25519sha512batch.h +0 -11
  156. data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +3 -0
  157. data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +3 -0
  158. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +5 -1
  159. data/vendor/libsodium/src/libsodium/sodium/common.h +150 -0
  160. data/vendor/libsodium/src/libsodium/sodium/core.c +3 -1
  161. data/vendor/libsodium/src/libsodium/sodium/runtime.c +37 -19
  162. data/vendor/libsodium/src/libsodium/sodium/utils.c +18 -9
  163. data/vendor/libsodium/test/Makefile.in +9 -0
  164. data/vendor/libsodium/test/default/Makefile.am +10 -0
  165. data/vendor/libsodium/test/default/Makefile.in +53 -20
  166. data/vendor/libsodium/test/default/aead_aes256gcm.c +43 -17
  167. data/vendor/libsodium/test/default/aead_chacha20poly1305.c +179 -86
  168. data/vendor/libsodium/test/default/auth7.c +5 -5
  169. data/vendor/libsodium/test/default/box.c +4 -4
  170. data/vendor/libsodium/test/default/box2.c +1 -1
  171. data/vendor/libsodium/test/default/core6.c +1 -1
  172. data/vendor/libsodium/test/default/generichash.c +12 -1
  173. data/vendor/libsodium/test/default/generichash2.c +2 -2
  174. data/vendor/libsodium/test/default/generichash3.c +21 -0
  175. data/vendor/libsodium/test/default/pwhash.c +186 -168
  176. data/vendor/libsodium/test/default/pwhash.exp +11 -30
  177. data/vendor/libsodium/test/default/pwhash_scrypt.c +349 -0
  178. data/vendor/libsodium/test/default/pwhash_scrypt.exp +31 -0
  179. data/vendor/libsodium/test/default/secretbox.c +1 -1
  180. data/vendor/libsodium/test/default/secretbox2.c +1 -1
  181. data/vendor/libsodium/test/default/sign.c +15 -0
  182. data/vendor/libsodium/test/default/sodium_utils2.c +8 -3
  183. data/vendor/libsodium/test/default/sodium_utils3.c +4 -2
  184. data/vendor/libsodium/test/default/verify1.c +0 -4
  185. data/vendor/libsodium/test/quirks/quirks.h +3 -0
  186. metadata +37 -22
  187. data/vendor/libsodium/builds/msvc/vs2010/test/test.props +0 -43
  188. data/vendor/libsodium/builds/msvc/vs2010/test/test.runner.bat +0 -78
  189. data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj +0 -244
  190. data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj.filters +0 -192
  191. data/vendor/libsodium/builds/msvc/vs2012/test/test.props +0 -43
  192. data/vendor/libsodium/builds/msvc/vs2012/test/test.runner.bat +0 -78
  193. data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj +0 -244
  194. data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj.filters +0 -192
  195. data/vendor/libsodium/builds/msvc/vs2013/test/test.props +0 -43
  196. data/vendor/libsodium/builds/msvc/vs2013/test/test.runner.bat +0 -78
  197. data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj +0 -244
  198. data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj.filters +0 -192
  199. data/vendor/libsodium/builds/msvc/vs2015/test/test.props +0 -43
  200. data/vendor/libsodium/builds/msvc/vs2015/test/test.runner.bat +0 -78
  201. data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj +0 -244
  202. data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj.filters +0 -192
  203. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sysendian.h +0 -146
  204. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c +0 -64
@@ -35,9 +35,9 @@ crypto_box_seal(unsigned char *c, const unsigned char *m,
35
35
  _crypto_box_seal_nonce(nonce, epk, pk);
36
36
  ret = crypto_box_easy(c + crypto_box_PUBLICKEYBYTES, m, mlen,
37
37
  nonce, pk, esk);
38
- sodium_memzero(nonce, sizeof nonce);
39
- sodium_memzero(epk, sizeof epk);
40
38
  sodium_memzero(esk, sizeof esk);
39
+ sodium_memzero(epk, sizeof epk);
40
+ sodium_memzero(nonce, sizeof nonce);
41
41
 
42
42
  return ret;
43
43
  }
@@ -2,9 +2,6 @@
2
2
  #include "crypto_core_hsalsa20.h"
3
3
  #include "crypto_scalarmult_curve25519.h"
4
4
 
5
- static const unsigned char sigma[16] = {
6
- 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'
7
- };
8
5
  static const unsigned char n[16] = {0};
9
6
 
10
7
  int crypto_box_curve25519xsalsa20poly1305_beforenm(
@@ -17,5 +14,5 @@ int crypto_box_curve25519xsalsa20poly1305_beforenm(
17
14
  if (crypto_scalarmult_curve25519(s,sk,pk) != 0) {
18
15
  return -1;
19
16
  }
20
- return crypto_core_hsalsa20(k,n,s,sigma);
17
+ return crypto_core_hsalsa20(k,n,s,NULL);
21
18
  }
@@ -6,988 +6,992 @@
6
6
 
7
7
  static uint64_t load_3(const unsigned char *in)
8
8
  {
9
- uint64_t result;
10
- result = (uint64_t) in[0];
11
- result |= ((uint64_t) in[1]) << 8;
12
- result |= ((uint64_t) in[2]) << 16;
13
- return result;
9
+ uint64_t result;
10
+ result = (uint64_t) in[0];
11
+ result |= ((uint64_t) in[1]) << 8;
12
+ result |= ((uint64_t) in[2]) << 16;
13
+
14
+ return result;
14
15
  }
15
16
 
16
17
  static uint64_t load_4(const unsigned char *in)
17
18
  {
18
- uint64_t result;
19
- result = (uint64_t) in[0];
20
- result |= ((uint64_t) in[1]) << 8;
21
- result |= ((uint64_t) in[2]) << 16;
22
- result |= ((uint64_t) in[3]) << 24;
23
- return result;
19
+ uint64_t result;
20
+ result = (uint64_t) in[0];
21
+ result |= ((uint64_t) in[1]) << 8;
22
+ result |= ((uint64_t) in[2]) << 16;
23
+ result |= ((uint64_t) in[3]) << 24;
24
+
25
+ return result;
24
26
  }
25
27
 
26
28
  /*
27
- h = 0
28
- */
29
+ h = 0
30
+ */
29
31
 
30
32
  void fe_0(fe h)
31
33
  {
32
- memset(&h[0], 0, 10 * sizeof h[0]);
34
+ memset(&h[0], 0, 10 * sizeof h[0]);
33
35
  }
34
36
 
35
37
  /*
36
- h = 1
37
- */
38
+ h = 1
39
+ */
38
40
 
39
41
  void fe_1(fe h)
40
42
  {
41
- h[0] = 1;
42
- h[1] = 0;
43
- memset(&h[2], 0, 8 * sizeof h[0]);
43
+ h[0] = 1;
44
+ h[1] = 0;
45
+ memset(&h[2], 0, 8 * sizeof h[0]);
44
46
  }
45
47
 
46
48
  /*
47
- h = f + g
48
- Can overlap h with f or g.
49
-
50
- Preconditions:
51
- |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
52
- |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
53
-
54
- Postconditions:
55
- |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
56
- */
49
+ h = f + g
50
+ Can overlap h with f or g.
51
+ *
52
+ Preconditions:
53
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
54
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
55
+ *
56
+ Postconditions:
57
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
58
+ */
57
59
 
58
60
  void fe_add(fe h,const fe f,const fe g)
59
61
  {
60
- int32_t f0 = f[0];
61
- int32_t f1 = f[1];
62
- int32_t f2 = f[2];
63
- int32_t f3 = f[3];
64
- int32_t f4 = f[4];
65
- int32_t f5 = f[5];
66
- int32_t f6 = f[6];
67
- int32_t f7 = f[7];
68
- int32_t f8 = f[8];
69
- int32_t f9 = f[9];
70
- int32_t g0 = g[0];
71
- int32_t g1 = g[1];
72
- int32_t g2 = g[2];
73
- int32_t g3 = g[3];
74
- int32_t g4 = g[4];
75
- int32_t g5 = g[5];
76
- int32_t g6 = g[6];
77
- int32_t g7 = g[7];
78
- int32_t g8 = g[8];
79
- int32_t g9 = g[9];
80
- int32_t h0 = f0 + g0;
81
- int32_t h1 = f1 + g1;
82
- int32_t h2 = f2 + g2;
83
- int32_t h3 = f3 + g3;
84
- int32_t h4 = f4 + g4;
85
- int32_t h5 = f5 + g5;
86
- int32_t h6 = f6 + g6;
87
- int32_t h7 = f7 + g7;
88
- int32_t h8 = f8 + g8;
89
- int32_t h9 = f9 + g9;
90
- h[0] = h0;
91
- h[1] = h1;
92
- h[2] = h2;
93
- h[3] = h3;
94
- h[4] = h4;
95
- h[5] = h5;
96
- h[6] = h6;
97
- h[7] = h7;
98
- h[8] = h8;
99
- h[9] = h9;
62
+ int32_t f0 = f[0];
63
+ int32_t f1 = f[1];
64
+ int32_t f2 = f[2];
65
+ int32_t f3 = f[3];
66
+ int32_t f4 = f[4];
67
+ int32_t f5 = f[5];
68
+ int32_t f6 = f[6];
69
+ int32_t f7 = f[7];
70
+ int32_t f8 = f[8];
71
+ int32_t f9 = f[9];
72
+ int32_t g0 = g[0];
73
+ int32_t g1 = g[1];
74
+ int32_t g2 = g[2];
75
+ int32_t g3 = g[3];
76
+ int32_t g4 = g[4];
77
+ int32_t g5 = g[5];
78
+ int32_t g6 = g[6];
79
+ int32_t g7 = g[7];
80
+ int32_t g8 = g[8];
81
+ int32_t g9 = g[9];
82
+ int32_t h0 = f0 + g0;
83
+ int32_t h1 = f1 + g1;
84
+ int32_t h2 = f2 + g2;
85
+ int32_t h3 = f3 + g3;
86
+ int32_t h4 = f4 + g4;
87
+ int32_t h5 = f5 + g5;
88
+ int32_t h6 = f6 + g6;
89
+ int32_t h7 = f7 + g7;
90
+ int32_t h8 = f8 + g8;
91
+ int32_t h9 = f9 + g9;
92
+ h[0] = h0;
93
+ h[1] = h1;
94
+ h[2] = h2;
95
+ h[3] = h3;
96
+ h[4] = h4;
97
+ h[5] = h5;
98
+ h[6] = h6;
99
+ h[7] = h7;
100
+ h[8] = h8;
101
+ h[9] = h9;
100
102
  }
101
103
 
102
104
  /*
103
- Replace (f,g) with (g,g) if b == 1;
104
- replace (f,g) with (f,g) if b == 0.
105
-
106
- Preconditions: b in {0,1}.
107
- */
105
+ Replace (f,g) with (g,g) if b == 1;
106
+ replace (f,g) with (f,g) if b == 0.
107
+ *
108
+ Preconditions: b in {0,1}.
109
+ */
108
110
 
109
111
  void fe_cmov(fe f,const fe g,unsigned int b)
110
112
  {
111
- int32_t f0 = f[0];
112
- int32_t f1 = f[1];
113
- int32_t f2 = f[2];
114
- int32_t f3 = f[3];
115
- int32_t f4 = f[4];
116
- int32_t f5 = f[5];
117
- int32_t f6 = f[6];
118
- int32_t f7 = f[7];
119
- int32_t f8 = f[8];
120
- int32_t f9 = f[9];
121
- int32_t g0 = g[0];
122
- int32_t g1 = g[1];
123
- int32_t g2 = g[2];
124
- int32_t g3 = g[3];
125
- int32_t g4 = g[4];
126
- int32_t g5 = g[5];
127
- int32_t g6 = g[6];
128
- int32_t g7 = g[7];
129
- int32_t g8 = g[8];
130
- int32_t g9 = g[9];
131
- int32_t x0 = f0 ^ g0;
132
- int32_t x1 = f1 ^ g1;
133
- int32_t x2 = f2 ^ g2;
134
- int32_t x3 = f3 ^ g3;
135
- int32_t x4 = f4 ^ g4;
136
- int32_t x5 = f5 ^ g5;
137
- int32_t x6 = f6 ^ g6;
138
- int32_t x7 = f7 ^ g7;
139
- int32_t x8 = f8 ^ g8;
140
- int32_t x9 = f9 ^ g9;
141
- b = (unsigned int) (- (int) b);
142
- x0 &= b;
143
- x1 &= b;
144
- x2 &= b;
145
- x3 &= b;
146
- x4 &= b;
147
- x5 &= b;
148
- x6 &= b;
149
- x7 &= b;
150
- x8 &= b;
151
- x9 &= b;
152
- f[0] = f0 ^ x0;
153
- f[1] = f1 ^ x1;
154
- f[2] = f2 ^ x2;
155
- f[3] = f3 ^ x3;
156
- f[4] = f4 ^ x4;
157
- f[5] = f5 ^ x5;
158
- f[6] = f6 ^ x6;
159
- f[7] = f7 ^ x7;
160
- f[8] = f8 ^ x8;
161
- f[9] = f9 ^ x9;
113
+ int32_t f0 = f[0];
114
+ int32_t f1 = f[1];
115
+ int32_t f2 = f[2];
116
+ int32_t f3 = f[3];
117
+ int32_t f4 = f[4];
118
+ int32_t f5 = f[5];
119
+ int32_t f6 = f[6];
120
+ int32_t f7 = f[7];
121
+ int32_t f8 = f[8];
122
+ int32_t f9 = f[9];
123
+ int32_t g0 = g[0];
124
+ int32_t g1 = g[1];
125
+ int32_t g2 = g[2];
126
+ int32_t g3 = g[3];
127
+ int32_t g4 = g[4];
128
+ int32_t g5 = g[5];
129
+ int32_t g6 = g[6];
130
+ int32_t g7 = g[7];
131
+ int32_t g8 = g[8];
132
+ int32_t g9 = g[9];
133
+ int32_t x0 = f0 ^ g0;
134
+ int32_t x1 = f1 ^ g1;
135
+ int32_t x2 = f2 ^ g2;
136
+ int32_t x3 = f3 ^ g3;
137
+ int32_t x4 = f4 ^ g4;
138
+ int32_t x5 = f5 ^ g5;
139
+ int32_t x6 = f6 ^ g6;
140
+ int32_t x7 = f7 ^ g7;
141
+ int32_t x8 = f8 ^ g8;
142
+ int32_t x9 = f9 ^ g9;
143
+ b = (unsigned int) (- (int) b);
144
+ x0 &= b;
145
+ x1 &= b;
146
+ x2 &= b;
147
+ x3 &= b;
148
+ x4 &= b;
149
+ x5 &= b;
150
+ x6 &= b;
151
+ x7 &= b;
152
+ x8 &= b;
153
+ x9 &= b;
154
+ f[0] = f0 ^ x0;
155
+ f[1] = f1 ^ x1;
156
+ f[2] = f2 ^ x2;
157
+ f[3] = f3 ^ x3;
158
+ f[4] = f4 ^ x4;
159
+ f[5] = f5 ^ x5;
160
+ f[6] = f6 ^ x6;
161
+ f[7] = f7 ^ x7;
162
+ f[8] = f8 ^ x8;
163
+ f[9] = f9 ^ x9;
162
164
  }
163
165
 
164
166
  /*
165
- h = f
166
- */
167
+ h = f
168
+ */
167
169
 
168
170
  void fe_copy(fe h,const fe f)
169
171
  {
170
- int32_t f0 = f[0];
171
- int32_t f1 = f[1];
172
- int32_t f2 = f[2];
173
- int32_t f3 = f[3];
174
- int32_t f4 = f[4];
175
- int32_t f5 = f[5];
176
- int32_t f6 = f[6];
177
- int32_t f7 = f[7];
178
- int32_t f8 = f[8];
179
- int32_t f9 = f[9];
180
- h[0] = f0;
181
- h[1] = f1;
182
- h[2] = f2;
183
- h[3] = f3;
184
- h[4] = f4;
185
- h[5] = f5;
186
- h[6] = f6;
187
- h[7] = f7;
188
- h[8] = f8;
189
- h[9] = f9;
172
+ int32_t f0 = f[0];
173
+ int32_t f1 = f[1];
174
+ int32_t f2 = f[2];
175
+ int32_t f3 = f[3];
176
+ int32_t f4 = f[4];
177
+ int32_t f5 = f[5];
178
+ int32_t f6 = f[6];
179
+ int32_t f7 = f[7];
180
+ int32_t f8 = f[8];
181
+ int32_t f9 = f[9];
182
+ h[0] = f0;
183
+ h[1] = f1;
184
+ h[2] = f2;
185
+ h[3] = f3;
186
+ h[4] = f4;
187
+ h[5] = f5;
188
+ h[6] = f6;
189
+ h[7] = f7;
190
+ h[8] = f8;
191
+ h[9] = f9;
190
192
  }
191
193
 
192
194
  /*
193
- Ignores top bit of h.
194
- */
195
+ Ignores top bit of h.
196
+ */
195
197
 
196
198
  void fe_frombytes(fe h,const unsigned char *s)
197
199
  {
198
- int64_t h0 = load_4(s);
199
- int64_t h1 = load_3(s + 4) << 6;
200
- int64_t h2 = load_3(s + 7) << 5;
201
- int64_t h3 = load_3(s + 10) << 3;
202
- int64_t h4 = load_3(s + 13) << 2;
203
- int64_t h5 = load_4(s + 16);
204
- int64_t h6 = load_3(s + 20) << 7;
205
- int64_t h7 = load_3(s + 23) << 5;
206
- int64_t h8 = load_3(s + 26) << 4;
207
- int64_t h9 = (load_3(s + 29) & 8388607) << 2;
208
- int64_t carry0;
209
- int64_t carry1;
210
- int64_t carry2;
211
- int64_t carry3;
212
- int64_t carry4;
213
- int64_t carry5;
214
- int64_t carry6;
215
- int64_t carry7;
216
- int64_t carry8;
217
- int64_t carry9;
218
-
219
- carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
220
- carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
221
- carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
222
- carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
223
- carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
224
-
225
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
226
- carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
227
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
228
- carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
229
- carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
230
-
231
- h[0] = (int32_t) h0;
232
- h[1] = (int32_t) h1;
233
- h[2] = (int32_t) h2;
234
- h[3] = (int32_t) h3;
235
- h[4] = (int32_t) h4;
236
- h[5] = (int32_t) h5;
237
- h[6] = (int32_t) h6;
238
- h[7] = (int32_t) h7;
239
- h[8] = (int32_t) h8;
240
- h[9] = (int32_t) h9;
200
+ int64_t h0 = load_4(s);
201
+ int64_t h1 = load_3(s + 4) << 6;
202
+ int64_t h2 = load_3(s + 7) << 5;
203
+ int64_t h3 = load_3(s + 10) << 3;
204
+ int64_t h4 = load_3(s + 13) << 2;
205
+ int64_t h5 = load_4(s + 16);
206
+ int64_t h6 = load_3(s + 20) << 7;
207
+ int64_t h7 = load_3(s + 23) << 5;
208
+ int64_t h8 = load_3(s + 26) << 4;
209
+ int64_t h9 = (load_3(s + 29) & 8388607) << 2;
210
+ int64_t carry0;
211
+ int64_t carry1;
212
+ int64_t carry2;
213
+ int64_t carry3;
214
+ int64_t carry4;
215
+ int64_t carry5;
216
+ int64_t carry6;
217
+ int64_t carry7;
218
+ int64_t carry8;
219
+ int64_t carry9;
220
+
221
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
222
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
223
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
224
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
225
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
226
+
227
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
228
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
229
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
230
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
231
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
232
+
233
+ h[0] = (int32_t) h0;
234
+ h[1] = (int32_t) h1;
235
+ h[2] = (int32_t) h2;
236
+ h[3] = (int32_t) h3;
237
+ h[4] = (int32_t) h4;
238
+ h[5] = (int32_t) h5;
239
+ h[6] = (int32_t) h6;
240
+ h[7] = (int32_t) h7;
241
+ h[8] = (int32_t) h8;
242
+ h[9] = (int32_t) h9;
241
243
  }
242
244
 
243
245
  /*
244
- Preconditions:
245
- |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
246
-
247
- Write p=2^255-19; q=floor(h/p).
248
- Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
249
-
250
- Proof:
251
- Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
252
- Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
253
-
254
- Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
255
- Then 0<y<1.
256
-
257
- Write r=h-pq.
258
- Have 0<=r<=p-1=2^255-20.
259
- Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
260
-
261
- Write x=r+19(2^-255)r+y.
262
- Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
263
-
264
- Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
265
- so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
266
- */
246
+ Preconditions:
247
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
248
+ *
249
+ Write p=2^255-19; q=floor(h/p).
250
+ Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
251
+ *
252
+ Proof:
253
+ Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
254
+ Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
255
+ *
256
+ Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
257
+ Then 0<y<1.
258
+ *
259
+ Write r=h-pq.
260
+ Have 0<=r<=p-1=2^255-20.
261
+ Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
262
+ *
263
+ Write x=r+19(2^-255)r+y.
264
+ Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
265
+ *
266
+ Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
267
+ so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
268
+ */
267
269
 
268
270
  void fe_tobytes(unsigned char *s,const fe h)
269
271
  {
270
- int32_t h0 = h[0];
271
- int32_t h1 = h[1];
272
- int32_t h2 = h[2];
273
- int32_t h3 = h[3];
274
- int32_t h4 = h[4];
275
- int32_t h5 = h[5];
276
- int32_t h6 = h[6];
277
- int32_t h7 = h[7];
278
- int32_t h8 = h[8];
279
- int32_t h9 = h[9];
280
- int32_t q;
281
- int32_t carry0;
282
- int32_t carry1;
283
- int32_t carry2;
284
- int32_t carry3;
285
- int32_t carry4;
286
- int32_t carry5;
287
- int32_t carry6;
288
- int32_t carry7;
289
- int32_t carry8;
290
- int32_t carry9;
291
-
292
- q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25;
293
- q = (h0 + q) >> 26;
294
- q = (h1 + q) >> 25;
295
- q = (h2 + q) >> 26;
296
- q = (h3 + q) >> 25;
297
- q = (h4 + q) >> 26;
298
- q = (h5 + q) >> 25;
299
- q = (h6 + q) >> 26;
300
- q = (h7 + q) >> 25;
301
- q = (h8 + q) >> 26;
302
- q = (h9 + q) >> 25;
303
-
304
- /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
305
- h0 += 19 * q;
306
- /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
307
-
308
- carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 * ((uint32_t) 1L << 26);
309
- carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 * ((uint32_t) 1L << 25);
310
- carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 * ((uint32_t) 1L << 26);
311
- carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 * ((uint32_t) 1L << 25);
312
- carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 * ((uint32_t) 1L << 26);
313
- carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 * ((uint32_t) 1L << 25);
314
- carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 * ((uint32_t) 1L << 26);
315
- carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 * ((uint32_t) 1L << 25);
316
- carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 * ((uint32_t) 1L << 26);
317
- carry9 = h9 >> 25; h9 -= carry9 * ((uint32_t) 1L << 25);
318
- /* h10 = carry9 */
319
-
320
- /*
321
- Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
322
- Have h0+...+2^230 h9 between 0 and 2^255-1;
323
- evidently 2^255 h10-2^255 q = 0.
324
- Goal: Output h0+...+2^230 h9.
325
- */
326
-
327
- s[0] = h0 >> 0;
328
- s[1] = h0 >> 8;
329
- s[2] = h0 >> 16;
330
- s[3] = (h0 >> 24) | (h1 * ((uint32_t) 1 << 2));
331
- s[4] = h1 >> 6;
332
- s[5] = h1 >> 14;
333
- s[6] = (h1 >> 22) | (h2 * ((uint32_t) 1 << 3));
334
- s[7] = h2 >> 5;
335
- s[8] = h2 >> 13;
336
- s[9] = (h2 >> 21) | (h3 * ((uint32_t) 1 << 5));
337
- s[10] = h3 >> 3;
338
- s[11] = h3 >> 11;
339
- s[12] = (h3 >> 19) | (h4 * ((uint32_t) 1 << 6));
340
- s[13] = h4 >> 2;
341
- s[14] = h4 >> 10;
342
- s[15] = h4 >> 18;
343
- s[16] = h5 >> 0;
344
- s[17] = h5 >> 8;
345
- s[18] = h5 >> 16;
346
- s[19] = (h5 >> 24) | (h6 * ((uint32_t) 1 << 1));
347
- s[20] = h6 >> 7;
348
- s[21] = h6 >> 15;
349
- s[22] = (h6 >> 23) | (h7 * ((uint32_t) 1 << 3));
350
- s[23] = h7 >> 5;
351
- s[24] = h7 >> 13;
352
- s[25] = (h7 >> 21) | (h8 * ((uint32_t) 1 << 4));
353
- s[26] = h8 >> 4;
354
- s[27] = h8 >> 12;
355
- s[28] = (h8 >> 20) | (h9 * ((uint32_t) 1 << 6));
356
- s[29] = h9 >> 2;
357
- s[30] = h9 >> 10;
358
- s[31] = h9 >> 18;
272
+ int32_t h0 = h[0];
273
+ int32_t h1 = h[1];
274
+ int32_t h2 = h[2];
275
+ int32_t h3 = h[3];
276
+ int32_t h4 = h[4];
277
+ int32_t h5 = h[5];
278
+ int32_t h6 = h[6];
279
+ int32_t h7 = h[7];
280
+ int32_t h8 = h[8];
281
+ int32_t h9 = h[9];
282
+ int32_t q;
283
+ int32_t carry0;
284
+ int32_t carry1;
285
+ int32_t carry2;
286
+ int32_t carry3;
287
+ int32_t carry4;
288
+ int32_t carry5;
289
+ int32_t carry6;
290
+ int32_t carry7;
291
+ int32_t carry8;
292
+ int32_t carry9;
293
+
294
+ q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25;
295
+ q = (h0 + q) >> 26;
296
+ q = (h1 + q) >> 25;
297
+ q = (h2 + q) >> 26;
298
+ q = (h3 + q) >> 25;
299
+ q = (h4 + q) >> 26;
300
+ q = (h5 + q) >> 25;
301
+ q = (h6 + q) >> 26;
302
+ q = (h7 + q) >> 25;
303
+ q = (h8 + q) >> 26;
304
+ q = (h9 + q) >> 25;
305
+
306
+ /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
307
+ h0 += 19 * q;
308
+ /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
309
+
310
+ carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 * ((uint32_t) 1L << 26);
311
+ carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 * ((uint32_t) 1L << 25);
312
+ carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 * ((uint32_t) 1L << 26);
313
+ carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 * ((uint32_t) 1L << 25);
314
+ carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 * ((uint32_t) 1L << 26);
315
+ carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 * ((uint32_t) 1L << 25);
316
+ carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 * ((uint32_t) 1L << 26);
317
+ carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 * ((uint32_t) 1L << 25);
318
+ carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 * ((uint32_t) 1L << 26);
319
+ carry9 = h9 >> 25; h9 -= carry9 * ((uint32_t) 1L << 25);
320
+ /* h10 = carry9 */
321
+
322
+ /*
323
+ Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
324
+ Have h0+...+2^230 h9 between 0 and 2^255-1;
325
+ evidently 2^255 h10-2^255 q = 0.
326
+ Goal: Output h0+...+2^230 h9.
327
+ */
328
+
329
+ s[0] = h0 >> 0;
330
+ s[1] = h0 >> 8;
331
+ s[2] = h0 >> 16;
332
+ s[3] = (h0 >> 24) | (h1 * ((uint32_t) 1 << 2));
333
+ s[4] = h1 >> 6;
334
+ s[5] = h1 >> 14;
335
+ s[6] = (h1 >> 22) | (h2 * ((uint32_t) 1 << 3));
336
+ s[7] = h2 >> 5;
337
+ s[8] = h2 >> 13;
338
+ s[9] = (h2 >> 21) | (h3 * ((uint32_t) 1 << 5));
339
+ s[10] = h3 >> 3;
340
+ s[11] = h3 >> 11;
341
+ s[12] = (h3 >> 19) | (h4 * ((uint32_t) 1 << 6));
342
+ s[13] = h4 >> 2;
343
+ s[14] = h4 >> 10;
344
+ s[15] = h4 >> 18;
345
+ s[16] = h5 >> 0;
346
+ s[17] = h5 >> 8;
347
+ s[18] = h5 >> 16;
348
+ s[19] = (h5 >> 24) | (h6 * ((uint32_t) 1 << 1));
349
+ s[20] = h6 >> 7;
350
+ s[21] = h6 >> 15;
351
+ s[22] = (h6 >> 23) | (h7 * ((uint32_t) 1 << 3));
352
+ s[23] = h7 >> 5;
353
+ s[24] = h7 >> 13;
354
+ s[25] = (h7 >> 21) | (h8 * ((uint32_t) 1 << 4));
355
+ s[26] = h8 >> 4;
356
+ s[27] = h8 >> 12;
357
+ s[28] = (h8 >> 20) | (h9 * ((uint32_t) 1 << 6));
358
+ s[29] = h9 >> 2;
359
+ s[30] = h9 >> 10;
360
+ s[31] = h9 >> 18;
359
361
  }
360
362
 
361
363
  /*
362
- return 1 if f is in {1,3,5,...,q-2}
363
- return 0 if f is in {0,2,4,...,q-1}
364
-
365
- Preconditions:
366
- |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
367
- */
364
+ return 1 if f is in {1,3,5,...,q-2}
365
+ return 0 if f is in {0,2,4,...,q-1}
366
+ *
367
+ Preconditions:
368
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
369
+ */
368
370
 
369
371
  int fe_isnegative(const fe f)
370
372
  {
371
- unsigned char s[32];
372
- fe_tobytes(s,f);
373
- return s[0] & 1;
373
+ unsigned char s[32];
374
+ fe_tobytes(s,f);
375
+
376
+ return s[0] & 1;
374
377
  }
375
378
 
376
379
  /*
377
- return 1 if f == 0
378
- return 0 if f != 0
379
-
380
- Preconditions:
381
- |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
382
- */
380
+ return 1 if f == 0
381
+ return 0 if f != 0
382
+ *
383
+ Preconditions:
384
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
385
+ */
383
386
 
384
387
  static unsigned char zero[32];
385
388
 
386
389
  int fe_isnonzero(const fe f)
387
390
  {
388
- unsigned char s[32];
389
- fe_tobytes(s,f);
390
- return crypto_verify_32(s,zero);
391
+ unsigned char s[32];
392
+ fe_tobytes(s,f);
393
+
394
+ return crypto_verify_32(s,zero);
391
395
  }
392
396
 
393
397
  /*
394
- h = f * g
395
- Can overlap h with f or g.
396
-
397
- Preconditions:
398
- |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
399
- |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
400
-
401
- Postconditions:
402
- |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
403
- */
398
+ h = f * g
399
+ Can overlap h with f or g.
400
+ *
401
+ Preconditions:
402
+ |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
403
+ |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
404
+ *
405
+ Postconditions:
406
+ |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
407
+ */
404
408
 
405
409
  /*
406
- Notes on implementation strategy:
407
-
408
- Using schoolbook multiplication.
409
- Karatsuba would save a little in some cost models.
410
-
411
- Most multiplications by 2 and 19 are 32-bit precomputations;
412
- cheaper than 64-bit postcomputations.
413
-
414
- There is one remaining multiplication by 19 in the carry chain;
415
- one *19 precomputation can be merged into this,
416
- but the resulting data flow is considerably less clean.
417
-
418
- There are 12 carries below.
419
- 10 of them are 2-way parallelizable and vectorizable.
420
- Can get away with 11 carries, but then data flow is much deeper.
421
-
422
- With tighter constraints on inputs can squeeze carries into int32.
423
- */
410
+ Notes on implementation strategy:
411
+ *
412
+ Using schoolbook multiplication.
413
+ Karatsuba would save a little in some cost models.
414
+ *
415
+ Most multiplications by 2 and 19 are 32-bit precomputations;
416
+ cheaper than 64-bit postcomputations.
417
+ *
418
+ There is one remaining multiplication by 19 in the carry chain;
419
+ one *19 precomputation can be merged into this,
420
+ but the resulting data flow is considerably less clean.
421
+ *
422
+ There are 12 carries below.
423
+ 10 of them are 2-way parallelizable and vectorizable.
424
+ Can get away with 11 carries, but then data flow is much deeper.
425
+ *
426
+ With tighter constraints on inputs can squeeze carries into int32.
427
+ */
424
428
 
425
429
  void fe_mul(fe h,const fe f,const fe g)
426
430
  {
427
- int32_t f0 = f[0];
428
- int32_t f1 = f[1];
429
- int32_t f2 = f[2];
430
- int32_t f3 = f[3];
431
- int32_t f4 = f[4];
432
- int32_t f5 = f[5];
433
- int32_t f6 = f[6];
434
- int32_t f7 = f[7];
435
- int32_t f8 = f[8];
436
- int32_t f9 = f[9];
437
- int32_t g0 = g[0];
438
- int32_t g1 = g[1];
439
- int32_t g2 = g[2];
440
- int32_t g3 = g[3];
441
- int32_t g4 = g[4];
442
- int32_t g5 = g[5];
443
- int32_t g6 = g[6];
444
- int32_t g7 = g[7];
445
- int32_t g8 = g[8];
446
- int32_t g9 = g[9];
447
- int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
448
- int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
449
- int32_t g3_19 = 19 * g3;
450
- int32_t g4_19 = 19 * g4;
451
- int32_t g5_19 = 19 * g5;
452
- int32_t g6_19 = 19 * g6;
453
- int32_t g7_19 = 19 * g7;
454
- int32_t g8_19 = 19 * g8;
455
- int32_t g9_19 = 19 * g9;
456
- int32_t f1_2 = 2 * f1;
457
- int32_t f3_2 = 2 * f3;
458
- int32_t f5_2 = 2 * f5;
459
- int32_t f7_2 = 2 * f7;
460
- int32_t f9_2 = 2 * f9;
461
- int64_t f0g0 = f0 * (int64_t) g0;
462
- int64_t f0g1 = f0 * (int64_t) g1;
463
- int64_t f0g2 = f0 * (int64_t) g2;
464
- int64_t f0g3 = f0 * (int64_t) g3;
465
- int64_t f0g4 = f0 * (int64_t) g4;
466
- int64_t f0g5 = f0 * (int64_t) g5;
467
- int64_t f0g6 = f0 * (int64_t) g6;
468
- int64_t f0g7 = f0 * (int64_t) g7;
469
- int64_t f0g8 = f0 * (int64_t) g8;
470
- int64_t f0g9 = f0 * (int64_t) g9;
471
- int64_t f1g0 = f1 * (int64_t) g0;
472
- int64_t f1g1_2 = f1_2 * (int64_t) g1;
473
- int64_t f1g2 = f1 * (int64_t) g2;
474
- int64_t f1g3_2 = f1_2 * (int64_t) g3;
475
- int64_t f1g4 = f1 * (int64_t) g4;
476
- int64_t f1g5_2 = f1_2 * (int64_t) g5;
477
- int64_t f1g6 = f1 * (int64_t) g6;
478
- int64_t f1g7_2 = f1_2 * (int64_t) g7;
479
- int64_t f1g8 = f1 * (int64_t) g8;
480
- int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
481
- int64_t f2g0 = f2 * (int64_t) g0;
482
- int64_t f2g1 = f2 * (int64_t) g1;
483
- int64_t f2g2 = f2 * (int64_t) g2;
484
- int64_t f2g3 = f2 * (int64_t) g3;
485
- int64_t f2g4 = f2 * (int64_t) g4;
486
- int64_t f2g5 = f2 * (int64_t) g5;
487
- int64_t f2g6 = f2 * (int64_t) g6;
488
- int64_t f2g7 = f2 * (int64_t) g7;
489
- int64_t f2g8_19 = f2 * (int64_t) g8_19;
490
- int64_t f2g9_19 = f2 * (int64_t) g9_19;
491
- int64_t f3g0 = f3 * (int64_t) g0;
492
- int64_t f3g1_2 = f3_2 * (int64_t) g1;
493
- int64_t f3g2 = f3 * (int64_t) g2;
494
- int64_t f3g3_2 = f3_2 * (int64_t) g3;
495
- int64_t f3g4 = f3 * (int64_t) g4;
496
- int64_t f3g5_2 = f3_2 * (int64_t) g5;
497
- int64_t f3g6 = f3 * (int64_t) g6;
498
- int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
499
- int64_t f3g8_19 = f3 * (int64_t) g8_19;
500
- int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
501
- int64_t f4g0 = f4 * (int64_t) g0;
502
- int64_t f4g1 = f4 * (int64_t) g1;
503
- int64_t f4g2 = f4 * (int64_t) g2;
504
- int64_t f4g3 = f4 * (int64_t) g3;
505
- int64_t f4g4 = f4 * (int64_t) g4;
506
- int64_t f4g5 = f4 * (int64_t) g5;
507
- int64_t f4g6_19 = f4 * (int64_t) g6_19;
508
- int64_t f4g7_19 = f4 * (int64_t) g7_19;
509
- int64_t f4g8_19 = f4 * (int64_t) g8_19;
510
- int64_t f4g9_19 = f4 * (int64_t) g9_19;
511
- int64_t f5g0 = f5 * (int64_t) g0;
512
- int64_t f5g1_2 = f5_2 * (int64_t) g1;
513
- int64_t f5g2 = f5 * (int64_t) g2;
514
- int64_t f5g3_2 = f5_2 * (int64_t) g3;
515
- int64_t f5g4 = f5 * (int64_t) g4;
516
- int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
517
- int64_t f5g6_19 = f5 * (int64_t) g6_19;
518
- int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
519
- int64_t f5g8_19 = f5 * (int64_t) g8_19;
520
- int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
521
- int64_t f6g0 = f6 * (int64_t) g0;
522
- int64_t f6g1 = f6 * (int64_t) g1;
523
- int64_t f6g2 = f6 * (int64_t) g2;
524
- int64_t f6g3 = f6 * (int64_t) g3;
525
- int64_t f6g4_19 = f6 * (int64_t) g4_19;
526
- int64_t f6g5_19 = f6 * (int64_t) g5_19;
527
- int64_t f6g6_19 = f6 * (int64_t) g6_19;
528
- int64_t f6g7_19 = f6 * (int64_t) g7_19;
529
- int64_t f6g8_19 = f6 * (int64_t) g8_19;
530
- int64_t f6g9_19 = f6 * (int64_t) g9_19;
531
- int64_t f7g0 = f7 * (int64_t) g0;
532
- int64_t f7g1_2 = f7_2 * (int64_t) g1;
533
- int64_t f7g2 = f7 * (int64_t) g2;
534
- int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
535
- int64_t f7g4_19 = f7 * (int64_t) g4_19;
536
- int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
537
- int64_t f7g6_19 = f7 * (int64_t) g6_19;
538
- int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
539
- int64_t f7g8_19 = f7 * (int64_t) g8_19;
540
- int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
541
- int64_t f8g0 = f8 * (int64_t) g0;
542
- int64_t f8g1 = f8 * (int64_t) g1;
543
- int64_t f8g2_19 = f8 * (int64_t) g2_19;
544
- int64_t f8g3_19 = f8 * (int64_t) g3_19;
545
- int64_t f8g4_19 = f8 * (int64_t) g4_19;
546
- int64_t f8g5_19 = f8 * (int64_t) g5_19;
547
- int64_t f8g6_19 = f8 * (int64_t) g6_19;
548
- int64_t f8g7_19 = f8 * (int64_t) g7_19;
549
- int64_t f8g8_19 = f8 * (int64_t) g8_19;
550
- int64_t f8g9_19 = f8 * (int64_t) g9_19;
551
- int64_t f9g0 = f9 * (int64_t) g0;
552
- int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
553
- int64_t f9g2_19 = f9 * (int64_t) g2_19;
554
- int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
555
- int64_t f9g4_19 = f9 * (int64_t) g4_19;
556
- int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
557
- int64_t f9g6_19 = f9 * (int64_t) g6_19;
558
- int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
559
- int64_t f9g8_19 = f9 * (int64_t) g8_19;
560
- int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
561
- int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
562
- int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
563
- int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
564
- int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
565
- int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
566
- int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
567
- int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
568
- int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
569
- int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
570
- int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
571
- int64_t carry0;
572
- int64_t carry1;
573
- int64_t carry2;
574
- int64_t carry3;
575
- int64_t carry4;
576
- int64_t carry5;
577
- int64_t carry6;
578
- int64_t carry7;
579
- int64_t carry8;
580
- int64_t carry9;
581
-
582
- /*
583
- |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
584
- i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
585
- |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
586
- i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
587
- */
588
-
589
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
590
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
591
- /* |h0| <= 2^25 */
592
- /* |h4| <= 2^25 */
593
- /* |h1| <= 1.71*2^59 */
594
- /* |h5| <= 1.71*2^59 */
595
-
596
- carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
597
- carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
598
- /* |h1| <= 2^24; from now on fits into int32 */
599
- /* |h5| <= 2^24; from now on fits into int32 */
600
- /* |h2| <= 1.41*2^60 */
601
- /* |h6| <= 1.41*2^60 */
602
-
603
- carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
604
- carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
605
- /* |h2| <= 2^25; from now on fits into int32 unchanged */
606
- /* |h6| <= 2^25; from now on fits into int32 unchanged */
607
- /* |h3| <= 1.71*2^59 */
608
- /* |h7| <= 1.71*2^59 */
609
-
610
- carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
611
- carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
612
- /* |h3| <= 2^24; from now on fits into int32 unchanged */
613
- /* |h7| <= 2^24; from now on fits into int32 unchanged */
614
- /* |h4| <= 1.72*2^34 */
615
- /* |h8| <= 1.41*2^60 */
616
-
617
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
618
- carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
619
- /* |h4| <= 2^25; from now on fits into int32 unchanged */
620
- /* |h8| <= 2^25; from now on fits into int32 unchanged */
621
- /* |h5| <= 1.01*2^24 */
622
- /* |h9| <= 1.71*2^59 */
623
-
624
- carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
625
- /* |h9| <= 2^24; from now on fits into int32 unchanged */
626
- /* |h0| <= 1.1*2^39 */
627
-
628
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
629
- /* |h0| <= 2^25; from now on fits into int32 unchanged */
630
- /* |h1| <= 1.01*2^24 */
631
-
632
- h[0] = (int32_t) h0;
633
- h[1] = (int32_t) h1;
634
- h[2] = (int32_t) h2;
635
- h[3] = (int32_t) h3;
636
- h[4] = (int32_t) h4;
637
- h[5] = (int32_t) h5;
638
- h[6] = (int32_t) h6;
639
- h[7] = (int32_t) h7;
640
- h[8] = (int32_t) h8;
641
- h[9] = (int32_t) h9;
431
+ int32_t f0 = f[0];
432
+ int32_t f1 = f[1];
433
+ int32_t f2 = f[2];
434
+ int32_t f3 = f[3];
435
+ int32_t f4 = f[4];
436
+ int32_t f5 = f[5];
437
+ int32_t f6 = f[6];
438
+ int32_t f7 = f[7];
439
+ int32_t f8 = f[8];
440
+ int32_t f9 = f[9];
441
+ int32_t g0 = g[0];
442
+ int32_t g1 = g[1];
443
+ int32_t g2 = g[2];
444
+ int32_t g3 = g[3];
445
+ int32_t g4 = g[4];
446
+ int32_t g5 = g[5];
447
+ int32_t g6 = g[6];
448
+ int32_t g7 = g[7];
449
+ int32_t g8 = g[8];
450
+ int32_t g9 = g[9];
451
+ int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
452
+ int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
453
+ int32_t g3_19 = 19 * g3;
454
+ int32_t g4_19 = 19 * g4;
455
+ int32_t g5_19 = 19 * g5;
456
+ int32_t g6_19 = 19 * g6;
457
+ int32_t g7_19 = 19 * g7;
458
+ int32_t g8_19 = 19 * g8;
459
+ int32_t g9_19 = 19 * g9;
460
+ int32_t f1_2 = 2 * f1;
461
+ int32_t f3_2 = 2 * f3;
462
+ int32_t f5_2 = 2 * f5;
463
+ int32_t f7_2 = 2 * f7;
464
+ int32_t f9_2 = 2 * f9;
465
+ int64_t f0g0 = f0 * (int64_t) g0;
466
+ int64_t f0g1 = f0 * (int64_t) g1;
467
+ int64_t f0g2 = f0 * (int64_t) g2;
468
+ int64_t f0g3 = f0 * (int64_t) g3;
469
+ int64_t f0g4 = f0 * (int64_t) g4;
470
+ int64_t f0g5 = f0 * (int64_t) g5;
471
+ int64_t f0g6 = f0 * (int64_t) g6;
472
+ int64_t f0g7 = f0 * (int64_t) g7;
473
+ int64_t f0g8 = f0 * (int64_t) g8;
474
+ int64_t f0g9 = f0 * (int64_t) g9;
475
+ int64_t f1g0 = f1 * (int64_t) g0;
476
+ int64_t f1g1_2 = f1_2 * (int64_t) g1;
477
+ int64_t f1g2 = f1 * (int64_t) g2;
478
+ int64_t f1g3_2 = f1_2 * (int64_t) g3;
479
+ int64_t f1g4 = f1 * (int64_t) g4;
480
+ int64_t f1g5_2 = f1_2 * (int64_t) g5;
481
+ int64_t f1g6 = f1 * (int64_t) g6;
482
+ int64_t f1g7_2 = f1_2 * (int64_t) g7;
483
+ int64_t f1g8 = f1 * (int64_t) g8;
484
+ int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
485
+ int64_t f2g0 = f2 * (int64_t) g0;
486
+ int64_t f2g1 = f2 * (int64_t) g1;
487
+ int64_t f2g2 = f2 * (int64_t) g2;
488
+ int64_t f2g3 = f2 * (int64_t) g3;
489
+ int64_t f2g4 = f2 * (int64_t) g4;
490
+ int64_t f2g5 = f2 * (int64_t) g5;
491
+ int64_t f2g6 = f2 * (int64_t) g6;
492
+ int64_t f2g7 = f2 * (int64_t) g7;
493
+ int64_t f2g8_19 = f2 * (int64_t) g8_19;
494
+ int64_t f2g9_19 = f2 * (int64_t) g9_19;
495
+ int64_t f3g0 = f3 * (int64_t) g0;
496
+ int64_t f3g1_2 = f3_2 * (int64_t) g1;
497
+ int64_t f3g2 = f3 * (int64_t) g2;
498
+ int64_t f3g3_2 = f3_2 * (int64_t) g3;
499
+ int64_t f3g4 = f3 * (int64_t) g4;
500
+ int64_t f3g5_2 = f3_2 * (int64_t) g5;
501
+ int64_t f3g6 = f3 * (int64_t) g6;
502
+ int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
503
+ int64_t f3g8_19 = f3 * (int64_t) g8_19;
504
+ int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
505
+ int64_t f4g0 = f4 * (int64_t) g0;
506
+ int64_t f4g1 = f4 * (int64_t) g1;
507
+ int64_t f4g2 = f4 * (int64_t) g2;
508
+ int64_t f4g3 = f4 * (int64_t) g3;
509
+ int64_t f4g4 = f4 * (int64_t) g4;
510
+ int64_t f4g5 = f4 * (int64_t) g5;
511
+ int64_t f4g6_19 = f4 * (int64_t) g6_19;
512
+ int64_t f4g7_19 = f4 * (int64_t) g7_19;
513
+ int64_t f4g8_19 = f4 * (int64_t) g8_19;
514
+ int64_t f4g9_19 = f4 * (int64_t) g9_19;
515
+ int64_t f5g0 = f5 * (int64_t) g0;
516
+ int64_t f5g1_2 = f5_2 * (int64_t) g1;
517
+ int64_t f5g2 = f5 * (int64_t) g2;
518
+ int64_t f5g3_2 = f5_2 * (int64_t) g3;
519
+ int64_t f5g4 = f5 * (int64_t) g4;
520
+ int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
521
+ int64_t f5g6_19 = f5 * (int64_t) g6_19;
522
+ int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
523
+ int64_t f5g8_19 = f5 * (int64_t) g8_19;
524
+ int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
525
+ int64_t f6g0 = f6 * (int64_t) g0;
526
+ int64_t f6g1 = f6 * (int64_t) g1;
527
+ int64_t f6g2 = f6 * (int64_t) g2;
528
+ int64_t f6g3 = f6 * (int64_t) g3;
529
+ int64_t f6g4_19 = f6 * (int64_t) g4_19;
530
+ int64_t f6g5_19 = f6 * (int64_t) g5_19;
531
+ int64_t f6g6_19 = f6 * (int64_t) g6_19;
532
+ int64_t f6g7_19 = f6 * (int64_t) g7_19;
533
+ int64_t f6g8_19 = f6 * (int64_t) g8_19;
534
+ int64_t f6g9_19 = f6 * (int64_t) g9_19;
535
+ int64_t f7g0 = f7 * (int64_t) g0;
536
+ int64_t f7g1_2 = f7_2 * (int64_t) g1;
537
+ int64_t f7g2 = f7 * (int64_t) g2;
538
+ int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
539
+ int64_t f7g4_19 = f7 * (int64_t) g4_19;
540
+ int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
541
+ int64_t f7g6_19 = f7 * (int64_t) g6_19;
542
+ int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
543
+ int64_t f7g8_19 = f7 * (int64_t) g8_19;
544
+ int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
545
+ int64_t f8g0 = f8 * (int64_t) g0;
546
+ int64_t f8g1 = f8 * (int64_t) g1;
547
+ int64_t f8g2_19 = f8 * (int64_t) g2_19;
548
+ int64_t f8g3_19 = f8 * (int64_t) g3_19;
549
+ int64_t f8g4_19 = f8 * (int64_t) g4_19;
550
+ int64_t f8g5_19 = f8 * (int64_t) g5_19;
551
+ int64_t f8g6_19 = f8 * (int64_t) g6_19;
552
+ int64_t f8g7_19 = f8 * (int64_t) g7_19;
553
+ int64_t f8g8_19 = f8 * (int64_t) g8_19;
554
+ int64_t f8g9_19 = f8 * (int64_t) g9_19;
555
+ int64_t f9g0 = f9 * (int64_t) g0;
556
+ int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
557
+ int64_t f9g2_19 = f9 * (int64_t) g2_19;
558
+ int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
559
+ int64_t f9g4_19 = f9 * (int64_t) g4_19;
560
+ int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
561
+ int64_t f9g6_19 = f9 * (int64_t) g6_19;
562
+ int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
563
+ int64_t f9g8_19 = f9 * (int64_t) g8_19;
564
+ int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
565
+ int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
566
+ int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
567
+ int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
568
+ int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
569
+ int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
570
+ int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
571
+ int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
572
+ int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
573
+ int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
574
+ int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
575
+ int64_t carry0;
576
+ int64_t carry1;
577
+ int64_t carry2;
578
+ int64_t carry3;
579
+ int64_t carry4;
580
+ int64_t carry5;
581
+ int64_t carry6;
582
+ int64_t carry7;
583
+ int64_t carry8;
584
+ int64_t carry9;
585
+
586
+ /*
587
+ |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
588
+ i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
589
+ |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
590
+ i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
591
+ */
592
+
593
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
594
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
595
+ /* |h0| <= 2^25 */
596
+ /* |h4| <= 2^25 */
597
+ /* |h1| <= 1.71*2^59 */
598
+ /* |h5| <= 1.71*2^59 */
599
+
600
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
601
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
602
+ /* |h1| <= 2^24; from now on fits into int32 */
603
+ /* |h5| <= 2^24; from now on fits into int32 */
604
+ /* |h2| <= 1.41*2^60 */
605
+ /* |h6| <= 1.41*2^60 */
606
+
607
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
608
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
609
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
610
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
611
+ /* |h3| <= 1.71*2^59 */
612
+ /* |h7| <= 1.71*2^59 */
613
+
614
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
615
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
616
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
617
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
618
+ /* |h4| <= 1.72*2^34 */
619
+ /* |h8| <= 1.41*2^60 */
620
+
621
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
622
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
623
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
624
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
625
+ /* |h5| <= 1.01*2^24 */
626
+ /* |h9| <= 1.71*2^59 */
627
+
628
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
629
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
630
+ /* |h0| <= 1.1*2^39 */
631
+
632
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
633
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
634
+ /* |h1| <= 1.01*2^24 */
635
+
636
+ h[0] = (int32_t) h0;
637
+ h[1] = (int32_t) h1;
638
+ h[2] = (int32_t) h2;
639
+ h[3] = (int32_t) h3;
640
+ h[4] = (int32_t) h4;
641
+ h[5] = (int32_t) h5;
642
+ h[6] = (int32_t) h6;
643
+ h[7] = (int32_t) h7;
644
+ h[8] = (int32_t) h8;
645
+ h[9] = (int32_t) h9;
642
646
  }
643
647
 
644
648
  /*
645
- h = -f
646
-
647
- Preconditions:
648
- |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
649
-
650
- Postconditions:
651
- |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
652
- */
649
+ h = -f
650
+ *
651
+ Preconditions:
652
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
653
+ *
654
+ Postconditions:
655
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
656
+ */
653
657
 
654
658
  void fe_neg(fe h,const fe f)
655
659
  {
656
- int32_t f0 = f[0];
657
- int32_t f1 = f[1];
658
- int32_t f2 = f[2];
659
- int32_t f3 = f[3];
660
- int32_t f4 = f[4];
661
- int32_t f5 = f[5];
662
- int32_t f6 = f[6];
663
- int32_t f7 = f[7];
664
- int32_t f8 = f[8];
665
- int32_t f9 = f[9];
666
- int32_t h0 = -f0;
667
- int32_t h1 = -f1;
668
- int32_t h2 = -f2;
669
- int32_t h3 = -f3;
670
- int32_t h4 = -f4;
671
- int32_t h5 = -f5;
672
- int32_t h6 = -f6;
673
- int32_t h7 = -f7;
674
- int32_t h8 = -f8;
675
- int32_t h9 = -f9;
676
- h[0] = h0;
677
- h[1] = h1;
678
- h[2] = h2;
679
- h[3] = h3;
680
- h[4] = h4;
681
- h[5] = h5;
682
- h[6] = h6;
683
- h[7] = h7;
684
- h[8] = h8;
685
- h[9] = h9;
660
+ int32_t f0 = f[0];
661
+ int32_t f1 = f[1];
662
+ int32_t f2 = f[2];
663
+ int32_t f3 = f[3];
664
+ int32_t f4 = f[4];
665
+ int32_t f5 = f[5];
666
+ int32_t f6 = f[6];
667
+ int32_t f7 = f[7];
668
+ int32_t f8 = f[8];
669
+ int32_t f9 = f[9];
670
+ int32_t h0 = -f0;
671
+ int32_t h1 = -f1;
672
+ int32_t h2 = -f2;
673
+ int32_t h3 = -f3;
674
+ int32_t h4 = -f4;
675
+ int32_t h5 = -f5;
676
+ int32_t h6 = -f6;
677
+ int32_t h7 = -f7;
678
+ int32_t h8 = -f8;
679
+ int32_t h9 = -f9;
680
+ h[0] = h0;
681
+ h[1] = h1;
682
+ h[2] = h2;
683
+ h[3] = h3;
684
+ h[4] = h4;
685
+ h[5] = h5;
686
+ h[6] = h6;
687
+ h[7] = h7;
688
+ h[8] = h8;
689
+ h[9] = h9;
686
690
  }
687
691
 
688
692
  /*
689
- h = f * f
690
- Can overlap h with f.
691
-
692
- Preconditions:
693
- |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
694
-
695
- Postconditions:
696
- |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
697
- */
693
+ h = f * f
694
+ Can overlap h with f.
695
+ *
696
+ Preconditions:
697
+ |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
698
+ *
699
+ Postconditions:
700
+ |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
701
+ */
698
702
 
699
703
  /*
700
- See fe_mul.c for discussion of implementation strategy.
701
- */
704
+ See fe_mul.c for discussion of implementation strategy.
705
+ */
702
706
 
703
707
  void fe_sq(fe h,const fe f)
704
708
  {
705
- int32_t f0 = f[0];
706
- int32_t f1 = f[1];
707
- int32_t f2 = f[2];
708
- int32_t f3 = f[3];
709
- int32_t f4 = f[4];
710
- int32_t f5 = f[5];
711
- int32_t f6 = f[6];
712
- int32_t f7 = f[7];
713
- int32_t f8 = f[8];
714
- int32_t f9 = f[9];
715
- int32_t f0_2 = 2 * f0;
716
- int32_t f1_2 = 2 * f1;
717
- int32_t f2_2 = 2 * f2;
718
- int32_t f3_2 = 2 * f3;
719
- int32_t f4_2 = 2 * f4;
720
- int32_t f5_2 = 2 * f5;
721
- int32_t f6_2 = 2 * f6;
722
- int32_t f7_2 = 2 * f7;
723
- int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
724
- int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
725
- int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
726
- int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
727
- int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
728
- int64_t f0f0 = f0 * (int64_t) f0;
729
- int64_t f0f1_2 = f0_2 * (int64_t) f1;
730
- int64_t f0f2_2 = f0_2 * (int64_t) f2;
731
- int64_t f0f3_2 = f0_2 * (int64_t) f3;
732
- int64_t f0f4_2 = f0_2 * (int64_t) f4;
733
- int64_t f0f5_2 = f0_2 * (int64_t) f5;
734
- int64_t f0f6_2 = f0_2 * (int64_t) f6;
735
- int64_t f0f7_2 = f0_2 * (int64_t) f7;
736
- int64_t f0f8_2 = f0_2 * (int64_t) f8;
737
- int64_t f0f9_2 = f0_2 * (int64_t) f9;
738
- int64_t f1f1_2 = f1_2 * (int64_t) f1;
739
- int64_t f1f2_2 = f1_2 * (int64_t) f2;
740
- int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
741
- int64_t f1f4_2 = f1_2 * (int64_t) f4;
742
- int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
743
- int64_t f1f6_2 = f1_2 * (int64_t) f6;
744
- int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
745
- int64_t f1f8_2 = f1_2 * (int64_t) f8;
746
- int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
747
- int64_t f2f2 = f2 * (int64_t) f2;
748
- int64_t f2f3_2 = f2_2 * (int64_t) f3;
749
- int64_t f2f4_2 = f2_2 * (int64_t) f4;
750
- int64_t f2f5_2 = f2_2 * (int64_t) f5;
751
- int64_t f2f6_2 = f2_2 * (int64_t) f6;
752
- int64_t f2f7_2 = f2_2 * (int64_t) f7;
753
- int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
754
- int64_t f2f9_38 = f2 * (int64_t) f9_38;
755
- int64_t f3f3_2 = f3_2 * (int64_t) f3;
756
- int64_t f3f4_2 = f3_2 * (int64_t) f4;
757
- int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
758
- int64_t f3f6_2 = f3_2 * (int64_t) f6;
759
- int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
760
- int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
761
- int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
762
- int64_t f4f4 = f4 * (int64_t) f4;
763
- int64_t f4f5_2 = f4_2 * (int64_t) f5;
764
- int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
765
- int64_t f4f7_38 = f4 * (int64_t) f7_38;
766
- int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
767
- int64_t f4f9_38 = f4 * (int64_t) f9_38;
768
- int64_t f5f5_38 = f5 * (int64_t) f5_38;
769
- int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
770
- int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
771
- int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
772
- int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
773
- int64_t f6f6_19 = f6 * (int64_t) f6_19;
774
- int64_t f6f7_38 = f6 * (int64_t) f7_38;
775
- int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
776
- int64_t f6f9_38 = f6 * (int64_t) f9_38;
777
- int64_t f7f7_38 = f7 * (int64_t) f7_38;
778
- int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
779
- int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
780
- int64_t f8f8_19 = f8 * (int64_t) f8_19;
781
- int64_t f8f9_38 = f8 * (int64_t) f9_38;
782
- int64_t f9f9_38 = f9 * (int64_t) f9_38;
783
- int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
784
- int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
785
- int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
786
- int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
787
- int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
788
- int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
789
- int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
790
- int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
791
- int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
792
- int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
793
- int64_t carry0;
794
- int64_t carry1;
795
- int64_t carry2;
796
- int64_t carry3;
797
- int64_t carry4;
798
- int64_t carry5;
799
- int64_t carry6;
800
- int64_t carry7;
801
- int64_t carry8;
802
- int64_t carry9;
803
-
804
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
805
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
806
-
807
- carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
808
- carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
809
-
810
- carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
811
- carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
812
-
813
- carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
814
- carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
815
-
816
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
817
- carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
818
-
819
- carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
820
-
821
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
822
-
823
- h[0] = (int32_t) h0;
824
- h[1] = (int32_t) h1;
825
- h[2] = (int32_t) h2;
826
- h[3] = (int32_t) h3;
827
- h[4] = (int32_t) h4;
828
- h[5] = (int32_t) h5;
829
- h[6] = (int32_t) h6;
830
- h[7] = (int32_t) h7;
831
- h[8] = (int32_t) h8;
832
- h[9] = (int32_t) h9;
709
+ int32_t f0 = f[0];
710
+ int32_t f1 = f[1];
711
+ int32_t f2 = f[2];
712
+ int32_t f3 = f[3];
713
+ int32_t f4 = f[4];
714
+ int32_t f5 = f[5];
715
+ int32_t f6 = f[6];
716
+ int32_t f7 = f[7];
717
+ int32_t f8 = f[8];
718
+ int32_t f9 = f[9];
719
+ int32_t f0_2 = 2 * f0;
720
+ int32_t f1_2 = 2 * f1;
721
+ int32_t f2_2 = 2 * f2;
722
+ int32_t f3_2 = 2 * f3;
723
+ int32_t f4_2 = 2 * f4;
724
+ int32_t f5_2 = 2 * f5;
725
+ int32_t f6_2 = 2 * f6;
726
+ int32_t f7_2 = 2 * f7;
727
+ int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
728
+ int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
729
+ int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
730
+ int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
731
+ int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
732
+ int64_t f0f0 = f0 * (int64_t) f0;
733
+ int64_t f0f1_2 = f0_2 * (int64_t) f1;
734
+ int64_t f0f2_2 = f0_2 * (int64_t) f2;
735
+ int64_t f0f3_2 = f0_2 * (int64_t) f3;
736
+ int64_t f0f4_2 = f0_2 * (int64_t) f4;
737
+ int64_t f0f5_2 = f0_2 * (int64_t) f5;
738
+ int64_t f0f6_2 = f0_2 * (int64_t) f6;
739
+ int64_t f0f7_2 = f0_2 * (int64_t) f7;
740
+ int64_t f0f8_2 = f0_2 * (int64_t) f8;
741
+ int64_t f0f9_2 = f0_2 * (int64_t) f9;
742
+ int64_t f1f1_2 = f1_2 * (int64_t) f1;
743
+ int64_t f1f2_2 = f1_2 * (int64_t) f2;
744
+ int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
745
+ int64_t f1f4_2 = f1_2 * (int64_t) f4;
746
+ int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
747
+ int64_t f1f6_2 = f1_2 * (int64_t) f6;
748
+ int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
749
+ int64_t f1f8_2 = f1_2 * (int64_t) f8;
750
+ int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
751
+ int64_t f2f2 = f2 * (int64_t) f2;
752
+ int64_t f2f3_2 = f2_2 * (int64_t) f3;
753
+ int64_t f2f4_2 = f2_2 * (int64_t) f4;
754
+ int64_t f2f5_2 = f2_2 * (int64_t) f5;
755
+ int64_t f2f6_2 = f2_2 * (int64_t) f6;
756
+ int64_t f2f7_2 = f2_2 * (int64_t) f7;
757
+ int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
758
+ int64_t f2f9_38 = f2 * (int64_t) f9_38;
759
+ int64_t f3f3_2 = f3_2 * (int64_t) f3;
760
+ int64_t f3f4_2 = f3_2 * (int64_t) f4;
761
+ int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
762
+ int64_t f3f6_2 = f3_2 * (int64_t) f6;
763
+ int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
764
+ int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
765
+ int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
766
+ int64_t f4f4 = f4 * (int64_t) f4;
767
+ int64_t f4f5_2 = f4_2 * (int64_t) f5;
768
+ int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
769
+ int64_t f4f7_38 = f4 * (int64_t) f7_38;
770
+ int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
771
+ int64_t f4f9_38 = f4 * (int64_t) f9_38;
772
+ int64_t f5f5_38 = f5 * (int64_t) f5_38;
773
+ int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
774
+ int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
775
+ int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
776
+ int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
777
+ int64_t f6f6_19 = f6 * (int64_t) f6_19;
778
+ int64_t f6f7_38 = f6 * (int64_t) f7_38;
779
+ int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
780
+ int64_t f6f9_38 = f6 * (int64_t) f9_38;
781
+ int64_t f7f7_38 = f7 * (int64_t) f7_38;
782
+ int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
783
+ int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
784
+ int64_t f8f8_19 = f8 * (int64_t) f8_19;
785
+ int64_t f8f9_38 = f8 * (int64_t) f9_38;
786
+ int64_t f9f9_38 = f9 * (int64_t) f9_38;
787
+ int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
788
+ int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
789
+ int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
790
+ int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
791
+ int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
792
+ int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
793
+ int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
794
+ int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
795
+ int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
796
+ int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
797
+ int64_t carry0;
798
+ int64_t carry1;
799
+ int64_t carry2;
800
+ int64_t carry3;
801
+ int64_t carry4;
802
+ int64_t carry5;
803
+ int64_t carry6;
804
+ int64_t carry7;
805
+ int64_t carry8;
806
+ int64_t carry9;
807
+
808
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
809
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
810
+
811
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
812
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
813
+
814
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
815
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
816
+
817
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
818
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
819
+
820
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
821
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
822
+
823
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
824
+
825
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
826
+
827
+ h[0] = (int32_t) h0;
828
+ h[1] = (int32_t) h1;
829
+ h[2] = (int32_t) h2;
830
+ h[3] = (int32_t) h3;
831
+ h[4] = (int32_t) h4;
832
+ h[5] = (int32_t) h5;
833
+ h[6] = (int32_t) h6;
834
+ h[7] = (int32_t) h7;
835
+ h[8] = (int32_t) h8;
836
+ h[9] = (int32_t) h9;
833
837
  }
834
838
 
835
839
  /*
836
- h = 2 * f * f
837
- Can overlap h with f.
838
-
839
- Preconditions:
840
- |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
841
-
842
- Postconditions:
843
- |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
844
- */
840
+ h = 2 * f * f
841
+ Can overlap h with f.
842
+ *
843
+ Preconditions:
844
+ |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
845
+ *
846
+ Postconditions:
847
+ |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
848
+ */
845
849
 
846
850
  /*
847
- See fe_mul.c for discussion of implementation strategy.
848
- */
851
+ See fe_mul.c for discussion of implementation strategy.
852
+ */
849
853
 
850
854
  void fe_sq2(fe h,const fe f)
851
855
  {
852
- int32_t f0 = f[0];
853
- int32_t f1 = f[1];
854
- int32_t f2 = f[2];
855
- int32_t f3 = f[3];
856
- int32_t f4 = f[4];
857
- int32_t f5 = f[5];
858
- int32_t f6 = f[6];
859
- int32_t f7 = f[7];
860
- int32_t f8 = f[8];
861
- int32_t f9 = f[9];
862
- int32_t f0_2 = 2 * f0;
863
- int32_t f1_2 = 2 * f1;
864
- int32_t f2_2 = 2 * f2;
865
- int32_t f3_2 = 2 * f3;
866
- int32_t f4_2 = 2 * f4;
867
- int32_t f5_2 = 2 * f5;
868
- int32_t f6_2 = 2 * f6;
869
- int32_t f7_2 = 2 * f7;
870
- int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
871
- int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
872
- int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
873
- int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
874
- int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
875
- int64_t f0f0 = f0 * (int64_t) f0;
876
- int64_t f0f1_2 = f0_2 * (int64_t) f1;
877
- int64_t f0f2_2 = f0_2 * (int64_t) f2;
878
- int64_t f0f3_2 = f0_2 * (int64_t) f3;
879
- int64_t f0f4_2 = f0_2 * (int64_t) f4;
880
- int64_t f0f5_2 = f0_2 * (int64_t) f5;
881
- int64_t f0f6_2 = f0_2 * (int64_t) f6;
882
- int64_t f0f7_2 = f0_2 * (int64_t) f7;
883
- int64_t f0f8_2 = f0_2 * (int64_t) f8;
884
- int64_t f0f9_2 = f0_2 * (int64_t) f9;
885
- int64_t f1f1_2 = f1_2 * (int64_t) f1;
886
- int64_t f1f2_2 = f1_2 * (int64_t) f2;
887
- int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
888
- int64_t f1f4_2 = f1_2 * (int64_t) f4;
889
- int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
890
- int64_t f1f6_2 = f1_2 * (int64_t) f6;
891
- int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
892
- int64_t f1f8_2 = f1_2 * (int64_t) f8;
893
- int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
894
- int64_t f2f2 = f2 * (int64_t) f2;
895
- int64_t f2f3_2 = f2_2 * (int64_t) f3;
896
- int64_t f2f4_2 = f2_2 * (int64_t) f4;
897
- int64_t f2f5_2 = f2_2 * (int64_t) f5;
898
- int64_t f2f6_2 = f2_2 * (int64_t) f6;
899
- int64_t f2f7_2 = f2_2 * (int64_t) f7;
900
- int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
901
- int64_t f2f9_38 = f2 * (int64_t) f9_38;
902
- int64_t f3f3_2 = f3_2 * (int64_t) f3;
903
- int64_t f3f4_2 = f3_2 * (int64_t) f4;
904
- int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
905
- int64_t f3f6_2 = f3_2 * (int64_t) f6;
906
- int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
907
- int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
908
- int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
909
- int64_t f4f4 = f4 * (int64_t) f4;
910
- int64_t f4f5_2 = f4_2 * (int64_t) f5;
911
- int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
912
- int64_t f4f7_38 = f4 * (int64_t) f7_38;
913
- int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
914
- int64_t f4f9_38 = f4 * (int64_t) f9_38;
915
- int64_t f5f5_38 = f5 * (int64_t) f5_38;
916
- int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
917
- int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
918
- int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
919
- int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
920
- int64_t f6f6_19 = f6 * (int64_t) f6_19;
921
- int64_t f6f7_38 = f6 * (int64_t) f7_38;
922
- int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
923
- int64_t f6f9_38 = f6 * (int64_t) f9_38;
924
- int64_t f7f7_38 = f7 * (int64_t) f7_38;
925
- int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
926
- int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
927
- int64_t f8f8_19 = f8 * (int64_t) f8_19;
928
- int64_t f8f9_38 = f8 * (int64_t) f9_38;
929
- int64_t f9f9_38 = f9 * (int64_t) f9_38;
930
- int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
931
- int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
932
- int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
933
- int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
934
- int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
935
- int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
936
- int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
937
- int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
938
- int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
939
- int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
940
- int64_t carry0;
941
- int64_t carry1;
942
- int64_t carry2;
943
- int64_t carry3;
944
- int64_t carry4;
945
- int64_t carry5;
946
- int64_t carry6;
947
- int64_t carry7;
948
- int64_t carry8;
949
- int64_t carry9;
950
-
951
- h0 += h0;
952
- h1 += h1;
953
- h2 += h2;
954
- h3 += h3;
955
- h4 += h4;
956
- h5 += h5;
957
- h6 += h6;
958
- h7 += h7;
959
- h8 += h8;
960
- h9 += h9;
961
-
962
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
963
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
964
-
965
- carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
966
- carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
967
-
968
- carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
969
- carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
970
-
971
- carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
972
- carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
973
-
974
- carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
975
- carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
976
-
977
- carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
978
-
979
- carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
980
-
981
- h[0] = (int32_t) h0;
982
- h[1] = (int32_t) h1;
983
- h[2] = (int32_t) h2;
984
- h[3] = (int32_t) h3;
985
- h[4] = (int32_t) h4;
986
- h[5] = (int32_t) h5;
987
- h[6] = (int32_t) h6;
988
- h[7] = (int32_t) h7;
989
- h[8] = (int32_t) h8;
990
- h[9] = (int32_t) h9;
856
+ int32_t f0 = f[0];
857
+ int32_t f1 = f[1];
858
+ int32_t f2 = f[2];
859
+ int32_t f3 = f[3];
860
+ int32_t f4 = f[4];
861
+ int32_t f5 = f[5];
862
+ int32_t f6 = f[6];
863
+ int32_t f7 = f[7];
864
+ int32_t f8 = f[8];
865
+ int32_t f9 = f[9];
866
+ int32_t f0_2 = 2 * f0;
867
+ int32_t f1_2 = 2 * f1;
868
+ int32_t f2_2 = 2 * f2;
869
+ int32_t f3_2 = 2 * f3;
870
+ int32_t f4_2 = 2 * f4;
871
+ int32_t f5_2 = 2 * f5;
872
+ int32_t f6_2 = 2 * f6;
873
+ int32_t f7_2 = 2 * f7;
874
+ int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
875
+ int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
876
+ int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
877
+ int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
878
+ int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
879
+ int64_t f0f0 = f0 * (int64_t) f0;
880
+ int64_t f0f1_2 = f0_2 * (int64_t) f1;
881
+ int64_t f0f2_2 = f0_2 * (int64_t) f2;
882
+ int64_t f0f3_2 = f0_2 * (int64_t) f3;
883
+ int64_t f0f4_2 = f0_2 * (int64_t) f4;
884
+ int64_t f0f5_2 = f0_2 * (int64_t) f5;
885
+ int64_t f0f6_2 = f0_2 * (int64_t) f6;
886
+ int64_t f0f7_2 = f0_2 * (int64_t) f7;
887
+ int64_t f0f8_2 = f0_2 * (int64_t) f8;
888
+ int64_t f0f9_2 = f0_2 * (int64_t) f9;
889
+ int64_t f1f1_2 = f1_2 * (int64_t) f1;
890
+ int64_t f1f2_2 = f1_2 * (int64_t) f2;
891
+ int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
892
+ int64_t f1f4_2 = f1_2 * (int64_t) f4;
893
+ int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
894
+ int64_t f1f6_2 = f1_2 * (int64_t) f6;
895
+ int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
896
+ int64_t f1f8_2 = f1_2 * (int64_t) f8;
897
+ int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
898
+ int64_t f2f2 = f2 * (int64_t) f2;
899
+ int64_t f2f3_2 = f2_2 * (int64_t) f3;
900
+ int64_t f2f4_2 = f2_2 * (int64_t) f4;
901
+ int64_t f2f5_2 = f2_2 * (int64_t) f5;
902
+ int64_t f2f6_2 = f2_2 * (int64_t) f6;
903
+ int64_t f2f7_2 = f2_2 * (int64_t) f7;
904
+ int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
905
+ int64_t f2f9_38 = f2 * (int64_t) f9_38;
906
+ int64_t f3f3_2 = f3_2 * (int64_t) f3;
907
+ int64_t f3f4_2 = f3_2 * (int64_t) f4;
908
+ int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
909
+ int64_t f3f6_2 = f3_2 * (int64_t) f6;
910
+ int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
911
+ int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
912
+ int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
913
+ int64_t f4f4 = f4 * (int64_t) f4;
914
+ int64_t f4f5_2 = f4_2 * (int64_t) f5;
915
+ int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
916
+ int64_t f4f7_38 = f4 * (int64_t) f7_38;
917
+ int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
918
+ int64_t f4f9_38 = f4 * (int64_t) f9_38;
919
+ int64_t f5f5_38 = f5 * (int64_t) f5_38;
920
+ int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
921
+ int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
922
+ int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
923
+ int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
924
+ int64_t f6f6_19 = f6 * (int64_t) f6_19;
925
+ int64_t f6f7_38 = f6 * (int64_t) f7_38;
926
+ int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
927
+ int64_t f6f9_38 = f6 * (int64_t) f9_38;
928
+ int64_t f7f7_38 = f7 * (int64_t) f7_38;
929
+ int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
930
+ int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
931
+ int64_t f8f8_19 = f8 * (int64_t) f8_19;
932
+ int64_t f8f9_38 = f8 * (int64_t) f9_38;
933
+ int64_t f9f9_38 = f9 * (int64_t) f9_38;
934
+ int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
935
+ int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
936
+ int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
937
+ int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
938
+ int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
939
+ int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
940
+ int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
941
+ int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
942
+ int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
943
+ int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
944
+ int64_t carry0;
945
+ int64_t carry1;
946
+ int64_t carry2;
947
+ int64_t carry3;
948
+ int64_t carry4;
949
+ int64_t carry5;
950
+ int64_t carry6;
951
+ int64_t carry7;
952
+ int64_t carry8;
953
+ int64_t carry9;
954
+
955
+ h0 += h0;
956
+ h1 += h1;
957
+ h2 += h2;
958
+ h3 += h3;
959
+ h4 += h4;
960
+ h5 += h5;
961
+ h6 += h6;
962
+ h7 += h7;
963
+ h8 += h8;
964
+ h9 += h9;
965
+
966
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
967
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
968
+
969
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
970
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
971
+
972
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
973
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
974
+
975
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
976
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
977
+
978
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
979
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
980
+
981
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);
982
+
983
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
984
+
985
+ h[0] = (int32_t) h0;
986
+ h[1] = (int32_t) h1;
987
+ h[2] = (int32_t) h2;
988
+ h[3] = (int32_t) h3;
989
+ h[4] = (int32_t) h4;
990
+ h[5] = (int32_t) h5;
991
+ h[6] = (int32_t) h6;
992
+ h[7] = (int32_t) h7;
993
+ h[8] = (int32_t) h8;
994
+ h[9] = (int32_t) h9;
991
995
  }
992
996
 
993
997
  void fe_invert(fe out,const fe z)
@@ -1102,64 +1106,64 @@ void fe_pow22523(fe out,const fe z)
1102
1106
  }
1103
1107
 
1104
1108
  /*
1105
- h = f - g
1106
- Can overlap h with f or g.
1107
-
1108
- Preconditions:
1109
- |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1110
- |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1111
-
1112
- Postconditions:
1113
- |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1114
- */
1109
+ h = f - g
1110
+ Can overlap h with f or g.
1111
+ *
1112
+ Preconditions:
1113
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1114
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1115
+ *
1116
+ Postconditions:
1117
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1118
+ */
1115
1119
 
1116
1120
  void fe_sub(fe h,const fe f,const fe g)
1117
1121
  {
1118
- int32_t f0 = f[0];
1119
- int32_t f1 = f[1];
1120
- int32_t f2 = f[2];
1121
- int32_t f3 = f[3];
1122
- int32_t f4 = f[4];
1123
- int32_t f5 = f[5];
1124
- int32_t f6 = f[6];
1125
- int32_t f7 = f[7];
1126
- int32_t f8 = f[8];
1127
- int32_t f9 = f[9];
1128
- int32_t g0 = g[0];
1129
- int32_t g1 = g[1];
1130
- int32_t g2 = g[2];
1131
- int32_t g3 = g[3];
1132
- int32_t g4 = g[4];
1133
- int32_t g5 = g[5];
1134
- int32_t g6 = g[6];
1135
- int32_t g7 = g[7];
1136
- int32_t g8 = g[8];
1137
- int32_t g9 = g[9];
1138
- int32_t h0 = f0 - g0;
1139
- int32_t h1 = f1 - g1;
1140
- int32_t h2 = f2 - g2;
1141
- int32_t h3 = f3 - g3;
1142
- int32_t h4 = f4 - g4;
1143
- int32_t h5 = f5 - g5;
1144
- int32_t h6 = f6 - g6;
1145
- int32_t h7 = f7 - g7;
1146
- int32_t h8 = f8 - g8;
1147
- int32_t h9 = f9 - g9;
1148
- h[0] = h0;
1149
- h[1] = h1;
1150
- h[2] = h2;
1151
- h[3] = h3;
1152
- h[4] = h4;
1153
- h[5] = h5;
1154
- h[6] = h6;
1155
- h[7] = h7;
1156
- h[8] = h8;
1157
- h[9] = h9;
1122
+ int32_t f0 = f[0];
1123
+ int32_t f1 = f[1];
1124
+ int32_t f2 = f[2];
1125
+ int32_t f3 = f[3];
1126
+ int32_t f4 = f[4];
1127
+ int32_t f5 = f[5];
1128
+ int32_t f6 = f[6];
1129
+ int32_t f7 = f[7];
1130
+ int32_t f8 = f[8];
1131
+ int32_t f9 = f[9];
1132
+ int32_t g0 = g[0];
1133
+ int32_t g1 = g[1];
1134
+ int32_t g2 = g[2];
1135
+ int32_t g3 = g[3];
1136
+ int32_t g4 = g[4];
1137
+ int32_t g5 = g[5];
1138
+ int32_t g6 = g[6];
1139
+ int32_t g7 = g[7];
1140
+ int32_t g8 = g[8];
1141
+ int32_t g9 = g[9];
1142
+ int32_t h0 = f0 - g0;
1143
+ int32_t h1 = f1 - g1;
1144
+ int32_t h2 = f2 - g2;
1145
+ int32_t h3 = f3 - g3;
1146
+ int32_t h4 = f4 - g4;
1147
+ int32_t h5 = f5 - g5;
1148
+ int32_t h6 = f6 - g6;
1149
+ int32_t h7 = f7 - g7;
1150
+ int32_t h8 = f8 - g8;
1151
+ int32_t h9 = f9 - g9;
1152
+ h[0] = h0;
1153
+ h[1] = h1;
1154
+ h[2] = h2;
1155
+ h[3] = h3;
1156
+ h[4] = h4;
1157
+ h[5] = h5;
1158
+ h[6] = h6;
1159
+ h[7] = h7;
1160
+ h[8] = h8;
1161
+ h[9] = h9;
1158
1162
  }
1159
1163
 
1160
1164
  /*
1161
- r = p + q
1162
- */
1165
+ r = p + q
1166
+ */
1163
1167
 
1164
1168
  void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1165
1169
  {
@@ -1180,33 +1184,33 @@ void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1180
1184
 
1181
1185
  static void slide(signed char *r,const unsigned char *a)
1182
1186
  {
1183
- int i;
1184
- int b;
1185
- int k;
1186
-
1187
- for (i = 0;i < 256;++i)
1188
- r[i] = 1 & (a[i >> 3] >> (i & 7));
1189
-
1190
- for (i = 0;i < 256;++i)
1191
- if (r[i]) {
1192
- for (b = 1;b <= 6 && i + b < 256;++b) {
1193
- if (r[i + b]) {
1194
- if (r[i] + (r[i + b] << b) <= 15) {
1195
- r[i] += r[i + b] << b; r[i + b] = 0;
1196
- } else if (r[i] - (r[i + b] << b) >= -15) {
1197
- r[i] -= r[i + b] << b;
1198
- for (k = i + b;k < 256;++k) {
1199
- if (!r[k]) {
1200
- r[k] = 1;
1201
- break;
1202
- }
1203
- r[k] = 0;
1187
+ int i;
1188
+ int b;
1189
+ int k;
1190
+
1191
+ for (i = 0;i < 256;++i)
1192
+ r[i] = 1 & (a[i >> 3] >> (i & 7));
1193
+
1194
+ for (i = 0;i < 256;++i)
1195
+ if (r[i]) {
1196
+ for (b = 1;b <= 6 && i + b < 256;++b) {
1197
+ if (r[i + b]) {
1198
+ if (r[i] + (r[i + b] << b) <= 15) {
1199
+ r[i] += r[i + b] << b; r[i + b] = 0;
1200
+ } else if (r[i] - (r[i + b] << b) >= -15) {
1201
+ r[i] -= r[i + b] << b;
1202
+ for (k = i + b;k < 256;++k) {
1203
+ if (!r[k]) {
1204
+ r[k] = 1;
1205
+ break;
1206
+ }
1207
+ r[k] = 0;
1208
+ }
1209
+ } else
1210
+ break;
1211
+ }
1204
1212
  }
1205
- } else
1206
- break;
1207
1213
  }
1208
- }
1209
- }
1210
1214
 
1211
1215
  }
1212
1216
 
@@ -1226,48 +1230,51 @@ static const fe sqrtm1 = {
1226
1230
 
1227
1231
  int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s)
1228
1232
  {
1229
- fe u;
1230
- fe v;
1231
- fe v3;
1232
- fe vxx;
1233
- fe check;
1234
-
1235
- fe_frombytes(h->Y,s);
1236
- fe_1(h->Z);
1237
- fe_sq(u,h->Y);
1238
- fe_mul(v,u,d);
1239
- fe_sub(u,u,h->Z); /* u = y^2-1 */
1240
- fe_add(v,v,h->Z); /* v = dy^2+1 */
1241
-
1242
- fe_sq(v3,v);
1243
- fe_mul(v3,v3,v); /* v3 = v^3 */
1244
- fe_sq(h->X,v3);
1245
- fe_mul(h->X,h->X,v);
1246
- fe_mul(h->X,h->X,u); /* x = uv^7 */
1247
-
1248
- fe_pow22523(h->X,h->X); /* x = (uv^7)^((q-5)/8) */
1249
- fe_mul(h->X,h->X,v3);
1250
- fe_mul(h->X,h->X,u); /* x = uv^3(uv^7)^((q-5)/8) */
1251
-
1252
- fe_sq(vxx,h->X);
1253
- fe_mul(vxx,vxx,v);
1254
- fe_sub(check,vxx,u); /* vx^2-u */
1255
- if (fe_isnonzero(check)) {
1256
- fe_add(check,vxx,u); /* vx^2+u */
1257
- if (fe_isnonzero(check)) return -1;
1258
- fe_mul(h->X,h->X,sqrtm1);
1259
- }
1260
-
1261
- if (fe_isnegative(h->X) == (s[31] >> 7))
1262
- fe_neg(h->X,h->X);
1263
-
1264
- fe_mul(h->T,h->X,h->Y);
1265
- return 0;
1233
+ fe u;
1234
+ fe v;
1235
+ fe v3;
1236
+ fe vxx;
1237
+ fe check;
1238
+
1239
+ fe_frombytes(h->Y,s);
1240
+ fe_1(h->Z);
1241
+ fe_sq(u,h->Y);
1242
+ fe_mul(v,u,d);
1243
+ fe_sub(u,u,h->Z); /* u = y^2-1 */
1244
+ fe_add(v,v,h->Z); /* v = dy^2+1 */
1245
+
1246
+ fe_sq(v3,v);
1247
+ fe_mul(v3,v3,v); /* v3 = v^3 */
1248
+ fe_sq(h->X,v3);
1249
+ fe_mul(h->X,h->X,v);
1250
+ fe_mul(h->X,h->X,u); /* x = uv^7 */
1251
+
1252
+ fe_pow22523(h->X,h->X); /* x = (uv^7)^((q-5)/8) */
1253
+ fe_mul(h->X,h->X,v3);
1254
+ fe_mul(h->X,h->X,u); /* x = uv^3(uv^7)^((q-5)/8) */
1255
+
1256
+ fe_sq(vxx,h->X);
1257
+ fe_mul(vxx,vxx,v);
1258
+ fe_sub(check,vxx,u); /* vx^2-u */
1259
+ if (fe_isnonzero(check)) {
1260
+ fe_add(check,vxx,u); /* vx^2+u */
1261
+ if (fe_isnonzero(check)) {
1262
+ return -1;
1263
+ }
1264
+ fe_mul(h->X,h->X,sqrtm1);
1265
+ }
1266
+
1267
+ if (fe_isnegative(h->X) == (s[31] >> 7)) {
1268
+ fe_neg(h->X,h->X);
1269
+ }
1270
+ fe_mul(h->T,h->X,h->Y);
1271
+
1272
+ return 0;
1266
1273
  }
1267
1274
 
1268
1275
  /*
1269
- r = p + q
1270
- */
1276
+ r = p + q
1277
+ */
1271
1278
 
1272
1279
  void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1273
1280
  {
@@ -1286,8 +1293,8 @@ void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1286
1293
  }
1287
1294
 
1288
1295
  /*
1289
- r = p - q
1290
- */
1296
+ r = p - q
1297
+ */
1291
1298
 
1292
1299
  void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1293
1300
  {
@@ -1306,38 +1313,38 @@ void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1306
1313
  }
1307
1314
 
1308
1315
  /*
1309
- r = p
1310
- */
1316
+ r = p
1317
+ */
1311
1318
 
1312
1319
  extern void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
1313
1320
  {
1314
- fe_mul(r->X,p->X,p->T);
1315
- fe_mul(r->Y,p->Y,p->Z);
1316
- fe_mul(r->Z,p->Z,p->T);
1321
+ fe_mul(r->X,p->X,p->T);
1322
+ fe_mul(r->Y,p->Y,p->Z);
1323
+ fe_mul(r->Z,p->Z,p->T);
1317
1324
  }
1318
1325
 
1319
1326
  /*
1320
- r = p
1321
- */
1327
+ r = p
1328
+ */
1322
1329
 
1323
1330
  extern void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p)
1324
1331
  {
1325
- fe_mul(r->X,p->X,p->T);
1326
- fe_mul(r->Y,p->Y,p->Z);
1327
- fe_mul(r->Z,p->Z,p->T);
1328
- fe_mul(r->T,p->X,p->Y);
1332
+ fe_mul(r->X,p->X,p->T);
1333
+ fe_mul(r->Y,p->Y,p->Z);
1334
+ fe_mul(r->Z,p->Z,p->T);
1335
+ fe_mul(r->T,p->X,p->Y);
1329
1336
  }
1330
1337
 
1331
1338
  void ge_p2_0(ge_p2 *h)
1332
1339
  {
1333
- fe_0(h->X);
1334
- fe_1(h->Y);
1335
- fe_1(h->Z);
1340
+ fe_0(h->X);
1341
+ fe_1(h->Y);
1342
+ fe_1(h->Z);
1336
1343
  }
1337
1344
 
1338
1345
  /*
1339
- r = 2 * p
1340
- */
1346
+ r = 2 * p
1347
+ */
1341
1348
 
1342
1349
  void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
1343
1350
  {
@@ -1356,15 +1363,15 @@ void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
1356
1363
 
1357
1364
  void ge_p3_0(ge_p3 *h)
1358
1365
  {
1359
- fe_0(h->X);
1360
- fe_1(h->Y);
1361
- fe_1(h->Z);
1362
- fe_0(h->T);
1366
+ fe_0(h->X);
1367
+ fe_1(h->Y);
1368
+ fe_1(h->Z);
1369
+ fe_0(h->T);
1363
1370
  }
1364
1371
 
1365
1372
  /*
1366
- r = p
1367
- */
1373
+ r = p
1374
+ */
1368
1375
 
1369
1376
  /* 2 * d = 16295367250680780974490674513165176452449235426866156013048779062215315747161 */
1370
1377
  static const fe d2 = {
@@ -1373,77 +1380,79 @@ static const fe d2 = {
1373
1380
 
1374
1381
  extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
1375
1382
  {
1376
- fe_add(r->YplusX,p->Y,p->X);
1377
- fe_sub(r->YminusX,p->Y,p->X);
1378
- fe_copy(r->Z,p->Z);
1379
- fe_mul(r->T2d,p->T,d2);
1383
+ fe_add(r->YplusX,p->Y,p->X);
1384
+ fe_sub(r->YminusX,p->Y,p->X);
1385
+ fe_copy(r->Z,p->Z);
1386
+ fe_mul(r->T2d,p->T,d2);
1380
1387
  }
1381
1388
 
1382
1389
  /*
1383
- r = p
1384
- */
1390
+ r = p
1391
+ */
1385
1392
 
1386
1393
  extern void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
1387
1394
  {
1388
- fe_copy(r->X,p->X);
1389
- fe_copy(r->Y,p->Y);
1390
- fe_copy(r->Z,p->Z);
1395
+ fe_copy(r->X,p->X);
1396
+ fe_copy(r->Y,p->Y);
1397
+ fe_copy(r->Z,p->Z);
1391
1398
  }
1392
1399
 
1393
1400
  void ge_p3_tobytes(unsigned char *s,const ge_p3 *h)
1394
1401
  {
1395
- fe recip;
1396
- fe x;
1397
- fe y;
1398
-
1399
- fe_invert(recip,h->Z);
1400
- fe_mul(x,h->X,recip);
1401
- fe_mul(y,h->Y,recip);
1402
- fe_tobytes(s,y);
1403
- s[31] ^= fe_isnegative(x) << 7;
1402
+ fe recip;
1403
+ fe x;
1404
+ fe y;
1405
+
1406
+ fe_invert(recip,h->Z);
1407
+ fe_mul(x,h->X,recip);
1408
+ fe_mul(y,h->Y,recip);
1409
+ fe_tobytes(s,y);
1410
+ s[31] ^= fe_isnegative(x) << 7;
1404
1411
  }
1405
1412
 
1406
1413
  /*
1407
- r = 2 * p
1408
- */
1414
+ r = 2 * p
1415
+ */
1409
1416
 
1410
1417
  void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
1411
1418
  {
1412
- ge_p2 q;
1413
- ge_p3_to_p2(&q,p);
1414
- ge_p2_dbl(r,&q);
1419
+ ge_p2 q;
1420
+ ge_p3_to_p2(&q,p);
1421
+ ge_p2_dbl(r,&q);
1415
1422
  }
1416
1423
 
1417
1424
  void ge_precomp_0(ge_precomp *h)
1418
1425
  {
1419
- fe_1(h->yplusx);
1420
- fe_1(h->yminusx);
1421
- fe_0(h->xy2d);
1426
+ fe_1(h->yplusx);
1427
+ fe_1(h->yminusx);
1428
+ fe_0(h->xy2d);
1422
1429
  }
1423
1430
 
1424
1431
  static unsigned char equal(signed char b,signed char c)
1425
1432
  {
1426
- unsigned char ub = b;
1427
- unsigned char uc = c;
1428
- unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */
1429
- uint32_t y = x; /* 0: yes; 1..255: no */
1430
- y -= 1; /* 4294967295: yes; 0..254: no */
1431
- y >>= 31; /* 1: yes; 0: no */
1432
- return y;
1433
+ unsigned char ub = b;
1434
+ unsigned char uc = c;
1435
+ unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */
1436
+ uint32_t y = x; /* 0: yes; 1..255: no */
1437
+ y -= 1; /* 4294967295: yes; 0..254: no */
1438
+ y >>= 31; /* 1: yes; 0: no */
1439
+
1440
+ return y;
1433
1441
  }
1434
1442
 
1435
1443
  static unsigned char negative(signed char b)
1436
1444
  {
1437
- uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
1438
- x >>= 63; /* 1: yes; 0: no */
1439
- return x;
1445
+ uint64_t x = b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
1446
+ x >>= 63; /* 1: yes; 0: no */
1447
+
1448
+ return x;
1440
1449
  }
1441
1450
 
1442
1451
  static void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b)
1443
1452
  {
1444
- fe_cmov(t->yplusx,u->yplusx,b);
1445
- fe_cmov(t->yminusx,u->yminusx,b);
1446
- fe_cmov(t->xy2d,u->xy2d,b);
1453
+ fe_cmov(t->yplusx,u->yplusx,b);
1454
+ fe_cmov(t->yminusx,u->yminusx,b);
1455
+ fe_cmov(t->xy2d,u->xy2d,b);
1447
1456
  }
1448
1457
 
1449
1458
  /* base[i][j] = (j+1)*256^i*B */
@@ -1453,28 +1462,28 @@ static const ge_precomp base[32][8] = {
1453
1462
 
1454
1463
  static void ge_select(ge_precomp *t,int pos,signed char b)
1455
1464
  {
1456
- ge_precomp minust;
1457
- unsigned char bnegative = negative(b);
1458
- unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
1459
-
1460
- ge_precomp_0(t);
1461
- cmov(t,&base[pos][0],equal(babs,1));
1462
- cmov(t,&base[pos][1],equal(babs,2));
1463
- cmov(t,&base[pos][2],equal(babs,3));
1464
- cmov(t,&base[pos][3],equal(babs,4));
1465
- cmov(t,&base[pos][4],equal(babs,5));
1466
- cmov(t,&base[pos][5],equal(babs,6));
1467
- cmov(t,&base[pos][6],equal(babs,7));
1468
- cmov(t,&base[pos][7],equal(babs,8));
1469
- fe_copy(minust.yplusx,t->yminusx);
1470
- fe_copy(minust.yminusx,t->yplusx);
1471
- fe_neg(minust.xy2d,t->xy2d);
1472
- cmov(t,&minust,bnegative);
1465
+ ge_precomp minust;
1466
+ unsigned char bnegative = negative(b);
1467
+ unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
1468
+
1469
+ ge_precomp_0(t);
1470
+ cmov(t,&base[pos][0],equal(babs,1));
1471
+ cmov(t,&base[pos][1],equal(babs,2));
1472
+ cmov(t,&base[pos][2],equal(babs,3));
1473
+ cmov(t,&base[pos][3],equal(babs,4));
1474
+ cmov(t,&base[pos][4],equal(babs,5));
1475
+ cmov(t,&base[pos][5],equal(babs,6));
1476
+ cmov(t,&base[pos][6],equal(babs,7));
1477
+ cmov(t,&base[pos][7],equal(babs,8));
1478
+ fe_copy(minust.yplusx,t->yminusx);
1479
+ fe_copy(minust.yminusx,t->yplusx);
1480
+ fe_neg(minust.xy2d,t->xy2d);
1481
+ cmov(t,&minust,bnegative);
1473
1482
  }
1474
1483
 
1475
1484
  /*
1476
- r = p - q
1477
- */
1485
+ r = p - q
1486
+ */
1478
1487
 
1479
1488
  void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1480
1489
  {
@@ -1495,739 +1504,739 @@ void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1495
1504
 
1496
1505
  void ge_tobytes(unsigned char *s,const ge_p2 *h)
1497
1506
  {
1498
- fe recip;
1499
- fe x;
1500
- fe y;
1501
-
1502
- fe_invert(recip,h->Z);
1503
- fe_mul(x,h->X,recip);
1504
- fe_mul(y,h->Y,recip);
1505
- fe_tobytes(s,y);
1506
- s[31] ^= fe_isnegative(x) << 7;
1507
+ fe recip;
1508
+ fe x;
1509
+ fe y;
1510
+
1511
+ fe_invert(recip,h->Z);
1512
+ fe_mul(x,h->X,recip);
1513
+ fe_mul(y,h->Y,recip);
1514
+ fe_tobytes(s,y);
1515
+ s[31] ^= fe_isnegative(x) << 7;
1507
1516
  }
1508
1517
 
1509
1518
  /*
1510
- h = a * B
1511
- where a = a[0]+256*a[1]+...+256^31 a[31]
1512
- B is the Ed25519 base point (x,4/5) with x positive.
1513
-
1514
- Preconditions:
1515
- a[31] <= 127
1516
- */
1519
+ h = a * B
1520
+ where a = a[0]+256*a[1]+...+256^31 a[31]
1521
+ B is the Ed25519 base point (x,4/5) with x positive.
1522
+ *
1523
+ Preconditions:
1524
+ a[31] <= 127
1525
+ */
1517
1526
 
1518
1527
  /*
1519
- r = a * A + b * B
1520
- where a = a[0]+256*a[1]+...+256^31 a[31].
1521
- and b = b[0]+256*b[1]+...+256^31 b[31].
1522
- B is the Ed25519 base point (x,4/5) with x positive.
1523
- */
1528
+ r = a * A + b * B
1529
+ where a = a[0]+256*a[1]+...+256^31 a[31].
1530
+ and b = b[0]+256*b[1]+...+256^31 b[31].
1531
+ B is the Ed25519 base point (x,4/5) with x positive.
1532
+ */
1524
1533
 
1525
1534
  void ge_double_scalarmult_vartime(ge_p2 *r,const unsigned char *a,const ge_p3 *A,const unsigned char *b)
1526
1535
  {
1527
- signed char aslide[256];
1528
- signed char bslide[256];
1529
- ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
1530
- ge_p1p1 t;
1531
- ge_p3 u;
1532
- ge_p3 A2;
1533
- int i;
1534
-
1535
- slide(aslide,a);
1536
- slide(bslide,b);
1537
-
1538
- ge_p3_to_cached(&Ai[0],A);
1539
- ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1540
- ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1541
- ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1542
- ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1543
- ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1544
- ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1545
- ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1546
- ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1547
-
1548
- ge_p2_0(r);
1549
-
1550
- for (i = 255;i >= 0;--i) {
1551
- if (aslide[i] || bslide[i]) break;
1552
- }
1553
-
1554
- for (;i >= 0;--i) {
1555
- ge_p2_dbl(&t,r);
1556
-
1557
- if (aslide[i] > 0) {
1558
- ge_p1p1_to_p3(&u,&t);
1559
- ge_add(&t,&u,&Ai[aslide[i]/2]);
1560
- } else if (aslide[i] < 0) {
1561
- ge_p1p1_to_p3(&u,&t);
1562
- ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1563
- }
1536
+ signed char aslide[256];
1537
+ signed char bslide[256];
1538
+ ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
1539
+ ge_p1p1 t;
1540
+ ge_p3 u;
1541
+ ge_p3 A2;
1542
+ int i;
1543
+
1544
+ slide(aslide,a);
1545
+ slide(bslide,b);
1546
+
1547
+ ge_p3_to_cached(&Ai[0],A);
1548
+ ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1549
+ ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1550
+ ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1551
+ ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1552
+ ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1553
+ ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1554
+ ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1555
+ ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1564
1556
 
1565
- if (bslide[i] > 0) {
1566
- ge_p1p1_to_p3(&u,&t);
1567
- ge_madd(&t,&u,&Bi[bslide[i]/2]);
1568
- } else if (bslide[i] < 0) {
1569
- ge_p1p1_to_p3(&u,&t);
1570
- ge_msub(&t,&u,&Bi[(-bslide[i])/2]);
1557
+ ge_p2_0(r);
1558
+
1559
+ for (i = 255;i >= 0;--i) {
1560
+ if (aslide[i] || bslide[i]) break;
1571
1561
  }
1572
1562
 
1573
- ge_p1p1_to_p2(r,&t);
1574
- }
1563
+ for (;i >= 0;--i) {
1564
+ ge_p2_dbl(&t,r);
1565
+
1566
+ if (aslide[i] > 0) {
1567
+ ge_p1p1_to_p3(&u,&t);
1568
+ ge_add(&t,&u,&Ai[aslide[i]/2]);
1569
+ } else if (aslide[i] < 0) {
1570
+ ge_p1p1_to_p3(&u,&t);
1571
+ ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1572
+ }
1573
+
1574
+ if (bslide[i] > 0) {
1575
+ ge_p1p1_to_p3(&u,&t);
1576
+ ge_madd(&t,&u,&Bi[bslide[i]/2]);
1577
+ } else if (bslide[i] < 0) {
1578
+ ge_p1p1_to_p3(&u,&t);
1579
+ ge_msub(&t,&u,&Bi[(-bslide[i])/2]);
1580
+ }
1581
+
1582
+ ge_p1p1_to_p2(r,&t);
1583
+ }
1575
1584
  }
1576
1585
 
1577
1586
  void ge_scalarmult_vartime(ge_p3 *r,const unsigned char *a,const ge_p3 *A)
1578
1587
  {
1579
- signed char aslide[256];
1580
- ge_cached Ai[8];
1581
- ge_p1p1 t;
1582
- ge_p3 u;
1583
- ge_p3 A2;
1584
- int i;
1585
-
1586
- slide(aslide,a);
1587
-
1588
- ge_p3_to_cached(&Ai[0],A);
1589
- ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1590
- ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1591
- ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1592
- ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1593
- ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1594
- ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1595
- ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1596
- ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1597
-
1598
- ge_p3_0(r);
1599
-
1600
- for (i = 255;i >= 0;--i) {
1601
- if (aslide[i]) break;
1602
- }
1603
-
1604
- for (;i >= 0;--i) {
1605
- ge_p3_dbl(&t,r);
1606
-
1607
- if (aslide[i] > 0) {
1608
- ge_p1p1_to_p3(&u,&t);
1609
- ge_add(&t,&u,&Ai[aslide[i]/2]);
1610
- } else if (aslide[i] < 0) {
1611
- ge_p1p1_to_p3(&u,&t);
1612
- ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1588
+ signed char aslide[256];
1589
+ ge_cached Ai[8];
1590
+ ge_p1p1 t;
1591
+ ge_p3 u;
1592
+ ge_p3 A2;
1593
+ int i;
1594
+
1595
+ slide(aslide,a);
1596
+
1597
+ ge_p3_to_cached(&Ai[0],A);
1598
+ ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1599
+ ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1600
+ ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1601
+ ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1602
+ ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1603
+ ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1604
+ ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1605
+ ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1606
+
1607
+ ge_p3_0(r);
1608
+
1609
+ for (i = 255;i >= 0;--i) {
1610
+ if (aslide[i]) break;
1613
1611
  }
1614
1612
 
1615
- ge_p1p1_to_p3(r,&t);
1616
- }
1613
+ for (;i >= 0;--i) {
1614
+ ge_p3_dbl(&t,r);
1615
+
1616
+ if (aslide[i] > 0) {
1617
+ ge_p1p1_to_p3(&u,&t);
1618
+ ge_add(&t,&u,&Ai[aslide[i]/2]);
1619
+ } else if (aslide[i] < 0) {
1620
+ ge_p1p1_to_p3(&u,&t);
1621
+ ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1622
+ }
1623
+
1624
+ ge_p1p1_to_p3(r,&t);
1625
+ }
1617
1626
  }
1618
1627
 
1619
1628
  void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
1620
1629
  {
1621
- signed char e[64];
1622
- signed char carry;
1623
- ge_p1p1 r;
1624
- ge_p2 s;
1625
- ge_precomp t;
1626
- int i;
1627
-
1628
- for (i = 0;i < 32;++i) {
1629
- e[2 * i + 0] = (a[i] >> 0) & 15;
1630
- e[2 * i + 1] = (a[i] >> 4) & 15;
1631
- }
1632
- /* each e[i] is between 0 and 15 */
1633
- /* e[63] is between 0 and 7 */
1634
-
1635
- carry = 0;
1636
- for (i = 0;i < 63;++i) {
1637
- e[i] += carry;
1638
- carry = e[i] + 8;
1639
- carry >>= 4;
1640
- e[i] -= carry * ((signed char) 1 << 4);
1641
- }
1642
- e[63] += carry;
1643
- /* each e[i] is between -8 and 8 */
1644
-
1645
- ge_p3_0(h);
1646
- for (i = 1;i < 64;i += 2) {
1647
- ge_select(&t,i / 2,e[i]);
1648
- ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1649
- }
1650
-
1651
- ge_p3_dbl(&r,h); ge_p1p1_to_p2(&s,&r);
1652
- ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1653
- ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1654
- ge_p2_dbl(&r,&s); ge_p1p1_to_p3(h,&r);
1655
-
1656
- for (i = 0;i < 64;i += 2) {
1657
- ge_select(&t,i / 2,e[i]);
1658
- ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1659
- }
1630
+ signed char e[64];
1631
+ signed char carry;
1632
+ ge_p1p1 r;
1633
+ ge_p2 s;
1634
+ ge_precomp t;
1635
+ int i;
1636
+
1637
+ for (i = 0;i < 32;++i) {
1638
+ e[2 * i + 0] = (a[i] >> 0) & 15;
1639
+ e[2 * i + 1] = (a[i] >> 4) & 15;
1640
+ }
1641
+ /* each e[i] is between 0 and 15 */
1642
+ /* e[63] is between 0 and 7 */
1643
+
1644
+ carry = 0;
1645
+ for (i = 0;i < 63;++i) {
1646
+ e[i] += carry;
1647
+ carry = e[i] + 8;
1648
+ carry >>= 4;
1649
+ e[i] -= carry * ((signed char) 1 << 4);
1650
+ }
1651
+ e[63] += carry;
1652
+ /* each e[i] is between -8 and 8 */
1653
+
1654
+ ge_p3_0(h);
1655
+ for (i = 1;i < 64;i += 2) {
1656
+ ge_select(&t,i / 2,e[i]);
1657
+ ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1658
+ }
1659
+
1660
+ ge_p3_dbl(&r,h); ge_p1p1_to_p2(&s,&r);
1661
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1662
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1663
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p3(h,&r);
1664
+
1665
+ for (i = 0;i < 64;i += 2) {
1666
+ ge_select(&t,i / 2,e[i]);
1667
+ ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1668
+ }
1660
1669
  }
1661
1670
 
1662
1671
  /*
1663
- Input:
1664
- a[0]+256*a[1]+...+256^31*a[31] = a
1665
- b[0]+256*b[1]+...+256^31*b[31] = b
1666
- c[0]+256*c[1]+...+256^31*c[31] = c
1667
-
1668
- Output:
1669
- s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1670
- where l = 2^252 + 27742317777372353535851937790883648493.
1671
- */
1672
+ Input:
1673
+ a[0]+256*a[1]+...+256^31*a[31] = a
1674
+ b[0]+256*b[1]+...+256^31*b[31] = b
1675
+ c[0]+256*c[1]+...+256^31*c[31] = c
1676
+ *
1677
+ Output:
1678
+ s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1679
+ where l = 2^252 + 27742317777372353535851937790883648493.
1680
+ */
1672
1681
 
1673
1682
  void sc_muladd(unsigned char *s,const unsigned char *a,const unsigned char *b,const unsigned char *c)
1674
1683
  {
1675
- int64_t a0 = 2097151 & load_3(a);
1676
- int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
1677
- int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
1678
- int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
1679
- int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
1680
- int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
1681
- int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
1682
- int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
1683
- int64_t a8 = 2097151 & load_3(a + 21);
1684
- int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
1685
- int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
1686
- int64_t a11 = (load_4(a + 28) >> 7);
1687
- int64_t b0 = 2097151 & load_3(b);
1688
- int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
1689
- int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
1690
- int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
1691
- int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
1692
- int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
1693
- int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
1694
- int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
1695
- int64_t b8 = 2097151 & load_3(b + 21);
1696
- int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
1697
- int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
1698
- int64_t b11 = (load_4(b + 28) >> 7);
1699
- int64_t c0 = 2097151 & load_3(c);
1700
- int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
1701
- int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
1702
- int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
1703
- int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
1704
- int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
1705
- int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
1706
- int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
1707
- int64_t c8 = 2097151 & load_3(c + 21);
1708
- int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
1709
- int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
1710
- int64_t c11 = (load_4(c + 28) >> 7);
1711
- int64_t s0;
1712
- int64_t s1;
1713
- int64_t s2;
1714
- int64_t s3;
1715
- int64_t s4;
1716
- int64_t s5;
1717
- int64_t s6;
1718
- int64_t s7;
1719
- int64_t s8;
1720
- int64_t s9;
1721
- int64_t s10;
1722
- int64_t s11;
1723
- int64_t s12;
1724
- int64_t s13;
1725
- int64_t s14;
1726
- int64_t s15;
1727
- int64_t s16;
1728
- int64_t s17;
1729
- int64_t s18;
1730
- int64_t s19;
1731
- int64_t s20;
1732
- int64_t s21;
1733
- int64_t s22;
1734
- int64_t s23;
1735
- int64_t carry0;
1736
- int64_t carry1;
1737
- int64_t carry2;
1738
- int64_t carry3;
1739
- int64_t carry4;
1740
- int64_t carry5;
1741
- int64_t carry6;
1742
- int64_t carry7;
1743
- int64_t carry8;
1744
- int64_t carry9;
1745
- int64_t carry10;
1746
- int64_t carry11;
1747
- int64_t carry12;
1748
- int64_t carry13;
1749
- int64_t carry14;
1750
- int64_t carry15;
1751
- int64_t carry16;
1752
- int64_t carry17;
1753
- int64_t carry18;
1754
- int64_t carry19;
1755
- int64_t carry20;
1756
- int64_t carry21;
1757
- int64_t carry22;
1758
-
1759
- s0 = c0 + a0*b0;
1760
- s1 = c1 + a0*b1 + a1*b0;
1761
- s2 = c2 + a0*b2 + a1*b1 + a2*b0;
1762
- s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0;
1763
- s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
1764
- s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
1765
- s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
1766
- s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
1767
- s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0;
1768
- s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0;
1769
- s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0;
1770
- s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0;
1771
- s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1;
1772
- s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2;
1773
- s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3;
1774
- s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4;
1775
- s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5;
1776
- s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6;
1777
- s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7;
1778
- s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8;
1779
- s20 = a9*b11 + a10*b10 + a11*b9;
1780
- s21 = a10*b11 + a11*b10;
1781
- s22 = a11*b11;
1782
- s23 = 0;
1783
-
1784
- carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1785
- carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1786
- carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1787
- carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1788
- carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1789
- carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1790
- carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1791
- carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1792
- carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1793
- carry18 = (s18 + (int64_t) (1L << 20)) >> 21; s19 += carry18; s18 -= carry18 * ((uint64_t) 1L << 21);
1794
- carry20 = (s20 + (int64_t) (1L << 20)) >> 21; s21 += carry20; s20 -= carry20 * ((uint64_t) 1L << 21);
1795
- carry22 = (s22 + (int64_t) (1L << 20)) >> 21; s23 += carry22; s22 -= carry22 * ((uint64_t) 1L << 21);
1796
-
1797
- carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1798
- carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1799
- carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1800
- carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1801
- carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1802
- carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1803
- carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1804
- carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1805
- carry17 = (s17 + (int64_t) (1L << 20)) >> 21; s18 += carry17; s17 -= carry17 * ((uint64_t) 1L << 21);
1806
- carry19 = (s19 + (int64_t) (1L << 20)) >> 21; s20 += carry19; s19 -= carry19 * ((uint64_t) 1L << 21);
1807
- carry21 = (s21 + (int64_t) (1L << 20)) >> 21; s22 += carry21; s21 -= carry21 * ((uint64_t) 1L << 21);
1808
-
1809
- s11 += s23 * 666643;
1810
- s12 += s23 * 470296;
1811
- s13 += s23 * 654183;
1812
- s14 -= s23 * 997805;
1813
- s15 += s23 * 136657;
1814
- s16 -= s23 * 683901;
1815
-
1816
- s10 += s22 * 666643;
1817
- s11 += s22 * 470296;
1818
- s12 += s22 * 654183;
1819
- s13 -= s22 * 997805;
1820
- s14 += s22 * 136657;
1821
- s15 -= s22 * 683901;
1822
-
1823
- s9 += s21 * 666643;
1824
- s10 += s21 * 470296;
1825
- s11 += s21 * 654183;
1826
- s12 -= s21 * 997805;
1827
- s13 += s21 * 136657;
1828
- s14 -= s21 * 683901;
1829
-
1830
- s8 += s20 * 666643;
1831
- s9 += s20 * 470296;
1832
- s10 += s20 * 654183;
1833
- s11 -= s20 * 997805;
1834
- s12 += s20 * 136657;
1835
- s13 -= s20 * 683901;
1836
-
1837
- s7 += s19 * 666643;
1838
- s8 += s19 * 470296;
1839
- s9 += s19 * 654183;
1840
- s10 -= s19 * 997805;
1841
- s11 += s19 * 136657;
1842
- s12 -= s19 * 683901;
1843
-
1844
- s6 += s18 * 666643;
1845
- s7 += s18 * 470296;
1846
- s8 += s18 * 654183;
1847
- s9 -= s18 * 997805;
1848
- s10 += s18 * 136657;
1849
- s11 -= s18 * 683901;
1850
-
1851
- carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1852
- carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1853
- carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1854
- carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1855
- carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1856
- carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1857
-
1858
- carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1859
- carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1860
- carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1861
- carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1862
- carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1863
-
1864
- s5 += s17 * 666643;
1865
- s6 += s17 * 470296;
1866
- s7 += s17 * 654183;
1867
- s8 -= s17 * 997805;
1868
- s9 += s17 * 136657;
1869
- s10 -= s17 * 683901;
1870
-
1871
- s4 += s16 * 666643;
1872
- s5 += s16 * 470296;
1873
- s6 += s16 * 654183;
1874
- s7 -= s16 * 997805;
1875
- s8 += s16 * 136657;
1876
- s9 -= s16 * 683901;
1877
-
1878
- s3 += s15 * 666643;
1879
- s4 += s15 * 470296;
1880
- s5 += s15 * 654183;
1881
- s6 -= s15 * 997805;
1882
- s7 += s15 * 136657;
1883
- s8 -= s15 * 683901;
1884
-
1885
- s2 += s14 * 666643;
1886
- s3 += s14 * 470296;
1887
- s4 += s14 * 654183;
1888
- s5 -= s14 * 997805;
1889
- s6 += s14 * 136657;
1890
- s7 -= s14 * 683901;
1891
-
1892
- s1 += s13 * 666643;
1893
- s2 += s13 * 470296;
1894
- s3 += s13 * 654183;
1895
- s4 -= s13 * 997805;
1896
- s5 += s13 * 136657;
1897
- s6 -= s13 * 683901;
1898
-
1899
- s0 += s12 * 666643;
1900
- s1 += s12 * 470296;
1901
- s2 += s12 * 654183;
1902
- s3 -= s12 * 997805;
1903
- s4 += s12 * 136657;
1904
- s5 -= s12 * 683901;
1905
- s12 = 0;
1906
-
1907
- carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1908
- carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1909
- carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1910
- carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1911
- carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1912
- carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1913
-
1914
- carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1915
- carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1916
- carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1917
- carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1918
- carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1919
- carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1920
-
1921
- s0 += s12 * 666643;
1922
- s1 += s12 * 470296;
1923
- s2 += s12 * 654183;
1924
- s3 -= s12 * 997805;
1925
- s4 += s12 * 136657;
1926
- s5 -= s12 * 683901;
1927
- s12 = 0;
1928
-
1929
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1930
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1931
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1932
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1933
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1934
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1935
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1936
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1937
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1938
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1939
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1940
- carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1941
-
1942
- s0 += s12 * 666643;
1943
- s1 += s12 * 470296;
1944
- s2 += s12 * 654183;
1945
- s3 -= s12 * 997805;
1946
- s4 += s12 * 136657;
1947
- s5 -= s12 * 683901;
1948
-
1949
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1950
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1951
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1952
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1953
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1954
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1955
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1956
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1957
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1958
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1959
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1960
-
1961
- s[0] = s0 >> 0;
1962
- s[1] = s0 >> 8;
1963
- s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1964
- s[3] = s1 >> 3;
1965
- s[4] = s1 >> 11;
1966
- s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1967
- s[6] = s2 >> 6;
1968
- s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1969
- s[8] = s3 >> 1;
1970
- s[9] = s3 >> 9;
1971
- s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1972
- s[11] = s4 >> 4;
1973
- s[12] = s4 >> 12;
1974
- s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1975
- s[14] = s5 >> 7;
1976
- s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1977
- s[16] = s6 >> 2;
1978
- s[17] = s6 >> 10;
1979
- s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1980
- s[19] = s7 >> 5;
1981
- s[20] = s7 >> 13;
1982
- s[21] = s8 >> 0;
1983
- s[22] = s8 >> 8;
1984
- s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1985
- s[24] = s9 >> 3;
1986
- s[25] = s9 >> 11;
1987
- s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1988
- s[27] = s10 >> 6;
1989
- s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1990
- s[29] = s11 >> 1;
1991
- s[30] = s11 >> 9;
1992
- s[31] = s11 >> 17;
1684
+ int64_t a0 = 2097151 & load_3(a);
1685
+ int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
1686
+ int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
1687
+ int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
1688
+ int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
1689
+ int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
1690
+ int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
1691
+ int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
1692
+ int64_t a8 = 2097151 & load_3(a + 21);
1693
+ int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
1694
+ int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
1695
+ int64_t a11 = (load_4(a + 28) >> 7);
1696
+ int64_t b0 = 2097151 & load_3(b);
1697
+ int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
1698
+ int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
1699
+ int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
1700
+ int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
1701
+ int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
1702
+ int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
1703
+ int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
1704
+ int64_t b8 = 2097151 & load_3(b + 21);
1705
+ int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
1706
+ int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
1707
+ int64_t b11 = (load_4(b + 28) >> 7);
1708
+ int64_t c0 = 2097151 & load_3(c);
1709
+ int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
1710
+ int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
1711
+ int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
1712
+ int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
1713
+ int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
1714
+ int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
1715
+ int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
1716
+ int64_t c8 = 2097151 & load_3(c + 21);
1717
+ int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
1718
+ int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
1719
+ int64_t c11 = (load_4(c + 28) >> 7);
1720
+ int64_t s0;
1721
+ int64_t s1;
1722
+ int64_t s2;
1723
+ int64_t s3;
1724
+ int64_t s4;
1725
+ int64_t s5;
1726
+ int64_t s6;
1727
+ int64_t s7;
1728
+ int64_t s8;
1729
+ int64_t s9;
1730
+ int64_t s10;
1731
+ int64_t s11;
1732
+ int64_t s12;
1733
+ int64_t s13;
1734
+ int64_t s14;
1735
+ int64_t s15;
1736
+ int64_t s16;
1737
+ int64_t s17;
1738
+ int64_t s18;
1739
+ int64_t s19;
1740
+ int64_t s20;
1741
+ int64_t s21;
1742
+ int64_t s22;
1743
+ int64_t s23;
1744
+ int64_t carry0;
1745
+ int64_t carry1;
1746
+ int64_t carry2;
1747
+ int64_t carry3;
1748
+ int64_t carry4;
1749
+ int64_t carry5;
1750
+ int64_t carry6;
1751
+ int64_t carry7;
1752
+ int64_t carry8;
1753
+ int64_t carry9;
1754
+ int64_t carry10;
1755
+ int64_t carry11;
1756
+ int64_t carry12;
1757
+ int64_t carry13;
1758
+ int64_t carry14;
1759
+ int64_t carry15;
1760
+ int64_t carry16;
1761
+ int64_t carry17;
1762
+ int64_t carry18;
1763
+ int64_t carry19;
1764
+ int64_t carry20;
1765
+ int64_t carry21;
1766
+ int64_t carry22;
1767
+
1768
+ s0 = c0 + a0*b0;
1769
+ s1 = c1 + a0*b1 + a1*b0;
1770
+ s2 = c2 + a0*b2 + a1*b1 + a2*b0;
1771
+ s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0;
1772
+ s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
1773
+ s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
1774
+ s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
1775
+ s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
1776
+ s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0;
1777
+ s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0;
1778
+ s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0;
1779
+ s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0;
1780
+ s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1;
1781
+ s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2;
1782
+ s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3;
1783
+ s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4;
1784
+ s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5;
1785
+ s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6;
1786
+ s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7;
1787
+ s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8;
1788
+ s20 = a9*b11 + a10*b10 + a11*b9;
1789
+ s21 = a10*b11 + a11*b10;
1790
+ s22 = a11*b11;
1791
+ s23 = 0;
1792
+
1793
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1794
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1795
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1796
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1797
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1798
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1799
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1800
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1801
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1802
+ carry18 = (s18 + (int64_t) (1L << 20)) >> 21; s19 += carry18; s18 -= carry18 * ((uint64_t) 1L << 21);
1803
+ carry20 = (s20 + (int64_t) (1L << 20)) >> 21; s21 += carry20; s20 -= carry20 * ((uint64_t) 1L << 21);
1804
+ carry22 = (s22 + (int64_t) (1L << 20)) >> 21; s23 += carry22; s22 -= carry22 * ((uint64_t) 1L << 21);
1805
+
1806
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1807
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1808
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1809
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1810
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1811
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1812
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1813
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1814
+ carry17 = (s17 + (int64_t) (1L << 20)) >> 21; s18 += carry17; s17 -= carry17 * ((uint64_t) 1L << 21);
1815
+ carry19 = (s19 + (int64_t) (1L << 20)) >> 21; s20 += carry19; s19 -= carry19 * ((uint64_t) 1L << 21);
1816
+ carry21 = (s21 + (int64_t) (1L << 20)) >> 21; s22 += carry21; s21 -= carry21 * ((uint64_t) 1L << 21);
1817
+
1818
+ s11 += s23 * 666643;
1819
+ s12 += s23 * 470296;
1820
+ s13 += s23 * 654183;
1821
+ s14 -= s23 * 997805;
1822
+ s15 += s23 * 136657;
1823
+ s16 -= s23 * 683901;
1824
+
1825
+ s10 += s22 * 666643;
1826
+ s11 += s22 * 470296;
1827
+ s12 += s22 * 654183;
1828
+ s13 -= s22 * 997805;
1829
+ s14 += s22 * 136657;
1830
+ s15 -= s22 * 683901;
1831
+
1832
+ s9 += s21 * 666643;
1833
+ s10 += s21 * 470296;
1834
+ s11 += s21 * 654183;
1835
+ s12 -= s21 * 997805;
1836
+ s13 += s21 * 136657;
1837
+ s14 -= s21 * 683901;
1838
+
1839
+ s8 += s20 * 666643;
1840
+ s9 += s20 * 470296;
1841
+ s10 += s20 * 654183;
1842
+ s11 -= s20 * 997805;
1843
+ s12 += s20 * 136657;
1844
+ s13 -= s20 * 683901;
1845
+
1846
+ s7 += s19 * 666643;
1847
+ s8 += s19 * 470296;
1848
+ s9 += s19 * 654183;
1849
+ s10 -= s19 * 997805;
1850
+ s11 += s19 * 136657;
1851
+ s12 -= s19 * 683901;
1852
+
1853
+ s6 += s18 * 666643;
1854
+ s7 += s18 * 470296;
1855
+ s8 += s18 * 654183;
1856
+ s9 -= s18 * 997805;
1857
+ s10 += s18 * 136657;
1858
+ s11 -= s18 * 683901;
1859
+
1860
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1861
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1862
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1863
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1864
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1865
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1866
+
1867
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1868
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1869
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1870
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1871
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1872
+
1873
+ s5 += s17 * 666643;
1874
+ s6 += s17 * 470296;
1875
+ s7 += s17 * 654183;
1876
+ s8 -= s17 * 997805;
1877
+ s9 += s17 * 136657;
1878
+ s10 -= s17 * 683901;
1879
+
1880
+ s4 += s16 * 666643;
1881
+ s5 += s16 * 470296;
1882
+ s6 += s16 * 654183;
1883
+ s7 -= s16 * 997805;
1884
+ s8 += s16 * 136657;
1885
+ s9 -= s16 * 683901;
1886
+
1887
+ s3 += s15 * 666643;
1888
+ s4 += s15 * 470296;
1889
+ s5 += s15 * 654183;
1890
+ s6 -= s15 * 997805;
1891
+ s7 += s15 * 136657;
1892
+ s8 -= s15 * 683901;
1893
+
1894
+ s2 += s14 * 666643;
1895
+ s3 += s14 * 470296;
1896
+ s4 += s14 * 654183;
1897
+ s5 -= s14 * 997805;
1898
+ s6 += s14 * 136657;
1899
+ s7 -= s14 * 683901;
1900
+
1901
+ s1 += s13 * 666643;
1902
+ s2 += s13 * 470296;
1903
+ s3 += s13 * 654183;
1904
+ s4 -= s13 * 997805;
1905
+ s5 += s13 * 136657;
1906
+ s6 -= s13 * 683901;
1907
+
1908
+ s0 += s12 * 666643;
1909
+ s1 += s12 * 470296;
1910
+ s2 += s12 * 654183;
1911
+ s3 -= s12 * 997805;
1912
+ s4 += s12 * 136657;
1913
+ s5 -= s12 * 683901;
1914
+ s12 = 0;
1915
+
1916
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1917
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1918
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1919
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1920
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1921
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1922
+
1923
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1924
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1925
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1926
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1927
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1928
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1929
+
1930
+ s0 += s12 * 666643;
1931
+ s1 += s12 * 470296;
1932
+ s2 += s12 * 654183;
1933
+ s3 -= s12 * 997805;
1934
+ s4 += s12 * 136657;
1935
+ s5 -= s12 * 683901;
1936
+ s12 = 0;
1937
+
1938
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1939
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1940
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1941
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1942
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1943
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1944
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1945
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1946
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1947
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1948
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1949
+ carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1950
+
1951
+ s0 += s12 * 666643;
1952
+ s1 += s12 * 470296;
1953
+ s2 += s12 * 654183;
1954
+ s3 -= s12 * 997805;
1955
+ s4 += s12 * 136657;
1956
+ s5 -= s12 * 683901;
1957
+
1958
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
1959
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1960
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1961
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1962
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1963
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1964
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1965
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1966
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1967
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1968
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1969
+
1970
+ s[0] = s0 >> 0;
1971
+ s[1] = s0 >> 8;
1972
+ s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1973
+ s[3] = s1 >> 3;
1974
+ s[4] = s1 >> 11;
1975
+ s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1976
+ s[6] = s2 >> 6;
1977
+ s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1978
+ s[8] = s3 >> 1;
1979
+ s[9] = s3 >> 9;
1980
+ s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1981
+ s[11] = s4 >> 4;
1982
+ s[12] = s4 >> 12;
1983
+ s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1984
+ s[14] = s5 >> 7;
1985
+ s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1986
+ s[16] = s6 >> 2;
1987
+ s[17] = s6 >> 10;
1988
+ s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1989
+ s[19] = s7 >> 5;
1990
+ s[20] = s7 >> 13;
1991
+ s[21] = s8 >> 0;
1992
+ s[22] = s8 >> 8;
1993
+ s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1994
+ s[24] = s9 >> 3;
1995
+ s[25] = s9 >> 11;
1996
+ s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1997
+ s[27] = s10 >> 6;
1998
+ s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1999
+ s[29] = s11 >> 1;
2000
+ s[30] = s11 >> 9;
2001
+ s[31] = s11 >> 17;
1993
2002
  }
1994
2003
 
1995
2004
  /*
1996
- Input:
1997
- s[0]+256*s[1]+...+256^63*s[63] = s
1998
-
1999
- Output:
2000
- s[0]+256*s[1]+...+256^31*s[31] = s mod l
2001
- where l = 2^252 + 27742317777372353535851937790883648493.
2002
- Overwrites s in place.
2003
- */
2005
+ Input:
2006
+ s[0]+256*s[1]+...+256^63*s[63] = s
2007
+ *
2008
+ Output:
2009
+ s[0]+256*s[1]+...+256^31*s[31] = s mod l
2010
+ where l = 2^252 + 27742317777372353535851937790883648493.
2011
+ Overwrites s in place.
2012
+ */
2004
2013
 
2005
2014
  void sc_reduce(unsigned char *s)
2006
2015
  {
2007
- int64_t s0 = 2097151 & load_3(s);
2008
- int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
2009
- int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
2010
- int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
2011
- int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
2012
- int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
2013
- int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
2014
- int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
2015
- int64_t s8 = 2097151 & load_3(s + 21);
2016
- int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
2017
- int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
2018
- int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
2019
- int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
2020
- int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
2021
- int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
2022
- int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
2023
- int64_t s16 = 2097151 & load_3(s + 42);
2024
- int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
2025
- int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
2026
- int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
2027
- int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
2028
- int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
2029
- int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
2030
- int64_t s23 = (load_4(s + 60) >> 3);
2031
- int64_t carry0;
2032
- int64_t carry1;
2033
- int64_t carry2;
2034
- int64_t carry3;
2035
- int64_t carry4;
2036
- int64_t carry5;
2037
- int64_t carry6;
2038
- int64_t carry7;
2039
- int64_t carry8;
2040
- int64_t carry9;
2041
- int64_t carry10;
2042
- int64_t carry11;
2043
- int64_t carry12;
2044
- int64_t carry13;
2045
- int64_t carry14;
2046
- int64_t carry15;
2047
- int64_t carry16;
2048
-
2049
- s11 += s23 * 666643;
2050
- s12 += s23 * 470296;
2051
- s13 += s23 * 654183;
2052
- s14 -= s23 * 997805;
2053
- s15 += s23 * 136657;
2054
- s16 -= s23 * 683901;
2055
-
2056
- s10 += s22 * 666643;
2057
- s11 += s22 * 470296;
2058
- s12 += s22 * 654183;
2059
- s13 -= s22 * 997805;
2060
- s14 += s22 * 136657;
2061
- s15 -= s22 * 683901;
2062
-
2063
- s9 += s21 * 666643;
2064
- s10 += s21 * 470296;
2065
- s11 += s21 * 654183;
2066
- s12 -= s21 * 997805;
2067
- s13 += s21 * 136657;
2068
- s14 -= s21 * 683901;
2069
-
2070
- s8 += s20 * 666643;
2071
- s9 += s20 * 470296;
2072
- s10 += s20 * 654183;
2073
- s11 -= s20 * 997805;
2074
- s12 += s20 * 136657;
2075
- s13 -= s20 * 683901;
2076
-
2077
- s7 += s19 * 666643;
2078
- s8 += s19 * 470296;
2079
- s9 += s19 * 654183;
2080
- s10 -= s19 * 997805;
2081
- s11 += s19 * 136657;
2082
- s12 -= s19 * 683901;
2083
-
2084
- s6 += s18 * 666643;
2085
- s7 += s18 * 470296;
2086
- s8 += s18 * 654183;
2087
- s9 -= s18 * 997805;
2088
- s10 += s18 * 136657;
2089
- s11 -= s18 * 683901;
2016
+ int64_t s0 = 2097151 & load_3(s);
2017
+ int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
2018
+ int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
2019
+ int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
2020
+ int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
2021
+ int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
2022
+ int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
2023
+ int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
2024
+ int64_t s8 = 2097151 & load_3(s + 21);
2025
+ int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
2026
+ int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
2027
+ int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
2028
+ int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
2029
+ int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
2030
+ int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
2031
+ int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
2032
+ int64_t s16 = 2097151 & load_3(s + 42);
2033
+ int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
2034
+ int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
2035
+ int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
2036
+ int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
2037
+ int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
2038
+ int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
2039
+ int64_t s23 = (load_4(s + 60) >> 3);
2040
+ int64_t carry0;
2041
+ int64_t carry1;
2042
+ int64_t carry2;
2043
+ int64_t carry3;
2044
+ int64_t carry4;
2045
+ int64_t carry5;
2046
+ int64_t carry6;
2047
+ int64_t carry7;
2048
+ int64_t carry8;
2049
+ int64_t carry9;
2050
+ int64_t carry10;
2051
+ int64_t carry11;
2052
+ int64_t carry12;
2053
+ int64_t carry13;
2054
+ int64_t carry14;
2055
+ int64_t carry15;
2056
+ int64_t carry16;
2057
+
2058
+ s11 += s23 * 666643;
2059
+ s12 += s23 * 470296;
2060
+ s13 += s23 * 654183;
2061
+ s14 -= s23 * 997805;
2062
+ s15 += s23 * 136657;
2063
+ s16 -= s23 * 683901;
2064
+
2065
+ s10 += s22 * 666643;
2066
+ s11 += s22 * 470296;
2067
+ s12 += s22 * 654183;
2068
+ s13 -= s22 * 997805;
2069
+ s14 += s22 * 136657;
2070
+ s15 -= s22 * 683901;
2071
+
2072
+ s9 += s21 * 666643;
2073
+ s10 += s21 * 470296;
2074
+ s11 += s21 * 654183;
2075
+ s12 -= s21 * 997805;
2076
+ s13 += s21 * 136657;
2077
+ s14 -= s21 * 683901;
2078
+
2079
+ s8 += s20 * 666643;
2080
+ s9 += s20 * 470296;
2081
+ s10 += s20 * 654183;
2082
+ s11 -= s20 * 997805;
2083
+ s12 += s20 * 136657;
2084
+ s13 -= s20 * 683901;
2085
+
2086
+ s7 += s19 * 666643;
2087
+ s8 += s19 * 470296;
2088
+ s9 += s19 * 654183;
2089
+ s10 -= s19 * 997805;
2090
+ s11 += s19 * 136657;
2091
+ s12 -= s19 * 683901;
2092
+
2093
+ s6 += s18 * 666643;
2094
+ s7 += s18 * 470296;
2095
+ s8 += s18 * 654183;
2096
+ s9 -= s18 * 997805;
2097
+ s10 += s18 * 136657;
2098
+ s11 -= s18 * 683901;
2090
2099
 
2091
2100
  carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2092
- carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2093
- carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2094
- carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
2095
- carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
2096
- carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
2097
-
2098
- carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2099
- carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2100
- carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2101
- carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
2102
- carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
2103
-
2104
- s5 += s17 * 666643;
2105
- s6 += s17 * 470296;
2106
- s7 += s17 * 654183;
2107
- s8 -= s17 * 997805;
2108
- s9 += s17 * 136657;
2109
- s10 -= s17 * 683901;
2110
-
2111
- s4 += s16 * 666643;
2112
- s5 += s16 * 470296;
2113
- s6 += s16 * 654183;
2114
- s7 -= s16 * 997805;
2115
- s8 += s16 * 136657;
2116
- s9 -= s16 * 683901;
2117
-
2118
- s3 += s15 * 666643;
2119
- s4 += s15 * 470296;
2120
- s5 += s15 * 654183;
2121
- s6 -= s15 * 997805;
2122
- s7 += s15 * 136657;
2123
- s8 -= s15 * 683901;
2124
-
2125
- s2 += s14 * 666643;
2126
- s3 += s14 * 470296;
2127
- s4 += s14 * 654183;
2128
- s5 -= s14 * 997805;
2129
- s6 += s14 * 136657;
2130
- s7 -= s14 * 683901;
2131
-
2132
- s1 += s13 * 666643;
2133
- s2 += s13 * 470296;
2134
- s3 += s13 * 654183;
2135
- s4 -= s13 * 997805;
2136
- s5 += s13 * 136657;
2137
- s6 -= s13 * 683901;
2138
-
2139
- s0 += s12 * 666643;
2140
- s1 += s12 * 470296;
2141
- s2 += s12 * 654183;
2142
- s3 -= s12 * 997805;
2143
- s4 += s12 * 136657;
2144
- s5 -= s12 * 683901;
2145
- s12 = 0;
2146
-
2147
- carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2148
- carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2149
- carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2150
- carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2151
- carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2152
- carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2153
-
2154
- carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2155
- carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2156
- carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2157
- carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2158
- carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2159
- carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2160
-
2161
- s0 += s12 * 666643;
2162
- s1 += s12 * 470296;
2163
- s2 += s12 * 654183;
2164
- s3 -= s12 * 997805;
2165
- s4 += s12 * 136657;
2166
- s5 -= s12 * 683901;
2167
- s12 = 0;
2168
-
2169
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2170
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2171
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2172
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2173
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2174
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2175
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2176
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2177
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2178
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2179
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2180
- carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2181
-
2182
- s0 += s12 * 666643;
2183
- s1 += s12 * 470296;
2184
- s2 += s12 * 654183;
2185
- s3 -= s12 * 997805;
2186
- s4 += s12 * 136657;
2187
- s5 -= s12 * 683901;
2188
-
2189
- carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2190
- carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2191
- carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2192
- carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2193
- carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2194
- carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2195
- carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2196
- carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2197
- carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2198
- carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2199
- carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2200
-
2201
- s[0] = s0 >> 0;
2202
- s[1] = s0 >> 8;
2203
- s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
2204
- s[3] = s1 >> 3;
2205
- s[4] = s1 >> 11;
2206
- s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
2207
- s[6] = s2 >> 6;
2208
- s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
2209
- s[8] = s3 >> 1;
2210
- s[9] = s3 >> 9;
2211
- s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
2212
- s[11] = s4 >> 4;
2213
- s[12] = s4 >> 12;
2214
- s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
2215
- s[14] = s5 >> 7;
2216
- s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
2217
- s[16] = s6 >> 2;
2218
- s[17] = s6 >> 10;
2219
- s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
2220
- s[19] = s7 >> 5;
2221
- s[20] = s7 >> 13;
2222
- s[21] = s8 >> 0;
2223
- s[22] = s8 >> 8;
2224
- s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
2225
- s[24] = s9 >> 3;
2226
- s[25] = s9 >> 11;
2227
- s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
2228
- s[27] = s10 >> 6;
2229
- s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
2230
- s[29] = s11 >> 1;
2231
- s[30] = s11 >> 9;
2232
- s[31] = s11 >> 17;
2101
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2102
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2103
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
2104
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
2105
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
2106
+
2107
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2108
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2109
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2110
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
2111
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
2112
+
2113
+ s5 += s17 * 666643;
2114
+ s6 += s17 * 470296;
2115
+ s7 += s17 * 654183;
2116
+ s8 -= s17 * 997805;
2117
+ s9 += s17 * 136657;
2118
+ s10 -= s17 * 683901;
2119
+
2120
+ s4 += s16 * 666643;
2121
+ s5 += s16 * 470296;
2122
+ s6 += s16 * 654183;
2123
+ s7 -= s16 * 997805;
2124
+ s8 += s16 * 136657;
2125
+ s9 -= s16 * 683901;
2126
+
2127
+ s3 += s15 * 666643;
2128
+ s4 += s15 * 470296;
2129
+ s5 += s15 * 654183;
2130
+ s6 -= s15 * 997805;
2131
+ s7 += s15 * 136657;
2132
+ s8 -= s15 * 683901;
2133
+
2134
+ s2 += s14 * 666643;
2135
+ s3 += s14 * 470296;
2136
+ s4 += s14 * 654183;
2137
+ s5 -= s14 * 997805;
2138
+ s6 += s14 * 136657;
2139
+ s7 -= s14 * 683901;
2140
+
2141
+ s1 += s13 * 666643;
2142
+ s2 += s13 * 470296;
2143
+ s3 += s13 * 654183;
2144
+ s4 -= s13 * 997805;
2145
+ s5 += s13 * 136657;
2146
+ s6 -= s13 * 683901;
2147
+
2148
+ s0 += s12 * 666643;
2149
+ s1 += s12 * 470296;
2150
+ s2 += s12 * 654183;
2151
+ s3 -= s12 * 997805;
2152
+ s4 += s12 * 136657;
2153
+ s5 -= s12 * 683901;
2154
+ s12 = 0;
2155
+
2156
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2157
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2158
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2159
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2160
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2161
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2162
+
2163
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2164
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2165
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2166
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2167
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2168
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2169
+
2170
+ s0 += s12 * 666643;
2171
+ s1 += s12 * 470296;
2172
+ s2 += s12 * 654183;
2173
+ s3 -= s12 * 997805;
2174
+ s4 += s12 * 136657;
2175
+ s5 -= s12 * 683901;
2176
+ s12 = 0;
2177
+
2178
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2179
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2180
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2181
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2182
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2183
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2184
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2185
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2186
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2187
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2188
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2189
+ carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2190
+
2191
+ s0 += s12 * 666643;
2192
+ s1 += s12 * 470296;
2193
+ s2 += s12 * 654183;
2194
+ s3 -= s12 * 997805;
2195
+ s4 += s12 * 136657;
2196
+ s5 -= s12 * 683901;
2197
+
2198
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21);
2199
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2200
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2201
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2202
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2203
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2204
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2205
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2206
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2207
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2208
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2209
+
2210
+ s[0] = s0 >> 0;
2211
+ s[1] = s0 >> 8;
2212
+ s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
2213
+ s[3] = s1 >> 3;
2214
+ s[4] = s1 >> 11;
2215
+ s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
2216
+ s[6] = s2 >> 6;
2217
+ s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
2218
+ s[8] = s3 >> 1;
2219
+ s[9] = s3 >> 9;
2220
+ s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
2221
+ s[11] = s4 >> 4;
2222
+ s[12] = s4 >> 12;
2223
+ s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
2224
+ s[14] = s5 >> 7;
2225
+ s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
2226
+ s[16] = s6 >> 2;
2227
+ s[17] = s6 >> 10;
2228
+ s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
2229
+ s[19] = s7 >> 5;
2230
+ s[20] = s7 >> 13;
2231
+ s[21] = s8 >> 0;
2232
+ s[22] = s8 >> 8;
2233
+ s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
2234
+ s[24] = s9 >> 3;
2235
+ s[25] = s9 >> 11;
2236
+ s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
2237
+ s[27] = s10 >> 6;
2238
+ s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
2239
+ s[29] = s11 >> 1;
2240
+ s[30] = s11 >> 9;
2241
+ s[31] = s11 >> 17;
2233
2242
  }