rbnacl-libsodium 1.0.7 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +4 -0
  3. data/lib/rbnacl/libsodium/version.rb +1 -1
  4. data/vendor/libsodium/ChangeLog +5 -0
  5. data/vendor/libsodium/README.markdown +4 -1
  6. data/vendor/libsodium/autogen.sh +0 -5
  7. data/vendor/libsodium/autom4te.cache/output.1 +14 -14
  8. data/vendor/libsodium/autom4te.cache/output.5 +14 -14
  9. data/vendor/libsodium/autom4te.cache/requests +868 -868
  10. data/vendor/libsodium/autom4te.cache/traces.1 +1 -1
  11. data/vendor/libsodium/builds/msvc/version.h +2 -2
  12. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +10 -72
  13. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +36 -216
  14. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +10 -72
  15. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +34 -214
  16. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +10 -72
  17. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +34 -214
  18. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +10 -72
  19. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +25 -205
  20. data/vendor/libsodium/configure +14 -14
  21. data/vendor/libsodium/configure.ac +5 -5
  22. data/vendor/libsodium/dist-build/msys2-win32.sh +3 -2
  23. data/vendor/libsodium/dist-build/msys2-win64.sh +3 -2
  24. data/vendor/libsodium/examples/sign.c +2 -2
  25. data/vendor/libsodium/examples/utils.h +3 -1
  26. data/vendor/libsodium/libsodium.vcxproj +7 -53
  27. data/vendor/libsodium/libsodium.vcxproj.filters +18 -152
  28. data/vendor/libsodium/msvc-scripts/process.bat +2 -2
  29. data/vendor/libsodium/src/libsodium/Makefile.am +6 -68
  30. data/vendor/libsodium/src/libsodium/Makefile.in +49 -724
  31. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +2 -2
  32. data/vendor/libsodium/src/libsodium/{crypto_sign/ed25519 → crypto_core/curve25519}/ref10/base.h +0 -0
  33. data/vendor/libsodium/src/libsodium/{crypto_sign/ed25519 → crypto_core/curve25519}/ref10/base2.h +0 -0
  34. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +2233 -0
  35. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.h +160 -0
  36. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +2 -2
  37. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +1 -1
  38. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +1 -1
  39. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +1 -1
  40. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +2 -2
  41. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +2 -2
  42. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +1 -1
  43. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +255 -0
  44. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/{curve25519_ref10.h → x25519_ref10.h} +0 -0
  45. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/scalarmult_curve25519.c +1 -1
  46. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/keypair.c +1 -2
  47. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/obsolete.c +2 -4
  48. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +1 -2
  49. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sign.c +1 -2
  50. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +1 -1
  51. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c +16 -9
  52. data/vendor/libsodium/src/libsodium/include/sodium/export.h +2 -2
  53. data/vendor/libsodium/src/libsodium/include/sodium/utils.h +6 -4
  54. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +12 -8
  55. data/vendor/libsodium/src/libsodium/sodium/core.c +9 -0
  56. data/vendor/libsodium/src/libsodium/sodium/runtime.c +32 -10
  57. data/vendor/libsodium/src/libsodium/sodium/utils.c +8 -8
  58. data/vendor/libsodium/test/default/auth.c +1 -1
  59. data/vendor/libsodium/test/default/box.c +16 -4
  60. data/vendor/libsodium/test/default/box2.c +7 -0
  61. data/vendor/libsodium/test/default/box7.c +18 -10
  62. data/vendor/libsodium/test/default/box8.c +1 -1
  63. data/vendor/libsodium/test/default/box_easy2.c +13 -0
  64. data/vendor/libsodium/test/default/onetimeauth.c +1 -0
  65. data/vendor/libsodium/test/default/pwhash_scrypt_ll.c +2 -2
  66. data/vendor/libsodium/test/default/verify1.c +1 -1
  67. metadata +9 -71
  68. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/curve25519_ref10.c +0 -73
  69. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe.h +0 -44
  70. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_0_curve25519_ref10.c +0 -23
  71. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_1_curve25519_ref10.c +0 -23
  72. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_add_curve25519_ref10.c +0 -61
  73. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_copy_curve25519_ref10.c +0 -33
  74. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_cswap_curve25519_ref10.c +0 -77
  75. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_frombytes_curve25519_ref10.c +0 -74
  76. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_invert_curve25519_ref10.c +0 -18
  77. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_mul121666_curve25519_ref10.c +0 -74
  78. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_mul_curve25519_ref10.c +0 -257
  79. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_sq_curve25519_ref10.c +0 -153
  80. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_sub_curve25519_ref10.c +0 -61
  81. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_tobytes_curve25519_ref10.c +0 -123
  82. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/montgomery.h +0 -140
  83. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/pow225521.h +0 -160
  84. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/d.h +0 -1
  85. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/d2.h +0 -1
  86. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe.h +0 -56
  87. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_0.c +0 -19
  88. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_1.c +0 -19
  89. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_add.c +0 -57
  90. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_cmov.c +0 -63
  91. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_copy.c +0 -29
  92. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_frombytes.c +0 -73
  93. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_invert.c +0 -14
  94. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_isnegative.c +0 -16
  95. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_isnonzero.c +0 -19
  96. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_mul.c +0 -253
  97. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_neg.c +0 -45
  98. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_pow22523.c +0 -13
  99. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_sq.c +0 -149
  100. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_sq2.c +0 -160
  101. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_sub.c +0 -57
  102. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/fe_tobytes.c +0 -119
  103. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge.h +0 -97
  104. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_add.c +0 -11
  105. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_add.h +0 -97
  106. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_double_scalarmult.c +0 -138
  107. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_frombytes.c +0 -50
  108. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_madd.c +0 -11
  109. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_madd.h +0 -88
  110. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_msub.c +0 -11
  111. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_msub.h +0 -88
  112. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p1p1_to_p2.c +0 -12
  113. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p1p1_to_p3.c +0 -13
  114. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p2_0.c +0 -8
  115. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p2_dbl.c +0 -11
  116. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p2_dbl.h +0 -73
  117. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p3_0.c +0 -9
  118. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p3_dbl.c +0 -12
  119. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p3_to_cached.c +0 -17
  120. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p3_to_p2.c +0 -12
  121. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_p3_tobytes.c +0 -14
  122. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_precomp_0.c +0 -8
  123. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_scalarmult_base.c +0 -111
  124. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_sub.c +0 -11
  125. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_sub.h +0 -97
  126. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_tobytes.c +0 -14
  127. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/pow22523.h +0 -160
  128. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/pow225521.h +0 -160
  129. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sc.h +0 -15
  130. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sc_muladd.c +0 -368
  131. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sc_reduce.c +0 -275
  132. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sqrtm1.h +0 -1
@@ -527,7 +527,7 @@ crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen
527
527
  (void) nsec;
528
528
  memcpy(H, ctx->H, sizeof H);
529
529
  if (mlen > 16ULL * (1ULL << 32)) {
530
- abort();
530
+ abort(); /* LCOV_EXCL_LINE */
531
531
  }
532
532
  memcpy(&n2[0], npub, 3 * 4);
533
533
  n2[3] = 0x01000000;
@@ -646,7 +646,7 @@ crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen
646
646
 
647
647
  (void) nsec;
648
648
  if (clen > 16ULL * (1ULL << 32) - 16ULL) {
649
- abort();
649
+ abort(); /* LCOV_EXCL_LINE */
650
650
  }
651
651
  if (mlen_p != NULL) {
652
652
  *mlen_p = 0U;
@@ -0,0 +1,2233 @@
1
+ #include <stddef.h>
2
+ #include <stdint.h>
3
+ #include <string.h>
4
+ #include "curve25519_ref10.h"
5
+ #include "crypto_verify_32.h"
6
+
7
+ static uint64_t load_3(const unsigned char *in) /* Reads in[0..2] as a little-endian 24-bit value, widened to uint64_t. */
8
+ {
9
+ uint64_t result;
10
+ result = (uint64_t) in[0]; /* least-significant byte */
11
+ result |= ((uint64_t) in[1]) << 8;
12
+ result |= ((uint64_t) in[2]) << 16; /* most-significant of the three bytes */
13
+ return result;
14
+ }
15
+
16
+ static uint64_t load_4(const unsigned char *in) /* Reads in[0..3] as a little-endian 32-bit value, widened to uint64_t. */
17
+ {
18
+ uint64_t result;
19
+ result = (uint64_t) in[0]; /* least-significant byte */
20
+ result |= ((uint64_t) in[1]) << 8;
21
+ result |= ((uint64_t) in[2]) << 16;
22
+ result |= ((uint64_t) in[3]) << 24; /* each byte is widened before shifting, so bit 31 never lands in a sign bit */
23
+ return result;
24
+ }
25
+
26
+ /*
27
+ h = 0
28
+ */
29
+
30
+ void fe_0(fe h) /* Sets the field element h to 0 by clearing its ten limbs. */
31
+ {
32
+ memset(&h[0], 0, 10 * sizeof h[0]); /* ten limbs, h[0] through h[9] */
33
+ }
34
+
35
+ /*
36
+ h = 1
37
+ */
38
+
39
+ void fe_1(fe h) /* Sets the field element h to 1: limb 0 is 1, the other nine limbs are 0. */
40
+ {
41
+ h[0] = 1;
42
+ h[1] = 0;
43
+ memset(&h[2], 0, 8 * sizeof h[0]); /* clears the remaining limbs h[2] through h[9] */
44
+ }
45
+
46
+ /*
47
+ h = f + g
48
+ Can overlap h with f or g.
49
+
50
+ Preconditions:
51
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
52
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
53
+
54
+ Postconditions:
55
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
56
+ */
57
+
58
+ void fe_add(fe h,const fe f,const fe g) /* h = f + g, added limb by limb; no carries are propagated here. */
59
+ {
60
+ int32_t f0 = f[0]; /* all inputs are copied to locals before h is written, so h may alias f or g */
61
+ int32_t f1 = f[1];
62
+ int32_t f2 = f[2];
63
+ int32_t f3 = f[3];
64
+ int32_t f4 = f[4];
65
+ int32_t f5 = f[5];
66
+ int32_t f6 = f[6];
67
+ int32_t f7 = f[7];
68
+ int32_t f8 = f[8];
69
+ int32_t f9 = f[9];
70
+ int32_t g0 = g[0];
71
+ int32_t g1 = g[1];
72
+ int32_t g2 = g[2];
73
+ int32_t g3 = g[3];
74
+ int32_t g4 = g[4];
75
+ int32_t g5 = g[5];
76
+ int32_t g6 = g[6];
77
+ int32_t g7 = g[7];
78
+ int32_t g8 = g[8];
79
+ int32_t g9 = g[9];
80
+ int32_t h0 = f0 + g0; /* plain 32-bit sums; the caller must respect the bounds stated in the preconditions */
81
+ int32_t h1 = f1 + g1;
82
+ int32_t h2 = f2 + g2;
83
+ int32_t h3 = f3 + g3;
84
+ int32_t h4 = f4 + g4;
85
+ int32_t h5 = f5 + g5;
86
+ int32_t h6 = f6 + g6;
87
+ int32_t h7 = f7 + g7;
88
+ int32_t h8 = f8 + g8;
89
+ int32_t h9 = f9 + g9;
90
+ h[0] = h0;
91
+ h[1] = h1;
92
+ h[2] = h2;
93
+ h[3] = h3;
94
+ h[4] = h4;
95
+ h[5] = h5;
96
+ h[6] = h6;
97
+ h[7] = h7;
98
+ h[8] = h8;
99
+ h[9] = h9;
100
+ }
101
+
102
+ /*
103
+ Replace (f,g) with (g,g) if b == 1;
104
+ replace (f,g) with (f,g) if b == 0.
105
+
106
+ Preconditions: b in {0,1}.
107
+ */
108
+
109
+ void fe_cmov(fe f,const fe g,unsigned int b) /* Conditional move without branches: f = g when b == 1, f unchanged when b == 0. */
110
+ {
111
+ int32_t f0 = f[0];
112
+ int32_t f1 = f[1];
113
+ int32_t f2 = f[2];
114
+ int32_t f3 = f[3];
115
+ int32_t f4 = f[4];
116
+ int32_t f5 = f[5];
117
+ int32_t f6 = f[6];
118
+ int32_t f7 = f[7];
119
+ int32_t f8 = f[8];
120
+ int32_t f9 = f[9];
121
+ int32_t g0 = g[0];
122
+ int32_t g1 = g[1];
123
+ int32_t g2 = g[2];
124
+ int32_t g3 = g[3];
125
+ int32_t g4 = g[4];
126
+ int32_t g5 = g[5];
127
+ int32_t g6 = g[6];
128
+ int32_t g7 = g[7];
129
+ int32_t g8 = g[8];
130
+ int32_t g9 = g[9];
131
+ int32_t x0 = f0 ^ g0; /* x = f ^ g: the bits in which f and g differ */
132
+ int32_t x1 = f1 ^ g1;
133
+ int32_t x2 = f2 ^ g2;
134
+ int32_t x3 = f3 ^ g3;
135
+ int32_t x4 = f4 ^ g4;
136
+ int32_t x5 = f5 ^ g5;
137
+ int32_t x6 = f6 ^ g6;
138
+ int32_t x7 = f7 ^ g7;
139
+ int32_t x8 = f8 ^ g8;
140
+ int32_t x9 = f9 ^ g9;
141
+ b = (unsigned int) (- (int) b); /* turns b into a mask: 0 stays 0, 1 becomes all ones */
142
+ x0 &= b; /* x keeps f ^ g when b was 1, and becomes 0 when b was 0 */
143
+ x1 &= b;
144
+ x2 &= b;
145
+ x3 &= b;
146
+ x4 &= b;
147
+ x5 &= b;
148
+ x6 &= b;
149
+ x7 &= b;
150
+ x8 &= b;
151
+ x9 &= b;
152
+ f[0] = f0 ^ x0; /* f ^ (f ^ g) = g when selected; f ^ 0 = f otherwise */
153
+ f[1] = f1 ^ x1;
154
+ f[2] = f2 ^ x2;
155
+ f[3] = f3 ^ x3;
156
+ f[4] = f4 ^ x4;
157
+ f[5] = f5 ^ x5;
158
+ f[6] = f6 ^ x6;
159
+ f[7] = f7 ^ x7;
160
+ f[8] = f8 ^ x8;
161
+ f[9] = f9 ^ x9;
162
+ }
163
+
164
+ /*
165
+ h = f
166
+ */
167
+
168
+ void fe_copy(fe h,const fe f) /* h = f: copies the ten limbs of f into h. */
169
+ {
170
+ int32_t f0 = f[0]; /* every limb is read into a local before any limb of h is written */
171
+ int32_t f1 = f[1];
172
+ int32_t f2 = f[2];
173
+ int32_t f3 = f[3];
174
+ int32_t f4 = f[4];
175
+ int32_t f5 = f[5];
176
+ int32_t f6 = f[6];
177
+ int32_t f7 = f[7];
178
+ int32_t f8 = f[8];
179
+ int32_t f9 = f[9];
180
+ h[0] = f0;
181
+ h[1] = f1;
182
+ h[2] = f2;
183
+ h[3] = f3;
184
+ h[4] = f4;
185
+ h[5] = f5;
186
+ h[6] = f6;
187
+ h[7] = f7;
188
+ h[8] = f8;
189
+ h[9] = f9;
190
+ }
191
+
192
+ /*
193
+ Decodes the 32 little-endian bytes at s into the ten-limb field element h. Ignores the top bit of s[31].
194
+ */
195
+
196
+ void fe_frombytes(fe h,const unsigned char *s)
197
+ {
198
+ int64_t h0 = load_4(s); /* each limb is loaded from its byte offset and pre-shifted into position */
199
+ int64_t h1 = load_3(s + 4) << 6;
200
+ int64_t h2 = load_3(s + 7) << 5;
201
+ int64_t h3 = load_3(s + 10) << 3;
202
+ int64_t h4 = load_3(s + 13) << 2;
203
+ int64_t h5 = load_4(s + 16);
204
+ int64_t h6 = load_3(s + 20) << 7;
205
+ int64_t h7 = load_3(s + 23) << 5;
206
+ int64_t h8 = load_3(s + 26) << 4;
207
+ int64_t h9 = (load_3(s + 29) & 8388607) << 2; /* 8388607 = 2^23 - 1: masks off the top bit of s[31] */
208
+ int64_t carry0;
209
+ int64_t carry1;
210
+ int64_t carry2;
211
+ int64_t carry3;
212
+ int64_t carry4;
213
+ int64_t carry5;
214
+ int64_t carry6;
215
+ int64_t carry7;
216
+ int64_t carry8;
217
+ int64_t carry9;
218
+
219
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25); /* odd limbs carry at 25 bits; the carry out of limb 9 is folded into limb 0 multiplied by 19 */
220
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
221
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
222
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
223
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
224
+
225
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26); /* even limbs carry at 26 bits into the next odd limb */
226
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
227
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
228
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
229
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
230
+
231
+ h[0] = (int32_t) h0; /* limbs are narrowed from the 64-bit working values to int32_t on store */
232
+ h[1] = (int32_t) h1;
233
+ h[2] = (int32_t) h2;
234
+ h[3] = (int32_t) h3;
235
+ h[4] = (int32_t) h4;
236
+ h[5] = (int32_t) h5;
237
+ h[6] = (int32_t) h6;
238
+ h[7] = (int32_t) h7;
239
+ h[8] = (int32_t) h8;
240
+ h[9] = (int32_t) h9;
241
+ }
242
+
243
+ /*
244
+ Preconditions:
245
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
246
+
247
+ Write p=2^255-19; q=floor(h/p).
248
+ Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
249
+
250
+ Proof:
251
+ Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
252
+ Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
253
+
254
+ Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
255
+ Then 0<y<1.
256
+
257
+ Write r=h-pq.
258
+ Have 0<=r<=p-1=2^255-20.
259
+ Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
260
+
261
+ Write x=r+19(2^-255)r+y.
262
+ Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
263
+
264
+ Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
265
+ so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
266
+ */
267
+
268
+ void fe_tobytes(unsigned char *s,const fe h) /* Writes the field element h to s[0..31] as 32 little-endian bytes, after subtracting q*p as derived in the comment above. */
269
+ {
270
+ int32_t h0 = h[0];
271
+ int32_t h1 = h[1];
272
+ int32_t h2 = h[2];
273
+ int32_t h3 = h[3];
274
+ int32_t h4 = h[4];
275
+ int32_t h5 = h[5];
276
+ int32_t h6 = h[6];
277
+ int32_t h7 = h[7];
278
+ int32_t h8 = h[8];
279
+ int32_t h9 = h[9];
280
+ int32_t q; /* running carry; after the chain below it holds the quotient q from the header comment */
281
+ int32_t carry0;
282
+ int32_t carry1;
283
+ int32_t carry2;
284
+ int32_t carry3;
285
+ int32_t carry4;
286
+ int32_t carry5;
287
+ int32_t carry6;
288
+ int32_t carry7;
289
+ int32_t carry8;
290
+ int32_t carry9;
291
+
292
+ q = (19 * h9 + ((uint32_t) 1L << 24)) >> 25; /* NOTE(review): int32 + uint32 is evaluated as unsigned where int32_t is int, so this shift is logical; confirm that is intended when 19*h9 + 2^24 is negative */
293
+ q = (h0 + q) >> 26; /* propagates the carry through limbs 0..9, alternating 26- and 25-bit widths */
294
+ q = (h1 + q) >> 25;
295
+ q = (h2 + q) >> 26;
296
+ q = (h3 + q) >> 25;
297
+ q = (h4 + q) >> 26;
298
+ q = (h5 + q) >> 25;
299
+ q = (h6 + q) >> 26;
300
+ q = (h7 + q) >> 25;
301
+ q = (h8 + q) >> 26;
302
+ q = (h9 + q) >> 25;
303
+
304
+ /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
305
+ h0 += 19 * q;
306
+ /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
307
+
308
+ carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 * ((uint32_t) 1L << 26); /* after this chain each limb keeps only its own 26 or 25 bits */
309
+ carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 * ((uint32_t) 1L << 25);
310
+ carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 * ((uint32_t) 1L << 26);
311
+ carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 * ((uint32_t) 1L << 25);
312
+ carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 * ((uint32_t) 1L << 26);
313
+ carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 * ((uint32_t) 1L << 25);
314
+ carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 * ((uint32_t) 1L << 26);
315
+ carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 * ((uint32_t) 1L << 25);
316
+ carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 * ((uint32_t) 1L << 26);
317
+ carry9 = h9 >> 25; h9 -= carry9 * ((uint32_t) 1L << 25); /* the final carry is discarded, not added anywhere */
318
+ /* h10 = carry9 */
319
+
320
+ /*
321
+ Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
322
+ Have h0+...+2^230 h9 between 0 and 2^255-1;
323
+ evidently 2^255 h10-2^255 q = 0.
324
+ Goal: Output h0+...+2^230 h9.
325
+ */
326
+
327
+ s[0] = h0 >> 0; /* packs the ten limbs into 32 bytes; each store keeps only the low 8 bits */
328
+ s[1] = h0 >> 8;
329
+ s[2] = h0 >> 16;
330
+ s[3] = (h0 >> 24) | (h1 * ((uint32_t) 1 << 2)); /* bytes that straddle two limbs OR the top of one with the bottom of the next */
331
+ s[4] = h1 >> 6;
332
+ s[5] = h1 >> 14;
333
+ s[6] = (h1 >> 22) | (h2 * ((uint32_t) 1 << 3));
334
+ s[7] = h2 >> 5;
335
+ s[8] = h2 >> 13;
336
+ s[9] = (h2 >> 21) | (h3 * ((uint32_t) 1 << 5));
337
+ s[10] = h3 >> 3;
338
+ s[11] = h3 >> 11;
339
+ s[12] = (h3 >> 19) | (h4 * ((uint32_t) 1 << 6));
340
+ s[13] = h4 >> 2;
341
+ s[14] = h4 >> 10;
342
+ s[15] = h4 >> 18;
343
+ s[16] = h5 >> 0;
344
+ s[17] = h5 >> 8;
345
+ s[18] = h5 >> 16;
346
+ s[19] = (h5 >> 24) | (h6 * ((uint32_t) 1 << 1));
347
+ s[20] = h6 >> 7;
348
+ s[21] = h6 >> 15;
349
+ s[22] = (h6 >> 23) | (h7 * ((uint32_t) 1 << 3));
350
+ s[23] = h7 >> 5;
351
+ s[24] = h7 >> 13;
352
+ s[25] = (h7 >> 21) | (h8 * ((uint32_t) 1 << 4));
353
+ s[26] = h8 >> 4;
354
+ s[27] = h8 >> 12;
355
+ s[28] = (h8 >> 20) | (h9 * ((uint32_t) 1 << 6));
356
+ s[29] = h9 >> 2;
357
+ s[30] = h9 >> 10;
358
+ s[31] = h9 >> 18;
359
+ }
360
+
361
+ /*
362
+ return 1 if f is in {1,3,5,...,q-2}
363
+ return 0 if f is in {0,2,4,...,q-1}
364
+
365
+ Preconditions:
366
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
367
+ */
368
+
369
+ int fe_isnegative(const fe f) /* Returns the lowest bit of the byte encoding that fe_tobytes produces for f. */
370
+ {
371
+ unsigned char s[32];
372
+ fe_tobytes(s,f); /* serialize first, so the parity is taken from the 32-byte encoding rather than from the raw limbs */
373
+ return s[0] & 1; /* s[0] is the least-significant byte of the little-endian encoding */
374
+ }
375
+
376
+ /*
377
+ return 0 if f == 0
378
+ return nonzero if f != 0 (the value crypto_verify_32 reports for a mismatch; presumably -1, confirm)
379
+
380
+ Preconditions:
381
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
382
+ */
383
+
384
+ static unsigned char zero[32]; /* file-scope with no initializer, hence all zero bytes; the reference encoding of 0 */
385
+
386
+ int fe_isnonzero(const fe f)
387
+ {
388
+ unsigned char s[32];
389
+ fe_tobytes(s,f); /* compare the 32-byte encoding, not the raw limbs */
390
+ return crypto_verify_32(s,zero); /* result of comparing the encoding of f against 32 zero bytes */
391
+ }
392
+
393
+ /*
394
+ h = f * g
395
+ Can overlap h with f or g.
396
+
397
+ Preconditions:
398
+ |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
399
+ |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
400
+
401
+ Postconditions:
402
+ |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
403
+ */
404
+
405
+ /*
406
+ Notes on implementation strategy:
407
+
408
+ Using schoolbook multiplication.
409
+ Karatsuba would save a little in some cost models.
410
+
411
+ Most multiplications by 2 and 19 are 32-bit precomputations;
412
+ cheaper than 64-bit postcomputations.
413
+
414
+ There is one remaining multiplication by 19 in the carry chain;
415
+ one *19 precomputation can be merged into this,
416
+ but the resulting data flow is considerably less clean.
417
+
418
+ There are 12 carries below.
419
+ 10 of them are 2-way parallelizable and vectorizable.
420
+ Can get away with 11 carries, but then data flow is much deeper.
421
+
422
+ With tighter constraints on inputs can squeeze carries into int32.
423
+ */
424
+
425
+ void fe_mul(fe h,const fe f,const fe g) /* h = f * g: 100 schoolbook partial products summed into ten 64-bit limbs, then carried back down to int32 limbs. */
426
+ {
427
+ int32_t f0 = f[0]; /* inputs are copied to locals before h is written, so h may alias f or g */
428
+ int32_t f1 = f[1];
429
+ int32_t f2 = f[2];
430
+ int32_t f3 = f[3];
431
+ int32_t f4 = f[4];
432
+ int32_t f5 = f[5];
433
+ int32_t f6 = f[6];
434
+ int32_t f7 = f[7];
435
+ int32_t f8 = f[8];
436
+ int32_t f9 = f[9];
437
+ int32_t g0 = g[0];
438
+ int32_t g1 = g[1];
439
+ int32_t g2 = g[2];
440
+ int32_t g3 = g[3];
441
+ int32_t g4 = g[4];
442
+ int32_t g5 = g[5];
443
+ int32_t g6 = g[6];
444
+ int32_t g7 = g[7];
445
+ int32_t g8 = g[8];
446
+ int32_t g9 = g[9];
447
+ int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
448
+ int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
449
+ int32_t g3_19 = 19 * g3;
450
+ int32_t g4_19 = 19 * g4;
451
+ int32_t g5_19 = 19 * g5;
452
+ int32_t g6_19 = 19 * g6;
453
+ int32_t g7_19 = 19 * g7;
454
+ int32_t g8_19 = 19 * g8;
455
+ int32_t g9_19 = 19 * g9;
456
+ int32_t f1_2 = 2 * f1; /* odd limbs of f doubled once, in 32 bits, for the odd-by-odd products below */
457
+ int32_t f3_2 = 2 * f3;
458
+ int32_t f5_2 = 2 * f5;
459
+ int32_t f7_2 = 2 * f7;
460
+ int32_t f9_2 = 2 * f9;
461
+ int64_t f0g0 = f0 * (int64_t) g0; /* from here on, each product is computed in 64 bits via the cast on the right operand */
462
+ int64_t f0g1 = f0 * (int64_t) g1;
463
+ int64_t f0g2 = f0 * (int64_t) g2;
464
+ int64_t f0g3 = f0 * (int64_t) g3;
465
+ int64_t f0g4 = f0 * (int64_t) g4;
466
+ int64_t f0g5 = f0 * (int64_t) g5;
467
+ int64_t f0g6 = f0 * (int64_t) g6;
468
+ int64_t f0g7 = f0 * (int64_t) g7;
469
+ int64_t f0g8 = f0 * (int64_t) g8;
470
+ int64_t f0g9 = f0 * (int64_t) g9;
471
+ int64_t f1g0 = f1 * (int64_t) g0;
472
+ int64_t f1g1_2 = f1_2 * (int64_t) g1;
473
+ int64_t f1g2 = f1 * (int64_t) g2;
474
+ int64_t f1g3_2 = f1_2 * (int64_t) g3;
475
+ int64_t f1g4 = f1 * (int64_t) g4;
476
+ int64_t f1g5_2 = f1_2 * (int64_t) g5;
477
+ int64_t f1g6 = f1 * (int64_t) g6;
478
+ int64_t f1g7_2 = f1_2 * (int64_t) g7;
479
+ int64_t f1g8 = f1 * (int64_t) g8;
480
+ int64_t f1g9_38 = f1_2 * (int64_t) g9_19; /* _38 suffix: doubled f limb times 19*g limb */
481
+ int64_t f2g0 = f2 * (int64_t) g0;
482
+ int64_t f2g1 = f2 * (int64_t) g1;
483
+ int64_t f2g2 = f2 * (int64_t) g2;
484
+ int64_t f2g3 = f2 * (int64_t) g3;
485
+ int64_t f2g4 = f2 * (int64_t) g4;
486
+ int64_t f2g5 = f2 * (int64_t) g5;
487
+ int64_t f2g6 = f2 * (int64_t) g6;
488
+ int64_t f2g7 = f2 * (int64_t) g7;
489
+ int64_t f2g8_19 = f2 * (int64_t) g8_19;
490
+ int64_t f2g9_19 = f2 * (int64_t) g9_19;
491
+ int64_t f3g0 = f3 * (int64_t) g0;
492
+ int64_t f3g1_2 = f3_2 * (int64_t) g1;
493
+ int64_t f3g2 = f3 * (int64_t) g2;
494
+ int64_t f3g3_2 = f3_2 * (int64_t) g3;
495
+ int64_t f3g4 = f3 * (int64_t) g4;
496
+ int64_t f3g5_2 = f3_2 * (int64_t) g5;
497
+ int64_t f3g6 = f3 * (int64_t) g6;
498
+ int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
499
+ int64_t f3g8_19 = f3 * (int64_t) g8_19;
500
+ int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
501
+ int64_t f4g0 = f4 * (int64_t) g0;
502
+ int64_t f4g1 = f4 * (int64_t) g1;
503
+ int64_t f4g2 = f4 * (int64_t) g2;
504
+ int64_t f4g3 = f4 * (int64_t) g3;
505
+ int64_t f4g4 = f4 * (int64_t) g4;
506
+ int64_t f4g5 = f4 * (int64_t) g5;
507
+ int64_t f4g6_19 = f4 * (int64_t) g6_19;
508
+ int64_t f4g7_19 = f4 * (int64_t) g7_19;
509
+ int64_t f4g8_19 = f4 * (int64_t) g8_19;
510
+ int64_t f4g9_19 = f4 * (int64_t) g9_19;
511
+ int64_t f5g0 = f5 * (int64_t) g0;
512
+ int64_t f5g1_2 = f5_2 * (int64_t) g1;
513
+ int64_t f5g2 = f5 * (int64_t) g2;
514
+ int64_t f5g3_2 = f5_2 * (int64_t) g3;
515
+ int64_t f5g4 = f5 * (int64_t) g4;
516
+ int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
517
+ int64_t f5g6_19 = f5 * (int64_t) g6_19;
518
+ int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
519
+ int64_t f5g8_19 = f5 * (int64_t) g8_19;
520
+ int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
521
+ int64_t f6g0 = f6 * (int64_t) g0;
522
+ int64_t f6g1 = f6 * (int64_t) g1;
523
+ int64_t f6g2 = f6 * (int64_t) g2;
524
+ int64_t f6g3 = f6 * (int64_t) g3;
525
+ int64_t f6g4_19 = f6 * (int64_t) g4_19;
526
+ int64_t f6g5_19 = f6 * (int64_t) g5_19;
527
+ int64_t f6g6_19 = f6 * (int64_t) g6_19;
528
+ int64_t f6g7_19 = f6 * (int64_t) g7_19;
529
+ int64_t f6g8_19 = f6 * (int64_t) g8_19;
530
+ int64_t f6g9_19 = f6 * (int64_t) g9_19;
531
+ int64_t f7g0 = f7 * (int64_t) g0;
532
+ int64_t f7g1_2 = f7_2 * (int64_t) g1;
533
+ int64_t f7g2 = f7 * (int64_t) g2;
534
+ int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
535
+ int64_t f7g4_19 = f7 * (int64_t) g4_19;
536
+ int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
537
+ int64_t f7g6_19 = f7 * (int64_t) g6_19;
538
+ int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
539
+ int64_t f7g8_19 = f7 * (int64_t) g8_19;
540
+ int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
541
+ int64_t f8g0 = f8 * (int64_t) g0;
542
+ int64_t f8g1 = f8 * (int64_t) g1;
543
+ int64_t f8g2_19 = f8 * (int64_t) g2_19;
544
+ int64_t f8g3_19 = f8 * (int64_t) g3_19;
545
+ int64_t f8g4_19 = f8 * (int64_t) g4_19;
546
+ int64_t f8g5_19 = f8 * (int64_t) g5_19;
547
+ int64_t f8g6_19 = f8 * (int64_t) g6_19;
548
+ int64_t f8g7_19 = f8 * (int64_t) g7_19;
549
+ int64_t f8g8_19 = f8 * (int64_t) g8_19;
550
+ int64_t f8g9_19 = f8 * (int64_t) g9_19;
551
+ int64_t f9g0 = f9 * (int64_t) g0;
552
+ int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
553
+ int64_t f9g2_19 = f9 * (int64_t) g2_19;
554
+ int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
555
+ int64_t f9g4_19 = f9 * (int64_t) g4_19;
556
+ int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
557
+ int64_t f9g6_19 = f9 * (int64_t) g6_19;
558
+ int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
559
+ int64_t f9g8_19 = f9 * (int64_t) g8_19;
560
+ int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
561
+ int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; /* limb k sums the products fi*gj with i+j == k or i+j == k+10; the latter are the ones carrying a factor of 19 */
562
+ int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
563
+ int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
564
+ int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
565
+ int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
566
+ int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
567
+ int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
568
+ int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
569
+ int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
570
+ int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
571
+ int64_t carry0;
572
+ int64_t carry1;
573
+ int64_t carry2;
574
+ int64_t carry3;
575
+ int64_t carry4;
576
+ int64_t carry5;
577
+ int64_t carry6;
578
+ int64_t carry7;
579
+ int64_t carry8;
580
+ int64_t carry9;
581
+
582
+ /*
583
+ |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
584
+ i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
585
+ |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
586
+ i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
587
+ */
588
+
589
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26); /* carries come in pairs: two independent chains, starting at limb 0 and at limb 4 */
590
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
591
+ /* |h0| <= 2^25 */
592
+ /* |h4| <= 2^25 */
593
+ /* |h1| <= 1.71*2^59 */
594
+ /* |h5| <= 1.71*2^59 */
595
+
596
+ carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
597
+ carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);
598
+ /* |h1| <= 2^24; from now on fits into int32 */
599
+ /* |h5| <= 2^24; from now on fits into int32 */
600
+ /* |h2| <= 1.41*2^60 */
601
+ /* |h6| <= 1.41*2^60 */
602
+
603
+ carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
604
+ carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);
605
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
606
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
607
+ /* |h3| <= 1.71*2^59 */
608
+ /* |h7| <= 1.71*2^59 */
609
+
610
+ carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
611
+ carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);
612
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
613
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
614
+ /* |h4| <= 1.72*2^34 */
615
+ /* |h8| <= 1.41*2^60 */
616
+
617
+ carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26); /* second carry out of limb 4, needed because carry3 was just added to it */
618
+ carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);
619
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
620
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
621
+ /* |h5| <= 1.01*2^24 */
622
+ /* |h9| <= 1.71*2^59 */
623
+
624
+ carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25); /* the carry out of the top limb wraps around into limb 0 multiplied by 19 */
625
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
626
+ /* |h0| <= 1.1*2^39 */
627
+
628
+ carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26); /* second carry out of limb 0, after the wrap-around above */
629
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
630
+ /* |h1| <= 1.01*2^24 */
631
+
632
+ h[0] = (int32_t) h0;
633
+ h[1] = (int32_t) h1;
634
+ h[2] = (int32_t) h2;
635
+ h[3] = (int32_t) h3;
636
+ h[4] = (int32_t) h4;
637
+ h[5] = (int32_t) h5;
638
+ h[6] = (int32_t) h6;
639
+ h[7] = (int32_t) h7;
640
+ h[8] = (int32_t) h8;
641
+ h[9] = (int32_t) h9;
642
+ }
643
+
644
+ /*
645
+ h = -f
646
+
647
+ Preconditions:
648
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
649
+
650
+ Postconditions:
651
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
652
+ */
653
+
654
+ void fe_neg(fe h,const fe f)
655
+ {
656
+ int32_t f0 = f[0];
657
+ int32_t f1 = f[1];
658
+ int32_t f2 = f[2];
659
+ int32_t f3 = f[3];
660
+ int32_t f4 = f[4];
661
+ int32_t f5 = f[5];
662
+ int32_t f6 = f[6];
663
+ int32_t f7 = f[7];
664
+ int32_t f8 = f[8];
665
+ int32_t f9 = f[9];
666
+ int32_t h0 = -f0;
667
+ int32_t h1 = -f1;
668
+ int32_t h2 = -f2;
669
+ int32_t h3 = -f3;
670
+ int32_t h4 = -f4;
671
+ int32_t h5 = -f5;
672
+ int32_t h6 = -f6;
673
+ int32_t h7 = -f7;
674
+ int32_t h8 = -f8;
675
+ int32_t h9 = -f9;
676
+ h[0] = h0;
677
+ h[1] = h1;
678
+ h[2] = h2;
679
+ h[3] = h3;
680
+ h[4] = h4;
681
+ h[5] = h5;
682
+ h[6] = h6;
683
+ h[7] = h7;
684
+ h[8] = h8;
685
+ h[9] = h9;
686
+ }
687
+
688
/*
h = f * f
Can overlap h with f.

Squares a field element held as ten signed limbs. All limbs of f are copied
into locals before h is written, which is what makes the overlap safe.

Preconditions:
   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.

Postconditions:
   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
*/

/*
See fe_mul.c for discussion of implementation strategy.
*/

void fe_sq(fe h,const fe f)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
    int32_t f3 = f[3];
    int32_t f4 = f[4];
    int32_t f5 = f[5];
    int32_t f6 = f[6];
    int32_t f7 = f[7];
    int32_t f8 = f[8];
    int32_t f9 = f[9];
    /* Doubled limbs: each cross product f_i*f_j (i != j) appears twice in
       the square, so it is formed once from a pre-doubled operand. */
    int32_t f0_2 = 2 * f0;
    int32_t f1_2 = 2 * f1;
    int32_t f2_2 = 2 * f2;
    int32_t f3_2 = 2 * f3;
    int32_t f4_2 = 2 * f4;
    int32_t f5_2 = 2 * f5;
    int32_t f6_2 = 2 * f6;
    int32_t f7_2 = 2 * f7;
    /* Limbs pre-scaled by 19 or 38, used for the products that are folded
       back into lower limbs; the trailing comments give the magnitude bound. */
    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
    /* 64-bit partial products; the _N suffix names the constant factor that
       has been folded into the product. */
    int64_t f0f0 = f0 * (int64_t) f0;
    int64_t f0f1_2 = f0_2 * (int64_t) f1;
    int64_t f0f2_2 = f0_2 * (int64_t) f2;
    int64_t f0f3_2 = f0_2 * (int64_t) f3;
    int64_t f0f4_2 = f0_2 * (int64_t) f4;
    int64_t f0f5_2 = f0_2 * (int64_t) f5;
    int64_t f0f6_2 = f0_2 * (int64_t) f6;
    int64_t f0f7_2 = f0_2 * (int64_t) f7;
    int64_t f0f8_2 = f0_2 * (int64_t) f8;
    int64_t f0f9_2 = f0_2 * (int64_t) f9;
    int64_t f1f1_2 = f1_2 * (int64_t) f1;
    int64_t f1f2_2 = f1_2 * (int64_t) f2;
    int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
    int64_t f1f4_2 = f1_2 * (int64_t) f4;
    int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
    int64_t f1f6_2 = f1_2 * (int64_t) f6;
    int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
    int64_t f1f8_2 = f1_2 * (int64_t) f8;
    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
    int64_t f2f2 = f2 * (int64_t) f2;
    int64_t f2f3_2 = f2_2 * (int64_t) f3;
    int64_t f2f4_2 = f2_2 * (int64_t) f4;
    int64_t f2f5_2 = f2_2 * (int64_t) f5;
    int64_t f2f6_2 = f2_2 * (int64_t) f6;
    int64_t f2f7_2 = f2_2 * (int64_t) f7;
    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
    int64_t f2f9_38 = f2 * (int64_t) f9_38;
    int64_t f3f3_2 = f3_2 * (int64_t) f3;
    int64_t f3f4_2 = f3_2 * (int64_t) f4;
    int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
    int64_t f3f6_2 = f3_2 * (int64_t) f6;
    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
    int64_t f4f4 = f4 * (int64_t) f4;
    int64_t f4f5_2 = f4_2 * (int64_t) f5;
    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
    int64_t f4f7_38 = f4 * (int64_t) f7_38;
    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
    int64_t f4f9_38 = f4 * (int64_t) f9_38;
    int64_t f5f5_38 = f5 * (int64_t) f5_38;
    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
    int64_t f6f6_19 = f6 * (int64_t) f6_19;
    int64_t f6f7_38 = f6 * (int64_t) f7_38;
    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
    int64_t f6f9_38 = f6 * (int64_t) f9_38;
    int64_t f7f7_38 = f7 * (int64_t) f7_38;
    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
    int64_t f8f8_19 = f8 * (int64_t) f8_19;
    int64_t f8f9_38 = f8 * (int64_t) f9_38;
    int64_t f9f9_38 = f9 * (int64_t) f9_38;
    /* Column sums: h_k collects every partial product that lands on limb k. */
    int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
    int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
    int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
    int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
    int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
    int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
    int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
    int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
    int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
    int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;

    /* Carry chain. Even limbs are reduced with a shift of 26, odd limbs with
       a shift of 25. Adding half the modulus before shifting rounds the carry
       to nearest, so the remainder stays signed and small. The subtraction
       multiplies by an unsigned power of two instead of left-shifting the
       (possibly negative) carry. */
    carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
    carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);

    carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
    carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);

    carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
    carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);

    carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
    carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);

    carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
    carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);

    /* The carry out of the top limb wraps around into h0 scaled by 19. */
    carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);

    carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);

    h[0] = (int32_t) h0;
    h[1] = (int32_t) h1;
    h[2] = (int32_t) h2;
    h[3] = (int32_t) h3;
    h[4] = (int32_t) h4;
    h[5] = (int32_t) h5;
    h[6] = (int32_t) h6;
    h[7] = (int32_t) h7;
    h[8] = (int32_t) h8;
    h[9] = (int32_t) h9;
}
834
+
835
/*
h = 2 * f * f
Can overlap h with f.

Same computation as fe_sq, except that every column sum is doubled before
the carry chain runs. All limbs of f are copied into locals before h is
written, which is what makes the overlap safe.

Preconditions:
   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.

Postconditions:
   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
*/

/*
See fe_mul.c for discussion of implementation strategy.
*/

void fe_sq2(fe h,const fe f)
{
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
    int32_t f3 = f[3];
    int32_t f4 = f[4];
    int32_t f5 = f[5];
    int32_t f6 = f[6];
    int32_t f7 = f[7];
    int32_t f8 = f[8];
    int32_t f9 = f[9];
    /* Doubled limbs: each cross product f_i*f_j (i != j) appears twice in
       the square, so it is formed once from a pre-doubled operand. */
    int32_t f0_2 = 2 * f0;
    int32_t f1_2 = 2 * f1;
    int32_t f2_2 = 2 * f2;
    int32_t f3_2 = 2 * f3;
    int32_t f4_2 = 2 * f4;
    int32_t f5_2 = 2 * f5;
    int32_t f6_2 = 2 * f6;
    int32_t f7_2 = 2 * f7;
    /* Limbs pre-scaled by 19 or 38, used for the products that are folded
       back into lower limbs; the trailing comments give the magnitude bound. */
    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
    /* 64-bit partial products; the _N suffix names the constant factor that
       has been folded into the product. */
    int64_t f0f0 = f0 * (int64_t) f0;
    int64_t f0f1_2 = f0_2 * (int64_t) f1;
    int64_t f0f2_2 = f0_2 * (int64_t) f2;
    int64_t f0f3_2 = f0_2 * (int64_t) f3;
    int64_t f0f4_2 = f0_2 * (int64_t) f4;
    int64_t f0f5_2 = f0_2 * (int64_t) f5;
    int64_t f0f6_2 = f0_2 * (int64_t) f6;
    int64_t f0f7_2 = f0_2 * (int64_t) f7;
    int64_t f0f8_2 = f0_2 * (int64_t) f8;
    int64_t f0f9_2 = f0_2 * (int64_t) f9;
    int64_t f1f1_2 = f1_2 * (int64_t) f1;
    int64_t f1f2_2 = f1_2 * (int64_t) f2;
    int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
    int64_t f1f4_2 = f1_2 * (int64_t) f4;
    int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
    int64_t f1f6_2 = f1_2 * (int64_t) f6;
    int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
    int64_t f1f8_2 = f1_2 * (int64_t) f8;
    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
    int64_t f2f2 = f2 * (int64_t) f2;
    int64_t f2f3_2 = f2_2 * (int64_t) f3;
    int64_t f2f4_2 = f2_2 * (int64_t) f4;
    int64_t f2f5_2 = f2_2 * (int64_t) f5;
    int64_t f2f6_2 = f2_2 * (int64_t) f6;
    int64_t f2f7_2 = f2_2 * (int64_t) f7;
    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
    int64_t f2f9_38 = f2 * (int64_t) f9_38;
    int64_t f3f3_2 = f3_2 * (int64_t) f3;
    int64_t f3f4_2 = f3_2 * (int64_t) f4;
    int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
    int64_t f3f6_2 = f3_2 * (int64_t) f6;
    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
    int64_t f4f4 = f4 * (int64_t) f4;
    int64_t f4f5_2 = f4_2 * (int64_t) f5;
    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
    int64_t f4f7_38 = f4 * (int64_t) f7_38;
    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
    int64_t f4f9_38 = f4 * (int64_t) f9_38;
    int64_t f5f5_38 = f5 * (int64_t) f5_38;
    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
    int64_t f6f6_19 = f6 * (int64_t) f6_19;
    int64_t f6f7_38 = f6 * (int64_t) f7_38;
    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
    int64_t f6f9_38 = f6 * (int64_t) f9_38;
    int64_t f7f7_38 = f7 * (int64_t) f7_38;
    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
    int64_t f8f8_19 = f8 * (int64_t) f8_19;
    int64_t f8f9_38 = f8 * (int64_t) f9_38;
    int64_t f9f9_38 = f9 * (int64_t) f9_38;
    /* Column sums: h_k collects every partial product that lands on limb k. */
    int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
    int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
    int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
    int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
    int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
    int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
    int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
    int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
    int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
    int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;

    /* Double every column: this is the only difference from fe_sq. */
    h0 += h0;
    h1 += h1;
    h2 += h2;
    h3 += h3;
    h4 += h4;
    h5 += h5;
    h6 += h6;
    h7 += h7;
    h8 += h8;
    h9 += h9;

    /* Carry chain. Even limbs are reduced with a shift of 26, odd limbs with
       a shift of 25. Adding half the modulus before shifting rounds the carry
       to nearest, so the remainder stays signed and small. The subtraction
       multiplies by an unsigned power of two instead of left-shifting the
       (possibly negative) carry. */
    carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);
    carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);

    carry1 = (h1 + (int64_t) (1L << 24)) >> 25; h2 += carry1; h1 -= carry1 * ((uint64_t) 1L << 25);
    carry5 = (h5 + (int64_t) (1L << 24)) >> 25; h6 += carry5; h5 -= carry5 * ((uint64_t) 1L << 25);

    carry2 = (h2 + (int64_t) (1L << 25)) >> 26; h3 += carry2; h2 -= carry2 * ((uint64_t) 1L << 26);
    carry6 = (h6 + (int64_t) (1L << 25)) >> 26; h7 += carry6; h6 -= carry6 * ((uint64_t) 1L << 26);

    carry3 = (h3 + (int64_t) (1L << 24)) >> 25; h4 += carry3; h3 -= carry3 * ((uint64_t) 1L << 25);
    carry7 = (h7 + (int64_t) (1L << 24)) >> 25; h8 += carry7; h7 -= carry7 * ((uint64_t) 1L << 25);

    carry4 = (h4 + (int64_t) (1L << 25)) >> 26; h5 += carry4; h4 -= carry4 * ((uint64_t) 1L << 26);
    carry8 = (h8 + (int64_t) (1L << 25)) >> 26; h9 += carry8; h8 -= carry8 * ((uint64_t) 1L << 26);

    /* The carry out of the top limb wraps around into h0 scaled by 19. */
    carry9 = (h9 + (int64_t) (1L << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((uint64_t) 1L << 25);

    carry0 = (h0 + (int64_t) (1L << 25)) >> 26; h1 += carry0; h0 -= carry0 * ((uint64_t) 1L << 26);

    h[0] = (int32_t) h0;
    h[1] = (int32_t) h1;
    h[2] = (int32_t) h2;
    h[3] = (int32_t) h3;
    h[4] = (int32_t) h4;
    h[5] = (int32_t) h5;
    h[6] = (int32_t) h6;
    h[7] = (int32_t) h7;
    h[8] = (int32_t) h8;
    h[9] = (int32_t) h9;
}
992
+
993
+ void fe_invert(fe out,const fe z)
994
+ {
995
+ fe t0;
996
+ fe t1;
997
+ fe t2;
998
+ fe t3;
999
+ int i;
1000
+
1001
+ fe_sq(t0, z);
1002
+ fe_sq(t1, t0);
1003
+ fe_sq(t1, t1);
1004
+ fe_mul(t1, z, t1);
1005
+ fe_mul(t0, t0, t1);
1006
+ fe_sq(t2, t0);
1007
+ fe_mul(t1, t1, t2);
1008
+ fe_sq(t2, t1);
1009
+ for (i = 1; i < 5; ++i) {
1010
+ fe_sq(t2, t2);
1011
+ }
1012
+ fe_mul(t1, t2, t1);
1013
+ fe_sq(t2, t1);
1014
+ for (i = 1; i < 10; ++i) {
1015
+ fe_sq(t2, t2);
1016
+ }
1017
+ fe_mul(t2, t2, t1);
1018
+ fe_sq(t3, t2);
1019
+ for (i = 1; i < 20; ++i) {
1020
+ fe_sq(t3, t3);
1021
+ }
1022
+ fe_mul(t2, t3, t2);
1023
+ fe_sq(t2, t2);
1024
+ for (i = 1; i < 10; ++i) {
1025
+ fe_sq(t2, t2);
1026
+ }
1027
+ fe_mul(t1, t2, t1);
1028
+ fe_sq(t2, t1);
1029
+ for (i = 1; i < 50; ++i) {
1030
+ fe_sq(t2, t2);
1031
+ }
1032
+ fe_mul(t2, t2, t1);
1033
+ fe_sq(t3, t2);
1034
+ for (i = 1; i < 100; ++i) {
1035
+ fe_sq(t3, t3);
1036
+ }
1037
+ fe_mul(t2, t3, t2);
1038
+ fe_sq(t2, t2);
1039
+ for (i = 1; i < 50; ++i) {
1040
+ fe_sq(t2, t2);
1041
+ }
1042
+ fe_mul(t1, t2, t1);
1043
+ fe_sq(t1, t1);
1044
+ for (i = 1; i < 5; ++i) {
1045
+ fe_sq(t1, t1);
1046
+ }
1047
+ fe_mul(out, t1, t0);
1048
+ }
1049
+
1050
+ void fe_pow22523(fe out,const fe z)
1051
+ {
1052
+ fe t0;
1053
+ fe t1;
1054
+ fe t2;
1055
+ int i;
1056
+
1057
+ fe_sq(t0, z);
1058
+ fe_sq(t1, t0);
1059
+ fe_sq(t1, t1);
1060
+ fe_mul(t1, z, t1);
1061
+ fe_mul(t0, t0, t1);
1062
+ fe_sq(t0, t0);
1063
+ fe_mul(t0, t1, t0);
1064
+ fe_sq(t1, t0);
1065
+ for (i = 1; i < 5; ++i) {
1066
+ fe_sq(t1, t1);
1067
+ }
1068
+ fe_mul(t0, t1, t0);
1069
+ fe_sq(t1, t0);
1070
+ for (i = 1; i < 10; ++i) {
1071
+ fe_sq(t1, t1);
1072
+ }
1073
+ fe_mul(t1, t1, t0);
1074
+ fe_sq(t2, t1);
1075
+ for (i = 1; i < 20; ++i) {
1076
+ fe_sq(t2, t2);
1077
+ }
1078
+ fe_mul(t1, t2, t1);
1079
+ fe_sq(t1, t1);
1080
+ for (i = 1; i < 10; ++i) {
1081
+ fe_sq(t1, t1);
1082
+ }
1083
+ fe_mul(t0, t1, t0);
1084
+ fe_sq(t1, t0);
1085
+ for (i = 1; i < 50; ++i) {
1086
+ fe_sq(t1, t1);
1087
+ }
1088
+ fe_mul(t1, t1, t0);
1089
+ fe_sq(t2, t1);
1090
+ for (i = 1; i < 100; ++i) {
1091
+ fe_sq(t2, t2);
1092
+ }
1093
+ fe_mul(t1, t2, t1);
1094
+ fe_sq(t1, t1);
1095
+ for (i = 1; i < 50; ++i) {
1096
+ fe_sq(t1, t1);
1097
+ }
1098
+ fe_mul(t0, t1, t0);
1099
+ fe_sq(t0, t0);
1100
+ fe_sq(t0, t0);
1101
+ fe_mul(out, t0, z);
1102
+ }
1103
+
1104
+ /*
1105
+ h = f - g
1106
+ Can overlap h with f or g.
1107
+
1108
+ Preconditions:
1109
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1110
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1111
+
1112
+ Postconditions:
1113
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1114
+ */
1115
+
1116
+ void fe_sub(fe h,const fe f,const fe g)
1117
+ {
1118
+ int32_t f0 = f[0];
1119
+ int32_t f1 = f[1];
1120
+ int32_t f2 = f[2];
1121
+ int32_t f3 = f[3];
1122
+ int32_t f4 = f[4];
1123
+ int32_t f5 = f[5];
1124
+ int32_t f6 = f[6];
1125
+ int32_t f7 = f[7];
1126
+ int32_t f8 = f[8];
1127
+ int32_t f9 = f[9];
1128
+ int32_t g0 = g[0];
1129
+ int32_t g1 = g[1];
1130
+ int32_t g2 = g[2];
1131
+ int32_t g3 = g[3];
1132
+ int32_t g4 = g[4];
1133
+ int32_t g5 = g[5];
1134
+ int32_t g6 = g[6];
1135
+ int32_t g7 = g[7];
1136
+ int32_t g8 = g[8];
1137
+ int32_t g9 = g[9];
1138
+ int32_t h0 = f0 - g0;
1139
+ int32_t h1 = f1 - g1;
1140
+ int32_t h2 = f2 - g2;
1141
+ int32_t h3 = f3 - g3;
1142
+ int32_t h4 = f4 - g4;
1143
+ int32_t h5 = f5 - g5;
1144
+ int32_t h6 = f6 - g6;
1145
+ int32_t h7 = f7 - g7;
1146
+ int32_t h8 = f8 - g8;
1147
+ int32_t h9 = f9 - g9;
1148
+ h[0] = h0;
1149
+ h[1] = h1;
1150
+ h[2] = h2;
1151
+ h[3] = h3;
1152
+ h[4] = h4;
1153
+ h[5] = h5;
1154
+ h[6] = h6;
1155
+ h[7] = h7;
1156
+ h[8] = h8;
1157
+ h[9] = h9;
1158
+ }
1159
+
1160
+ /*
1161
+ r = p + q
1162
+ */
1163
+
1164
+ void ge_add(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1165
+ {
1166
+ fe t0;
1167
+
1168
+ fe_add(r->X, p->Y, p->X);
1169
+ fe_sub(r->Y, p->Y, p->X);
1170
+ fe_mul(r->Z, r->X, q->YplusX);
1171
+ fe_mul(r->Y, r->Y, q->YminusX);
1172
+ fe_mul(r->T, q->T2d, p->T);
1173
+ fe_mul(r->X, p->Z, q->Z);
1174
+ fe_add(t0, r->X, r->X);
1175
+ fe_sub(r->X, r->Z, r->Y);
1176
+ fe_add(r->Y, r->Z, r->Y);
1177
+ fe_add(r->Z, t0, r->T);
1178
+ fe_sub(r->T, t0, r->T);
1179
+ }
1180
+
1181
/*
Recodes the 256-bit little-endian scalar a[0..31] into 256 signed digits
r[0..255]. r is first filled with the individual bits of a. Each nonzero
digit then absorbs set bits up to 6 positions above it, as long as the digit
stays within [-15, 15]. When adding would exceed 15 the bit is subtracted
instead and a carry is rippled upward.
*/
static void slide(signed char *r,const unsigned char *a)
{
    int pos;
    int width;
    int up;

    /* Start from the plain binary expansion, one bit per entry. */
    for (pos = 0; pos < 256; pos++) {
        r[pos] = 1 & (a[pos >> 3] >> (pos & 7));
    }

    for (pos = 0; pos < 256; pos++) {
        if (!r[pos]) {
            continue;
        }
        for (width = 1; width <= 6 && pos + width < 256; width++) {
            if (!r[pos + width]) {
                continue;
            }
            if (r[pos] + (r[pos + width] << width) <= 15) {
                /* Fold the higher bit into this digit. */
                r[pos] += r[pos + width] << width;
                r[pos + width] = 0;
            } else if (r[pos] - (r[pos + width] << width) >= -15) {
                /* Subtract instead, then carry +1 into the next zero slot. */
                r[pos] -= r[pos + width] << width;
                for (up = pos + width; up < 256; up++) {
                    if (!r[up]) {
                        r[up] = 1;
                        break;
                    }
                    r[up] = 0;
                }
            } else {
                break;
            }
        }
    }
}
1212
+
1213
/* Eight precomputed ge_precomp entries loaded from base2.h. In
   ge_double_scalarmult_vartime the table is indexed by |digit|/2 for the odd
   sliding-window digits of b; presumably entry k is (2k+1)*B -- confirm
   against base2.h. */
static const ge_precomp Bi[8] = {
#include "base2.h"
};
1216
+
1217
/* 37095705934669439343138083508754565189542113879843219016388785533085940283555 */
/* Constant d in limb form; ge_frombytes_negate_vartime multiplies it by y^2
   to form d*y^2 + 1. */
static const fe d = {
-10913610,13857413,-15372611,6949391,114729,-8787816,-6275908,-3247719,-18696448,-12055116
};
1221
+
1222
/* sqrt(-1) */
/* In limb form; ge_frombytes_negate_vartime multiplies the candidate x by
   this constant when v*x^2 equals -u rather than u. */
static const fe sqrtm1 = {
-32595792,-7943725,9377950,3500415,12389472,-272473,-25146209,-2005654,326686,11406482
};
1226
+
1227
+ int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s)
1228
+ {
1229
+ fe u;
1230
+ fe v;
1231
+ fe v3;
1232
+ fe vxx;
1233
+ fe check;
1234
+
1235
+ fe_frombytes(h->Y,s);
1236
+ fe_1(h->Z);
1237
+ fe_sq(u,h->Y);
1238
+ fe_mul(v,u,d);
1239
+ fe_sub(u,u,h->Z); /* u = y^2-1 */
1240
+ fe_add(v,v,h->Z); /* v = dy^2+1 */
1241
+
1242
+ fe_sq(v3,v);
1243
+ fe_mul(v3,v3,v); /* v3 = v^3 */
1244
+ fe_sq(h->X,v3);
1245
+ fe_mul(h->X,h->X,v);
1246
+ fe_mul(h->X,h->X,u); /* x = uv^7 */
1247
+
1248
+ fe_pow22523(h->X,h->X); /* x = (uv^7)^((q-5)/8) */
1249
+ fe_mul(h->X,h->X,v3);
1250
+ fe_mul(h->X,h->X,u); /* x = uv^3(uv^7)^((q-5)/8) */
1251
+
1252
+ fe_sq(vxx,h->X);
1253
+ fe_mul(vxx,vxx,v);
1254
+ fe_sub(check,vxx,u); /* vx^2-u */
1255
+ if (fe_isnonzero(check)) {
1256
+ fe_add(check,vxx,u); /* vx^2+u */
1257
+ if (fe_isnonzero(check)) return -1;
1258
+ fe_mul(h->X,h->X,sqrtm1);
1259
+ }
1260
+
1261
+ if (fe_isnegative(h->X) == (s[31] >> 7))
1262
+ fe_neg(h->X,h->X);
1263
+
1264
+ fe_mul(h->T,h->X,h->Y);
1265
+ return 0;
1266
+ }
1267
+
1268
+ /*
1269
+ r = p + q
1270
+ */
1271
+
1272
+ void ge_madd(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1273
+ {
1274
+ fe t0;
1275
+
1276
+ fe_add(r->X, p->Y, p->X);
1277
+ fe_sub(r->Y, p->Y, p->X);
1278
+ fe_mul(r->Z, r->X, q->yplusx);
1279
+ fe_mul(r->Y, r->Y, q->yminusx);
1280
+ fe_mul(r->T, q->xy2d, p->T);
1281
+ fe_add(t0, p->Z, p->Z);
1282
+ fe_sub(r->X, r->Z, r->Y);
1283
+ fe_add(r->Y, r->Z, r->Y);
1284
+ fe_add(r->Z, t0, r->T);
1285
+ fe_sub(r->T, t0, r->T);
1286
+ }
1287
+
1288
+ /*
1289
+ r = p - q
1290
+ */
1291
+
1292
+ void ge_msub(ge_p1p1 *r,const ge_p3 *p,const ge_precomp *q)
1293
+ {
1294
+ fe t0;
1295
+
1296
+ fe_add(r->X, p->Y, p->X);
1297
+ fe_sub(r->Y, p->Y, p->X);
1298
+ fe_mul(r->Z, r->X, q->yminusx);
1299
+ fe_mul(r->Y, r->Y, q->yplusx);
1300
+ fe_mul(r->T, q->xy2d, p->T);
1301
+ fe_add(t0, p->Z, p->Z);
1302
+ fe_sub(r->X, r->Z, r->Y);
1303
+ fe_add(r->Y, r->Z, r->Y);
1304
+ fe_sub(r->Z, t0, r->T);
1305
+ fe_add(r->T, t0, r->T);
1306
+ }
1307
+
1308
+ /*
1309
+ r = p
1310
+ */
1311
+
1312
+ extern void ge_p1p1_to_p2(ge_p2 *r,const ge_p1p1 *p)
1313
+ {
1314
+ fe_mul(r->X,p->X,p->T);
1315
+ fe_mul(r->Y,p->Y,p->Z);
1316
+ fe_mul(r->Z,p->Z,p->T);
1317
+ }
1318
+
1319
+ /*
1320
+ r = p
1321
+ */
1322
+
1323
+ extern void ge_p1p1_to_p3(ge_p3 *r,const ge_p1p1 *p)
1324
+ {
1325
+ fe_mul(r->X,p->X,p->T);
1326
+ fe_mul(r->Y,p->Y,p->Z);
1327
+ fe_mul(r->Z,p->Z,p->T);
1328
+ fe_mul(r->T,p->X,p->Y);
1329
+ }
1330
+
1331
+ void ge_p2_0(ge_p2 *h)
1332
+ {
1333
+ fe_0(h->X);
1334
+ fe_1(h->Y);
1335
+ fe_1(h->Z);
1336
+ }
1337
+
1338
+ /*
1339
+ r = 2 * p
1340
+ */
1341
+
1342
+ void ge_p2_dbl(ge_p1p1 *r,const ge_p2 *p)
1343
+ {
1344
+ fe t0;
1345
+
1346
+ fe_sq(r->X, p->X);
1347
+ fe_sq(r->Z, p->Y);
1348
+ fe_sq2(r->T, p->Z);
1349
+ fe_add(r->Y, p->X, p->Y);
1350
+ fe_sq(t0, r->Y);
1351
+ fe_add(r->Y, r->Z, r->X);
1352
+ fe_sub(r->Z, r->Z, r->X);
1353
+ fe_sub(r->X, t0, r->Y);
1354
+ fe_sub(r->T, r->T, r->Z);
1355
+ }
1356
+
1357
+ void ge_p3_0(ge_p3 *h)
1358
+ {
1359
+ fe_0(h->X);
1360
+ fe_1(h->Y);
1361
+ fe_1(h->Z);
1362
+ fe_0(h->T);
1363
+ }
1364
+
1365
+ /*
1366
+ r = p
1367
+ */
1368
+
1369
/* 2 * d = 16295367250680780974490674513165176452449235426866156013048779062215315747161 */
/* In limb form; ge_p3_to_cached multiplies T by this constant to obtain the
   T2d field. */
static const fe d2 = {
-21827239,-5839606,-30745221,13898782,229458,15978800,-12551817,-6495438,29715968,9444199
};
1373
+
1374
+ extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
1375
+ {
1376
+ fe_add(r->YplusX,p->Y,p->X);
1377
+ fe_sub(r->YminusX,p->Y,p->X);
1378
+ fe_copy(r->Z,p->Z);
1379
+ fe_mul(r->T2d,p->T,d2);
1380
+ }
1381
+
1382
+ /*
1383
+ r = p
1384
+ */
1385
+
1386
+ extern void ge_p3_to_p2(ge_p2 *r,const ge_p3 *p)
1387
+ {
1388
+ fe_copy(r->X,p->X);
1389
+ fe_copy(r->Y,p->Y);
1390
+ fe_copy(r->Z,p->Z);
1391
+ }
1392
+
1393
+ void ge_p3_tobytes(unsigned char *s,const ge_p3 *h)
1394
+ {
1395
+ fe recip;
1396
+ fe x;
1397
+ fe y;
1398
+
1399
+ fe_invert(recip,h->Z);
1400
+ fe_mul(x,h->X,recip);
1401
+ fe_mul(y,h->Y,recip);
1402
+ fe_tobytes(s,y);
1403
+ s[31] ^= fe_isnegative(x) << 7;
1404
+ }
1405
+
1406
+ /*
1407
+ r = 2 * p
1408
+ */
1409
+
1410
+ void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
1411
+ {
1412
+ ge_p2 q;
1413
+ ge_p3_to_p2(&q,p);
1414
+ ge_p2_dbl(r,&q);
1415
+ }
1416
+
1417
+ void ge_precomp_0(ge_precomp *h)
1418
+ {
1419
+ fe_1(h->yplusx);
1420
+ fe_1(h->yminusx);
1421
+ fe_0(h->xy2d);
1422
+ }
1423
+
1424
/*
Branch-free equality test: returns 1 when b == c and 0 otherwise.

The XOR of the two bytes is 0 only when they match. Subtracting 1 from that
value as a 32-bit unsigned wraps to 0xFFFFFFFF in that case, and leaves
0..254 otherwise, so bit 31 of the result is the answer.
*/
static unsigned char equal(signed char b,signed char c)
{
    const unsigned char diff = (unsigned char) ((unsigned char) b ^ (unsigned char) c);
    const uint32_t wrapped = (uint32_t) diff - 1U;

    return (unsigned char) (wrapped >> 31);
}
1434
+
1435
/*
Branch-free sign test: returns 1 when b < 0 and 0 otherwise.

Converting b to a 64-bit unsigned value sign-extends it, so bit 63 is set
exactly for negative inputs.
*/
static unsigned char negative(signed char b)
{
    const uint64_t widened = (uint64_t) b;

    return (unsigned char) (widened >> 63);
}
1441
+
1442
+ static void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b)
1443
+ {
1444
+ fe_cmov(t->yplusx,u->yplusx,b);
1445
+ fe_cmov(t->yminusx,u->yminusx,b);
1446
+ fe_cmov(t->xy2d,u->xy2d,b);
1447
+ }
1448
+
1449
/* base[i][j] = (j+1)*256^i*B */
/* Table contents come from base.h. ge_select reads row `pos` and picks the
   entry matching |b| for b in 1..8. */
static const ge_precomp base[32][8] = {
#include "base.h"
};
1453
+
1454
+ static void ge_select(ge_precomp *t,int pos,signed char b)
1455
+ {
1456
+ ge_precomp minust;
1457
+ unsigned char bnegative = negative(b);
1458
+ unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
1459
+
1460
+ ge_precomp_0(t);
1461
+ cmov(t,&base[pos][0],equal(babs,1));
1462
+ cmov(t,&base[pos][1],equal(babs,2));
1463
+ cmov(t,&base[pos][2],equal(babs,3));
1464
+ cmov(t,&base[pos][3],equal(babs,4));
1465
+ cmov(t,&base[pos][4],equal(babs,5));
1466
+ cmov(t,&base[pos][5],equal(babs,6));
1467
+ cmov(t,&base[pos][6],equal(babs,7));
1468
+ cmov(t,&base[pos][7],equal(babs,8));
1469
+ fe_copy(minust.yplusx,t->yminusx);
1470
+ fe_copy(minust.yminusx,t->yplusx);
1471
+ fe_neg(minust.xy2d,t->xy2d);
1472
+ cmov(t,&minust,bnegative);
1473
+ }
1474
+
1475
+ /*
1476
+ r = p - q
1477
+ */
1478
+
1479
+ void ge_sub(ge_p1p1 *r,const ge_p3 *p,const ge_cached *q)
1480
+ {
1481
+ fe t0;
1482
+
1483
+ fe_add(r->X, p->Y, p->X);
1484
+ fe_sub(r->Y, p->Y, p->X);
1485
+ fe_mul(r->Z, r->X, q->YminusX);
1486
+ fe_mul(r->Y, r->Y, q->YplusX);
1487
+ fe_mul(r->T, q->T2d, p->T);
1488
+ fe_mul(r->X, p->Z, q->Z);
1489
+ fe_add(t0, r->X, r->X);
1490
+ fe_sub(r->X, r->Z, r->Y);
1491
+ fe_add(r->Y, r->Z, r->Y);
1492
+ fe_sub(r->Z, t0, r->T);
1493
+ fe_add(r->T, t0, r->T);
1494
+ }
1495
+
1496
+ void ge_tobytes(unsigned char *s,const ge_p2 *h)
1497
+ {
1498
+ fe recip;
1499
+ fe x;
1500
+ fe y;
1501
+
1502
+ fe_invert(recip,h->Z);
1503
+ fe_mul(x,h->X,recip);
1504
+ fe_mul(y,h->Y,recip);
1505
+ fe_tobytes(s,y);
1506
+ s[31] ^= fe_isnegative(x) << 7;
1507
+ }
1508
+
1509
/*
ge_scalarmult_base (defined further below):

h = a * B
where a = a[0]+256*a[1]+...+256^31 a[31]
B is the Ed25519 base point (x,4/5) with x positive.

Preconditions:
   a[31] <= 127
*/
1517
+
1518
+ /*
1519
+ r = a * A + b * B
1520
+ where a = a[0]+256*a[1]+...+256^31 a[31].
1521
+ and b = b[0]+256*b[1]+...+256^31 b[31].
1522
+ B is the Ed25519 base point (x,4/5) with x positive.
1523
+ */
1524
+
1525
+ void ge_double_scalarmult_vartime(ge_p2 *r,const unsigned char *a,const ge_p3 *A,const unsigned char *b)
1526
+ {
1527
+ signed char aslide[256];
1528
+ signed char bslide[256];
1529
+ ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
1530
+ ge_p1p1 t;
1531
+ ge_p3 u;
1532
+ ge_p3 A2;
1533
+ int i;
1534
+
1535
+ slide(aslide,a);
1536
+ slide(bslide,b);
1537
+
1538
+ ge_p3_to_cached(&Ai[0],A);
1539
+ ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1540
+ ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1541
+ ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1542
+ ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1543
+ ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1544
+ ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1545
+ ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1546
+ ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1547
+
1548
+ ge_p2_0(r);
1549
+
1550
+ for (i = 255;i >= 0;--i) {
1551
+ if (aslide[i] || bslide[i]) break;
1552
+ }
1553
+
1554
+ for (;i >= 0;--i) {
1555
+ ge_p2_dbl(&t,r);
1556
+
1557
+ if (aslide[i] > 0) {
1558
+ ge_p1p1_to_p3(&u,&t);
1559
+ ge_add(&t,&u,&Ai[aslide[i]/2]);
1560
+ } else if (aslide[i] < 0) {
1561
+ ge_p1p1_to_p3(&u,&t);
1562
+ ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1563
+ }
1564
+
1565
+ if (bslide[i] > 0) {
1566
+ ge_p1p1_to_p3(&u,&t);
1567
+ ge_madd(&t,&u,&Bi[bslide[i]/2]);
1568
+ } else if (bslide[i] < 0) {
1569
+ ge_p1p1_to_p3(&u,&t);
1570
+ ge_msub(&t,&u,&Bi[(-bslide[i])/2]);
1571
+ }
1572
+
1573
+ ge_p1p1_to_p2(r,&t);
1574
+ }
1575
+ }
1576
+
1577
+ void ge_scalarmult_vartime(ge_p3 *r,const unsigned char *a,const ge_p3 *A)
1578
+ {
1579
+ signed char aslide[256];
1580
+ ge_cached Ai[8];
1581
+ ge_p1p1 t;
1582
+ ge_p3 u;
1583
+ ge_p3 A2;
1584
+ int i;
1585
+
1586
+ slide(aslide,a);
1587
+
1588
+ ge_p3_to_cached(&Ai[0],A);
1589
+ ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
1590
+ ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
1591
+ ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
1592
+ ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
1593
+ ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
1594
+ ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
1595
+ ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
1596
+ ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
1597
+
1598
+ ge_p3_0(r);
1599
+
1600
+ for (i = 255;i >= 0;--i) {
1601
+ if (aslide[i]) break;
1602
+ }
1603
+
1604
+ for (;i >= 0;--i) {
1605
+ ge_p3_dbl(&t,r);
1606
+
1607
+ if (aslide[i] > 0) {
1608
+ ge_p1p1_to_p3(&u,&t);
1609
+ ge_add(&t,&u,&Ai[aslide[i]/2]);
1610
+ } else if (aslide[i] < 0) {
1611
+ ge_p1p1_to_p3(&u,&t);
1612
+ ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
1613
+ }
1614
+
1615
+ ge_p1p1_to_p3(r,&t);
1616
+ }
1617
+ }
1618
+
1619
+ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
1620
+ {
1621
+ signed char e[64];
1622
+ signed char carry;
1623
+ ge_p1p1 r;
1624
+ ge_p2 s;
1625
+ ge_precomp t;
1626
+ int i;
1627
+
1628
+ for (i = 0;i < 32;++i) {
1629
+ e[2 * i + 0] = (a[i] >> 0) & 15;
1630
+ e[2 * i + 1] = (a[i] >> 4) & 15;
1631
+ }
1632
+ /* each e[i] is between 0 and 15 */
1633
+ /* e[63] is between 0 and 7 */
1634
+
1635
+ carry = 0;
1636
+ for (i = 0;i < 63;++i) {
1637
+ e[i] += carry;
1638
+ carry = e[i] + 8;
1639
+ carry >>= 4;
1640
+ e[i] -= carry * ((signed char) 1 << 4);
1641
+ }
1642
+ e[63] += carry;
1643
+ /* each e[i] is between -8 and 8 */
1644
+
1645
+ ge_p3_0(h);
1646
+ for (i = 1;i < 64;i += 2) {
1647
+ ge_select(&t,i / 2,e[i]);
1648
+ ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1649
+ }
1650
+
1651
+ ge_p3_dbl(&r,h); ge_p1p1_to_p2(&s,&r);
1652
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1653
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p2(&s,&r);
1654
+ ge_p2_dbl(&r,&s); ge_p1p1_to_p3(h,&r);
1655
+
1656
+ for (i = 0;i < 64;i += 2) {
1657
+ ge_select(&t,i / 2,e[i]);
1658
+ ge_madd(&r,h,&t); ge_p1p1_to_p3(h,&r);
1659
+ }
1660
+ }
1661
+
1662
+ /*
1663
+ Input:
1664
+ a[0]+256*a[1]+...+256^31*a[31] = a
1665
+ b[0]+256*b[1]+...+256^31*b[31] = b
1666
+ c[0]+256*c[1]+...+256^31*c[31] = c
1667
+
1668
+ Output:
1669
+ s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1670
+ where l = 2^252 + 27742317777372353535851937790883648493.
1671
+ */
1672
+
1673
+ void sc_muladd(unsigned char *s,const unsigned char *a,const unsigned char *b,const unsigned char *c)
1674
+ { /* Writes (a*b + c) mod l to s[0..31]; all arithmetic is done on 21-bit limbs held in int64_t. */
1675
+ int64_t a0 = 2097151 & load_3(a); /* 2097151 = 2^21 - 1: each limb keeps 21 bits (assumes load_3/load_4 read little-endian bytes -- TODO confirm) */
1676
+ int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
1677
+ int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
1678
+ int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
1679
+ int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
1680
+ int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
1681
+ int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
1682
+ int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
1683
+ int64_t a8 = 2097151 & load_3(a + 21);
1684
+ int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
1685
+ int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
1686
+ int64_t a11 = (load_4(a + 28) >> 7); /* top limb is not masked, so it keeps every remaining high bit */
1687
+ int64_t b0 = 2097151 & load_3(b); /* b is split into limbs exactly like a */
1688
+ int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
1689
+ int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
1690
+ int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
1691
+ int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
1692
+ int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
1693
+ int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
1694
+ int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
1695
+ int64_t b8 = 2097151 & load_3(b + 21);
1696
+ int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
1697
+ int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
1698
+ int64_t b11 = (load_4(b + 28) >> 7);
1699
+ int64_t c0 = 2097151 & load_3(c); /* c is split into limbs exactly like a */
1700
+ int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
1701
+ int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
1702
+ int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
1703
+ int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
1704
+ int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
1705
+ int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
1706
+ int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
1707
+ int64_t c8 = 2097151 & load_3(c + 21);
1708
+ int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
1709
+ int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
1710
+ int64_t c11 = (load_4(c + 28) >> 7);
1711
+ int64_t s0; /* s0..s23: limbs of the unreduced value a*b + c, limb k having weight 2^(21*k) */
1712
+ int64_t s1;
1713
+ int64_t s2;
1714
+ int64_t s3;
1715
+ int64_t s4;
1716
+ int64_t s5;
1717
+ int64_t s6;
1718
+ int64_t s7;
1719
+ int64_t s8;
1720
+ int64_t s9;
1721
+ int64_t s10;
1722
+ int64_t s11;
1723
+ int64_t s12;
1724
+ int64_t s13;
1725
+ int64_t s14;
1726
+ int64_t s15;
1727
+ int64_t s16;
1728
+ int64_t s17;
1729
+ int64_t s18;
1730
+ int64_t s19;
1731
+ int64_t s20;
1732
+ int64_t s21;
1733
+ int64_t s22;
1734
+ int64_t s23;
1735
+ int64_t carry0; /* carryN is the amount moved from limb sN into limb sN+1 */
1736
+ int64_t carry1;
1737
+ int64_t carry2;
1738
+ int64_t carry3;
1739
+ int64_t carry4;
1740
+ int64_t carry5;
1741
+ int64_t carry6;
1742
+ int64_t carry7;
1743
+ int64_t carry8;
1744
+ int64_t carry9;
1745
+ int64_t carry10;
1746
+ int64_t carry11;
1747
+ int64_t carry12;
1748
+ int64_t carry13;
1749
+ int64_t carry14;
1750
+ int64_t carry15;
1751
+ int64_t carry16;
1752
+ int64_t carry17;
1753
+ int64_t carry18;
1754
+ int64_t carry19;
1755
+ int64_t carry20;
1756
+ int64_t carry21;
1757
+ int64_t carry22;
1758
+
1759
+ s0 = c0 + a0*b0; /* schoolbook product of the limb vectors plus c: s_k = c_k + sum of a_i*b_j with i + j = k */
1760
+ s1 = c1 + a0*b1 + a1*b0;
1761
+ s2 = c2 + a0*b2 + a1*b1 + a2*b0;
1762
+ s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0;
1763
+ s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
1764
+ s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
1765
+ s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
1766
+ s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
1767
+ s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0;
1768
+ s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0;
1769
+ s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0;
1770
+ s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0;
1771
+ s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1; /* c has only 12 limbs, so s12 and above are pure product terms */
1772
+ s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2;
1773
+ s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3;
1774
+ s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4;
1775
+ s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5;
1776
+ s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6;
1777
+ s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7;
1778
+ s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8;
1779
+ s20 = a9*b11 + a10*b10 + a11*b9;
1780
+ s21 = a10*b11 + a11*b10;
1781
+ s22 = a11*b11;
1782
+ s23 = 0; /* s23 has no product term; it only receives the carry out of s22 below */
1783
+
1784
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* rounded carry: the 2^20 offset leaves s0 in [-2^20, 2^20), assuming >> is an arithmetic shift */
1785
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1786
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1787
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1788
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1789
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1790
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1791
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1792
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1793
+ carry18 = (s18 + (int64_t) (1L << 20)) >> 21; s19 += carry18; s18 -= carry18 * ((uint64_t) 1L << 21);
1794
+ carry20 = (s20 + (int64_t) (1L << 20)) >> 21; s21 += carry20; s20 -= carry20 * ((uint64_t) 1L << 21);
1795
+ carry22 = (s22 + (int64_t) (1L << 20)) >> 21; s23 += carry22; s22 -= carry22 * ((uint64_t) 1L << 21);
1796
+
1797
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21); /* same rounded carry, now for the odd-numbered limbs */
1798
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1799
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1800
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1801
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1802
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1803
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1804
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1805
+ carry17 = (s17 + (int64_t) (1L << 20)) >> 21; s18 += carry17; s17 -= carry17 * ((uint64_t) 1L << 21);
1806
+ carry19 = (s19 + (int64_t) (1L << 20)) >> 21; s20 += carry19; s19 -= carry19 * ((uint64_t) 1L << 21);
1807
+ carry21 = (s21 + (int64_t) (1L << 20)) >> 21; s22 += carry21; s21 -= carry21 * ((uint64_t) 1L << 21);
1808
+
1809
+ s11 += s23 * 666643; /* fold limb s23 into s11..s16; the six multipliers are presumably the signed 21-bit limbs of 2^252 mod l -- TODO confirm */
1810
+ s12 += s23 * 470296;
1811
+ s13 += s23 * 654183;
1812
+ s14 -= s23 * 997805;
1813
+ s15 += s23 * 136657;
1814
+ s16 -= s23 * 683901;
1815
+
1816
+ s10 += s22 * 666643; /* fold s22 into s10..s15 */
1817
+ s11 += s22 * 470296;
1818
+ s12 += s22 * 654183;
1819
+ s13 -= s22 * 997805;
1820
+ s14 += s22 * 136657;
1821
+ s15 -= s22 * 683901;
1822
+
1823
+ s9 += s21 * 666643; /* fold s21 into s9..s14 */
1824
+ s10 += s21 * 470296;
1825
+ s11 += s21 * 654183;
1826
+ s12 -= s21 * 997805;
1827
+ s13 += s21 * 136657;
1828
+ s14 -= s21 * 683901;
1829
+
1830
+ s8 += s20 * 666643; /* fold s20 into s8..s13 */
1831
+ s9 += s20 * 470296;
1832
+ s10 += s20 * 654183;
1833
+ s11 -= s20 * 997805;
1834
+ s12 += s20 * 136657;
1835
+ s13 -= s20 * 683901;
1836
+
1837
+ s7 += s19 * 666643; /* fold s19 into s7..s12 */
1838
+ s8 += s19 * 470296;
1839
+ s9 += s19 * 654183;
1840
+ s10 -= s19 * 997805;
1841
+ s11 += s19 * 136657;
1842
+ s12 -= s19 * 683901;
1843
+
1844
+ s6 += s18 * 666643; /* fold s18 into s6..s11; s18..s23 are not read again after this */
1845
+ s7 += s18 * 470296;
1846
+ s8 += s18 * 654183;
1847
+ s9 -= s18 * 997805;
1848
+ s10 += s18 * 136657;
1849
+ s11 -= s18 * 683901;
1850
+
1851
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21); /* rounded carries over s6..s16, the limbs enlarged by the folds above */
1852
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1853
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1854
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
1855
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
1856
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
1857
+
1858
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1859
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1860
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1861
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
1862
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
1863
+
1864
+ s5 += s17 * 666643; /* second batch of folds: s17 down to s12 are folded into s0..s10 */
1865
+ s6 += s17 * 470296;
1866
+ s7 += s17 * 654183;
1867
+ s8 -= s17 * 997805;
1868
+ s9 += s17 * 136657;
1869
+ s10 -= s17 * 683901;
1870
+
1871
+ s4 += s16 * 666643;
1872
+ s5 += s16 * 470296;
1873
+ s6 += s16 * 654183;
1874
+ s7 -= s16 * 997805;
1875
+ s8 += s16 * 136657;
1876
+ s9 -= s16 * 683901;
1877
+
1878
+ s3 += s15 * 666643;
1879
+ s4 += s15 * 470296;
1880
+ s5 += s15 * 654183;
1881
+ s6 -= s15 * 997805;
1882
+ s7 += s15 * 136657;
1883
+ s8 -= s15 * 683901;
1884
+
1885
+ s2 += s14 * 666643;
1886
+ s3 += s14 * 470296;
1887
+ s4 += s14 * 654183;
1888
+ s5 -= s14 * 997805;
1889
+ s6 += s14 * 136657;
1890
+ s7 -= s14 * 683901;
1891
+
1892
+ s1 += s13 * 666643;
1893
+ s2 += s13 * 470296;
1894
+ s3 += s13 * 654183;
1895
+ s4 -= s13 * 997805;
1896
+ s5 += s13 * 136657;
1897
+ s6 -= s13 * 683901;
1898
+
1899
+ s0 += s12 * 666643;
1900
+ s1 += s12 * 470296;
1901
+ s2 += s12 * 654183;
1902
+ s3 -= s12 * 997805;
1903
+ s4 += s12 * 136657;
1904
+ s5 -= s12 * 683901;
1905
+ s12 = 0; /* s12 is now folded into s0..s5; cleared so it can collect the carry out of s11 */
1906
+
1907
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* rounded carries over s0..s11; the carry out of s11 lands in s12 */
1908
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1909
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1910
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1911
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1912
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1913
+
1914
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1915
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1916
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1917
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1918
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1919
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1920
+
1921
+ s0 += s12 * 666643; /* fold the carry that left s11 back into s0..s5 */
1922
+ s1 += s12 * 470296;
1923
+ s2 += s12 * 654183;
1924
+ s3 -= s12 * 997805;
1925
+ s4 += s12 * 136657;
1926
+ s5 -= s12 * 683901;
1927
+ s12 = 0;
1928
+
1929
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* floor carry (no 2^20 offset): leaves s0 in [0, 2^21), assuming >> is an arithmetic shift */
1930
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1931
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1932
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1933
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1934
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1935
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1936
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1937
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1938
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1939
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1940
+ carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
1941
+
1942
+ s0 += s12 * 666643; /* s12 holds the carry out of s11 from the pass above; fold it once more */
1943
+ s1 += s12 * 470296;
1944
+ s2 += s12 * 654183;
1945
+ s3 -= s12 * 997805;
1946
+ s4 += s12 * 136657;
1947
+ s5 -= s12 * 683901;
1948
+
1949
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* last floor-carry pass; it stops at s10, so s11 absorbs the final carry */
1950
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
1951
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
1952
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
1953
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
1954
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
1955
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
1956
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
1957
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
1958
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
1959
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
1960
+
1961
+ s[0] = s0 >> 0; /* pack limbs s0..s11 into the 32 output bytes; a, b and c were fully read before this first write to s */
1962
+ s[1] = s0 >> 8;
1963
+ s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5)); /* byte shared by two limbs: top bits of s0 and low bits of s1 */
1964
+ s[3] = s1 >> 3;
1965
+ s[4] = s1 >> 11;
1966
+ s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1967
+ s[6] = s2 >> 6;
1968
+ s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1969
+ s[8] = s3 >> 1;
1970
+ s[9] = s3 >> 9;
1971
+ s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1972
+ s[11] = s4 >> 4;
1973
+ s[12] = s4 >> 12;
1974
+ s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1975
+ s[14] = s5 >> 7;
1976
+ s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1977
+ s[16] = s6 >> 2;
1978
+ s[17] = s6 >> 10;
1979
+ s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1980
+ s[19] = s7 >> 5;
1981
+ s[20] = s7 >> 13;
1982
+ s[21] = s8 >> 0;
1983
+ s[22] = s8 >> 8;
1984
+ s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1985
+ s[24] = s9 >> 3;
1986
+ s[25] = s9 >> 11;
1987
+ s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1988
+ s[27] = s10 >> 6;
1989
+ s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1990
+ s[29] = s11 >> 1;
1991
+ s[30] = s11 >> 9;
1992
+ s[31] = s11 >> 17;
1993
+ }
1994
+
1995
+ /*
1996
+ Input:
1997
+ s[0]+256*s[1]+...+256^63*s[63] = s
1998
+
1999
+ Output:
2000
+ s[0]+256*s[1]+...+256^31*s[31] = s mod l
2001
+ where l = 2^252 + 27742317777372353535851937790883648493.
2002
+ Overwrites s in place.
2003
+ */
2004
+
2005
+ void sc_reduce(unsigned char *s)
2006
+ { /* Reduces the 64-byte value in s modulo l and stores the result in s[0..31]; s[32..63] are read but never written. */
2007
+ int64_t s0 = 2097151 & load_3(s); /* 2097151 = 2^21 - 1: each limb keeps 21 bits (assumes load_3/load_4 read little-endian bytes -- TODO confirm) */
2008
+ int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
2009
+ int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
2010
+ int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
2011
+ int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
2012
+ int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
2013
+ int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
2014
+ int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
2015
+ int64_t s8 = 2097151 & load_3(s + 21);
2016
+ int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
2017
+ int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
2018
+ int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
2019
+ int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
2020
+ int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
2021
+ int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
2022
+ int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
2023
+ int64_t s16 = 2097151 & load_3(s + 42);
2024
+ int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
2025
+ int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
2026
+ int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
2027
+ int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
2028
+ int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
2029
+ int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
2030
+ int64_t s23 = (load_4(s + 60) >> 3); /* top limb is not masked, so it keeps every remaining high bit */
2031
+ int64_t carry0; /* carryN is the amount moved from limb sN into limb sN+1 */
2032
+ int64_t carry1;
2033
+ int64_t carry2;
2034
+ int64_t carry3;
2035
+ int64_t carry4;
2036
+ int64_t carry5;
2037
+ int64_t carry6;
2038
+ int64_t carry7;
2039
+ int64_t carry8;
2040
+ int64_t carry9;
2041
+ int64_t carry10;
2042
+ int64_t carry11;
2043
+ int64_t carry12;
2044
+ int64_t carry13;
2045
+ int64_t carry14;
2046
+ int64_t carry15;
2047
+ int64_t carry16;
2048
+
2049
+ s11 += s23 * 666643; /* fold limb s23 into s11..s16; the six multipliers are presumably the signed 21-bit limbs of 2^252 mod l -- TODO confirm */
2050
+ s12 += s23 * 470296;
2051
+ s13 += s23 * 654183;
2052
+ s14 -= s23 * 997805;
2053
+ s15 += s23 * 136657;
2054
+ s16 -= s23 * 683901;
2055
+
2056
+ s10 += s22 * 666643; /* fold s22 into s10..s15 */
2057
+ s11 += s22 * 470296;
2058
+ s12 += s22 * 654183;
2059
+ s13 -= s22 * 997805;
2060
+ s14 += s22 * 136657;
2061
+ s15 -= s22 * 683901;
2062
+
2063
+ s9 += s21 * 666643; /* fold s21 into s9..s14 */
2064
+ s10 += s21 * 470296;
2065
+ s11 += s21 * 654183;
2066
+ s12 -= s21 * 997805;
2067
+ s13 += s21 * 136657;
2068
+ s14 -= s21 * 683901;
2069
+
2070
+ s8 += s20 * 666643; /* fold s20 into s8..s13 */
2071
+ s9 += s20 * 470296;
2072
+ s10 += s20 * 654183;
2073
+ s11 -= s20 * 997805;
2074
+ s12 += s20 * 136657;
2075
+ s13 -= s20 * 683901;
2076
+
2077
+ s7 += s19 * 666643; /* fold s19 into s7..s12 */
2078
+ s8 += s19 * 470296;
2079
+ s9 += s19 * 654183;
2080
+ s10 -= s19 * 997805;
2081
+ s11 += s19 * 136657;
2082
+ s12 -= s19 * 683901;
2083
+
2084
+ s6 += s18 * 666643; /* fold s18 into s6..s11; s18..s23 are not read again after this */
2085
+ s7 += s18 * 470296;
2086
+ s8 += s18 * 654183;
2087
+ s9 -= s18 * 997805;
2088
+ s10 += s18 * 136657;
2089
+ s11 -= s18 * 683901;
2090
+
2091
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21); /* rounded carry: the 2^20 offset leaves s6 in [-2^20, 2^20), assuming >> is an arithmetic shift */
2092
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2093
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2094
+ carry12 = (s12 + (int64_t) (1L << 20)) >> 21; s13 += carry12; s12 -= carry12 * ((uint64_t) 1L << 21);
2095
+ carry14 = (s14 + (int64_t) (1L << 20)) >> 21; s15 += carry14; s14 -= carry14 * ((uint64_t) 1L << 21);
2096
+ carry16 = (s16 + (int64_t) (1L << 20)) >> 21; s17 += carry16; s16 -= carry16 * ((uint64_t) 1L << 21);
2097
+
2098
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21); /* same rounded carry, now for the odd-numbered limbs */
2099
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2100
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2101
+ carry13 = (s13 + (int64_t) (1L << 20)) >> 21; s14 += carry13; s13 -= carry13 * ((uint64_t) 1L << 21);
2102
+ carry15 = (s15 + (int64_t) (1L << 20)) >> 21; s16 += carry15; s15 -= carry15 * ((uint64_t) 1L << 21);
2103
+
2104
+ s5 += s17 * 666643; /* second batch of folds: s17 down to s12 are folded into s0..s10 */
2105
+ s6 += s17 * 470296;
2106
+ s7 += s17 * 654183;
2107
+ s8 -= s17 * 997805;
2108
+ s9 += s17 * 136657;
2109
+ s10 -= s17 * 683901;
2110
+
2111
+ s4 += s16 * 666643;
2112
+ s5 += s16 * 470296;
2113
+ s6 += s16 * 654183;
2114
+ s7 -= s16 * 997805;
2115
+ s8 += s16 * 136657;
2116
+ s9 -= s16 * 683901;
2117
+
2118
+ s3 += s15 * 666643;
2119
+ s4 += s15 * 470296;
2120
+ s5 += s15 * 654183;
2121
+ s6 -= s15 * 997805;
2122
+ s7 += s15 * 136657;
2123
+ s8 -= s15 * 683901;
2124
+
2125
+ s2 += s14 * 666643;
2126
+ s3 += s14 * 470296;
2127
+ s4 += s14 * 654183;
2128
+ s5 -= s14 * 997805;
2129
+ s6 += s14 * 136657;
2130
+ s7 -= s14 * 683901;
2131
+
2132
+ s1 += s13 * 666643;
2133
+ s2 += s13 * 470296;
2134
+ s3 += s13 * 654183;
2135
+ s4 -= s13 * 997805;
2136
+ s5 += s13 * 136657;
2137
+ s6 -= s13 * 683901;
2138
+
2139
+ s0 += s12 * 666643;
2140
+ s1 += s12 * 470296;
2141
+ s2 += s12 * 654183;
2142
+ s3 -= s12 * 997805;
2143
+ s4 += s12 * 136657;
2144
+ s5 -= s12 * 683901;
2145
+ s12 = 0; /* s12 is now folded into s0..s5; cleared so it can collect the carry out of s11 */
2146
+
2147
+ carry0 = (s0 + (int64_t) (1L << 20)) >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* rounded carries over s0..s11; the carry out of s11 lands in s12 */
2148
+ carry2 = (s2 + (int64_t) (1L << 20)) >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2149
+ carry4 = (s4 + (int64_t) (1L << 20)) >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2150
+ carry6 = (s6 + (int64_t) (1L << 20)) >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2151
+ carry8 = (s8 + (int64_t) (1L << 20)) >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2152
+ carry10 = (s10 + (int64_t) (1L << 20)) >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2153
+
2154
+ carry1 = (s1 + (int64_t) (1L << 20)) >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2155
+ carry3 = (s3 + (int64_t) (1L << 20)) >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2156
+ carry5 = (s5 + (int64_t) (1L << 20)) >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2157
+ carry7 = (s7 + (int64_t) (1L << 20)) >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2158
+ carry9 = (s9 + (int64_t) (1L << 20)) >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2159
+ carry11 = (s11 + (int64_t) (1L << 20)) >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2160
+
2161
+ s0 += s12 * 666643; /* fold the carry that left s11 back into s0..s5 */
2162
+ s1 += s12 * 470296;
2163
+ s2 += s12 * 654183;
2164
+ s3 -= s12 * 997805;
2165
+ s4 += s12 * 136657;
2166
+ s5 -= s12 * 683901;
2167
+ s12 = 0;
2168
+
2169
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* floor carry (no 2^20 offset): leaves s0 in [0, 2^21), assuming >> is an arithmetic shift */
2170
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2171
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2172
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2173
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2174
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2175
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2176
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2177
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2178
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2179
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2180
+ carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 * ((uint64_t) 1L << 21);
2181
+
2182
+ s0 += s12 * 666643; /* s12 holds the carry out of s11 from the pass above; fold it once more */
2183
+ s1 += s12 * 470296;
2184
+ s2 += s12 * 654183;
2185
+ s3 -= s12 * 997805;
2186
+ s4 += s12 * 136657;
2187
+ s5 -= s12 * 683901;
2188
+
2189
+ carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 * ((uint64_t) 1L << 21); /* last floor-carry pass; it stops at s10, so s11 absorbs the final carry */
2190
+ carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 * ((uint64_t) 1L << 21);
2191
+ carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 * ((uint64_t) 1L << 21);
2192
+ carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 * ((uint64_t) 1L << 21);
2193
+ carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 * ((uint64_t) 1L << 21);
2194
+ carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 * ((uint64_t) 1L << 21);
2195
+ carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 * ((uint64_t) 1L << 21);
2196
+ carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 * ((uint64_t) 1L << 21);
2197
+ carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 * ((uint64_t) 1L << 21);
2198
+ carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 * ((uint64_t) 1L << 21);
2199
+ carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 * ((uint64_t) 1L << 21);
2200
+
2201
+ s[0] = s0 >> 0; /* pack limbs s0..s11 into s[0..31]; all 64 input bytes were read into locals before this first write */
2202
+ s[1] = s0 >> 8;
2203
+ s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5)); /* byte shared by two limbs: top bits of s0 and low bits of s1 */
2204
+ s[3] = s1 >> 3;
2205
+ s[4] = s1 >> 11;
2206
+ s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
2207
+ s[6] = s2 >> 6;
2208
+ s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
2209
+ s[8] = s3 >> 1;
2210
+ s[9] = s3 >> 9;
2211
+ s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
2212
+ s[11] = s4 >> 4;
2213
+ s[12] = s4 >> 12;
2214
+ s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
2215
+ s[14] = s5 >> 7;
2216
+ s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
2217
+ s[16] = s6 >> 2;
2218
+ s[17] = s6 >> 10;
2219
+ s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
2220
+ s[19] = s7 >> 5;
2221
+ s[20] = s7 >> 13;
2222
+ s[21] = s8 >> 0;
2223
+ s[22] = s8 >> 8;
2224
+ s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
2225
+ s[24] = s9 >> 3;
2226
+ s[25] = s9 >> 11;
2227
+ s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
2228
+ s[27] = s10 >> 6;
2229
+ s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
2230
+ s[29] = s11 >> 1;
2231
+ s[30] = s11 >> 9;
2232
+ s[31] = s11 >> 17;
2233
+ }