ed25519_blake2b 0.1.1

Files changed (61)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/CODE_OF_CONDUCT.md +74 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +23 -0
  6. data/LICENSE +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ed25519_blake2b.gemspec +31 -0
  12. data/ext/ed25519_blake2b/blake2-config.h +72 -0
  13. data/ext/ed25519_blake2b/blake2-impl.h +160 -0
  14. data/ext/ed25519_blake2b/blake2.h +195 -0
  15. data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
  16. data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
  17. data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
  18. data/ext/ed25519_blake2b/blake2b-round.h +157 -0
  19. data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
  20. data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
  21. data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
  22. data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
  23. data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
  24. data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
  25. data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
  26. data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
  27. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
  28. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
  29. data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
  30. data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
  31. data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
  32. data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
  33. data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
  34. data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
  35. data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
  36. data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
  37. data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
  38. data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
  39. data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
  40. data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
  41. data/ext/ed25519_blake2b/ed25519.c +150 -0
  42. data/ext/ed25519_blake2b/ed25519.h +30 -0
  43. data/ext/ed25519_blake2b/extconf.rb +3 -0
  44. data/ext/ed25519_blake2b/fuzz/README.md +173 -0
  45. data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
  46. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
  47. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
  48. data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
  49. data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
  50. data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
  51. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
  52. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
  53. data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
  54. data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
  55. data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
  56. data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
  57. data/ext/ed25519_blake2b/rbext.c +25 -0
  58. data/ext/ed25519_blake2b/regression.h +1024 -0
  59. data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
  60. data/lib/ed25519_blake2b/version.rb +3 -0
  61. metadata +147 -0
data/ext/ed25519_blake2b/fuzz/build-nix.php @@ -0,0 +1,134 @@
1
+ <?php
2
+ function echoln($str) {
3
+ echo $str;
4
+ echo "\n";
5
+ }
6
+
7
+ function usage($reason) {
8
+ echoln("Usage: php build-nix.php [flags]");
9
+ echoln("Flags in parantheses are optional");
10
+ echoln("");
11
+ echoln(" --bits=[32,64]");
12
+ echoln(" --function=[curve25519,ed25519]");
13
+ echoln(" (--compiler=[*gcc,clang,icc]) which compiler to use, gcc is default");
14
+ echoln(" (--with-openssl) use openssl for SHA512");
15
+ echoln(" (--with-sse2) additionally fuzz against SSE2");
16
+ echoln(" (--no-asm) don't use platform specific asm");
17
+ echoln("");
18
+ if ($reason)
19
+ echoln($reason);
20
+ }
21
+
22
+ function cleanup() {
23
+ system("rm -f *.o");
24
+ }
25
+
26
+ function runcmd($desc, $cmd) {
27
+ echoln($desc);
28
+
29
+ $ret = 0;
30
+ system($cmd, $ret);
31
+ if ($ret) {
32
+ cleanup();
33
+ exit;
34
+ }
35
+ }
36
+
37
+ class argument {
38
+ var $set, $value;
39
+ }
40
+
41
+ class multiargument extends argument {
42
+ function multiargument($flag, $legal_values) {
43
+ global $argc, $argv;
44
+
45
+ $this->set = false;
46
+
47
+ $map = array();
48
+ foreach($legal_values as $value)
49
+ $map[$value] = true;
50
+
51
+ for ($i = 1; $i < $argc; $i++) {
52
+ if (!preg_match("!--".$flag."=(.*)!", $argv[$i], $m))
53
+ continue;
54
+ if (isset($map[$m[1]])) {
55
+ $this->value = $m[1];
56
+ $this->set = true;
57
+ return;
58
+ } else {
59
+ usage("{$m[1]} is not a valid parameter to --{$flag}!");
60
+ exit(1);
61
+ }
62
+ }
63
+ }
64
+ }
65
+
66
+ class flag extends argument {
67
+ function flag($flag) {
68
+ global $argc, $argv;
69
+
70
+ $this->set = false;
71
+
72
+ $flag = "--{$flag}";
73
+ for ($i = 1; $i < $argc; $i++) {
74
+ if ($argv[$i] !== $flag)
75
+ continue;
76
+ $this->value = true;
77
+ $this->set = true;
78
+ return;
79
+ }
80
+ }
81
+ }
82
+
83
+ $bits = new multiargument("bits", array("32", "64"));
84
+ $function = new multiargument("function", array("curve25519", "ed25519"));
85
+ $compiler = new multiargument("compiler", array("gcc", "clang", "icc"));
86
+ $with_sse2 = new flag("with-sse2");
87
+ $with_openssl = new flag("with-openssl");
88
+ $no_asm = new flag("no-asm");
89
+
90
+ $err = "";
91
+ if (!$bits->set)
92
+ $err .= "--bits not set\n";
93
+ if (!$function->set)
94
+ $err .= "--function not set\n";
95
+
96
+ if ($err !== "") {
97
+ usage($err);
98
+ exit;
99
+ }
100
+
101
+ $compile = ($compiler->set) ? $compiler->value : "gcc";
102
+ $link = "";
103
+ $flags = "-O3 -m{$bits->value}";
104
+ $ret = 0;
105
+
106
+ if ($with_openssl->set) $link .= " -lssl -lcrypto";
107
+ if (!$with_openssl->set) $flags .= " -DED25519_REFHASH -DED25519_TEST";
108
+ if ($no_asm->set) $flags .= " -DED25519_NO_INLINE_ASM";
109
+
110
+ if ($function->value === "curve25519") {
111
+ runcmd("building ref10..", "{$compile} {$flags} curve25519-ref10.c -c -o curve25519-ref10.o");
112
+ runcmd("building ed25519..", "{$compile} {$flags} ed25519-donna.c -c -o ed25519.o");
113
+ if ($with_sse2->set) {
114
+ runcmd("building ed25519-sse2..", "{$compile} {$flags} ed25519-donna-sse2.c -c -o ed25519-sse2.o -msse2");
115
+ $flags .= " -DED25519_SSE2";
116
+ $link .= " ed25519-sse2.o";
117
+ }
118
+ runcmd("linking..", "{$compile} {$flags} {$link} fuzz-curve25519.c ed25519.o curve25519-ref10.o -o fuzz-curve25519");
119
+ echoln("fuzz-curve25519 built.");
120
+ } else if ($function->value === "ed25519") {
121
+ runcmd("building ref10..", "{$compile} {$flags} ed25519-ref10.c -c -o ed25519-ref10.o");
122
+ runcmd("building ed25519..", "{$compile} {$flags} ed25519-donna.c -c -o ed25519.o");
123
+ if ($with_sse2->set) {
124
+ runcmd("building ed25519-sse2..", "{$compile} {$flags} ed25519-donna-sse2.c -c -o ed25519-sse2.o -msse2");
125
+ $flags .= " -DED25519_SSE2";
126
+ $link .= " ed25519-sse2.o";
127
+ }
128
+ runcmd("linking..", "{$compile} {$flags} {$link} fuzz-ed25519.c ed25519.o ed25519-ref10.o -o fuzz-ed25519");
129
+ echoln("fuzz-ed25519 built.");
130
+ }
131
+
132
+
133
+ cleanup();
134
+ ?>
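Going by the flags parsed above, a typical fuzzing build would be invoked as `php build-nix.php --bits=64 --function=ed25519 --with-sse2` (the exact flag combination is illustrative), which compiles the ref10 and donna objects and links a `fuzz-ed25519` binary that cross-checks the two implementations.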
data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c @@ -0,0 +1,1272 @@
1
+ #include <stdint.h>
2
+
3
+ typedef int32_t crypto_int32;
4
+ typedef int64_t crypto_int64;
5
+ typedef uint64_t crypto_uint64;
6
+
7
+ typedef crypto_int32 fe[10];
8
+
9
+ /*
10
+ h = 0
11
+ */
12
+
13
+ void fe_0(fe h)
14
+ {
15
+ h[0] = 0;
16
+ h[1] = 0;
17
+ h[2] = 0;
18
+ h[3] = 0;
19
+ h[4] = 0;
20
+ h[5] = 0;
21
+ h[6] = 0;
22
+ h[7] = 0;
23
+ h[8] = 0;
24
+ h[9] = 0;
25
+ }
26
+
27
+ /*
28
+ h = 1
29
+ */
30
+
31
+ void fe_1(fe h)
32
+ {
33
+ h[0] = 1;
34
+ h[1] = 0;
35
+ h[2] = 0;
36
+ h[3] = 0;
37
+ h[4] = 0;
38
+ h[5] = 0;
39
+ h[6] = 0;
40
+ h[7] = 0;
41
+ h[8] = 0;
42
+ h[9] = 0;
43
+ }
44
+
45
+ /*
46
+ h = f + g
47
+ Can overlap h with f or g.
48
+
49
+ Preconditions:
50
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
51
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
52
+
53
+ Postconditions:
54
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
55
+ */
56
+
57
+ void fe_add(fe h,fe f,fe g)
58
+ {
59
+ crypto_int32 f0 = f[0];
60
+ crypto_int32 f1 = f[1];
61
+ crypto_int32 f2 = f[2];
62
+ crypto_int32 f3 = f[3];
63
+ crypto_int32 f4 = f[4];
64
+ crypto_int32 f5 = f[5];
65
+ crypto_int32 f6 = f[6];
66
+ crypto_int32 f7 = f[7];
67
+ crypto_int32 f8 = f[8];
68
+ crypto_int32 f9 = f[9];
69
+ crypto_int32 g0 = g[0];
70
+ crypto_int32 g1 = g[1];
71
+ crypto_int32 g2 = g[2];
72
+ crypto_int32 g3 = g[3];
73
+ crypto_int32 g4 = g[4];
74
+ crypto_int32 g5 = g[5];
75
+ crypto_int32 g6 = g[6];
76
+ crypto_int32 g7 = g[7];
77
+ crypto_int32 g8 = g[8];
78
+ crypto_int32 g9 = g[9];
79
+ crypto_int32 h0 = f0 + g0;
80
+ crypto_int32 h1 = f1 + g1;
81
+ crypto_int32 h2 = f2 + g2;
82
+ crypto_int32 h3 = f3 + g3;
83
+ crypto_int32 h4 = f4 + g4;
84
+ crypto_int32 h5 = f5 + g5;
85
+ crypto_int32 h6 = f6 + g6;
86
+ crypto_int32 h7 = f7 + g7;
87
+ crypto_int32 h8 = f8 + g8;
88
+ crypto_int32 h9 = f9 + g9;
89
+ h[0] = h0;
90
+ h[1] = h1;
91
+ h[2] = h2;
92
+ h[3] = h3;
93
+ h[4] = h4;
94
+ h[5] = h5;
95
+ h[6] = h6;
96
+ h[7] = h7;
97
+ h[8] = h8;
98
+ h[9] = h9;
99
+ }
100
+
101
+ /*
102
+ h = f
103
+ */
104
+
105
+ void fe_copy(fe h,fe f)
106
+ {
107
+ crypto_int32 f0 = f[0];
108
+ crypto_int32 f1 = f[1];
109
+ crypto_int32 f2 = f[2];
110
+ crypto_int32 f3 = f[3];
111
+ crypto_int32 f4 = f[4];
112
+ crypto_int32 f5 = f[5];
113
+ crypto_int32 f6 = f[6];
114
+ crypto_int32 f7 = f[7];
115
+ crypto_int32 f8 = f[8];
116
+ crypto_int32 f9 = f[9];
117
+ h[0] = f0;
118
+ h[1] = f1;
119
+ h[2] = f2;
120
+ h[3] = f3;
121
+ h[4] = f4;
122
+ h[5] = f5;
123
+ h[6] = f6;
124
+ h[7] = f7;
125
+ h[8] = f8;
126
+ h[9] = f9;
127
+ }
128
+
129
+
130
+ /*
131
+ Replace (f,g) with (g,f) if b == 1;
132
+ replace (f,g) with (f,g) if b == 0.
133
+
134
+ Preconditions: b in {0,1}.
135
+ */
136
+
137
+ void fe_cswap(fe f,fe g,unsigned int b)
138
+ {
139
+ crypto_int32 f0 = f[0];
140
+ crypto_int32 f1 = f[1];
141
+ crypto_int32 f2 = f[2];
142
+ crypto_int32 f3 = f[3];
143
+ crypto_int32 f4 = f[4];
144
+ crypto_int32 f5 = f[5];
145
+ crypto_int32 f6 = f[6];
146
+ crypto_int32 f7 = f[7];
147
+ crypto_int32 f8 = f[8];
148
+ crypto_int32 f9 = f[9];
149
+ crypto_int32 g0 = g[0];
150
+ crypto_int32 g1 = g[1];
151
+ crypto_int32 g2 = g[2];
152
+ crypto_int32 g3 = g[3];
153
+ crypto_int32 g4 = g[4];
154
+ crypto_int32 g5 = g[5];
155
+ crypto_int32 g6 = g[6];
156
+ crypto_int32 g7 = g[7];
157
+ crypto_int32 g8 = g[8];
158
+ crypto_int32 g9 = g[9];
159
+ crypto_int32 x0 = f0 ^ g0;
160
+ crypto_int32 x1 = f1 ^ g1;
161
+ crypto_int32 x2 = f2 ^ g2;
162
+ crypto_int32 x3 = f3 ^ g3;
163
+ crypto_int32 x4 = f4 ^ g4;
164
+ crypto_int32 x5 = f5 ^ g5;
165
+ crypto_int32 x6 = f6 ^ g6;
166
+ crypto_int32 x7 = f7 ^ g7;
167
+ crypto_int32 x8 = f8 ^ g8;
168
+ crypto_int32 x9 = f9 ^ g9;
169
+ b = -b;
170
+ x0 &= b;
171
+ x1 &= b;
172
+ x2 &= b;
173
+ x3 &= b;
174
+ x4 &= b;
175
+ x5 &= b;
176
+ x6 &= b;
177
+ x7 &= b;
178
+ x8 &= b;
179
+ x9 &= b;
180
+ f[0] = f0 ^ x0;
181
+ f[1] = f1 ^ x1;
182
+ f[2] = f2 ^ x2;
183
+ f[3] = f3 ^ x3;
184
+ f[4] = f4 ^ x4;
185
+ f[5] = f5 ^ x5;
186
+ f[6] = f6 ^ x6;
187
+ f[7] = f7 ^ x7;
188
+ f[8] = f8 ^ x8;
189
+ f[9] = f9 ^ x9;
190
+ g[0] = g0 ^ x0;
191
+ g[1] = g1 ^ x1;
192
+ g[2] = g2 ^ x2;
193
+ g[3] = g3 ^ x3;
194
+ g[4] = g4 ^ x4;
195
+ g[5] = g5 ^ x5;
196
+ g[6] = g6 ^ x6;
197
+ g[7] = g7 ^ x7;
198
+ g[8] = g8 ^ x8;
199
+ g[9] = g9 ^ x9;
200
+ }
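The branch-free swap above relies on `b = -b` turning b ∈ {0,1} into an all-zeros or all-ones mask; written out (a restatement of the code, not part of the source):

```latex
m = -b \in \{\texttt{0x00000000},\ \texttt{0xFFFFFFFF}\},\qquad
f_i' = f_i \oplus \big((f_i \oplus g_i)\wedge m\big),\qquad
g_i' = g_i \oplus \big((f_i \oplus g_i)\wedge m\big)
```

so each limb pair is swapped when b = 1 and left untouched when b = 0, with no data-dependent branch on the key bit.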
201
+
202
+ static crypto_uint64 load_3(const unsigned char *in)
203
+ {
204
+ crypto_uint64 result;
205
+ result = (crypto_uint64) in[0];
206
+ result |= ((crypto_uint64) in[1]) << 8;
207
+ result |= ((crypto_uint64) in[2]) << 16;
208
+ return result;
209
+ }
210
+
211
+ static crypto_uint64 load_4(const unsigned char *in)
212
+ {
213
+ crypto_uint64 result;
214
+ result = (crypto_uint64) in[0];
215
+ result |= ((crypto_uint64) in[1]) << 8;
216
+ result |= ((crypto_uint64) in[2]) << 16;
217
+ result |= ((crypto_uint64) in[3]) << 24;
218
+ return result;
219
+ }
220
+
221
+ void fe_frombytes(fe h,const unsigned char *s)
222
+ {
223
+ crypto_int64 h0 = load_4(s);
224
+ crypto_int64 h1 = load_3(s + 4) << 6;
225
+ crypto_int64 h2 = load_3(s + 7) << 5;
226
+ crypto_int64 h3 = load_3(s + 10) << 3;
227
+ crypto_int64 h4 = load_3(s + 13) << 2;
228
+ crypto_int64 h5 = load_4(s + 16);
229
+ crypto_int64 h6 = load_3(s + 20) << 7;
230
+ crypto_int64 h7 = load_3(s + 23) << 5;
231
+ crypto_int64 h8 = load_3(s + 26) << 4;
232
+ crypto_int64 h9 = load_3(s + 29) << 2;
233
+ crypto_int64 carry0;
234
+ crypto_int64 carry1;
235
+ crypto_int64 carry2;
236
+ crypto_int64 carry3;
237
+ crypto_int64 carry4;
238
+ crypto_int64 carry5;
239
+ crypto_int64 carry6;
240
+ crypto_int64 carry7;
241
+ crypto_int64 carry8;
242
+ crypto_int64 carry9;
243
+
244
+ carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
245
+ carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
246
+ carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
247
+ carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
248
+ carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
249
+
250
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
251
+ carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
252
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
253
+ carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
254
+ carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
255
+
256
+ h[0] = h0;
257
+ h[1] = h1;
258
+ h[2] = h2;
259
+ h[3] = h3;
260
+ h[4] = h4;
261
+ h[5] = h5;
262
+ h[6] = h6;
263
+ h[7] = h7;
264
+ h[8] = h8;
265
+ h[9] = h9;
266
+ }
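fe_frombytes unpacks the 32-byte little-endian encoding into ten limbs in the ref10 radix-2^25.5 representation; the shift amounts above correspond to (a restatement, not new material):

```latex
h \;=\; \sum_{i=0}^{9} h_i\,2^{\lceil 25.5\,i\rceil}
  \;=\; h_0 + 2^{26}h_1 + 2^{51}h_2 + 2^{77}h_3 + 2^{102}h_4
  + 2^{128}h_5 + 2^{153}h_6 + 2^{179}h_7 + 2^{204}h_8 + 2^{230}h_9
  \pmod{2^{255}-19}
```

with even-indexed limbs holding 26 bits and odd-indexed limbs 25 bits, which is what the alternating 2^26/2^25 bounds in the comments refer to.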
267
+
268
+
269
+ /*
270
+ h = f * g
271
+ Can overlap h with f or g.
272
+
273
+ Preconditions:
274
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
275
+ |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
276
+
277
+ Postconditions:
278
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
279
+ */
280
+
281
+ /*
282
+ Notes on implementation strategy:
283
+
284
+ Using schoolbook multiplication.
285
+ Karatsuba would save a little in some cost models.
286
+
287
+ Most multiplications by 2 and 19 are 32-bit precomputations;
288
+ cheaper than 64-bit postcomputations.
289
+
290
+ There is one remaining multiplication by 19 in the carry chain;
291
+ one *19 precomputation can be merged into this,
292
+ but the resulting data flow is considerably less clean.
293
+
294
+ There are 12 carries below.
295
+ 10 of them are 2-way parallelizable and vectorizable.
296
+ Can get away with 11 carries, but then data flow is much deeper.
297
+
298
+ With tighter constraints on inputs can squeeze carries into int32.
299
+ */
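Concretely, the schoolbook product described above folds the high half back in using 2^255 ≡ 19 (mod p). With c_ij = 2 when i and j are both odd (the half-bit lost to the mixed 26/25-bit radix) and c_ij = 1 otherwise, each output limb computed below is (a worked restatement of the code):

```latex
h_k \;=\; \sum_{i+j=k} c_{ij}\,f_i g_j \;+\; 19 \sum_{i+j=k+10} c_{ij}\,f_i g_j,
\qquad 0 \le k \le 9 .
```

The precomputed g*_19 and f*_2 values are exactly these 19·g_j and 2·f_i factors, applied before the 64-bit multiplications.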
300
+
301
+ void fe_mul(fe h,fe f,fe g)
302
+ {
303
+ crypto_int32 f0 = f[0];
304
+ crypto_int32 f1 = f[1];
305
+ crypto_int32 f2 = f[2];
306
+ crypto_int32 f3 = f[3];
307
+ crypto_int32 f4 = f[4];
308
+ crypto_int32 f5 = f[5];
309
+ crypto_int32 f6 = f[6];
310
+ crypto_int32 f7 = f[7];
311
+ crypto_int32 f8 = f[8];
312
+ crypto_int32 f9 = f[9];
313
+ crypto_int32 g0 = g[0];
314
+ crypto_int32 g1 = g[1];
315
+ crypto_int32 g2 = g[2];
316
+ crypto_int32 g3 = g[3];
317
+ crypto_int32 g4 = g[4];
318
+ crypto_int32 g5 = g[5];
319
+ crypto_int32 g6 = g[6];
320
+ crypto_int32 g7 = g[7];
321
+ crypto_int32 g8 = g[8];
322
+ crypto_int32 g9 = g[9];
323
+ crypto_int32 g1_19 = 19 * g1; /* 1.4*2^29 */
324
+ crypto_int32 g2_19 = 19 * g2; /* 1.4*2^30; still ok */
325
+ crypto_int32 g3_19 = 19 * g3;
326
+ crypto_int32 g4_19 = 19 * g4;
327
+ crypto_int32 g5_19 = 19 * g5;
328
+ crypto_int32 g6_19 = 19 * g6;
329
+ crypto_int32 g7_19 = 19 * g7;
330
+ crypto_int32 g8_19 = 19 * g8;
331
+ crypto_int32 g9_19 = 19 * g9;
332
+ crypto_int32 f1_2 = 2 * f1;
333
+ crypto_int32 f3_2 = 2 * f3;
334
+ crypto_int32 f5_2 = 2 * f5;
335
+ crypto_int32 f7_2 = 2 * f7;
336
+ crypto_int32 f9_2 = 2 * f9;
337
+ crypto_int64 f0g0 = f0 * (crypto_int64) g0;
338
+ crypto_int64 f0g1 = f0 * (crypto_int64) g1;
339
+ crypto_int64 f0g2 = f0 * (crypto_int64) g2;
340
+ crypto_int64 f0g3 = f0 * (crypto_int64) g3;
341
+ crypto_int64 f0g4 = f0 * (crypto_int64) g4;
342
+ crypto_int64 f0g5 = f0 * (crypto_int64) g5;
343
+ crypto_int64 f0g6 = f0 * (crypto_int64) g6;
344
+ crypto_int64 f0g7 = f0 * (crypto_int64) g7;
345
+ crypto_int64 f0g8 = f0 * (crypto_int64) g8;
346
+ crypto_int64 f0g9 = f0 * (crypto_int64) g9;
347
+ crypto_int64 f1g0 = f1 * (crypto_int64) g0;
348
+ crypto_int64 f1g1_2 = f1_2 * (crypto_int64) g1;
349
+ crypto_int64 f1g2 = f1 * (crypto_int64) g2;
350
+ crypto_int64 f1g3_2 = f1_2 * (crypto_int64) g3;
351
+ crypto_int64 f1g4 = f1 * (crypto_int64) g4;
352
+ crypto_int64 f1g5_2 = f1_2 * (crypto_int64) g5;
353
+ crypto_int64 f1g6 = f1 * (crypto_int64) g6;
354
+ crypto_int64 f1g7_2 = f1_2 * (crypto_int64) g7;
355
+ crypto_int64 f1g8 = f1 * (crypto_int64) g8;
356
+ crypto_int64 f1g9_38 = f1_2 * (crypto_int64) g9_19;
357
+ crypto_int64 f2g0 = f2 * (crypto_int64) g0;
358
+ crypto_int64 f2g1 = f2 * (crypto_int64) g1;
359
+ crypto_int64 f2g2 = f2 * (crypto_int64) g2;
360
+ crypto_int64 f2g3 = f2 * (crypto_int64) g3;
361
+ crypto_int64 f2g4 = f2 * (crypto_int64) g4;
362
+ crypto_int64 f2g5 = f2 * (crypto_int64) g5;
363
+ crypto_int64 f2g6 = f2 * (crypto_int64) g6;
364
+ crypto_int64 f2g7 = f2 * (crypto_int64) g7;
365
+ crypto_int64 f2g8_19 = f2 * (crypto_int64) g8_19;
366
+ crypto_int64 f2g9_19 = f2 * (crypto_int64) g9_19;
367
+ crypto_int64 f3g0 = f3 * (crypto_int64) g0;
368
+ crypto_int64 f3g1_2 = f3_2 * (crypto_int64) g1;
369
+ crypto_int64 f3g2 = f3 * (crypto_int64) g2;
370
+ crypto_int64 f3g3_2 = f3_2 * (crypto_int64) g3;
371
+ crypto_int64 f3g4 = f3 * (crypto_int64) g4;
372
+ crypto_int64 f3g5_2 = f3_2 * (crypto_int64) g5;
373
+ crypto_int64 f3g6 = f3 * (crypto_int64) g6;
374
+ crypto_int64 f3g7_38 = f3_2 * (crypto_int64) g7_19;
375
+ crypto_int64 f3g8_19 = f3 * (crypto_int64) g8_19;
376
+ crypto_int64 f3g9_38 = f3_2 * (crypto_int64) g9_19;
377
+ crypto_int64 f4g0 = f4 * (crypto_int64) g0;
378
+ crypto_int64 f4g1 = f4 * (crypto_int64) g1;
379
+ crypto_int64 f4g2 = f4 * (crypto_int64) g2;
380
+ crypto_int64 f4g3 = f4 * (crypto_int64) g3;
381
+ crypto_int64 f4g4 = f4 * (crypto_int64) g4;
382
+ crypto_int64 f4g5 = f4 * (crypto_int64) g5;
383
+ crypto_int64 f4g6_19 = f4 * (crypto_int64) g6_19;
384
+ crypto_int64 f4g7_19 = f4 * (crypto_int64) g7_19;
385
+ crypto_int64 f4g8_19 = f4 * (crypto_int64) g8_19;
386
+ crypto_int64 f4g9_19 = f4 * (crypto_int64) g9_19;
387
+ crypto_int64 f5g0 = f5 * (crypto_int64) g0;
388
+ crypto_int64 f5g1_2 = f5_2 * (crypto_int64) g1;
389
+ crypto_int64 f5g2 = f5 * (crypto_int64) g2;
390
+ crypto_int64 f5g3_2 = f5_2 * (crypto_int64) g3;
391
+ crypto_int64 f5g4 = f5 * (crypto_int64) g4;
392
+ crypto_int64 f5g5_38 = f5_2 * (crypto_int64) g5_19;
393
+ crypto_int64 f5g6_19 = f5 * (crypto_int64) g6_19;
394
+ crypto_int64 f5g7_38 = f5_2 * (crypto_int64) g7_19;
395
+ crypto_int64 f5g8_19 = f5 * (crypto_int64) g8_19;
396
+ crypto_int64 f5g9_38 = f5_2 * (crypto_int64) g9_19;
397
+ crypto_int64 f6g0 = f6 * (crypto_int64) g0;
398
+ crypto_int64 f6g1 = f6 * (crypto_int64) g1;
399
+ crypto_int64 f6g2 = f6 * (crypto_int64) g2;
400
+ crypto_int64 f6g3 = f6 * (crypto_int64) g3;
401
+ crypto_int64 f6g4_19 = f6 * (crypto_int64) g4_19;
402
+ crypto_int64 f6g5_19 = f6 * (crypto_int64) g5_19;
403
+ crypto_int64 f6g6_19 = f6 * (crypto_int64) g6_19;
404
+ crypto_int64 f6g7_19 = f6 * (crypto_int64) g7_19;
405
+ crypto_int64 f6g8_19 = f6 * (crypto_int64) g8_19;
406
+ crypto_int64 f6g9_19 = f6 * (crypto_int64) g9_19;
407
+ crypto_int64 f7g0 = f7 * (crypto_int64) g0;
408
+ crypto_int64 f7g1_2 = f7_2 * (crypto_int64) g1;
409
+ crypto_int64 f7g2 = f7 * (crypto_int64) g2;
410
+ crypto_int64 f7g3_38 = f7_2 * (crypto_int64) g3_19;
411
+ crypto_int64 f7g4_19 = f7 * (crypto_int64) g4_19;
412
+ crypto_int64 f7g5_38 = f7_2 * (crypto_int64) g5_19;
413
+ crypto_int64 f7g6_19 = f7 * (crypto_int64) g6_19;
414
+ crypto_int64 f7g7_38 = f7_2 * (crypto_int64) g7_19;
415
+ crypto_int64 f7g8_19 = f7 * (crypto_int64) g8_19;
416
+ crypto_int64 f7g9_38 = f7_2 * (crypto_int64) g9_19;
417
+ crypto_int64 f8g0 = f8 * (crypto_int64) g0;
418
+ crypto_int64 f8g1 = f8 * (crypto_int64) g1;
419
+ crypto_int64 f8g2_19 = f8 * (crypto_int64) g2_19;
420
+ crypto_int64 f8g3_19 = f8 * (crypto_int64) g3_19;
421
+ crypto_int64 f8g4_19 = f8 * (crypto_int64) g4_19;
422
+ crypto_int64 f8g5_19 = f8 * (crypto_int64) g5_19;
423
+ crypto_int64 f8g6_19 = f8 * (crypto_int64) g6_19;
424
+ crypto_int64 f8g7_19 = f8 * (crypto_int64) g7_19;
425
+ crypto_int64 f8g8_19 = f8 * (crypto_int64) g8_19;
426
+ crypto_int64 f8g9_19 = f8 * (crypto_int64) g9_19;
427
+ crypto_int64 f9g0 = f9 * (crypto_int64) g0;
428
+ crypto_int64 f9g1_38 = f9_2 * (crypto_int64) g1_19;
429
+ crypto_int64 f9g2_19 = f9 * (crypto_int64) g2_19;
430
+ crypto_int64 f9g3_38 = f9_2 * (crypto_int64) g3_19;
431
+ crypto_int64 f9g4_19 = f9 * (crypto_int64) g4_19;
432
+ crypto_int64 f9g5_38 = f9_2 * (crypto_int64) g5_19;
433
+ crypto_int64 f9g6_19 = f9 * (crypto_int64) g6_19;
434
+ crypto_int64 f9g7_38 = f9_2 * (crypto_int64) g7_19;
435
+ crypto_int64 f9g8_19 = f9 * (crypto_int64) g8_19;
436
+ crypto_int64 f9g9_38 = f9_2 * (crypto_int64) g9_19;
437
+ crypto_int64 h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
438
+ crypto_int64 h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
439
+ crypto_int64 h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
440
+ crypto_int64 h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
441
+ crypto_int64 h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
442
+ crypto_int64 h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
443
+ crypto_int64 h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
444
+ crypto_int64 h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
445
+ crypto_int64 h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
446
+ crypto_int64 h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
447
+ crypto_int64 carry0;
448
+ crypto_int64 carry1;
449
+ crypto_int64 carry2;
450
+ crypto_int64 carry3;
451
+ crypto_int64 carry4;
452
+ crypto_int64 carry5;
453
+ crypto_int64 carry6;
454
+ crypto_int64 carry7;
455
+ crypto_int64 carry8;
456
+ crypto_int64 carry9;
457
+
458
+ /*
459
+ |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
460
+ i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
461
+ |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
462
+ i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
463
+ */
464
+
465
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
466
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
467
+ /* |h0| <= 2^25 */
468
+ /* |h4| <= 2^25 */
469
+ /* |h1| <= 1.51*2^58 */
470
+ /* |h5| <= 1.51*2^58 */
471
+
472
+ carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
473
+ carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
474
+ /* |h1| <= 2^24; from now on fits into int32 */
475
+ /* |h5| <= 2^24; from now on fits into int32 */
476
+ /* |h2| <= 1.21*2^59 */
477
+ /* |h6| <= 1.21*2^59 */
478
+
479
+ carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
480
+ carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
481
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
482
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
483
+ /* |h3| <= 1.51*2^58 */
484
+ /* |h7| <= 1.51*2^58 */
485
+
486
+ carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
487
+ carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
488
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
489
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
490
+ /* |h4| <= 1.52*2^33 */
491
+ /* |h8| <= 1.52*2^33 */
492
+
493
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
494
+ carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
495
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
496
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
497
+ /* |h5| <= 1.01*2^24 */
498
+ /* |h9| <= 1.51*2^58 */
499
+
500
+ carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
501
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
502
+ /* |h0| <= 1.8*2^37 */
503
+
504
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
505
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
506
+ /* |h1| <= 1.01*2^24 */
507
+
508
+ h[0] = h0;
509
+ h[1] = h1;
510
+ h[2] = h2;
511
+ h[3] = h3;
512
+ h[4] = h4;
513
+ h[5] = h5;
514
+ h[6] = h6;
515
+ h[7] = h7;
516
+ h[8] = h8;
517
+ h[9] = h9;
518
+ }
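Each carry line in the chain above recentres one limb into roughly ±2^25 (even limbs) or ±2^24 (odd limbs) and pushes the excess into the next limb, with the excess from limb 9 wrapping back into limb 0 multiplied by 19. A minimal, self-contained sketch of a single such step (hypothetical helper names, not part of the gem; like the original it assumes arithmetic right shift of negative values):

```c
#include <stdint.h>
#include <stdio.h>

/* One signed carry step as used in the chains above: `bits` is 26 for even
   limbs and 25 for odd limbs.  Adding the half-range bias before the shift
   recentres *lo into [-2^(bits-1), 2^(bits-1)) rather than [0, 2^bits),
   which keeps the later 32x32->64 bit products from overflowing. */
static void carry_step(int64_t *lo, int64_t *hi, int bits)
{
    int64_t c = (*lo + ((int64_t)1 << (bits - 1))) >> bits;
    *hi += c;          /* excess moves up one limb */
    *lo -= c << bits;  /* low limb is now recentred */
}

int main(void)
{
    int64_t h0 = (int64_t)3 << 40, h1 = 7;   /* deliberately oversized limb */
    carry_step(&h0, &h1, 26);
    printf("h0=%lld h1=%lld\n", (long long)h0, (long long)h1);
    return 0;
}
```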
519
+
520
+ /*
521
+ h = f * 121666
522
+ Can overlap h with f.
523
+
524
+ Preconditions:
525
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
526
+
527
+ Postconditions:
528
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
529
+ */
530
+
531
+ void fe_mul121666(fe h,fe f)
532
+ {
533
+ crypto_int32 f0 = f[0];
534
+ crypto_int32 f1 = f[1];
535
+ crypto_int32 f2 = f[2];
536
+ crypto_int32 f3 = f[3];
537
+ crypto_int32 f4 = f[4];
538
+ crypto_int32 f5 = f[5];
539
+ crypto_int32 f6 = f[6];
540
+ crypto_int32 f7 = f[7];
541
+ crypto_int32 f8 = f[8];
542
+ crypto_int32 f9 = f[9];
543
+ crypto_int64 h0 = f0 * (crypto_int64) 121666;
544
+ crypto_int64 h1 = f1 * (crypto_int64) 121666;
545
+ crypto_int64 h2 = f2 * (crypto_int64) 121666;
546
+ crypto_int64 h3 = f3 * (crypto_int64) 121666;
547
+ crypto_int64 h4 = f4 * (crypto_int64) 121666;
548
+ crypto_int64 h5 = f5 * (crypto_int64) 121666;
549
+ crypto_int64 h6 = f6 * (crypto_int64) 121666;
550
+ crypto_int64 h7 = f7 * (crypto_int64) 121666;
551
+ crypto_int64 h8 = f8 * (crypto_int64) 121666;
552
+ crypto_int64 h9 = f9 * (crypto_int64) 121666;
553
+ crypto_int64 carry0;
554
+ crypto_int64 carry1;
555
+ crypto_int64 carry2;
556
+ crypto_int64 carry3;
557
+ crypto_int64 carry4;
558
+ crypto_int64 carry5;
559
+ crypto_int64 carry6;
560
+ crypto_int64 carry7;
561
+ crypto_int64 carry8;
562
+ crypto_int64 carry9;
563
+
564
+ carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
565
+ carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
566
+ carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
567
+ carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
568
+ carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
569
+
570
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
571
+ carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
572
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
573
+ carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
574
+ carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
575
+
576
+ h[0] = h0;
577
+ h[1] = h1;
578
+ h[2] = h2;
579
+ h[3] = h3;
580
+ h[4] = h4;
581
+ h[5] = h5;
582
+ h[6] = h6;
583
+ h[7] = h7;
584
+ h[8] = h8;
585
+ h[9] = h9;
586
+ }
587
+
588
+ /*
589
+ h = f * f
590
+ Can overlap h with f.
591
+
592
+ Preconditions:
593
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
594
+
595
+ Postconditions:
596
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
597
+ */
598
+
599
+ /*
600
+ See fe_mul.c for discussion of implementation strategy.
601
+ */
602
+
603
+ void fe_sq(fe h,fe f)
604
+ {
605
+ crypto_int32 f0 = f[0];
606
+ crypto_int32 f1 = f[1];
607
+ crypto_int32 f2 = f[2];
608
+ crypto_int32 f3 = f[3];
609
+ crypto_int32 f4 = f[4];
610
+ crypto_int32 f5 = f[5];
611
+ crypto_int32 f6 = f[6];
612
+ crypto_int32 f7 = f[7];
613
+ crypto_int32 f8 = f[8];
614
+ crypto_int32 f9 = f[9];
615
+ crypto_int32 f0_2 = 2 * f0;
616
+ crypto_int32 f1_2 = 2 * f1;
617
+ crypto_int32 f2_2 = 2 * f2;
618
+ crypto_int32 f3_2 = 2 * f3;
619
+ crypto_int32 f4_2 = 2 * f4;
620
+ crypto_int32 f5_2 = 2 * f5;
621
+ crypto_int32 f6_2 = 2 * f6;
622
+ crypto_int32 f7_2 = 2 * f7;
623
+ crypto_int32 f5_38 = 38 * f5; /* 1.31*2^30 */
624
+ crypto_int32 f6_19 = 19 * f6; /* 1.31*2^30 */
625
+ crypto_int32 f7_38 = 38 * f7; /* 1.31*2^30 */
626
+ crypto_int32 f8_19 = 19 * f8; /* 1.31*2^30 */
627
+ crypto_int32 f9_38 = 38 * f9; /* 1.31*2^30 */
628
+ crypto_int64 f0f0 = f0 * (crypto_int64) f0;
629
+ crypto_int64 f0f1_2 = f0_2 * (crypto_int64) f1;
630
+ crypto_int64 f0f2_2 = f0_2 * (crypto_int64) f2;
631
+ crypto_int64 f0f3_2 = f0_2 * (crypto_int64) f3;
632
+ crypto_int64 f0f4_2 = f0_2 * (crypto_int64) f4;
633
+ crypto_int64 f0f5_2 = f0_2 * (crypto_int64) f5;
634
+ crypto_int64 f0f6_2 = f0_2 * (crypto_int64) f6;
635
+ crypto_int64 f0f7_2 = f0_2 * (crypto_int64) f7;
636
+ crypto_int64 f0f8_2 = f0_2 * (crypto_int64) f8;
637
+ crypto_int64 f0f9_2 = f0_2 * (crypto_int64) f9;
638
+ crypto_int64 f1f1_2 = f1_2 * (crypto_int64) f1;
639
+ crypto_int64 f1f2_2 = f1_2 * (crypto_int64) f2;
640
+ crypto_int64 f1f3_4 = f1_2 * (crypto_int64) f3_2;
641
+ crypto_int64 f1f4_2 = f1_2 * (crypto_int64) f4;
642
+ crypto_int64 f1f5_4 = f1_2 * (crypto_int64) f5_2;
643
+ crypto_int64 f1f6_2 = f1_2 * (crypto_int64) f6;
644
+ crypto_int64 f1f7_4 = f1_2 * (crypto_int64) f7_2;
645
+ crypto_int64 f1f8_2 = f1_2 * (crypto_int64) f8;
646
+ crypto_int64 f1f9_76 = f1_2 * (crypto_int64) f9_38;
647
+ crypto_int64 f2f2 = f2 * (crypto_int64) f2;
648
+ crypto_int64 f2f3_2 = f2_2 * (crypto_int64) f3;
649
+ crypto_int64 f2f4_2 = f2_2 * (crypto_int64) f4;
650
+ crypto_int64 f2f5_2 = f2_2 * (crypto_int64) f5;
651
+ crypto_int64 f2f6_2 = f2_2 * (crypto_int64) f6;
652
+ crypto_int64 f2f7_2 = f2_2 * (crypto_int64) f7;
653
+ crypto_int64 f2f8_38 = f2_2 * (crypto_int64) f8_19;
654
+ crypto_int64 f2f9_38 = f2 * (crypto_int64) f9_38;
655
+ crypto_int64 f3f3_2 = f3_2 * (crypto_int64) f3;
656
+ crypto_int64 f3f4_2 = f3_2 * (crypto_int64) f4;
657
+ crypto_int64 f3f5_4 = f3_2 * (crypto_int64) f5_2;
658
+ crypto_int64 f3f6_2 = f3_2 * (crypto_int64) f6;
659
+ crypto_int64 f3f7_76 = f3_2 * (crypto_int64) f7_38;
660
+ crypto_int64 f3f8_38 = f3_2 * (crypto_int64) f8_19;
661
+ crypto_int64 f3f9_76 = f3_2 * (crypto_int64) f9_38;
662
+ crypto_int64 f4f4 = f4 * (crypto_int64) f4;
663
+ crypto_int64 f4f5_2 = f4_2 * (crypto_int64) f5;
664
+ crypto_int64 f4f6_38 = f4_2 * (crypto_int64) f6_19;
665
+ crypto_int64 f4f7_38 = f4 * (crypto_int64) f7_38;
666
+ crypto_int64 f4f8_38 = f4_2 * (crypto_int64) f8_19;
667
+ crypto_int64 f4f9_38 = f4 * (crypto_int64) f9_38;
668
+ crypto_int64 f5f5_38 = f5 * (crypto_int64) f5_38;
669
+ crypto_int64 f5f6_38 = f5_2 * (crypto_int64) f6_19;
670
+ crypto_int64 f5f7_76 = f5_2 * (crypto_int64) f7_38;
671
+ crypto_int64 f5f8_38 = f5_2 * (crypto_int64) f8_19;
672
+ crypto_int64 f5f9_76 = f5_2 * (crypto_int64) f9_38;
673
+ crypto_int64 f6f6_19 = f6 * (crypto_int64) f6_19;
674
+ crypto_int64 f6f7_38 = f6 * (crypto_int64) f7_38;
675
+ crypto_int64 f6f8_38 = f6_2 * (crypto_int64) f8_19;
676
+ crypto_int64 f6f9_38 = f6 * (crypto_int64) f9_38;
677
+ crypto_int64 f7f7_38 = f7 * (crypto_int64) f7_38;
678
+ crypto_int64 f7f8_38 = f7_2 * (crypto_int64) f8_19;
679
+ crypto_int64 f7f9_76 = f7_2 * (crypto_int64) f9_38;
680
+ crypto_int64 f8f8_19 = f8 * (crypto_int64) f8_19;
681
+ crypto_int64 f8f9_38 = f8 * (crypto_int64) f9_38;
682
+ crypto_int64 f9f9_38 = f9 * (crypto_int64) f9_38;
683
+ crypto_int64 h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
684
+ crypto_int64 h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
685
+ crypto_int64 h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
686
+ crypto_int64 h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
687
+ crypto_int64 h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
688
+ crypto_int64 h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
689
+ crypto_int64 h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
690
+ crypto_int64 h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
691
+ crypto_int64 h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
692
+ crypto_int64 h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
693
+ crypto_int64 carry0;
694
+ crypto_int64 carry1;
695
+ crypto_int64 carry2;
696
+ crypto_int64 carry3;
697
+ crypto_int64 carry4;
698
+ crypto_int64 carry5;
699
+ crypto_int64 carry6;
700
+ crypto_int64 carry7;
701
+ crypto_int64 carry8;
702
+ crypto_int64 carry9;
703
+
704
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
705
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
706
+
707
+ carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
708
+ carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
709
+
710
+ carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
711
+ carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
712
+
713
+ carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
714
+ carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
715
+
716
+ carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
717
+ carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
718
+
719
+ carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
720
+
721
+ carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
722
+
723
+ h[0] = h0;
724
+ h[1] = h1;
725
+ h[2] = h2;
726
+ h[3] = h3;
727
+ h[4] = h4;
728
+ h[5] = h5;
729
+ h[6] = h6;
730
+ h[7] = h7;
731
+ h[8] = h8;
732
+ h[9] = h9;
733
+ }
734
+
735
+ /*
736
+ h = f - g
737
+ Can overlap h with f or g.
738
+
739
+ Preconditions:
740
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
741
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
742
+
743
+ Postconditions:
744
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
745
+ */
746
+
747
+ void fe_sub(fe h,fe f,fe g)
748
+ {
749
+ crypto_int32 f0 = f[0];
750
+ crypto_int32 f1 = f[1];
751
+ crypto_int32 f2 = f[2];
752
+ crypto_int32 f3 = f[3];
753
+ crypto_int32 f4 = f[4];
754
+ crypto_int32 f5 = f[5];
755
+ crypto_int32 f6 = f[6];
756
+ crypto_int32 f7 = f[7];
757
+ crypto_int32 f8 = f[8];
758
+ crypto_int32 f9 = f[9];
759
+ crypto_int32 g0 = g[0];
760
+ crypto_int32 g1 = g[1];
761
+ crypto_int32 g2 = g[2];
762
+ crypto_int32 g3 = g[3];
763
+ crypto_int32 g4 = g[4];
764
+ crypto_int32 g5 = g[5];
765
+ crypto_int32 g6 = g[6];
766
+ crypto_int32 g7 = g[7];
767
+ crypto_int32 g8 = g[8];
768
+ crypto_int32 g9 = g[9];
769
+ crypto_int32 h0 = f0 - g0;
770
+ crypto_int32 h1 = f1 - g1;
771
+ crypto_int32 h2 = f2 - g2;
772
+ crypto_int32 h3 = f3 - g3;
773
+ crypto_int32 h4 = f4 - g4;
774
+ crypto_int32 h5 = f5 - g5;
775
+ crypto_int32 h6 = f6 - g6;
776
+ crypto_int32 h7 = f7 - g7;
777
+ crypto_int32 h8 = f8 - g8;
778
+ crypto_int32 h9 = f9 - g9;
779
+ h[0] = h0;
780
+ h[1] = h1;
781
+ h[2] = h2;
782
+ h[3] = h3;
783
+ h[4] = h4;
784
+ h[5] = h5;
785
+ h[6] = h6;
786
+ h[7] = h7;
787
+ h[8] = h8;
788
+ h[9] = h9;
789
+ }
790
+
791
+ /*
792
+ Preconditions:
793
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
794
+
795
+ Write p=2^255-19; q=floor(h/p).
796
+ Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
797
+
798
+ Proof:
799
+ Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
800
+ Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
801
+
802
+ Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
803
+ Then 0<y<1.
804
+
805
+ Write r=h-pq.
806
+ Have 0<=r<=p-1=2^255-20.
807
+ Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
808
+
809
+ Write x=r+19(2^-255)r+y.
810
+ Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
811
+
812
+ Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
813
+ so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
814
+ */
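In cleaner notation, the claim proved above is (restating the comment, with p = 2^255 − 19):

```latex
q \;=\; \Big\lfloor 2^{-255}\big(h + 19\cdot 2^{-25} h_9 + 2^{-1}\big)\Big\rfloor
  \;=\; \Big\lfloor \tfrac{h}{p} \Big\rfloor ,
```

so the routine below can recover q with one extra pass over the limbs and then output h − qp ∈ [0, p − 1], serialized as 32 little-endian bytes.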
815
+
816
+ void fe_tobytes(unsigned char *s,fe h)
817
+ {
818
+ crypto_int32 h0 = h[0];
819
+ crypto_int32 h1 = h[1];
820
+ crypto_int32 h2 = h[2];
821
+ crypto_int32 h3 = h[3];
822
+ crypto_int32 h4 = h[4];
823
+ crypto_int32 h5 = h[5];
824
+ crypto_int32 h6 = h[6];
825
+ crypto_int32 h7 = h[7];
826
+ crypto_int32 h8 = h[8];
827
+ crypto_int32 h9 = h[9];
828
+ crypto_int32 q;
829
+ crypto_int32 carry0;
830
+ crypto_int32 carry1;
831
+ crypto_int32 carry2;
832
+ crypto_int32 carry3;
833
+ crypto_int32 carry4;
834
+ crypto_int32 carry5;
835
+ crypto_int32 carry6;
836
+ crypto_int32 carry7;
837
+ crypto_int32 carry8;
838
+ crypto_int32 carry9;
839
+
840
+ q = (19 * h9 + (((crypto_int32) 1) << 24)) >> 25;
841
+ q = (h0 + q) >> 26;
842
+ q = (h1 + q) >> 25;
843
+ q = (h2 + q) >> 26;
844
+ q = (h3 + q) >> 25;
845
+ q = (h4 + q) >> 26;
846
+ q = (h5 + q) >> 25;
847
+ q = (h6 + q) >> 26;
848
+ q = (h7 + q) >> 25;
849
+ q = (h8 + q) >> 26;
850
+ q = (h9 + q) >> 25;
851
+
852
+ /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
853
+ h0 += 19 * q;
854
+ /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
855
+
856
+ carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
857
+ carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
858
+ carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
859
+ carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
860
+ carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
861
+ carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
862
+ carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
863
+ carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
864
+ carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
865
+ carry9 = h9 >> 25; h9 -= carry9 << 25;
866
+ /* h10 = carry9 */
867
+
868
+ /*
869
+ Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
870
+ Have h0+...+2^230 h9 between 0 and 2^255-1;
871
+ evidently 2^255 h10-2^255 q = 0.
872
+ Goal: Output h0+...+2^230 h9.
873
+ */
874
+
875
+ s[0] = h0 >> 0;
876
+ s[1] = h0 >> 8;
877
+ s[2] = h0 >> 16;
878
+ s[3] = (h0 >> 24) | (h1 << 2);
879
+ s[4] = h1 >> 6;
880
+ s[5] = h1 >> 14;
881
+ s[6] = (h1 >> 22) | (h2 << 3);
882
+ s[7] = h2 >> 5;
883
+ s[8] = h2 >> 13;
884
+ s[9] = (h2 >> 21) | (h3 << 5);
885
+ s[10] = h3 >> 3;
886
+ s[11] = h3 >> 11;
887
+ s[12] = (h3 >> 19) | (h4 << 6);
888
+ s[13] = h4 >> 2;
889
+ s[14] = h4 >> 10;
890
+ s[15] = h4 >> 18;
891
+ s[16] = h5 >> 0;
892
+ s[17] = h5 >> 8;
893
+ s[18] = h5 >> 16;
894
+ s[19] = (h5 >> 24) | (h6 << 1);
895
+ s[20] = h6 >> 7;
896
+ s[21] = h6 >> 15;
897
+ s[22] = (h6 >> 23) | (h7 << 3);
898
+ s[23] = h7 >> 5;
899
+ s[24] = h7 >> 13;
900
+ s[25] = (h7 >> 21) | (h8 << 4);
901
+ s[26] = h8 >> 4;
902
+ s[27] = h8 >> 12;
903
+ s[28] = (h8 >> 20) | (h9 << 6);
904
+ s[29] = h9 >> 2;
905
+ s[30] = h9 >> 10;
906
+ s[31] = h9 >> 18;
907
+ }
908
+
909
+ void fe_invert(fe out,fe z)
910
+ {
911
+ fe t0;
912
+ fe t1;
913
+ fe t2;
914
+ fe t3;
915
+ int i;
916
+
917
+
918
+ /* qhasm: fe z1 */
919
+
920
+ /* qhasm: fe z2 */
921
+
922
+ /* qhasm: fe z8 */
923
+
924
+ /* qhasm: fe z9 */
925
+
926
+ /* qhasm: fe z11 */
927
+
928
+ /* qhasm: fe z22 */
929
+
930
+ /* qhasm: fe z_5_0 */
931
+
932
+ /* qhasm: fe z_10_5 */
933
+
934
+ /* qhasm: fe z_10_0 */
935
+
936
+ /* qhasm: fe z_20_10 */
937
+
938
+ /* qhasm: fe z_20_0 */
939
+
940
+ /* qhasm: fe z_40_20 */
941
+
942
+ /* qhasm: fe z_40_0 */
943
+
944
+ /* qhasm: fe z_50_10 */
945
+
946
+ /* qhasm: fe z_50_0 */
947
+
948
+ /* qhasm: fe z_100_50 */
949
+
950
+ /* qhasm: fe z_100_0 */
951
+
952
+ /* qhasm: fe z_200_100 */
953
+
954
+ /* qhasm: fe z_200_0 */
955
+
956
+ /* qhasm: fe z_250_50 */
957
+
958
+ /* qhasm: fe z_250_0 */
959
+
960
+ /* qhasm: fe z_255_5 */
961
+
962
+ /* qhasm: fe z_255_21 */
963
+
964
+ /* qhasm: enter pow225521 */
965
+
966
+ /* qhasm: z2 = z1^2^1 */
967
+ /* asm 1: fe_sq(>z2=fe#1,<z1=fe#11); for (i = 1;i < 1;++i) fe_sq(>z2=fe#1,>z2=fe#1); */
968
+ /* asm 2: fe_sq(>z2=t0,<z1=z); for (i = 1;i < 1;++i) fe_sq(>z2=t0,>z2=t0); */
969
+ fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0);
970
+
971
+ /* qhasm: z8 = z2^2^2 */
972
+ /* asm 1: fe_sq(>z8=fe#2,<z2=fe#1); for (i = 1;i < 2;++i) fe_sq(>z8=fe#2,>z8=fe#2); */
973
+ /* asm 2: fe_sq(>z8=t1,<z2=t0); for (i = 1;i < 2;++i) fe_sq(>z8=t1,>z8=t1); */
974
+ fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1);
975
+
976
+ /* qhasm: z9 = z1*z8 */
977
+ /* asm 1: fe_mul(>z9=fe#2,<z1=fe#11,<z8=fe#2); */
978
+ /* asm 2: fe_mul(>z9=t1,<z1=z,<z8=t1); */
979
+ fe_mul(t1,z,t1);
980
+
981
+ /* qhasm: z11 = z2*z9 */
982
+ /* asm 1: fe_mul(>z11=fe#1,<z2=fe#1,<z9=fe#2); */
983
+ /* asm 2: fe_mul(>z11=t0,<z2=t0,<z9=t1); */
984
+ fe_mul(t0,t0,t1);
985
+
986
+ /* qhasm: z22 = z11^2^1 */
987
+ /* asm 1: fe_sq(>z22=fe#3,<z11=fe#1); for (i = 1;i < 1;++i) fe_sq(>z22=fe#3,>z22=fe#3); */
988
+ /* asm 2: fe_sq(>z22=t2,<z11=t0); for (i = 1;i < 1;++i) fe_sq(>z22=t2,>z22=t2); */
989
+ fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2);
990
+
991
+ /* qhasm: z_5_0 = z9*z22 */
992
+ /* asm 1: fe_mul(>z_5_0=fe#2,<z9=fe#2,<z22=fe#3); */
993
+ /* asm 2: fe_mul(>z_5_0=t1,<z9=t1,<z22=t2); */
994
+ fe_mul(t1,t1,t2);
995
+
996
+ /* qhasm: z_10_5 = z_5_0^2^5 */
997
+ /* asm 1: fe_sq(>z_10_5=fe#3,<z_5_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_10_5=fe#3,>z_10_5=fe#3); */
998
+ /* asm 2: fe_sq(>z_10_5=t2,<z_5_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_10_5=t2,>z_10_5=t2); */
999
+ fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2);
1000
+
1001
+ /* qhasm: z_10_0 = z_10_5*z_5_0 */
1002
+ /* asm 1: fe_mul(>z_10_0=fe#2,<z_10_5=fe#3,<z_5_0=fe#2); */
1003
+ /* asm 2: fe_mul(>z_10_0=t1,<z_10_5=t2,<z_5_0=t1); */
1004
+ fe_mul(t1,t2,t1);
1005
+
1006
+ /* qhasm: z_20_10 = z_10_0^2^10 */
1007
+ /* asm 1: fe_sq(>z_20_10=fe#3,<z_10_0=fe#2); for (i = 1;i < 10;++i) fe_sq(>z_20_10=fe#3,>z_20_10=fe#3); */
1008
+ /* asm 2: fe_sq(>z_20_10=t2,<z_10_0=t1); for (i = 1;i < 10;++i) fe_sq(>z_20_10=t2,>z_20_10=t2); */
1009
+ fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2);
1010
+
1011
+ /* qhasm: z_20_0 = z_20_10*z_10_0 */
1012
+ /* asm 1: fe_mul(>z_20_0=fe#3,<z_20_10=fe#3,<z_10_0=fe#2); */
1013
+ /* asm 2: fe_mul(>z_20_0=t2,<z_20_10=t2,<z_10_0=t1); */
1014
+ fe_mul(t2,t2,t1);
1015
+
1016
+ /* qhasm: z_40_20 = z_20_0^2^20 */
1017
+ /* asm 1: fe_sq(>z_40_20=fe#4,<z_20_0=fe#3); for (i = 1;i < 20;++i) fe_sq(>z_40_20=fe#4,>z_40_20=fe#4); */
1018
+ /* asm 2: fe_sq(>z_40_20=t3,<z_20_0=t2); for (i = 1;i < 20;++i) fe_sq(>z_40_20=t3,>z_40_20=t3); */
1019
+ fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3);
1020
+
1021
+ /* qhasm: z_40_0 = z_40_20*z_20_0 */
1022
+ /* asm 1: fe_mul(>z_40_0=fe#3,<z_40_20=fe#4,<z_20_0=fe#3); */
1023
+ /* asm 2: fe_mul(>z_40_0=t2,<z_40_20=t3,<z_20_0=t2); */
1024
+ fe_mul(t2,t3,t2);
1025
+
1026
+ /* qhasm: z_50_10 = z_40_0^2^10 */
1027
+ /* asm 1: fe_sq(>z_50_10=fe#3,<z_40_0=fe#3); for (i = 1;i < 10;++i) fe_sq(>z_50_10=fe#3,>z_50_10=fe#3); */
1028
+ /* asm 2: fe_sq(>z_50_10=t2,<z_40_0=t2); for (i = 1;i < 10;++i) fe_sq(>z_50_10=t2,>z_50_10=t2); */
1029
+ fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2);
1030
+
1031
+ /* qhasm: z_50_0 = z_50_10*z_10_0 */
1032
+ /* asm 1: fe_mul(>z_50_0=fe#2,<z_50_10=fe#3,<z_10_0=fe#2); */
1033
+ /* asm 2: fe_mul(>z_50_0=t1,<z_50_10=t2,<z_10_0=t1); */
1034
+ fe_mul(t1,t2,t1);
1035
+
1036
+ /* qhasm: z_100_50 = z_50_0^2^50 */
1037
+ /* asm 1: fe_sq(>z_100_50=fe#3,<z_50_0=fe#2); for (i = 1;i < 50;++i) fe_sq(>z_100_50=fe#3,>z_100_50=fe#3); */
1038
+ /* asm 2: fe_sq(>z_100_50=t2,<z_50_0=t1); for (i = 1;i < 50;++i) fe_sq(>z_100_50=t2,>z_100_50=t2); */
1039
+ fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2);
1040
+
1041
+ /* qhasm: z_100_0 = z_100_50*z_50_0 */
1042
+ /* asm 1: fe_mul(>z_100_0=fe#3,<z_100_50=fe#3,<z_50_0=fe#2); */
1043
+ /* asm 2: fe_mul(>z_100_0=t2,<z_100_50=t2,<z_50_0=t1); */
1044
+ fe_mul(t2,t2,t1);
1045
+
1046
+ /* qhasm: z_200_100 = z_100_0^2^100 */
1047
+ /* asm 1: fe_sq(>z_200_100=fe#4,<z_100_0=fe#3); for (i = 1;i < 100;++i) fe_sq(>z_200_100=fe#4,>z_200_100=fe#4); */
1048
+ /* asm 2: fe_sq(>z_200_100=t3,<z_100_0=t2); for (i = 1;i < 100;++i) fe_sq(>z_200_100=t3,>z_200_100=t3); */
1049
+ fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3);
1050
+
1051
+ /* qhasm: z_200_0 = z_200_100*z_100_0 */
1052
+ /* asm 1: fe_mul(>z_200_0=fe#3,<z_200_100=fe#4,<z_100_0=fe#3); */
1053
+ /* asm 2: fe_mul(>z_200_0=t2,<z_200_100=t3,<z_100_0=t2); */
1054
+ fe_mul(t2,t3,t2);
1055
+
1056
+ /* qhasm: z_250_50 = z_200_0^2^50 */
1057
+ /* asm 1: fe_sq(>z_250_50=fe#3,<z_200_0=fe#3); for (i = 1;i < 50;++i) fe_sq(>z_250_50=fe#3,>z_250_50=fe#3); */
1058
+ /* asm 2: fe_sq(>z_250_50=t2,<z_200_0=t2); for (i = 1;i < 50;++i) fe_sq(>z_250_50=t2,>z_250_50=t2); */
1059
+ fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2);
1060
+
1061
+ /* qhasm: z_250_0 = z_250_50*z_50_0 */
1062
+ /* asm 1: fe_mul(>z_250_0=fe#2,<z_250_50=fe#3,<z_50_0=fe#2); */
1063
+ /* asm 2: fe_mul(>z_250_0=t1,<z_250_50=t2,<z_50_0=t1); */
1064
+ fe_mul(t1,t2,t1);
1065
+
1066
+ /* qhasm: z_255_5 = z_250_0^2^5 */
1067
+ /* asm 1: fe_sq(>z_255_5=fe#2,<z_250_0=fe#2); for (i = 1;i < 5;++i) fe_sq(>z_255_5=fe#2,>z_255_5=fe#2); */
1068
+ /* asm 2: fe_sq(>z_255_5=t1,<z_250_0=t1); for (i = 1;i < 5;++i) fe_sq(>z_255_5=t1,>z_255_5=t1); */
1069
+ fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1);
1070
+
1071
+ /* qhasm: z_255_21 = z_255_5*z11 */
1072
+ /* asm 1: fe_mul(>z_255_21=fe#12,<z_255_5=fe#2,<z11=fe#1); */
1073
+ /* asm 2: fe_mul(>z_255_21=out,<z_255_5=t1,<z11=t0); */
1074
+ fe_mul(out,t1,t0);
1075
+
1076
+ /* qhasm: return */
1077
+
1078
+ return;
1079
+ }
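fe_invert is a straight-line Fermat inversion: since p = 2^255 − 19 is prime, z^{-1} = z^{p−2}, and the square-and-multiply chain above assembles the exponent as (a restatement using the z_a_b names from the comments):

```latex
z^{-1} \;=\; z^{\,p-2} \;=\; z^{\,2^{255}-21}
\;=\; \Big(z^{\,2^{250}-1}\Big)^{2^{5}} \cdot z^{11},
```

which is why the final two steps square z_250_0 five times and multiply the result by z11.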
1080
+
1081
+
1082
+ int crypto_scalarmult_ref10(unsigned char *q,
1083
+ const unsigned char *n,
1084
+ const unsigned char *p)
1085
+ {
1086
+ unsigned char e[32];
1087
+ unsigned int i;
1088
+ fe x1;
1089
+ fe x2;
1090
+ fe z2;
1091
+ fe x3;
1092
+ fe z3;
1093
+ fe tmp0;
1094
+ fe tmp1;
1095
+ int pos;
1096
+ unsigned int swap;
1097
+ unsigned int b;
1098
+
1099
+ for (i = 0;i < 32;++i) e[i] = n[i];
1100
+ e[0] &= 248;
1101
+ e[31] &= 127;
1102
+ e[31] |= 64;
1103
+ fe_frombytes(x1,p);
1104
+ fe_1(x2);
1105
+ fe_0(z2);
1106
+ fe_copy(x3,x1);
1107
+ fe_1(z3);
1108
+
1109
+ swap = 0;
1110
+ for (pos = 254;pos >= 0;--pos) {
1111
+ b = e[pos / 8] >> (pos & 7);
1112
+ b &= 1;
1113
+ swap ^= b;
1114
+ fe_cswap(x2,x3,swap);
1115
+ fe_cswap(z2,z3,swap);
1116
+ swap = b;
1117
+ /* qhasm: fe X2 */
1118
+
1119
+ /* qhasm: fe Z2 */
1120
+
1121
+ /* qhasm: fe X3 */
1122
+
1123
+ /* qhasm: fe Z3 */
1124
+
1125
+ /* qhasm: fe X4 */
1126
+
1127
+ /* qhasm: fe Z4 */
1128
+
1129
+ /* qhasm: fe X5 */
1130
+
1131
+ /* qhasm: fe Z5 */
1132
+
1133
+ /* qhasm: fe A */
1134
+
1135
+ /* qhasm: fe B */
1136
+
1137
+ /* qhasm: fe C */
1138
+
1139
+ /* qhasm: fe D */
1140
+
1141
+ /* qhasm: fe E */
1142
+
1143
+ /* qhasm: fe AA */
1144
+
1145
+ /* qhasm: fe BB */
1146
+
1147
+ /* qhasm: fe DA */
1148
+
1149
+ /* qhasm: fe CB */
1150
+
1151
+ /* qhasm: fe t0 */
1152
+
1153
+ /* qhasm: fe t1 */
1154
+
1155
+ /* qhasm: fe t2 */
1156
+
1157
+ /* qhasm: fe t3 */
1158
+
1159
+ /* qhasm: fe t4 */
1160
+
1161
+ /* qhasm: enter ladder */
1162
+
1163
+ /* qhasm: D = X3-Z3 */
1164
+ /* asm 1: fe_sub(>D=fe#5,<X3=fe#3,<Z3=fe#4); */
1165
+ /* asm 2: fe_sub(>D=tmp0,<X3=x3,<Z3=z3); */
1166
+ fe_sub(tmp0,x3,z3);
1167
+
1168
+ /* qhasm: B = X2-Z2 */
1169
+ /* asm 1: fe_sub(>B=fe#6,<X2=fe#1,<Z2=fe#2); */
1170
+ /* asm 2: fe_sub(>B=tmp1,<X2=x2,<Z2=z2); */
1171
+ fe_sub(tmp1,x2,z2);
1172
+
1173
+ /* qhasm: A = X2+Z2 */
1174
+ /* asm 1: fe_add(>A=fe#1,<X2=fe#1,<Z2=fe#2); */
1175
+ /* asm 2: fe_add(>A=x2,<X2=x2,<Z2=z2); */
1176
+ fe_add(x2,x2,z2);
1177
+
1178
+ /* qhasm: C = X3+Z3 */
1179
+ /* asm 1: fe_add(>C=fe#2,<X3=fe#3,<Z3=fe#4); */
1180
+ /* asm 2: fe_add(>C=z2,<X3=x3,<Z3=z3); */
1181
+ fe_add(z2,x3,z3);
1182
+
1183
+ /* qhasm: DA = D*A */
1184
+ /* asm 1: fe_mul(>DA=fe#4,<D=fe#5,<A=fe#1); */
1185
+ /* asm 2: fe_mul(>DA=z3,<D=tmp0,<A=x2); */
1186
+ fe_mul(z3,tmp0,x2);
1187
+
1188
+ /* qhasm: CB = C*B */
1189
+ /* asm 1: fe_mul(>CB=fe#2,<C=fe#2,<B=fe#6); */
1190
+ /* asm 2: fe_mul(>CB=z2,<C=z2,<B=tmp1); */
1191
+ fe_mul(z2,z2,tmp1);
1192
+
1193
+ /* qhasm: BB = B^2 */
1194
+ /* asm 1: fe_sq(>BB=fe#5,<B=fe#6); */
1195
+ /* asm 2: fe_sq(>BB=tmp0,<B=tmp1); */
1196
+ fe_sq(tmp0,tmp1);
1197
+
1198
+ /* qhasm: AA = A^2 */
1199
+ /* asm 1: fe_sq(>AA=fe#6,<A=fe#1); */
1200
+ /* asm 2: fe_sq(>AA=tmp1,<A=x2); */
1201
+ fe_sq(tmp1,x2);
1202
+
1203
+ /* qhasm: t0 = DA+CB */
1204
+ /* asm 1: fe_add(>t0=fe#3,<DA=fe#4,<CB=fe#2); */
1205
+ /* asm 2: fe_add(>t0=x3,<DA=z3,<CB=z2); */
1206
+ fe_add(x3,z3,z2);
1207
+
1208
+ /* qhasm: assign x3 to t0 */
1209
+
1210
+ /* qhasm: t1 = DA-CB */
1211
+ /* asm 1: fe_sub(>t1=fe#2,<DA=fe#4,<CB=fe#2); */
1212
+ /* asm 2: fe_sub(>t1=z2,<DA=z3,<CB=z2); */
1213
+ fe_sub(z2,z3,z2);
1214
+
1215
+ /* qhasm: X4 = AA*BB */
1216
+ /* asm 1: fe_mul(>X4=fe#1,<AA=fe#6,<BB=fe#5); */
1217
+ /* asm 2: fe_mul(>X4=x2,<AA=tmp1,<BB=tmp0); */
1218
+ fe_mul(x2,tmp1,tmp0);
1219
+
1220
+ /* qhasm: E = AA-BB */
1221
+ /* asm 1: fe_sub(>E=fe#6,<AA=fe#6,<BB=fe#5); */
1222
+ /* asm 2: fe_sub(>E=tmp1,<AA=tmp1,<BB=tmp0); */
1223
+ fe_sub(tmp1,tmp1,tmp0);
1224
+
1225
+ /* qhasm: t2 = t1^2 */
1226
+ /* asm 1: fe_sq(>t2=fe#2,<t1=fe#2); */
1227
+ /* asm 2: fe_sq(>t2=z2,<t1=z2); */
1228
+ fe_sq(z2,z2);
1229
+
1230
+ /* qhasm: t3 = a24*E */
1231
+ /* asm 1: fe_mul121666(>t3=fe#4,<E=fe#6); */
1232
+ /* asm 2: fe_mul121666(>t3=z3,<E=tmp1); */
1233
+ fe_mul121666(z3,tmp1);
1234
+
1235
+ /* qhasm: X5 = t0^2 */
1236
+ /* asm 1: fe_sq(>X5=fe#3,<t0=fe#3); */
1237
+ /* asm 2: fe_sq(>X5=x3,<t0=x3); */
1238
+ fe_sq(x3,x3);
1239
+
1240
+ /* qhasm: t4 = BB+t3 */
1241
+ /* asm 1: fe_add(>t4=fe#5,<BB=fe#5,<t3=fe#4); */
1242
+ /* asm 2: fe_add(>t4=tmp0,<BB=tmp0,<t3=z3); */
1243
+ fe_add(tmp0,tmp0,z3);
1244
+
1245
+ /* qhasm: Z5 = X1*t2 */
1246
+ /* asm 1: fe_mul(>Z5=fe#4,x1,<t2=fe#2); */
1247
+ /* asm 2: fe_mul(>Z5=z3,x1,<t2=z2); */
1248
+ fe_mul(z3,x1,z2);
1249
+
1250
+ /* qhasm: Z4 = E*t4 */
1251
+ /* asm 1: fe_mul(>Z4=fe#2,<E=fe#6,<t4=fe#5); */
1252
+ /* asm 2: fe_mul(>Z4=z2,<E=tmp1,<t4=tmp0); */
1253
+ fe_mul(z2,tmp1,tmp0);
1254
+
1255
+ /* qhasm: return */
1256
+ }
1257
+ fe_cswap(x2,x3,swap);
1258
+ fe_cswap(z2,z3,swap);
1259
+
1260
+ fe_invert(z2,z2);
1261
+ fe_mul(x2,x2,z2);
1262
+ fe_tobytes(q,x2);
1263
+ return 0;
1264
+ }
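The loop body is one combined Montgomery differential doubling-and-addition step. With A = X2+Z2, B = X2−Z2, C = X3+Z3, D = X3−Z3 and a24 = 121666 = (486662+2)/4 (the constant applied by fe_mul121666), it computes (restating the formulas the code implements):

```latex
\begin{aligned}
X_4 &= A^2 B^2, & Z_4 &= (A^2 - B^2)\,\big(B^2 + a_{24}(A^2 - B^2)\big),\\
X_5 &= (DA + CB)^2, & Z_5 &= X_1\,(DA - CB)^2,
\end{aligned}
```

where (X4 : Z4) is the doubling of (X2 : Z2) and (X5 : Z5) is the differential sum of (X2 : Z2) and (X3 : Z3) relative to the fixed difference (X1 : 1). The fe_cswap pair driven by successive key bits keeps the two points correctly ordered, so this single step advances the whole ladder. The clamping of e at the top (clearing the three low bits, clearing bit 255, setting bit 254) is the standard X25519 scalar conditioning.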
1265
+
1266
+ static const unsigned char basepoint[32] = {9};
1267
+
1268
+ int crypto_scalarmult_base_ref10(unsigned char *q,const unsigned char *n)
1269
+ {
1270
+ return crypto_scalarmult_ref10(q,n,basepoint);
1271
+ }
1272
+
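For orientation, a minimal sketch of how the two exported functions might be exercised in a Diffie-Hellman round trip; it assumes fuzz/curve25519-ref10.h declares them, and real secret keys would come from a CSPRNG rather than the placeholder bytes used here:

```c
#include <stdio.h>
#include <string.h>
#include "curve25519-ref10.h"   /* assumed to declare the two functions below */

int main(void)
{
    unsigned char alice_sk[32] = {1}, bob_sk[32] = {2};  /* placeholder secrets */
    unsigned char alice_pk[32], bob_pk[32];
    unsigned char s1[32], s2[32];

    /* Public keys: scalar multiplication of the basepoint 9 by each secret. */
    crypto_scalarmult_base_ref10(alice_pk, alice_sk);
    crypto_scalarmult_base_ref10(bob_pk, bob_sk);

    /* Shared secret computed from each side; both sides must agree. */
    crypto_scalarmult_ref10(s1, alice_sk, bob_pk);
    crypto_scalarmult_ref10(s2, bob_sk, alice_pk);

    printf("shared secrets %s\n", memcmp(s1, s2, 32) == 0 ? "match" : "differ");
    return 0;
}
```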