x25519 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6dedcb7122063e5242e3c6a785ebada985b3d9d254b67b0da5bd83a2e177974f
4
- data.tar.gz: 417172354b7ee82964df3d230c87da9591afe86c8c5df4b7661d244c170cdf5e
3
+ metadata.gz: 316da0f14382521bac4d5142c22d44d4dbabda25ca5956e8c9d467b8b92661f7
4
+ data.tar.gz: 22efb4909f13bcbd27899b26851120a7a3e1d99a0311356191bc78033cd949b9
5
5
  SHA512:
6
- metadata.gz: 3b21ebaf94888e6e98edb0807953425a575d78ad845036256cf09399d9887222e6ba94766329b5c1017f3725c7fcb77d9a6b6f2078e40327b7e5a5b6bbaea482
7
- data.tar.gz: 646f24462200e73a70b9b150ba811aa907377be257cbc698361bd72a63f4c479a08d64ff1a66cd8dd15a9a18b016ffc44a4d2f07f2b8a4d516d4943eebd3a6c4
6
+ metadata.gz: 719c8a4fb16da08c8a9f8d20d66a78c5a3c003b1eef5e579e1b10062ddf840d14016d85bf292dfd7acd733ef978f7a0c82a0f9c93ed131811dc5e76e7cbdf947
7
+ data.tar.gz: bd019615fa2f53475e03b47e6fdd852e4aaffac71a63859b1572f35141998cca75d454e474c57ddd8d3670293e6f492f30f7ec8ecd115ee04354a295b72c0ef3
@@ -1,14 +1,7 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ TargetRubyVersion: 2.2
3
3
  DisplayCopNames: true
4
4
 
5
- #
6
- # Style
7
- #
8
-
9
- Style/StringLiterals:
10
- EnforcedStyle: double_quotes
11
-
12
5
  #
13
6
  # Metrics
14
7
  #
@@ -33,3 +26,13 @@ Metrics/LineLength:
33
26
 
34
27
  Metrics/MethodLength:
35
28
  Max: 25
29
+
30
+ #
31
+ # Style
32
+ #
33
+
34
+ Style/FrozenStringLiteralComment:
35
+ Enabled: true
36
+
37
+ Style/StringLiterals:
38
+ EnforcedStyle: double_quotes
data/CHANGES.md CHANGED
@@ -1,3 +1,13 @@
1
+ # [1.0.5] (2017-12-31)
2
+
3
+ [1.0.5]: https://github.com/crypto-rb/x25519/compare/v1.0.4...v1.0.5
4
+
5
+ * [#15](https://github.com/crypto-rb/x25519/pull/15)
6
+ RuboCop 0.52.1
7
+
8
+ * [#14](https://github.com/crypto-rb/x25519/pull/14)
9
+ `ext/x25519_ref10`: Consolidate all field element code into fe.c
10
+
1
11
  # [1.0.4] (2017-12-31)
2
12
 
3
13
  [1.0.4]: https://github.com/crypto-rb/x25519/compare/v1.0.3...v1.0.4
data/Gemfile CHANGED
@@ -8,5 +8,5 @@ group :development, :test do
8
8
  gem "rake", require: false
9
9
  gem "rake-compiler", "~> 1.0", require: false
10
10
  gem "rspec", "~> 3.7", require: false
11
- gem "rubocop", "0.51.0", require: false
11
+ gem "rubocop", "0.52.1", require: false
12
12
  end
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  [appveyor-image]: https://ci.appveyor.com/api/projects/status/a245an53hsk05sn2?svg=true
8
8
  [appveyor-link]: https://ci.appveyor.com/project/tarcieri/x25519
9
9
  [docs-image]: https://img.shields.io/badge/yard-docs-blue.svg
10
- [docs-link]: http://www.rubydoc.info/gems/x25519/1.0.4
10
+ [docs-link]: http://www.rubydoc.info/gems/x25519/1.0.5
11
11
  [license-image]: https://img.shields.io/badge/License-LGPL%20v3-blue.svg
12
12
  [license-link]: https://www.gnu.org/licenses/lgpl-3.0
13
13
 
@@ -7,3 +7,5 @@ require "mkmf"
7
7
  $CFLAGS << " -Wall -O3 -pedantic -std=c99 -mbmi -mbmi2 -march=native -mtune=native"
8
8
 
9
9
  create_makefile "x25519_precomputed"
10
+
11
+ # rubocop:enable Style/GlobalVars
@@ -7,3 +7,5 @@ require "mkmf"
7
7
  $CFLAGS << " -Wall -O3 -pedantic -std=c99"
8
8
 
9
9
  create_makefile "x25519_ref10"
10
+
11
+ # rubocop:enable Style/GlobalVars
@@ -0,0 +1,912 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = 0
5
+ */
6
+
7
+ void fe_0(fe h)
8
+ {
9
+ h[0] = 0;
10
+ h[1] = 0;
11
+ h[2] = 0;
12
+ h[3] = 0;
13
+ h[4] = 0;
14
+ h[5] = 0;
15
+ h[6] = 0;
16
+ h[7] = 0;
17
+ h[8] = 0;
18
+ h[9] = 0;
19
+ }
20
+
21
+ /*
22
+ h = 1
23
+ */
24
+
25
+ void fe_1(fe h)
26
+ {
27
+ h[0] = 1;
28
+ h[1] = 0;
29
+ h[2] = 0;
30
+ h[3] = 0;
31
+ h[4] = 0;
32
+ h[5] = 0;
33
+ h[6] = 0;
34
+ h[7] = 0;
35
+ h[8] = 0;
36
+ h[9] = 0;
37
+ }
38
+
39
+ /*
40
+ h = f + g
41
+ Can overlap h with f or g.
42
+
43
+ Preconditions:
44
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
45
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
46
+
47
+ Postconditions:
48
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
49
+ */
50
+
51
+ void fe_add(fe h,fe f,fe g)
52
+ {
53
+ int32_t f0 = f[0];
54
+ int32_t f1 = f[1];
55
+ int32_t f2 = f[2];
56
+ int32_t f3 = f[3];
57
+ int32_t f4 = f[4];
58
+ int32_t f5 = f[5];
59
+ int32_t f6 = f[6];
60
+ int32_t f7 = f[7];
61
+ int32_t f8 = f[8];
62
+ int32_t f9 = f[9];
63
+ int32_t g0 = g[0];
64
+ int32_t g1 = g[1];
65
+ int32_t g2 = g[2];
66
+ int32_t g3 = g[3];
67
+ int32_t g4 = g[4];
68
+ int32_t g5 = g[5];
69
+ int32_t g6 = g[6];
70
+ int32_t g7 = g[7];
71
+ int32_t g8 = g[8];
72
+ int32_t g9 = g[9];
73
+ int32_t h0 = f0 + g0;
74
+ int32_t h1 = f1 + g1;
75
+ int32_t h2 = f2 + g2;
76
+ int32_t h3 = f3 + g3;
77
+ int32_t h4 = f4 + g4;
78
+ int32_t h5 = f5 + g5;
79
+ int32_t h6 = f6 + g6;
80
+ int32_t h7 = f7 + g7;
81
+ int32_t h8 = f8 + g8;
82
+ int32_t h9 = f9 + g9;
83
+ h[0] = h0;
84
+ h[1] = h1;
85
+ h[2] = h2;
86
+ h[3] = h3;
87
+ h[4] = h4;
88
+ h[5] = h5;
89
+ h[6] = h6;
90
+ h[7] = h7;
91
+ h[8] = h8;
92
+ h[9] = h9;
93
+ }
94
+
95
+ /*
96
+ h = f
97
+ */
98
+
99
+ void fe_copy(fe h,fe f)
100
+ {
101
+ int32_t f0 = f[0];
102
+ int32_t f1 = f[1];
103
+ int32_t f2 = f[2];
104
+ int32_t f3 = f[3];
105
+ int32_t f4 = f[4];
106
+ int32_t f5 = f[5];
107
+ int32_t f6 = f[6];
108
+ int32_t f7 = f[7];
109
+ int32_t f8 = f[8];
110
+ int32_t f9 = f[9];
111
+ h[0] = f0;
112
+ h[1] = f1;
113
+ h[2] = f2;
114
+ h[3] = f3;
115
+ h[4] = f4;
116
+ h[5] = f5;
117
+ h[6] = f6;
118
+ h[7] = f7;
119
+ h[8] = f8;
120
+ h[9] = f9;
121
+ }
122
+
123
+ /*
124
+ Replace (f,g) with (g,f) if b == 1;
125
+ replace (f,g) with (f,g) if b == 0.
126
+
127
+ Preconditions: b in {0,1}.
128
+ */
129
+
130
+ void fe_cswap(fe f,fe g,unsigned int b)
131
+ {
132
+ int32_t f0 = f[0];
133
+ int32_t f1 = f[1];
134
+ int32_t f2 = f[2];
135
+ int32_t f3 = f[3];
136
+ int32_t f4 = f[4];
137
+ int32_t f5 = f[5];
138
+ int32_t f6 = f[6];
139
+ int32_t f7 = f[7];
140
+ int32_t f8 = f[8];
141
+ int32_t f9 = f[9];
142
+ int32_t g0 = g[0];
143
+ int32_t g1 = g[1];
144
+ int32_t g2 = g[2];
145
+ int32_t g3 = g[3];
146
+ int32_t g4 = g[4];
147
+ int32_t g5 = g[5];
148
+ int32_t g6 = g[6];
149
+ int32_t g7 = g[7];
150
+ int32_t g8 = g[8];
151
+ int32_t g9 = g[9];
152
+ int32_t x0 = f0 ^ g0;
153
+ int32_t x1 = f1 ^ g1;
154
+ int32_t x2 = f2 ^ g2;
155
+ int32_t x3 = f3 ^ g3;
156
+ int32_t x4 = f4 ^ g4;
157
+ int32_t x5 = f5 ^ g5;
158
+ int32_t x6 = f6 ^ g6;
159
+ int32_t x7 = f7 ^ g7;
160
+ int32_t x8 = f8 ^ g8;
161
+ int32_t x9 = f9 ^ g9;
162
+ b = -b;
163
+ x0 &= b;
164
+ x1 &= b;
165
+ x2 &= b;
166
+ x3 &= b;
167
+ x4 &= b;
168
+ x5 &= b;
169
+ x6 &= b;
170
+ x7 &= b;
171
+ x8 &= b;
172
+ x9 &= b;
173
+ f[0] = f0 ^ x0;
174
+ f[1] = f1 ^ x1;
175
+ f[2] = f2 ^ x2;
176
+ f[3] = f3 ^ x3;
177
+ f[4] = f4 ^ x4;
178
+ f[5] = f5 ^ x5;
179
+ f[6] = f6 ^ x6;
180
+ f[7] = f7 ^ x7;
181
+ f[8] = f8 ^ x8;
182
+ f[9] = f9 ^ x9;
183
+ g[0] = g0 ^ x0;
184
+ g[1] = g1 ^ x1;
185
+ g[2] = g2 ^ x2;
186
+ g[3] = g3 ^ x3;
187
+ g[4] = g4 ^ x4;
188
+ g[5] = g5 ^ x5;
189
+ g[6] = g6 ^ x6;
190
+ g[7] = g7 ^ x7;
191
+ g[8] = g8 ^ x8;
192
+ g[9] = g9 ^ x9;
193
+ }
194
+
195
/* Assemble in[0..2] into a 24-bit value, in[0] being the least significant byte. */
static uint64_t load_3(const unsigned char *in)
{
  return ((uint64_t) in[0])
       | (((uint64_t) in[1]) << 8)
       | (((uint64_t) in[2]) << 16);
}
203
+
204
/* Assemble in[0..3] into a 32-bit value, in[0] being the least significant byte. */
static uint64_t load_4(const unsigned char *in)
{
  return ((uint64_t) in[0])
       | (((uint64_t) in[1]) << 8)
       | (((uint64_t) in[2]) << 16)
       | (((uint64_t) in[3]) << 24);
}
213
+
214
+ void fe_frombytes(fe h,const unsigned char *s)
215
+ {
216
+ int64_t h0 = load_4(s);
217
+ int64_t h1 = load_3(s + 4) << 6;
218
+ int64_t h2 = load_3(s + 7) << 5;
219
+ int64_t h3 = load_3(s + 10) << 3;
220
+ int64_t h4 = load_3(s + 13) << 2;
221
+ int64_t h5 = load_4(s + 16);
222
+ int64_t h6 = load_3(s + 20) << 7;
223
+ int64_t h7 = load_3(s + 23) << 5;
224
+ int64_t h8 = load_3(s + 26) << 4;
225
+ int64_t h9 = (load_3(s + 29) & 8388607) << 2;
226
+ int64_t carry0;
227
+ int64_t carry1;
228
+ int64_t carry2;
229
+ int64_t carry3;
230
+ int64_t carry4;
231
+ int64_t carry5;
232
+ int64_t carry6;
233
+ int64_t carry7;
234
+ int64_t carry8;
235
+ int64_t carry9;
236
+
237
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
238
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
239
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
240
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
241
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
242
+
243
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
244
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
245
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
246
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
247
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
248
+
249
+ h[0] = (int32_t)h0;
250
+ h[1] = (int32_t)h1;
251
+ h[2] = (int32_t)h2;
252
+ h[3] = (int32_t)h3;
253
+ h[4] = (int32_t)h4;
254
+ h[5] = (int32_t)h5;
255
+ h[6] = (int32_t)h6;
256
+ h[7] = (int32_t)h7;
257
+ h[8] = (int32_t)h8;
258
+ h[9] = (int32_t)h9;
259
+ }
260
+
261
+ void fe_invert(fe out,fe z)
262
+ {
263
+ fe t0;
264
+ fe t1;
265
+ fe t2;
266
+ fe t3;
267
+ int i;
268
+
269
+ #include "pow225521.h"
270
+
271
+ return;
272
+ }
273
+
274
+ /*
275
+ h = f * g
276
+ Can overlap h with f or g.
277
+
278
+ Preconditions:
279
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
280
+ |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
281
+
282
+ Postconditions:
283
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
284
+ */
285
+
286
+ /*
287
+ Notes on implementation strategy:
288
+
289
+ Using schoolbook multiplication.
290
+ Karatsuba would save a little in some cost models.
291
+
292
+ Most multiplications by 2 and 19 are 32-bit precomputations;
293
+ cheaper than 64-bit postcomputations.
294
+
295
+ There is one remaining multiplication by 19 in the carry chain;
296
+ one *19 precomputation can be merged into this,
297
+ but the resulting data flow is considerably less clean.
298
+
299
+ There are 12 carries below.
300
+ 10 of them are 2-way parallelizable and vectorizable.
301
+ Can get away with 11 carries, but then data flow is much deeper.
302
+
303
+ With tighter constraints on inputs can squeeze carries into int32.
304
+ */
305
+
306
+ void fe_mul(fe h,fe f,fe g)
307
+ {
308
+ int32_t f0 = f[0];
309
+ int32_t f1 = f[1];
310
+ int32_t f2 = f[2];
311
+ int32_t f3 = f[3];
312
+ int32_t f4 = f[4];
313
+ int32_t f5 = f[5];
314
+ int32_t f6 = f[6];
315
+ int32_t f7 = f[7];
316
+ int32_t f8 = f[8];
317
+ int32_t f9 = f[9];
318
+ int32_t g0 = g[0];
319
+ int32_t g1 = g[1];
320
+ int32_t g2 = g[2];
321
+ int32_t g3 = g[3];
322
+ int32_t g4 = g[4];
323
+ int32_t g5 = g[5];
324
+ int32_t g6 = g[6];
325
+ int32_t g7 = g[7];
326
+ int32_t g8 = g[8];
327
+ int32_t g9 = g[9];
328
+ int32_t g1_19 = 19 * g1; /* 1.4*2^29 */
329
+ int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */
330
+ int32_t g3_19 = 19 * g3;
331
+ int32_t g4_19 = 19 * g4;
332
+ int32_t g5_19 = 19 * g5;
333
+ int32_t g6_19 = 19 * g6;
334
+ int32_t g7_19 = 19 * g7;
335
+ int32_t g8_19 = 19 * g8;
336
+ int32_t g9_19 = 19 * g9;
337
+ int32_t f1_2 = 2 * f1;
338
+ int32_t f3_2 = 2 * f3;
339
+ int32_t f5_2 = 2 * f5;
340
+ int32_t f7_2 = 2 * f7;
341
+ int32_t f9_2 = 2 * f9;
342
+ int64_t f0g0 = f0 * (int64_t) g0;
343
+ int64_t f0g1 = f0 * (int64_t) g1;
344
+ int64_t f0g2 = f0 * (int64_t) g2;
345
+ int64_t f0g3 = f0 * (int64_t) g3;
346
+ int64_t f0g4 = f0 * (int64_t) g4;
347
+ int64_t f0g5 = f0 * (int64_t) g5;
348
+ int64_t f0g6 = f0 * (int64_t) g6;
349
+ int64_t f0g7 = f0 * (int64_t) g7;
350
+ int64_t f0g8 = f0 * (int64_t) g8;
351
+ int64_t f0g9 = f0 * (int64_t) g9;
352
+ int64_t f1g0 = f1 * (int64_t) g0;
353
+ int64_t f1g1_2 = f1_2 * (int64_t) g1;
354
+ int64_t f1g2 = f1 * (int64_t) g2;
355
+ int64_t f1g3_2 = f1_2 * (int64_t) g3;
356
+ int64_t f1g4 = f1 * (int64_t) g4;
357
+ int64_t f1g5_2 = f1_2 * (int64_t) g5;
358
+ int64_t f1g6 = f1 * (int64_t) g6;
359
+ int64_t f1g7_2 = f1_2 * (int64_t) g7;
360
+ int64_t f1g8 = f1 * (int64_t) g8;
361
+ int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
362
+ int64_t f2g0 = f2 * (int64_t) g0;
363
+ int64_t f2g1 = f2 * (int64_t) g1;
364
+ int64_t f2g2 = f2 * (int64_t) g2;
365
+ int64_t f2g3 = f2 * (int64_t) g3;
366
+ int64_t f2g4 = f2 * (int64_t) g4;
367
+ int64_t f2g5 = f2 * (int64_t) g5;
368
+ int64_t f2g6 = f2 * (int64_t) g6;
369
+ int64_t f2g7 = f2 * (int64_t) g7;
370
+ int64_t f2g8_19 = f2 * (int64_t) g8_19;
371
+ int64_t f2g9_19 = f2 * (int64_t) g9_19;
372
+ int64_t f3g0 = f3 * (int64_t) g0;
373
+ int64_t f3g1_2 = f3_2 * (int64_t) g1;
374
+ int64_t f3g2 = f3 * (int64_t) g2;
375
+ int64_t f3g3_2 = f3_2 * (int64_t) g3;
376
+ int64_t f3g4 = f3 * (int64_t) g4;
377
+ int64_t f3g5_2 = f3_2 * (int64_t) g5;
378
+ int64_t f3g6 = f3 * (int64_t) g6;
379
+ int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
380
+ int64_t f3g8_19 = f3 * (int64_t) g8_19;
381
+ int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
382
+ int64_t f4g0 = f4 * (int64_t) g0;
383
+ int64_t f4g1 = f4 * (int64_t) g1;
384
+ int64_t f4g2 = f4 * (int64_t) g2;
385
+ int64_t f4g3 = f4 * (int64_t) g3;
386
+ int64_t f4g4 = f4 * (int64_t) g4;
387
+ int64_t f4g5 = f4 * (int64_t) g5;
388
+ int64_t f4g6_19 = f4 * (int64_t) g6_19;
389
+ int64_t f4g7_19 = f4 * (int64_t) g7_19;
390
+ int64_t f4g8_19 = f4 * (int64_t) g8_19;
391
+ int64_t f4g9_19 = f4 * (int64_t) g9_19;
392
+ int64_t f5g0 = f5 * (int64_t) g0;
393
+ int64_t f5g1_2 = f5_2 * (int64_t) g1;
394
+ int64_t f5g2 = f5 * (int64_t) g2;
395
+ int64_t f5g3_2 = f5_2 * (int64_t) g3;
396
+ int64_t f5g4 = f5 * (int64_t) g4;
397
+ int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
398
+ int64_t f5g6_19 = f5 * (int64_t) g6_19;
399
+ int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
400
+ int64_t f5g8_19 = f5 * (int64_t) g8_19;
401
+ int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
402
+ int64_t f6g0 = f6 * (int64_t) g0;
403
+ int64_t f6g1 = f6 * (int64_t) g1;
404
+ int64_t f6g2 = f6 * (int64_t) g2;
405
+ int64_t f6g3 = f6 * (int64_t) g3;
406
+ int64_t f6g4_19 = f6 * (int64_t) g4_19;
407
+ int64_t f6g5_19 = f6 * (int64_t) g5_19;
408
+ int64_t f6g6_19 = f6 * (int64_t) g6_19;
409
+ int64_t f6g7_19 = f6 * (int64_t) g7_19;
410
+ int64_t f6g8_19 = f6 * (int64_t) g8_19;
411
+ int64_t f6g9_19 = f6 * (int64_t) g9_19;
412
+ int64_t f7g0 = f7 * (int64_t) g0;
413
+ int64_t f7g1_2 = f7_2 * (int64_t) g1;
414
+ int64_t f7g2 = f7 * (int64_t) g2;
415
+ int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
416
+ int64_t f7g4_19 = f7 * (int64_t) g4_19;
417
+ int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
418
+ int64_t f7g6_19 = f7 * (int64_t) g6_19;
419
+ int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
420
+ int64_t f7g8_19 = f7 * (int64_t) g8_19;
421
+ int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
422
+ int64_t f8g0 = f8 * (int64_t) g0;
423
+ int64_t f8g1 = f8 * (int64_t) g1;
424
+ int64_t f8g2_19 = f8 * (int64_t) g2_19;
425
+ int64_t f8g3_19 = f8 * (int64_t) g3_19;
426
+ int64_t f8g4_19 = f8 * (int64_t) g4_19;
427
+ int64_t f8g5_19 = f8 * (int64_t) g5_19;
428
+ int64_t f8g6_19 = f8 * (int64_t) g6_19;
429
+ int64_t f8g7_19 = f8 * (int64_t) g7_19;
430
+ int64_t f8g8_19 = f8 * (int64_t) g8_19;
431
+ int64_t f8g9_19 = f8 * (int64_t) g9_19;
432
+ int64_t f9g0 = f9 * (int64_t) g0;
433
+ int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
434
+ int64_t f9g2_19 = f9 * (int64_t) g2_19;
435
+ int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
436
+ int64_t f9g4_19 = f9 * (int64_t) g4_19;
437
+ int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
438
+ int64_t f9g6_19 = f9 * (int64_t) g6_19;
439
+ int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
440
+ int64_t f9g8_19 = f9 * (int64_t) g8_19;
441
+ int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
442
+ int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
443
+ int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
444
+ int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
445
+ int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
446
+ int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
447
+ int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
448
+ int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
449
+ int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
450
+ int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
451
+ int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
452
+ int64_t carry0;
453
+ int64_t carry1;
454
+ int64_t carry2;
455
+ int64_t carry3;
456
+ int64_t carry4;
457
+ int64_t carry5;
458
+ int64_t carry6;
459
+ int64_t carry7;
460
+ int64_t carry8;
461
+ int64_t carry9;
462
+
463
+ /*
464
+ |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
465
+ i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
466
+ |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
467
+ i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
468
+ */
469
+
470
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
471
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
472
+ /* |h0| <= 2^25 */
473
+ /* |h4| <= 2^25 */
474
+ /* |h1| <= 1.51*2^58 */
475
+ /* |h5| <= 1.51*2^58 */
476
+
477
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
478
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
479
+ /* |h1| <= 2^24; from now on fits into int32 */
480
+ /* |h5| <= 2^24; from now on fits into int32 */
481
+ /* |h2| <= 1.21*2^59 */
482
+ /* |h6| <= 1.21*2^59 */
483
+
484
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
485
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
486
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
487
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
488
+ /* |h3| <= 1.51*2^58 */
489
+ /* |h7| <= 1.51*2^58 */
490
+
491
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
492
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
493
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
494
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
495
+ /* |h4| <= 1.52*2^33 */
496
+ /* |h8| <= 1.52*2^33 */
497
+
498
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
499
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
500
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
501
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
502
+ /* |h5| <= 1.01*2^24 */
503
+ /* |h9| <= 1.51*2^58 */
504
+
505
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
506
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
507
+ /* |h0| <= 1.8*2^37 */
508
+
509
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
510
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
511
+ /* |h1| <= 1.01*2^24 */
512
+
513
+ h[0] = (int32_t)h0;
514
+ h[1] = (int32_t)h1;
515
+ h[2] = (int32_t)h2;
516
+ h[3] = (int32_t)h3;
517
+ h[4] = (int32_t)h4;
518
+ h[5] = (int32_t)h5;
519
+ h[6] = (int32_t)h6;
520
+ h[7] = (int32_t)h7;
521
+ h[8] = (int32_t)h8;
522
+ h[9] = (int32_t)h9;
523
+ }
524
+
525
+ /*
526
+ h = f * 121666
527
+ Can overlap h with f.
528
+
529
+ Preconditions:
530
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
531
+
532
+ Postconditions:
533
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
534
+ */
535
+
536
+ void fe_mul121666(fe h,fe f)
537
+ {
538
+ int32_t f0 = f[0];
539
+ int32_t f1 = f[1];
540
+ int32_t f2 = f[2];
541
+ int32_t f3 = f[3];
542
+ int32_t f4 = f[4];
543
+ int32_t f5 = f[5];
544
+ int32_t f6 = f[6];
545
+ int32_t f7 = f[7];
546
+ int32_t f8 = f[8];
547
+ int32_t f9 = f[9];
548
+ int64_t h0 = f0 * (int64_t) 121666;
549
+ int64_t h1 = f1 * (int64_t) 121666;
550
+ int64_t h2 = f2 * (int64_t) 121666;
551
+ int64_t h3 = f3 * (int64_t) 121666;
552
+ int64_t h4 = f4 * (int64_t) 121666;
553
+ int64_t h5 = f5 * (int64_t) 121666;
554
+ int64_t h6 = f6 * (int64_t) 121666;
555
+ int64_t h7 = f7 * (int64_t) 121666;
556
+ int64_t h8 = f8 * (int64_t) 121666;
557
+ int64_t h9 = f9 * (int64_t) 121666;
558
+ int64_t carry0;
559
+ int64_t carry1;
560
+ int64_t carry2;
561
+ int64_t carry3;
562
+ int64_t carry4;
563
+ int64_t carry5;
564
+ int64_t carry6;
565
+ int64_t carry7;
566
+ int64_t carry8;
567
+ int64_t carry9;
568
+
569
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
570
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
571
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
572
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
573
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
574
+
575
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
576
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
577
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
578
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
579
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
580
+
581
+ h[0] = (int32_t)h0;
582
+ h[1] = (int32_t)h1;
583
+ h[2] = (int32_t)h2;
584
+ h[3] = (int32_t)h3;
585
+ h[4] = (int32_t)h4;
586
+ h[5] = (int32_t)h5;
587
+ h[6] = (int32_t)h6;
588
+ h[7] = (int32_t)h7;
589
+ h[8] = (int32_t)h8;
590
+ h[9] = (int32_t)h9;
591
+ }
592
+
593
+ /*
594
+ h = f * f
595
+ Can overlap h with f.
596
+
597
+ Preconditions:
598
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
599
+
600
+ Postconditions:
601
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
602
+ */
603
+
604
+ /*
605
+ See fe_mul.c for discussion of implementation strategy.
606
+ */
607
+
608
+ void fe_sq(fe h,fe f)
609
+ {
610
+ int32_t f0 = f[0];
611
+ int32_t f1 = f[1];
612
+ int32_t f2 = f[2];
613
+ int32_t f3 = f[3];
614
+ int32_t f4 = f[4];
615
+ int32_t f5 = f[5];
616
+ int32_t f6 = f[6];
617
+ int32_t f7 = f[7];
618
+ int32_t f8 = f[8];
619
+ int32_t f9 = f[9];
620
+ int32_t f0_2 = 2 * f0;
621
+ int32_t f1_2 = 2 * f1;
622
+ int32_t f2_2 = 2 * f2;
623
+ int32_t f3_2 = 2 * f3;
624
+ int32_t f4_2 = 2 * f4;
625
+ int32_t f5_2 = 2 * f5;
626
+ int32_t f6_2 = 2 * f6;
627
+ int32_t f7_2 = 2 * f7;
628
+ int32_t f5_38 = 38 * f5; /* 1.31*2^30 */
629
+ int32_t f6_19 = 19 * f6; /* 1.31*2^30 */
630
+ int32_t f7_38 = 38 * f7; /* 1.31*2^30 */
631
+ int32_t f8_19 = 19 * f8; /* 1.31*2^30 */
632
+ int32_t f9_38 = 38 * f9; /* 1.31*2^30 */
633
+ int64_t f0f0 = f0 * (int64_t) f0;
634
+ int64_t f0f1_2 = f0_2 * (int64_t) f1;
635
+ int64_t f0f2_2 = f0_2 * (int64_t) f2;
636
+ int64_t f0f3_2 = f0_2 * (int64_t) f3;
637
+ int64_t f0f4_2 = f0_2 * (int64_t) f4;
638
+ int64_t f0f5_2 = f0_2 * (int64_t) f5;
639
+ int64_t f0f6_2 = f0_2 * (int64_t) f6;
640
+ int64_t f0f7_2 = f0_2 * (int64_t) f7;
641
+ int64_t f0f8_2 = f0_2 * (int64_t) f8;
642
+ int64_t f0f9_2 = f0_2 * (int64_t) f9;
643
+ int64_t f1f1_2 = f1_2 * (int64_t) f1;
644
+ int64_t f1f2_2 = f1_2 * (int64_t) f2;
645
+ int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
646
+ int64_t f1f4_2 = f1_2 * (int64_t) f4;
647
+ int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
648
+ int64_t f1f6_2 = f1_2 * (int64_t) f6;
649
+ int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
650
+ int64_t f1f8_2 = f1_2 * (int64_t) f8;
651
+ int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
652
+ int64_t f2f2 = f2 * (int64_t) f2;
653
+ int64_t f2f3_2 = f2_2 * (int64_t) f3;
654
+ int64_t f2f4_2 = f2_2 * (int64_t) f4;
655
+ int64_t f2f5_2 = f2_2 * (int64_t) f5;
656
+ int64_t f2f6_2 = f2_2 * (int64_t) f6;
657
+ int64_t f2f7_2 = f2_2 * (int64_t) f7;
658
+ int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
659
+ int64_t f2f9_38 = f2 * (int64_t) f9_38;
660
+ int64_t f3f3_2 = f3_2 * (int64_t) f3;
661
+ int64_t f3f4_2 = f3_2 * (int64_t) f4;
662
+ int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
663
+ int64_t f3f6_2 = f3_2 * (int64_t) f6;
664
+ int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
665
+ int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
666
+ int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
667
+ int64_t f4f4 = f4 * (int64_t) f4;
668
+ int64_t f4f5_2 = f4_2 * (int64_t) f5;
669
+ int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
670
+ int64_t f4f7_38 = f4 * (int64_t) f7_38;
671
+ int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
672
+ int64_t f4f9_38 = f4 * (int64_t) f9_38;
673
+ int64_t f5f5_38 = f5 * (int64_t) f5_38;
674
+ int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
675
+ int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
676
+ int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
677
+ int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
678
+ int64_t f6f6_19 = f6 * (int64_t) f6_19;
679
+ int64_t f6f7_38 = f6 * (int64_t) f7_38;
680
+ int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
681
+ int64_t f6f9_38 = f6 * (int64_t) f9_38;
682
+ int64_t f7f7_38 = f7 * (int64_t) f7_38;
683
+ int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
684
+ int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
685
+ int64_t f8f8_19 = f8 * (int64_t) f8_19;
686
+ int64_t f8f9_38 = f8 * (int64_t) f9_38;
687
+ int64_t f9f9_38 = f9 * (int64_t) f9_38;
688
+ int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
689
+ int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
690
+ int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
691
+ int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
692
+ int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
693
+ int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
694
+ int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
695
+ int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
696
+ int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
697
+ int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
698
+ int64_t carry0;
699
+ int64_t carry1;
700
+ int64_t carry2;
701
+ int64_t carry3;
702
+ int64_t carry4;
703
+ int64_t carry5;
704
+ int64_t carry6;
705
+ int64_t carry7;
706
+ int64_t carry8;
707
+ int64_t carry9;
708
+
709
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
710
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
711
+
712
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
713
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
714
+
715
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
716
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
717
+
718
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
719
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
720
+
721
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
722
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
723
+
724
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
725
+
726
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
727
+
728
+ h[0] = (int32_t)h0;
729
+ h[1] = (int32_t)h1;
730
+ h[2] = (int32_t)h2;
731
+ h[3] = (int32_t)h3;
732
+ h[4] = (int32_t)h4;
733
+ h[5] = (int32_t)h5;
734
+ h[6] = (int32_t)h6;
735
+ h[7] = (int32_t)h7;
736
+ h[8] = (int32_t)h8;
737
+ h[9] = (int32_t)h9;
738
+ }
739
+
740
+ /*
741
+ h = f - g
742
+ Can overlap h with f or g.
743
+
744
+ Preconditions:
745
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
746
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
747
+
748
+ Postconditions:
749
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
750
+ */
751
+
752
+ void fe_sub(fe h,fe f,fe g)
753
+ {
754
+ int32_t f0 = f[0];
755
+ int32_t f1 = f[1];
756
+ int32_t f2 = f[2];
757
+ int32_t f3 = f[3];
758
+ int32_t f4 = f[4];
759
+ int32_t f5 = f[5];
760
+ int32_t f6 = f[6];
761
+ int32_t f7 = f[7];
762
+ int32_t f8 = f[8];
763
+ int32_t f9 = f[9];
764
+ int32_t g0 = g[0];
765
+ int32_t g1 = g[1];
766
+ int32_t g2 = g[2];
767
+ int32_t g3 = g[3];
768
+ int32_t g4 = g[4];
769
+ int32_t g5 = g[5];
770
+ int32_t g6 = g[6];
771
+ int32_t g7 = g[7];
772
+ int32_t g8 = g[8];
773
+ int32_t g9 = g[9];
774
+ int32_t h0 = f0 - g0;
775
+ int32_t h1 = f1 - g1;
776
+ int32_t h2 = f2 - g2;
777
+ int32_t h3 = f3 - g3;
778
+ int32_t h4 = f4 - g4;
779
+ int32_t h5 = f5 - g5;
780
+ int32_t h6 = f6 - g6;
781
+ int32_t h7 = f7 - g7;
782
+ int32_t h8 = f8 - g8;
783
+ int32_t h9 = f9 - g9;
784
+ h[0] = h0;
785
+ h[1] = h1;
786
+ h[2] = h2;
787
+ h[3] = h3;
788
+ h[4] = h4;
789
+ h[5] = h5;
790
+ h[6] = h6;
791
+ h[7] = h7;
792
+ h[8] = h8;
793
+ h[9] = h9;
794
+ }
795
+
796
+ /*
797
+ Preconditions:
798
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
799
+
800
+ Write p=2^255-19; q=floor(h/p).
801
+ Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
802
+
803
+ Proof:
804
+ Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
805
+ Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
806
+
807
+ Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
808
+ Then 0<y<1.
809
+
810
+ Write r=h-pq.
811
+ Have 0<=r<=p-1=2^255-20.
812
+ Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
813
+
814
+ Write x=r+19(2^-255)r+y.
815
+ Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
816
+
817
+ Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
818
+ so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
819
+ */
820
+
821
+ void fe_tobytes(unsigned char *s,fe h)
822
+ {
823
+ int32_t h0 = h[0];
824
+ int32_t h1 = h[1];
825
+ int32_t h2 = h[2];
826
+ int32_t h3 = h[3];
827
+ int32_t h4 = h[4];
828
+ int32_t h5 = h[5];
829
+ int32_t h6 = h[6];
830
+ int32_t h7 = h[7];
831
+ int32_t h8 = h[8];
832
+ int32_t h9 = h[9];
833
+ int32_t q;
834
+ int32_t carry0;
835
+ int32_t carry1;
836
+ int32_t carry2;
837
+ int32_t carry3;
838
+ int32_t carry4;
839
+ int32_t carry5;
840
+ int32_t carry6;
841
+ int32_t carry7;
842
+ int32_t carry8;
843
+ int32_t carry9;
844
+
845
+ q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
846
+ q = (h0 + q) >> 26;
847
+ q = (h1 + q) >> 25;
848
+ q = (h2 + q) >> 26;
849
+ q = (h3 + q) >> 25;
850
+ q = (h4 + q) >> 26;
851
+ q = (h5 + q) >> 25;
852
+ q = (h6 + q) >> 26;
853
+ q = (h7 + q) >> 25;
854
+ q = (h8 + q) >> 26;
855
+ q = (h9 + q) >> 25;
856
+
857
+ /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
858
+ h0 += 19 * q;
859
+ /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
860
+
861
+ carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
862
+ carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
863
+ carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
864
+ carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
865
+ carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
866
+ carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
867
+ carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
868
+ carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
869
+ carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
870
+ carry9 = h9 >> 25; h9 -= carry9 << 25;
871
+ /* h10 = carry9 */
872
+
873
+ /*
874
+ Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
875
+ Have h0+...+2^230 h9 between 0 and 2^255-1;
876
+ evidently 2^255 h10-2^255 q = 0.
877
+ Goal: Output h0+...+2^230 h9.
878
+ */
879
+
880
+ s[0] = h0 >> 0;
881
+ s[1] = h0 >> 8;
882
+ s[2] = h0 >> 16;
883
+ s[3] = (h0 >> 24) | (h1 << 2);
884
+ s[4] = h1 >> 6;
885
+ s[5] = h1 >> 14;
886
+ s[6] = (h1 >> 22) | (h2 << 3);
887
+ s[7] = h2 >> 5;
888
+ s[8] = h2 >> 13;
889
+ s[9] = (h2 >> 21) | (h3 << 5);
890
+ s[10] = h3 >> 3;
891
+ s[11] = h3 >> 11;
892
+ s[12] = (h3 >> 19) | (h4 << 6);
893
+ s[13] = h4 >> 2;
894
+ s[14] = h4 >> 10;
895
+ s[15] = h4 >> 18;
896
+ s[16] = h5 >> 0;
897
+ s[17] = h5 >> 8;
898
+ s[18] = h5 >> 16;
899
+ s[19] = (h5 >> 24) | (h6 << 1);
900
+ s[20] = h6 >> 7;
901
+ s[21] = h6 >> 15;
902
+ s[22] = (h6 >> 23) | (h7 << 3);
903
+ s[23] = h7 >> 5;
904
+ s[24] = h7 >> 13;
905
+ s[25] = (h7 >> 21) | (h8 << 4);
906
+ s[26] = h8 >> 4;
907
+ s[27] = h8 >> 12;
908
+ s[28] = (h8 >> 20) | (h9 << 6);
909
+ s[29] = h9 >> 2;
910
+ s[30] = h9 >> 10;
911
+ s[31] = h9 >> 18;
912
+ }