x25519 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +3 -0
  4. data/CHANGES.md +3 -0
  5. data/Gemfile +3 -2
  6. data/README.md +205 -14
  7. data/Rakefile +9 -1
  8. data/ext/x25519/cputest.c +68 -0
  9. data/ext/x25519/extconf.rb +31 -0
  10. data/ext/x25519/ref10/api.h +2 -0
  11. data/ext/x25519/ref10/base.c +12 -0
  12. data/ext/x25519/ref10/fe.h +44 -0
  13. data/ext/x25519/ref10/fe_0.c +19 -0
  14. data/ext/x25519/ref10/fe_1.c +19 -0
  15. data/ext/x25519/ref10/fe_add.c +57 -0
  16. data/ext/x25519/ref10/fe_copy.c +29 -0
  17. data/ext/x25519/ref10/fe_cswap.c +73 -0
  18. data/ext/x25519/ref10/fe_frombytes.c +67 -0
  19. data/ext/x25519/ref10/fe_invert.c +14 -0
  20. data/ext/x25519/ref10/fe_mul.c +252 -0
  21. data/ext/x25519/ref10/fe_mul121666.c +69 -0
  22. data/ext/x25519/ref10/fe_sq.c +148 -0
  23. data/ext/x25519/ref10/fe_sub.c +57 -0
  24. data/ext/x25519/ref10/fe_tobytes.c +119 -0
  25. data/ext/x25519/ref10/montgomery.h +140 -0
  26. data/ext/x25519/ref10/pow225521.h +160 -0
  27. data/ext/x25519/ref10/scalarmult.c +46 -0
  28. data/ext/x25519/{fp25519_x64.c → rfc7748_precomputed/fp25519_x64.c} +14 -16
  29. data/ext/x25519/{fp25519_x64.h → rfc7748_precomputed/fp25519_x64.h} +6 -10
  30. data/ext/x25519/{bytes.h → rfc7748_precomputed/rfc7748_precomputed.h} +13 -5
  31. data/ext/x25519/{table_ladder_x25519.h → rfc7748_precomputed/table_ladder_x25519.h} +0 -0
  32. data/ext/x25519/{x25519_x64.c → rfc7748_precomputed/x25519_x64.c} +16 -29
  33. data/ext/x25519/x25519.c +325 -0
  34. data/ext/x25519/x25519.h +24 -0
  35. data/x25519.gemspec +3 -6
  36. metadata +32 -15
  37. data/ext/x25519/bytes.c +0 -42
  38. data/ext/x25519/random.c +0 -51
  39. data/ext/x25519/random.h +0 -24
  40. data/ext/x25519/rfc7748_precompted.h +0 -49
  41. data/ext/x25519/rfc7748_precomputed.c +0 -20
  42. data/lib/x25519.rb +0 -7
  43. data/lib/x25519/version.rb +0 -5
@@ -0,0 +1,44 @@
1
#ifndef FE_H
#define FE_H

#include <stdint.h>

/*
fe means field element.
Here the field is Z/(2^255-19).
An element t is stored as ten signed 32-bit limbs t[0]...t[9] and
represents the integer
  t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
Bounds on each t[i] vary depending on context.
*/
typedef int32_t fe[10];

/*
Each short name below expands to an identifier carrying the
crypto_scalarmult_curve25519_ref10_ prefix.
*/
#define fe_frombytes crypto_scalarmult_curve25519_ref10_fe_frombytes
#define fe_tobytes crypto_scalarmult_curve25519_ref10_fe_tobytes
#define fe_copy crypto_scalarmult_curve25519_ref10_fe_copy
#define fe_0 crypto_scalarmult_curve25519_ref10_fe_0
#define fe_1 crypto_scalarmult_curve25519_ref10_fe_1
#define fe_cswap crypto_scalarmult_curve25519_ref10_fe_cswap
#define fe_add crypto_scalarmult_curve25519_ref10_fe_add
#define fe_sub crypto_scalarmult_curve25519_ref10_fe_sub
#define fe_mul crypto_scalarmult_curve25519_ref10_fe_mul
#define fe_sq crypto_scalarmult_curve25519_ref10_fe_sq
#define fe_mul121666 crypto_scalarmult_curve25519_ref10_fe_mul121666
#define fe_invert crypto_scalarmult_curve25519_ref10_fe_invert

/* Conversion between byte strings and field elements. */
void fe_frombytes(fe h, const unsigned char *s);
void fe_tobytes(unsigned char *s, fe h);

/* Constants, copy and conditional swap. */
void fe_copy(fe h, fe f);
void fe_0(fe h);
void fe_1(fe h);
void fe_cswap(fe f, fe g, unsigned int b);

/* Field arithmetic. */
void fe_add(fe h, fe f, fe g);
void fe_sub(fe h, fe f, fe g);
void fe_mul(fe h, fe f, fe g);
void fe_sq(fe h, fe f);
void fe_mul121666(fe h, fe f);
void fe_invert(fe out, fe z);

#endif
@@ -0,0 +1,19 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = 0
5
+ */
6
+
7
+ void fe_0(fe h)
8
+ {
9
+ h[0] = 0;
10
+ h[1] = 0;
11
+ h[2] = 0;
12
+ h[3] = 0;
13
+ h[4] = 0;
14
+ h[5] = 0;
15
+ h[6] = 0;
16
+ h[7] = 0;
17
+ h[8] = 0;
18
+ h[9] = 0;
19
+ }
@@ -0,0 +1,19 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = 1
5
+ */
6
+
7
+ void fe_1(fe h)
8
+ {
9
+ h[0] = 1;
10
+ h[1] = 0;
11
+ h[2] = 0;
12
+ h[3] = 0;
13
+ h[4] = 0;
14
+ h[5] = 0;
15
+ h[6] = 0;
16
+ h[7] = 0;
17
+ h[8] = 0;
18
+ h[9] = 0;
19
+ }
@@ -0,0 +1,57 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = f + g
5
+ Can overlap h with f or g.
6
+
7
+ Preconditions:
8
+ |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
9
+ |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
10
+
11
+ Postconditions:
12
+ |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
13
+ */
14
+
15
+ void fe_add(fe h,fe f,fe g)
16
+ {
17
+ int32_t f0 = f[0];
18
+ int32_t f1 = f[1];
19
+ int32_t f2 = f[2];
20
+ int32_t f3 = f[3];
21
+ int32_t f4 = f[4];
22
+ int32_t f5 = f[5];
23
+ int32_t f6 = f[6];
24
+ int32_t f7 = f[7];
25
+ int32_t f8 = f[8];
26
+ int32_t f9 = f[9];
27
+ int32_t g0 = g[0];
28
+ int32_t g1 = g[1];
29
+ int32_t g2 = g[2];
30
+ int32_t g3 = g[3];
31
+ int32_t g4 = g[4];
32
+ int32_t g5 = g[5];
33
+ int32_t g6 = g[6];
34
+ int32_t g7 = g[7];
35
+ int32_t g8 = g[8];
36
+ int32_t g9 = g[9];
37
+ int32_t h0 = f0 + g0;
38
+ int32_t h1 = f1 + g1;
39
+ int32_t h2 = f2 + g2;
40
+ int32_t h3 = f3 + g3;
41
+ int32_t h4 = f4 + g4;
42
+ int32_t h5 = f5 + g5;
43
+ int32_t h6 = f6 + g6;
44
+ int32_t h7 = f7 + g7;
45
+ int32_t h8 = f8 + g8;
46
+ int32_t h9 = f9 + g9;
47
+ h[0] = h0;
48
+ h[1] = h1;
49
+ h[2] = h2;
50
+ h[3] = h3;
51
+ h[4] = h4;
52
+ h[5] = h5;
53
+ h[6] = h6;
54
+ h[7] = h7;
55
+ h[8] = h8;
56
+ h[9] = h9;
57
+ }
@@ -0,0 +1,29 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = f
5
+ */
6
+
7
+ void fe_copy(fe h,fe f)
8
+ {
9
+ int32_t f0 = f[0];
10
+ int32_t f1 = f[1];
11
+ int32_t f2 = f[2];
12
+ int32_t f3 = f[3];
13
+ int32_t f4 = f[4];
14
+ int32_t f5 = f[5];
15
+ int32_t f6 = f[6];
16
+ int32_t f7 = f[7];
17
+ int32_t f8 = f[8];
18
+ int32_t f9 = f[9];
19
+ h[0] = f0;
20
+ h[1] = f1;
21
+ h[2] = f2;
22
+ h[3] = f3;
23
+ h[4] = f4;
24
+ h[5] = f5;
25
+ h[6] = f6;
26
+ h[7] = f7;
27
+ h[8] = f8;
28
+ h[9] = f9;
29
+ }
@@ -0,0 +1,73 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ Replace (f,g) with (g,f) if b == 1;
5
+ replace (f,g) with (f,g) if b == 0.
6
+
7
+ Preconditions: b in {0,1}.
8
+ */
9
+
10
+ void fe_cswap(fe f,fe g,unsigned int b)
11
+ {
12
+ int32_t f0 = f[0];
13
+ int32_t f1 = f[1];
14
+ int32_t f2 = f[2];
15
+ int32_t f3 = f[3];
16
+ int32_t f4 = f[4];
17
+ int32_t f5 = f[5];
18
+ int32_t f6 = f[6];
19
+ int32_t f7 = f[7];
20
+ int32_t f8 = f[8];
21
+ int32_t f9 = f[9];
22
+ int32_t g0 = g[0];
23
+ int32_t g1 = g[1];
24
+ int32_t g2 = g[2];
25
+ int32_t g3 = g[3];
26
+ int32_t g4 = g[4];
27
+ int32_t g5 = g[5];
28
+ int32_t g6 = g[6];
29
+ int32_t g7 = g[7];
30
+ int32_t g8 = g[8];
31
+ int32_t g9 = g[9];
32
+ int32_t x0 = f0 ^ g0;
33
+ int32_t x1 = f1 ^ g1;
34
+ int32_t x2 = f2 ^ g2;
35
+ int32_t x3 = f3 ^ g3;
36
+ int32_t x4 = f4 ^ g4;
37
+ int32_t x5 = f5 ^ g5;
38
+ int32_t x6 = f6 ^ g6;
39
+ int32_t x7 = f7 ^ g7;
40
+ int32_t x8 = f8 ^ g8;
41
+ int32_t x9 = f9 ^ g9;
42
+ b = -b;
43
+ x0 &= b;
44
+ x1 &= b;
45
+ x2 &= b;
46
+ x3 &= b;
47
+ x4 &= b;
48
+ x5 &= b;
49
+ x6 &= b;
50
+ x7 &= b;
51
+ x8 &= b;
52
+ x9 &= b;
53
+ f[0] = f0 ^ x0;
54
+ f[1] = f1 ^ x1;
55
+ f[2] = f2 ^ x2;
56
+ f[3] = f3 ^ x3;
57
+ f[4] = f4 ^ x4;
58
+ f[5] = f5 ^ x5;
59
+ f[6] = f6 ^ x6;
60
+ f[7] = f7 ^ x7;
61
+ f[8] = f8 ^ x8;
62
+ f[9] = f9 ^ x9;
63
+ g[0] = g0 ^ x0;
64
+ g[1] = g1 ^ x1;
65
+ g[2] = g2 ^ x2;
66
+ g[3] = g3 ^ x3;
67
+ g[4] = g4 ^ x4;
68
+ g[5] = g5 ^ x5;
69
+ g[6] = g6 ^ x6;
70
+ g[7] = g7 ^ x7;
71
+ g[8] = g8 ^ x8;
72
+ g[9] = g9 ^ x9;
73
+ }
@@ -0,0 +1,67 @@
1
+ #include "fe.h"
2
+
3
/*
Read three bytes at in as a little-endian unsigned integer
(in[0] is the least significant byte).
*/
static uint64_t load_3(const unsigned char *in)
{
  return  (uint64_t) in[0]
       | ((uint64_t) in[1] << 8)
       | ((uint64_t) in[2] << 16);
}
11
+
12
/*
Read four bytes at in as a little-endian unsigned integer
(in[0] is the least significant byte).
*/
static uint64_t load_4(const unsigned char *in)
{
  return  (uint64_t) in[0]
       | ((uint64_t) in[1] << 8)
       | ((uint64_t) in[2] << 16)
       | ((uint64_t) in[3] << 24);
}
21
+
22
+ void fe_frombytes(fe h,const unsigned char *s)
23
+ {
24
+ int64_t h0 = load_4(s);
25
+ int64_t h1 = load_3(s + 4) << 6;
26
+ int64_t h2 = load_3(s + 7) << 5;
27
+ int64_t h3 = load_3(s + 10) << 3;
28
+ int64_t h4 = load_3(s + 13) << 2;
29
+ int64_t h5 = load_4(s + 16);
30
+ int64_t h6 = load_3(s + 20) << 7;
31
+ int64_t h7 = load_3(s + 23) << 5;
32
+ int64_t h8 = load_3(s + 26) << 4;
33
+ int64_t h9 = (load_3(s + 29) & 8388607) << 2;
34
+ int64_t carry0;
35
+ int64_t carry1;
36
+ int64_t carry2;
37
+ int64_t carry3;
38
+ int64_t carry4;
39
+ int64_t carry5;
40
+ int64_t carry6;
41
+ int64_t carry7;
42
+ int64_t carry8;
43
+ int64_t carry9;
44
+
45
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
46
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
47
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
48
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
49
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
50
+
51
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
52
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
53
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
54
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
55
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
56
+
57
+ h[0] = (int32_t)h0;
58
+ h[1] = (int32_t)h1;
59
+ h[2] = (int32_t)h2;
60
+ h[3] = (int32_t)h3;
61
+ h[4] = (int32_t)h4;
62
+ h[5] = (int32_t)h5;
63
+ h[6] = (int32_t)h6;
64
+ h[7] = (int32_t)h7;
65
+ h[8] = (int32_t)h8;
66
+ h[9] = (int32_t)h9;
67
+ }
@@ -0,0 +1,14 @@
1
+ #include "fe.h"
2
+
3
+ void fe_invert(fe out,fe z)
4
+ {
5
+ fe t0;
6
+ fe t1;
7
+ fe t2;
8
+ fe t3;
9
+ int i;
10
+
11
+ #include "pow225521.h"
12
+
13
+ return;
14
+ }
@@ -0,0 +1,252 @@
1
+ #include "fe.h"
2
+
3
+ /*
4
+ h = f * g
5
+ Can overlap h with f or g.
6
+
7
+ Preconditions:
8
+ |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
9
+ |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
10
+
11
+ Postconditions:
12
+ |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
13
+ */
14
+
15
+ /*
16
+ Notes on implementation strategy:
17
+
18
+ Using schoolbook multiplication.
19
+ Karatsuba would save a little in some cost models.
20
+
21
+ Most multiplications by 2 and 19 are 32-bit precomputations;
22
+ cheaper than 64-bit postcomputations.
23
+
24
+ There is one remaining multiplication by 19 in the carry chain;
25
+ one *19 precomputation can be merged into this,
26
+ but the resulting data flow is considerably less clean.
27
+
28
+ There are 12 carries below.
29
+ 10 of them are 2-way parallelizable and vectorizable.
30
+ Can get away with 11 carries, but then data flow is much deeper.
31
+
32
+ With tighter constraints on inputs can squeeze carries into int32.
33
+ */
34
+
35
+ void fe_mul(fe h,fe f,fe g)
36
+ {
37
+ int32_t f0 = f[0];
38
+ int32_t f1 = f[1];
39
+ int32_t f2 = f[2];
40
+ int32_t f3 = f[3];
41
+ int32_t f4 = f[4];
42
+ int32_t f5 = f[5];
43
+ int32_t f6 = f[6];
44
+ int32_t f7 = f[7];
45
+ int32_t f8 = f[8];
46
+ int32_t f9 = f[9];
47
+ int32_t g0 = g[0];
48
+ int32_t g1 = g[1];
49
+ int32_t g2 = g[2];
50
+ int32_t g3 = g[3];
51
+ int32_t g4 = g[4];
52
+ int32_t g5 = g[5];
53
+ int32_t g6 = g[6];
54
+ int32_t g7 = g[7];
55
+ int32_t g8 = g[8];
56
+ int32_t g9 = g[9];
57
+ int32_t g1_19 = 19 * g1; /* 1.4*2^29 */
58
+ int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */
59
+ int32_t g3_19 = 19 * g3;
60
+ int32_t g4_19 = 19 * g4;
61
+ int32_t g5_19 = 19 * g5;
62
+ int32_t g6_19 = 19 * g6;
63
+ int32_t g7_19 = 19 * g7;
64
+ int32_t g8_19 = 19 * g8;
65
+ int32_t g9_19 = 19 * g9;
66
+ int32_t f1_2 = 2 * f1;
67
+ int32_t f3_2 = 2 * f3;
68
+ int32_t f5_2 = 2 * f5;
69
+ int32_t f7_2 = 2 * f7;
70
+ int32_t f9_2 = 2 * f9;
71
+ int64_t f0g0 = f0 * (int64_t) g0;
72
+ int64_t f0g1 = f0 * (int64_t) g1;
73
+ int64_t f0g2 = f0 * (int64_t) g2;
74
+ int64_t f0g3 = f0 * (int64_t) g3;
75
+ int64_t f0g4 = f0 * (int64_t) g4;
76
+ int64_t f0g5 = f0 * (int64_t) g5;
77
+ int64_t f0g6 = f0 * (int64_t) g6;
78
+ int64_t f0g7 = f0 * (int64_t) g7;
79
+ int64_t f0g8 = f0 * (int64_t) g8;
80
+ int64_t f0g9 = f0 * (int64_t) g9;
81
+ int64_t f1g0 = f1 * (int64_t) g0;
82
+ int64_t f1g1_2 = f1_2 * (int64_t) g1;
83
+ int64_t f1g2 = f1 * (int64_t) g2;
84
+ int64_t f1g3_2 = f1_2 * (int64_t) g3;
85
+ int64_t f1g4 = f1 * (int64_t) g4;
86
+ int64_t f1g5_2 = f1_2 * (int64_t) g5;
87
+ int64_t f1g6 = f1 * (int64_t) g6;
88
+ int64_t f1g7_2 = f1_2 * (int64_t) g7;
89
+ int64_t f1g8 = f1 * (int64_t) g8;
90
+ int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
91
+ int64_t f2g0 = f2 * (int64_t) g0;
92
+ int64_t f2g1 = f2 * (int64_t) g1;
93
+ int64_t f2g2 = f2 * (int64_t) g2;
94
+ int64_t f2g3 = f2 * (int64_t) g3;
95
+ int64_t f2g4 = f2 * (int64_t) g4;
96
+ int64_t f2g5 = f2 * (int64_t) g5;
97
+ int64_t f2g6 = f2 * (int64_t) g6;
98
+ int64_t f2g7 = f2 * (int64_t) g7;
99
+ int64_t f2g8_19 = f2 * (int64_t) g8_19;
100
+ int64_t f2g9_19 = f2 * (int64_t) g9_19;
101
+ int64_t f3g0 = f3 * (int64_t) g0;
102
+ int64_t f3g1_2 = f3_2 * (int64_t) g1;
103
+ int64_t f3g2 = f3 * (int64_t) g2;
104
+ int64_t f3g3_2 = f3_2 * (int64_t) g3;
105
+ int64_t f3g4 = f3 * (int64_t) g4;
106
+ int64_t f3g5_2 = f3_2 * (int64_t) g5;
107
+ int64_t f3g6 = f3 * (int64_t) g6;
108
+ int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
109
+ int64_t f3g8_19 = f3 * (int64_t) g8_19;
110
+ int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
111
+ int64_t f4g0 = f4 * (int64_t) g0;
112
+ int64_t f4g1 = f4 * (int64_t) g1;
113
+ int64_t f4g2 = f4 * (int64_t) g2;
114
+ int64_t f4g3 = f4 * (int64_t) g3;
115
+ int64_t f4g4 = f4 * (int64_t) g4;
116
+ int64_t f4g5 = f4 * (int64_t) g5;
117
+ int64_t f4g6_19 = f4 * (int64_t) g6_19;
118
+ int64_t f4g7_19 = f4 * (int64_t) g7_19;
119
+ int64_t f4g8_19 = f4 * (int64_t) g8_19;
120
+ int64_t f4g9_19 = f4 * (int64_t) g9_19;
121
+ int64_t f5g0 = f5 * (int64_t) g0;
122
+ int64_t f5g1_2 = f5_2 * (int64_t) g1;
123
+ int64_t f5g2 = f5 * (int64_t) g2;
124
+ int64_t f5g3_2 = f5_2 * (int64_t) g3;
125
+ int64_t f5g4 = f5 * (int64_t) g4;
126
+ int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
127
+ int64_t f5g6_19 = f5 * (int64_t) g6_19;
128
+ int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
129
+ int64_t f5g8_19 = f5 * (int64_t) g8_19;
130
+ int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
131
+ int64_t f6g0 = f6 * (int64_t) g0;
132
+ int64_t f6g1 = f6 * (int64_t) g1;
133
+ int64_t f6g2 = f6 * (int64_t) g2;
134
+ int64_t f6g3 = f6 * (int64_t) g3;
135
+ int64_t f6g4_19 = f6 * (int64_t) g4_19;
136
+ int64_t f6g5_19 = f6 * (int64_t) g5_19;
137
+ int64_t f6g6_19 = f6 * (int64_t) g6_19;
138
+ int64_t f6g7_19 = f6 * (int64_t) g7_19;
139
+ int64_t f6g8_19 = f6 * (int64_t) g8_19;
140
+ int64_t f6g9_19 = f6 * (int64_t) g9_19;
141
+ int64_t f7g0 = f7 * (int64_t) g0;
142
+ int64_t f7g1_2 = f7_2 * (int64_t) g1;
143
+ int64_t f7g2 = f7 * (int64_t) g2;
144
+ int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
145
+ int64_t f7g4_19 = f7 * (int64_t) g4_19;
146
+ int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
147
+ int64_t f7g6_19 = f7 * (int64_t) g6_19;
148
+ int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
149
+ int64_t f7g8_19 = f7 * (int64_t) g8_19;
150
+ int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
151
+ int64_t f8g0 = f8 * (int64_t) g0;
152
+ int64_t f8g1 = f8 * (int64_t) g1;
153
+ int64_t f8g2_19 = f8 * (int64_t) g2_19;
154
+ int64_t f8g3_19 = f8 * (int64_t) g3_19;
155
+ int64_t f8g4_19 = f8 * (int64_t) g4_19;
156
+ int64_t f8g5_19 = f8 * (int64_t) g5_19;
157
+ int64_t f8g6_19 = f8 * (int64_t) g6_19;
158
+ int64_t f8g7_19 = f8 * (int64_t) g7_19;
159
+ int64_t f8g8_19 = f8 * (int64_t) g8_19;
160
+ int64_t f8g9_19 = f8 * (int64_t) g9_19;
161
+ int64_t f9g0 = f9 * (int64_t) g0;
162
+ int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
163
+ int64_t f9g2_19 = f9 * (int64_t) g2_19;
164
+ int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
165
+ int64_t f9g4_19 = f9 * (int64_t) g4_19;
166
+ int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
167
+ int64_t f9g6_19 = f9 * (int64_t) g6_19;
168
+ int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
169
+ int64_t f9g8_19 = f9 * (int64_t) g8_19;
170
+ int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
171
+ int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
172
+ int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
173
+ int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
174
+ int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
175
+ int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
176
+ int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
177
+ int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
178
+ int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
179
+ int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
180
+ int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
181
+ int64_t carry0;
182
+ int64_t carry1;
183
+ int64_t carry2;
184
+ int64_t carry3;
185
+ int64_t carry4;
186
+ int64_t carry5;
187
+ int64_t carry6;
188
+ int64_t carry7;
189
+ int64_t carry8;
190
+ int64_t carry9;
191
+
192
+ /*
193
+ |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
194
+ i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
195
+ |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
196
+ i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
197
+ */
198
+
199
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
200
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
201
+ /* |h0| <= 2^25 */
202
+ /* |h4| <= 2^25 */
203
+ /* |h1| <= 1.51*2^58 */
204
+ /* |h5| <= 1.51*2^58 */
205
+
206
+ carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
207
+ carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
208
+ /* |h1| <= 2^24; from now on fits into int32 */
209
+ /* |h5| <= 2^24; from now on fits into int32 */
210
+ /* |h2| <= 1.21*2^59 */
211
+ /* |h6| <= 1.21*2^59 */
212
+
213
+ carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
214
+ carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
215
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
216
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
217
+ /* |h3| <= 1.51*2^58 */
218
+ /* |h7| <= 1.51*2^58 */
219
+
220
+ carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
221
+ carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
222
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
223
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
224
+ /* |h4| <= 1.52*2^33 */
225
+ /* |h8| <= 1.52*2^33 */
226
+
227
+ carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
228
+ carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
229
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
230
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
231
+ /* |h5| <= 1.01*2^24 */
232
+ /* |h9| <= 1.51*2^58 */
233
+
234
+ carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
235
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
236
+ /* |h0| <= 1.8*2^37 */
237
+
238
+ carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
239
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
240
+ /* |h1| <= 1.01*2^24 */
241
+
242
+ h[0] = (int32_t)h0;
243
+ h[1] = (int32_t)h1;
244
+ h[2] = (int32_t)h2;
245
+ h[3] = (int32_t)h3;
246
+ h[4] = (int32_t)h4;
247
+ h[5] = (int32_t)h5;
248
+ h[6] = (int32_t)h6;
249
+ h[7] = (int32_t)h7;
250
+ h[8] = (int32_t)h8;
251
+ h[9] = (int32_t)h9;
252
+ }