ed25519_blake2b 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +23 -0
- data/LICENSE +21 -0
- data/README.md +39 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ed25519_blake2b.gemspec +31 -0
- data/ext/ed25519_blake2b/blake2-config.h +72 -0
- data/ext/ed25519_blake2b/blake2-impl.h +160 -0
- data/ext/ed25519_blake2b/blake2.h +195 -0
- data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
- data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
- data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
- data/ext/ed25519_blake2b/blake2b-round.h +157 -0
- data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
- data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
- data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
- data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
- data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
- data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
- data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
- data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
- data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
- data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
- data/ext/ed25519_blake2b/ed25519.c +150 -0
- data/ext/ed25519_blake2b/ed25519.h +30 -0
- data/ext/ed25519_blake2b/extconf.rb +3 -0
- data/ext/ed25519_blake2b/fuzz/README.md +173 -0
- data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
- data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
- data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
- data/ext/ed25519_blake2b/rbext.c +25 -0
- data/ext/ed25519_blake2b/regression.h +1024 -0
- data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
- data/lib/ed25519_blake2b/version.rb +3 -0
- metadata +147 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
<?php
|
2
|
+
/**
 * Print a string followed by a single newline character.
 *
 * @param mixed $str value to print (converted to string by echo)
 */
function echoln($str) {
	echo $str, "\n";
}
|
6
|
+
|
7
|
+
/**
 * Print the command line help text, optionally followed by a reason.
 *
 * @param mixed $reason extra message printed after the help text; skipped
 *                      when it evaluates to false (e.g. "" or null)
 */
function usage($reason) {
	$help = array(
		"Usage: php build-nix.php [flags]",
		"Flags in parantheses are optional",
		"",
		" --bits=[32,64]",
		" --function=[curve25519,ed25519]",
		" (--compiler=[*gcc,clang,icc]) which compiler to use, gcc is default",
		" (--with-openssl) use openssl for SHA512",
		" (--with-sse2) additionally fuzz against SSE2",
		" (--no-asm) don't use platform specific asm",
		"",
	);

	foreach ($help as $line) {
		echoln($line);
	}

	if ($reason) {
		echoln($reason);
	}
}
|
21
|
+
|
22
|
+
/**
 * Remove the intermediate object files from the current directory.
 */
function cleanup() {
	$command = "rm -f *.o";
	system($command);
}
|
25
|
+
|
26
|
+
/**
 * Announce and run a shell command; abort the whole script if it fails.
 *
 * On a non-zero exit status from the command the object files are cleaned
 * up and the script terminates with exit status 1, so that callers (shell
 * scripts, CI) can detect the failed build.
 *
 * @param string $desc human readable description printed before running
 * @param string $cmd  shell command line passed to system()
 */
function runcmd($desc, $cmd) {
	echoln($desc);

	$ret = 0;
	system($cmd, $ret);
	if ($ret !== 0) {
		cleanup();
		// A bare "exit" would report success (status 0) for a failed build.
		exit(1);
	}
}
|
36
|
+
|
37
|
+
/**
 * Base type for a parsed command line option.
 */
class argument {
	// true once the option was found on the command line
	public $set;
	// value carried by the option
	public $value;
}
|
40
|
+
|
41
|
+
/**
 * A "--flag=value" command line option whose value must come from a fixed
 * list of legal values.
 */
class multiargument extends argument {
	/**
	 * Scan the global $argv for the first "--$flag=..." argument.
	 *
	 * When found with a legal value, $this->value receives it and
	 * $this->set becomes true. When found with an illegal value, the usage
	 * text is printed and the script exits with status 1. When not found,
	 * $this->set stays false.
	 *
	 * Declared as __construct: a method merely named after the class is not
	 * treated as a constructor by PHP 8, which would leave the object
	 * uninitialised.
	 *
	 * @param string $flag         option name without the leading "--"
	 * @param array  $legal_values list of accepted string values
	 */
	function __construct($flag, $legal_values) {
		global $argc, $argv;

		$this->set = false;

		// Plain prefix comparison: only arguments that *start* with
		// "--flag=" are considered, and no regex metacharacters in $flag
		// can change the meaning of the match.
		$prefix = "--{$flag}=";
		$prefix_len = strlen($prefix);

		for ($i = 1; $i < $argc; $i++) {
			if (strncmp($argv[$i], $prefix, $prefix_len) !== 0)
				continue;

			// substr() may return false for an empty remainder on older PHP.
			$candidate = (string) substr($argv[$i], $prefix_len);
			if (in_array($candidate, $legal_values, true)) {
				$this->value = $candidate;
				$this->set = true;
				return;
			}

			usage("{$candidate} is not a valid parameter to --{$flag}!");
			exit(1);
		}
	}
}
|
65
|
+
|
66
|
+
/**
 * A boolean "--flag" command line switch.
 */
class flag extends argument {
	/**
	 * Scan the global $argv for an argument exactly equal to "--$flag".
	 *
	 * When present, both $this->value and $this->set become true;
	 * otherwise $this->set stays false.
	 *
	 * Declared as __construct: a method merely named after the class is not
	 * treated as a constructor by PHP 8, which would leave the object
	 * uninitialised.
	 *
	 * @param string $flag switch name without the leading "--"
	 */
	function __construct($flag) {
		global $argc, $argv;

		$this->set = false;

		$flag = "--{$flag}";
		for ($i = 1; $i < $argc; $i++) {
			if ($argv[$i] === $flag) {
				$this->value = true;
				$this->set = true;
				return;
			}
		}
	}
}
|
82
|
+
|
83
|
+
// ---- command line parsing -------------------------------------------------
$bits = new multiargument("bits", array("32", "64"));
$function = new multiargument("function", array("curve25519", "ed25519"));
$compiler = new multiargument("compiler", array("gcc", "clang", "icc"));
$with_sse2 = new flag("with-sse2");
$with_openssl = new flag("with-openssl");
$no_asm = new flag("no-asm");

// --bits and --function are mandatory; collect every missing one.
$err = "";
if (!$bits->set)
	$err .= "--bits not set\n";
if (!$function->set)
	$err .= "--function not set\n";

if ($err !== "") {
	usage($err);
	// Non-zero status so that callers can detect the invalid invocation.
	exit(1);
}

// ---- compiler / linker settings -------------------------------------------
$compile = ($compiler->set) ? $compiler->value : "gcc";
$link = "";
$flags = "-O3 -m{$bits->value}";

if ($with_openssl->set)
	$link .= " -lssl -lcrypto";
else
	$flags .= " -DED25519_REFHASH -DED25519_TEST";
if ($no_asm->set)
	$flags .= " -DED25519_NO_INLINE_ASM";

// ---- build ----------------------------------------------------------------
// $function->value is guaranteed to be "curve25519" or "ed25519" here; both
// targets use the same steps and differ only in the file name stem.
$target = $function->value;

runcmd("building ref10..", "{$compile} {$flags} {$target}-ref10.c -c -o {$target}-ref10.o");
runcmd("building ed25519..", "{$compile} {$flags} ed25519-donna.c -c -o ed25519.o");
if ($with_sse2->set) {
	runcmd("building ed25519-sse2..", "{$compile} {$flags} ed25519-donna-sse2.c -c -o ed25519-sse2.o -msse2");
	// The SSE2 object is only defined/linked for the final link step.
	$flags .= " -DED25519_SSE2";
	$link .= " ed25519-sse2.o";
}
runcmd("linking..", "{$compile} {$flags} {$link} fuzz-{$target}.c ed25519.o {$target}-ref10.o -o fuzz-{$target}");
echoln("fuzz-{$target} built.");

cleanup();
|
134
|
+
?>
|
@@ -0,0 +1,1272 @@
|
|
1
|
+
#include <stdint.h>
|
2
|
+
|
3
|
+
typedef int32_t crypto_int32;
|
4
|
+
typedef int64_t crypto_int64;
|
5
|
+
typedef uint64_t crypto_uint64;
|
6
|
+
|
7
|
+
typedef crypto_int32 fe[10];
|
8
|
+
|
9
|
+
/*
|
10
|
+
h = 0
|
11
|
+
*/
|
12
|
+
|
13
|
+
void fe_0(fe h)
{
  int i;

  /* Clear all ten limbs: h = 0. */
  for (i = 0; i < 10; ++i)
    h[i] = 0;
}
|
26
|
+
|
27
|
+
/*
|
28
|
+
h = 1
|
29
|
+
*/
|
30
|
+
|
31
|
+
void fe_1(fe h)
{
  int i;

  /* Lowest limb is 1, every other limb is 0: h = 1. */
  h[0] = 1;
  for (i = 1; i < 10; ++i)
    h[i] = 0;
}
|
44
|
+
|
45
|
+
/*
|
46
|
+
h = f + g
|
47
|
+
Can overlap h with f or g.
|
48
|
+
|
49
|
+
Preconditions:
|
50
|
+
|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
51
|
+
|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
52
|
+
|
53
|
+
Postconditions:
|
54
|
+
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
55
|
+
*/
|
56
|
+
|
57
|
+
void fe_add(fe h,fe f,fe g)
{
  crypto_int32 sum[10];
  int i;

  /*
  Limb-wise addition without carrying. All inputs are consumed before
  anything is stored to h, so h may be the same array as f or g.
  */
  for (i = 0; i < 10; ++i)
    sum[i] = f[i] + g[i];

  for (i = 0; i < 10; ++i)
    h[i] = sum[i];
}
|
100
|
+
|
101
|
+
/*
|
102
|
+
h = f
|
103
|
+
*/
|
104
|
+
|
105
|
+
void fe_copy(fe h,fe f)
{
  crypto_int32 snapshot[10];
  int i;

  /* Read every limb of f first, then store them all to h. */
  for (i = 0; i < 10; ++i)
    snapshot[i] = f[i];

  for (i = 0; i < 10; ++i)
    h[i] = snapshot[i];
}
|
128
|
+
|
129
|
+
|
130
|
+
/*
|
131
|
+
Replace (f,g) with (g,f) if b == 1;
|
132
|
+
replace (f,g) with (f,g) if b == 0.
|
133
|
+
|
134
|
+
Preconditions: b in {0,1}.
|
135
|
+
*/
|
136
|
+
|
137
|
+
void fe_cswap(fe f,fe g,unsigned int b)
{
  crypto_int32 fv[10];
  crypto_int32 gv[10];
  crypto_int32 delta[10];
  int i;

  for (i = 0; i < 10; ++i)
    fv[i] = f[i];
  for (i = 0; i < 10; ++i)
    gv[i] = g[i];

  /* delta holds the bits in which the two elements differ. */
  for (i = 0; i < 10; ++i)
    delta[i] = fv[i] ^ gv[i];

  /* Unsigned negation: b == 0 stays 0, b == 1 becomes an all-ones mask. */
  b = -b;
  for (i = 0; i < 10; ++i)
    delta[i] &= b;

  /*
  XOR-ing with the masked difference either leaves both values untouched
  (mask 0) or exchanges them (mask all-ones), without branching on b.
  */
  for (i = 0; i < 10; ++i)
    f[i] = fv[i] ^ delta[i];
  for (i = 0; i < 10; ++i)
    g[i] = gv[i] ^ delta[i];
}
|
201
|
+
|
202
|
+
static crypto_uint64 load_3(const unsigned char *in)
|
203
|
+
{
|
204
|
+
crypto_uint64 result;
|
205
|
+
result = (crypto_uint64) in[0];
|
206
|
+
result |= ((crypto_uint64) in[1]) << 8;
|
207
|
+
result |= ((crypto_uint64) in[2]) << 16;
|
208
|
+
return result;
|
209
|
+
}
|
210
|
+
|
211
|
+
static crypto_uint64 load_4(const unsigned char *in)
|
212
|
+
{
|
213
|
+
crypto_uint64 result;
|
214
|
+
result = (crypto_uint64) in[0];
|
215
|
+
result |= ((crypto_uint64) in[1]) << 8;
|
216
|
+
result |= ((crypto_uint64) in[2]) << 16;
|
217
|
+
result |= ((crypto_uint64) in[3]) << 24;
|
218
|
+
return result;
|
219
|
+
}
|
220
|
+
|
221
|
+
void fe_frombytes(fe h,const unsigned char *s)
{
  crypto_int64 limb[10];
  crypto_int64 carry;
  int i;

  /* Slice the 32 input bytes into ten overlapping little-endian chunks. */
  limb[0] = load_4(s);
  limb[1] = load_3(s + 4) << 6;
  limb[2] = load_3(s + 7) << 5;
  limb[3] = load_3(s + 10) << 3;
  limb[4] = load_3(s + 13) << 2;
  limb[5] = load_4(s + 16);
  limb[6] = load_3(s + 20) << 7;
  limb[7] = load_3(s + 23) << 5;
  limb[8] = load_3(s + 26) << 4;
  limb[9] = load_3(s + 29) << 2;

  /*
  Odd limbs: carry out everything above 25 bits (rounded) into the next
  limb. The carry out of limb 9 wraps around into limb 0 multiplied by 19.
  The five steps touch disjoint limbs, so their order does not matter.
  */
  for (i = 1; i < 10; i += 2) {
    carry = (limb[i] + (crypto_int64) (1<<24)) >> 25;
    if (i == 9)
      limb[0] += carry * 19;
    else
      limb[i + 1] += carry;
    limb[i] -= carry << 25;
  }

  /* Even limbs: carry out everything above 26 bits (rounded). */
  for (i = 0; i < 10; i += 2) {
    carry = (limb[i] + (crypto_int64) (1<<25)) >> 26;
    limb[i + 1] += carry;
    limb[i] -= carry << 26;
  }

  for (i = 0; i < 10; ++i)
    h[i] = limb[i];
}
|
267
|
+
|
268
|
+
|
269
|
+
/*
|
270
|
+
h = f * g
|
271
|
+
Can overlap h with f or g.
|
272
|
+
|
273
|
+
Preconditions:
|
274
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
275
|
+
|g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
276
|
+
|
277
|
+
Postconditions:
|
278
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
279
|
+
*/
|
280
|
+
|
281
|
+
/*
|
282
|
+
Notes on implementation strategy:
|
283
|
+
|
284
|
+
Using schoolbook multiplication.
|
285
|
+
Karatsuba would save a little in some cost models.
|
286
|
+
|
287
|
+
Most multiplications by 2 and 19 are 32-bit precomputations;
|
288
|
+
cheaper than 64-bit postcomputations.
|
289
|
+
|
290
|
+
There is one remaining multiplication by 19 in the carry chain;
|
291
|
+
one *19 precomputation can be merged into this,
|
292
|
+
but the resulting data flow is considerably less clean.
|
293
|
+
|
294
|
+
There are 12 carries below.
|
295
|
+
10 of them are 2-way parallelizable and vectorizable.
|
296
|
+
Can get away with 11 carries, but then data flow is much deeper.
|
297
|
+
|
298
|
+
With tighter constraints on inputs can squeeze carries into int32.
|
299
|
+
*/
|
300
|
+
|
301
|
+
void fe_mul(fe h,fe f,fe g)
{
  /* Read every input limb up front so that h may alias f or g. */
  crypto_int32 f0 = f[0];
  crypto_int32 f1 = f[1];
  crypto_int32 f2 = f[2];
  crypto_int32 f3 = f[3];
  crypto_int32 f4 = f[4];
  crypto_int32 f5 = f[5];
  crypto_int32 f6 = f[6];
  crypto_int32 f7 = f[7];
  crypto_int32 f8 = f[8];
  crypto_int32 f9 = f[9];
  crypto_int32 g0 = g[0];
  crypto_int32 g1 = g[1];
  crypto_int32 g2 = g[2];
  crypto_int32 g3 = g[3];
  crypto_int32 g4 = g[4];
  crypto_int32 g5 = g[5];
  crypto_int32 g6 = g[6];
  crypto_int32 g7 = g[7];
  crypto_int32 g8 = g[8];
  crypto_int32 g9 = g[9];
  /* 32-bit precomputations of the factors 19 and 2. */
  crypto_int32 g1_19 = 19 * g1; /* 1.4*2^29 */
  crypto_int32 g2_19 = 19 * g2; /* 1.4*2^30; still ok */
  crypto_int32 g3_19 = 19 * g3;
  crypto_int32 g4_19 = 19 * g4;
  crypto_int32 g5_19 = 19 * g5;
  crypto_int32 g6_19 = 19 * g6;
  crypto_int32 g7_19 = 19 * g7;
  crypto_int32 g8_19 = 19 * g8;
  crypto_int32 g9_19 = 19 * g9;
  crypto_int32 f1_2 = 2 * f1;
  crypto_int32 f3_2 = 2 * f3;
  crypto_int32 f5_2 = 2 * f5;
  crypto_int32 f7_2 = 2 * f7;
  crypto_int32 f9_2 = 2 * f9;
  /* The 100 partial products, each computed in 64 bits. */
  crypto_int64 f0g0    = f0   * (crypto_int64) g0;
  crypto_int64 f0g1    = f0   * (crypto_int64) g1;
  crypto_int64 f0g2    = f0   * (crypto_int64) g2;
  crypto_int64 f0g3    = f0   * (crypto_int64) g3;
  crypto_int64 f0g4    = f0   * (crypto_int64) g4;
  crypto_int64 f0g5    = f0   * (crypto_int64) g5;
  crypto_int64 f0g6    = f0   * (crypto_int64) g6;
  crypto_int64 f0g7    = f0   * (crypto_int64) g7;
  crypto_int64 f0g8    = f0   * (crypto_int64) g8;
  crypto_int64 f0g9    = f0   * (crypto_int64) g9;
  crypto_int64 f1g0    = f1   * (crypto_int64) g0;
  crypto_int64 f1g1_2  = f1_2 * (crypto_int64) g1;
  crypto_int64 f1g2    = f1   * (crypto_int64) g2;
  crypto_int64 f1g3_2  = f1_2 * (crypto_int64) g3;
  crypto_int64 f1g4    = f1   * (crypto_int64) g4;
  crypto_int64 f1g5_2  = f1_2 * (crypto_int64) g5;
  crypto_int64 f1g6    = f1   * (crypto_int64) g6;
  crypto_int64 f1g7_2  = f1_2 * (crypto_int64) g7;
  crypto_int64 f1g8    = f1   * (crypto_int64) g8;
  crypto_int64 f1g9_38 = f1_2 * (crypto_int64) g9_19;
  crypto_int64 f2g0    = f2   * (crypto_int64) g0;
  crypto_int64 f2g1    = f2   * (crypto_int64) g1;
  crypto_int64 f2g2    = f2   * (crypto_int64) g2;
  crypto_int64 f2g3    = f2   * (crypto_int64) g3;
  crypto_int64 f2g4    = f2   * (crypto_int64) g4;
  crypto_int64 f2g5    = f2   * (crypto_int64) g5;
  crypto_int64 f2g6    = f2   * (crypto_int64) g6;
  crypto_int64 f2g7    = f2   * (crypto_int64) g7;
  crypto_int64 f2g8_19 = f2   * (crypto_int64) g8_19;
  crypto_int64 f2g9_19 = f2   * (crypto_int64) g9_19;
  crypto_int64 f3g0    = f3   * (crypto_int64) g0;
  crypto_int64 f3g1_2  = f3_2 * (crypto_int64) g1;
  crypto_int64 f3g2    = f3   * (crypto_int64) g2;
  crypto_int64 f3g3_2  = f3_2 * (crypto_int64) g3;
  crypto_int64 f3g4    = f3   * (crypto_int64) g4;
  crypto_int64 f3g5_2  = f3_2 * (crypto_int64) g5;
  crypto_int64 f3g6    = f3   * (crypto_int64) g6;
  crypto_int64 f3g7_38 = f3_2 * (crypto_int64) g7_19;
  crypto_int64 f3g8_19 = f3   * (crypto_int64) g8_19;
  crypto_int64 f3g9_38 = f3_2 * (crypto_int64) g9_19;
  crypto_int64 f4g0    = f4   * (crypto_int64) g0;
  crypto_int64 f4g1    = f4   * (crypto_int64) g1;
  crypto_int64 f4g2    = f4   * (crypto_int64) g2;
  crypto_int64 f4g3    = f4   * (crypto_int64) g3;
  crypto_int64 f4g4    = f4   * (crypto_int64) g4;
  crypto_int64 f4g5    = f4   * (crypto_int64) g5;
  crypto_int64 f4g6_19 = f4   * (crypto_int64) g6_19;
  crypto_int64 f4g7_19 = f4   * (crypto_int64) g7_19;
  crypto_int64 f4g8_19 = f4   * (crypto_int64) g8_19;
  crypto_int64 f4g9_19 = f4   * (crypto_int64) g9_19;
  crypto_int64 f5g0    = f5   * (crypto_int64) g0;
  crypto_int64 f5g1_2  = f5_2 * (crypto_int64) g1;
  crypto_int64 f5g2    = f5   * (crypto_int64) g2;
  crypto_int64 f5g3_2  = f5_2 * (crypto_int64) g3;
  crypto_int64 f5g4    = f5   * (crypto_int64) g4;
  crypto_int64 f5g5_38 = f5_2 * (crypto_int64) g5_19;
  crypto_int64 f5g6_19 = f5   * (crypto_int64) g6_19;
  crypto_int64 f5g7_38 = f5_2 * (crypto_int64) g7_19;
  crypto_int64 f5g8_19 = f5   * (crypto_int64) g8_19;
  crypto_int64 f5g9_38 = f5_2 * (crypto_int64) g9_19;
  crypto_int64 f6g0    = f6   * (crypto_int64) g0;
  crypto_int64 f6g1    = f6   * (crypto_int64) g1;
  crypto_int64 f6g2    = f6   * (crypto_int64) g2;
  crypto_int64 f6g3    = f6   * (crypto_int64) g3;
  crypto_int64 f6g4_19 = f6   * (crypto_int64) g4_19;
  crypto_int64 f6g5_19 = f6   * (crypto_int64) g5_19;
  crypto_int64 f6g6_19 = f6   * (crypto_int64) g6_19;
  crypto_int64 f6g7_19 = f6   * (crypto_int64) g7_19;
  crypto_int64 f6g8_19 = f6   * (crypto_int64) g8_19;
  crypto_int64 f6g9_19 = f6   * (crypto_int64) g9_19;
  crypto_int64 f7g0    = f7   * (crypto_int64) g0;
  crypto_int64 f7g1_2  = f7_2 * (crypto_int64) g1;
  crypto_int64 f7g2    = f7   * (crypto_int64) g2;
  crypto_int64 f7g3_38 = f7_2 * (crypto_int64) g3_19;
  crypto_int64 f7g4_19 = f7   * (crypto_int64) g4_19;
  crypto_int64 f7g5_38 = f7_2 * (crypto_int64) g5_19;
  crypto_int64 f7g6_19 = f7   * (crypto_int64) g6_19;
  crypto_int64 f7g7_38 = f7_2 * (crypto_int64) g7_19;
  crypto_int64 f7g8_19 = f7   * (crypto_int64) g8_19;
  crypto_int64 f7g9_38 = f7_2 * (crypto_int64) g9_19;
  crypto_int64 f8g0    = f8   * (crypto_int64) g0;
  crypto_int64 f8g1    = f8   * (crypto_int64) g1;
  crypto_int64 f8g2_19 = f8   * (crypto_int64) g2_19;
  crypto_int64 f8g3_19 = f8   * (crypto_int64) g3_19;
  crypto_int64 f8g4_19 = f8   * (crypto_int64) g4_19;
  crypto_int64 f8g5_19 = f8   * (crypto_int64) g5_19;
  crypto_int64 f8g6_19 = f8   * (crypto_int64) g6_19;
  crypto_int64 f8g7_19 = f8   * (crypto_int64) g7_19;
  crypto_int64 f8g8_19 = f8   * (crypto_int64) g8_19;
  crypto_int64 f8g9_19 = f8   * (crypto_int64) g9_19;
  crypto_int64 f9g0    = f9   * (crypto_int64) g0;
  crypto_int64 f9g1_38 = f9_2 * (crypto_int64) g1_19;
  crypto_int64 f9g2_19 = f9   * (crypto_int64) g2_19;
  crypto_int64 f9g3_38 = f9_2 * (crypto_int64) g3_19;
  crypto_int64 f9g4_19 = f9   * (crypto_int64) g4_19;
  crypto_int64 f9g5_38 = f9_2 * (crypto_int64) g5_19;
  crypto_int64 f9g6_19 = f9   * (crypto_int64) g6_19;
  crypto_int64 f9g7_38 = f9_2 * (crypto_int64) g7_19;
  crypto_int64 f9g8_19 = f9   * (crypto_int64) g8_19;
  crypto_int64 f9g9_38 = f9_2 * (crypto_int64) g9_19;
  /* Column sums of the schoolbook multiplication. */
  crypto_int64 h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
  crypto_int64 h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
  crypto_int64 h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
  crypto_int64 h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
  crypto_int64 h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
  crypto_int64 h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
  crypto_int64 h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
  crypto_int64 h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
  crypto_int64 h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
  crypto_int64 h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
  crypto_int64 carry0;
  crypto_int64 carry1;
  crypto_int64 carry2;
  crypto_int64 carry3;
  crypto_int64 carry4;
  crypto_int64 carry5;
  crypto_int64 carry6;
  crypto_int64 carry7;
  crypto_int64 carry8;
  crypto_int64 carry9;

  /*
  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
  */

  /*
  Carries can be negative. Left-shifting a negative signed value is
  undefined behaviour in C, so the carry is scaled back up by multiplying
  with a power of two instead of using "carry << k".
  */

  carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((crypto_int64) 1 << 26);
  carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((crypto_int64) 1 << 26);
  /* |h0| <= 2^25 */
  /* |h4| <= 2^25 */
  /* |h1| <= 1.51*2^58 */
  /* |h5| <= 1.51*2^58 */

  carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 * ((crypto_int64) 1 << 25);
  carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 * ((crypto_int64) 1 << 25);
  /* |h1| <= 2^24; from now on fits into int32 */
  /* |h5| <= 2^24; from now on fits into int32 */
  /* |h2| <= 1.21*2^59 */
  /* |h6| <= 1.21*2^59 */

  carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 * ((crypto_int64) 1 << 26);
  carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 * ((crypto_int64) 1 << 26);
  /* |h2| <= 2^25; from now on fits into int32 unchanged */
  /* |h6| <= 2^25; from now on fits into int32 unchanged */
  /* |h3| <= 1.51*2^58 */
  /* |h7| <= 1.51*2^58 */

  carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 * ((crypto_int64) 1 << 25);
  carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 * ((crypto_int64) 1 << 25);
  /* |h3| <= 2^24; from now on fits into int32 unchanged */
  /* |h7| <= 2^24; from now on fits into int32 unchanged */
  /* |h4| <= 1.52*2^33 */
  /* |h8| <= 1.52*2^33 */

  carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((crypto_int64) 1 << 26);
  carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 * ((crypto_int64) 1 << 26);
  /* |h4| <= 2^25; from now on fits into int32 unchanged */
  /* |h8| <= 2^25; from now on fits into int32 unchanged */
  /* |h5| <= 1.01*2^24 */
  /* |h9| <= 1.51*2^58 */

  carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((crypto_int64) 1 << 25);
  /* |h9| <= 2^24; from now on fits into int32 unchanged */
  /* |h0| <= 1.8*2^37 */

  carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((crypto_int64) 1 << 26);
  /* |h0| <= 2^25; from now on fits into int32 unchanged */
  /* |h1| <= 1.01*2^24 */

  h[0] = h0;
  h[1] = h1;
  h[2] = h2;
  h[3] = h3;
  h[4] = h4;
  h[5] = h5;
  h[6] = h6;
  h[7] = h7;
  h[8] = h8;
  h[9] = h9;
}
|
519
|
+
|
520
|
+
/*
|
521
|
+
h = f * 121666
|
522
|
+
Can overlap h with f.
|
523
|
+
|
524
|
+
Preconditions:
|
525
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
526
|
+
|
527
|
+
Postconditions:
|
528
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
529
|
+
*/
|
530
|
+
|
531
|
+
void fe_mul121666(fe h,fe f)
{
  crypto_int64 limb[10];
  crypto_int64 carry;
  int i;

  /* Scale every limb in 64 bits; f is fully read before h is written. */
  for (i = 0; i < 10; ++i)
    limb[i] = f[i] * (crypto_int64) 121666;

  /*
  Carries can be negative. Left-shifting a negative signed value is
  undefined behaviour in C, so the carry is scaled back up by multiplying
  with a power of two instead of using "carry << k".
  */

  /*
  Odd limbs: carry out everything above 25 bits (rounded) into the next
  limb. The carry out of limb 9 wraps around into limb 0 multiplied by 19.
  The five steps touch disjoint limbs, so their order does not matter.
  */
  for (i = 1; i < 10; i += 2) {
    carry = (limb[i] + (crypto_int64) (1<<24)) >> 25;
    if (i == 9)
      limb[0] += carry * 19;
    else
      limb[i + 1] += carry;
    limb[i] -= carry * ((crypto_int64) 1 << 25);
  }

  /* Even limbs: carry out everything above 26 bits (rounded). */
  for (i = 0; i < 10; i += 2) {
    carry = (limb[i] + (crypto_int64) (1<<25)) >> 26;
    limb[i + 1] += carry;
    limb[i] -= carry * ((crypto_int64) 1 << 26);
  }

  for (i = 0; i < 10; ++i)
    h[i] = limb[i];
}
|
587
|
+
|
588
|
+
/*
|
589
|
+
h = f * f
|
590
|
+
Can overlap h with f.
|
591
|
+
|
592
|
+
Preconditions:
|
593
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
594
|
+
|
595
|
+
Postconditions:
|
596
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
597
|
+
*/
|
598
|
+
|
599
|
+
/*
|
600
|
+
See fe_mul.c for discussion of implementation strategy.
|
601
|
+
*/
|
602
|
+
|
603
|
+
void fe_sq(fe h,fe f)
|
604
|
+
{
|
605
|
+
crypto_int32 f0 = f[0];
|
606
|
+
crypto_int32 f1 = f[1];
|
607
|
+
crypto_int32 f2 = f[2];
|
608
|
+
crypto_int32 f3 = f[3];
|
609
|
+
crypto_int32 f4 = f[4];
|
610
|
+
crypto_int32 f5 = f[5];
|
611
|
+
crypto_int32 f6 = f[6];
|
612
|
+
crypto_int32 f7 = f[7];
|
613
|
+
crypto_int32 f8 = f[8];
|
614
|
+
crypto_int32 f9 = f[9];
|
615
|
+
crypto_int32 f0_2 = 2 * f0;
|
616
|
+
crypto_int32 f1_2 = 2 * f1;
|
617
|
+
crypto_int32 f2_2 = 2 * f2;
|
618
|
+
crypto_int32 f3_2 = 2 * f3;
|
619
|
+
crypto_int32 f4_2 = 2 * f4;
|
620
|
+
crypto_int32 f5_2 = 2 * f5;
|
621
|
+
crypto_int32 f6_2 = 2 * f6;
|
622
|
+
crypto_int32 f7_2 = 2 * f7;
|
623
|
+
crypto_int32 f5_38 = 38 * f5; /* 1.31*2^30 */
|
624
|
+
crypto_int32 f6_19 = 19 * f6; /* 1.31*2^30 */
|
625
|
+
crypto_int32 f7_38 = 38 * f7; /* 1.31*2^30 */
|
626
|
+
crypto_int32 f8_19 = 19 * f8; /* 1.31*2^30 */
|
627
|
+
crypto_int32 f9_38 = 38 * f9; /* 1.31*2^30 */
|
628
|
+
crypto_int64 f0f0 = f0 * (crypto_int64) f0;
|
629
|
+
crypto_int64 f0f1_2 = f0_2 * (crypto_int64) f1;
|
630
|
+
crypto_int64 f0f2_2 = f0_2 * (crypto_int64) f2;
|
631
|
+
crypto_int64 f0f3_2 = f0_2 * (crypto_int64) f3;
|
632
|
+
crypto_int64 f0f4_2 = f0_2 * (crypto_int64) f4;
|
633
|
+
crypto_int64 f0f5_2 = f0_2 * (crypto_int64) f5;
|
634
|
+
crypto_int64 f0f6_2 = f0_2 * (crypto_int64) f6;
|
635
|
+
crypto_int64 f0f7_2 = f0_2 * (crypto_int64) f7;
|
636
|
+
crypto_int64 f0f8_2 = f0_2 * (crypto_int64) f8;
|
637
|
+
crypto_int64 f0f9_2 = f0_2 * (crypto_int64) f9;
|
638
|
+
crypto_int64 f1f1_2 = f1_2 * (crypto_int64) f1;
|
639
|
+
crypto_int64 f1f2_2 = f1_2 * (crypto_int64) f2;
|
640
|
+
crypto_int64 f1f3_4 = f1_2 * (crypto_int64) f3_2;
|
641
|
+
crypto_int64 f1f4_2 = f1_2 * (crypto_int64) f4;
|
642
|
+
crypto_int64 f1f5_4 = f1_2 * (crypto_int64) f5_2;
|
643
|
+
crypto_int64 f1f6_2 = f1_2 * (crypto_int64) f6;
|
644
|
+
crypto_int64 f1f7_4 = f1_2 * (crypto_int64) f7_2;
|
645
|
+
crypto_int64 f1f8_2 = f1_2 * (crypto_int64) f8;
|
646
|
+
crypto_int64 f1f9_76 = f1_2 * (crypto_int64) f9_38;
|
647
|
+
crypto_int64 f2f2 = f2 * (crypto_int64) f2;
|
648
|
+
crypto_int64 f2f3_2 = f2_2 * (crypto_int64) f3;
|
649
|
+
crypto_int64 f2f4_2 = f2_2 * (crypto_int64) f4;
|
650
|
+
crypto_int64 f2f5_2 = f2_2 * (crypto_int64) f5;
|
651
|
+
crypto_int64 f2f6_2 = f2_2 * (crypto_int64) f6;
|
652
|
+
crypto_int64 f2f7_2 = f2_2 * (crypto_int64) f7;
|
653
|
+
crypto_int64 f2f8_38 = f2_2 * (crypto_int64) f8_19;
|
654
|
+
crypto_int64 f2f9_38 = f2 * (crypto_int64) f9_38;
|
655
|
+
crypto_int64 f3f3_2 = f3_2 * (crypto_int64) f3;
|
656
|
+
crypto_int64 f3f4_2 = f3_2 * (crypto_int64) f4;
|
657
|
+
crypto_int64 f3f5_4 = f3_2 * (crypto_int64) f5_2;
|
658
|
+
crypto_int64 f3f6_2 = f3_2 * (crypto_int64) f6;
|
659
|
+
crypto_int64 f3f7_76 = f3_2 * (crypto_int64) f7_38;
|
660
|
+
crypto_int64 f3f8_38 = f3_2 * (crypto_int64) f8_19;
|
661
|
+
crypto_int64 f3f9_76 = f3_2 * (crypto_int64) f9_38;
|
662
|
+
crypto_int64 f4f4 = f4 * (crypto_int64) f4;
|
663
|
+
crypto_int64 f4f5_2 = f4_2 * (crypto_int64) f5;
|
664
|
+
crypto_int64 f4f6_38 = f4_2 * (crypto_int64) f6_19;
|
665
|
+
crypto_int64 f4f7_38 = f4 * (crypto_int64) f7_38;
|
666
|
+
crypto_int64 f4f8_38 = f4_2 * (crypto_int64) f8_19;
|
667
|
+
crypto_int64 f4f9_38 = f4 * (crypto_int64) f9_38;
|
668
|
+
crypto_int64 f5f5_38 = f5 * (crypto_int64) f5_38;
|
669
|
+
crypto_int64 f5f6_38 = f5_2 * (crypto_int64) f6_19;
|
670
|
+
crypto_int64 f5f7_76 = f5_2 * (crypto_int64) f7_38;
|
671
|
+
crypto_int64 f5f8_38 = f5_2 * (crypto_int64) f8_19;
|
672
|
+
crypto_int64 f5f9_76 = f5_2 * (crypto_int64) f9_38;
|
673
|
+
crypto_int64 f6f6_19 = f6 * (crypto_int64) f6_19;
|
674
|
+
crypto_int64 f6f7_38 = f6 * (crypto_int64) f7_38;
|
675
|
+
crypto_int64 f6f8_38 = f6_2 * (crypto_int64) f8_19;
|
676
|
+
crypto_int64 f6f9_38 = f6 * (crypto_int64) f9_38;
|
677
|
+
crypto_int64 f7f7_38 = f7 * (crypto_int64) f7_38;
|
678
|
+
crypto_int64 f7f8_38 = f7_2 * (crypto_int64) f8_19;
|
679
|
+
crypto_int64 f7f9_76 = f7_2 * (crypto_int64) f9_38;
|
680
|
+
crypto_int64 f8f8_19 = f8 * (crypto_int64) f8_19;
|
681
|
+
crypto_int64 f8f9_38 = f8 * (crypto_int64) f9_38;
|
682
|
+
crypto_int64 f9f9_38 = f9 * (crypto_int64) f9_38;
|
683
|
+
crypto_int64 h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
|
684
|
+
crypto_int64 h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
|
685
|
+
crypto_int64 h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
|
686
|
+
crypto_int64 h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
|
687
|
+
crypto_int64 h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
|
688
|
+
crypto_int64 h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
|
689
|
+
crypto_int64 h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
|
690
|
+
crypto_int64 h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
|
691
|
+
crypto_int64 h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
|
692
|
+
crypto_int64 h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
|
693
|
+
crypto_int64 carry0;
|
694
|
+
crypto_int64 carry1;
|
695
|
+
crypto_int64 carry2;
|
696
|
+
crypto_int64 carry3;
|
697
|
+
crypto_int64 carry4;
|
698
|
+
crypto_int64 carry5;
|
699
|
+
crypto_int64 carry6;
|
700
|
+
crypto_int64 carry7;
|
701
|
+
crypto_int64 carry8;
|
702
|
+
crypto_int64 carry9;
|
703
|
+
|
704
|
+
carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
|
705
|
+
carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
|
706
|
+
|
707
|
+
carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
|
708
|
+
carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
|
709
|
+
|
710
|
+
carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
|
711
|
+
carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
|
712
|
+
|
713
|
+
carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
|
714
|
+
carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
|
715
|
+
|
716
|
+
carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
|
717
|
+
carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
|
718
|
+
|
719
|
+
carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
|
720
|
+
|
721
|
+
carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
|
722
|
+
|
723
|
+
h[0] = h0;
|
724
|
+
h[1] = h1;
|
725
|
+
h[2] = h2;
|
726
|
+
h[3] = h3;
|
727
|
+
h[4] = h4;
|
728
|
+
h[5] = h5;
|
729
|
+
h[6] = h6;
|
730
|
+
h[7] = h7;
|
731
|
+
h[8] = h8;
|
732
|
+
h[9] = h9;
|
733
|
+
}
|
734
|
+
|
735
|
+
/*
|
736
|
+
h = f - g
|
737
|
+
Can overlap h with f or g.
|
738
|
+
|
739
|
+
Preconditions:
|
740
|
+
|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
741
|
+
|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
742
|
+
|
743
|
+
Postconditions:
|
744
|
+
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
745
|
+
*/
|
746
|
+
|
747
|
+
void fe_sub(fe h,fe f,fe g)
|
748
|
+
{
|
749
|
+
crypto_int32 f0 = f[0];
|
750
|
+
crypto_int32 f1 = f[1];
|
751
|
+
crypto_int32 f2 = f[2];
|
752
|
+
crypto_int32 f3 = f[3];
|
753
|
+
crypto_int32 f4 = f[4];
|
754
|
+
crypto_int32 f5 = f[5];
|
755
|
+
crypto_int32 f6 = f[6];
|
756
|
+
crypto_int32 f7 = f[7];
|
757
|
+
crypto_int32 f8 = f[8];
|
758
|
+
crypto_int32 f9 = f[9];
|
759
|
+
crypto_int32 g0 = g[0];
|
760
|
+
crypto_int32 g1 = g[1];
|
761
|
+
crypto_int32 g2 = g[2];
|
762
|
+
crypto_int32 g3 = g[3];
|
763
|
+
crypto_int32 g4 = g[4];
|
764
|
+
crypto_int32 g5 = g[5];
|
765
|
+
crypto_int32 g6 = g[6];
|
766
|
+
crypto_int32 g7 = g[7];
|
767
|
+
crypto_int32 g8 = g[8];
|
768
|
+
crypto_int32 g9 = g[9];
|
769
|
+
crypto_int32 h0 = f0 - g0;
|
770
|
+
crypto_int32 h1 = f1 - g1;
|
771
|
+
crypto_int32 h2 = f2 - g2;
|
772
|
+
crypto_int32 h3 = f3 - g3;
|
773
|
+
crypto_int32 h4 = f4 - g4;
|
774
|
+
crypto_int32 h5 = f5 - g5;
|
775
|
+
crypto_int32 h6 = f6 - g6;
|
776
|
+
crypto_int32 h7 = f7 - g7;
|
777
|
+
crypto_int32 h8 = f8 - g8;
|
778
|
+
crypto_int32 h9 = f9 - g9;
|
779
|
+
h[0] = h0;
|
780
|
+
h[1] = h1;
|
781
|
+
h[2] = h2;
|
782
|
+
h[3] = h3;
|
783
|
+
h[4] = h4;
|
784
|
+
h[5] = h5;
|
785
|
+
h[6] = h6;
|
786
|
+
h[7] = h7;
|
787
|
+
h[8] = h8;
|
788
|
+
h[9] = h9;
|
789
|
+
}
|
790
|
+
|
791
|
+
/*
|
792
|
+
Preconditions:
|
793
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
794
|
+
|
795
|
+
Write p=2^255-19; q=floor(h/p).
|
796
|
+
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
|
797
|
+
|
798
|
+
Proof:
|
799
|
+
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
|
800
|
+
Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
|
801
|
+
|
802
|
+
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
|
803
|
+
Then 0<y<1.
|
804
|
+
|
805
|
+
Write r=h-pq.
|
806
|
+
Have 0<=r<=p-1=2^255-20.
|
807
|
+
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
|
808
|
+
|
809
|
+
Write x=r+19(2^-255)r+y.
|
810
|
+
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
|
811
|
+
|
812
|
+
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
|
813
|
+
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
|
814
|
+
*/
|
815
|
+
|
816
|
+
void fe_tobytes(unsigned char *s,fe h)
|
817
|
+
{
|
818
|
+
crypto_int32 h0 = h[0];
|
819
|
+
crypto_int32 h1 = h[1];
|
820
|
+
crypto_int32 h2 = h[2];
|
821
|
+
crypto_int32 h3 = h[3];
|
822
|
+
crypto_int32 h4 = h[4];
|
823
|
+
crypto_int32 h5 = h[5];
|
824
|
+
crypto_int32 h6 = h[6];
|
825
|
+
crypto_int32 h7 = h[7];
|
826
|
+
crypto_int32 h8 = h[8];
|
827
|
+
crypto_int32 h9 = h[9];
|
828
|
+
crypto_int32 q;
|
829
|
+
crypto_int32 carry0;
|
830
|
+
crypto_int32 carry1;
|
831
|
+
crypto_int32 carry2;
|
832
|
+
crypto_int32 carry3;
|
833
|
+
crypto_int32 carry4;
|
834
|
+
crypto_int32 carry5;
|
835
|
+
crypto_int32 carry6;
|
836
|
+
crypto_int32 carry7;
|
837
|
+
crypto_int32 carry8;
|
838
|
+
crypto_int32 carry9;
|
839
|
+
|
840
|
+
q = (19 * h9 + (((crypto_int32) 1) << 24)) >> 25;
|
841
|
+
q = (h0 + q) >> 26;
|
842
|
+
q = (h1 + q) >> 25;
|
843
|
+
q = (h2 + q) >> 26;
|
844
|
+
q = (h3 + q) >> 25;
|
845
|
+
q = (h4 + q) >> 26;
|
846
|
+
q = (h5 + q) >> 25;
|
847
|
+
q = (h6 + q) >> 26;
|
848
|
+
q = (h7 + q) >> 25;
|
849
|
+
q = (h8 + q) >> 26;
|
850
|
+
q = (h9 + q) >> 25;
|
851
|
+
|
852
|
+
/* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
|
853
|
+
h0 += 19 * q;
|
854
|
+
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
|
855
|
+
|
856
|
+
carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
|
857
|
+
carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
|
858
|
+
carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
|
859
|
+
carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
|
860
|
+
carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
|
861
|
+
carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
|
862
|
+
carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
|
863
|
+
carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
|
864
|
+
carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
|
865
|
+
carry9 = h9 >> 25; h9 -= carry9 << 25;
|
866
|
+
/* h10 = carry9 */
|
867
|
+
|
868
|
+
/*
|
869
|
+
Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
|
870
|
+
Have h0+...+2^230 h9 between 0 and 2^255-1;
|
871
|
+
evidently 2^255 h10-2^255 q = 0.
|
872
|
+
Goal: Output h0+...+2^230 h9.
|
873
|
+
*/
|
874
|
+
|
875
|
+
s[0] = h0 >> 0;
|
876
|
+
s[1] = h0 >> 8;
|
877
|
+
s[2] = h0 >> 16;
|
878
|
+
s[3] = (h0 >> 24) | (h1 << 2);
|
879
|
+
s[4] = h1 >> 6;
|
880
|
+
s[5] = h1 >> 14;
|
881
|
+
s[6] = (h1 >> 22) | (h2 << 3);
|
882
|
+
s[7] = h2 >> 5;
|
883
|
+
s[8] = h2 >> 13;
|
884
|
+
s[9] = (h2 >> 21) | (h3 << 5);
|
885
|
+
s[10] = h3 >> 3;
|
886
|
+
s[11] = h3 >> 11;
|
887
|
+
s[12] = (h3 >> 19) | (h4 << 6);
|
888
|
+
s[13] = h4 >> 2;
|
889
|
+
s[14] = h4 >> 10;
|
890
|
+
s[15] = h4 >> 18;
|
891
|
+
s[16] = h5 >> 0;
|
892
|
+
s[17] = h5 >> 8;
|
893
|
+
s[18] = h5 >> 16;
|
894
|
+
s[19] = (h5 >> 24) | (h6 << 1);
|
895
|
+
s[20] = h6 >> 7;
|
896
|
+
s[21] = h6 >> 15;
|
897
|
+
s[22] = (h6 >> 23) | (h7 << 3);
|
898
|
+
s[23] = h7 >> 5;
|
899
|
+
s[24] = h7 >> 13;
|
900
|
+
s[25] = (h7 >> 21) | (h8 << 4);
|
901
|
+
s[26] = h8 >> 4;
|
902
|
+
s[27] = h8 >> 12;
|
903
|
+
s[28] = (h8 >> 20) | (h9 << 6);
|
904
|
+
s[29] = h9 >> 2;
|
905
|
+
s[30] = h9 >> 10;
|
906
|
+
s[31] = h9 >> 18;
|
907
|
+
}
|
908
|
+
|
909
|
+
/*
out = z^(2^255-21), computed with a fixed chain of fe_sq and fe_mul calls.

The exponent comments below track the power of z held by each temporary.
out is written only by the final multiplication.
*/
void fe_invert(fe out,fe z)
{
  fe z11;   /* holds z^2 first, then z^11 for the final multiplication */
  fe acc;   /* running value of the form z^(2^k-1) */
  fe tmp;   /* scratch for repeated squarings */
  fe tmp2;  /* second scratch, used for the 20- and 100-squaring runs */
  int left;

  /* z11 = z^2 */
  fe_sq(z11,z);

  /* acc = z^8 */
  fe_sq(acc,z11);
  fe_sq(acc,acc);

  /* acc = z^9 */
  fe_mul(acc,z,acc);

  /* z11 = z^11 */
  fe_mul(z11,z11,acc);

  /* tmp = z^22 */
  fe_sq(tmp,z11);

  /* acc = z^31 = z^(2^5-1) */
  fe_mul(acc,acc,tmp);

  /* tmp = acc^(2^5) */
  fe_sq(tmp,acc);
  for (left = 4;left > 0;--left) fe_sq(tmp,tmp);

  /* acc = z^(2^10-1) */
  fe_mul(acc,tmp,acc);

  /* tmp = acc^(2^10) */
  fe_sq(tmp,acc);
  for (left = 9;left > 0;--left) fe_sq(tmp,tmp);

  /* tmp = z^(2^20-1) */
  fe_mul(tmp,tmp,acc);

  /* tmp2 = tmp^(2^20) */
  fe_sq(tmp2,tmp);
  for (left = 19;left > 0;--left) fe_sq(tmp2,tmp2);

  /* tmp = z^(2^40-1) */
  fe_mul(tmp,tmp2,tmp);

  /* tmp = tmp^(2^10) */
  fe_sq(tmp,tmp);
  for (left = 9;left > 0;--left) fe_sq(tmp,tmp);

  /* acc = z^(2^50-1) */
  fe_mul(acc,tmp,acc);

  /* tmp = acc^(2^50) */
  fe_sq(tmp,acc);
  for (left = 49;left > 0;--left) fe_sq(tmp,tmp);

  /* tmp = z^(2^100-1) */
  fe_mul(tmp,tmp,acc);

  /* tmp2 = tmp^(2^100) */
  fe_sq(tmp2,tmp);
  for (left = 99;left > 0;--left) fe_sq(tmp2,tmp2);

  /* tmp = z^(2^200-1) */
  fe_mul(tmp,tmp2,tmp);

  /* tmp = tmp^(2^50) */
  fe_sq(tmp,tmp);
  for (left = 49;left > 0;--left) fe_sq(tmp,tmp);

  /* acc = z^(2^250-1) */
  fe_mul(acc,tmp,acc);

  /* acc = z^(2^255-32) */
  fe_sq(acc,acc);
  for (left = 4;left > 0;--left) fe_sq(acc,acc);

  /* out = z^(2^255-21) */
  fe_mul(out,acc,z11);
}
|
1080
|
+
|
1081
|
+
|
1082
|
+
int crypto_scalarmult_ref10(unsigned char *q,
|
1083
|
+
const unsigned char *n,
|
1084
|
+
const unsigned char *p)
|
1085
|
+
{
|
1086
|
+
unsigned char e[32];
|
1087
|
+
unsigned int i;
|
1088
|
+
fe x1;
|
1089
|
+
fe x2;
|
1090
|
+
fe z2;
|
1091
|
+
fe x3;
|
1092
|
+
fe z3;
|
1093
|
+
fe tmp0;
|
1094
|
+
fe tmp1;
|
1095
|
+
int pos;
|
1096
|
+
unsigned int swap;
|
1097
|
+
unsigned int b;
|
1098
|
+
|
1099
|
+
for (i = 0;i < 32;++i) e[i] = n[i];
|
1100
|
+
e[0] &= 248;
|
1101
|
+
e[31] &= 127;
|
1102
|
+
e[31] |= 64;
|
1103
|
+
fe_frombytes(x1,p);
|
1104
|
+
fe_1(x2);
|
1105
|
+
fe_0(z2);
|
1106
|
+
fe_copy(x3,x1);
|
1107
|
+
fe_1(z3);
|
1108
|
+
|
1109
|
+
swap = 0;
|
1110
|
+
for (pos = 254;pos >= 0;--pos) {
|
1111
|
+
b = e[pos / 8] >> (pos & 7);
|
1112
|
+
b &= 1;
|
1113
|
+
swap ^= b;
|
1114
|
+
fe_cswap(x2,x3,swap);
|
1115
|
+
fe_cswap(z2,z3,swap);
|
1116
|
+
swap = b;
|
1117
|
+
/* qhasm: fe X2 */
|
1118
|
+
|
1119
|
+
/* qhasm: fe Z2 */
|
1120
|
+
|
1121
|
+
/* qhasm: fe X3 */
|
1122
|
+
|
1123
|
+
/* qhasm: fe Z3 */
|
1124
|
+
|
1125
|
+
/* qhasm: fe X4 */
|
1126
|
+
|
1127
|
+
/* qhasm: fe Z4 */
|
1128
|
+
|
1129
|
+
/* qhasm: fe X5 */
|
1130
|
+
|
1131
|
+
/* qhasm: fe Z5 */
|
1132
|
+
|
1133
|
+
/* qhasm: fe A */
|
1134
|
+
|
1135
|
+
/* qhasm: fe B */
|
1136
|
+
|
1137
|
+
/* qhasm: fe C */
|
1138
|
+
|
1139
|
+
/* qhasm: fe D */
|
1140
|
+
|
1141
|
+
/* qhasm: fe E */
|
1142
|
+
|
1143
|
+
/* qhasm: fe AA */
|
1144
|
+
|
1145
|
+
/* qhasm: fe BB */
|
1146
|
+
|
1147
|
+
/* qhasm: fe DA */
|
1148
|
+
|
1149
|
+
/* qhasm: fe CB */
|
1150
|
+
|
1151
|
+
/* qhasm: fe t0 */
|
1152
|
+
|
1153
|
+
/* qhasm: fe t1 */
|
1154
|
+
|
1155
|
+
/* qhasm: fe t2 */
|
1156
|
+
|
1157
|
+
/* qhasm: fe t3 */
|
1158
|
+
|
1159
|
+
/* qhasm: fe t4 */
|
1160
|
+
|
1161
|
+
/* qhasm: enter ladder */
|
1162
|
+
|
1163
|
+
/* qhasm: D = X3-Z3 */
|
1164
|
+
/* asm 1: fe_sub(>D=fe#5,<X3=fe#3,<Z3=fe#4); */
|
1165
|
+
/* asm 2: fe_sub(>D=tmp0,<X3=x3,<Z3=z3); */
|
1166
|
+
fe_sub(tmp0,x3,z3);
|
1167
|
+
|
1168
|
+
/* qhasm: B = X2-Z2 */
|
1169
|
+
/* asm 1: fe_sub(>B=fe#6,<X2=fe#1,<Z2=fe#2); */
|
1170
|
+
/* asm 2: fe_sub(>B=tmp1,<X2=x2,<Z2=z2); */
|
1171
|
+
fe_sub(tmp1,x2,z2);
|
1172
|
+
|
1173
|
+
/* qhasm: A = X2+Z2 */
|
1174
|
+
/* asm 1: fe_add(>A=fe#1,<X2=fe#1,<Z2=fe#2); */
|
1175
|
+
/* asm 2: fe_add(>A=x2,<X2=x2,<Z2=z2); */
|
1176
|
+
fe_add(x2,x2,z2);
|
1177
|
+
|
1178
|
+
/* qhasm: C = X3+Z3 */
|
1179
|
+
/* asm 1: fe_add(>C=fe#2,<X3=fe#3,<Z3=fe#4); */
|
1180
|
+
/* asm 2: fe_add(>C=z2,<X3=x3,<Z3=z3); */
|
1181
|
+
fe_add(z2,x3,z3);
|
1182
|
+
|
1183
|
+
/* qhasm: DA = D*A */
|
1184
|
+
/* asm 1: fe_mul(>DA=fe#4,<D=fe#5,<A=fe#1); */
|
1185
|
+
/* asm 2: fe_mul(>DA=z3,<D=tmp0,<A=x2); */
|
1186
|
+
fe_mul(z3,tmp0,x2);
|
1187
|
+
|
1188
|
+
/* qhasm: CB = C*B */
|
1189
|
+
/* asm 1: fe_mul(>CB=fe#2,<C=fe#2,<B=fe#6); */
|
1190
|
+
/* asm 2: fe_mul(>CB=z2,<C=z2,<B=tmp1); */
|
1191
|
+
fe_mul(z2,z2,tmp1);
|
1192
|
+
|
1193
|
+
/* qhasm: BB = B^2 */
|
1194
|
+
/* asm 1: fe_sq(>BB=fe#5,<B=fe#6); */
|
1195
|
+
/* asm 2: fe_sq(>BB=tmp0,<B=tmp1); */
|
1196
|
+
fe_sq(tmp0,tmp1);
|
1197
|
+
|
1198
|
+
/* qhasm: AA = A^2 */
|
1199
|
+
/* asm 1: fe_sq(>AA=fe#6,<A=fe#1); */
|
1200
|
+
/* asm 2: fe_sq(>AA=tmp1,<A=x2); */
|
1201
|
+
fe_sq(tmp1,x2);
|
1202
|
+
|
1203
|
+
/* qhasm: t0 = DA+CB */
|
1204
|
+
/* asm 1: fe_add(>t0=fe#3,<DA=fe#4,<CB=fe#2); */
|
1205
|
+
/* asm 2: fe_add(>t0=x3,<DA=z3,<CB=z2); */
|
1206
|
+
fe_add(x3,z3,z2);
|
1207
|
+
|
1208
|
+
/* qhasm: assign x3 to t0 */
|
1209
|
+
|
1210
|
+
/* qhasm: t1 = DA-CB */
|
1211
|
+
/* asm 1: fe_sub(>t1=fe#2,<DA=fe#4,<CB=fe#2); */
|
1212
|
+
/* asm 2: fe_sub(>t1=z2,<DA=z3,<CB=z2); */
|
1213
|
+
fe_sub(z2,z3,z2);
|
1214
|
+
|
1215
|
+
/* qhasm: X4 = AA*BB */
|
1216
|
+
/* asm 1: fe_mul(>X4=fe#1,<AA=fe#6,<BB=fe#5); */
|
1217
|
+
/* asm 2: fe_mul(>X4=x2,<AA=tmp1,<BB=tmp0); */
|
1218
|
+
fe_mul(x2,tmp1,tmp0);
|
1219
|
+
|
1220
|
+
/* qhasm: E = AA-BB */
|
1221
|
+
/* asm 1: fe_sub(>E=fe#6,<AA=fe#6,<BB=fe#5); */
|
1222
|
+
/* asm 2: fe_sub(>E=tmp1,<AA=tmp1,<BB=tmp0); */
|
1223
|
+
fe_sub(tmp1,tmp1,tmp0);
|
1224
|
+
|
1225
|
+
/* qhasm: t2 = t1^2 */
|
1226
|
+
/* asm 1: fe_sq(>t2=fe#2,<t1=fe#2); */
|
1227
|
+
/* asm 2: fe_sq(>t2=z2,<t1=z2); */
|
1228
|
+
fe_sq(z2,z2);
|
1229
|
+
|
1230
|
+
/* qhasm: t3 = a24*E */
|
1231
|
+
/* asm 1: fe_mul121666(>t3=fe#4,<E=fe#6); */
|
1232
|
+
/* asm 2: fe_mul121666(>t3=z3,<E=tmp1); */
|
1233
|
+
fe_mul121666(z3,tmp1);
|
1234
|
+
|
1235
|
+
/* qhasm: X5 = t0^2 */
|
1236
|
+
/* asm 1: fe_sq(>X5=fe#3,<t0=fe#3); */
|
1237
|
+
/* asm 2: fe_sq(>X5=x3,<t0=x3); */
|
1238
|
+
fe_sq(x3,x3);
|
1239
|
+
|
1240
|
+
/* qhasm: t4 = BB+t3 */
|
1241
|
+
/* asm 1: fe_add(>t4=fe#5,<BB=fe#5,<t3=fe#4); */
|
1242
|
+
/* asm 2: fe_add(>t4=tmp0,<BB=tmp0,<t3=z3); */
|
1243
|
+
fe_add(tmp0,tmp0,z3);
|
1244
|
+
|
1245
|
+
/* qhasm: Z5 = X1*t2 */
|
1246
|
+
/* asm 1: fe_mul(>Z5=fe#4,x1,<t2=fe#2); */
|
1247
|
+
/* asm 2: fe_mul(>Z5=z3,x1,<t2=z2); */
|
1248
|
+
fe_mul(z3,x1,z2);
|
1249
|
+
|
1250
|
+
/* qhasm: Z4 = E*t4 */
|
1251
|
+
/* asm 1: fe_mul(>Z4=fe#2,<E=fe#6,<t4=fe#5); */
|
1252
|
+
/* asm 2: fe_mul(>Z4=z2,<E=tmp1,<t4=tmp0); */
|
1253
|
+
fe_mul(z2,tmp1,tmp0);
|
1254
|
+
|
1255
|
+
/* qhasm: return */
|
1256
|
+
}
|
1257
|
+
fe_cswap(x2,x3,swap);
|
1258
|
+
fe_cswap(z2,z3,swap);
|
1259
|
+
|
1260
|
+
fe_invert(z2,z2);
|
1261
|
+
fe_mul(x2,x2,z2);
|
1262
|
+
fe_tobytes(q,x2);
|
1263
|
+
return 0;
|
1264
|
+
}
|
1265
|
+
|
1266
|
+
/* 32-byte point encoding passed as the fixed point: first byte 9, the rest zero. */
static const unsigned char basepoint[32] = {9};

/*
Scalar multiplication by the fixed point above.

q  output, 32 bytes
n  scalar, 32 bytes

Returns whatever crypto_scalarmult_ref10 returns.
*/
int crypto_scalarmult_base_ref10(unsigned char *q,const unsigned char *n)
{
  const int status = crypto_scalarmult_ref10(q,n,basepoint);

  return status;
}
|
1272
|
+
|