x25519-termux 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +5 -0
- data/.rubocop.yml +38 -0
- data/.travis.yml +21 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +13 -0
- data/LICENSE +32 -0
- data/README.md +301 -0
- data/Rakefile +25 -0
- data/appveyor.yml +20 -0
- data/ext/extconf_helpers.rb +20 -0
- data/ext/x25519_precomputed/cputest.c +74 -0
- data/ext/x25519_precomputed/extconf.rb +19 -0
- data/ext/x25519_precomputed/fp25519_x64.c +943 -0
- data/ext/x25519_precomputed/fp25519_x64.h +122 -0
- data/ext/x25519_precomputed/table_ladder_x25519.h +544 -0
- data/ext/x25519_precomputed/x25519_precomputed.c +99 -0
- data/ext/x25519_precomputed/x25519_precomputed.h +58 -0
- data/ext/x25519_precomputed/x25519_x64.c +251 -0
- data/ext/x25519_ref10/api.h +2 -0
- data/ext/x25519_ref10/base.c +9 -0
- data/ext/x25519_ref10/extconf.rb +13 -0
- data/ext/x25519_ref10/fe.c +912 -0
- data/ext/x25519_ref10/fe.h +44 -0
- data/ext/x25519_ref10/montgomery.h +140 -0
- data/ext/x25519_ref10/pow225521.h +160 -0
- data/ext/x25519_ref10/scalarmult.c +47 -0
- data/ext/x25519_ref10/x25519_ref10.c +82 -0
- data/ext/x25519_ref10/x25519_ref10.h +15 -0
- data/lib/x25519-termux.rb +99 -0
- data/lib/x25519-termux/montgomery_u.rb +34 -0
- data/lib/x25519-termux/scalar.rb +56 -0
- data/lib/x25519-termux/test_vectors.rb +38 -0
- data/lib/x25519-termux/version.rb +5 -0
- data/x25519-termux.gemspec +30 -0
- metadata +97 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
/*
|
2
|
+
Ruby C extension providing bindings to the rfc7748_precomputed implementation of
|
3
|
+
the X25519 Diffie-Hellman algorithm
|
4
|
+
*/
|
5
|
+
|
6
|
+
#include "ruby.h"
|
7
|
+
#include "x25519_precomputed.h"
|
8
|
+
|
9
|
+
static VALUE mX25519 = Qnil;
|
10
|
+
static VALUE mX25519_Provider = Qnil;
|
11
|
+
static VALUE mX25519_Provider_Precomputed = Qnil;
|
12
|
+
|
13
|
+
static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u);
|
14
|
+
static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar);
|
15
|
+
static VALUE mX25519_is_available(VALUE self);
|
16
|
+
static VALUE mX25519_disabled(VALUE self);
|
17
|
+
|
18
|
+
/* Initialize the x25519_precomputed C extension */
|
19
|
+
void Init_x25519_precomputed()
|
20
|
+
{
|
21
|
+
mX25519 = rb_define_module("X25519");
|
22
|
+
mX25519_Provider = rb_define_module_under(mX25519, "Provider");
|
23
|
+
mX25519_Provider_Precomputed = rb_define_module_under(mX25519_Provider, "Precomputed");
|
24
|
+
|
25
|
+
#ifdef DISABLE_PRECOMPUTED
|
26
|
+
rb_define_singleton_method(mX25519_Provider_Precomputed, "available?", mX25519_disabled, 0);
|
27
|
+
#else
|
28
|
+
rb_define_singleton_method(mX25519_Provider_Precomputed, "scalarmult", mX25519_Provider_Precomputed_scalarmult, 2);
|
29
|
+
rb_define_singleton_method(mX25519_Provider_Precomputed, "scalarmult_base", mX25519_Provider_Precomputed_scalarmult_base, 1);
|
30
|
+
rb_define_singleton_method(mX25519_Provider_Precomputed, "available?", mX25519_is_available, 0);
|
31
|
+
#endif
|
32
|
+
}
|
33
|
+
|
34
|
+
/* Variable-base scalar multiplication */
|
35
|
+
static VALUE mX25519_Provider_Precomputed_scalarmult(VALUE self, VALUE scalar, VALUE montgomery_u)
|
36
|
+
{
|
37
|
+
/* X25519_KEY ensures inputs are aligned at 32-bytes */
|
38
|
+
X25519_KEY raw_scalar, raw_montgomery_u, product;
|
39
|
+
|
40
|
+
StringValue(scalar);
|
41
|
+
if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) {
|
42
|
+
rb_raise(
|
43
|
+
rb_eArgError,
|
44
|
+
"expected %d-byte scalar, got %ld",
|
45
|
+
X25519_KEYSIZE_BYTES,
|
46
|
+
RSTRING_LEN(scalar)
|
47
|
+
);
|
48
|
+
}
|
49
|
+
|
50
|
+
StringValue(montgomery_u);
|
51
|
+
if(RSTRING_LEN(montgomery_u) != X25519_KEYSIZE_BYTES) {
|
52
|
+
rb_raise(
|
53
|
+
rb_eArgError,
|
54
|
+
"expected %d-byte Montgomery-u coordinate, got %ld",
|
55
|
+
X25519_KEYSIZE_BYTES,
|
56
|
+
RSTRING_LEN(montgomery_u)
|
57
|
+
);
|
58
|
+
}
|
59
|
+
|
60
|
+
memcpy(raw_scalar, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES);
|
61
|
+
memcpy(raw_montgomery_u, RSTRING_PTR(montgomery_u), X25519_KEYSIZE_BYTES);
|
62
|
+
x25519_precomputed_scalarmult(product, raw_scalar, raw_montgomery_u);
|
63
|
+
|
64
|
+
return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES);
|
65
|
+
}
|
66
|
+
|
67
|
+
/* Fixed-base scalar multiplication */
|
68
|
+
static VALUE mX25519_Provider_Precomputed_scalarmult_base(VALUE self, VALUE scalar)
|
69
|
+
{
|
70
|
+
/* X25519_KEY ensures inputs are aligned at 32-bytes */
|
71
|
+
X25519_KEY raw_scalar, product;
|
72
|
+
|
73
|
+
StringValue(scalar);
|
74
|
+
if(RSTRING_LEN(scalar) != X25519_KEYSIZE_BYTES) {
|
75
|
+
rb_raise(
|
76
|
+
rb_eArgError,
|
77
|
+
"expected %d-byte scalar, got %ld",
|
78
|
+
X25519_KEYSIZE_BYTES,
|
79
|
+
RSTRING_LEN(scalar)
|
80
|
+
);
|
81
|
+
}
|
82
|
+
|
83
|
+
memcpy(raw_scalar, RSTRING_PTR(scalar), X25519_KEYSIZE_BYTES);
|
84
|
+
x25519_precomputed_scalarmult_base(product, raw_scalar);
|
85
|
+
|
86
|
+
return rb_str_new((const char *)product, X25519_KEYSIZE_BYTES);
|
87
|
+
}
|
88
|
+
|
89
|
+
/* Is the x25519_precomputed backend supported on this CPU? */
|
90
|
+
static VALUE mX25519_is_available(VALUE self)
|
91
|
+
{
|
92
|
+
return check_4th_gen_intel_core_features() ? Qtrue : Qfalse;
|
93
|
+
}
|
94
|
+
|
95
|
+
/* Set availability to return false if extension is skipped */
|
96
|
+
static VALUE mX25519_disabled(VALUE self)
|
97
|
+
{
|
98
|
+
return Qfalse;
|
99
|
+
}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
|
3
|
+
* Institute of Computing.
|
4
|
+
* University of Campinas, Brazil.
|
5
|
+
*
|
6
|
+
* Redistribution and use in source and binary forms, with or without
|
7
|
+
* modification, are permitted provided that the following conditions
|
8
|
+
* are met:
|
9
|
+
*
|
10
|
+
* * Redistributions of source code must retain the above copyright
|
11
|
+
* notice, this list of conditions and the following disclaimer.
|
12
|
+
* * Redistributions in binary form must reproduce the above
|
13
|
+
* copyright notice, this list of conditions and the following
|
14
|
+
* disclaimer in the documentation and/or other materials provided
|
15
|
+
* with the distribution.
|
16
|
+
* * Neither the name of University of Campinas nor the names of its
|
17
|
+
* contributors may be used to endorse or promote products derived
|
18
|
+
* from this software without specific prior written permission.
|
19
|
+
*
|
20
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
23
|
+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
24
|
+
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
25
|
+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
26
|
+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
27
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
28
|
+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
29
|
+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
30
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
31
|
+
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
*/
|
33
|
+
|
34
|
+
#ifndef X25519_PRECOMPUTED_H
#define X25519_PRECOMPUTED_H

#include <stdint.h>

/* Alignment (bytes) required by the vectorized field arithmetic. */
#ifndef ALIGN_BYTES
#define ALIGN_BYTES 32
#endif

#ifndef ALIGN
#ifdef __INTEL_COMPILER
#define ALIGN __declspec(align(ALIGN_BYTES))
#else
#define ALIGN __attribute__((aligned(ALIGN_BYTES)))
#endif
#endif

#define X25519_KEYSIZE_BYTES 32
/* 32-byte key/point buffer, aligned for the precomputed backend. */
typedef ALIGN uint8_t X25519_KEY[X25519_KEYSIZE_BYTES];

/* shared = X25519(private_key, session_key); all buffers are 32 bytes. */
void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, uint8_t *session_key);
/* session_key = X25519(private_key, basepoint 9); both buffers 32 bytes. */
void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key);
/* CPU feature probe; nonzero when the backend's ISA extensions are present.
 * (void) makes this a full prototype — an empty list is an obsolescent
 * no-information declaration in C. */
int check_4th_gen_intel_core_features(void);

#endif /* X25519_PRECOMPUTED_H */
|
@@ -0,0 +1,251 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2017, Armando Faz <armfazh@ic.unicamp.br>. All rights reserved.
|
3
|
+
* Institute of Computing.
|
4
|
+
* University of Campinas, Brazil.
|
5
|
+
*
|
6
|
+
* Copyright (C) 2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
7
|
+
*
|
8
|
+
* Redistribution and use in source and binary forms, with or without
|
9
|
+
* modification, are permitted provided that the following conditions
|
10
|
+
* are met:
|
11
|
+
*
|
12
|
+
* * Redistributions of source code must retain the above copyright
|
13
|
+
* notice, this list of conditions and the following disclaimer.
|
14
|
+
* * Redistributions in binary form must reproduce the above
|
15
|
+
* copyright notice, this list of conditions and the following
|
16
|
+
* disclaimer in the documentation and/or other materials provided
|
17
|
+
* with the distribution.
|
18
|
+
* * Neither the name of University of Campinas nor the names of its
|
19
|
+
* contributors may be used to endorse or promote products derived
|
20
|
+
* from this software without specific prior written permission.
|
21
|
+
*
|
22
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
25
|
+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
26
|
+
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
27
|
+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
28
|
+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
29
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
30
|
+
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
31
|
+
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
32
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
33
|
+
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
*/
|
35
|
+
|
36
|
+
#include <string.h>
|
37
|
+
#include "fp25519_x64.h"
|
38
|
+
#include "x25519_precomputed.h"
|
39
|
+
#include "table_ladder_x25519.h"
|
40
|
+
|
41
|
+
static inline void cswap_x64(uint64_t bit, uint64_t *const px,
|
42
|
+
uint64_t *const py) {
|
43
|
+
int i = 0;
|
44
|
+
uint64_t mask = (uint64_t)0 - bit;
|
45
|
+
for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) {
|
46
|
+
uint64_t t = mask & (px[i] ^ py[i]);
|
47
|
+
px[i] = px[i] ^ t;
|
48
|
+
py[i] = py[i] ^ t;
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
|
53
|
+
/** Original rfc7748_precomputed name: 'x25519_shared_secret_x64' */
|
54
|
+
void x25519_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key,
|
55
|
+
uint8_t *session_key) {
|
56
|
+
ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];
|
57
|
+
ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64];
|
58
|
+
ALIGN uint64_t workspace[6 * NUM_WORDS_ELTFP25519_X64];
|
59
|
+
ALIGN uint8_t session[X25519_KEYSIZE_BYTES];
|
60
|
+
ALIGN uint8_t private[X25519_KEYSIZE_BYTES];
|
61
|
+
|
62
|
+
int i = 0, j = 0;
|
63
|
+
uint64_t prev = 0;
|
64
|
+
uint64_t *const X1 = (uint64_t *)session;
|
65
|
+
uint64_t *const key = (uint64_t *)private;
|
66
|
+
uint64_t *const Px = coordinates + 0;
|
67
|
+
uint64_t *const Pz = coordinates + 4;
|
68
|
+
uint64_t *const Qx = coordinates + 8;
|
69
|
+
uint64_t *const Qz = coordinates + 12;
|
70
|
+
uint64_t *const X2 = Qx;
|
71
|
+
uint64_t *const Z2 = Qz;
|
72
|
+
uint64_t *const X3 = Px;
|
73
|
+
uint64_t *const Z3 = Pz;
|
74
|
+
uint64_t *const X2Z2 = Qx;
|
75
|
+
uint64_t *const X3Z3 = Px;
|
76
|
+
|
77
|
+
uint64_t *const A = workspace + 0;
|
78
|
+
uint64_t *const B = workspace + 4;
|
79
|
+
uint64_t *const D = workspace + 8;
|
80
|
+
uint64_t *const C = workspace + 12;
|
81
|
+
uint64_t *const DA = workspace + 16;
|
82
|
+
uint64_t *const CB = workspace + 20;
|
83
|
+
uint64_t *const AB = A;
|
84
|
+
uint64_t *const DC = D;
|
85
|
+
uint64_t *const DACB = DA;
|
86
|
+
uint64_t *const buffer_1w = buffer;
|
87
|
+
uint64_t *const buffer_2w = buffer;
|
88
|
+
|
89
|
+
memcpy(private, private_key, sizeof(private));
|
90
|
+
memcpy(session, session_key, sizeof(session));
|
91
|
+
|
92
|
+
/* clampC function */
|
93
|
+
private
|
94
|
+
[0] = private[0] & (~(uint8_t)0x7);
|
95
|
+
private
|
96
|
+
[X25519_KEYSIZE_BYTES - 1] =
|
97
|
+
(uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
|
98
|
+
|
99
|
+
/**
|
100
|
+
* As in the draft:
|
101
|
+
* When receiving such an array, implementations of curve25519
|
102
|
+
* MUST mask the most-significant bit in the final byte. This
|
103
|
+
* is done to preserve compatibility with point formats which
|
104
|
+
* reserve the sign bit for use in other protocols and to
|
105
|
+
* increase resistance to implementation fingerprinting
|
106
|
+
**/
|
107
|
+
session[X25519_KEYSIZE_BYTES - 1] &= (1 << (255 % 8)) - 1;
|
108
|
+
|
109
|
+
copy_EltFp25519_1w_x64(Px, X1);
|
110
|
+
setzero_EltFp25519_1w_x64(Pz);
|
111
|
+
setzero_EltFp25519_1w_x64(Qx);
|
112
|
+
setzero_EltFp25519_1w_x64(Qz);
|
113
|
+
|
114
|
+
Pz[0] = 1;
|
115
|
+
Qx[0] = 1;
|
116
|
+
|
117
|
+
/* main-loop */
|
118
|
+
prev = 0;
|
119
|
+
j = 62;
|
120
|
+
for (i = 3; i >= 0; i--) {
|
121
|
+
while (j >= 0) {
|
122
|
+
uint64_t bit = (key[i] >> j) & 0x1;
|
123
|
+
uint64_t swap = bit ^ prev;
|
124
|
+
prev = bit;
|
125
|
+
|
126
|
+
add_EltFp25519_1w_x64(A, X2, Z2); /* A = (X2+Z2) */
|
127
|
+
sub_EltFp25519_1w_x64(B, X2, Z2); /* B = (X2-Z2) */
|
128
|
+
add_EltFp25519_1w_x64(C, X3, Z3); /* C = (X3+Z3) */
|
129
|
+
sub_EltFp25519_1w_x64(D, X3, Z3); /* D = (X3-Z3) */
|
130
|
+
mul_EltFp25519_2w_x64(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
|
131
|
+
|
132
|
+
cswap_x64(swap, A, C);
|
133
|
+
cswap_x64(swap, B, D);
|
134
|
+
|
135
|
+
sqr_EltFp25519_2w_x64(AB); /* [AA|BB] = [A^2|B^2] */
|
136
|
+
add_EltFp25519_1w_x64(X3, DA, CB); /* X3 = (DA+CB) */
|
137
|
+
sub_EltFp25519_1w_x64(Z3, DA, CB); /* Z3 = (DA-CB) */
|
138
|
+
sqr_EltFp25519_2w_x64(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
|
139
|
+
|
140
|
+
copy_EltFp25519_1w_x64(X2, B); /* X2 = B^2 */
|
141
|
+
sub_EltFp25519_1w_x64(Z2, A, B); /* Z2 = E = AA-BB */
|
142
|
+
|
143
|
+
mul_a24_EltFp25519_1w_x64(B, Z2); /* B = a24*E */
|
144
|
+
add_EltFp25519_1w_x64(B, B, X2); /* B = a24*E+B */
|
145
|
+
mul_EltFp25519_2w_x64(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
|
146
|
+
mul_EltFp25519_1w_x64(Z3, Z3, X1); /* Z3 = Z3*X1 */
|
147
|
+
j--;
|
148
|
+
}
|
149
|
+
j = 63;
|
150
|
+
}
|
151
|
+
|
152
|
+
inv_EltFp25519_1w_x64(A, Qz);
|
153
|
+
mul_EltFp25519_1w_x64((uint64_t *)shared, Qx, A);
|
154
|
+
fred_EltFp25519_1w_x64((uint64_t *)shared);
|
155
|
+
}
|
156
|
+
|
157
|
+
/* Original rfc7748_precomputed name: 'x25519_keygen_precmp_x64' */
|
158
|
+
void x25519_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key) {
|
159
|
+
ALIGN uint64_t buffer[4 * NUM_WORDS_ELTFP25519_X64];
|
160
|
+
ALIGN uint64_t coordinates[4 * NUM_WORDS_ELTFP25519_X64];
|
161
|
+
ALIGN uint64_t workspace[4 * NUM_WORDS_ELTFP25519_X64];
|
162
|
+
ALIGN uint8_t private[X25519_KEYSIZE_BYTES];
|
163
|
+
|
164
|
+
int i = 0, j = 0, k = 0;
|
165
|
+
uint64_t *const key = (uint64_t *)private;
|
166
|
+
uint64_t *const Ur1 = coordinates + 0;
|
167
|
+
uint64_t *const Zr1 = coordinates + 4;
|
168
|
+
uint64_t *const Ur2 = coordinates + 8;
|
169
|
+
uint64_t *const Zr2 = coordinates + 12;
|
170
|
+
|
171
|
+
uint64_t *const UZr1 = coordinates + 0;
|
172
|
+
uint64_t *const ZUr2 = coordinates + 8;
|
173
|
+
|
174
|
+
uint64_t *const A = workspace + 0;
|
175
|
+
uint64_t *const B = workspace + 4;
|
176
|
+
uint64_t *const C = workspace + 8;
|
177
|
+
uint64_t *const D = workspace + 12;
|
178
|
+
|
179
|
+
uint64_t *const AB = workspace + 0;
|
180
|
+
uint64_t *const CD = workspace + 8;
|
181
|
+
|
182
|
+
uint64_t *const buffer_1w = buffer;
|
183
|
+
uint64_t *const buffer_2w = buffer;
|
184
|
+
uint64_t *P = (uint64_t *)Table_Ladder_8k;
|
185
|
+
|
186
|
+
memcpy(private, private_key, sizeof(private));
|
187
|
+
|
188
|
+
/* clampC function */
|
189
|
+
private
|
190
|
+
[0] = private[0] & (~(uint8_t)0x7);
|
191
|
+
private
|
192
|
+
[X25519_KEYSIZE_BYTES - 1] =
|
193
|
+
(uint8_t)64 | (private[X25519_KEYSIZE_BYTES - 1] & (uint8_t)0x7F);
|
194
|
+
|
195
|
+
setzero_EltFp25519_1w_x64(Ur1);
|
196
|
+
setzero_EltFp25519_1w_x64(Zr1);
|
197
|
+
setzero_EltFp25519_1w_x64(Zr2);
|
198
|
+
Ur1[0] = 1;
|
199
|
+
Zr1[0] = 1;
|
200
|
+
Zr2[0] = 1;
|
201
|
+
|
202
|
+
/* G-S */
|
203
|
+
Ur2[3] = 0x1eaecdeee27cab34;
|
204
|
+
Ur2[2] = 0xadc7a0b9235d48e2;
|
205
|
+
Ur2[1] = 0xbbf095ae14b2edf8;
|
206
|
+
Ur2[0] = 0x7e94e1fec82faabd;
|
207
|
+
|
208
|
+
/* main-loop */
|
209
|
+
const int ite[4] = {64, 64, 64, 63};
|
210
|
+
const int q = 3;
|
211
|
+
uint64_t swap = 1;
|
212
|
+
|
213
|
+
j = q;
|
214
|
+
for (i = 0; i < NUM_WORDS_ELTFP25519_X64; i++) {
|
215
|
+
while (j < ite[i]) {
|
216
|
+
k = (64 * i + j - q);
|
217
|
+
uint64_t bit = (key[i] >> j) & 0x1;
|
218
|
+
swap = swap ^ bit;
|
219
|
+
cswap_x64(swap, Ur1, Ur2);
|
220
|
+
cswap_x64(swap, Zr1, Zr2);
|
221
|
+
swap = bit;
|
222
|
+
/** Addition */
|
223
|
+
sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
|
224
|
+
add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
|
225
|
+
mul_EltFp25519_1w_x64(C, &P[4 * k], B); /* C = M0-B */
|
226
|
+
sub_EltFp25519_1w_x64(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
|
227
|
+
add_EltFp25519_1w_x64(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
|
228
|
+
sqr_EltFp25519_2w_x64(AB); /* A = A^2 | B = B^2 */
|
229
|
+
mul_EltFp25519_2w_x64(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
|
230
|
+
j++;
|
231
|
+
}
|
232
|
+
j = 0;
|
233
|
+
}
|
234
|
+
|
235
|
+
/** Doubling */
|
236
|
+
for (i = 0; i < q; i++) {
|
237
|
+
add_EltFp25519_1w_x64(A, Ur1, Zr1); /* A = Ur1+Zr1 */
|
238
|
+
sub_EltFp25519_1w_x64(B, Ur1, Zr1); /* B = Ur1-Zr1 */
|
239
|
+
sqr_EltFp25519_2w_x64(AB); /* A = A**2 B = B**2 */
|
240
|
+
copy_EltFp25519_1w_x64(C, B); /* C = B */
|
241
|
+
sub_EltFp25519_1w_x64(B, A, B); /* B = A-B */
|
242
|
+
mul_a24_EltFp25519_1w_x64(D, B); /* D = my_a24*B */
|
243
|
+
add_EltFp25519_1w_x64(D, D, C); /* D = D+C */
|
244
|
+
mul_EltFp25519_2w_x64(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
|
245
|
+
}
|
246
|
+
|
247
|
+
/* Convert to affine coordinates */
|
248
|
+
inv_EltFp25519_1w_x64(A, Zr1);
|
249
|
+
mul_EltFp25519_1w_x64((uint64_t *)session_key, Ur1, A);
|
250
|
+
fred_EltFp25519_1w_x64((uint64_t *)session_key);
|
251
|
+
}
|
@@ -0,0 +1,912 @@
|
|
1
|
+
#include "fe.h"
|
2
|
+
|
3
|
+
/*
|
4
|
+
h = 0
|
5
|
+
*/
|
6
|
+
|
7
|
+
void fe_0(fe h)
|
8
|
+
{
|
9
|
+
h[0] = 0;
|
10
|
+
h[1] = 0;
|
11
|
+
h[2] = 0;
|
12
|
+
h[3] = 0;
|
13
|
+
h[4] = 0;
|
14
|
+
h[5] = 0;
|
15
|
+
h[6] = 0;
|
16
|
+
h[7] = 0;
|
17
|
+
h[8] = 0;
|
18
|
+
h[9] = 0;
|
19
|
+
}
|
20
|
+
|
21
|
+
/*
|
22
|
+
h = 1
|
23
|
+
*/
|
24
|
+
|
25
|
+
void fe_1(fe h)
|
26
|
+
{
|
27
|
+
h[0] = 1;
|
28
|
+
h[1] = 0;
|
29
|
+
h[2] = 0;
|
30
|
+
h[3] = 0;
|
31
|
+
h[4] = 0;
|
32
|
+
h[5] = 0;
|
33
|
+
h[6] = 0;
|
34
|
+
h[7] = 0;
|
35
|
+
h[8] = 0;
|
36
|
+
h[9] = 0;
|
37
|
+
}
|
38
|
+
|
39
|
+
/*
|
40
|
+
h = f + g
|
41
|
+
Can overlap h with f or g.
|
42
|
+
|
43
|
+
Preconditions:
|
44
|
+
|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
45
|
+
|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
46
|
+
|
47
|
+
Postconditions:
|
48
|
+
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
49
|
+
*/
|
50
|
+
|
51
|
+
void fe_add(fe h,fe f,fe g)
|
52
|
+
{
|
53
|
+
int32_t f0 = f[0];
|
54
|
+
int32_t f1 = f[1];
|
55
|
+
int32_t f2 = f[2];
|
56
|
+
int32_t f3 = f[3];
|
57
|
+
int32_t f4 = f[4];
|
58
|
+
int32_t f5 = f[5];
|
59
|
+
int32_t f6 = f[6];
|
60
|
+
int32_t f7 = f[7];
|
61
|
+
int32_t f8 = f[8];
|
62
|
+
int32_t f9 = f[9];
|
63
|
+
int32_t g0 = g[0];
|
64
|
+
int32_t g1 = g[1];
|
65
|
+
int32_t g2 = g[2];
|
66
|
+
int32_t g3 = g[3];
|
67
|
+
int32_t g4 = g[4];
|
68
|
+
int32_t g5 = g[5];
|
69
|
+
int32_t g6 = g[6];
|
70
|
+
int32_t g7 = g[7];
|
71
|
+
int32_t g8 = g[8];
|
72
|
+
int32_t g9 = g[9];
|
73
|
+
int32_t h0 = f0 + g0;
|
74
|
+
int32_t h1 = f1 + g1;
|
75
|
+
int32_t h2 = f2 + g2;
|
76
|
+
int32_t h3 = f3 + g3;
|
77
|
+
int32_t h4 = f4 + g4;
|
78
|
+
int32_t h5 = f5 + g5;
|
79
|
+
int32_t h6 = f6 + g6;
|
80
|
+
int32_t h7 = f7 + g7;
|
81
|
+
int32_t h8 = f8 + g8;
|
82
|
+
int32_t h9 = f9 + g9;
|
83
|
+
h[0] = h0;
|
84
|
+
h[1] = h1;
|
85
|
+
h[2] = h2;
|
86
|
+
h[3] = h3;
|
87
|
+
h[4] = h4;
|
88
|
+
h[5] = h5;
|
89
|
+
h[6] = h6;
|
90
|
+
h[7] = h7;
|
91
|
+
h[8] = h8;
|
92
|
+
h[9] = h9;
|
93
|
+
}
|
94
|
+
|
95
|
+
/*
|
96
|
+
h = f
|
97
|
+
*/
|
98
|
+
|
99
|
+
void fe_copy(fe h,fe f)
|
100
|
+
{
|
101
|
+
int32_t f0 = f[0];
|
102
|
+
int32_t f1 = f[1];
|
103
|
+
int32_t f2 = f[2];
|
104
|
+
int32_t f3 = f[3];
|
105
|
+
int32_t f4 = f[4];
|
106
|
+
int32_t f5 = f[5];
|
107
|
+
int32_t f6 = f[6];
|
108
|
+
int32_t f7 = f[7];
|
109
|
+
int32_t f8 = f[8];
|
110
|
+
int32_t f9 = f[9];
|
111
|
+
h[0] = f0;
|
112
|
+
h[1] = f1;
|
113
|
+
h[2] = f2;
|
114
|
+
h[3] = f3;
|
115
|
+
h[4] = f4;
|
116
|
+
h[5] = f5;
|
117
|
+
h[6] = f6;
|
118
|
+
h[7] = f7;
|
119
|
+
h[8] = f8;
|
120
|
+
h[9] = f9;
|
121
|
+
}
|
122
|
+
|
123
|
+
/*
|
124
|
+
Replace (f,g) with (g,f) if b == 1;
|
125
|
+
replace (f,g) with (f,g) if b == 0.
|
126
|
+
|
127
|
+
Preconditions: b in {0,1}.
|
128
|
+
*/
|
129
|
+
|
130
|
+
void fe_cswap(fe f,fe g,unsigned int b)
|
131
|
+
{
|
132
|
+
int32_t f0 = f[0];
|
133
|
+
int32_t f1 = f[1];
|
134
|
+
int32_t f2 = f[2];
|
135
|
+
int32_t f3 = f[3];
|
136
|
+
int32_t f4 = f[4];
|
137
|
+
int32_t f5 = f[5];
|
138
|
+
int32_t f6 = f[6];
|
139
|
+
int32_t f7 = f[7];
|
140
|
+
int32_t f8 = f[8];
|
141
|
+
int32_t f9 = f[9];
|
142
|
+
int32_t g0 = g[0];
|
143
|
+
int32_t g1 = g[1];
|
144
|
+
int32_t g2 = g[2];
|
145
|
+
int32_t g3 = g[3];
|
146
|
+
int32_t g4 = g[4];
|
147
|
+
int32_t g5 = g[5];
|
148
|
+
int32_t g6 = g[6];
|
149
|
+
int32_t g7 = g[7];
|
150
|
+
int32_t g8 = g[8];
|
151
|
+
int32_t g9 = g[9];
|
152
|
+
int32_t x0 = f0 ^ g0;
|
153
|
+
int32_t x1 = f1 ^ g1;
|
154
|
+
int32_t x2 = f2 ^ g2;
|
155
|
+
int32_t x3 = f3 ^ g3;
|
156
|
+
int32_t x4 = f4 ^ g4;
|
157
|
+
int32_t x5 = f5 ^ g5;
|
158
|
+
int32_t x6 = f6 ^ g6;
|
159
|
+
int32_t x7 = f7 ^ g7;
|
160
|
+
int32_t x8 = f8 ^ g8;
|
161
|
+
int32_t x9 = f9 ^ g9;
|
162
|
+
b = -b;
|
163
|
+
x0 &= b;
|
164
|
+
x1 &= b;
|
165
|
+
x2 &= b;
|
166
|
+
x3 &= b;
|
167
|
+
x4 &= b;
|
168
|
+
x5 &= b;
|
169
|
+
x6 &= b;
|
170
|
+
x7 &= b;
|
171
|
+
x8 &= b;
|
172
|
+
x9 &= b;
|
173
|
+
f[0] = f0 ^ x0;
|
174
|
+
f[1] = f1 ^ x1;
|
175
|
+
f[2] = f2 ^ x2;
|
176
|
+
f[3] = f3 ^ x3;
|
177
|
+
f[4] = f4 ^ x4;
|
178
|
+
f[5] = f5 ^ x5;
|
179
|
+
f[6] = f6 ^ x6;
|
180
|
+
f[7] = f7 ^ x7;
|
181
|
+
f[8] = f8 ^ x8;
|
182
|
+
f[9] = f9 ^ x9;
|
183
|
+
g[0] = g0 ^ x0;
|
184
|
+
g[1] = g1 ^ x1;
|
185
|
+
g[2] = g2 ^ x2;
|
186
|
+
g[3] = g3 ^ x3;
|
187
|
+
g[4] = g4 ^ x4;
|
188
|
+
g[5] = g5 ^ x5;
|
189
|
+
g[6] = g6 ^ x6;
|
190
|
+
g[7] = g7 ^ x7;
|
191
|
+
g[8] = g8 ^ x8;
|
192
|
+
g[9] = g9 ^ x9;
|
193
|
+
}
|
194
|
+
|
195
|
+
/* Little-endian load of 3 bytes into the low 24 bits of a uint64_t. */
static uint64_t load_3(const unsigned char *in)
{
  uint64_t result = (uint64_t) in[0];
  result |= ((uint64_t) in[1]) << 8;
  result |= ((uint64_t) in[2]) << 16;
  return result;
}
|
203
|
+
|
204
|
+
/* Little-endian load of 4 bytes into the low 32 bits of a uint64_t. */
static uint64_t load_4(const unsigned char *in)
{
  uint64_t result = (uint64_t) in[0];
  result |= ((uint64_t) in[1]) << 8;
  result |= ((uint64_t) in[2]) << 16;
  result |= ((uint64_t) in[3]) << 24;
  return result;
}
|
213
|
+
|
214
|
+
/*
 * Unpack a 32-byte little-endian value into the 10-limb radix-2^25.5
 * representation (alternating 26/25-bit limbs). The top bit of s is
 * masked off (& 8388607 keeps 23 bits of the final 3-byte load), then
 * two carry passes bring every limb within its bound. The statement
 * order of the carry chain is preserved exactly.
 */
void fe_frombytes(fe h,const unsigned char *s)
{
  int64_t h0 = load_4(s);
  int64_t h1 = load_3(s + 4) << 6;
  int64_t h2 = load_3(s + 7) << 5;
  int64_t h3 = load_3(s + 10) << 3;
  int64_t h4 = load_3(s + 13) << 2;
  int64_t h5 = load_4(s + 16);
  int64_t h6 = load_3(s + 20) << 7;
  int64_t h7 = load_3(s + 23) << 5;
  int64_t h8 = load_3(s + 26) << 4;
  int64_t h9 = (load_3(s + 29) & 8388607) << 2;
  int64_t carry0;
  int64_t carry1;
  int64_t carry2;
  int64_t carry3;
  int64_t carry4;
  int64_t carry5;
  int64_t carry6;
  int64_t carry7;
  int64_t carry8;
  int64_t carry9;

  /* Odd limbs first; carry out of h9 wraps around as *19 (2^255 = 19). */
  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;

  /* Even limbs (26-bit). */
  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;

  h[0] = (int32_t)h0;
  h[1] = (int32_t)h1;
  h[2] = (int32_t)h2;
  h[3] = (int32_t)h3;
  h[4] = (int32_t)h4;
  h[5] = (int32_t)h5;
  h[6] = (int32_t)h6;
  h[7] = (int32_t)h7;
  h[8] = (int32_t)h8;
  h[9] = (int32_t)h9;
}
|
260
|
+
|
261
|
+
/*
 * out = z^-1 = z^(p-2) mod p, p = 2^255-19 (Fermat inversion).
 * The addition chain is textually included from pow225521.h, which
 * expects exactly these local names (t0..t3, i, out, z) to be in scope.
 */
void fe_invert(fe out,fe z)
{
  fe t0;
  fe t1;
  fe t2;
  fe t3;
  int i;

#include "pow225521.h"

  return;
}
|
273
|
+
|
274
|
+
/*
|
275
|
+
h = f * g
|
276
|
+
Can overlap h with f or g.
|
277
|
+
|
278
|
+
Preconditions:
|
279
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
280
|
+
|g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
281
|
+
|
282
|
+
Postconditions:
|
283
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
284
|
+
*/
|
285
|
+
|
286
|
+
/*
|
287
|
+
Notes on implementation strategy:
|
288
|
+
|
289
|
+
Using schoolbook multiplication.
|
290
|
+
Karatsuba would save a little in some cost models.
|
291
|
+
|
292
|
+
Most multiplications by 2 and 19 are 32-bit precomputations;
|
293
|
+
cheaper than 64-bit postcomputations.
|
294
|
+
|
295
|
+
There is one remaining multiplication by 19 in the carry chain;
|
296
|
+
one *19 precomputation can be merged into this,
|
297
|
+
but the resulting data flow is considerably less clean.
|
298
|
+
|
299
|
+
There are 12 carries below.
|
300
|
+
10 of them are 2-way parallelizable and vectorizable.
|
301
|
+
Can get away with 11 carries, but then data flow is much deeper.
|
302
|
+
|
303
|
+
With tighter constraints on inputs can squeeze carries into int32.
|
304
|
+
*/
|
305
|
+
|
306
|
+
/*
 h = f * g in GF(2^255-19), radix-2^25.5 representation (10 limbs,
 alternating 26/25 bits). Can overlap h with f or g.

 Preconditions:
   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
   |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

 Postconditions:
   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

 Fix vs. upstream ref10: the carry steps used `h -= carry << k` where carry
 can be negative; left-shifting a negative signed value is undefined
 behavior in C (C11 6.5.7p4, CERT INT34-C). Replaced with multiplication by
 ((int64_t) 1 << k), which is well defined and compiles to the same shift
 (same change libsodium applied to its fe_25_5 code).
*/
void fe_mul(fe h,fe f,fe g)
{
  int32_t f0 = f[0];
  int32_t f1 = f[1];
  int32_t f2 = f[2];
  int32_t f3 = f[3];
  int32_t f4 = f[4];
  int32_t f5 = f[5];
  int32_t f6 = f[6];
  int32_t f7 = f[7];
  int32_t f8 = f[8];
  int32_t f9 = f[9];
  int32_t g0 = g[0];
  int32_t g1 = g[1];
  int32_t g2 = g[2];
  int32_t g3 = g[3];
  int32_t g4 = g[4];
  int32_t g5 = g[5];
  int32_t g6 = g[6];
  int32_t g7 = g[7];
  int32_t g8 = g[8];
  int32_t g9 = g[9];
  /* *19 folds the 2^255 wraparound (2^255 = 19 mod p) into 32-bit precomputations. */
  int32_t g1_19 = 19 * g1; /* 1.4*2^29 */
  int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */
  int32_t g3_19 = 19 * g3;
  int32_t g4_19 = 19 * g4;
  int32_t g5_19 = 19 * g5;
  int32_t g6_19 = 19 * g6;
  int32_t g7_19 = 19 * g7;
  int32_t g8_19 = 19 * g8;
  int32_t g9_19 = 19 * g9;
  /* *2 compensates the mixed 26/25-bit radix on odd-limb cross terms. */
  int32_t f1_2 = 2 * f1;
  int32_t f3_2 = 2 * f3;
  int32_t f5_2 = 2 * f5;
  int32_t f7_2 = 2 * f7;
  int32_t f9_2 = 2 * f9;
  int64_t f0g0    = f0   * (int64_t) g0;
  int64_t f0g1    = f0   * (int64_t) g1;
  int64_t f0g2    = f0   * (int64_t) g2;
  int64_t f0g3    = f0   * (int64_t) g3;
  int64_t f0g4    = f0   * (int64_t) g4;
  int64_t f0g5    = f0   * (int64_t) g5;
  int64_t f0g6    = f0   * (int64_t) g6;
  int64_t f0g7    = f0   * (int64_t) g7;
  int64_t f0g8    = f0   * (int64_t) g8;
  int64_t f0g9    = f0   * (int64_t) g9;
  int64_t f1g0    = f1   * (int64_t) g0;
  int64_t f1g1_2  = f1_2 * (int64_t) g1;
  int64_t f1g2    = f1   * (int64_t) g2;
  int64_t f1g3_2  = f1_2 * (int64_t) g3;
  int64_t f1g4    = f1   * (int64_t) g4;
  int64_t f1g5_2  = f1_2 * (int64_t) g5;
  int64_t f1g6    = f1   * (int64_t) g6;
  int64_t f1g7_2  = f1_2 * (int64_t) g7;
  int64_t f1g8    = f1   * (int64_t) g8;
  int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
  int64_t f2g0    = f2   * (int64_t) g0;
  int64_t f2g1    = f2   * (int64_t) g1;
  int64_t f2g2    = f2   * (int64_t) g2;
  int64_t f2g3    = f2   * (int64_t) g3;
  int64_t f2g4    = f2   * (int64_t) g4;
  int64_t f2g5    = f2   * (int64_t) g5;
  int64_t f2g6    = f2   * (int64_t) g6;
  int64_t f2g7    = f2   * (int64_t) g7;
  int64_t f2g8_19 = f2   * (int64_t) g8_19;
  int64_t f2g9_19 = f2   * (int64_t) g9_19;
  int64_t f3g0    = f3   * (int64_t) g0;
  int64_t f3g1_2  = f3_2 * (int64_t) g1;
  int64_t f3g2    = f3   * (int64_t) g2;
  int64_t f3g3_2  = f3_2 * (int64_t) g3;
  int64_t f3g4    = f3   * (int64_t) g4;
  int64_t f3g5_2  = f3_2 * (int64_t) g5;
  int64_t f3g6    = f3   * (int64_t) g6;
  int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
  int64_t f3g8_19 = f3   * (int64_t) g8_19;
  int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
  int64_t f4g0    = f4   * (int64_t) g0;
  int64_t f4g1    = f4   * (int64_t) g1;
  int64_t f4g2    = f4   * (int64_t) g2;
  int64_t f4g3    = f4   * (int64_t) g3;
  int64_t f4g4    = f4   * (int64_t) g4;
  int64_t f4g5    = f4   * (int64_t) g5;
  int64_t f4g6_19 = f4   * (int64_t) g6_19;
  int64_t f4g7_19 = f4   * (int64_t) g7_19;
  int64_t f4g8_19 = f4   * (int64_t) g8_19;
  int64_t f4g9_19 = f4   * (int64_t) g9_19;
  int64_t f5g0    = f5   * (int64_t) g0;
  int64_t f5g1_2  = f5_2 * (int64_t) g1;
  int64_t f5g2    = f5   * (int64_t) g2;
  int64_t f5g3_2  = f5_2 * (int64_t) g3;
  int64_t f5g4    = f5   * (int64_t) g4;
  int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
  int64_t f5g6_19 = f5   * (int64_t) g6_19;
  int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
  int64_t f5g8_19 = f5   * (int64_t) g8_19;
  int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
  int64_t f6g0    = f6   * (int64_t) g0;
  int64_t f6g1    = f6   * (int64_t) g1;
  int64_t f6g2    = f6   * (int64_t) g2;
  int64_t f6g3    = f6   * (int64_t) g3;
  int64_t f6g4_19 = f6   * (int64_t) g4_19;
  int64_t f6g5_19 = f6   * (int64_t) g5_19;
  int64_t f6g6_19 = f6   * (int64_t) g6_19;
  int64_t f6g7_19 = f6   * (int64_t) g7_19;
  int64_t f6g8_19 = f6   * (int64_t) g8_19;
  int64_t f6g9_19 = f6   * (int64_t) g9_19;
  int64_t f7g0    = f7   * (int64_t) g0;
  int64_t f7g1_2  = f7_2 * (int64_t) g1;
  int64_t f7g2    = f7   * (int64_t) g2;
  int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
  int64_t f7g4_19 = f7   * (int64_t) g4_19;
  int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
  int64_t f7g6_19 = f7   * (int64_t) g6_19;
  int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
  int64_t f7g8_19 = f7   * (int64_t) g8_19;
  int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
  int64_t f8g0    = f8   * (int64_t) g0;
  int64_t f8g1    = f8   * (int64_t) g1;
  int64_t f8g2_19 = f8   * (int64_t) g2_19;
  int64_t f8g3_19 = f8   * (int64_t) g3_19;
  int64_t f8g4_19 = f8   * (int64_t) g4_19;
  int64_t f8g5_19 = f8   * (int64_t) g5_19;
  int64_t f8g6_19 = f8   * (int64_t) g6_19;
  int64_t f8g7_19 = f8   * (int64_t) g7_19;
  int64_t f8g8_19 = f8   * (int64_t) g8_19;
  int64_t f8g9_19 = f8   * (int64_t) g9_19;
  int64_t f9g0    = f9   * (int64_t) g0;
  int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
  int64_t f9g2_19 = f9   * (int64_t) g2_19;
  int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
  int64_t f9g4_19 = f9   * (int64_t) g4_19;
  int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
  int64_t f9g6_19 = f9   * (int64_t) g6_19;
  int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
  int64_t f9g8_19 = f9   * (int64_t) g8_19;
  int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
  int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
  int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
  int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
  int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
  int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
  int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
  int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
  int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
  int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
  int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
  int64_t carry0;
  int64_t carry1;
  int64_t carry2;
  int64_t carry3;
  int64_t carry4;
  int64_t carry5;
  int64_t carry6;
  int64_t carry7;
  int64_t carry8;
  int64_t carry9;

  /*
  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
  */

  /* Signed rounding carry: adding 2^(k-1) before >> k biases truncation
     toward the nearest multiple, keeping each limb in (-2^k, 2^k). */
  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((int64_t) 1 << 26);
  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((int64_t) 1 << 26);
  /* |h0| <= 2^25; |h4| <= 2^25; |h1| <= 1.51*2^58; |h5| <= 1.51*2^58 */

  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 * ((int64_t) 1 << 25);
  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 * ((int64_t) 1 << 25);
  /* |h1| <= 2^24; |h5| <= 2^24; from now on fit into int32 */
  /* |h2| <= 1.21*2^59; |h6| <= 1.21*2^59 */

  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 * ((int64_t) 1 << 26);
  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 * ((int64_t) 1 << 26);
  /* |h2| <= 2^25; |h6| <= 2^25; from now on fit into int32 unchanged */
  /* |h3| <= 1.51*2^58; |h7| <= 1.51*2^58 */

  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 * ((int64_t) 1 << 25);
  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 * ((int64_t) 1 << 25);
  /* |h3| <= 2^24; |h7| <= 2^24; from now on fit into int32 unchanged */
  /* |h4| <= 1.52*2^33; |h8| <= 1.52*2^33 */

  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((int64_t) 1 << 26);
  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 * ((int64_t) 1 << 26);
  /* |h4| <= 2^25; |h8| <= 2^25; from now on fit into int32 unchanged */
  /* |h5| <= 1.01*2^24; |h9| <= 1.51*2^58 */

  /* Carry out of the top limb wraps around as *19 (2^255 = 19 mod p). */
  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((int64_t) 1 << 25);
  /* |h9| <= 2^24; from now on fits into int32 unchanged */
  /* |h0| <= 1.8*2^37 */

  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((int64_t) 1 << 26);
  /* |h0| <= 2^25; from now on fits into int32 unchanged */
  /* |h1| <= 1.01*2^24 */

  h[0] = (int32_t)h0;
  h[1] = (int32_t)h1;
  h[2] = (int32_t)h2;
  h[3] = (int32_t)h3;
  h[4] = (int32_t)h4;
  h[5] = (int32_t)h5;
  h[6] = (int32_t)h6;
  h[7] = (int32_t)h7;
  h[8] = (int32_t)h8;
  h[9] = (int32_t)h9;
}
|
524
|
+
|
525
|
+
/*
|
526
|
+
h = f * 121666
|
527
|
+
Can overlap h with f.
|
528
|
+
|
529
|
+
Preconditions:
|
530
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
531
|
+
|
532
|
+
Postconditions:
|
533
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
534
|
+
*/
|
535
|
+
|
536
|
+
/*
 h = f * 121666 in GF(2^255-19). Can overlap h with f.
 121666 = (486662 + 2) / 4 is the curve constant used by the Montgomery
 ladder step (RFC 7748).

 Preconditions:
   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

 Postconditions:
   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

 Fix vs. upstream ref10: `h -= carry << k` left-shifted possibly negative
 carries (undefined behavior, C11 6.5.7p4 / CERT INT34-C); replaced with
 multiplication by ((int64_t) 1 << k), as libsodium does.
*/
void fe_mul121666(fe h,fe f)
{
  int32_t f0 = f[0];
  int32_t f1 = f[1];
  int32_t f2 = f[2];
  int32_t f3 = f[3];
  int32_t f4 = f[4];
  int32_t f5 = f[5];
  int32_t f6 = f[6];
  int32_t f7 = f[7];
  int32_t f8 = f[8];
  int32_t f9 = f[9];
  int64_t h0 = f0 * (int64_t) 121666;
  int64_t h1 = f1 * (int64_t) 121666;
  int64_t h2 = f2 * (int64_t) 121666;
  int64_t h3 = f3 * (int64_t) 121666;
  int64_t h4 = f4 * (int64_t) 121666;
  int64_t h5 = f5 * (int64_t) 121666;
  int64_t h6 = f6 * (int64_t) 121666;
  int64_t h7 = f7 * (int64_t) 121666;
  int64_t h8 = f8 * (int64_t) 121666;
  int64_t h9 = f9 * (int64_t) 121666;
  int64_t carry0;
  int64_t carry1;
  int64_t carry2;
  int64_t carry3;
  int64_t carry4;
  int64_t carry5;
  int64_t carry6;
  int64_t carry7;
  int64_t carry8;
  int64_t car9;
  int64_t carry9;

  /* Odd-limb carries first; top-limb carry wraps around as *19. */
  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((int64_t) 1 << 25);
  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 * ((int64_t) 1 << 25);
  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 * ((int64_t) 1 << 25);
  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 * ((int64_t) 1 << 25);
  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 * ((int64_t) 1 << 25);

  /* Then even-limb carries; all limbs now fit their 26/25-bit targets. */
  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((int64_t) 1 << 26);
  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 * ((int64_t) 1 << 26);
  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((int64_t) 1 << 26);
  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 * ((int64_t) 1 << 26);
  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 * ((int64_t) 1 << 26);

  h[0] = (int32_t)h0;
  h[1] = (int32_t)h1;
  h[2] = (int32_t)h2;
  h[3] = (int32_t)h3;
  h[4] = (int32_t)h4;
  h[5] = (int32_t)h5;
  h[6] = (int32_t)h6;
  h[7] = (int32_t)h7;
  h[8] = (int32_t)h8;
  h[9] = (int32_t)h9;
}
|
592
|
+
|
593
|
+
/*
|
594
|
+
h = f * f
|
595
|
+
Can overlap h with f.
|
596
|
+
|
597
|
+
Preconditions:
|
598
|
+
|f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
599
|
+
|
600
|
+
Postconditions:
|
601
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
602
|
+
*/
|
603
|
+
|
604
|
+
/*
|
605
|
+
See fe_mul.c for discussion of implementation strategy.
|
606
|
+
*/
|
607
|
+
|
608
|
+
/*
 h = f * f in GF(2^255-19). Can overlap h with f.

 Preconditions:
   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

 Postconditions:
   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

 Squaring specialization of fe_mul: symmetric cross terms fi*fj (i != j)
 appear twice, so they are computed once with doubled precomputations.
 See fe_mul for discussion of implementation strategy.

 Fix vs. upstream ref10: `h -= carry << k` left-shifted possibly negative
 carries (undefined behavior, C11 6.5.7p4 / CERT INT34-C); replaced with
 multiplication by ((int64_t) 1 << k), as libsodium does.
*/
void fe_sq(fe h,fe f)
{
  int32_t f0 = f[0];
  int32_t f1 = f[1];
  int32_t f2 = f[2];
  int32_t f3 = f[3];
  int32_t f4 = f[4];
  int32_t f5 = f[5];
  int32_t f6 = f[6];
  int32_t f7 = f[7];
  int32_t f8 = f[8];
  int32_t f9 = f[9];
  int32_t f0_2 = 2 * f0;
  int32_t f1_2 = 2 * f1;
  int32_t f2_2 = 2 * f2;
  int32_t f3_2 = 2 * f3;
  int32_t f4_2 = 2 * f4;
  int32_t f5_2 = 2 * f5;
  int32_t f6_2 = 2 * f6;
  int32_t f7_2 = 2 * f7;
  int32_t f5_38 = 38 * f5; /* 1.31*2^30 */
  int32_t f6_19 = 19 * f6; /* 1.31*2^30 */
  int32_t f7_38 = 38 * f7; /* 1.31*2^30 */
  int32_t f8_19 = 19 * f8; /* 1.31*2^30 */
  int32_t f9_38 = 38 * f9; /* 1.31*2^30 */
  int64_t f0f0    = f0   * (int64_t) f0;
  int64_t f0f1_2  = f0_2 * (int64_t) f1;
  int64_t f0f2_2  = f0_2 * (int64_t) f2;
  int64_t f0f3_2  = f0_2 * (int64_t) f3;
  int64_t f0f4_2  = f0_2 * (int64_t) f4;
  int64_t f0f5_2  = f0_2 * (int64_t) f5;
  int64_t f0f6_2  = f0_2 * (int64_t) f6;
  int64_t f0f7_2  = f0_2 * (int64_t) f7;
  int64_t f0f8_2  = f0_2 * (int64_t) f8;
  int64_t f0f9_2  = f0_2 * (int64_t) f9;
  int64_t f1f1_2  = f1_2 * (int64_t) f1;
  int64_t f1f2_2  = f1_2 * (int64_t) f2;
  int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
  int64_t f1f4_2  = f1_2 * (int64_t) f4;
  int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
  int64_t f1f6_2  = f1_2 * (int64_t) f6;
  int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
  int64_t f1f8_2  = f1_2 * (int64_t) f8;
  int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
  int64_t f2f2    = f2   * (int64_t) f2;
  int64_t f2f3_2  = f2_2 * (int64_t) f3;
  int64_t f2f4_2  = f2_2 * (int64_t) f4;
  int64_t f2f5_2  = f2_2 * (int64_t) f5;
  int64_t f2f6_2  = f2_2 * (int64_t) f6;
  int64_t f2f7_2  = f2_2 * (int64_t) f7;
  int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
  int64_t f2f9_38 = f2   * (int64_t) f9_38;
  int64_t f3f3_2  = f3_2 * (int64_t) f3;
  int64_t f3f4_2  = f3_2 * (int64_t) f4;
  int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
  int64_t f3f6_2  = f3_2 * (int64_t) f6;
  int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
  int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
  int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
  int64_t f4f4    = f4   * (int64_t) f4;
  int64_t f4f5_2  = f4_2 * (int64_t) f5;
  int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
  int64_t f4f7_38 = f4   * (int64_t) f7_38;
  int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
  int64_t f4f9_38 = f4   * (int64_t) f9_38;
  int64_t f5f5_38 = f5   * (int64_t) f5_38;
  int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
  int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
  int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
  int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
  int64_t f6f6_19 = f6   * (int64_t) f6_19;
  int64_t f6f7_38 = f6   * (int64_t) f7_38;
  int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
  int64_t f6f9_38 = f6   * (int64_t) f9_38;
  int64_t f7f7_38 = f7   * (int64_t) f7_38;
  int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
  int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
  int64_t f8f8_19 = f8   * (int64_t) f8_19;
  int64_t f8f9_38 = f8   * (int64_t) f9_38;
  int64_t f9f9_38 = f9   * (int64_t) f9_38;
  int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
  int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
  int64_t carry0;
  int64_t carry1;
  int64_t carry2;
  int64_t carry3;
  int64_t carry4;
  int64_t carry5;
  int64_t carry6;
  int64_t carry7;
  int64_t carry8;
  int64_t carry9;

  /* Same rounding-carry chain as fe_mul; see the bound comments there. */
  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((int64_t) 1 << 26);
  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((int64_t) 1 << 26);

  carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 * ((int64_t) 1 << 25);
  carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 * ((int64_t) 1 << 25);

  carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 * ((int64_t) 1 << 26);
  carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 * ((int64_t) 1 << 26);

  carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 * ((int64_t) 1 << 25);
  carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 * ((int64_t) 1 << 25);

  carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 * ((int64_t) 1 << 26);
  carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 * ((int64_t) 1 << 26);

  /* Top-limb carry wraps around as *19 (2^255 = 19 mod p). */
  carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 * ((int64_t) 1 << 25);

  carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 * ((int64_t) 1 << 26);

  h[0] = (int32_t)h0;
  h[1] = (int32_t)h1;
  h[2] = (int32_t)h2;
  h[3] = (int32_t)h3;
  h[4] = (int32_t)h4;
  h[5] = (int32_t)h5;
  h[6] = (int32_t)h6;
  h[7] = (int32_t)h7;
  h[8] = (int32_t)h8;
  h[9] = (int32_t)h9;
}
|
739
|
+
|
740
|
+
/*
|
741
|
+
h = f - g
|
742
|
+
Can overlap h with f or g.
|
743
|
+
|
744
|
+
Preconditions:
|
745
|
+
|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
746
|
+
|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
747
|
+
|
748
|
+
Postconditions:
|
749
|
+
|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
750
|
+
*/
|
751
|
+
|
752
|
+
/*
 h = f - g, computed limb-wise with no carry propagation.
 Can overlap h with f or g (each output limb depends only on the
 corresponding input limbs).

 Preconditions:
   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

 Postconditions:
   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
*/
void fe_sub(fe h,fe f,fe g)
{
  int i;
  for (i = 0; i < 10; i++) {
    h[i] = f[i] - g[i];
  }
}
|
795
|
+
|
796
|
+
/*
|
797
|
+
Preconditions:
|
798
|
+
|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
799
|
+
|
800
|
+
Write p=2^255-19; q=floor(h/p).
|
801
|
+
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
|
802
|
+
|
803
|
+
Proof:
|
804
|
+
Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
|
805
|
+
Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
|
806
|
+
|
807
|
+
Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
|
808
|
+
Then 0<y<1.
|
809
|
+
|
810
|
+
Write r=h-pq.
|
811
|
+
Have 0<=r<=p-1=2^255-20.
|
812
|
+
Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
|
813
|
+
|
814
|
+
Write x=r+19(2^-255)r+y.
|
815
|
+
Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
|
816
|
+
|
817
|
+
Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
|
818
|
+
so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
|
819
|
+
*/
|
820
|
+
|
821
|
+
/*
 Serialize h to 32 little-endian bytes, fully reduced mod p = 2^255-19.

 First computes q = floor(h/p) (0 or 1 under the precondition, via the
 proof in the comment above), freezes h to h - pq in [0, 2^255-20], then
 packs the ten 26/25-bit limbs into 255 bits across s[0..31].

 NOTE(review): `carry << 26` etc. on a negative carry would be UB; after
 the 19*q add the carries here appear constrained to a safe range, but
 this relies on the precondition — TODO confirm against the bound proof.
*/
void fe_tobytes(unsigned char *s,fe h)
{
  int32_t h0 = h[0];
  int32_t h1 = h[1];
  int32_t h2 = h[2];
  int32_t h3 = h[3];
  int32_t h4 = h[4];
  int32_t h5 = h[5];
  int32_t h6 = h[6];
  int32_t h7 = h[7];
  int32_t h8 = h[8];
  int32_t h9 = h[9];
  int32_t q;
  int32_t carry0;
  int32_t carry1;
  int32_t carry2;
  int32_t carry3;
  int32_t carry4;
  int32_t carry5;
  int32_t carry6;
  int32_t carry7;
  int32_t carry8;
  int32_t carry9;

  /* q = floor(2^(-255)(h + 19*2^(-25)h9 + 2^(-1))), per the claim above:
     propagate a trial carry through all limbs; the final shift yields q. */
  q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
  q = (h0 + q) >> 26;
  q = (h1 + q) >> 25;
  q = (h2 + q) >> 26;
  q = (h3 + q) >> 25;
  q = (h4 + q) >> 26;
  q = (h5 + q) >> 25;
  q = (h6 + q) >> 26;
  q = (h7 + q) >> 25;
  q = (h8 + q) >> 26;
  q = (h9 + q) >> 25;

  /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
  h0 += 19 * q;
  /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */

  /* Truncating carry chain (no rounding bias): normalize every limb to
     its 26- or 25-bit width; the final top carry is discarded below. */
  carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
  carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
  carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
  carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
  carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
  carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
  carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
  carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
  carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
  carry9 = h9 >> 25;               h9 -= carry9 << 25;
                  /* h10 = carry9 */

  /*
  Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
  Have h0+...+2^230 h9 between 0 and 2^255-1;
  evidently 2^255 h10-2^255 q = 0.
  Goal: Output h0+...+2^230 h9.
  */

  /* Pack limbs little-endian: limb k starts at bit ceil(25.5*k), so some
     bytes splice the high bits of one limb with the low bits of the next. */
  s[0] = h0 >> 0;
  s[1] = h0 >> 8;
  s[2] = h0 >> 16;
  s[3] = (h0 >> 24) | (h1 << 2);
  s[4] = h1 >> 6;
  s[5] = h1 >> 14;
  s[6] = (h1 >> 22) | (h2 << 3);
  s[7] = h2 >> 5;
  s[8] = h2 >> 13;
  s[9] = (h2 >> 21) | (h3 << 5);
  s[10] = h3 >> 3;
  s[11] = h3 >> 11;
  s[12] = (h3 >> 19) | (h4 << 6);
  s[13] = h4 >> 2;
  s[14] = h4 >> 10;
  s[15] = h4 >> 18;
  s[16] = h5 >> 0;
  s[17] = h5 >> 8;
  s[18] = h5 >> 16;
  s[19] = (h5 >> 24) | (h6 << 1);
  s[20] = h6 >> 7;
  s[21] = h6 >> 15;
  s[22] = (h6 >> 23) | (h7 << 3);
  s[23] = h7 >> 5;
  s[24] = h7 >> 13;
  s[25] = (h7 >> 21) | (h8 << 4);
  s[26] = h8 >> 4;
  s[27] = h8 >> 12;
  s[28] = (h8 >> 20) | (h9 << 6);
  s[29] = h9 >> 2;
  s[30] = h9 >> 10;
  s[31] = h9 >> 18;
}
|