ed25519_blake2b 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +23 -0
- data/LICENSE +21 -0
- data/README.md +39 -0
- data/Rakefile +13 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ed25519_blake2b.gemspec +31 -0
- data/ext/ed25519_blake2b/blake2-config.h +72 -0
- data/ext/ed25519_blake2b/blake2-impl.h +160 -0
- data/ext/ed25519_blake2b/blake2.h +195 -0
- data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
- data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
- data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
- data/ext/ed25519_blake2b/blake2b-round.h +157 -0
- data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
- data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
- data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
- data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
- data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
- data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
- data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
- data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
- data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
- data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
- data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
- data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
- data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
- data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
- data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
- data/ext/ed25519_blake2b/ed25519.c +150 -0
- data/ext/ed25519_blake2b/ed25519.h +30 -0
- data/ext/ed25519_blake2b/extconf.rb +3 -0
- data/ext/ed25519_blake2b/fuzz/README.md +173 -0
- data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
- data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
- data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
- data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
- data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
- data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
- data/ext/ed25519_blake2b/rbext.c +25 -0
- data/ext/ed25519_blake2b/regression.h +1024 -0
- data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
- data/lib/ed25519_blake2b/version.rb +3 -0
- metadata +147 -0
@@ -0,0 +1,9 @@
|
|
1
|
+
#ifndef ED25519_REF10_H
#define ED25519_REF10_H

/*
	Entry points of the ref10 Ed25519 implementation that the fuzzer uses as
	its reference. Each function returns an int status; the fuzz driver
	treats the result of crypto_sign_open_ref10 as the "valid" flag.
	NOTE(review): buffer sizes are not visible from this header - confirm
	against ed25519-ref10.c before relying on them.
*/

/* writes the public key for secret key sk to pk */
int crypto_sign_pk_ref10(
	unsigned char *pk,
	unsigned char *sk);

/* signs the mlen-byte message m with sk; output goes to sm, its length to *smlen */
int crypto_sign_ref10(
	unsigned char *sm,
	unsigned long long *smlen,
	const unsigned char *m,
	unsigned long long mlen,
	const unsigned char *sk);

/* checks the smlen-byte signed message sm against pk; output goes to m, its length to *mlen */
int crypto_sign_open_ref10(
	unsigned char *m,
	unsigned long long *mlen,
	const unsigned char *sm,
	unsigned long long smlen,
	const unsigned char *pk);

#endif /* ED25519_REF10_H */
|
9
|
+
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#if defined(_WIN32)
|
2
|
+
#include <windows.h>
|
3
|
+
#include <wincrypt.h>
|
4
|
+
typedef unsigned int uint32_t;
|
5
|
+
typedef unsigned __int64 uint64_t;
|
6
|
+
#else
|
7
|
+
#include <stdint.h>
|
8
|
+
#endif
|
9
|
+
|
10
|
+
#include <string.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
|
13
|
+
#include "ed25519-donna.h"
|
14
|
+
#include "curve25519-ref10.h"
|
15
|
+
|
16
|
+
/*
	Prints "<desc> diff:" followed by the byte-wise XOR of a and b (len bytes
	each), 16 entries per row. Positions where the buffers agree are shown
	as "____," so mismatching bytes stand out. Ends with a blank line.
*/
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
	size_t idx;
	unsigned char delta;

	printf("%s diff:\n", desc);
	for (idx = 0; idx < len; idx++) {
		delta = a[idx] ^ b[idx];
		if (delta) {
			printf("0x%02x,", delta);
		} else {
			printf("____,");
		}
		/* break the row after every 16th byte */
		if (((idx + 1) & 15) == 0) {
			printf("\n");
		}
	}
	printf("\n\n");
}
|
32
|
+
|
33
|
+
/*
	Prints "<desc>:" followed by len bytes as "0x??," entries, 16 per row,
	and ends with a blank line.
*/
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
	size_t idx;

	printf("%s:\n", desc);
	for (idx = 0; idx < len; idx++) {
		printf("0x%02x,", bytes[idx]);
		/* break the row after every 16th byte */
		if (((idx + 1) & 15) == 0) {
			printf("\n");
		}
	}
	printf("\n\n");
}
|
44
|
+
|
45
|
+
|
46
|
+
/*
	chacha20/12 prng

	Fills out[0..bytes) with pseudo-random data for the fuzzer.

	The 16-word state is seeded once, on the first call, from the OS
	(CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits
	with status 1 if seeding fails. Every 64-byte block is produced by
	running 12 rounds (6 double rounds) of the ChaCha quarter-round over a
	copy of the state and emitting the permuted words directly, after which
	the counter held in state[12..15] is incremented.

	Keeps static state, so it is neither reentrant nor thread-safe.
*/
void
prng(unsigned char *out, size_t bytes) {
	static uint32_t state[16];
	static int init = 0;
	uint32_t x[16], t;
	size_t i;

	if (!init) {
#if defined(_WIN32)
		HCRYPTPROV csp;
		if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
			printf("CryptAcquireContext failed\n");
			exit(1);
		}
		if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
			printf("CryptGenRandom failed\n");
			exit(1);
		}
		CryptReleaseContext(csp, 0);
#else
		FILE *f = fopen("/dev/urandom", "rb");
		if (!f) {
			printf("failed to open /dev/urandom\n");
			exit(1);
		}
		if (fread(state, sizeof(state), 1, f) != 1) {
			printf("read error on /dev/urandom\n");
			exit(1);
		}
		/* the seed is read exactly once; release the handle instead of leaking it */
		fclose(f);
#endif
		init = 1;
	}

	while (bytes) {
		for (i = 0; i < 16; i++) x[i] = state[i];

		#define rotl32(x,k) (((x) << (k)) | ((x) >> (32 - (k))))
		#define quarter(a,b,c,d) \
			x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
			x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
			x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
			x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

		/* each iteration is one column round followed by one diagonal round */
		for (i = 0; i < 12; i += 2) {
			quarter( 0, 4, 8,12)
			quarter( 1, 5, 9,13)
			quarter( 2, 6,10,14)
			quarter( 3, 7,11,15)
			quarter( 0, 5,10,15)
			quarter( 1, 6,11,12)
			quarter( 2, 7, 8,13)
			quarter( 3, 4, 9,14)
		}

		/* hand out at most one 64-byte block per iteration */
		if (bytes <= 64) {
			memcpy(out, x, bytes);
			bytes = 0;
		} else {
			memcpy(out, x, 64);
			bytes -= 64;
			out += 64;
		}

		/* don't need a nonce, so last 4 words are the counter. 2^128 blocks of 64 bytes = 2^134 bytes can be generated */
		if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
	}
}
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
/*
	Differential fuzzer for curve25519 base-point scalar multiplication.

	Runs forever: for each random 32-byte secret key it computes the public
	key with ref10 and with curved25519 (plus the SSE2 build when
	ED25519_SSE2 is defined). On the first mismatch against ref10 it dumps
	the key, the reference output and the per-implementation diffs, then
	exits with status 1. A "." is printed every 0x1000 iterations and the
	64-bit iteration counter every 0x20000 iterations.
*/
int main() {
	const size_t skmax = 1024;
	static unsigned char sk[1024][32];
	unsigned char pk[3][32];
	unsigned char *secret;
	size_t next_sk = skmax, impls, k;
	uint64_t ctr = 0;

	printf("fuzzing: ");
	printf(" ref10");
	printf(" curved25519");
#if defined(ED25519_SSE2)
	printf(" curved25519-sse2");
#endif
	printf("\n\n");

	for (;;) {
		/* refill the whole pool of secret keys once it has been used up */
		if (next_sk == skmax) {
			prng((unsigned char *)sk, sizeof(sk));
			next_sk = 0;
		}
		secret = sk[next_sk];
		next_sk += 1;

		/* pk[0] is always the ref10 result the others are compared against */
		impls = 0;
		crypto_scalarmult_base_ref10(pk[impls], secret);
		impls += 1;
		curved25519_scalarmult_basepoint(pk[impls], secret);
		impls += 1;
#if defined(ED25519_SSE2)
		curved25519_scalarmult_basepoint_sse2(pk[impls], secret);
		impls += 1;
#endif

		for (k = 1; k < impls; k++) {
			if (memcmp(pk[0], pk[k], 32) == 0) {
				continue;
			}
			printf("\n\n");
			print_bytes("sk", secret, 32);
			print_bytes("ref10", pk[0], 32);
			print_diff("curved25519", pk[0], pk[1], 32);
#if defined(ED25519_SSE2)
			print_diff("curved25519-sse2", pk[0], pk[2], 32);
#endif
			exit(1);
		}

		/* progress output */
		if ((ctr != 0) && ((ctr & 0xfff) == 0)) {
			printf(".");
			if ((ctr & 0x1ffff) == 0) {
				printf(" [");
				/* counter as 16 hex digits, most significant byte first */
				for (k = 8; k > 0; k--) {
					printf("%02x", (unsigned char)(ctr >> ((k - 1) * 8)));
				}
				printf("]\n");
			}
		}

		ctr++;
	}
}
|
172
|
+
|
@@ -0,0 +1,219 @@
|
|
1
|
+
/*
	Fixed-width integer types. Windows builds do not pull in <stdint.h>
	here, so both widths used by this file are declared by hand: prng()
	needs uint32_t and main() counts iterations in a uint64_t.
*/
#if defined(_WIN32)
	#include <windows.h>
	#include <wincrypt.h>
	typedef unsigned int uint32_t;
	typedef unsigned __int64 uint64_t;
#else
	#include <stdint.h>
#endif
|
8
|
+
|
9
|
+
#include <string.h>
|
10
|
+
#include <stdio.h>
|
11
|
+
|
12
|
+
#include "ed25519-donna.h"
|
13
|
+
#include "ed25519-ref10.h"
|
14
|
+
|
15
|
+
/*
	Prints "<desc> diff:" followed by the byte-wise XOR of a and b (len bytes
	each), 16 entries per row. Positions where the buffers agree are shown
	as "____," so mismatching bytes stand out. Ends with a newline.
*/
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
	size_t idx;
	unsigned char delta;

	printf("%s diff:\n", desc);
	for (idx = 0; idx < len; idx++) {
		delta = a[idx] ^ b[idx];
		if (delta) {
			printf("0x%02x,", delta);
		} else {
			printf("____,");
		}
		/* break the row after every 16th byte */
		if (((idx + 1) & 15) == 0) {
			printf("\n");
		}
	}
	printf("\n");
}
|
31
|
+
|
32
|
+
/*
	Prints "<desc>:" followed by len bytes as "0x??," entries, 16 per row,
	and ends with a newline.
*/
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
	size_t idx;

	printf("%s:\n", desc);
	for (idx = 0; idx < len; idx++) {
		printf("0x%02x,", bytes[idx]);
		/* break the row after every 16th byte */
		if (((idx + 1) & 15) == 0) {
			printf("\n");
		}
	}
	printf("\n");
}
|
43
|
+
|
44
|
+
|
45
|
+
/*
	chacha20/12 prng

	Fills out[0..bytes) with pseudo-random data for the fuzzer.

	The 16-word state is seeded once, on the first call, from the OS
	(CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits
	with status 1 if seeding fails. Every 64-byte block is produced by
	running 12 rounds (6 double rounds) of the ChaCha quarter-round over a
	copy of the state and emitting the permuted words directly, after which
	the counter held in state[12..15] is incremented.

	Keeps static state, so it is neither reentrant nor thread-safe.
*/
void
prng(unsigned char *out, size_t bytes) {
	static uint32_t state[16];
	static int init = 0;
	uint32_t x[16], t;
	size_t i;

	if (!init) {
#if defined(_WIN32)
		/* HCRYPTPROV is an integer handle, so it is cleared with 0 rather than NULL */
		HCRYPTPROV csp = 0;
		if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
			printf("CryptAcquireContext failed\n");
			exit(1);
		}
		if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
			printf("CryptGenRandom failed\n");
			exit(1);
		}
		CryptReleaseContext(csp, 0);
#else
		FILE *f = fopen("/dev/urandom", "rb");
		if (!f) {
			printf("failed to open /dev/urandom\n");
			exit(1);
		}
		if (fread(state, sizeof(state), 1, f) != 1) {
			printf("read error on /dev/urandom\n");
			exit(1);
		}
		/* the seed is read exactly once; release the handle instead of leaking it */
		fclose(f);
#endif
		init = 1;
	}

	while (bytes) {
		for (i = 0; i < 16; i++) x[i] = state[i];

		#define rotl32(x,k) (((x) << (k)) | ((x) >> (32 - (k))))
		#define quarter(a,b,c,d) \
			x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
			x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
			x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
			x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

		/* each iteration is one column round followed by one diagonal round */
		for (i = 0; i < 12; i += 2) {
			quarter( 0, 4, 8,12)
			quarter( 1, 5, 9,13)
			quarter( 2, 6,10,14)
			quarter( 3, 7,11,15)
			quarter( 0, 5,10,15)
			quarter( 1, 6,11,12)
			quarter( 2, 7, 8,13)
			quarter( 3, 4, 9,14)
		}

		/* hand out at most one 64-byte block per iteration */
		if (bytes <= 64) {
			memcpy(out, x, bytes);
			bytes = 0;
		} else {
			memcpy(out, x, 64);
			bytes -= 64;
			out += 64;
		}

		/* don't need a nonce, so last 4 words are the counter. 2^128 blocks of 64 bytes = 2^134 bytes can be generated */
		if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
	}
}
|
114
|
+
|
115
|
+
/* one fuzz input; main() fills a whole array of these straight from prng() */
typedef struct random_data_t {
	unsigned char sk[32];   /* secret key handed to every implementation */
	unsigned char m[128];   /* message that gets signed and verified */
} random_data;
|
119
|
+
|
120
|
+
/* what one implementation produced for a fuzz input; main() compares these with memcmp */
typedef struct generated_data_t {
	unsigned char pk[32];   /* derived public key */
	unsigned char sig[64];  /* signature over the message */
	int valid;              /* return value of the verify call; 0 is printed as "yes" */
} generated_data;
|
125
|
+
|
126
|
+
static void
|
127
|
+
print_generated(const char *desc, generated_data *g) {
|
128
|
+
printf("%s:\n", desc);
|
129
|
+
print_bytes("pk", g->pk, 32);
|
130
|
+
print_bytes("sig", g->sig, 64);
|
131
|
+
printf("valid: %s\n\n", g->valid ? "no" : "yes");
|
132
|
+
}
|
133
|
+
|
134
|
+
static void
|
135
|
+
print_generated_diff(const char *desc, const generated_data *base, generated_data *g) {
|
136
|
+
printf("%s:\n", desc);
|
137
|
+
print_diff("pk", base->pk, g->pk, 32);
|
138
|
+
print_diff("sig", base->sig, g->sig, 64);
|
139
|
+
printf("valid: %s\n\n", (base->valid == g->valid) ? "___" : (g->valid ? "no" : "yes"));
|
140
|
+
}
|
141
|
+
|
142
|
+
int main() {
|
143
|
+
const size_t rndmax = 128;
|
144
|
+
static random_data rnd[128];
|
145
|
+
static generated_data gen[3];
|
146
|
+
random_data *r;
|
147
|
+
generated_data *g;
|
148
|
+
unsigned long long dummylen;
|
149
|
+
unsigned char dummysk[64];
|
150
|
+
unsigned char dummymsg[2][128+64];
|
151
|
+
size_t rndi, geni, i, j;
|
152
|
+
uint64_t ctr;
|
153
|
+
|
154
|
+
printf("fuzzing: ");
|
155
|
+
printf(" ref10");
|
156
|
+
printf(" ed25519-donna");
|
157
|
+
#if defined(ED25519_SSE2)
|
158
|
+
printf(" ed25519-donna-sse2");
|
159
|
+
#endif
|
160
|
+
printf("\n\n");
|
161
|
+
|
162
|
+
for (ctr = 0, rndi = rndmax;;ctr++) {
|
163
|
+
if (rndi == rndmax) {
|
164
|
+
prng((unsigned char *)rnd, sizeof(rnd));
|
165
|
+
rndi = 0;
|
166
|
+
}
|
167
|
+
r = &rnd[rndi++];
|
168
|
+
|
169
|
+
/* ref10, lots of horrible gymnastics to work around the wonky api */
|
170
|
+
geni = 0;
|
171
|
+
g = &gen[geni++];
|
172
|
+
memcpy(dummysk, r->sk, 32); /* pk is appended to the sk, need to copy the sk to a larger buffer */
|
173
|
+
crypto_sign_pk_ref10(dummysk + 32, dummysk);
|
174
|
+
memcpy(g->pk, dummysk + 32, 32);
|
175
|
+
crypto_sign_ref10(dummymsg[0], &dummylen, r->m, 128, dummysk);
|
176
|
+
memcpy(g->sig, dummymsg[0], 64); /* sig is placed in front of the signed message */
|
177
|
+
g->valid = crypto_sign_open_ref10(dummymsg[1], &dummylen, dummymsg[0], 128 + 64, g->pk);
|
178
|
+
|
179
|
+
/* ed25519-donna */
|
180
|
+
g = &gen[geni++];
|
181
|
+
ed25519_publickey(r->sk, g->pk);
|
182
|
+
ed25519_sign(r->m, 128, r->sk, g->pk, g->sig);
|
183
|
+
g->valid = ed25519_sign_open(r->m, 128, g->pk, g->sig);
|
184
|
+
|
185
|
+
#if defined(ED25519_SSE2)
|
186
|
+
/* ed25519-donna-sse2 */
|
187
|
+
g = &gen[geni++];
|
188
|
+
ed25519_publickey_sse2(r->sk, g->pk);
|
189
|
+
ed25519_sign_sse2(r->m, 128, r->sk, g->pk, g->sig);
|
190
|
+
g->valid = ed25519_sign_open_sse2(r->m, 128, g->pk, g->sig);
|
191
|
+
#endif
|
192
|
+
|
193
|
+
/* compare implementations 1..geni against the reference */
|
194
|
+
for (i = 1; i < geni; i++) {
|
195
|
+
if (memcmp(&gen[0], &gen[i], sizeof(generated_data)) != 0) {
|
196
|
+
printf("\n\n");
|
197
|
+
print_bytes("sk", r->sk, 32);
|
198
|
+
print_bytes("m", r->m, 128);
|
199
|
+
print_generated("ref10", &gen[0]);
|
200
|
+
print_generated_diff("ed25519-donna", &gen[0], &gen[1]);
|
201
|
+
#if defined(ED25519_SSE2)
|
202
|
+
print_generated_diff("ed25519-donna-sse2", &gen[0], &gen[2]);
|
203
|
+
#endif
|
204
|
+
exit(1);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
/* print out status */
|
209
|
+
if (ctr && (ctr % 0x1000 == 0)) {
|
210
|
+
printf(".");
|
211
|
+
if ((ctr % 0x20000) == 0) {
|
212
|
+
printf(" [");
|
213
|
+
for (i = 0; i < 8; i++)
|
214
|
+
printf("%02x", (unsigned char)(ctr >> ((7 - i) * 8)));
|
215
|
+
printf("]\n");
|
216
|
+
}
|
217
|
+
}
|
218
|
+
}
|
219
|
+
}
|
@@ -0,0 +1,469 @@
|
|
1
|
+
/*
|
2
|
+
Public domain by Andrew M. <liquidsun@gmail.com>
|
3
|
+
*/
|
4
|
+
|
5
|
+
|
6
|
+
/*
|
7
|
+
Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
|
8
|
+
|
9
|
+
k = 32
|
10
|
+
b = 1 << 8 = 256
|
11
|
+
m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
|
12
|
+
mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
|
13
|
+
*/
|
14
|
+
|
15
|
+
/* a 256-bit scalar is held as 9 limbs of 30 bits, least significant limb first */
#define bignum256modm_bits_per_limb 30
#define bignum256modm_limb_size 9

typedef uint32_t bignum256modm_element_t;
typedef bignum256modm_element_t bignum256modm[9];

/* the modulus m (the group order), split into 30-bit limbs */
static const bignum256modm modm_m = {
	0x1cf5d3ed,
	0x20498c69,
	0x2f79cd65,
	0x37be77a8,
	0x00000014,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00001000
};

/* the Barrett constant mu = floor(b^(2k) / m), split into 30-bit limbs */
static const bignum256modm modm_mu = {
	0x0a2c131b,
	0x3673968c,
	0x06329a7e,
	0x01885742,
	0x3fffeb21,
	0x3fffffff,
	0x3fffffff,
	0x3fffffff,
	0x000fffff
};
|
32
|
+
|
33
|
+
static bignum256modm_element_t
|
34
|
+
lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
|
35
|
+
return (a - b) >> 31;
|
36
|
+
}
|
37
|
+
|
38
|
+
/* see HAC, Alg. 14.42 Step 4 */
|
39
|
+
static void
|
40
|
+
reduce256_modm(bignum256modm r) {
|
41
|
+
bignum256modm t;
|
42
|
+
bignum256modm_element_t b = 0, pb, mask;
|
43
|
+
|
44
|
+
/* t = r - m */
|
45
|
+
pb = 0;
|
46
|
+
pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
|
47
|
+
pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
|
48
|
+
pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
|
49
|
+
pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
|
50
|
+
pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
|
51
|
+
pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
|
52
|
+
pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
|
53
|
+
pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
|
54
|
+
pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
|
55
|
+
|
56
|
+
/* keep r if r was smaller than m */
|
57
|
+
mask = b - 1;
|
58
|
+
r[0] ^= mask & (r[0] ^ t[0]);
|
59
|
+
r[1] ^= mask & (r[1] ^ t[1]);
|
60
|
+
r[2] ^= mask & (r[2] ^ t[2]);
|
61
|
+
r[3] ^= mask & (r[3] ^ t[3]);
|
62
|
+
r[4] ^= mask & (r[4] ^ t[4]);
|
63
|
+
r[5] ^= mask & (r[5] ^ t[5]);
|
64
|
+
r[6] ^= mask & (r[6] ^ t[6]);
|
65
|
+
r[7] ^= mask & (r[7] ^ t[7]);
|
66
|
+
r[8] ^= mask & (r[8] ^ t[8]);
|
67
|
+
}
|
68
|
+
|
69
|
+
/*
|
70
|
+
Barrett reduction, see HAC, Alg. 14.42
|
71
|
+
|
72
|
+
Instead of passing in x, pre-process in to q1 and r1 for efficiency
|
73
|
+
*/
|
74
|
+
static void
|
75
|
+
barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
|
76
|
+
bignum256modm q3, r2;
|
77
|
+
uint64_t c;
|
78
|
+
bignum256modm_element_t f, b, pb;
|
79
|
+
|
80
|
+
/* q1 = x >> 248 = 264 bits = 9 30 bit elements
|
81
|
+
q2 = mu * q1
|
82
|
+
q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
|
83
|
+
c = mul32x32_64(modm_mu[0], q1[7]) + mul32x32_64(modm_mu[1], q1[6]) + mul32x32_64(modm_mu[2], q1[5]) + mul32x32_64(modm_mu[3], q1[4]) + mul32x32_64(modm_mu[4], q1[3]) + mul32x32_64(modm_mu[5], q1[2]) + mul32x32_64(modm_mu[6], q1[1]) + mul32x32_64(modm_mu[7], q1[0]);
|
84
|
+
c >>= 30;
|
85
|
+
c += mul32x32_64(modm_mu[0], q1[8]) + mul32x32_64(modm_mu[1], q1[7]) + mul32x32_64(modm_mu[2], q1[6]) + mul32x32_64(modm_mu[3], q1[5]) + mul32x32_64(modm_mu[4], q1[4]) + mul32x32_64(modm_mu[5], q1[3]) + mul32x32_64(modm_mu[6], q1[2]) + mul32x32_64(modm_mu[7], q1[1]) + mul32x32_64(modm_mu[8], q1[0]);
|
86
|
+
f = (bignum256modm_element_t)c; q3[0] = (f >> 24) & 0x3f; c >>= 30;
|
87
|
+
c += mul32x32_64(modm_mu[1], q1[8]) + mul32x32_64(modm_mu[2], q1[7]) + mul32x32_64(modm_mu[3], q1[6]) + mul32x32_64(modm_mu[4], q1[5]) + mul32x32_64(modm_mu[5], q1[4]) + mul32x32_64(modm_mu[6], q1[3]) + mul32x32_64(modm_mu[7], q1[2]) + mul32x32_64(modm_mu[8], q1[1]);
|
88
|
+
f = (bignum256modm_element_t)c; q3[0] |= (f << 6) & 0x3fffffff; q3[1] = (f >> 24) & 0x3f; c >>= 30;
|
89
|
+
c += mul32x32_64(modm_mu[2], q1[8]) + mul32x32_64(modm_mu[3], q1[7]) + mul32x32_64(modm_mu[4], q1[6]) + mul32x32_64(modm_mu[5], q1[5]) + mul32x32_64(modm_mu[6], q1[4]) + mul32x32_64(modm_mu[7], q1[3]) + mul32x32_64(modm_mu[8], q1[2]);
|
90
|
+
f = (bignum256modm_element_t)c; q3[1] |= (f << 6) & 0x3fffffff; q3[2] = (f >> 24) & 0x3f; c >>= 30;
|
91
|
+
c += mul32x32_64(modm_mu[3], q1[8]) + mul32x32_64(modm_mu[4], q1[7]) + mul32x32_64(modm_mu[5], q1[6]) + mul32x32_64(modm_mu[6], q1[5]) + mul32x32_64(modm_mu[7], q1[4]) + mul32x32_64(modm_mu[8], q1[3]);
|
92
|
+
f = (bignum256modm_element_t)c; q3[2] |= (f << 6) & 0x3fffffff; q3[3] = (f >> 24) & 0x3f; c >>= 30;
|
93
|
+
c += mul32x32_64(modm_mu[4], q1[8]) + mul32x32_64(modm_mu[5], q1[7]) + mul32x32_64(modm_mu[6], q1[6]) + mul32x32_64(modm_mu[7], q1[5]) + mul32x32_64(modm_mu[8], q1[4]);
|
94
|
+
f = (bignum256modm_element_t)c; q3[3] |= (f << 6) & 0x3fffffff; q3[4] = (f >> 24) & 0x3f; c >>= 30;
|
95
|
+
c += mul32x32_64(modm_mu[5], q1[8]) + mul32x32_64(modm_mu[6], q1[7]) + mul32x32_64(modm_mu[7], q1[6]) + mul32x32_64(modm_mu[8], q1[5]);
|
96
|
+
f = (bignum256modm_element_t)c; q3[4] |= (f << 6) & 0x3fffffff; q3[5] = (f >> 24) & 0x3f; c >>= 30;
|
97
|
+
c += mul32x32_64(modm_mu[6], q1[8]) + mul32x32_64(modm_mu[7], q1[7]) + mul32x32_64(modm_mu[8], q1[6]);
|
98
|
+
f = (bignum256modm_element_t)c; q3[5] |= (f << 6) & 0x3fffffff; q3[6] = (f >> 24) & 0x3f; c >>= 30;
|
99
|
+
c += mul32x32_64(modm_mu[7], q1[8]) + mul32x32_64(modm_mu[8], q1[7]);
|
100
|
+
f = (bignum256modm_element_t)c; q3[6] |= (f << 6) & 0x3fffffff; q3[7] = (f >> 24) & 0x3f; c >>= 30;
|
101
|
+
c += mul32x32_64(modm_mu[8], q1[8]);
|
102
|
+
f = (bignum256modm_element_t)c; q3[7] |= (f << 6) & 0x3fffffff; q3[8] = (bignum256modm_element_t)(c >> 24);
|
103
|
+
|
104
|
+
/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
|
105
|
+
r2 = (q3 * m) mod (256^(32+1)) = (q3 * m) & ((1 << 264) - 1) */
|
106
|
+
c = mul32x32_64(modm_m[0], q3[0]);
|
107
|
+
r2[0] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
108
|
+
c += mul32x32_64(modm_m[0], q3[1]) + mul32x32_64(modm_m[1], q3[0]);
|
109
|
+
r2[1] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
110
|
+
c += mul32x32_64(modm_m[0], q3[2]) + mul32x32_64(modm_m[1], q3[1]) + mul32x32_64(modm_m[2], q3[0]);
|
111
|
+
r2[2] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
112
|
+
c += mul32x32_64(modm_m[0], q3[3]) + mul32x32_64(modm_m[1], q3[2]) + mul32x32_64(modm_m[2], q3[1]) + mul32x32_64(modm_m[3], q3[0]);
|
113
|
+
r2[3] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
114
|
+
c += mul32x32_64(modm_m[0], q3[4]) + mul32x32_64(modm_m[1], q3[3]) + mul32x32_64(modm_m[2], q3[2]) + mul32x32_64(modm_m[3], q3[1]) + mul32x32_64(modm_m[4], q3[0]);
|
115
|
+
r2[4] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
116
|
+
c += mul32x32_64(modm_m[0], q3[5]) + mul32x32_64(modm_m[1], q3[4]) + mul32x32_64(modm_m[2], q3[3]) + mul32x32_64(modm_m[3], q3[2]) + mul32x32_64(modm_m[4], q3[1]) + mul32x32_64(modm_m[5], q3[0]);
|
117
|
+
r2[5] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
118
|
+
c += mul32x32_64(modm_m[0], q3[6]) + mul32x32_64(modm_m[1], q3[5]) + mul32x32_64(modm_m[2], q3[4]) + mul32x32_64(modm_m[3], q3[3]) + mul32x32_64(modm_m[4], q3[2]) + mul32x32_64(modm_m[5], q3[1]) + mul32x32_64(modm_m[6], q3[0]);
|
119
|
+
r2[6] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
120
|
+
c += mul32x32_64(modm_m[0], q3[7]) + mul32x32_64(modm_m[1], q3[6]) + mul32x32_64(modm_m[2], q3[5]) + mul32x32_64(modm_m[3], q3[4]) + mul32x32_64(modm_m[4], q3[3]) + mul32x32_64(modm_m[5], q3[2]) + mul32x32_64(modm_m[6], q3[1]) + mul32x32_64(modm_m[7], q3[0]);
|
121
|
+
r2[7] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
|
122
|
+
c += mul32x32_64(modm_m[0], q3[8]) + mul32x32_64(modm_m[1], q3[7]) + mul32x32_64(modm_m[2], q3[6]) + mul32x32_64(modm_m[3], q3[5]) + mul32x32_64(modm_m[4], q3[4]) + mul32x32_64(modm_m[5], q3[3]) + mul32x32_64(modm_m[6], q3[2]) + mul32x32_64(modm_m[7], q3[1]) + mul32x32_64(modm_m[8], q3[0]);
|
123
|
+
r2[8] = (bignum256modm_element_t)(c & 0xffffff);
|
124
|
+
|
125
|
+
/* r = r1 - r2
|
126
|
+
if (r < 0) r += (1 << 264) */
|
127
|
+
pb = 0;
|
128
|
+
pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
|
129
|
+
pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
|
130
|
+
pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
|
131
|
+
pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
|
132
|
+
pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
|
133
|
+
pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
|
134
|
+
pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
|
135
|
+
pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
|
136
|
+
pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
|
137
|
+
|
138
|
+
reduce256_modm(r);
|
139
|
+
reduce256_modm(r);
|
140
|
+
}
|
141
|
+
|
142
|
+
/* addition modulo m */
|
143
|
+
static void
|
144
|
+
add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
|
145
|
+
bignum256modm_element_t c;
|
146
|
+
|
147
|
+
c = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
|
148
|
+
c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
|
149
|
+
c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
|
150
|
+
c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
|
151
|
+
c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
|
152
|
+
c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
|
153
|
+
c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
|
154
|
+
c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
|
155
|
+
c += x[8] + y[8]; r[8] = c;
|
156
|
+
|
157
|
+
reduce256_modm(r);
|
158
|
+
}
|
159
|
+
|
160
|
+
/* multiplication modulo m */
|
161
|
+
static void
|
162
|
+
mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
|
163
|
+
bignum256modm r1, q1;
|
164
|
+
uint64_t c;
|
165
|
+
bignum256modm_element_t f;
|
166
|
+
|
167
|
+
/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
|
168
|
+
q1 = x >> 248 = 264 bits = 9 30 bit elements */
|
169
|
+
c = mul32x32_64(x[0], y[0]);
|
170
|
+
f = (bignum256modm_element_t)c; r1[0] = (f & 0x3fffffff); c >>= 30;
|
171
|
+
c += mul32x32_64(x[0], y[1]) + mul32x32_64(x[1], y[0]);
|
172
|
+
f = (bignum256modm_element_t)c; r1[1] = (f & 0x3fffffff); c >>= 30;
|
173
|
+
c += mul32x32_64(x[0], y[2]) + mul32x32_64(x[1], y[1]) + mul32x32_64(x[2], y[0]);
|
174
|
+
f = (bignum256modm_element_t)c; r1[2] = (f & 0x3fffffff); c >>= 30;
|
175
|
+
c += mul32x32_64(x[0], y[3]) + mul32x32_64(x[1], y[2]) + mul32x32_64(x[2], y[1]) + mul32x32_64(x[3], y[0]);
|
176
|
+
f = (bignum256modm_element_t)c; r1[3] = (f & 0x3fffffff); c >>= 30;
|
177
|
+
c += mul32x32_64(x[0], y[4]) + mul32x32_64(x[1], y[3]) + mul32x32_64(x[2], y[2]) + mul32x32_64(x[3], y[1]) + mul32x32_64(x[4], y[0]);
|
178
|
+
f = (bignum256modm_element_t)c; r1[4] = (f & 0x3fffffff); c >>= 30;
|
179
|
+
c += mul32x32_64(x[0], y[5]) + mul32x32_64(x[1], y[4]) + mul32x32_64(x[2], y[3]) + mul32x32_64(x[3], y[2]) + mul32x32_64(x[4], y[1]) + mul32x32_64(x[5], y[0]);
|
180
|
+
f = (bignum256modm_element_t)c; r1[5] = (f & 0x3fffffff); c >>= 30;
|
181
|
+
c += mul32x32_64(x[0], y[6]) + mul32x32_64(x[1], y[5]) + mul32x32_64(x[2], y[4]) + mul32x32_64(x[3], y[3]) + mul32x32_64(x[4], y[2]) + mul32x32_64(x[5], y[1]) + mul32x32_64(x[6], y[0]);
|
182
|
+
f = (bignum256modm_element_t)c; r1[6] = (f & 0x3fffffff); c >>= 30;
|
183
|
+
c += mul32x32_64(x[0], y[7]) + mul32x32_64(x[1], y[6]) + mul32x32_64(x[2], y[5]) + mul32x32_64(x[3], y[4]) + mul32x32_64(x[4], y[3]) + mul32x32_64(x[5], y[2]) + mul32x32_64(x[6], y[1]) + mul32x32_64(x[7], y[0]);
|
184
|
+
f = (bignum256modm_element_t)c; r1[7] = (f & 0x3fffffff); c >>= 30;
|
185
|
+
c += mul32x32_64(x[0], y[8]) + mul32x32_64(x[1], y[7]) + mul32x32_64(x[2], y[6]) + mul32x32_64(x[3], y[5]) + mul32x32_64(x[4], y[4]) + mul32x32_64(x[5], y[3]) + mul32x32_64(x[6], y[2]) + mul32x32_64(x[7], y[1]) + mul32x32_64(x[8], y[0]);
|
186
|
+
f = (bignum256modm_element_t)c; r1[8] = (f & 0x00ffffff); q1[0] = (f >> 8) & 0x3fffff; c >>= 30;
|
187
|
+
c += mul32x32_64(x[1], y[8]) + mul32x32_64(x[2], y[7]) + mul32x32_64(x[3], y[6]) + mul32x32_64(x[4], y[5]) + mul32x32_64(x[5], y[4]) + mul32x32_64(x[6], y[3]) + mul32x32_64(x[7], y[2]) + mul32x32_64(x[8], y[1]);
|
188
|
+
f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
|
189
|
+
c += mul32x32_64(x[2], y[8]) + mul32x32_64(x[3], y[7]) + mul32x32_64(x[4], y[6]) + mul32x32_64(x[5], y[5]) + mul32x32_64(x[6], y[4]) + mul32x32_64(x[7], y[3]) + mul32x32_64(x[8], y[2]);
|
190
|
+
f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
|
191
|
+
c += mul32x32_64(x[3], y[8]) + mul32x32_64(x[4], y[7]) + mul32x32_64(x[5], y[6]) + mul32x32_64(x[6], y[5]) + mul32x32_64(x[7], y[4]) + mul32x32_64(x[8], y[3]);
|
192
|
+
f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
|
193
|
+
c += mul32x32_64(x[4], y[8]) + mul32x32_64(x[5], y[7]) + mul32x32_64(x[6], y[6]) + mul32x32_64(x[7], y[5]) + mul32x32_64(x[8], y[4]);
|
194
|
+
f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
|
195
|
+
c += mul32x32_64(x[5], y[8]) + mul32x32_64(x[6], y[7]) + mul32x32_64(x[7], y[6]) + mul32x32_64(x[8], y[5]);
|
196
|
+
f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
|
197
|
+
c += mul32x32_64(x[6], y[8]) + mul32x32_64(x[7], y[7]) + mul32x32_64(x[8], y[6]);
|
198
|
+
f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
|
199
|
+
c += mul32x32_64(x[7], y[8]) + mul32x32_64(x[8], y[7]);
|
200
|
+
f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;
|
201
|
+
c += mul32x32_64(x[8], y[8]);
|
202
|
+
f = (bignum256modm_element_t)c; q1[7] = (q1[7] | (f << 22)) & 0x3fffffff; q1[8] = (f >> 8) & 0x3fffff;
|
203
|
+
|
204
|
+
barrett_reduce256_modm(r, q1, r1);
|
205
|
+
}
|
206
|
+
|
207
|
+
static void
|
208
|
+
expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
|
209
|
+
unsigned char work[64] = {0};
|
210
|
+
bignum256modm_element_t x[16];
|
211
|
+
bignum256modm q1;
|
212
|
+
|
213
|
+
memcpy(work, in, len);
|
214
|
+
x[0] = U8TO32_LE(work + 0);
|
215
|
+
x[1] = U8TO32_LE(work + 4);
|
216
|
+
x[2] = U8TO32_LE(work + 8);
|
217
|
+
x[3] = U8TO32_LE(work + 12);
|
218
|
+
x[4] = U8TO32_LE(work + 16);
|
219
|
+
x[5] = U8TO32_LE(work + 20);
|
220
|
+
x[6] = U8TO32_LE(work + 24);
|
221
|
+
x[7] = U8TO32_LE(work + 28);
|
222
|
+
x[8] = U8TO32_LE(work + 32);
|
223
|
+
x[9] = U8TO32_LE(work + 36);
|
224
|
+
x[10] = U8TO32_LE(work + 40);
|
225
|
+
x[11] = U8TO32_LE(work + 44);
|
226
|
+
x[12] = U8TO32_LE(work + 48);
|
227
|
+
x[13] = U8TO32_LE(work + 52);
|
228
|
+
x[14] = U8TO32_LE(work + 56);
|
229
|
+
x[15] = U8TO32_LE(work + 60);
|
230
|
+
|
231
|
+
/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
|
232
|
+
out[0] = ( x[0]) & 0x3fffffff;
|
233
|
+
out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
|
234
|
+
out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
|
235
|
+
out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
|
236
|
+
out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
|
237
|
+
out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
|
238
|
+
out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
|
239
|
+
out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
|
240
|
+
out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
|
241
|
+
|
242
|
+
/* 8*31 = 248 bits, no need to reduce */
|
243
|
+
if (len < 32)
|
244
|
+
return;
|
245
|
+
|
246
|
+
/* q1 = x >> 248 = 264 bits = 9 30 bit elements */
|
247
|
+
q1[0] = ((x[ 7] >> 24) | (x[ 8] << 8)) & 0x3fffffff;
|
248
|
+
q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
|
249
|
+
q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
|
250
|
+
q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
|
251
|
+
q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
|
252
|
+
q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
|
253
|
+
q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
|
254
|
+
q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
|
255
|
+
q1[8] = ((x[15] >> 8) );
|
256
|
+
|
257
|
+
barrett_reduce256_modm(out, q1, out);
|
258
|
+
}
|
259
|
+
|
260
|
+
static void
|
261
|
+
expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
|
262
|
+
bignum256modm_element_t x[8];
|
263
|
+
|
264
|
+
x[0] = U8TO32_LE(in + 0);
|
265
|
+
x[1] = U8TO32_LE(in + 4);
|
266
|
+
x[2] = U8TO32_LE(in + 8);
|
267
|
+
x[3] = U8TO32_LE(in + 12);
|
268
|
+
x[4] = U8TO32_LE(in + 16);
|
269
|
+
x[5] = U8TO32_LE(in + 20);
|
270
|
+
x[6] = U8TO32_LE(in + 24);
|
271
|
+
x[7] = U8TO32_LE(in + 28);
|
272
|
+
|
273
|
+
out[0] = ( x[0]) & 0x3fffffff;
|
274
|
+
out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
|
275
|
+
out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
|
276
|
+
out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
|
277
|
+
out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
|
278
|
+
out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
|
279
|
+
out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
|
280
|
+
out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
|
281
|
+
out[8] = ((x[ 7] >> 16) ) & 0x0000ffff;
|
282
|
+
}
|
283
|
+
|
284
|
+
static void
|
285
|
+
contract256_modm(unsigned char out[32], const bignum256modm in) {
|
286
|
+
U32TO8_LE(out + 0, (in[0] ) | (in[1] << 30));
|
287
|
+
U32TO8_LE(out + 4, (in[1] >> 2) | (in[2] << 28));
|
288
|
+
U32TO8_LE(out + 8, (in[2] >> 4) | (in[3] << 26));
|
289
|
+
U32TO8_LE(out + 12, (in[3] >> 6) | (in[4] << 24));
|
290
|
+
U32TO8_LE(out + 16, (in[4] >> 8) | (in[5] << 22));
|
291
|
+
U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
|
292
|
+
U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
|
293
|
+
U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
|
294
|
+
}
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
static void
|
299
|
+
contract256_window4_modm(signed char r[64], const bignum256modm in) {
|
300
|
+
char carry;
|
301
|
+
signed char *quads = r;
|
302
|
+
bignum256modm_element_t i, j, v;
|
303
|
+
|
304
|
+
for (i = 0; i < 8; i += 2) {
|
305
|
+
v = in[i];
|
306
|
+
for (j = 0; j < 7; j++) {
|
307
|
+
*quads++ = (v & 15);
|
308
|
+
v >>= 4;
|
309
|
+
}
|
310
|
+
v |= (in[i+1] << 2);
|
311
|
+
for (j = 0; j < 8; j++) {
|
312
|
+
*quads++ = (v & 15);
|
313
|
+
v >>= 4;
|
314
|
+
}
|
315
|
+
}
|
316
|
+
v = in[8];
|
317
|
+
*quads++ = (v & 15); v >>= 4;
|
318
|
+
*quads++ = (v & 15); v >>= 4;
|
319
|
+
*quads++ = (v & 15); v >>= 4;
|
320
|
+
*quads++ = (v & 15); v >>= 4;
|
321
|
+
|
322
|
+
/* making it signed */
|
323
|
+
carry = 0;
|
324
|
+
for(i = 0; i < 63; i++) {
|
325
|
+
r[i] += carry;
|
326
|
+
r[i+1] += (r[i] >> 4);
|
327
|
+
r[i] &= 15;
|
328
|
+
carry = (r[i] >> 3);
|
329
|
+
r[i] -= (carry << 4);
|
330
|
+
}
|
331
|
+
r[63] += carry;
|
332
|
+
}
|
333
|
+
|
334
|
+
static void
|
335
|
+
contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
|
336
|
+
int i,j,k,b;
|
337
|
+
int m = (1 << (windowsize - 1)) - 1, soplen = 256;
|
338
|
+
signed char *bits = r;
|
339
|
+
bignum256modm_element_t v;
|
340
|
+
|
341
|
+
/* first put the binary expansion into r */
|
342
|
+
for (i = 0; i < 8; i++) {
|
343
|
+
v = s[i];
|
344
|
+
for (j = 0; j < 30; j++, v >>= 1)
|
345
|
+
*bits++ = (v & 1);
|
346
|
+
}
|
347
|
+
v = s[8];
|
348
|
+
for (j = 0; j < 16; j++, v >>= 1)
|
349
|
+
*bits++ = (v & 1);
|
350
|
+
|
351
|
+
/* Making it sliding window */
|
352
|
+
for (j = 0; j < soplen; j++) {
|
353
|
+
if (!r[j])
|
354
|
+
continue;
|
355
|
+
|
356
|
+
for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
|
357
|
+
if ((r[j] + (r[j + b] << b)) <= m) {
|
358
|
+
r[j] += r[j + b] << b;
|
359
|
+
r[j + b] = 0;
|
360
|
+
} else if ((r[j] - (r[j + b] << b)) >= -m) {
|
361
|
+
r[j] -= r[j + b] << b;
|
362
|
+
for (k = j + b; k < soplen; k++) {
|
363
|
+
if (!r[k]) {
|
364
|
+
r[k] = 1;
|
365
|
+
break;
|
366
|
+
}
|
367
|
+
r[k] = 0;
|
368
|
+
}
|
369
|
+
} else if (r[j + b]) {
|
370
|
+
break;
|
371
|
+
}
|
372
|
+
}
|
373
|
+
}
|
374
|
+
}
|
375
|
+
|
376
|
+
|
377
|
+
/*
|
378
|
+
helpers for batch verification, are allowed to be vartime
|
379
|
+
*/
|
380
|
+
|
381
|
+
/* out = a - b, a must be larger than b */
|
382
|
+
static void
|
383
|
+
sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
|
384
|
+
size_t i = 0;
|
385
|
+
bignum256modm_element_t carry = 0;
|
386
|
+
switch (limbsize) {
|
387
|
+
case 8: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
388
|
+
case 7: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
389
|
+
case 6: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
390
|
+
case 5: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
391
|
+
case 4: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
392
|
+
case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
393
|
+
case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
394
|
+
case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
|
395
|
+
case 0:
|
396
|
+
default: out[i] = (a[i] - b[i]) - carry;
|
397
|
+
}
|
398
|
+
}
|
399
|
+
|
400
|
+
|
401
|
+
/* is a < b */
|
402
|
+
static int
|
403
|
+
lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
|
404
|
+
switch (limbsize) {
|
405
|
+
case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
|
406
|
+
case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
|
407
|
+
case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
|
408
|
+
case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
|
409
|
+
case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
|
410
|
+
case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
|
411
|
+
case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
|
412
|
+
case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
|
413
|
+
case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
|
414
|
+
}
|
415
|
+
return 0;
|
416
|
+
}
|
417
|
+
|
418
|
+
/* is a <= b */
|
419
|
+
static int
|
420
|
+
lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
|
421
|
+
switch (limbsize) {
|
422
|
+
case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
|
423
|
+
case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
|
424
|
+
case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
|
425
|
+
case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
|
426
|
+
case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
|
427
|
+
case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
|
428
|
+
case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
|
429
|
+
case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
|
430
|
+
case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
|
431
|
+
}
|
432
|
+
return 1;
|
433
|
+
}
|
434
|
+
|
435
|
+
|
436
|
+
/* is a == 0 */
|
437
|
+
static int
|
438
|
+
iszero256_modm_batch(const bignum256modm a) {
|
439
|
+
size_t i;
|
440
|
+
for (i = 0; i < 9; i++)
|
441
|
+
if (a[i])
|
442
|
+
return 0;
|
443
|
+
return 1;
|
444
|
+
}
|
445
|
+
|
446
|
+
/* is a == 1 */
|
447
|
+
static int
|
448
|
+
isone256_modm_batch(const bignum256modm a) {
|
449
|
+
size_t i;
|
450
|
+
if (a[0] != 1)
|
451
|
+
return 0;
|
452
|
+
for (i = 1; i < 9; i++)
|
453
|
+
if (a[i])
|
454
|
+
return 0;
|
455
|
+
return 1;
|
456
|
+
}
|
457
|
+
|
458
|
+
/* can a fit in to (at most) 128 bits */
|
459
|
+
static int
|
460
|
+
isatmost128bits256_modm_batch(const bignum256modm a) {
|
461
|
+
uint32_t mask =
|
462
|
+
((a[8] ) | /* 16 */
|
463
|
+
(a[7] ) | /* 46 */
|
464
|
+
(a[6] ) | /* 76 */
|
465
|
+
(a[5] ) | /* 106 */
|
466
|
+
(a[4] & 0x3fffff00)); /* 128 */
|
467
|
+
|
468
|
+
return (mask == 0);
|
469
|
+
}
|