ed25519_blake2b 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/CODE_OF_CONDUCT.md +74 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +23 -0
  6. data/LICENSE +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ed25519_blake2b.gemspec +31 -0
  12. data/ext/ed25519_blake2b/blake2-config.h +72 -0
  13. data/ext/ed25519_blake2b/blake2-impl.h +160 -0
  14. data/ext/ed25519_blake2b/blake2.h +195 -0
  15. data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
  16. data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
  17. data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
  18. data/ext/ed25519_blake2b/blake2b-round.h +157 -0
  19. data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
  20. data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
  21. data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
  22. data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
  23. data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
  24. data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
  25. data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
  26. data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
  27. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
  28. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
  29. data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
  30. data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
  31. data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
  32. data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
  33. data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
  34. data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
  35. data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
  36. data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
  37. data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
  38. data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
  39. data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
  40. data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
  41. data/ext/ed25519_blake2b/ed25519.c +150 -0
  42. data/ext/ed25519_blake2b/ed25519.h +30 -0
  43. data/ext/ed25519_blake2b/extconf.rb +3 -0
  44. data/ext/ed25519_blake2b/fuzz/README.md +173 -0
  45. data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
  46. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
  47. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
  48. data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
  49. data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
  50. data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
  51. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
  52. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
  53. data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
  54. data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
  55. data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
  56. data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
  57. data/ext/ed25519_blake2b/rbext.c +25 -0
  58. data/ext/ed25519_blake2b/regression.h +1024 -0
  59. data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
  60. data/lib/ed25519_blake2b/version.rb +3 -0
  61. metadata +147 -0
@@ -0,0 +1,9 @@
1
#ifndef ED25519_REF10_H
#define ED25519_REF10_H

/*
 * Prototypes for the ref10 Ed25519 entry points that the fuzzers compare
 * ed25519-donna against. The definitions live in another translation unit.
 */

/* Takes a writable pk buffer and an sk buffer; returns an int status. */
int crypto_sign_pk_ref10(
    unsigned char *pk,
    unsigned char *sk);

/* Takes message m/mlen and key sk; writes to sm and *smlen. */
int crypto_sign_ref10(
    unsigned char *sm,
    unsigned long long *smlen,
    const unsigned char *m,
    unsigned long long mlen,
    const unsigned char *sk);

/* Takes signed message sm/smlen and key pk; writes to m and *mlen. */
int crypto_sign_open_ref10(
    unsigned char *m,
    unsigned long long *mlen,
    const unsigned char *sm,
    unsigned long long smlen,
    const unsigned char *pk);

#endif /* ED25519_REF10_H */
9
+
@@ -0,0 +1,172 @@
1
+ #if defined(_WIN32)
2
+ #include <windows.h>
3
+ #include <wincrypt.h>
4
+ typedef unsigned int uint32_t;
5
+ typedef unsigned __int64 uint64_t;
6
+ #else
7
+ #include <stdint.h>
8
+ #endif
9
+
10
+ #include <string.h>
11
+ #include <stdio.h>
12
+
13
+ #include "ed25519-donna.h"
14
+ #include "curve25519-ref10.h"
15
+
16
/*
 * Print the byte-wise XOR of buffers a and b (both len bytes long) under the
 * heading "<desc> diff:". Equal bytes are shown as "____", differing bytes as
 * the hex value of the XOR. A newline is emitted after every 16 entries and
 * two newlines terminate the dump.
 */
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
    size_t idx;

    printf("%s diff:\n", desc);
    for (idx = 0; idx < len; idx++) {
        const unsigned char delta = (unsigned char)(a[idx] ^ b[idx]);

        if (delta != 0)
            printf("0x%02x,", delta);
        else
            printf("____,");

        /* wrap the row after 16 entries */
        if (((idx + 1) & 15) == 0)
            printf("\n");
    }
    printf("\n\n");
}
32
+
33
/*
 * Dump len bytes as comma separated hex literals under the heading "<desc>:".
 * A newline is emitted after every 16 entries and two newlines terminate the
 * dump.
 */
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
    size_t idx;

    printf("%s:\n", desc);
    for (idx = 0; idx < len; idx++) {
        printf("0x%02x,", bytes[idx]);

        /* wrap the row after 16 entries */
        if (((idx + 1) & 15) == 0)
            printf("\n");
    }
    printf("\n\n");
}
44
+
45
+
46
/*
 * chacha20/12 style prng used to produce fuzzing input.
 *
 * On the first call the 16-word state is seeded from the operating system
 * (CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits with
 * status 1 if seeding fails. Every iteration runs 12 rounds over a copy of
 * the state, copies up to 64 bytes of the result to the caller and then
 * advances the counter kept in state[12..15].
 *
 * out:   destination buffer, must hold at least `bytes` bytes
 * bytes: number of bytes to generate; 0 only performs the one-time seeding
 *
 * Not reentrant: the state lives in function-local statics.
 */
void
prng(unsigned char *out, size_t bytes) {
    static uint32_t state[16];
    static int init = 0;
    uint32_t x[16], t;
    size_t i;

    if (!init) {
#if defined(_WIN32)
        HCRYPTPROV csp;
        if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
            printf("CryptAcquireContext failed\n");
            exit(1);
        }
        if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
            printf("CryptGenRandom failed\n");
            exit(1);
        }
        CryptReleaseContext(csp, 0);
#else
        FILE *f = fopen("/dev/urandom", "rb");
        if (!f) {
            printf("failed to open /dev/urandom\n");
            exit(1);
        }
        if (fread(state, sizeof(state), 1, f) != 1) {
            printf("read error on /dev/urandom\n");
            fclose(f);
            exit(1);
        }
        /* the seed is in memory now; close the stream instead of leaking it */
        fclose(f);
#endif
        init = 1;
    }

    while (bytes) {
        for (i = 0; i < 16; i++) x[i] = state[i];

        /* arguments are parenthesised so expressions can be passed safely */
        #define rotl32(x,k) (((x) << (k)) | ((x) >> (32 - (k))))
        #define quarter(a,b,c,d) \
            x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
            x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
            x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
            x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

        /* 6 double rounds = 12 rounds: column round followed by diagonal round */
        for (i = 0; i < 12; i += 2) {
            quarter( 0, 4, 8,12)
            quarter( 1, 5, 9,13)
            quarter( 2, 6,10,14)
            quarter( 3, 7,11,15)
            quarter( 0, 5,10,15)
            quarter( 1, 6,11,12)
            quarter( 2, 7, 8,13)
            quarter( 3, 4, 9,14)
        };

        if (bytes <= 64) {
            memcpy(out, x, bytes);
            bytes = 0;
        } else {
            memcpy(out, x, 64);
            bytes -= 64;
            out += 64;
        }

        /* don't need a nonce, so the last 4 words form a 128 bit block counter:
           2^128 blocks of 64 bytes = 2^134 bytes before the stream repeats */
        if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
    }
}
115
+
116
+
117
+
118
/*
 * Fuzz driver for the curve25519 basepoint multiplication.
 *
 * Runs forever: draws 32 byte scalars from prng() in batches of skmax,
 * computes the public key with ref10 and with every curved25519 variant that
 * was compiled in, and compares each result with the ref10 output. On the
 * first mismatch the scalar, the ref10 result and the XOR diffs are printed
 * and the process exits with status 1. A progress dot is printed every
 * 0x1000 iterations and the 64 bit iteration counter every 0x20000.
 */
int main(void) {
    const size_t skmax = 1024;
    static unsigned char sk[1024][32];
    unsigned char pk[3][32];
    unsigned char *skp;
    size_t ski, pki, i;
    uint64_t ctr;

    printf("fuzzing: ");
    printf(" ref10");
    printf(" curved25519");
#if defined(ED25519_SSE2)
    printf(" curved25519-sse2");
#endif
    printf("\n\n");

    /* ski starts at skmax so the first iteration fills the scalar pool */
    for (ctr = 0, ski = skmax;;ctr++) {
        if (ski == skmax) {
            prng((unsigned char *)sk, sizeof(sk));
            ski = 0;
        }
        skp = sk[ski++];

        /* pk[0] is always the ref10 reference result */
        pki = 0;
        crypto_scalarmult_base_ref10(pk[pki++], skp);
        curved25519_scalarmult_basepoint(pk[pki++], skp);
#if defined(ED25519_SSE2)
        curved25519_scalarmult_basepoint_sse2(pk[pki++], skp);
#endif

        for (i = 1; i < pki; i++) {
            if (memcmp(pk[0], pk[i], 32) != 0) {
                printf("\n\n");
                print_bytes("sk", skp, 32);
                print_bytes("ref10", pk[0], 32);
                print_diff("curved25519", pk[0], pk[1], 32);
#if defined(ED25519_SSE2)
                print_diff("curved25519-sse2", pk[0], pk[2], 32);
#endif
                exit(1);
            }
        }

        if (ctr && (ctr % 0x1000 == 0)) {
            printf(".");
            if ((ctr % 0x20000) == 0) {
                printf(" [");
                for (i = 0; i < 8; i++)
                    printf("%02x", (unsigned char)(ctr >> ((7 - i) * 8)));
                printf("]\n");
            }
            /* progress output has no newline most of the time; flush so it
               shows up even when stdout is line or fully buffered */
            fflush(stdout);
        }
    }
}
172
+
@@ -0,0 +1,219 @@
1
+ #if defined(_WIN32)
2
+ #include <windows.h>
3
+ #include <wincrypt.h>
4
+ typedef unsigned int uint32_t;
5
+ #else
6
+ #include <stdint.h>
7
+ #endif
8
+
9
+ #include <string.h>
10
+ #include <stdio.h>
11
+
12
+ #include "ed25519-donna.h"
13
+ #include "ed25519-ref10.h"
14
+
15
/*
 * Print the byte-wise XOR of buffers a and b (both len bytes long) under the
 * heading "<desc> diff:". Equal bytes are shown as "____", differing bytes as
 * the hex value of the XOR. A newline is emitted after every 16 entries and
 * one newline terminates the dump.
 */
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
    size_t idx;

    printf("%s diff:\n", desc);
    for (idx = 0; idx < len; idx++) {
        const unsigned char delta = (unsigned char)(a[idx] ^ b[idx]);

        if (delta != 0)
            printf("0x%02x,", delta);
        else
            printf("____,");

        /* wrap the row after 16 entries */
        if (((idx + 1) & 15) == 0)
            printf("\n");
    }
    printf("\n");
}
31
+
32
/*
 * Dump len bytes as comma separated hex literals under the heading "<desc>:".
 * A newline is emitted after every 16 entries and one newline terminates the
 * dump.
 */
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
    size_t idx;

    printf("%s:\n", desc);
    for (idx = 0; idx < len; idx++) {
        printf("0x%02x,", bytes[idx]);

        /* wrap the row after 16 entries */
        if (((idx + 1) & 15) == 0)
            printf("\n");
    }
    printf("\n");
}
43
+
44
+
45
/*
 * chacha20/12 style prng used to produce fuzzing input.
 *
 * On the first call the 16-word state is seeded from the operating system
 * (CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits with
 * status 1 if seeding fails. Every iteration runs 12 rounds over a copy of
 * the state, copies up to 64 bytes of the result to the caller and then
 * advances the counter kept in state[12..15].
 *
 * out:   destination buffer, must hold at least `bytes` bytes
 * bytes: number of bytes to generate; 0 only performs the one-time seeding
 *
 * Not reentrant: the state lives in function-local statics.
 */
void
prng(unsigned char *out, size_t bytes) {
    static uint32_t state[16];
    static int init = 0;
    uint32_t x[16], t;
    size_t i;

    if (!init) {
#if defined(_WIN32)
        /* HCRYPTPROV is an integer handle type, so initialise with 0, not NULL */
        HCRYPTPROV csp = 0;
        if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
            printf("CryptAcquireContext failed\n");
            exit(1);
        }
        if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
            printf("CryptGenRandom failed\n");
            exit(1);
        }
        CryptReleaseContext(csp, 0);
#else
        FILE *f = fopen("/dev/urandom", "rb");
        if (!f) {
            printf("failed to open /dev/urandom\n");
            exit(1);
        }
        if (fread(state, sizeof(state), 1, f) != 1) {
            printf("read error on /dev/urandom\n");
            fclose(f);
            exit(1);
        }
        /* the seed is in memory now; close the stream instead of leaking it */
        fclose(f);
#endif
        init = 1;
    }

    while (bytes) {
        for (i = 0; i < 16; i++) x[i] = state[i];

        /* arguments are parenthesised so expressions can be passed safely */
        #define rotl32(x,k) (((x) << (k)) | ((x) >> (32 - (k))))
        #define quarter(a,b,c,d) \
            x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
            x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
            x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
            x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

        /* 6 double rounds = 12 rounds: column round followed by diagonal round */
        for (i = 0; i < 12; i += 2) {
            quarter( 0, 4, 8,12)
            quarter( 1, 5, 9,13)
            quarter( 2, 6,10,14)
            quarter( 3, 7,11,15)
            quarter( 0, 5,10,15)
            quarter( 1, 6,11,12)
            quarter( 2, 7, 8,13)
            quarter( 3, 4, 9,14)
        };

        if (bytes <= 64) {
            memcpy(out, x, bytes);
            bytes = 0;
        } else {
            memcpy(out, x, 64);
            bytes -= 64;
            out += 64;
        }

        /* don't need a nonce, so the last 4 words form a 128 bit block counter:
           2^128 blocks of 64 bytes = 2^134 bytes before the stream repeats */
        if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
    }
}
114
+
115
/* One fuzzing input: filled with raw prng output by main(). */
typedef struct random_data_t {
    unsigned char sk[32];   /* secret key bytes */
    unsigned char m[128];   /* message to sign */
} random_data;

/* What one implementation produced for a given random_data input. */
typedef struct generated_data_t {
    unsigned char pk[32];   /* derived public key */
    unsigned char sig[64];  /* signature over the message */
    int valid;              /* return value of the implementation's sign_open call */
} generated_data;
125
+
126
+ static void
127
+ print_generated(const char *desc, generated_data *g) {
128
+ printf("%s:\n", desc);
129
+ print_bytes("pk", g->pk, 32);
130
+ print_bytes("sig", g->sig, 64);
131
+ printf("valid: %s\n\n", g->valid ? "no" : "yes");
132
+ }
133
+
134
+ static void
135
+ print_generated_diff(const char *desc, const generated_data *base, generated_data *g) {
136
+ printf("%s:\n", desc);
137
+ print_diff("pk", base->pk, g->pk, 32);
138
+ print_diff("sig", base->sig, g->sig, 64);
139
+ printf("valid: %s\n\n", (base->valid == g->valid) ? "___" : (g->valid ? "no" : "yes"));
140
+ }
141
+
142
+ int main() {
143
+ const size_t rndmax = 128;
144
+ static random_data rnd[128];
145
+ static generated_data gen[3];
146
+ random_data *r;
147
+ generated_data *g;
148
+ unsigned long long dummylen;
149
+ unsigned char dummysk[64];
150
+ unsigned char dummymsg[2][128+64];
151
+ size_t rndi, geni, i, j;
152
+ uint64_t ctr;
153
+
154
+ printf("fuzzing: ");
155
+ printf(" ref10");
156
+ printf(" ed25519-donna");
157
+ #if defined(ED25519_SSE2)
158
+ printf(" ed25519-donna-sse2");
159
+ #endif
160
+ printf("\n\n");
161
+
162
+ for (ctr = 0, rndi = rndmax;;ctr++) {
163
+ if (rndi == rndmax) {
164
+ prng((unsigned char *)rnd, sizeof(rnd));
165
+ rndi = 0;
166
+ }
167
+ r = &rnd[rndi++];
168
+
169
+ /* ref10, lots of horrible gymnastics to work around the wonky api */
170
+ geni = 0;
171
+ g = &gen[geni++];
172
+ memcpy(dummysk, r->sk, 32); /* pk is appended to the sk, need to copy the sk to a larger buffer */
173
+ crypto_sign_pk_ref10(dummysk + 32, dummysk);
174
+ memcpy(g->pk, dummysk + 32, 32);
175
+ crypto_sign_ref10(dummymsg[0], &dummylen, r->m, 128, dummysk);
176
+ memcpy(g->sig, dummymsg[0], 64); /* sig is placed in front of the signed message */
177
+ g->valid = crypto_sign_open_ref10(dummymsg[1], &dummylen, dummymsg[0], 128 + 64, g->pk);
178
+
179
+ /* ed25519-donna */
180
+ g = &gen[geni++];
181
+ ed25519_publickey(r->sk, g->pk);
182
+ ed25519_sign(r->m, 128, r->sk, g->pk, g->sig);
183
+ g->valid = ed25519_sign_open(r->m, 128, g->pk, g->sig);
184
+
185
+ #if defined(ED25519_SSE2)
186
+ /* ed25519-donna-sse2 */
187
+ g = &gen[geni++];
188
+ ed25519_publickey_sse2(r->sk, g->pk);
189
+ ed25519_sign_sse2(r->m, 128, r->sk, g->pk, g->sig);
190
+ g->valid = ed25519_sign_open_sse2(r->m, 128, g->pk, g->sig);
191
+ #endif
192
+
193
+ /* compare implementations 1..geni against the reference */
194
+ for (i = 1; i < geni; i++) {
195
+ if (memcmp(&gen[0], &gen[i], sizeof(generated_data)) != 0) {
196
+ printf("\n\n");
197
+ print_bytes("sk", r->sk, 32);
198
+ print_bytes("m", r->m, 128);
199
+ print_generated("ref10", &gen[0]);
200
+ print_generated_diff("ed25519-donna", &gen[0], &gen[1]);
201
+ #if defined(ED25519_SSE2)
202
+ print_generated_diff("ed25519-donna-sse2", &gen[0], &gen[2]);
203
+ #endif
204
+ exit(1);
205
+ }
206
+ }
207
+
208
+ /* print out status */
209
+ if (ctr && (ctr % 0x1000 == 0)) {
210
+ printf(".");
211
+ if ((ctr % 0x20000) == 0) {
212
+ printf(" [");
213
+ for (i = 0; i < 8; i++)
214
+ printf("%02x", (unsigned char)(ctr >> ((7 - i) * 8)));
215
+ printf("]\n");
216
+ }
217
+ }
218
+ }
219
+ }
@@ -0,0 +1,469 @@
1
+ /*
2
+ Public domain by Andrew M. <liquidsun@gmail.com>
3
+ */
4
+
5
+
6
+ /*
7
+ Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
8
+
9
+ k = 32
10
+ b = 1 << 8 = 256
11
+ m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
12
+ mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
13
+ */
14
+
15
/* Scalars are held as 9 limbs: limbs 0..7 carry 30 bits each, limb 8 the rest. */
#define bignum256modm_bits_per_limb 30
#define bignum256modm_limb_size 9

typedef uint32_t bignum256modm_element_t;
typedef bignum256modm_element_t bignum256modm[bignum256modm_limb_size];

/* the modulus m, least significant limb first */
static const bignum256modm modm_m = {
    0x1cf5d3ed,
    0x20498c69,
    0x2f79cd65,
    0x37be77a8,
    0x00000014,
    0x00000000,
    0x00000000,
    0x00000000,
    0x00001000
};

/* the Barrett constant mu, least significant limb first */
static const bignum256modm modm_mu = {
    0x0a2c131b,
    0x3673968c,
    0x06329a7e,
    0x01885742,
    0x3fffeb21,
    0x3fffffff,
    0x3fffffff,
    0x3fffffff,
    0x000fffff
};
32
+
33
+ static bignum256modm_element_t
34
+ lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
35
+ return (a - b) >> 31;
36
+ }
37
+
38
+ /* see HAC, Alg. 14.42 Step 4 */
39
+ static void
40
+ reduce256_modm(bignum256modm r) {
41
+ bignum256modm t;
42
+ bignum256modm_element_t b = 0, pb, mask;
43
+
44
+ /* t = r - m */
45
+ pb = 0;
46
+ pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
47
+ pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
48
+ pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
49
+ pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
50
+ pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
51
+ pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
52
+ pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
53
+ pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
54
+ pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
55
+
56
+ /* keep r if r was smaller than m */
57
+ mask = b - 1;
58
+ r[0] ^= mask & (r[0] ^ t[0]);
59
+ r[1] ^= mask & (r[1] ^ t[1]);
60
+ r[2] ^= mask & (r[2] ^ t[2]);
61
+ r[3] ^= mask & (r[3] ^ t[3]);
62
+ r[4] ^= mask & (r[4] ^ t[4]);
63
+ r[5] ^= mask & (r[5] ^ t[5]);
64
+ r[6] ^= mask & (r[6] ^ t[6]);
65
+ r[7] ^= mask & (r[7] ^ t[7]);
66
+ r[8] ^= mask & (r[8] ^ t[8]);
67
+ }
68
+
69
+ /*
70
+ Barrett reduction, see HAC, Alg. 14.42
71
+
72
+ Instead of passing in x, pre-process in to q1 and r1 for efficiency
73
+ */
74
+ static void
75
+ barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
76
+ bignum256modm q3, r2;
77
+ uint64_t c;
78
+ bignum256modm_element_t f, b, pb;
79
+
80
+ /* q1 = x >> 248 = 264 bits = 9 30 bit elements
81
+ q2 = mu * q1
82
+ q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
83
+ c = mul32x32_64(modm_mu[0], q1[7]) + mul32x32_64(modm_mu[1], q1[6]) + mul32x32_64(modm_mu[2], q1[5]) + mul32x32_64(modm_mu[3], q1[4]) + mul32x32_64(modm_mu[4], q1[3]) + mul32x32_64(modm_mu[5], q1[2]) + mul32x32_64(modm_mu[6], q1[1]) + mul32x32_64(modm_mu[7], q1[0]);
84
+ c >>= 30;
85
+ c += mul32x32_64(modm_mu[0], q1[8]) + mul32x32_64(modm_mu[1], q1[7]) + mul32x32_64(modm_mu[2], q1[6]) + mul32x32_64(modm_mu[3], q1[5]) + mul32x32_64(modm_mu[4], q1[4]) + mul32x32_64(modm_mu[5], q1[3]) + mul32x32_64(modm_mu[6], q1[2]) + mul32x32_64(modm_mu[7], q1[1]) + mul32x32_64(modm_mu[8], q1[0]);
86
+ f = (bignum256modm_element_t)c; q3[0] = (f >> 24) & 0x3f; c >>= 30;
87
+ c += mul32x32_64(modm_mu[1], q1[8]) + mul32x32_64(modm_mu[2], q1[7]) + mul32x32_64(modm_mu[3], q1[6]) + mul32x32_64(modm_mu[4], q1[5]) + mul32x32_64(modm_mu[5], q1[4]) + mul32x32_64(modm_mu[6], q1[3]) + mul32x32_64(modm_mu[7], q1[2]) + mul32x32_64(modm_mu[8], q1[1]);
88
+ f = (bignum256modm_element_t)c; q3[0] |= (f << 6) & 0x3fffffff; q3[1] = (f >> 24) & 0x3f; c >>= 30;
89
+ c += mul32x32_64(modm_mu[2], q1[8]) + mul32x32_64(modm_mu[3], q1[7]) + mul32x32_64(modm_mu[4], q1[6]) + mul32x32_64(modm_mu[5], q1[5]) + mul32x32_64(modm_mu[6], q1[4]) + mul32x32_64(modm_mu[7], q1[3]) + mul32x32_64(modm_mu[8], q1[2]);
90
+ f = (bignum256modm_element_t)c; q3[1] |= (f << 6) & 0x3fffffff; q3[2] = (f >> 24) & 0x3f; c >>= 30;
91
+ c += mul32x32_64(modm_mu[3], q1[8]) + mul32x32_64(modm_mu[4], q1[7]) + mul32x32_64(modm_mu[5], q1[6]) + mul32x32_64(modm_mu[6], q1[5]) + mul32x32_64(modm_mu[7], q1[4]) + mul32x32_64(modm_mu[8], q1[3]);
92
+ f = (bignum256modm_element_t)c; q3[2] |= (f << 6) & 0x3fffffff; q3[3] = (f >> 24) & 0x3f; c >>= 30;
93
+ c += mul32x32_64(modm_mu[4], q1[8]) + mul32x32_64(modm_mu[5], q1[7]) + mul32x32_64(modm_mu[6], q1[6]) + mul32x32_64(modm_mu[7], q1[5]) + mul32x32_64(modm_mu[8], q1[4]);
94
+ f = (bignum256modm_element_t)c; q3[3] |= (f << 6) & 0x3fffffff; q3[4] = (f >> 24) & 0x3f; c >>= 30;
95
+ c += mul32x32_64(modm_mu[5], q1[8]) + mul32x32_64(modm_mu[6], q1[7]) + mul32x32_64(modm_mu[7], q1[6]) + mul32x32_64(modm_mu[8], q1[5]);
96
+ f = (bignum256modm_element_t)c; q3[4] |= (f << 6) & 0x3fffffff; q3[5] = (f >> 24) & 0x3f; c >>= 30;
97
+ c += mul32x32_64(modm_mu[6], q1[8]) + mul32x32_64(modm_mu[7], q1[7]) + mul32x32_64(modm_mu[8], q1[6]);
98
+ f = (bignum256modm_element_t)c; q3[5] |= (f << 6) & 0x3fffffff; q3[6] = (f >> 24) & 0x3f; c >>= 30;
99
+ c += mul32x32_64(modm_mu[7], q1[8]) + mul32x32_64(modm_mu[8], q1[7]);
100
+ f = (bignum256modm_element_t)c; q3[6] |= (f << 6) & 0x3fffffff; q3[7] = (f >> 24) & 0x3f; c >>= 30;
101
+ c += mul32x32_64(modm_mu[8], q1[8]);
102
+ f = (bignum256modm_element_t)c; q3[7] |= (f << 6) & 0x3fffffff; q3[8] = (bignum256modm_element_t)(c >> 24);
103
+
104
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
105
+ r2 = (q3 * m) mod (256^(32+1)) = (q3 * m) & ((1 << 264) - 1) */
106
+ c = mul32x32_64(modm_m[0], q3[0]);
107
+ r2[0] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
108
+ c += mul32x32_64(modm_m[0], q3[1]) + mul32x32_64(modm_m[1], q3[0]);
109
+ r2[1] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
110
+ c += mul32x32_64(modm_m[0], q3[2]) + mul32x32_64(modm_m[1], q3[1]) + mul32x32_64(modm_m[2], q3[0]);
111
+ r2[2] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
112
+ c += mul32x32_64(modm_m[0], q3[3]) + mul32x32_64(modm_m[1], q3[2]) + mul32x32_64(modm_m[2], q3[1]) + mul32x32_64(modm_m[3], q3[0]);
113
+ r2[3] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
114
+ c += mul32x32_64(modm_m[0], q3[4]) + mul32x32_64(modm_m[1], q3[3]) + mul32x32_64(modm_m[2], q3[2]) + mul32x32_64(modm_m[3], q3[1]) + mul32x32_64(modm_m[4], q3[0]);
115
+ r2[4] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
116
+ c += mul32x32_64(modm_m[0], q3[5]) + mul32x32_64(modm_m[1], q3[4]) + mul32x32_64(modm_m[2], q3[3]) + mul32x32_64(modm_m[3], q3[2]) + mul32x32_64(modm_m[4], q3[1]) + mul32x32_64(modm_m[5], q3[0]);
117
+ r2[5] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
118
+ c += mul32x32_64(modm_m[0], q3[6]) + mul32x32_64(modm_m[1], q3[5]) + mul32x32_64(modm_m[2], q3[4]) + mul32x32_64(modm_m[3], q3[3]) + mul32x32_64(modm_m[4], q3[2]) + mul32x32_64(modm_m[5], q3[1]) + mul32x32_64(modm_m[6], q3[0]);
119
+ r2[6] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
120
+ c += mul32x32_64(modm_m[0], q3[7]) + mul32x32_64(modm_m[1], q3[6]) + mul32x32_64(modm_m[2], q3[5]) + mul32x32_64(modm_m[3], q3[4]) + mul32x32_64(modm_m[4], q3[3]) + mul32x32_64(modm_m[5], q3[2]) + mul32x32_64(modm_m[6], q3[1]) + mul32x32_64(modm_m[7], q3[0]);
121
+ r2[7] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
122
+ c += mul32x32_64(modm_m[0], q3[8]) + mul32x32_64(modm_m[1], q3[7]) + mul32x32_64(modm_m[2], q3[6]) + mul32x32_64(modm_m[3], q3[5]) + mul32x32_64(modm_m[4], q3[4]) + mul32x32_64(modm_m[5], q3[3]) + mul32x32_64(modm_m[6], q3[2]) + mul32x32_64(modm_m[7], q3[1]) + mul32x32_64(modm_m[8], q3[0]);
123
+ r2[8] = (bignum256modm_element_t)(c & 0xffffff);
124
+
125
+ /* r = r1 - r2
126
+ if (r < 0) r += (1 << 264) */
127
+ pb = 0;
128
+ pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
129
+ pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
130
+ pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
131
+ pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
132
+ pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
133
+ pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
134
+ pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
135
+ pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
136
+ pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
137
+
138
+ reduce256_modm(r);
139
+ reduce256_modm(r);
140
+ }
141
+
142
+ /* addition modulo m */
143
+ static void
144
+ add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
145
+ bignum256modm_element_t c;
146
+
147
+ c = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
148
+ c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
149
+ c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
150
+ c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
151
+ c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
152
+ c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
153
+ c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
154
+ c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
155
+ c += x[8] + y[8]; r[8] = c;
156
+
157
+ reduce256_modm(r);
158
+ }
159
+
160
+ /* multiplication modulo m */
161
+ static void
162
+ mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
163
+ bignum256modm r1, q1;
164
+ uint64_t c;
165
+ bignum256modm_element_t f;
166
+
167
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
168
+ q1 = x >> 248 = 264 bits = 9 30 bit elements */
169
+ c = mul32x32_64(x[0], y[0]);
170
+ f = (bignum256modm_element_t)c; r1[0] = (f & 0x3fffffff); c >>= 30;
171
+ c += mul32x32_64(x[0], y[1]) + mul32x32_64(x[1], y[0]);
172
+ f = (bignum256modm_element_t)c; r1[1] = (f & 0x3fffffff); c >>= 30;
173
+ c += mul32x32_64(x[0], y[2]) + mul32x32_64(x[1], y[1]) + mul32x32_64(x[2], y[0]);
174
+ f = (bignum256modm_element_t)c; r1[2] = (f & 0x3fffffff); c >>= 30;
175
+ c += mul32x32_64(x[0], y[3]) + mul32x32_64(x[1], y[2]) + mul32x32_64(x[2], y[1]) + mul32x32_64(x[3], y[0]);
176
+ f = (bignum256modm_element_t)c; r1[3] = (f & 0x3fffffff); c >>= 30;
177
+ c += mul32x32_64(x[0], y[4]) + mul32x32_64(x[1], y[3]) + mul32x32_64(x[2], y[2]) + mul32x32_64(x[3], y[1]) + mul32x32_64(x[4], y[0]);
178
+ f = (bignum256modm_element_t)c; r1[4] = (f & 0x3fffffff); c >>= 30;
179
+ c += mul32x32_64(x[0], y[5]) + mul32x32_64(x[1], y[4]) + mul32x32_64(x[2], y[3]) + mul32x32_64(x[3], y[2]) + mul32x32_64(x[4], y[1]) + mul32x32_64(x[5], y[0]);
180
+ f = (bignum256modm_element_t)c; r1[5] = (f & 0x3fffffff); c >>= 30;
181
+ c += mul32x32_64(x[0], y[6]) + mul32x32_64(x[1], y[5]) + mul32x32_64(x[2], y[4]) + mul32x32_64(x[3], y[3]) + mul32x32_64(x[4], y[2]) + mul32x32_64(x[5], y[1]) + mul32x32_64(x[6], y[0]);
182
+ f = (bignum256modm_element_t)c; r1[6] = (f & 0x3fffffff); c >>= 30;
183
+ c += mul32x32_64(x[0], y[7]) + mul32x32_64(x[1], y[6]) + mul32x32_64(x[2], y[5]) + mul32x32_64(x[3], y[4]) + mul32x32_64(x[4], y[3]) + mul32x32_64(x[5], y[2]) + mul32x32_64(x[6], y[1]) + mul32x32_64(x[7], y[0]);
184
+ f = (bignum256modm_element_t)c; r1[7] = (f & 0x3fffffff); c >>= 30;
185
+ c += mul32x32_64(x[0], y[8]) + mul32x32_64(x[1], y[7]) + mul32x32_64(x[2], y[6]) + mul32x32_64(x[3], y[5]) + mul32x32_64(x[4], y[4]) + mul32x32_64(x[5], y[3]) + mul32x32_64(x[6], y[2]) + mul32x32_64(x[7], y[1]) + mul32x32_64(x[8], y[0]);
186
+ f = (bignum256modm_element_t)c; r1[8] = (f & 0x00ffffff); q1[0] = (f >> 8) & 0x3fffff; c >>= 30;
187
+ c += mul32x32_64(x[1], y[8]) + mul32x32_64(x[2], y[7]) + mul32x32_64(x[3], y[6]) + mul32x32_64(x[4], y[5]) + mul32x32_64(x[5], y[4]) + mul32x32_64(x[6], y[3]) + mul32x32_64(x[7], y[2]) + mul32x32_64(x[8], y[1]);
188
+ f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
189
+ c += mul32x32_64(x[2], y[8]) + mul32x32_64(x[3], y[7]) + mul32x32_64(x[4], y[6]) + mul32x32_64(x[5], y[5]) + mul32x32_64(x[6], y[4]) + mul32x32_64(x[7], y[3]) + mul32x32_64(x[8], y[2]);
190
+ f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
191
+ c += mul32x32_64(x[3], y[8]) + mul32x32_64(x[4], y[7]) + mul32x32_64(x[5], y[6]) + mul32x32_64(x[6], y[5]) + mul32x32_64(x[7], y[4]) + mul32x32_64(x[8], y[3]);
192
+ f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
193
+ c += mul32x32_64(x[4], y[8]) + mul32x32_64(x[5], y[7]) + mul32x32_64(x[6], y[6]) + mul32x32_64(x[7], y[5]) + mul32x32_64(x[8], y[4]);
194
+ f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
195
+ c += mul32x32_64(x[5], y[8]) + mul32x32_64(x[6], y[7]) + mul32x32_64(x[7], y[6]) + mul32x32_64(x[8], y[5]);
196
+ f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
197
+ c += mul32x32_64(x[6], y[8]) + mul32x32_64(x[7], y[7]) + mul32x32_64(x[8], y[6]);
198
+ f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
199
+ c += mul32x32_64(x[7], y[8]) + mul32x32_64(x[8], y[7]);
200
+ f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;
201
+ c += mul32x32_64(x[8], y[8]);
202
+ f = (bignum256modm_element_t)c; q1[7] = (q1[7] | (f << 22)) & 0x3fffffff; q1[8] = (f >> 8) & 0x3fffff;
203
+
204
+ barrett_reduce256_modm(r, q1, r1);
205
+ }
206
+
207
+ static void
208
+ expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
209
+ unsigned char work[64] = {0};
210
+ bignum256modm_element_t x[16];
211
+ bignum256modm q1;
212
+
213
+ memcpy(work, in, len);
214
+ x[0] = U8TO32_LE(work + 0);
215
+ x[1] = U8TO32_LE(work + 4);
216
+ x[2] = U8TO32_LE(work + 8);
217
+ x[3] = U8TO32_LE(work + 12);
218
+ x[4] = U8TO32_LE(work + 16);
219
+ x[5] = U8TO32_LE(work + 20);
220
+ x[6] = U8TO32_LE(work + 24);
221
+ x[7] = U8TO32_LE(work + 28);
222
+ x[8] = U8TO32_LE(work + 32);
223
+ x[9] = U8TO32_LE(work + 36);
224
+ x[10] = U8TO32_LE(work + 40);
225
+ x[11] = U8TO32_LE(work + 44);
226
+ x[12] = U8TO32_LE(work + 48);
227
+ x[13] = U8TO32_LE(work + 52);
228
+ x[14] = U8TO32_LE(work + 56);
229
+ x[15] = U8TO32_LE(work + 60);
230
+
231
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
232
+ out[0] = ( x[0]) & 0x3fffffff;
233
+ out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
234
+ out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
235
+ out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
236
+ out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
237
+ out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
238
+ out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
239
+ out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
240
+ out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
241
+
242
+ /* 8*31 = 248 bits, no need to reduce */
243
+ if (len < 32)
244
+ return;
245
+
246
+ /* q1 = x >> 248 = 264 bits = 9 30 bit elements */
247
+ q1[0] = ((x[ 7] >> 24) | (x[ 8] << 8)) & 0x3fffffff;
248
+ q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
249
+ q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
250
+ q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
251
+ q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
252
+ q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
253
+ q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
254
+ q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
255
+ q1[8] = ((x[15] >> 8) );
256
+
257
+ barrett_reduce256_modm(out, q1, out);
258
+ }
259
+
260
+ static void
261
+ expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
262
+ bignum256modm_element_t x[8];
263
+
264
+ x[0] = U8TO32_LE(in + 0);
265
+ x[1] = U8TO32_LE(in + 4);
266
+ x[2] = U8TO32_LE(in + 8);
267
+ x[3] = U8TO32_LE(in + 12);
268
+ x[4] = U8TO32_LE(in + 16);
269
+ x[5] = U8TO32_LE(in + 20);
270
+ x[6] = U8TO32_LE(in + 24);
271
+ x[7] = U8TO32_LE(in + 28);
272
+
273
+ out[0] = ( x[0]) & 0x3fffffff;
274
+ out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
275
+ out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
276
+ out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
277
+ out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
278
+ out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
279
+ out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
280
+ out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
281
+ out[8] = ((x[ 7] >> 16) ) & 0x0000ffff;
282
+ }
283
+
284
+ static void
285
+ contract256_modm(unsigned char out[32], const bignum256modm in) {
286
+ U32TO8_LE(out + 0, (in[0] ) | (in[1] << 30));
287
+ U32TO8_LE(out + 4, (in[1] >> 2) | (in[2] << 28));
288
+ U32TO8_LE(out + 8, (in[2] >> 4) | (in[3] << 26));
289
+ U32TO8_LE(out + 12, (in[3] >> 6) | (in[4] << 24));
290
+ U32TO8_LE(out + 16, (in[4] >> 8) | (in[5] << 22));
291
+ U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
292
+ U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
293
+ U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
294
+ }
295
+
296
+
297
+
298
+ static void
299
+ contract256_window4_modm(signed char r[64], const bignum256modm in) {
300
+ char carry;
301
+ signed char *quads = r;
302
+ bignum256modm_element_t i, j, v;
303
+
304
+ for (i = 0; i < 8; i += 2) {
305
+ v = in[i];
306
+ for (j = 0; j < 7; j++) {
307
+ *quads++ = (v & 15);
308
+ v >>= 4;
309
+ }
310
+ v |= (in[i+1] << 2);
311
+ for (j = 0; j < 8; j++) {
312
+ *quads++ = (v & 15);
313
+ v >>= 4;
314
+ }
315
+ }
316
+ v = in[8];
317
+ *quads++ = (v & 15); v >>= 4;
318
+ *quads++ = (v & 15); v >>= 4;
319
+ *quads++ = (v & 15); v >>= 4;
320
+ *quads++ = (v & 15); v >>= 4;
321
+
322
+ /* making it signed */
323
+ carry = 0;
324
+ for(i = 0; i < 63; i++) {
325
+ r[i] += carry;
326
+ r[i+1] += (r[i] >> 4);
327
+ r[i] &= 15;
328
+ carry = (r[i] >> 3);
329
+ r[i] -= (carry << 4);
330
+ }
331
+ r[63] += carry;
332
+ }
333
+
334
+ static void
335
+ contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
336
+ int i,j,k,b;
337
+ int m = (1 << (windowsize - 1)) - 1, soplen = 256;
338
+ signed char *bits = r;
339
+ bignum256modm_element_t v;
340
+
341
+ /* first put the binary expansion into r */
342
+ for (i = 0; i < 8; i++) {
343
+ v = s[i];
344
+ for (j = 0; j < 30; j++, v >>= 1)
345
+ *bits++ = (v & 1);
346
+ }
347
+ v = s[8];
348
+ for (j = 0; j < 16; j++, v >>= 1)
349
+ *bits++ = (v & 1);
350
+
351
+ /* Making it sliding window */
352
+ for (j = 0; j < soplen; j++) {
353
+ if (!r[j])
354
+ continue;
355
+
356
+ for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
357
+ if ((r[j] + (r[j + b] << b)) <= m) {
358
+ r[j] += r[j + b] << b;
359
+ r[j + b] = 0;
360
+ } else if ((r[j] - (r[j + b] << b)) >= -m) {
361
+ r[j] -= r[j + b] << b;
362
+ for (k = j + b; k < soplen; k++) {
363
+ if (!r[k]) {
364
+ r[k] = 1;
365
+ break;
366
+ }
367
+ r[k] = 0;
368
+ }
369
+ } else if (r[j + b]) {
370
+ break;
371
+ }
372
+ }
373
+ }
374
+ }
375
+
376
+
377
+ /*
378
+ helpers for batch verification, are allowed to be vartime
379
+ */
380
+
381
+ /* out = a - b, a must be larger than b */
382
+ static void
383
+ sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
384
+ size_t i = 0;
385
+ bignum256modm_element_t carry = 0;
386
+ switch (limbsize) {
387
+ case 8: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
388
+ case 7: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
389
+ case 6: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
390
+ case 5: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
391
+ case 4: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
392
+ case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
393
+ case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
394
+ case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
395
+ case 0:
396
+ default: out[i] = (a[i] - b[i]) - carry;
397
+ }
398
+ }
399
+
400
+
401
+ /* is a < b */
402
+ static int
403
+ lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
404
+ switch (limbsize) {
405
+ case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
406
+ case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
407
+ case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
408
+ case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
409
+ case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
410
+ case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
411
+ case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
412
+ case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
413
+ case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
414
+ }
415
+ return 0;
416
+ }
417
+
418
+ /* is a <= b */
419
+ static int
420
+ lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
421
+ switch (limbsize) {
422
+ case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
423
+ case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
424
+ case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
425
+ case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
426
+ case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
427
+ case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
428
+ case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
429
+ case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
430
+ case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
431
+ }
432
+ return 1;
433
+ }
434
+
435
+
436
+ /* is a == 0 */
437
+ static int
438
+ iszero256_modm_batch(const bignum256modm a) {
439
+ size_t i;
440
+ for (i = 0; i < 9; i++)
441
+ if (a[i])
442
+ return 0;
443
+ return 1;
444
+ }
445
+
446
+ /* is a == 1 */
447
+ static int
448
+ isone256_modm_batch(const bignum256modm a) {
449
+ size_t i;
450
+ if (a[0] != 1)
451
+ return 0;
452
+ for (i = 1; i < 9; i++)
453
+ if (a[i])
454
+ return 0;
455
+ return 1;
456
+ }
457
+
458
+ /* can a fit in to (at most) 128 bits */
459
+ static int
460
+ isatmost128bits256_modm_batch(const bignum256modm a) {
461
+ uint32_t mask =
462
+ ((a[8] ) | /* 16 */
463
+ (a[7] ) | /* 46 */
464
+ (a[6] ) | /* 76 */
465
+ (a[5] ) | /* 106 */
466
+ (a[4] & 0x3fffff00)); /* 128 */
467
+
468
+ return (mask == 0);
469
+ }