nanocurrency 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +7 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +40 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +16 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/ext/.DS_Store +0 -0
  14. data/ext/nanocurrency_ext/blake2-config.h +72 -0
  15. data/ext/nanocurrency_ext/blake2-impl.h +160 -0
  16. data/ext/nanocurrency_ext/blake2.h +195 -0
  17. data/ext/nanocurrency_ext/blake2b-load-sse2.h +68 -0
  18. data/ext/nanocurrency_ext/blake2b-load-sse41.h +402 -0
  19. data/ext/nanocurrency_ext/blake2b-ref.c +373 -0
  20. data/ext/nanocurrency_ext/blake2b-round.h +157 -0
  21. data/ext/nanocurrency_ext/curve25519-donna-32bit.h +579 -0
  22. data/ext/nanocurrency_ext/curve25519-donna-64bit.h +413 -0
  23. data/ext/nanocurrency_ext/curve25519-donna-helpers.h +67 -0
  24. data/ext/nanocurrency_ext/curve25519-donna-sse2.h +1112 -0
  25. data/ext/nanocurrency_ext/ed25519-donna-32bit-sse2.h +513 -0
  26. data/ext/nanocurrency_ext/ed25519-donna-32bit-tables.h +61 -0
  27. data/ext/nanocurrency_ext/ed25519-donna-64bit-sse2.h +436 -0
  28. data/ext/nanocurrency_ext/ed25519-donna-64bit-tables.h +53 -0
  29. data/ext/nanocurrency_ext/ed25519-donna-64bit-x86-32bit.h +435 -0
  30. data/ext/nanocurrency_ext/ed25519-donna-64bit-x86.h +351 -0
  31. data/ext/nanocurrency_ext/ed25519-donna-basepoint-table.h +259 -0
  32. data/ext/nanocurrency_ext/ed25519-donna-batchverify.h +275 -0
  33. data/ext/nanocurrency_ext/ed25519-donna-impl-base.h +364 -0
  34. data/ext/nanocurrency_ext/ed25519-donna-impl-sse2.h +390 -0
  35. data/ext/nanocurrency_ext/ed25519-donna-portable-identify.h +103 -0
  36. data/ext/nanocurrency_ext/ed25519-donna-portable.h +135 -0
  37. data/ext/nanocurrency_ext/ed25519-donna.h +115 -0
  38. data/ext/nanocurrency_ext/ed25519-hash-custom.c +28 -0
  39. data/ext/nanocurrency_ext/ed25519-hash-custom.h +30 -0
  40. data/ext/nanocurrency_ext/ed25519-hash.h +219 -0
  41. data/ext/nanocurrency_ext/ed25519-randombytes-custom.h +10 -0
  42. data/ext/nanocurrency_ext/ed25519-randombytes.h +91 -0
  43. data/ext/nanocurrency_ext/ed25519.c +150 -0
  44. data/ext/nanocurrency_ext/ed25519.h +30 -0
  45. data/ext/nanocurrency_ext/extconf.rb +3 -0
  46. data/ext/nanocurrency_ext/fuzz/README.md +173 -0
  47. data/ext/nanocurrency_ext/fuzz/build-nix.php +134 -0
  48. data/ext/nanocurrency_ext/fuzz/curve25519-ref10.c +1272 -0
  49. data/ext/nanocurrency_ext/fuzz/curve25519-ref10.h +8 -0
  50. data/ext/nanocurrency_ext/fuzz/ed25519-donna-sse2.c +3 -0
  51. data/ext/nanocurrency_ext/fuzz/ed25519-donna.c +1 -0
  52. data/ext/nanocurrency_ext/fuzz/ed25519-donna.h +34 -0
  53. data/ext/nanocurrency_ext/fuzz/ed25519-ref10.c +4647 -0
  54. data/ext/nanocurrency_ext/fuzz/ed25519-ref10.h +9 -0
  55. data/ext/nanocurrency_ext/fuzz/fuzz-curve25519.c +172 -0
  56. data/ext/nanocurrency_ext/fuzz/fuzz-ed25519.c +219 -0
  57. data/ext/nanocurrency_ext/modm-donna-32bit.h +469 -0
  58. data/ext/nanocurrency_ext/modm-donna-64bit.h +361 -0
  59. data/ext/nanocurrency_ext/rbext.c +164 -0
  60. data/ext/nanocurrency_ext/regression.h +1024 -0
  61. data/lib/nano/account.rb +59 -0
  62. data/lib/nano/base32.rb +87 -0
  63. data/lib/nano/block.rb +142 -0
  64. data/lib/nano/check.rb +65 -0
  65. data/lib/nano/conversion.rb +102 -0
  66. data/lib/nano/hash.rb +43 -0
  67. data/lib/nano/key.rb +69 -0
  68. data/lib/nano/utils.rb +45 -0
  69. data/lib/nano/work.rb +51 -0
  70. data/lib/nanocurrency.rb +7 -0
  71. data/lib/nanocurrency/version.rb +3 -0
  72. data/lib/nanocurrency_ext.bundle +0 -0
  73. data/nanocurrency.gemspec +44 -0
  74. metadata +192 -0
@@ -0,0 +1,9 @@
1
+ #ifndef ED25519_REF10_H
2
+ #define ED25519_REF10_H
3
+
4
/* Writes the public key for sk into pk.
   NOTE(review): the fuzz-ed25519.c harness passes a 32-byte sk and reads back 32 bytes of pk - confirm against the implementation. */
+ int crypto_sign_pk_ref10(unsigned char *pk,unsigned char *sk);
5
/* Signs the mlen-byte message m, writing the signed message to sm and its length to *smlen.
   NOTE(review): the harness passes a 64-byte sk buffer (secret key followed by public key) and
   reads the 64-byte signature from the front of sm - confirm against the implementation. */
+ int crypto_sign_ref10(unsigned char *sm,unsigned long long *smlen,const unsigned char *m,unsigned long long mlen,const unsigned char *sk);
6
/* Verifies the smlen-byte signed message sm against pk, writing the message to m and its length to *mlen.
   NOTE(review): the harness treats a return value of 0 as "valid" - confirm against the implementation. */
+ int crypto_sign_open_ref10(unsigned char *m,unsigned long long *mlen,const unsigned char *sm,unsigned long long smlen,const unsigned char *pk);
7
+
8
+ #endif /* ED25519_REF10_H */
9
+
@@ -0,0 +1,172 @@
1
+ #if defined(_WIN32)
2
+ #include <windows.h>
3
+ #include <wincrypt.h>
4
+ typedef unsigned int uint32_t;
5
+ typedef unsigned __int64 uint64_t;
6
+ #else
7
+ #include <stdint.h>
8
+ #endif
9
+
10
+ #include <string.h>
11
+ #include <stdio.h>
12
+
13
+ #include "ed25519-donna.h"
14
+ #include "curve25519-ref10.h"
15
+
16
/*
 * Print the byte-wise XOR of two len-byte buffers under the heading
 * "<desc> diff:". Equal bytes are shown as "____," and differing bytes as
 * the hex value of the XOR. A line break follows every 16th byte.
 */
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
	size_t idx;

	printf("%s diff:\n", desc);
	for (idx = 0; idx < len; idx++) {
		const unsigned char delta = (unsigned char)(a[idx] ^ b[idx]);

		if (delta != 0)
			printf("0x%02x,", delta);
		else
			printf("____,");

		/* wrap the row after 16 entries */
		if (((idx + 1) & 15) == 0)
			printf("\n");
	}
	printf("\n\n");
}
32
+
33
/*
 * Print len bytes as comma-terminated hex literals under the heading
 * "<desc>:", 16 bytes per row.
 */
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
	size_t idx;

	printf("%s:\n", desc);
	for (idx = 0; idx < len; idx++) {
		printf("0x%02x,", bytes[idx]);

		/* wrap the row after 16 entries */
		if (((idx + 1) & 15) == 0)
			printf("\n");
	}
	printf("\n\n");
}
44
+
45
+
46
+ /* chacha20/12 prng */
47
/*
 * chacha20/12 prng
 *
 * Fills out[0..bytes) with pseudo-random bytes.
 *
 * On the first call the whole 16-word state is seeded from the OS
 * (CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits
 * with status 1 if seeding fails. Every 64-byte output block is the state
 * run through 12 rounds (6 double rounds) of the quarter-round function;
 * the permuted words are emitted directly, without adding the input state
 * back in. The function keeps static state and is not reentrant.
 */
void
prng(unsigned char *out, size_t bytes) {
	static uint32_t state[16];
	static int init = 0;
	uint32_t x[16], t;
	size_t i;

	if (!init) {
#if defined(_WIN32)
		HCRYPTPROV csp;
		if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
			printf("CryptAcquireContext failed\n");
			exit(1);
		}
		if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
			printf("CryptGenRandom failed\n");
			exit(1);
		}
		CryptReleaseContext(csp, 0);
#else
		FILE *f = fopen("/dev/urandom", "rb");
		if (!f) {
			printf("failed to open /dev/urandom\n");
			exit(1);
		}
		if (fread(state, sizeof(state), 1, f) != 1) {
			printf("read error on /dev/urandom\n");
			fclose(f);
			exit(1);
		}
		/* the seed is only read once, so release the handle right away */
		fclose(f);
#endif
		init = 1;
	}

	#define rotl32(x,k) ((x << k) | (x >> (32 - k)))
	#define quarter(a,b,c,d) \
		x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
		x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
		x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
		x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

	while (bytes) {
		for (i = 0; i < 16; i++) x[i] = state[i];

		/* 6 double rounds: four column rounds followed by four diagonal rounds */
		for (i = 0; i < 12; i += 2) {
			quarter( 0, 4, 8,12)
			quarter( 1, 5, 9,13)
			quarter( 2, 6,10,14)
			quarter( 3, 7,11,15)
			quarter( 0, 5,10,15)
			quarter( 1, 6,11,12)
			quarter( 2, 7, 8,13)
			quarter( 3, 4, 9,14)
		};

		if (bytes <= 64) {
			memcpy(out, x, bytes);
			bytes = 0;
		} else {
			memcpy(out, x, 64);
			bytes -= 64;
			out += 64;
		}

		/* don't need a nonce, so the last 4 words form a 128-bit block counter:
		   2^128 blocks of 64 bytes = 2^134 bytes can be generated */
		if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
	}
}
115
+
116
+
117
+
118
/*
 * Fuzz driver: endlessly feeds random 32-byte scalars to the ref10 and
 * curved25519 basepoint scalar multiplications (plus the SSE2 variant when
 * ED25519_SSE2 is defined) and compares the results against ref10.
 *
 * On the first mismatch the scalar, the ref10 output and the XOR diffs are
 * printed and the process exits with status 1. Otherwise a '.' is printed
 * every 0x1000 iterations and the big-endian iteration counter every
 * 0x20000 iterations.
 */
int main(void) {
	const size_t skmax = 1024;
	static unsigned char sk[1024][32];
	unsigned char pk[3][32];
	unsigned char *skp;
	size_t ski, pki, i;
	uint64_t ctr;

	printf("fuzzing: ");
	printf(" ref10");
	printf(" curved25519");
#if defined(ED25519_SSE2)
	printf(" curved25519-sse2");
#endif
	printf("\n\n");

	/* ski starts at skmax so the key pool is filled on the first iteration */
	for (ctr = 0, ski = skmax;;ctr++) {
		if (ski == skmax) {
			prng((unsigned char *)sk, sizeof(sk));
			ski = 0;
		}
		skp = sk[ski++];

		/* pk[0] is always the ref10 result the others are compared to */
		pki = 0;
		crypto_scalarmult_base_ref10(pk[pki++], skp);
		curved25519_scalarmult_basepoint(pk[pki++], skp);
#if defined(ED25519_SSE2)
		curved25519_scalarmult_basepoint_sse2(pk[pki++], skp);
#endif

		for (i = 1; i < pki; i++) {
			if (memcmp(pk[0], pk[i], 32) != 0) {
				printf("\n\n");
				print_bytes("sk", skp, 32);
				print_bytes("ref10", pk[0], 32);
				print_diff("curved25519", pk[0], pk[1], 32);
#if defined(ED25519_SSE2)
				print_diff("curved25519-sse2", pk[0], pk[2], 32);
#endif
				exit(1);
			}
		}

		if (ctr && (ctr % 0x1000 == 0)) {
			printf(".");
			if ((ctr % 0x20000) == 0) {
				printf(" [");
				for (i = 0; i < 8; i++)
					printf("%02x", (unsigned char)(ctr >> ((7 - i) * 8)));
				printf("]\n");
			}
			/* stdout may be buffered; flush so the progress dots show up live */
			fflush(stdout);
		}
	}
}
172
+
@@ -0,0 +1,219 @@
1
+ #if defined(_WIN32)
2
+ #include <windows.h>
3
+ #include <wincrypt.h>
4
+ typedef unsigned int uint32_t;
5
+ #else
6
+ #include <stdint.h>
7
+ #endif
8
+
9
+ #include <string.h>
10
+ #include <stdio.h>
11
+
12
+ #include "ed25519-donna.h"
13
+ #include "ed25519-ref10.h"
14
+
15
/*
 * Print the byte-wise XOR of two len-byte buffers under the heading
 * "<desc> diff:". Equal bytes are shown as "____," and differing bytes as
 * the hex value of the XOR. A line break follows every 16th byte.
 */
static void
print_diff(const char *desc, const unsigned char *a, const unsigned char *b, size_t len) {
	size_t idx;

	printf("%s diff:\n", desc);
	for (idx = 0; idx < len; idx++) {
		const unsigned char delta = (unsigned char)(a[idx] ^ b[idx]);

		if (delta != 0)
			printf("0x%02x,", delta);
		else
			printf("____,");

		/* wrap the row after 16 entries */
		if (((idx + 1) & 15) == 0)
			printf("\n");
	}
	printf("\n");
}
31
+
32
/*
 * Print len bytes as comma-terminated hex literals under the heading
 * "<desc>:", 16 bytes per row.
 */
static void
print_bytes(const char *desc, const unsigned char *bytes, size_t len) {
	size_t idx;

	printf("%s:\n", desc);
	for (idx = 0; idx < len; idx++) {
		printf("0x%02x,", bytes[idx]);

		/* wrap the row after 16 entries */
		if (((idx + 1) & 15) == 0)
			printf("\n");
	}
	printf("\n");
}
43
+
44
+
45
+ /* chacha20/12 prng */
46
/*
 * chacha20/12 prng
 *
 * Fills out[0..bytes) with pseudo-random bytes.
 *
 * On the first call the whole 16-word state is seeded from the OS
 * (CryptGenRandom on Windows, /dev/urandom elsewhere); the process exits
 * with status 1 if seeding fails. Every 64-byte output block is the state
 * run through 12 rounds (6 double rounds) of the quarter-round function;
 * the permuted words are emitted directly, without adding the input state
 * back in. The function keeps static state and is not reentrant.
 */
void
prng(unsigned char *out, size_t bytes) {
	static uint32_t state[16];
	static int init = 0;
	uint32_t x[16], t;
	size_t i;

	if (!init) {
#if defined(_WIN32)
		HCRYPTPROV csp = NULL;
		if (!CryptAcquireContext(&csp, 0, 0, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
			printf("CryptAcquireContext failed\n");
			exit(1);
		}
		if (!CryptGenRandom(csp, (DWORD)sizeof(state), (BYTE*)state)) {
			printf("CryptGenRandom failed\n");
			exit(1);
		}
		CryptReleaseContext(csp, 0);
#else
		FILE *f = fopen("/dev/urandom", "rb");
		if (!f) {
			printf("failed to open /dev/urandom\n");
			exit(1);
		}
		if (fread(state, sizeof(state), 1, f) != 1) {
			printf("read error on /dev/urandom\n");
			fclose(f);
			exit(1);
		}
		/* the seed is only read once, so release the handle right away */
		fclose(f);
#endif
		init = 1;
	}

	#define rotl32(x,k) ((x << k) | (x >> (32 - k)))
	#define quarter(a,b,c,d) \
		x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t,16); \
		x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t,12); \
		x[a] += x[b]; t = x[d]^x[a]; x[d] = rotl32(t, 8); \
		x[c] += x[d]; t = x[b]^x[c]; x[b] = rotl32(t, 7);

	while (bytes) {
		for (i = 0; i < 16; i++) x[i] = state[i];

		/* 6 double rounds: four column rounds followed by four diagonal rounds */
		for (i = 0; i < 12; i += 2) {
			quarter( 0, 4, 8,12)
			quarter( 1, 5, 9,13)
			quarter( 2, 6,10,14)
			quarter( 3, 7,11,15)
			quarter( 0, 5,10,15)
			quarter( 1, 6,11,12)
			quarter( 2, 7, 8,13)
			quarter( 3, 4, 9,14)
		};

		if (bytes <= 64) {
			memcpy(out, x, bytes);
			bytes = 0;
		} else {
			memcpy(out, x, 64);
			bytes -= 64;
			out += 64;
		}

		/* don't need a nonce, so the last 4 words form a 128-bit block counter:
		   2^128 blocks of 64 bytes = 2^134 bytes can be generated */
		if (!++state[12]) if (!++state[13]) if (!++state[14]) ++state[15];
	}
}
114
+
115
/* One fuzz input: main() fills these from prng() and uses sk as the 32-byte
   secret key and m as the 128-byte message to sign. */
+ typedef struct random_data_t {
116
+ unsigned char sk[32];
117
+ unsigned char m[128];
118
+ } random_data;
119
+
120
/* Output of one implementation for one fuzz input: the derived public key,
   the signature, and the return value of that implementation's verify call
   (the print helpers report a non-zero value as "valid: no"). main() compares
   whole structs with memcmp. */
+ typedef struct generated_data_t {
121
+ unsigned char pk[32];
122
+ unsigned char sig[64];
123
+ int valid;
124
+ } generated_data;
125
+
126
+ static void
127
+ print_generated(const char *desc, generated_data *g) {
128
+ printf("%s:\n", desc);
129
+ print_bytes("pk", g->pk, 32);
130
+ print_bytes("sig", g->sig, 64);
131
+ printf("valid: %s\n\n", g->valid ? "no" : "yes");
132
+ }
133
+
134
+ static void
135
+ print_generated_diff(const char *desc, const generated_data *base, generated_data *g) {
136
+ printf("%s:\n", desc);
137
+ print_diff("pk", base->pk, g->pk, 32);
138
+ print_diff("sig", base->sig, g->sig, 64);
139
+ printf("valid: %s\n\n", (base->valid == g->valid) ? "___" : (g->valid ? "no" : "yes"));
140
+ }
141
+
142
+ int main() {
143
+ const size_t rndmax = 128;
144
+ static random_data rnd[128];
145
+ static generated_data gen[3];
146
+ random_data *r;
147
+ generated_data *g;
148
+ unsigned long long dummylen;
149
+ unsigned char dummysk[64];
150
+ unsigned char dummymsg[2][128+64];
151
+ size_t rndi, geni, i, j;
152
+ uint64_t ctr;
153
+
154
+ printf("fuzzing: ");
155
+ printf(" ref10");
156
+ printf(" ed25519-donna");
157
+ #if defined(ED25519_SSE2)
158
+ printf(" ed25519-donna-sse2");
159
+ #endif
160
+ printf("\n\n");
161
+
162
+ for (ctr = 0, rndi = rndmax;;ctr++) {
163
+ if (rndi == rndmax) {
164
+ prng((unsigned char *)rnd, sizeof(rnd));
165
+ rndi = 0;
166
+ }
167
+ r = &rnd[rndi++];
168
+
169
+ /* ref10, lots of horrible gymnastics to work around the wonky api */
170
+ geni = 0;
171
+ g = &gen[geni++];
172
+ memcpy(dummysk, r->sk, 32); /* pk is appended to the sk, need to copy the sk to a larger buffer */
173
+ crypto_sign_pk_ref10(dummysk + 32, dummysk);
174
+ memcpy(g->pk, dummysk + 32, 32);
175
+ crypto_sign_ref10(dummymsg[0], &dummylen, r->m, 128, dummysk);
176
+ memcpy(g->sig, dummymsg[0], 64); /* sig is placed in front of the signed message */
177
+ g->valid = crypto_sign_open_ref10(dummymsg[1], &dummylen, dummymsg[0], 128 + 64, g->pk);
178
+
179
+ /* ed25519-donna */
180
+ g = &gen[geni++];
181
+ ed25519_publickey(r->sk, g->pk);
182
+ ed25519_sign(r->m, 128, r->sk, g->pk, g->sig);
183
+ g->valid = ed25519_sign_open(r->m, 128, g->pk, g->sig);
184
+
185
+ #if defined(ED25519_SSE2)
186
+ /* ed25519-donna-sse2 */
187
+ g = &gen[geni++];
188
+ ed25519_publickey_sse2(r->sk, g->pk);
189
+ ed25519_sign_sse2(r->m, 128, r->sk, g->pk, g->sig);
190
+ g->valid = ed25519_sign_open_sse2(r->m, 128, g->pk, g->sig);
191
+ #endif
192
+
193
+ /* compare implementations 1..geni against the reference */
194
+ for (i = 1; i < geni; i++) {
195
+ if (memcmp(&gen[0], &gen[i], sizeof(generated_data)) != 0) {
196
+ printf("\n\n");
197
+ print_bytes("sk", r->sk, 32);
198
+ print_bytes("m", r->m, 128);
199
+ print_generated("ref10", &gen[0]);
200
+ print_generated_diff("ed25519-donna", &gen[0], &gen[1]);
201
+ #if defined(ED25519_SSE2)
202
+ print_generated_diff("ed25519-donna-sse2", &gen[0], &gen[2]);
203
+ #endif
204
+ exit(1);
205
+ }
206
+ }
207
+
208
+ /* print out status */
209
+ if (ctr && (ctr % 0x1000 == 0)) {
210
+ printf(".");
211
+ if ((ctr % 0x20000) == 0) {
212
+ printf(" [");
213
+ for (i = 0; i < 8; i++)
214
+ printf("%02x", (unsigned char)(ctr >> ((7 - i) * 8)));
215
+ printf("]\n");
216
+ }
217
+ }
218
+ }
219
+ }
@@ -0,0 +1,469 @@
1
+ /*
2
+ Public domain by Andrew M. <liquidsun@gmail.com>
3
+ */
4
+
5
+
6
+ /*
7
+ Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989
8
+
9
+ k = 32
10
+ b = 1 << 8 = 256
11
+ m = 2^252 + 27742317777372353535851937790883648493 = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed
12
+ mu = floor( b^(k*2) / m ) = 0xfffffffffffffffffffffffffffffffeb2106215d086329a7ed9ce5a30a2c131b
13
+ */
14
+
15
/* A bignum256modm is an array of 9 uint32_t limbs; the arithmetic below keeps
   30 bits of the value in each limb, least-significant limb first. */
+ #define bignum256modm_bits_per_limb 30
16
+ #define bignum256modm_limb_size 9
17
+
18
+ typedef uint32_t bignum256modm_element_t;
19
+ typedef bignum256modm_element_t bignum256modm[9];
20
+
21
/* The modulus m = 2^252 + 27742317777372353535851937790883648493 split into
   30-bit limbs, least-significant limb first (the top limb 0x1000 is bit 252). */
+ static const bignum256modm modm_m = {
22
+ 0x1cf5d3ed, 0x20498c69, 0x2f79cd65, 0x37be77a8,
23
+ 0x00000014, 0x00000000, 0x00000000, 0x00000000,
24
+ 0x00001000
25
+ };
26
+
27
/* The Barrett constant mu = floor(b^(2k) / m) in the same 30-bit limb layout;
   used by barrett_reduce256_modm. */
+ static const bignum256modm modm_mu = {
28
+ 0x0a2c131b, 0x3673968c, 0x06329a7e, 0x01885742,
29
+ 0x3fffeb21, 0x3fffffff, 0x3fffffff, 0x3fffffff,
30
+ 0x000fffff
31
+ };
32
+
33
+ static bignum256modm_element_t
34
+ lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {
35
+ return (a - b) >> 31;
36
+ }
37
+
38
+ /* see HAC, Alg. 14.42 Step 4 */
39
+ static void
40
+ reduce256_modm(bignum256modm r) {
41
+ bignum256modm t;
42
+ bignum256modm_element_t b = 0, pb, mask;
43
+
44
+ /* t = r - m */
45
+ pb = 0;
46
+ pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 30)); pb = b;
47
+ pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 30)); pb = b;
48
+ pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 30)); pb = b;
49
+ pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - pb + (b << 30)); pb = b;
50
+ pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 30)); pb = b;
51
+ pb += modm_m[5]; b = lt_modm(r[5], pb); t[5] = (r[5] - pb + (b << 30)); pb = b;
52
+ pb += modm_m[6]; b = lt_modm(r[6], pb); t[6] = (r[6] - pb + (b << 30)); pb = b;
53
+ pb += modm_m[7]; b = lt_modm(r[7], pb); t[7] = (r[7] - pb + (b << 30)); pb = b;
54
+ pb += modm_m[8]; b = lt_modm(r[8], pb); t[8] = (r[8] - pb + (b << 16));
55
+
56
+ /* keep r if r was smaller than m */
57
+ mask = b - 1;
58
+ r[0] ^= mask & (r[0] ^ t[0]);
59
+ r[1] ^= mask & (r[1] ^ t[1]);
60
+ r[2] ^= mask & (r[2] ^ t[2]);
61
+ r[3] ^= mask & (r[3] ^ t[3]);
62
+ r[4] ^= mask & (r[4] ^ t[4]);
63
+ r[5] ^= mask & (r[5] ^ t[5]);
64
+ r[6] ^= mask & (r[6] ^ t[6]);
65
+ r[7] ^= mask & (r[7] ^ t[7]);
66
+ r[8] ^= mask & (r[8] ^ t[8]);
67
+ }
68
+
69
+ /*
70
+ Barrett reduction, see HAC, Alg. 14.42
71
+
72
+ Instead of passing in x, pre-process in to q1 and r1 for efficiency
73
+ */
74
+ static void
75
+ barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {
76
+ bignum256modm q3, r2;
77
+ uint64_t c;
78
+ bignum256modm_element_t f, b, pb;
79
+
80
+ /* q1 = x >> 248 = 264 bits = 9 30 bit elements
81
+ q2 = mu * q1
82
+ q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */
83
+ c = mul32x32_64(modm_mu[0], q1[7]) + mul32x32_64(modm_mu[1], q1[6]) + mul32x32_64(modm_mu[2], q1[5]) + mul32x32_64(modm_mu[3], q1[4]) + mul32x32_64(modm_mu[4], q1[3]) + mul32x32_64(modm_mu[5], q1[2]) + mul32x32_64(modm_mu[6], q1[1]) + mul32x32_64(modm_mu[7], q1[0]);
84
+ c >>= 30;
85
+ c += mul32x32_64(modm_mu[0], q1[8]) + mul32x32_64(modm_mu[1], q1[7]) + mul32x32_64(modm_mu[2], q1[6]) + mul32x32_64(modm_mu[3], q1[5]) + mul32x32_64(modm_mu[4], q1[4]) + mul32x32_64(modm_mu[5], q1[3]) + mul32x32_64(modm_mu[6], q1[2]) + mul32x32_64(modm_mu[7], q1[1]) + mul32x32_64(modm_mu[8], q1[0]);
86
+ f = (bignum256modm_element_t)c; q3[0] = (f >> 24) & 0x3f; c >>= 30;
87
+ c += mul32x32_64(modm_mu[1], q1[8]) + mul32x32_64(modm_mu[2], q1[7]) + mul32x32_64(modm_mu[3], q1[6]) + mul32x32_64(modm_mu[4], q1[5]) + mul32x32_64(modm_mu[5], q1[4]) + mul32x32_64(modm_mu[6], q1[3]) + mul32x32_64(modm_mu[7], q1[2]) + mul32x32_64(modm_mu[8], q1[1]);
88
+ f = (bignum256modm_element_t)c; q3[0] |= (f << 6) & 0x3fffffff; q3[1] = (f >> 24) & 0x3f; c >>= 30;
89
+ c += mul32x32_64(modm_mu[2], q1[8]) + mul32x32_64(modm_mu[3], q1[7]) + mul32x32_64(modm_mu[4], q1[6]) + mul32x32_64(modm_mu[5], q1[5]) + mul32x32_64(modm_mu[6], q1[4]) + mul32x32_64(modm_mu[7], q1[3]) + mul32x32_64(modm_mu[8], q1[2]);
90
+ f = (bignum256modm_element_t)c; q3[1] |= (f << 6) & 0x3fffffff; q3[2] = (f >> 24) & 0x3f; c >>= 30;
91
+ c += mul32x32_64(modm_mu[3], q1[8]) + mul32x32_64(modm_mu[4], q1[7]) + mul32x32_64(modm_mu[5], q1[6]) + mul32x32_64(modm_mu[6], q1[5]) + mul32x32_64(modm_mu[7], q1[4]) + mul32x32_64(modm_mu[8], q1[3]);
92
+ f = (bignum256modm_element_t)c; q3[2] |= (f << 6) & 0x3fffffff; q3[3] = (f >> 24) & 0x3f; c >>= 30;
93
+ c += mul32x32_64(modm_mu[4], q1[8]) + mul32x32_64(modm_mu[5], q1[7]) + mul32x32_64(modm_mu[6], q1[6]) + mul32x32_64(modm_mu[7], q1[5]) + mul32x32_64(modm_mu[8], q1[4]);
94
+ f = (bignum256modm_element_t)c; q3[3] |= (f << 6) & 0x3fffffff; q3[4] = (f >> 24) & 0x3f; c >>= 30;
95
+ c += mul32x32_64(modm_mu[5], q1[8]) + mul32x32_64(modm_mu[6], q1[7]) + mul32x32_64(modm_mu[7], q1[6]) + mul32x32_64(modm_mu[8], q1[5]);
96
+ f = (bignum256modm_element_t)c; q3[4] |= (f << 6) & 0x3fffffff; q3[5] = (f >> 24) & 0x3f; c >>= 30;
97
+ c += mul32x32_64(modm_mu[6], q1[8]) + mul32x32_64(modm_mu[7], q1[7]) + mul32x32_64(modm_mu[8], q1[6]);
98
+ f = (bignum256modm_element_t)c; q3[5] |= (f << 6) & 0x3fffffff; q3[6] = (f >> 24) & 0x3f; c >>= 30;
99
+ c += mul32x32_64(modm_mu[7], q1[8]) + mul32x32_64(modm_mu[8], q1[7]);
100
+ f = (bignum256modm_element_t)c; q3[6] |= (f << 6) & 0x3fffffff; q3[7] = (f >> 24) & 0x3f; c >>= 30;
101
+ c += mul32x32_64(modm_mu[8], q1[8]);
102
+ f = (bignum256modm_element_t)c; q3[7] |= (f << 6) & 0x3fffffff; q3[8] = (bignum256modm_element_t)(c >> 24);
103
+
104
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
105
+ r2 = (q3 * m) mod (256^(32+1)) = (q3 * m) & ((1 << 264) - 1) */
106
+ c = mul32x32_64(modm_m[0], q3[0]);
107
+ r2[0] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
108
+ c += mul32x32_64(modm_m[0], q3[1]) + mul32x32_64(modm_m[1], q3[0]);
109
+ r2[1] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
110
+ c += mul32x32_64(modm_m[0], q3[2]) + mul32x32_64(modm_m[1], q3[1]) + mul32x32_64(modm_m[2], q3[0]);
111
+ r2[2] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
112
+ c += mul32x32_64(modm_m[0], q3[3]) + mul32x32_64(modm_m[1], q3[2]) + mul32x32_64(modm_m[2], q3[1]) + mul32x32_64(modm_m[3], q3[0]);
113
+ r2[3] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
114
+ c += mul32x32_64(modm_m[0], q3[4]) + mul32x32_64(modm_m[1], q3[3]) + mul32x32_64(modm_m[2], q3[2]) + mul32x32_64(modm_m[3], q3[1]) + mul32x32_64(modm_m[4], q3[0]);
115
+ r2[4] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
116
+ c += mul32x32_64(modm_m[0], q3[5]) + mul32x32_64(modm_m[1], q3[4]) + mul32x32_64(modm_m[2], q3[3]) + mul32x32_64(modm_m[3], q3[2]) + mul32x32_64(modm_m[4], q3[1]) + mul32x32_64(modm_m[5], q3[0]);
117
+ r2[5] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
118
+ c += mul32x32_64(modm_m[0], q3[6]) + mul32x32_64(modm_m[1], q3[5]) + mul32x32_64(modm_m[2], q3[4]) + mul32x32_64(modm_m[3], q3[3]) + mul32x32_64(modm_m[4], q3[2]) + mul32x32_64(modm_m[5], q3[1]) + mul32x32_64(modm_m[6], q3[0]);
119
+ r2[6] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
120
+ c += mul32x32_64(modm_m[0], q3[7]) + mul32x32_64(modm_m[1], q3[6]) + mul32x32_64(modm_m[2], q3[5]) + mul32x32_64(modm_m[3], q3[4]) + mul32x32_64(modm_m[4], q3[3]) + mul32x32_64(modm_m[5], q3[2]) + mul32x32_64(modm_m[6], q3[1]) + mul32x32_64(modm_m[7], q3[0]);
121
+ r2[7] = (bignum256modm_element_t)(c & 0x3fffffff); c >>= 30;
122
+ c += mul32x32_64(modm_m[0], q3[8]) + mul32x32_64(modm_m[1], q3[7]) + mul32x32_64(modm_m[2], q3[6]) + mul32x32_64(modm_m[3], q3[5]) + mul32x32_64(modm_m[4], q3[4]) + mul32x32_64(modm_m[5], q3[3]) + mul32x32_64(modm_m[6], q3[2]) + mul32x32_64(modm_m[7], q3[1]) + mul32x32_64(modm_m[8], q3[0]);
123
+ r2[8] = (bignum256modm_element_t)(c & 0xffffff);
124
+
125
+ /* r = r1 - r2
126
+ if (r < 0) r += (1 << 264) */
127
+ pb = 0;
128
+ pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 30)); pb = b;
129
+ pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 30)); pb = b;
130
+ pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 30)); pb = b;
131
+ pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 30)); pb = b;
132
+ pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 30)); pb = b;
133
+ pb += r2[5]; b = lt_modm(r1[5], pb); r[5] = (r1[5] - pb + (b << 30)); pb = b;
134
+ pb += r2[6]; b = lt_modm(r1[6], pb); r[6] = (r1[6] - pb + (b << 30)); pb = b;
135
+ pb += r2[7]; b = lt_modm(r1[7], pb); r[7] = (r1[7] - pb + (b << 30)); pb = b;
136
+ pb += r2[8]; b = lt_modm(r1[8], pb); r[8] = (r1[8] - pb + (b << 24));
137
+
138
+ reduce256_modm(r);
139
+ reduce256_modm(r);
140
+ }
141
+
142
+ /* addition modulo m */
143
+ static void
144
+ add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
145
+ bignum256modm_element_t c;
146
+
147
+ c = x[0] + y[0]; r[0] = c & 0x3fffffff; c >>= 30;
148
+ c += x[1] + y[1]; r[1] = c & 0x3fffffff; c >>= 30;
149
+ c += x[2] + y[2]; r[2] = c & 0x3fffffff; c >>= 30;
150
+ c += x[3] + y[3]; r[3] = c & 0x3fffffff; c >>= 30;
151
+ c += x[4] + y[4]; r[4] = c & 0x3fffffff; c >>= 30;
152
+ c += x[5] + y[5]; r[5] = c & 0x3fffffff; c >>= 30;
153
+ c += x[6] + y[6]; r[6] = c & 0x3fffffff; c >>= 30;
154
+ c += x[7] + y[7]; r[7] = c & 0x3fffffff; c >>= 30;
155
+ c += x[8] + y[8]; r[8] = c;
156
+
157
+ reduce256_modm(r);
158
+ }
159
+
160
+ /* multiplication modulo m */
161
+ static void
162
+ mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {
163
+ bignum256modm r1, q1;
164
+ uint64_t c;
165
+ bignum256modm_element_t f;
166
+
167
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1)
168
+ q1 = x >> 248 = 264 bits = 9 30 bit elements */
169
+ c = mul32x32_64(x[0], y[0]);
170
+ f = (bignum256modm_element_t)c; r1[0] = (f & 0x3fffffff); c >>= 30;
171
+ c += mul32x32_64(x[0], y[1]) + mul32x32_64(x[1], y[0]);
172
+ f = (bignum256modm_element_t)c; r1[1] = (f & 0x3fffffff); c >>= 30;
173
+ c += mul32x32_64(x[0], y[2]) + mul32x32_64(x[1], y[1]) + mul32x32_64(x[2], y[0]);
174
+ f = (bignum256modm_element_t)c; r1[2] = (f & 0x3fffffff); c >>= 30;
175
+ c += mul32x32_64(x[0], y[3]) + mul32x32_64(x[1], y[2]) + mul32x32_64(x[2], y[1]) + mul32x32_64(x[3], y[0]);
176
+ f = (bignum256modm_element_t)c; r1[3] = (f & 0x3fffffff); c >>= 30;
177
+ c += mul32x32_64(x[0], y[4]) + mul32x32_64(x[1], y[3]) + mul32x32_64(x[2], y[2]) + mul32x32_64(x[3], y[1]) + mul32x32_64(x[4], y[0]);
178
+ f = (bignum256modm_element_t)c; r1[4] = (f & 0x3fffffff); c >>= 30;
179
+ c += mul32x32_64(x[0], y[5]) + mul32x32_64(x[1], y[4]) + mul32x32_64(x[2], y[3]) + mul32x32_64(x[3], y[2]) + mul32x32_64(x[4], y[1]) + mul32x32_64(x[5], y[0]);
180
+ f = (bignum256modm_element_t)c; r1[5] = (f & 0x3fffffff); c >>= 30;
181
+ c += mul32x32_64(x[0], y[6]) + mul32x32_64(x[1], y[5]) + mul32x32_64(x[2], y[4]) + mul32x32_64(x[3], y[3]) + mul32x32_64(x[4], y[2]) + mul32x32_64(x[5], y[1]) + mul32x32_64(x[6], y[0]);
182
+ f = (bignum256modm_element_t)c; r1[6] = (f & 0x3fffffff); c >>= 30;
183
+ c += mul32x32_64(x[0], y[7]) + mul32x32_64(x[1], y[6]) + mul32x32_64(x[2], y[5]) + mul32x32_64(x[3], y[4]) + mul32x32_64(x[4], y[3]) + mul32x32_64(x[5], y[2]) + mul32x32_64(x[6], y[1]) + mul32x32_64(x[7], y[0]);
184
+ f = (bignum256modm_element_t)c; r1[7] = (f & 0x3fffffff); c >>= 30;
185
+ c += mul32x32_64(x[0], y[8]) + mul32x32_64(x[1], y[7]) + mul32x32_64(x[2], y[6]) + mul32x32_64(x[3], y[5]) + mul32x32_64(x[4], y[4]) + mul32x32_64(x[5], y[3]) + mul32x32_64(x[6], y[2]) + mul32x32_64(x[7], y[1]) + mul32x32_64(x[8], y[0]);
186
+ f = (bignum256modm_element_t)c; r1[8] = (f & 0x00ffffff); q1[0] = (f >> 8) & 0x3fffff; c >>= 30;
187
+ c += mul32x32_64(x[1], y[8]) + mul32x32_64(x[2], y[7]) + mul32x32_64(x[3], y[6]) + mul32x32_64(x[4], y[5]) + mul32x32_64(x[5], y[4]) + mul32x32_64(x[6], y[3]) + mul32x32_64(x[7], y[2]) + mul32x32_64(x[8], y[1]);
188
+ f = (bignum256modm_element_t)c; q1[0] = (q1[0] | (f << 22)) & 0x3fffffff; q1[1] = (f >> 8) & 0x3fffff; c >>= 30;
189
+ c += mul32x32_64(x[2], y[8]) + mul32x32_64(x[3], y[7]) + mul32x32_64(x[4], y[6]) + mul32x32_64(x[5], y[5]) + mul32x32_64(x[6], y[4]) + mul32x32_64(x[7], y[3]) + mul32x32_64(x[8], y[2]);
190
+ f = (bignum256modm_element_t)c; q1[1] = (q1[1] | (f << 22)) & 0x3fffffff; q1[2] = (f >> 8) & 0x3fffff; c >>= 30;
191
+ c += mul32x32_64(x[3], y[8]) + mul32x32_64(x[4], y[7]) + mul32x32_64(x[5], y[6]) + mul32x32_64(x[6], y[5]) + mul32x32_64(x[7], y[4]) + mul32x32_64(x[8], y[3]);
192
+ f = (bignum256modm_element_t)c; q1[2] = (q1[2] | (f << 22)) & 0x3fffffff; q1[3] = (f >> 8) & 0x3fffff; c >>= 30;
193
+ c += mul32x32_64(x[4], y[8]) + mul32x32_64(x[5], y[7]) + mul32x32_64(x[6], y[6]) + mul32x32_64(x[7], y[5]) + mul32x32_64(x[8], y[4]);
194
+ f = (bignum256modm_element_t)c; q1[3] = (q1[3] | (f << 22)) & 0x3fffffff; q1[4] = (f >> 8) & 0x3fffff; c >>= 30;
195
+ c += mul32x32_64(x[5], y[8]) + mul32x32_64(x[6], y[7]) + mul32x32_64(x[7], y[6]) + mul32x32_64(x[8], y[5]);
196
+ f = (bignum256modm_element_t)c; q1[4] = (q1[4] | (f << 22)) & 0x3fffffff; q1[5] = (f >> 8) & 0x3fffff; c >>= 30;
197
+ c += mul32x32_64(x[6], y[8]) + mul32x32_64(x[7], y[7]) + mul32x32_64(x[8], y[6]);
198
+ f = (bignum256modm_element_t)c; q1[5] = (q1[5] | (f << 22)) & 0x3fffffff; q1[6] = (f >> 8) & 0x3fffff; c >>= 30;
199
+ c += mul32x32_64(x[7], y[8]) + mul32x32_64(x[8], y[7]);
200
+ f = (bignum256modm_element_t)c; q1[6] = (q1[6] | (f << 22)) & 0x3fffffff; q1[7] = (f >> 8) & 0x3fffff; c >>= 30;
201
+ c += mul32x32_64(x[8], y[8]);
202
+ f = (bignum256modm_element_t)c; q1[7] = (q1[7] | (f << 22)) & 0x3fffffff; q1[8] = (f >> 8) & 0x3fffff;
203
+
204
+ barrett_reduce256_modm(r, q1, r1);
205
+ }
206
+
207
+ static void
208
+ expand256_modm(bignum256modm out, const unsigned char *in, size_t len) {
209
+ unsigned char work[64] = {0};
210
+ bignum256modm_element_t x[16];
211
+ bignum256modm q1;
212
+
213
+ memcpy(work, in, len);
214
+ x[0] = U8TO32_LE(work + 0);
215
+ x[1] = U8TO32_LE(work + 4);
216
+ x[2] = U8TO32_LE(work + 8);
217
+ x[3] = U8TO32_LE(work + 12);
218
+ x[4] = U8TO32_LE(work + 16);
219
+ x[5] = U8TO32_LE(work + 20);
220
+ x[6] = U8TO32_LE(work + 24);
221
+ x[7] = U8TO32_LE(work + 28);
222
+ x[8] = U8TO32_LE(work + 32);
223
+ x[9] = U8TO32_LE(work + 36);
224
+ x[10] = U8TO32_LE(work + 40);
225
+ x[11] = U8TO32_LE(work + 44);
226
+ x[12] = U8TO32_LE(work + 48);
227
+ x[13] = U8TO32_LE(work + 52);
228
+ x[14] = U8TO32_LE(work + 56);
229
+ x[15] = U8TO32_LE(work + 60);
230
+
231
+ /* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */
232
+ out[0] = ( x[0]) & 0x3fffffff;
233
+ out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
234
+ out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
235
+ out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
236
+ out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
237
+ out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
238
+ out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
239
+ out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
240
+ out[8] = ((x[ 7] >> 16) | (x[ 8] << 16)) & 0x00ffffff;
241
+
242
+ /* 8*31 = 248 bits, no need to reduce */
243
+ if (len < 32)
244
+ return;
245
+
246
+ /* q1 = x >> 248 = 264 bits = 9 30 bit elements */
247
+ q1[0] = ((x[ 7] >> 24) | (x[ 8] << 8)) & 0x3fffffff;
248
+ q1[1] = ((x[ 8] >> 22) | (x[ 9] << 10)) & 0x3fffffff;
249
+ q1[2] = ((x[ 9] >> 20) | (x[10] << 12)) & 0x3fffffff;
250
+ q1[3] = ((x[10] >> 18) | (x[11] << 14)) & 0x3fffffff;
251
+ q1[4] = ((x[11] >> 16) | (x[12] << 16)) & 0x3fffffff;
252
+ q1[5] = ((x[12] >> 14) | (x[13] << 18)) & 0x3fffffff;
253
+ q1[6] = ((x[13] >> 12) | (x[14] << 20)) & 0x3fffffff;
254
+ q1[7] = ((x[14] >> 10) | (x[15] << 22)) & 0x3fffffff;
255
+ q1[8] = ((x[15] >> 8) );
256
+
257
+ barrett_reduce256_modm(out, q1, out);
258
+ }
259
+
260
+ static void
261
+ expand_raw256_modm(bignum256modm out, const unsigned char in[32]) {
262
+ bignum256modm_element_t x[8];
263
+
264
+ x[0] = U8TO32_LE(in + 0);
265
+ x[1] = U8TO32_LE(in + 4);
266
+ x[2] = U8TO32_LE(in + 8);
267
+ x[3] = U8TO32_LE(in + 12);
268
+ x[4] = U8TO32_LE(in + 16);
269
+ x[5] = U8TO32_LE(in + 20);
270
+ x[6] = U8TO32_LE(in + 24);
271
+ x[7] = U8TO32_LE(in + 28);
272
+
273
+ out[0] = ( x[0]) & 0x3fffffff;
274
+ out[1] = ((x[ 0] >> 30) | (x[ 1] << 2)) & 0x3fffffff;
275
+ out[2] = ((x[ 1] >> 28) | (x[ 2] << 4)) & 0x3fffffff;
276
+ out[3] = ((x[ 2] >> 26) | (x[ 3] << 6)) & 0x3fffffff;
277
+ out[4] = ((x[ 3] >> 24) | (x[ 4] << 8)) & 0x3fffffff;
278
+ out[5] = ((x[ 4] >> 22) | (x[ 5] << 10)) & 0x3fffffff;
279
+ out[6] = ((x[ 5] >> 20) | (x[ 6] << 12)) & 0x3fffffff;
280
+ out[7] = ((x[ 6] >> 18) | (x[ 7] << 14)) & 0x3fffffff;
281
+ out[8] = ((x[ 7] >> 16) ) & 0x0000ffff;
282
+ }
283
+
284
+ static void
285
+ contract256_modm(unsigned char out[32], const bignum256modm in) {
286
+ U32TO8_LE(out + 0, (in[0] ) | (in[1] << 30));
287
+ U32TO8_LE(out + 4, (in[1] >> 2) | (in[2] << 28));
288
+ U32TO8_LE(out + 8, (in[2] >> 4) | (in[3] << 26));
289
+ U32TO8_LE(out + 12, (in[3] >> 6) | (in[4] << 24));
290
+ U32TO8_LE(out + 16, (in[4] >> 8) | (in[5] << 22));
291
+ U32TO8_LE(out + 20, (in[5] >> 10) | (in[6] << 20));
292
+ U32TO8_LE(out + 24, (in[6] >> 12) | (in[7] << 18));
293
+ U32TO8_LE(out + 28, (in[7] >> 14) | (in[8] << 16));
294
+ }
295
+
296
+
297
+
298
+ static void
299
+ contract256_window4_modm(signed char r[64], const bignum256modm in) {
300
+ char carry;
301
+ signed char *quads = r;
302
+ bignum256modm_element_t i, j, v;
303
+
304
+ for (i = 0; i < 8; i += 2) {
305
+ v = in[i];
306
+ for (j = 0; j < 7; j++) {
307
+ *quads++ = (v & 15);
308
+ v >>= 4;
309
+ }
310
+ v |= (in[i+1] << 2);
311
+ for (j = 0; j < 8; j++) {
312
+ *quads++ = (v & 15);
313
+ v >>= 4;
314
+ }
315
+ }
316
+ v = in[8];
317
+ *quads++ = (v & 15); v >>= 4;
318
+ *quads++ = (v & 15); v >>= 4;
319
+ *quads++ = (v & 15); v >>= 4;
320
+ *quads++ = (v & 15); v >>= 4;
321
+
322
+ /* making it signed */
323
+ carry = 0;
324
+ for(i = 0; i < 63; i++) {
325
+ r[i] += carry;
326
+ r[i+1] += (r[i] >> 4);
327
+ r[i] &= 15;
328
+ carry = (r[i] >> 3);
329
+ r[i] -= (carry << 4);
330
+ }
331
+ r[63] += carry;
332
+ }
333
+
334
+ static void
335
+ contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {
336
+ int i,j,k,b;
337
+ int m = (1 << (windowsize - 1)) - 1, soplen = 256;
338
+ signed char *bits = r;
339
+ bignum256modm_element_t v;
340
+
341
+ /* first put the binary expansion into r */
342
+ for (i = 0; i < 8; i++) {
343
+ v = s[i];
344
+ for (j = 0; j < 30; j++, v >>= 1)
345
+ *bits++ = (v & 1);
346
+ }
347
+ v = s[8];
348
+ for (j = 0; j < 16; j++, v >>= 1)
349
+ *bits++ = (v & 1);
350
+
351
+ /* Making it sliding window */
352
+ for (j = 0; j < soplen; j++) {
353
+ if (!r[j])
354
+ continue;
355
+
356
+ for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {
357
+ if ((r[j] + (r[j + b] << b)) <= m) {
358
+ r[j] += r[j + b] << b;
359
+ r[j + b] = 0;
360
+ } else if ((r[j] - (r[j + b] << b)) >= -m) {
361
+ r[j] -= r[j + b] << b;
362
+ for (k = j + b; k < soplen; k++) {
363
+ if (!r[k]) {
364
+ r[k] = 1;
365
+ break;
366
+ }
367
+ r[k] = 0;
368
+ }
369
+ } else if (r[j + b]) {
370
+ break;
371
+ }
372
+ }
373
+ }
374
+ }
375
+
376
+
377
+ /*
378
+ helpers for batch verification, are allowed to be vartime
379
+ */
380
+
381
+ /* out = a - b, a must be larger than b */
382
+ static void
383
+ sub256_modm_batch(bignum256modm out, const bignum256modm a, const bignum256modm b, size_t limbsize) {
384
+ size_t i = 0;
385
+ bignum256modm_element_t carry = 0;
386
+ switch (limbsize) {
387
+ case 8: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
388
+ case 7: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
389
+ case 6: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
390
+ case 5: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
391
+ case 4: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
392
+ case 3: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
393
+ case 2: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
394
+ case 1: out[i] = (a[i] - b[i]) - carry; carry = (out[i] >> 31); out[i] &= 0x3fffffff; i++;
395
+ case 0:
396
+ default: out[i] = (a[i] - b[i]) - carry;
397
+ }
398
+ }
399
+
400
+
401
+ /* is a < b */
402
+ static int
403
+ lt256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
404
+ switch (limbsize) {
405
+ case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
406
+ case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
407
+ case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
408
+ case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
409
+ case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
410
+ case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
411
+ case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
412
+ case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
413
+ case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
414
+ }
415
+ return 0;
416
+ }
417
+
418
+ /* is a <= b */
419
+ static int
420
+ lte256_modm_batch(const bignum256modm a, const bignum256modm b, size_t limbsize) {
421
+ switch (limbsize) {
422
+ case 8: if (a[8] > b[8]) return 0; if (a[8] < b[8]) return 1;
423
+ case 7: if (a[7] > b[7]) return 0; if (a[7] < b[7]) return 1;
424
+ case 6: if (a[6] > b[6]) return 0; if (a[6] < b[6]) return 1;
425
+ case 5: if (a[5] > b[5]) return 0; if (a[5] < b[5]) return 1;
426
+ case 4: if (a[4] > b[4]) return 0; if (a[4] < b[4]) return 1;
427
+ case 3: if (a[3] > b[3]) return 0; if (a[3] < b[3]) return 1;
428
+ case 2: if (a[2] > b[2]) return 0; if (a[2] < b[2]) return 1;
429
+ case 1: if (a[1] > b[1]) return 0; if (a[1] < b[1]) return 1;
430
+ case 0: if (a[0] > b[0]) return 0; if (a[0] < b[0]) return 1;
431
+ }
432
+ return 1;
433
+ }
434
+
435
+
436
+ /* is a == 0 */
437
+ static int
438
+ iszero256_modm_batch(const bignum256modm a) {
439
+ size_t i;
440
+ for (i = 0; i < 9; i++)
441
+ if (a[i])
442
+ return 0;
443
+ return 1;
444
+ }
445
+
446
+ /* is a == 1 */
447
+ static int
448
+ isone256_modm_batch(const bignum256modm a) {
449
+ size_t i;
450
+ if (a[0] != 1)
451
+ return 0;
452
+ for (i = 1; i < 9; i++)
453
+ if (a[i])
454
+ return 0;
455
+ return 1;
456
+ }
457
+
458
+ /* can a fit in to (at most) 128 bits */
459
+ static int
460
+ isatmost128bits256_modm_batch(const bignum256modm a) {
461
+ uint32_t mask =
462
+ ((a[8] ) | /* 16 */
463
+ (a[7] ) | /* 46 */
464
+ (a[6] ) | /* 76 */
465
+ (a[5] ) | /* 106 */
466
+ (a[4] & 0x3fffff00)); /* 128 */
467
+
468
+ return (mask == 0);
469
+ }