ed25519_blake2b 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/CODE_OF_CONDUCT.md +74 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +23 -0
  6. data/LICENSE +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ed25519_blake2b.gemspec +31 -0
  12. data/ext/ed25519_blake2b/blake2-config.h +72 -0
  13. data/ext/ed25519_blake2b/blake2-impl.h +160 -0
  14. data/ext/ed25519_blake2b/blake2.h +195 -0
  15. data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
  16. data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
  17. data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
  18. data/ext/ed25519_blake2b/blake2b-round.h +157 -0
  19. data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
  20. data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
  21. data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
  22. data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
  23. data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
  24. data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
  25. data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
  26. data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
  27. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
  28. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
  29. data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
  30. data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
  31. data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
  32. data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
  33. data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
  34. data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
  35. data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
  36. data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
  37. data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
  38. data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
  39. data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
  40. data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
  41. data/ext/ed25519_blake2b/ed25519.c +150 -0
  42. data/ext/ed25519_blake2b/ed25519.h +30 -0
  43. data/ext/ed25519_blake2b/extconf.rb +3 -0
  44. data/ext/ed25519_blake2b/fuzz/README.md +173 -0
  45. data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
  46. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
  47. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
  48. data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
  49. data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
  50. data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
  51. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
  52. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
  53. data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
  54. data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
  55. data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
  56. data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
  57. data/ext/ed25519_blake2b/rbext.c +25 -0
  58. data/ext/ed25519_blake2b/regression.h +1024 -0
  59. data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
  60. data/lib/ed25519_blake2b/version.rb +3 -0
  61. metadata +147 -0
@@ -0,0 +1,513 @@
#if defined(ED25519_GCC_32BIT_SSE_CHOOSE)

#define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS

/*
 * Local building blocks for the inline-asm template below. Each one expands to
 * adjacent string literals, so the assembled template is a single string.
 * They are #undef'd after the function.
 *
 * Register roles inside the template:
 *   %0 = u, %1 = &table[pos * 8], %2 = t, %3 = sign
 *   xmm6 = u broadcast to all four 32-bit lanes
 *   xmm7 = comparison mask for the index currently being tested
 */

/* xmm7 <- all-ones in every lane if u == idx, all-zeros otherwise. Clobbers eax. */
#define GE25519_CHOOSE_MASK(idx) \
	"movl $" #idx ", %%eax ;\n" \
	"movd %%eax, %%xmm7 ;\n" \
	"pshufd $0x00, %%xmm7, %%xmm7 ;\n" \
	"pcmpeqd %%xmm6, %%xmm7 ;\n"

/*
 * Load the 32 bytes at lo(%1) / hi(%1) into the scratch registers ra / rb,
 * AND them with the mask in xmm7 and OR the result into acc0 / acc1.
 */
#define GE25519_CHOOSE_MERGE(lo, hi, ra, rb, acc0, acc1) \
	"movdqa " #lo "(%1), %%" #ra " ;\n" \
	"movdqa " #hi "(%1), %%" #rb " ;\n" \
	"pand %%xmm7, %%" #ra " ;\n" \
	"pand %%xmm7, %%" #rb " ;\n" \
	"por %%" #ra ", %%" #acc0 " ;\n" \
	"por %%" #rb ", %%" #acc1 " ;\n"

/* Candidate idx: merge bytes [o0,o1] into xmm0:xmm1 (ysubx) and [o2,o3] into xmm2:xmm3 (xaddy). */
#define GE25519_CHOOSE_YSUBX_XADDY(idx, o0, o1, o2, o3) \
	GE25519_CHOOSE_MASK(idx) \
	GE25519_CHOOSE_MERGE(o0, o1, xmm4, xmm5, xmm0, xmm1) \
	GE25519_CHOOSE_MERGE(o2, o3, xmm4, xmm5, xmm2, xmm3)

/* Candidate idx: merge bytes [o0,o1] into xmm0:xmm1 (t2d). */
#define GE25519_CHOOSE_T2D(idx, o0, o1) \
	GE25519_CHOOSE_MASK(idx) \
	GE25519_CHOOSE_MERGE(o0, o1, xmm3, xmm4, xmm0, xmm1)

/*
 * Unpack the 32 packed bytes held in xmm0:xmm1 into ten 32-bit limbs masked
 * alternately to 26 and 25 bits, stored at 0..36(%eax), and write zero to the
 * two following words at 40(%eax) and 44(%eax).
 * Clobbers ecx, edx, xmm0 and xmm1; eax is left unchanged.
 */
#define GE25519_CHOOSE_STORE_LIMBS \
	"movd %%xmm0, %%ecx ;\n" \
	"movl %%ecx, %%edx ;\n" \
	"pshufd $0x39, %%xmm0, %%xmm0 ;\n" \
	"andl $0x3ffffff, %%ecx ;\n" \
	"movl %%ecx, 0(%%eax) ;\n" \
	"movd %%xmm0, %%ecx ;\n" \
	"pshufd $0x39, %%xmm0, %%xmm0 ;\n" \
	"shrdl $26, %%ecx, %%edx ;\n" \
	"andl $0x1ffffff, %%edx ;\n" \
	"movl %%edx, 4(%%eax) ;\n" \
	"movd %%xmm0, %%edx ;\n" \
	"pshufd $0x39, %%xmm0, %%xmm0 ;\n" \
	"shrdl $19, %%edx, %%ecx ;\n" \
	"andl $0x3ffffff, %%ecx ;\n" \
	"movl %%ecx, 8(%%eax) ;\n" \
	"movd %%xmm0, %%ecx ;\n" \
	"shrdl $13, %%ecx, %%edx ;\n" \
	"andl $0x1ffffff, %%edx ;\n" \
	"movl %%edx, 12(%%eax) ;\n" \
	"movd %%xmm1, %%edx ;\n" \
	"pshufd $0x39, %%xmm1, %%xmm1 ;\n" \
	"shrl $6, %%ecx ;\n" \
	"andl $0x3ffffff, %%ecx ;\n" \
	"movl %%ecx, 16(%%eax) ;\n" \
	"movl %%edx, %%ecx ;\n" \
	"andl $0x1ffffff, %%edx ;\n" \
	"movl %%edx, 20(%%eax) ;\n" \
	"movd %%xmm1, %%edx ;\n" \
	"pshufd $0x39, %%xmm1, %%xmm1 ;\n" \
	"shrdl $25, %%edx, %%ecx ;\n" \
	"andl $0x3ffffff, %%ecx ;\n" \
	"movl %%ecx, 24(%%eax) ;\n" \
	"movd %%xmm1, %%ecx ;\n" \
	"pshufd $0x39, %%xmm1, %%xmm1 ;\n" \
	"shrdl $19, %%ecx, %%edx ;\n" \
	"andl $0x1ffffff, %%edx ;\n" \
	"movl %%edx, 28(%%eax) ;\n" \
	"movd %%xmm1, %%edx ;\n" \
	"shrdl $12, %%edx, %%ecx ;\n" \
	"andl $0x3ffffff, %%ecx ;\n" \
	"movl %%ecx, 32(%%eax) ;\n" \
	"shrl $6, %%edx ;\n" \
	"andl $0x1ffffff, %%edx ;\n" \
	"xorl %%ecx, %%ecx ;\n" \
	"movl %%edx, 36(%%eax) ;\n" \
	"movl %%ecx, 40(%%eax) ;\n" \
	"movl %%ecx, 44(%%eax) ;\n"

/*
 * Select one precomputed entry from the 8-entry window starting at
 * table[pos * 8] and write it to *t, without branching on b.
 *
 *   t     - output; written at byte offsets 0 (ysubx), 48 (xaddy) and 96 (t2d).
 *           t + 96 is accessed with movdqa, so t must be 16-byte aligned.
 *   table - rows of 96 bytes; within a row, bytes 0..31 feed ysubx, 32..63 feed
 *           xaddy and 64..95 feed t2d. Rows are read with movdqa, so the table
 *           must be 16-byte aligned.
 *   pos   - window index; rows pos*8 .. pos*8+7 are all read on every call.
 *   b     - signed selector. With u = |b| and sign = 1 when b is negative:
 *             u == 0     -> ysubx = 1, xaddy = 1, t2d = 0
 *             u == 1..8  -> row (pos * 8 + u - 1)
 *             any other u matches no candidate and leaves all three zero
 *           When sign == 1, ysubx and xaddy are swapped and t2d is replaced by
 *           (C - t2d), where C is the per-limb constant built in the
 *           "set up 2p" step.
 *
 * Every candidate row is loaded and combined through masks, so the sequence of
 * instructions and memory addresses depends only on pos, not on b.
 */
DONNA_NOINLINE static void
ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
	int32_t breg = (int32_t)b;
	uint32_t sign = (uint32_t)breg >> 31;  /* 1 if b < 0, else 0 */
	uint32_t mask = ~(sign - 1);           /* all-ones if b < 0, else 0 */
	uint32_t u = (breg + mask) ^ mask;     /* branch-free |b| */

	__asm__ __volatile__ (
		/* ysubx+xaddy: broadcast u into xmm6 and clear the accumulators */
		"movl %0, %%eax ;\n"
		"movd %%eax, %%xmm6 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"
		"pxor %%xmm2, %%xmm2 ;\n"
		"pxor %%xmm3, %%xmm3 ;\n"

		/* 0: no table row; the candidate is the constant 1 for both ysubx and xaddy */
		GE25519_CHOOSE_MASK(0)
		"movl $1, %%ecx ;\n"
		"movd %%ecx, %%xmm4 ;\n"
		"pxor %%xmm5, %%xmm5 ;\n"
		"pand %%xmm7, %%xmm4 ;\n"
		"pand %%xmm7, %%xmm5 ;\n"
		"por %%xmm4, %%xmm0 ;\n"
		"por %%xmm5, %%xmm1 ;\n"
		"por %%xmm4, %%xmm2 ;\n"
		"por %%xmm5, %%xmm3 ;\n"

		/* 1..8: candidate row (idx - 1), 96 bytes apart */
		GE25519_CHOOSE_YSUBX_XADDY(1,   0,  16,  32,  48)
		GE25519_CHOOSE_YSUBX_XADDY(2,  96, 112, 128, 144)
		GE25519_CHOOSE_YSUBX_XADDY(3, 192, 208, 224, 240)
		GE25519_CHOOSE_YSUBX_XADDY(4, 288, 304, 320, 336)
		GE25519_CHOOSE_YSUBX_XADDY(5, 384, 400, 416, 432)
		GE25519_CHOOSE_YSUBX_XADDY(6, 480, 496, 512, 528)
		GE25519_CHOOSE_YSUBX_XADDY(7, 576, 592, 608, 624)
		GE25519_CHOOSE_YSUBX_XADDY(8, 672, 688, 704, 720)

		/* conditional swap based on sign: xmm7 is all-ones when sign == 1,
		   and the masked xor-swap exchanges xmm0:xmm1 with xmm2:xmm3 */
		"movl %3, %%ecx ;\n"
		"movl %2, %%eax ;\n"
		"xorl $1, %%ecx ;\n"
		"movd %%ecx, %%xmm6 ;\n"
		"pxor %%xmm7, %%xmm7 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"
		"pcmpeqd %%xmm6, %%xmm7 ;\n"
		"movdqa %%xmm2, %%xmm4 ;\n"
		"movdqa %%xmm3, %%xmm5 ;\n"
		"pand %%xmm7, %%xmm4 ;\n"
		"pand %%xmm7, %%xmm5 ;\n"
		"pxor %%xmm4, %%xmm0 ;\n"
		"pxor %%xmm5, %%xmm1 ;\n"
		"pxor %%xmm0, %%xmm2 ;\n"
		"pxor %%xmm1, %%xmm3 ;\n"

		/* store ysubx at t + 0 (eax = t) */
		GE25519_CHOOSE_STORE_LIMBS

		/* store xaddy at t + 48 */
		"addl $48, %%eax ;\n"
		"movdqa %%xmm2, %%xmm0 ;\n"
		"movdqa %%xmm3, %%xmm1 ;\n"
		GE25519_CHOOSE_STORE_LIMBS

		/* t2d: broadcast u again and clear the accumulators */
		"movl %0, %%eax ;\n"
		"movd %%eax, %%xmm6 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"

		/* 0: the candidate is zero, so nothing is merged */
		GE25519_CHOOSE_MASK(0)
		"pxor %%xmm0, %%xmm0 ;\n"
		"pxor %%xmm1, %%xmm1 ;\n"

		/* 1..8 */
		GE25519_CHOOSE_T2D(1,  64,  80)
		GE25519_CHOOSE_T2D(2, 160, 176)
		GE25519_CHOOSE_T2D(3, 256, 272)
		GE25519_CHOOSE_T2D(4, 352, 368)
		GE25519_CHOOSE_T2D(5, 448, 464)
		GE25519_CHOOSE_T2D(6, 544, 560)
		GE25519_CHOOSE_T2D(7, 640, 656)
		GE25519_CHOOSE_T2D(8, 736, 752)

		/* store t2d at t + 96, then reload the twelve stored words into xmm0..xmm2 */
		"movl %2, %%eax ;\n"
		"addl $96, %%eax ;\n"
		GE25519_CHOOSE_STORE_LIMBS
		"movdqa 0(%%eax), %%xmm0 ;\n"
		"movdqa 16(%%eax), %%xmm1 ;\n"
		"movdqa 32(%%eax), %%xmm2 ;\n"

		/* conditionally negate t2d */

		/* set up 2p in to 3/4 (and 5):
		   xmm3 = { 0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe }
		   xmm4 = { 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe }
		   xmm5 = { 0x7fffffe, 0x3fffffe, 0, 0 } */
		"movl $0x7ffffda, %%ecx ;\n"
		"movl $0x3fffffe, %%edx ;\n"
		"movd %%ecx, %%xmm3 ;\n"
		"movd %%edx, %%xmm5 ;\n"
		"movl $0x7fffffe, %%ecx ;\n"
		"movd %%ecx, %%xmm4 ;\n"
		"punpckldq %%xmm5, %%xmm3 ;\n"
		"punpckldq %%xmm5, %%xmm4 ;\n"
		"punpcklqdq %%xmm4, %%xmm3 ;\n"
		"movdqa %%xmm4, %%xmm5 ;\n"
		"punpcklqdq %%xmm4, %%xmm4 ;\n"

		/* subtract and conditionally move: (sign - 1) is all-ones when
		   sign == 0 (keep t2d) and zero when sign == 1 (take 2p - t2d) */
		"movl %3, %%ecx ;\n"
		"sub $1, %%ecx ;\n"
		"movd %%ecx, %%xmm6 ;\n"
		"pshufd $0x00, %%xmm6, %%xmm6 ;\n"
		"movdqa %%xmm6, %%xmm7 ;\n"
		"psubd %%xmm0, %%xmm3 ;\n"
		"psubd %%xmm1, %%xmm4 ;\n"
		"psubd %%xmm2, %%xmm5 ;\n"
		"pand %%xmm6, %%xmm0 ;\n"
		"pand %%xmm6, %%xmm1 ;\n"
		"pand %%xmm6, %%xmm2 ;\n"
		"pandn %%xmm3, %%xmm6 ;\n"
		"movdqa %%xmm7, %%xmm3 ;\n"
		"pandn %%xmm4, %%xmm7 ;\n"
		"pandn %%xmm5, %%xmm3 ;\n"
		"por %%xmm6, %%xmm0 ;\n"
		"por %%xmm7, %%xmm1 ;\n"
		"por %%xmm3, %%xmm2 ;\n"

		/* store */
		"movdqa %%xmm0, 0(%%eax) ;\n"
		"movdqa %%xmm1, 16(%%eax) ;\n"
		"movdqa %%xmm2, 32(%%eax) ;\n"
		:
		: "m"(u), "r"(&table[pos * 8]), "m"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
		: "%eax", "%ecx", "%edx", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
	);
}

#undef GE25519_CHOOSE_STORE_LIMBS
#undef GE25519_CHOOSE_T2D
#undef GE25519_CHOOSE_YSUBX_XADDY
#undef GE25519_CHOOSE_MERGE
#undef GE25519_CHOOSE_MASK

#endif /* defined(ED25519_GCC_32BIT_SSE_CHOOSE) */