sha3 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sha3 might be problematic. Click here for more details.

@@ -0,0 +1,1187 @@
1
+ /*
2
+ Code automatically generated by KeccakTools!
3
+
4
+ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
5
+ Michaël Peeters and Gilles Van Assche. For more information, feedback or
6
+ questions, please refer to our website: http://keccak.noekeon.org/
7
+
8
+ Implementation by the designers,
9
+ hereby denoted as "the implementer".
10
+
11
+ To the extent possible under law, the implementer has waived all copyright
12
+ and related or neighboring rights to the source code in this file.
13
+ http://creativecommons.org/publicdomain/zero/1.0/
14
+ */
15
+
16
/*
 * declareABCDE: expands to the declarations of every UINT32 working
 * variable used by the other macros in this file.
 *
 * Families A, B and E each provide 25 names (first letter b/g/k/m/s
 * crossed with second letter a/e/i/o/u), and C and D each provide 5
 * names (a/e/i/o/u); every name exists in a "0" and a "1" variant.
 * The round macros below are commented as "factor 2 interleaving,
 * 64-bit lanes mapped to 32-bit words", which is presumably why each
 * name comes as a 0/1 pair.
 *
 * UINT32 is not defined in this part of the file and must already be
 * visible where the macro is expanded. The expansion consists only of
 * declarations, so it has to appear where declarations are permitted.
 */
+ #define declareABCDE \
17
+ UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \
18
+ UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
19
+ UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
20
+ UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
21
+ UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
22
+ UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
23
+ UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
24
+ UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
25
+ UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
26
+ UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
27
+ UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \
28
+ UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \
29
+ UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \
30
+ UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \
31
+ UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \
32
+ UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \
33
+ UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \
34
+ UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \
35
+ UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \
36
+ UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \
37
+ UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \
38
+ UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \
39
+ UINT32 Da0, De0, Di0, Do0, Du0; \
40
+ UINT32 Da1, De1, Di1, Do1, Du1; \
41
+ UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \
42
+ UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
43
+ UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
44
+ UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
45
+ UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
46
+ UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
47
+ UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
48
+ UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
49
+ UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
50
+ UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
51
+
52
/*
 * prepareTheta: fills the ten C variables from the A variables.
 *
 * For each second letter x in a/e/i/o/u and each suffix n in 0/1,
 * Cxn is set to the XOR of the five A variables that share that
 * second letter and suffix: Abxn ^ Agxn ^ Akxn ^ Amxn ^ Asxn.
 *
 * Unlike the round macros, the variable family is hard-coded to "A"
 * here; the macro takes no arguments. It only reads A and only
 * writes C.
 */
+ #define prepareTheta \
53
+ Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \
54
+ Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \
55
+ Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \
56
+ Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \
57
+ Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \
58
+ Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \
59
+ Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \
60
+ Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \
61
+ Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \
62
+ Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \
63
+
64
+ #ifdef UseBebigokimisa
65
+ // --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa')
66
+ // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
67
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E), UseBebigokimisa variant.
 *
 *   i  index used to read KeccakF1600RoundConstants_int2_0[i] and
 *      KeccakF1600RoundConstants_int2_1[i].
 *   A  token pasted (##) in front of the lane names to form the input
 *      variables; these variables are MODIFIED (the D values are
 *      XORed into them in place).
 *   E  token pasted in front of the lane names to form the output
 *      variables.
 *
 * Sequence of the expansion:
 *   1. The ten D variables are derived from the current C variables
 *      (each XORs one C word with another C word, rotated left by 1
 *      for the "0" variants).
 *   2. In ten groups of five, A variables are XORed with a D value,
 *      rotated with ROL32 by a fixed amount (or used unrotated) and
 *      stored in the B variables.
 *   3. Each E variable is computed from three B variables of its
 *      group. This variant uses the mixed OR/AND/NOT combinations
 *      written out below instead of one uniform formula.
 *   4. The round constants are XORed into E##ba0 and E##ba1.
 *   5. The C variables are rebuilt as the XOR of the new E variables
 *      (assigned in the first two groups, XOR-accumulated in the
 *      rest), so C is ready for a following round on E.
 *
 * The C, D and B variables and ROL32 are not parameters; they must be
 * in scope at the expansion site. The macro name suggests these steps
 * correspond to Keccak's theta, rho, pi, chi and iota (not otherwise
 * stated in this file).
 */
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
68
+ Da0 = Cu0^ROL32(Ce1, 1); \
69
+ Da1 = Cu1^Ce0; \
70
+ De0 = Ca0^ROL32(Ci1, 1); \
71
+ De1 = Ca1^Ci0; \
72
+ Di0 = Ce0^ROL32(Co1, 1); \
73
+ Di1 = Ce1^Co0; \
74
+ Do0 = Ci0^ROL32(Cu1, 1); \
75
+ Do1 = Ci1^Cu0; \
76
+ Du0 = Co0^ROL32(Ca1, 1); \
77
+ Du1 = Co1^Ca0; \
78
+ \
79
+ A##ba0 ^= Da0; \
80
+ Bba0 = A##ba0; \
81
+ A##ge0 ^= De0; \
82
+ Bbe0 = ROL32(A##ge0, 22); \
83
+ A##ki1 ^= Di1; \
84
+ Bbi0 = ROL32(A##ki1, 22); \
85
+ A##mo1 ^= Do1; \
86
+ Bbo0 = ROL32(A##mo1, 11); \
87
+ A##su0 ^= Du0; \
88
+ Bbu0 = ROL32(A##su0, 7); \
89
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
90
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
91
+ Ca0 = E##ba0; \
92
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
93
+ Ce0 = E##be0; \
94
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
95
+ Ci0 = E##bi0; \
96
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
97
+ Co0 = E##bo0; \
98
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
99
+ Cu0 = E##bu0; \
100
+ \
101
+ A##ba1 ^= Da1; \
102
+ Bba1 = A##ba1; \
103
+ A##ge1 ^= De1; \
104
+ Bbe1 = ROL32(A##ge1, 22); \
105
+ A##ki0 ^= Di0; \
106
+ Bbi1 = ROL32(A##ki0, 21); \
107
+ A##mo0 ^= Do0; \
108
+ Bbo1 = ROL32(A##mo0, 10); \
109
+ A##su1 ^= Du1; \
110
+ Bbu1 = ROL32(A##su1, 7); \
111
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
112
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
113
+ Ca1 = E##ba1; \
114
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
115
+ Ce1 = E##be1; \
116
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
117
+ Ci1 = E##bi1; \
118
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
119
+ Co1 = E##bo1; \
120
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
121
+ Cu1 = E##bu1; \
122
+ \
123
+ A##bo0 ^= Do0; \
124
+ Bga0 = ROL32(A##bo0, 14); \
125
+ A##gu0 ^= Du0; \
126
+ Bge0 = ROL32(A##gu0, 10); \
127
+ A##ka1 ^= Da1; \
128
+ Bgi0 = ROL32(A##ka1, 2); \
129
+ A##me1 ^= De1; \
130
+ Bgo0 = ROL32(A##me1, 23); \
131
+ A##si1 ^= Di1; \
132
+ Bgu0 = ROL32(A##si1, 31); \
133
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
134
+ Ca0 ^= E##ga0; \
135
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
136
+ Ce0 ^= E##ge0; \
137
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
138
+ Ci0 ^= E##gi0; \
139
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
140
+ Co0 ^= E##go0; \
141
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
142
+ Cu0 ^= E##gu0; \
143
+ \
144
+ A##bo1 ^= Do1; \
145
+ Bga1 = ROL32(A##bo1, 14); \
146
+ A##gu1 ^= Du1; \
147
+ Bge1 = ROL32(A##gu1, 10); \
148
+ A##ka0 ^= Da0; \
149
+ Bgi1 = ROL32(A##ka0, 1); \
150
+ A##me0 ^= De0; \
151
+ Bgo1 = ROL32(A##me0, 22); \
152
+ A##si0 ^= Di0; \
153
+ Bgu1 = ROL32(A##si0, 30); \
154
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
155
+ Ca1 ^= E##ga1; \
156
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
157
+ Ce1 ^= E##ge1; \
158
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
159
+ Ci1 ^= E##gi1; \
160
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
161
+ Co1 ^= E##go1; \
162
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
163
+ Cu1 ^= E##gu1; \
164
+ \
165
+ A##be1 ^= De1; \
166
+ Bka0 = ROL32(A##be1, 1); \
167
+ A##gi0 ^= Di0; \
168
+ Bke0 = ROL32(A##gi0, 3); \
169
+ A##ko1 ^= Do1; \
170
+ Bki0 = ROL32(A##ko1, 13); \
171
+ A##mu0 ^= Du0; \
172
+ Bko0 = ROL32(A##mu0, 4); \
173
+ A##sa0 ^= Da0; \
174
+ Bku0 = ROL32(A##sa0, 9); \
175
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
176
+ Ca0 ^= E##ka0; \
177
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
178
+ Ce0 ^= E##ke0; \
179
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
180
+ Ci0 ^= E##ki0; \
181
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
182
+ Co0 ^= E##ko0; \
183
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
184
+ Cu0 ^= E##ku0; \
185
+ \
186
+ A##be0 ^= De0; \
187
+ Bka1 = A##be0; \
188
+ A##gi1 ^= Di1; \
189
+ Bke1 = ROL32(A##gi1, 3); \
190
+ A##ko0 ^= Do0; \
191
+ Bki1 = ROL32(A##ko0, 12); \
192
+ A##mu1 ^= Du1; \
193
+ Bko1 = ROL32(A##mu1, 4); \
194
+ A##sa1 ^= Da1; \
195
+ Bku1 = ROL32(A##sa1, 9); \
196
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
197
+ Ca1 ^= E##ka1; \
198
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
199
+ Ce1 ^= E##ke1; \
200
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
201
+ Ci1 ^= E##ki1; \
202
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
203
+ Co1 ^= E##ko1; \
204
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
205
+ Cu1 ^= E##ku1; \
206
+ \
207
+ A##bu1 ^= Du1; \
208
+ Bma0 = ROL32(A##bu1, 14); \
209
+ A##ga0 ^= Da0; \
210
+ Bme0 = ROL32(A##ga0, 18); \
211
+ A##ke0 ^= De0; \
212
+ Bmi0 = ROL32(A##ke0, 5); \
213
+ A##mi1 ^= Di1; \
214
+ Bmo0 = ROL32(A##mi1, 8); \
215
+ A##so0 ^= Do0; \
216
+ Bmu0 = ROL32(A##so0, 28); \
217
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
218
+ Ca0 ^= E##ma0; \
219
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
220
+ Ce0 ^= E##me0; \
221
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
222
+ Ci0 ^= E##mi0; \
223
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
224
+ Co0 ^= E##mo0; \
225
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
226
+ Cu0 ^= E##mu0; \
227
+ \
228
+ A##bu0 ^= Du0; \
229
+ Bma1 = ROL32(A##bu0, 13); \
230
+ A##ga1 ^= Da1; \
231
+ Bme1 = ROL32(A##ga1, 18); \
232
+ A##ke1 ^= De1; \
233
+ Bmi1 = ROL32(A##ke1, 5); \
234
+ A##mi0 ^= Di0; \
235
+ Bmo1 = ROL32(A##mi0, 7); \
236
+ A##so1 ^= Do1; \
237
+ Bmu1 = ROL32(A##so1, 28); \
238
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
239
+ Ca1 ^= E##ma1; \
240
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
241
+ Ce1 ^= E##me1; \
242
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
243
+ Ci1 ^= E##mi1; \
244
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
245
+ Co1 ^= E##mo1; \
246
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
247
+ Cu1 ^= E##mu1; \
248
+ \
249
+ A##bi0 ^= Di0; \
250
+ Bsa0 = ROL32(A##bi0, 31); \
251
+ A##go1 ^= Do1; \
252
+ Bse0 = ROL32(A##go1, 28); \
253
+ A##ku1 ^= Du1; \
254
+ Bsi0 = ROL32(A##ku1, 20); \
255
+ A##ma1 ^= Da1; \
256
+ Bso0 = ROL32(A##ma1, 21); \
257
+ A##se0 ^= De0; \
258
+ Bsu0 = ROL32(A##se0, 1); \
259
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
260
+ Ca0 ^= E##sa0; \
261
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
262
+ Ce0 ^= E##se0; \
263
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
264
+ Ci0 ^= E##si0; \
265
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
266
+ Co0 ^= E##so0; \
267
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
268
+ Cu0 ^= E##su0; \
269
+ \
270
+ A##bi1 ^= Di1; \
271
+ Bsa1 = ROL32(A##bi1, 31); \
272
+ A##go0 ^= Do0; \
273
+ Bse1 = ROL32(A##go0, 27); \
274
+ A##ku0 ^= Du0; \
275
+ Bsi1 = ROL32(A##ku0, 19); \
276
+ A##ma0 ^= Da0; \
277
+ Bso1 = ROL32(A##ma0, 20); \
278
+ A##se1 ^= De1; \
279
+ Bsu1 = ROL32(A##se1, 1); \
280
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
281
+ Ca1 ^= E##sa1; \
282
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
283
+ Ce1 ^= E##se1; \
284
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
285
+ Ci1 ^= E##si1; \
286
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
287
+ Co1 ^= E##so1; \
288
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
289
+ Cu1 ^= E##su1; \
290
+ \
291
+
292
+ // --- Code for round (lane complementing pattern 'bebigokimisa')
293
+ // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
294
/*
 * thetaRhoPiChiIota(i, A, E), UseBebigokimisa variant.
 *
 *   i  index used to read KeccakF1600RoundConstants_int2_0[i] and
 *      KeccakF1600RoundConstants_int2_1[i].
 *   A  token pasted (##) in front of the lane names to form the input
 *      variables; these variables are MODIFIED (the D values are
 *      XORed into them in place).
 *   E  token pasted in front of the lane names to form the output
 *      variables.
 *
 * The expansion derives the D variables from the current C variables,
 * XORs them into the A variables, rotates the results into the B
 * variables with ROL32, and computes each E variable from three B
 * variables using the mixed OR/AND/NOT combinations written out
 * below. The round constants are XORed into E##ba0 and E##ba1.
 *
 * This macro reads the C variables but never writes them, so C does
 * not describe the E state afterwards.
 *
 * The C, D and B variables and ROL32 must be in scope at the
 * expansion site.
 */
+ #define thetaRhoPiChiIota(i, A, E) \
295
+ Da0 = Cu0^ROL32(Ce1, 1); \
296
+ Da1 = Cu1^Ce0; \
297
+ De0 = Ca0^ROL32(Ci1, 1); \
298
+ De1 = Ca1^Ci0; \
299
+ Di0 = Ce0^ROL32(Co1, 1); \
300
+ Di1 = Ce1^Co0; \
301
+ Do0 = Ci0^ROL32(Cu1, 1); \
302
+ Do1 = Ci1^Cu0; \
303
+ Du0 = Co0^ROL32(Ca1, 1); \
304
+ Du1 = Co1^Ca0; \
305
+ \
306
+ A##ba0 ^= Da0; \
307
+ Bba0 = A##ba0; \
308
+ A##ge0 ^= De0; \
309
+ Bbe0 = ROL32(A##ge0, 22); \
310
+ A##ki1 ^= Di1; \
311
+ Bbi0 = ROL32(A##ki1, 22); \
312
+ A##mo1 ^= Do1; \
313
+ Bbo0 = ROL32(A##mo1, 11); \
314
+ A##su0 ^= Du0; \
315
+ Bbu0 = ROL32(A##su0, 7); \
316
+ E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \
317
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
318
+ E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \
319
+ E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \
320
+ E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \
321
+ E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \
322
+ \
323
+ A##ba1 ^= Da1; \
324
+ Bba1 = A##ba1; \
325
+ A##ge1 ^= De1; \
326
+ Bbe1 = ROL32(A##ge1, 22); \
327
+ A##ki0 ^= Di0; \
328
+ Bbi1 = ROL32(A##ki0, 21); \
329
+ A##mo0 ^= Do0; \
330
+ Bbo1 = ROL32(A##mo0, 10); \
331
+ A##su1 ^= Du1; \
332
+ Bbu1 = ROL32(A##su1, 7); \
333
+ E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \
334
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
335
+ E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \
336
+ E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \
337
+ E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \
338
+ E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \
339
+ \
340
+ A##bo0 ^= Do0; \
341
+ Bga0 = ROL32(A##bo0, 14); \
342
+ A##gu0 ^= Du0; \
343
+ Bge0 = ROL32(A##gu0, 10); \
344
+ A##ka1 ^= Da1; \
345
+ Bgi0 = ROL32(A##ka1, 2); \
346
+ A##me1 ^= De1; \
347
+ Bgo0 = ROL32(A##me1, 23); \
348
+ A##si1 ^= Di1; \
349
+ Bgu0 = ROL32(A##si1, 31); \
350
+ E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \
351
+ E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \
352
+ E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \
353
+ E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \
354
+ E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \
355
+ \
356
+ A##bo1 ^= Do1; \
357
+ Bga1 = ROL32(A##bo1, 14); \
358
+ A##gu1 ^= Du1; \
359
+ Bge1 = ROL32(A##gu1, 10); \
360
+ A##ka0 ^= Da0; \
361
+ Bgi1 = ROL32(A##ka0, 1); \
362
+ A##me0 ^= De0; \
363
+ Bgo1 = ROL32(A##me0, 22); \
364
+ A##si0 ^= Di0; \
365
+ Bgu1 = ROL32(A##si0, 30); \
366
+ E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \
367
+ E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \
368
+ E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \
369
+ E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \
370
+ E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \
371
+ \
372
+ A##be1 ^= De1; \
373
+ Bka0 = ROL32(A##be1, 1); \
374
+ A##gi0 ^= Di0; \
375
+ Bke0 = ROL32(A##gi0, 3); \
376
+ A##ko1 ^= Do1; \
377
+ Bki0 = ROL32(A##ko1, 13); \
378
+ A##mu0 ^= Du0; \
379
+ Bko0 = ROL32(A##mu0, 4); \
380
+ A##sa0 ^= Da0; \
381
+ Bku0 = ROL32(A##sa0, 9); \
382
+ E##ka0 = Bka0 ^( Bke0 | Bki0 ); \
383
+ E##ke0 = Bke0 ^( Bki0 & Bko0 ); \
384
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
385
+ E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \
386
+ E##ku0 = Bku0 ^( Bka0 & Bke0 ); \
387
+ \
388
+ A##be0 ^= De0; \
389
+ Bka1 = A##be0; \
390
+ A##gi1 ^= Di1; \
391
+ Bke1 = ROL32(A##gi1, 3); \
392
+ A##ko0 ^= Do0; \
393
+ Bki1 = ROL32(A##ko0, 12); \
394
+ A##mu1 ^= Du1; \
395
+ Bko1 = ROL32(A##mu1, 4); \
396
+ A##sa1 ^= Da1; \
397
+ Bku1 = ROL32(A##sa1, 9); \
398
+ E##ka1 = Bka1 ^( Bke1 | Bki1 ); \
399
+ E##ke1 = Bke1 ^( Bki1 & Bko1 ); \
400
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
401
+ E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \
402
+ E##ku1 = Bku1 ^( Bka1 & Bke1 ); \
403
+ \
404
+ A##bu1 ^= Du1; \
405
+ Bma0 = ROL32(A##bu1, 14); \
406
+ A##ga0 ^= Da0; \
407
+ Bme0 = ROL32(A##ga0, 18); \
408
+ A##ke0 ^= De0; \
409
+ Bmi0 = ROL32(A##ke0, 5); \
410
+ A##mi1 ^= Di1; \
411
+ Bmo0 = ROL32(A##mi1, 8); \
412
+ A##so0 ^= Do0; \
413
+ Bmu0 = ROL32(A##so0, 28); \
414
+ E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \
415
+ E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \
416
+ E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \
417
+ E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \
418
+ E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \
419
+ \
420
+ A##bu0 ^= Du0; \
421
+ Bma1 = ROL32(A##bu0, 13); \
422
+ A##ga1 ^= Da1; \
423
+ Bme1 = ROL32(A##ga1, 18); \
424
+ A##ke1 ^= De1; \
425
+ Bmi1 = ROL32(A##ke1, 5); \
426
+ A##mi0 ^= Di0; \
427
+ Bmo1 = ROL32(A##mi0, 7); \
428
+ A##so1 ^= Do1; \
429
+ Bmu1 = ROL32(A##so1, 28); \
430
+ E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \
431
+ E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \
432
+ E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \
433
+ E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \
434
+ E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \
435
+ \
436
+ A##bi0 ^= Di0; \
437
+ Bsa0 = ROL32(A##bi0, 31); \
438
+ A##go1 ^= Do1; \
439
+ Bse0 = ROL32(A##go1, 28); \
440
+ A##ku1 ^= Du1; \
441
+ Bsi0 = ROL32(A##ku1, 20); \
442
+ A##ma1 ^= Da1; \
443
+ Bso0 = ROL32(A##ma1, 21); \
444
+ A##se0 ^= De0; \
445
+ Bsu0 = ROL32(A##se0, 1); \
446
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
447
+ E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \
448
+ E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \
449
+ E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \
450
+ E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \
451
+ \
452
+ A##bi1 ^= Di1; \
453
+ Bsa1 = ROL32(A##bi1, 31); \
454
+ A##go0 ^= Do0; \
455
+ Bse1 = ROL32(A##go0, 27); \
456
+ A##ku0 ^= Du0; \
457
+ Bsi1 = ROL32(A##ku0, 19); \
458
+ A##ma0 ^= Da0; \
459
+ Bso1 = ROL32(A##ma0, 20); \
460
+ A##se1 ^= De1; \
461
+ Bsu1 = ROL32(A##se1, 1); \
462
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
463
+ E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \
464
+ E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \
465
+ E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \
466
+ E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \
467
+ \
468
+
469
+ #else // UseBebigokimisa
470
+ // --- Code for round, with prepare-theta
471
+ // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
472
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E), variant used when
 * UseBebigokimisa is not defined.
 *
 *   i  index used to read KeccakF1600RoundConstants_int2_0[i] and
 *      KeccakF1600RoundConstants_int2_1[i].
 *   A  token pasted (##) in front of the lane names to form the input
 *      variables; these variables are MODIFIED (the D values are
 *      XORed into them in place).
 *   E  token pasted in front of the lane names to form the output
 *      variables.
 *
 * Sequence of the expansion:
 *   1. The ten D variables are derived from the current C variables.
 *   2. In ten groups of five, A variables are XORed with a D value,
 *      rotated with ROL32 by a fixed amount (or used unrotated) and
 *      stored in the B variables.
 *   3. Every E variable is computed with the same formula,
 *      E = B ^ ((~B_next) & B_next2), where B_next and B_next2 are the
 *      following two B variables of the group in a/e/i/o/u order,
 *      wrapping around.
 *   4. The round constants are XORed into E##ba0 and E##ba1.
 *   5. The C variables are rebuilt as the XOR of the new E variables,
 *      so C is ready for a following round on E.
 *
 * The C, D and B variables and ROL32 must be in scope at the
 * expansion site.
 */
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
473
+ Da0 = Cu0^ROL32(Ce1, 1); \
474
+ Da1 = Cu1^Ce0; \
475
+ De0 = Ca0^ROL32(Ci1, 1); \
476
+ De1 = Ca1^Ci0; \
477
+ Di0 = Ce0^ROL32(Co1, 1); \
478
+ Di1 = Ce1^Co0; \
479
+ Do0 = Ci0^ROL32(Cu1, 1); \
480
+ Do1 = Ci1^Cu0; \
481
+ Du0 = Co0^ROL32(Ca1, 1); \
482
+ Du1 = Co1^Ca0; \
483
+ \
484
+ A##ba0 ^= Da0; \
485
+ Bba0 = A##ba0; \
486
+ A##ge0 ^= De0; \
487
+ Bbe0 = ROL32(A##ge0, 22); \
488
+ A##ki1 ^= Di1; \
489
+ Bbi0 = ROL32(A##ki1, 22); \
490
+ A##mo1 ^= Do1; \
491
+ Bbo0 = ROL32(A##mo1, 11); \
492
+ A##su0 ^= Du0; \
493
+ Bbu0 = ROL32(A##su0, 7); \
494
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
495
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
496
+ Ca0 = E##ba0; \
497
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
498
+ Ce0 = E##be0; \
499
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
500
+ Ci0 = E##bi0; \
501
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
502
+ Co0 = E##bo0; \
503
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
504
+ Cu0 = E##bu0; \
505
+ \
506
+ A##ba1 ^= Da1; \
507
+ Bba1 = A##ba1; \
508
+ A##ge1 ^= De1; \
509
+ Bbe1 = ROL32(A##ge1, 22); \
510
+ A##ki0 ^= Di0; \
511
+ Bbi1 = ROL32(A##ki0, 21); \
512
+ A##mo0 ^= Do0; \
513
+ Bbo1 = ROL32(A##mo0, 10); \
514
+ A##su1 ^= Du1; \
515
+ Bbu1 = ROL32(A##su1, 7); \
516
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
517
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
518
+ Ca1 = E##ba1; \
519
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
520
+ Ce1 = E##be1; \
521
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
522
+ Ci1 = E##bi1; \
523
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
524
+ Co1 = E##bo1; \
525
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
526
+ Cu1 = E##bu1; \
527
+ \
528
+ A##bo0 ^= Do0; \
529
+ Bga0 = ROL32(A##bo0, 14); \
530
+ A##gu0 ^= Du0; \
531
+ Bge0 = ROL32(A##gu0, 10); \
532
+ A##ka1 ^= Da1; \
533
+ Bgi0 = ROL32(A##ka1, 2); \
534
+ A##me1 ^= De1; \
535
+ Bgo0 = ROL32(A##me1, 23); \
536
+ A##si1 ^= Di1; \
537
+ Bgu0 = ROL32(A##si1, 31); \
538
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
539
+ Ca0 ^= E##ga0; \
540
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
541
+ Ce0 ^= E##ge0; \
542
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
543
+ Ci0 ^= E##gi0; \
544
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
545
+ Co0 ^= E##go0; \
546
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
547
+ Cu0 ^= E##gu0; \
548
+ \
549
+ A##bo1 ^= Do1; \
550
+ Bga1 = ROL32(A##bo1, 14); \
551
+ A##gu1 ^= Du1; \
552
+ Bge1 = ROL32(A##gu1, 10); \
553
+ A##ka0 ^= Da0; \
554
+ Bgi1 = ROL32(A##ka0, 1); \
555
+ A##me0 ^= De0; \
556
+ Bgo1 = ROL32(A##me0, 22); \
557
+ A##si0 ^= Di0; \
558
+ Bgu1 = ROL32(A##si0, 30); \
559
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
560
+ Ca1 ^= E##ga1; \
561
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
562
+ Ce1 ^= E##ge1; \
563
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
564
+ Ci1 ^= E##gi1; \
565
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
566
+ Co1 ^= E##go1; \
567
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
568
+ Cu1 ^= E##gu1; \
569
+ \
570
+ A##be1 ^= De1; \
571
+ Bka0 = ROL32(A##be1, 1); \
572
+ A##gi0 ^= Di0; \
573
+ Bke0 = ROL32(A##gi0, 3); \
574
+ A##ko1 ^= Do1; \
575
+ Bki0 = ROL32(A##ko1, 13); \
576
+ A##mu0 ^= Du0; \
577
+ Bko0 = ROL32(A##mu0, 4); \
578
+ A##sa0 ^= Da0; \
579
+ Bku0 = ROL32(A##sa0, 9); \
580
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
581
+ Ca0 ^= E##ka0; \
582
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
583
+ Ce0 ^= E##ke0; \
584
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
585
+ Ci0 ^= E##ki0; \
586
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
587
+ Co0 ^= E##ko0; \
588
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
589
+ Cu0 ^= E##ku0; \
590
+ \
591
+ A##be0 ^= De0; \
592
+ Bka1 = A##be0; \
593
+ A##gi1 ^= Di1; \
594
+ Bke1 = ROL32(A##gi1, 3); \
595
+ A##ko0 ^= Do0; \
596
+ Bki1 = ROL32(A##ko0, 12); \
597
+ A##mu1 ^= Du1; \
598
+ Bko1 = ROL32(A##mu1, 4); \
599
+ A##sa1 ^= Da1; \
600
+ Bku1 = ROL32(A##sa1, 9); \
601
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
602
+ Ca1 ^= E##ka1; \
603
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
604
+ Ce1 ^= E##ke1; \
605
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
606
+ Ci1 ^= E##ki1; \
607
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
608
+ Co1 ^= E##ko1; \
609
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
610
+ Cu1 ^= E##ku1; \
611
+ \
612
+ A##bu1 ^= Du1; \
613
+ Bma0 = ROL32(A##bu1, 14); \
614
+ A##ga0 ^= Da0; \
615
+ Bme0 = ROL32(A##ga0, 18); \
616
+ A##ke0 ^= De0; \
617
+ Bmi0 = ROL32(A##ke0, 5); \
618
+ A##mi1 ^= Di1; \
619
+ Bmo0 = ROL32(A##mi1, 8); \
620
+ A##so0 ^= Do0; \
621
+ Bmu0 = ROL32(A##so0, 28); \
622
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
623
+ Ca0 ^= E##ma0; \
624
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
625
+ Ce0 ^= E##me0; \
626
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
627
+ Ci0 ^= E##mi0; \
628
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
629
+ Co0 ^= E##mo0; \
630
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
631
+ Cu0 ^= E##mu0; \
632
+ \
633
+ A##bu0 ^= Du0; \
634
+ Bma1 = ROL32(A##bu0, 13); \
635
+ A##ga1 ^= Da1; \
636
+ Bme1 = ROL32(A##ga1, 18); \
637
+ A##ke1 ^= De1; \
638
+ Bmi1 = ROL32(A##ke1, 5); \
639
+ A##mi0 ^= Di0; \
640
+ Bmo1 = ROL32(A##mi0, 7); \
641
+ A##so1 ^= Do1; \
642
+ Bmu1 = ROL32(A##so1, 28); \
643
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
644
+ Ca1 ^= E##ma1; \
645
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
646
+ Ce1 ^= E##me1; \
647
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
648
+ Ci1 ^= E##mi1; \
649
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
650
+ Co1 ^= E##mo1; \
651
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
652
+ Cu1 ^= E##mu1; \
653
+ \
654
+ A##bi0 ^= Di0; \
655
+ Bsa0 = ROL32(A##bi0, 31); \
656
+ A##go1 ^= Do1; \
657
+ Bse0 = ROL32(A##go1, 28); \
658
+ A##ku1 ^= Du1; \
659
+ Bsi0 = ROL32(A##ku1, 20); \
660
+ A##ma1 ^= Da1; \
661
+ Bso0 = ROL32(A##ma1, 21); \
662
+ A##se0 ^= De0; \
663
+ Bsu0 = ROL32(A##se0, 1); \
664
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
665
+ Ca0 ^= E##sa0; \
666
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
667
+ Ce0 ^= E##se0; \
668
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
669
+ Ci0 ^= E##si0; \
670
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
671
+ Co0 ^= E##so0; \
672
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
673
+ Cu0 ^= E##su0; \
674
+ \
675
+ A##bi1 ^= Di1; \
676
+ Bsa1 = ROL32(A##bi1, 31); \
677
+ A##go0 ^= Do0; \
678
+ Bse1 = ROL32(A##go0, 27); \
679
+ A##ku0 ^= Du0; \
680
+ Bsi1 = ROL32(A##ku0, 19); \
681
+ A##ma0 ^= Da0; \
682
+ Bso1 = ROL32(A##ma0, 20); \
683
+ A##se1 ^= De1; \
684
+ Bsu1 = ROL32(A##se1, 1); \
685
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
686
+ Ca1 ^= E##sa1; \
687
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
688
+ Ce1 ^= E##se1; \
689
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
690
+ Ci1 ^= E##si1; \
691
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
692
+ Co1 ^= E##so1; \
693
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
694
+ Cu1 ^= E##su1; \
695
+ \
696
+
697
+ // --- Code for round
698
+ // --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words
699
/*
 * thetaRhoPiChiIota(i, A, E), variant used when UseBebigokimisa is
 * not defined.
 *
 *   i  index used to read KeccakF1600RoundConstants_int2_0[i] and
 *      KeccakF1600RoundConstants_int2_1[i].
 *   A  token pasted (##) in front of the lane names to form the input
 *      variables; these variables are MODIFIED (the D values are
 *      XORed into them in place).
 *   E  token pasted in front of the lane names to form the output
 *      variables.
 *
 * The expansion derives the D variables from the current C variables,
 * XORs them into the A variables, rotates the results into the B
 * variables with ROL32, and computes every E variable as
 * E = B ^ ((~B_next) & B_next2) within its group of five. The round
 * constants are XORed into E##ba0 and E##ba1.
 *
 * This macro reads the C variables but never writes them, so C does
 * not describe the E state afterwards.
 *
 * The C, D and B variables and ROL32 must be in scope at the
 * expansion site.
 */
+ #define thetaRhoPiChiIota(i, A, E) \
700
+ Da0 = Cu0^ROL32(Ce1, 1); \
701
+ Da1 = Cu1^Ce0; \
702
+ De0 = Ca0^ROL32(Ci1, 1); \
703
+ De1 = Ca1^Ci0; \
704
+ Di0 = Ce0^ROL32(Co1, 1); \
705
+ Di1 = Ce1^Co0; \
706
+ Do0 = Ci0^ROL32(Cu1, 1); \
707
+ Do1 = Ci1^Cu0; \
708
+ Du0 = Co0^ROL32(Ca1, 1); \
709
+ Du1 = Co1^Ca0; \
710
+ \
711
+ A##ba0 ^= Da0; \
712
+ Bba0 = A##ba0; \
713
+ A##ge0 ^= De0; \
714
+ Bbe0 = ROL32(A##ge0, 22); \
715
+ A##ki1 ^= Di1; \
716
+ Bbi0 = ROL32(A##ki1, 22); \
717
+ A##mo1 ^= Do1; \
718
+ Bbo0 = ROL32(A##mo1, 11); \
719
+ A##su0 ^= Du0; \
720
+ Bbu0 = ROL32(A##su0, 7); \
721
+ E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \
722
+ E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \
723
+ E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \
724
+ E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \
725
+ E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \
726
+ E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \
727
+ \
728
+ A##ba1 ^= Da1; \
729
+ Bba1 = A##ba1; \
730
+ A##ge1 ^= De1; \
731
+ Bbe1 = ROL32(A##ge1, 22); \
732
+ A##ki0 ^= Di0; \
733
+ Bbi1 = ROL32(A##ki0, 21); \
734
+ A##mo0 ^= Do0; \
735
+ Bbo1 = ROL32(A##mo0, 10); \
736
+ A##su1 ^= Du1; \
737
+ Bbu1 = ROL32(A##su1, 7); \
738
+ E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \
739
+ E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \
740
+ E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \
741
+ E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \
742
+ E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \
743
+ E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \
744
+ \
745
+ A##bo0 ^= Do0; \
746
+ Bga0 = ROL32(A##bo0, 14); \
747
+ A##gu0 ^= Du0; \
748
+ Bge0 = ROL32(A##gu0, 10); \
749
+ A##ka1 ^= Da1; \
750
+ Bgi0 = ROL32(A##ka1, 2); \
751
+ A##me1 ^= De1; \
752
+ Bgo0 = ROL32(A##me1, 23); \
753
+ A##si1 ^= Di1; \
754
+ Bgu0 = ROL32(A##si1, 31); \
755
+ E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \
756
+ E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \
757
+ E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \
758
+ E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \
759
+ E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \
760
+ \
761
+ A##bo1 ^= Do1; \
762
+ Bga1 = ROL32(A##bo1, 14); \
763
+ A##gu1 ^= Du1; \
764
+ Bge1 = ROL32(A##gu1, 10); \
765
+ A##ka0 ^= Da0; \
766
+ Bgi1 = ROL32(A##ka0, 1); \
767
+ A##me0 ^= De0; \
768
+ Bgo1 = ROL32(A##me0, 22); \
769
+ A##si0 ^= Di0; \
770
+ Bgu1 = ROL32(A##si0, 30); \
771
+ E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \
772
+ E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \
773
+ E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \
774
+ E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \
775
+ E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \
776
+ \
777
+ A##be1 ^= De1; \
778
+ Bka0 = ROL32(A##be1, 1); \
779
+ A##gi0 ^= Di0; \
780
+ Bke0 = ROL32(A##gi0, 3); \
781
+ A##ko1 ^= Do1; \
782
+ Bki0 = ROL32(A##ko1, 13); \
783
+ A##mu0 ^= Du0; \
784
+ Bko0 = ROL32(A##mu0, 4); \
785
+ A##sa0 ^= Da0; \
786
+ Bku0 = ROL32(A##sa0, 9); \
787
+ E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \
788
+ E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \
789
+ E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \
790
+ E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \
791
+ E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \
792
+ \
793
+ A##be0 ^= De0; \
794
+ Bka1 = A##be0; \
795
+ A##gi1 ^= Di1; \
796
+ Bke1 = ROL32(A##gi1, 3); \
797
+ A##ko0 ^= Do0; \
798
+ Bki1 = ROL32(A##ko0, 12); \
799
+ A##mu1 ^= Du1; \
800
+ Bko1 = ROL32(A##mu1, 4); \
801
+ A##sa1 ^= Da1; \
802
+ Bku1 = ROL32(A##sa1, 9); \
803
+ E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \
804
+ E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \
805
+ E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \
806
+ E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \
807
+ E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \
808
+ \
809
+ A##bu1 ^= Du1; \
810
+ Bma0 = ROL32(A##bu1, 14); \
811
+ A##ga0 ^= Da0; \
812
+ Bme0 = ROL32(A##ga0, 18); \
813
+ A##ke0 ^= De0; \
814
+ Bmi0 = ROL32(A##ke0, 5); \
815
+ A##mi1 ^= Di1; \
816
+ Bmo0 = ROL32(A##mi1, 8); \
817
+ A##so0 ^= Do0; \
818
+ Bmu0 = ROL32(A##so0, 28); \
819
+ E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \
820
+ E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \
821
+ E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \
822
+ E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \
823
+ E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \
824
+ \
825
+ A##bu0 ^= Du0; \
826
+ Bma1 = ROL32(A##bu0, 13); \
827
+ A##ga1 ^= Da1; \
828
+ Bme1 = ROL32(A##ga1, 18); \
829
+ A##ke1 ^= De1; \
830
+ Bmi1 = ROL32(A##ke1, 5); \
831
+ A##mi0 ^= Di0; \
832
+ Bmo1 = ROL32(A##mi0, 7); \
833
+ A##so1 ^= Do1; \
834
+ Bmu1 = ROL32(A##so1, 28); \
835
+ E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \
836
+ E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \
837
+ E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \
838
+ E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \
839
+ E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \
840
+ \
841
+ A##bi0 ^= Di0; \
842
+ Bsa0 = ROL32(A##bi0, 31); \
843
+ A##go1 ^= Do1; \
844
+ Bse0 = ROL32(A##go1, 28); \
845
+ A##ku1 ^= Du1; \
846
+ Bsi0 = ROL32(A##ku1, 20); \
847
+ A##ma1 ^= Da1; \
848
+ Bso0 = ROL32(A##ma1, 21); \
849
+ A##se0 ^= De0; \
850
+ Bsu0 = ROL32(A##se0, 1); \
851
+ E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \
852
+ E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \
853
+ E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \
854
+ E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \
855
+ E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \
856
+ \
857
+ A##bi1 ^= Di1; \
858
+ Bsa1 = ROL32(A##bi1, 31); \
859
+ A##go0 ^= Do0; \
860
+ Bse1 = ROL32(A##go0, 27); \
861
+ A##ku0 ^= Du0; \
862
+ Bsi1 = ROL32(A##ku0, 19); \
863
+ A##ma0 ^= Da0; \
864
+ Bso1 = ROL32(A##ma0, 20); \
865
+ A##se1 ^= De1; \
866
+ Bsu1 = ROL32(A##se1, 1); \
867
+ E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \
868
+ E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \
869
+ E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \
870
+ E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \
871
+ E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \
872
+ \
873
+
874
+ #endif // UseBebigokimisa
875
+
876
/*
 * Table of 24 UINT32 constants, indexed by the `i` argument of the
 * round macros in this file, which XOR entry [i] into E##ba0.
 * Every entry is either 0 or 1.
 * NOTE(review): the "_int2_0" suffix presumably marks this as one of
 * the two interleaved 32-bit words of each 64-bit round constant --
 * confirm against the KeccakTools generator.
 */
+ const UINT32 KeccakF1600RoundConstants_int2_0[24] = {
877
+ 0x00000001UL,
878
+ 0x00000000UL,
879
+ 0x00000000UL,
880
+ 0x00000000UL,
881
+ 0x00000001UL,
882
+ 0x00000001UL,
883
+ 0x00000001UL,
884
+ 0x00000001UL,
885
+ 0x00000000UL,
886
+ 0x00000000UL,
887
+ 0x00000001UL,
888
+ 0x00000000UL,
889
+ 0x00000001UL,
890
+ 0x00000001UL,
891
+ 0x00000001UL,
892
+ 0x00000001UL,
893
+ 0x00000000UL,
894
+ 0x00000000UL,
895
+ 0x00000000UL,
896
+ 0x00000000UL,
897
+ 0x00000001UL,
898
+ 0x00000000UL,
899
+ 0x00000001UL,
900
+ 0x00000000UL };
901
+
902
/*
 * Table of 24 UINT32 constants, indexed by the `i` argument of the
 * round macros in this file, which XOR entry [i] into E##ba1.
 * NOTE(review): the "_int2_1" suffix presumably marks this as the
 * second of the two interleaved 32-bit words of each 64-bit round
 * constant -- confirm against the KeccakTools generator.
 */
+ const UINT32 KeccakF1600RoundConstants_int2_1[24] = {
903
+ 0x00000000UL,
904
+ 0x00000089UL,
905
+ 0x8000008bUL,
906
+ 0x80008080UL,
907
+ 0x0000008bUL,
908
+ 0x00008000UL,
909
+ 0x80008088UL,
910
+ 0x80000082UL,
911
+ 0x0000000bUL,
912
+ 0x0000000aUL,
913
+ 0x00008082UL,
914
+ 0x00008003UL,
915
+ 0x0000808bUL,
916
+ 0x8000000bUL,
917
+ 0x8000008aUL,
918
+ 0x80000081UL,
919
+ 0x80000081UL,
920
+ 0x80000008UL,
921
+ 0x00000083UL,
922
+ 0x80008003UL,
923
+ 0x80008088UL,
924
+ 0x80000088UL,
925
+ 0x00008000UL,
926
+ 0x80008082UL };
927
+
928
/*
 * copyFromStateAndXor1024bits(X, state, input): loads the 50 working
 * variables with prefix X from `state`, XORing in `input` for the
 * first part.
 *
 *   X      token pasted (##) in front of the lane names (ba0 .. su1).
 *   state  indexed at [0] .. [49].
 *   input  indexed at [0] .. [31] only.
 *
 * Indices 0..31 are assigned state[k] ^ input[k]; indices 32..49 are
 * assigned state[k] unchanged. With 32-bit elements (assumed from the
 * UINT32 variables -- TODO confirm the element types at the call
 * site) 32 words match the 1024 bits in the macro name.
 *
 * `state` and `input` are substituted without parentheses, so pass a
 * plain identifier rather than an expression such as `p + 1`.
 */
+ #define copyFromStateAndXor1024bits(X, state, input) \
929
+ X##ba0 = state[ 0]^input[ 0]; \
930
+ X##ba1 = state[ 1]^input[ 1]; \
931
+ X##be0 = state[ 2]^input[ 2]; \
932
+ X##be1 = state[ 3]^input[ 3]; \
933
+ X##bi0 = state[ 4]^input[ 4]; \
934
+ X##bi1 = state[ 5]^input[ 5]; \
935
+ X##bo0 = state[ 6]^input[ 6]; \
936
+ X##bo1 = state[ 7]^input[ 7]; \
937
+ X##bu0 = state[ 8]^input[ 8]; \
938
+ X##bu1 = state[ 9]^input[ 9]; \
939
+ X##ga0 = state[10]^input[10]; \
940
+ X##ga1 = state[11]^input[11]; \
941
+ X##ge0 = state[12]^input[12]; \
942
+ X##ge1 = state[13]^input[13]; \
943
+ X##gi0 = state[14]^input[14]; \
944
+ X##gi1 = state[15]^input[15]; \
945
+ X##go0 = state[16]^input[16]; \
946
+ X##go1 = state[17]^input[17]; \
947
+ X##gu0 = state[18]^input[18]; \
948
+ X##gu1 = state[19]^input[19]; \
949
+ X##ka0 = state[20]^input[20]; \
950
+ X##ka1 = state[21]^input[21]; \
951
+ X##ke0 = state[22]^input[22]; \
952
+ X##ke1 = state[23]^input[23]; \
953
+ X##ki0 = state[24]^input[24]; \
954
+ X##ki1 = state[25]^input[25]; \
955
+ X##ko0 = state[26]^input[26]; \
956
+ X##ko1 = state[27]^input[27]; \
957
+ X##ku0 = state[28]^input[28]; \
958
+ X##ku1 = state[29]^input[29]; \
959
+ X##ma0 = state[30]^input[30]; \
960
+ X##ma1 = state[31]^input[31]; \
961
+ X##me0 = state[32]; \
962
+ X##me1 = state[33]; \
963
+ X##mi0 = state[34]; \
964
+ X##mi1 = state[35]; \
965
+ X##mo0 = state[36]; \
966
+ X##mo1 = state[37]; \
967
+ X##mu0 = state[38]; \
968
+ X##mu1 = state[39]; \
969
+ X##sa0 = state[40]; \
970
+ X##sa1 = state[41]; \
971
+ X##se0 = state[42]; \
972
+ X##se1 = state[43]; \
973
+ X##si0 = state[44]; \
974
+ X##si1 = state[45]; \
975
+ X##so0 = state[46]; \
976
+ X##so1 = state[47]; \
977
+ X##su0 = state[48]; \
978
+ X##su1 = state[49]; \
979
+
980
/*
 * copyFromStateAndXor1088bits(X, state, input): loads the 50 working
 * variables with prefix X from `state`, XORing in `input` for the
 * first part.
 *
 *   X      token pasted (##) in front of the lane names (ba0 .. su1).
 *   state  indexed at [0] .. [49].
 *   input  indexed at [0] .. [33] only.
 *
 * Indices 0..33 are assigned state[k] ^ input[k]; indices 34..49 are
 * assigned state[k] unchanged. With 32-bit elements (assumed from the
 * UINT32 variables -- TODO confirm the element types at the call
 * site) 34 words match the 1088 bits in the macro name.
 *
 * `state` and `input` are substituted without parentheses, so pass a
 * plain identifier rather than an expression such as `p + 1`.
 */
+ #define copyFromStateAndXor1088bits(X, state, input) \
981
+ X##ba0 = state[ 0]^input[ 0]; \
982
+ X##ba1 = state[ 1]^input[ 1]; \
983
+ X##be0 = state[ 2]^input[ 2]; \
984
+ X##be1 = state[ 3]^input[ 3]; \
985
+ X##bi0 = state[ 4]^input[ 4]; \
986
+ X##bi1 = state[ 5]^input[ 5]; \
987
+ X##bo0 = state[ 6]^input[ 6]; \
988
+ X##bo1 = state[ 7]^input[ 7]; \
989
+ X##bu0 = state[ 8]^input[ 8]; \
990
+ X##bu1 = state[ 9]^input[ 9]; \
991
+ X##ga0 = state[10]^input[10]; \
992
+ X##ga1 = state[11]^input[11]; \
993
+ X##ge0 = state[12]^input[12]; \
994
+ X##ge1 = state[13]^input[13]; \
995
+ X##gi0 = state[14]^input[14]; \
996
+ X##gi1 = state[15]^input[15]; \
997
+ X##go0 = state[16]^input[16]; \
998
+ X##go1 = state[17]^input[17]; \
999
+ X##gu0 = state[18]^input[18]; \
1000
+ X##gu1 = state[19]^input[19]; \
1001
+ X##ka0 = state[20]^input[20]; \
1002
+ X##ka1 = state[21]^input[21]; \
1003
+ X##ke0 = state[22]^input[22]; \
1004
+ X##ke1 = state[23]^input[23]; \
1005
+ X##ki0 = state[24]^input[24]; \
1006
+ X##ki1 = state[25]^input[25]; \
1007
+ X##ko0 = state[26]^input[26]; \
1008
+ X##ko1 = state[27]^input[27]; \
1009
+ X##ku0 = state[28]^input[28]; \
1010
+ X##ku1 = state[29]^input[29]; \
1011
+ X##ma0 = state[30]^input[30]; \
1012
+ X##ma1 = state[31]^input[31]; \
1013
+ X##me0 = state[32]^input[32]; \
1014
+ X##me1 = state[33]^input[33]; \
1015
+ X##mi0 = state[34]; \
1016
+ X##mi1 = state[35]; \
1017
+ X##mo0 = state[36]; \
1018
+ X##mo1 = state[37]; \
1019
+ X##mu0 = state[38]; \
1020
+ X##mu1 = state[39]; \
1021
+ X##sa0 = state[40]; \
1022
+ X##sa1 = state[41]; \
1023
+ X##se0 = state[42]; \
1024
+ X##se1 = state[43]; \
1025
+ X##si0 = state[44]; \
1026
+ X##si1 = state[45]; \
1027
+ X##so0 = state[46]; \
1028
+ X##so1 = state[47]; \
1029
+ X##su0 = state[48]; \
1030
+ X##su1 = state[49]; \
1031
+
1032
/*
 * copyFromState(X, state): loads the 50 working variables with prefix
 * X from state[0] .. state[49], in the order ba0, ba1, be0, be1, ...,
 * su0, su1 (the "0" variant at the even index, the "1" variant at the
 * following odd index). Nothing is XORed in and `state` is only read.
 *
 *   X      token pasted (##) in front of the lane names.
 *   state  indexed at [0] .. [49]; substituted without parentheses,
 *          so pass a plain identifier rather than an expression.
 */
+ #define copyFromState(X, state) \
1033
+ X##ba0 = state[ 0]; \
1034
+ X##ba1 = state[ 1]; \
1035
+ X##be0 = state[ 2]; \
1036
+ X##be1 = state[ 3]; \
1037
+ X##bi0 = state[ 4]; \
1038
+ X##bi1 = state[ 5]; \
1039
+ X##bo0 = state[ 6]; \
1040
+ X##bo1 = state[ 7]; \
1041
+ X##bu0 = state[ 8]; \
1042
+ X##bu1 = state[ 9]; \
1043
+ X##ga0 = state[10]; \
1044
+ X##ga1 = state[11]; \
1045
+ X##ge0 = state[12]; \
1046
+ X##ge1 = state[13]; \
1047
+ X##gi0 = state[14]; \
1048
+ X##gi1 = state[15]; \
1049
+ X##go0 = state[16]; \
1050
+ X##go1 = state[17]; \
1051
+ X##gu0 = state[18]; \
1052
+ X##gu1 = state[19]; \
1053
+ X##ka0 = state[20]; \
1054
+ X##ka1 = state[21]; \
1055
+ X##ke0 = state[22]; \
1056
+ X##ke1 = state[23]; \
1057
+ X##ki0 = state[24]; \
1058
+ X##ki1 = state[25]; \
1059
+ X##ko0 = state[26]; \
1060
+ X##ko1 = state[27]; \
1061
+ X##ku0 = state[28]; \
1062
+ X##ku1 = state[29]; \
1063
+ X##ma0 = state[30]; \
1064
+ X##ma1 = state[31]; \
1065
+ X##me0 = state[32]; \
1066
+ X##me1 = state[33]; \
1067
+ X##mi0 = state[34]; \
1068
+ X##mi1 = state[35]; \
1069
+ X##mo0 = state[36]; \
1070
+ X##mo1 = state[37]; \
1071
+ X##mu0 = state[38]; \
1072
+ X##mu1 = state[39]; \
1073
+ X##sa0 = state[40]; \
1074
+ X##sa1 = state[41]; \
1075
+ X##se0 = state[42]; \
1076
+ X##se1 = state[43]; \
1077
+ X##si0 = state[44]; \
1078
+ X##si1 = state[45]; \
1079
+ X##so0 = state[46]; \
1080
+ X##so1 = state[47]; \
1081
+ X##su0 = state[48]; \
1082
+ X##su1 = state[49]; \
1083
+
1084
/*
 * copyToState(state, X)
 *
 * Stores the variables X##ba0 ... X##su1 into state[0..49], in index
 * order. X is a name prefix (e.g. A) pasted onto each suffix. The
 * variables are only read; exactly 50 elements of state are written.
 *
 * state is parenthesized in the expansion so that a pointer expression
 * such as (buf + off) is indexed as a whole. It is expanded 50 times, so
 * pass an expression without side effects.
 *
 * The expansion is a bare list of statements rather than a
 * do { } while (0) block, so call sites compile with or without a trailing
 * semicolon. Only use it where a sequence of statements is valid.
 */
#define copyToState(state, X) \
    (state)[ 0] = X##ba0; \
    (state)[ 1] = X##ba1; \
    (state)[ 2] = X##be0; \
    (state)[ 3] = X##be1; \
    (state)[ 4] = X##bi0; \
    (state)[ 5] = X##bi1; \
    (state)[ 6] = X##bo0; \
    (state)[ 7] = X##bo1; \
    (state)[ 8] = X##bu0; \
    (state)[ 9] = X##bu1; \
    (state)[10] = X##ga0; \
    (state)[11] = X##ga1; \
    (state)[12] = X##ge0; \
    (state)[13] = X##ge1; \
    (state)[14] = X##gi0; \
    (state)[15] = X##gi1; \
    (state)[16] = X##go0; \
    (state)[17] = X##go1; \
    (state)[18] = X##gu0; \
    (state)[19] = X##gu1; \
    (state)[20] = X##ka0; \
    (state)[21] = X##ka1; \
    (state)[22] = X##ke0; \
    (state)[23] = X##ke1; \
    (state)[24] = X##ki0; \
    (state)[25] = X##ki1; \
    (state)[26] = X##ko0; \
    (state)[27] = X##ko1; \
    (state)[28] = X##ku0; \
    (state)[29] = X##ku1; \
    (state)[30] = X##ma0; \
    (state)[31] = X##ma1; \
    (state)[32] = X##me0; \
    (state)[33] = X##me1; \
    (state)[34] = X##mi0; \
    (state)[35] = X##mi1; \
    (state)[36] = X##mo0; \
    (state)[37] = X##mo1; \
    (state)[38] = X##mu0; \
    (state)[39] = X##mu1; \
    (state)[40] = X##sa0; \
    (state)[41] = X##sa1; \
    (state)[42] = X##se0; \
    (state)[43] = X##se1; \
    (state)[44] = X##si0; \
    (state)[45] = X##si1; \
    (state)[46] = X##so0; \
    (state)[47] = X##so1; \
    (state)[48] = X##su0; \
    (state)[49] = X##su1;
1135
+
1136
/*
 * copyStateVariables(X, Y)
 *
 * Assigns each of the 50 variables Y##ba0 ... Y##su1 to the variable with
 * the same suffix under prefix X (X##ba0 = Y##ba0, and so on). X and Y are
 * name prefixes used only for token pasting, never evaluated as
 * expressions; both sets of variables must already be declared. The Y
 * variables are left unchanged.
 *
 * The final statement deliberately has no trailing line continuation, so
 * the definition ends on its own last line instead of depending on the
 * next source line being blank.
 *
 * The expansion is a bare list of statements rather than a
 * do { } while (0) block, so call sites compile with or without a trailing
 * semicolon. Only use it where a sequence of statements is valid.
 */
#define copyStateVariables(X, Y) \
    X##ba0 = Y##ba0; \
    X##ba1 = Y##ba1; \
    X##be0 = Y##be0; \
    X##be1 = Y##be1; \
    X##bi0 = Y##bi0; \
    X##bi1 = Y##bi1; \
    X##bo0 = Y##bo0; \
    X##bo1 = Y##bo1; \
    X##bu0 = Y##bu0; \
    X##bu1 = Y##bu1; \
    X##ga0 = Y##ga0; \
    X##ga1 = Y##ga1; \
    X##ge0 = Y##ge0; \
    X##ge1 = Y##ge1; \
    X##gi0 = Y##gi0; \
    X##gi1 = Y##gi1; \
    X##go0 = Y##go0; \
    X##go1 = Y##go1; \
    X##gu0 = Y##gu0; \
    X##gu1 = Y##gu1; \
    X##ka0 = Y##ka0; \
    X##ka1 = Y##ka1; \
    X##ke0 = Y##ke0; \
    X##ke1 = Y##ke1; \
    X##ki0 = Y##ki0; \
    X##ki1 = Y##ki1; \
    X##ko0 = Y##ko0; \
    X##ko1 = Y##ko1; \
    X##ku0 = Y##ku0; \
    X##ku1 = Y##ku1; \
    X##ma0 = Y##ma0; \
    X##ma1 = Y##ma1; \
    X##me0 = Y##me0; \
    X##me1 = Y##me1; \
    X##mi0 = Y##mi0; \
    X##mi1 = Y##mi1; \
    X##mo0 = Y##mo0; \
    X##mo1 = Y##mo1; \
    X##mu0 = Y##mu0; \
    X##mu1 = Y##mu1; \
    X##sa0 = Y##sa0; \
    X##sa1 = Y##sa1; \
    X##se0 = Y##se0; \
    X##se1 = Y##se1; \
    X##si0 = Y##si0; \
    X##si1 = Y##si1; \
    X##so0 = Y##so0; \
    X##so1 = Y##so1; \
    X##su0 = Y##su0; \
    X##su1 = Y##su1;
1187
+