sha3 0.2.6 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sha3 might be problematic. Click here for more details.

Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +9 -1
  4. data/Gemfile.ci +4 -4
  5. data/LICENSE.txt +1 -1
  6. data/README.md +145 -0
  7. data/Rakefile +4 -5
  8. data/ext/sha3/KeccakF-1600-interface.h +28 -34
  9. data/ext/sha3/KeccakHash.c +80 -0
  10. data/ext/sha3/KeccakHash.h +110 -0
  11. data/ext/sha3/KeccakSponge.c +127 -201
  12. data/ext/sha3/KeccakSponge.h +74 -37
  13. data/ext/sha3/Optimized64/KeccakF-1600-64.macros +2199 -0
  14. data/ext/sha3/Optimized64/KeccakF-1600-opt64-settings.h +3 -0
  15. data/ext/sha3/Optimized64/KeccakF-1600-opt64.c +508 -0
  16. data/ext/sha3/{KeccakF-1600-unrolling.macros → Optimized64/KeccakF-1600-unrolling.macros} +16 -14
  17. data/ext/sha3/Optimized64/SnP-interface.h +47 -0
  18. data/ext/sha3/Reference/KeccakF-1600-reference.c +311 -0
  19. data/ext/sha3/Reference/KeccakF-reference.h +26 -0
  20. data/ext/sha3/Reference/SnP-FBWL-default.c +96 -0
  21. data/ext/sha3/Reference/SnP-FBWL-default.h +26 -0
  22. data/ext/sha3/Reference/SnP-interface.h +42 -0
  23. data/ext/sha3/{displayIntermediateValues.c → Reference/displayIntermediateValues.c} +52 -11
  24. data/ext/sha3/{displayIntermediateValues.h → Reference/displayIntermediateValues.h} +11 -6
  25. data/ext/sha3/SnP-Relaned.h +249 -0
  26. data/ext/sha3/brg_endian.h +0 -0
  27. data/ext/sha3/digest.c +67 -70
  28. data/ext/sha3/digest.h +2 -2
  29. data/ext/sha3/extconf.rb +7 -12
  30. data/ext/sha3/sha3.h +2 -2
  31. data/lib/sha3/doc.rb +26 -39
  32. data/lib/sha3/version.rb +2 -2
  33. data/sha3.gemspec +6 -6
  34. data/spec/generate_tests.rb +6 -41
  35. data/spec/sha3_core_spec.rb +111 -133
  36. data/spec/spec_helper.rb +2 -2
  37. data/tests.sh +9 -7
  38. metadata +33 -36
  39. data/README.rdoc +0 -131
  40. data/ext/sha3/KeccakF-1600-32-rvk.macros +0 -555
  41. data/ext/sha3/KeccakF-1600-32-s1.macros +0 -1187
  42. data/ext/sha3/KeccakF-1600-32-s2.macros +0 -1187
  43. data/ext/sha3/KeccakF-1600-32.macros +0 -26
  44. data/ext/sha3/KeccakF-1600-64.macros +0 -728
  45. data/ext/sha3/KeccakF-1600-int-set.h +0 -6
  46. data/ext/sha3/KeccakF-1600-opt.c +0 -504
  47. data/ext/sha3/KeccakF-1600-opt32-settings.h +0 -4
  48. data/ext/sha3/KeccakF-1600-opt32.c-arch +0 -524
  49. data/ext/sha3/KeccakF-1600-opt64-settings.h +0 -7
  50. data/ext/sha3/KeccakF-1600-opt64.c-arch +0 -504
  51. data/ext/sha3/KeccakF-1600-reference.c-arch +0 -300
  52. data/ext/sha3/KeccakF-1600-x86-64-gas.s +0 -766
  53. data/ext/sha3/KeccakF-1600-x86-64-shld-gas.s +0 -766
  54. data/ext/sha3/KeccakNISTInterface.c +0 -81
  55. data/ext/sha3/KeccakNISTInterface.h +0 -70
@@ -0,0 +1,2199 @@
1
+ /*
2
+ Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3
+ Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4
+ denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our websites:
7
+ http://keccak.noekeon.org/
8
+ http://keyak.noekeon.org/
9
+ http://ketje.noekeon.org/
10
+
11
+ To the extent possible under law, the implementer has waived all copyright
12
+ and related or neighboring rights to the source code in this file.
13
+ http://creativecommons.org/publicdomain/zero/1.0/
14
+ */
15
+ /* declareABCDE: declares, as UINT64 variables, 25 words for each of the prefixes A, B and E (suffixes ba..su) plus five words each for C and D (suffixes a, e, i, o, u); the round macros in this file use the B, C and D names directly. */
16
+ #define declareABCDE \
17
+ UINT64 Aba, Abe, Abi, Abo, Abu; \
18
+ UINT64 Aga, Age, Agi, Ago, Agu; \
19
+ UINT64 Aka, Ake, Aki, Ako, Aku; \
20
+ UINT64 Ama, Ame, Ami, Amo, Amu; \
21
+ UINT64 Asa, Ase, Asi, Aso, Asu; \
22
+ UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
23
+ UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
24
+ UINT64 Bka, Bke, Bki, Bko, Bku; \
25
+ UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
26
+ UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
27
+ UINT64 Ca, Ce, Ci, Co, Cu; \
28
+ UINT64 Da, De, Di, Do, Du; \
29
+ UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
30
+ UINT64 Ega, Ege, Egi, Ego, Egu; \
31
+ UINT64 Eka, Eke, Eki, Eko, Eku; \
32
+ UINT64 Ema, Eme, Emi, Emo, Emu; \
33
+ UINT64 Esa, Ese, Esi, Eso, Esu; \
34
+ /* prepareTheta: sets each of Ca, Ce, Ci, Co, Cu to the XOR of the five A words that share its final letter (e.g. Ca = Aba^Aga^Aka^Ama^Asa); the round macros read Ca..Cu when computing Da..Du. */
35
+ #define prepareTheta \
36
+ Ca = Aba^Aga^Aka^Ama^Asa; \
37
+ Ce = Abe^Age^Ake^Ame^Ase; \
38
+ Ci = Abi^Agi^Aki^Ami^Asi; \
39
+ Co = Abo^Ago^Ako^Amo^Aso; \
40
+ Cu = Abu^Agu^Aku^Amu^Asu; \
41
+
42
+ #ifdef UseBebigokimisa
43
+ // --- Code for one round, with prepare-theta (lane complementing pattern 'bebigokimisa'): XORs Da..Du into the A##.. lanes in place, writes the E##.. lanes, XORs KeccakF1600RoundConstants[i] into E##ba, and leaves in Ca..Cu the XOR of the five new E lanes sharing each final letter, ready for the following round
44
+ // --- 64-bit lanes mapped to 64-bit words; on entry Ca..Cu must already hold the same XORs for the A lanes (see prepareTheta); the B and D variables are used as scratch
45
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
46
+ Da = Cu^ROL64(Ce, 1); \
47
+ De = Ca^ROL64(Ci, 1); \
48
+ Di = Ce^ROL64(Co, 1); \
49
+ Do = Ci^ROL64(Cu, 1); \
50
+ Du = Co^ROL64(Ca, 1); \
51
+ /* lanes A##ba, A##ge, A##ki, A##mo, A##su -> E##ba..E##bu; starts the new Ca..Cu */ \
52
+ A##ba ^= Da; \
53
+ Bba = A##ba; \
54
+ A##ge ^= De; \
55
+ Bbe = ROL64(A##ge, 44); \
56
+ A##ki ^= Di; \
57
+ Bbi = ROL64(A##ki, 43); \
58
+ A##mo ^= Do; \
59
+ Bbo = ROL64(A##mo, 21); \
60
+ A##su ^= Du; \
61
+ Bbu = ROL64(A##su, 14); \
62
+ E##ba = Bba ^( Bbe | Bbi ); \
63
+ E##ba ^= KeccakF1600RoundConstants[i]; \
64
+ Ca = E##ba; \
65
+ E##be = Bbe ^((~Bbi)| Bbo ); \
66
+ Ce = E##be; \
67
+ E##bi = Bbi ^( Bbo & Bbu ); \
68
+ Ci = E##bi; \
69
+ E##bo = Bbo ^( Bbu | Bba ); \
70
+ Co = E##bo; \
71
+ E##bu = Bbu ^( Bba & Bbe ); \
72
+ Cu = E##bu; \
73
+ /* lanes A##bo, A##gu, A##ka, A##me, A##si -> E##ga..E##gu */ \
74
+ A##bo ^= Do; \
75
+ Bga = ROL64(A##bo, 28); \
76
+ A##gu ^= Du; \
77
+ Bge = ROL64(A##gu, 20); \
78
+ A##ka ^= Da; \
79
+ Bgi = ROL64(A##ka, 3); \
80
+ A##me ^= De; \
81
+ Bgo = ROL64(A##me, 45); \
82
+ A##si ^= Di; \
83
+ Bgu = ROL64(A##si, 61); \
84
+ E##ga = Bga ^( Bge | Bgi ); \
85
+ Ca ^= E##ga; \
86
+ E##ge = Bge ^( Bgi & Bgo ); \
87
+ Ce ^= E##ge; \
88
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
89
+ Ci ^= E##gi; \
90
+ E##go = Bgo ^( Bgu | Bga ); \
91
+ Co ^= E##go; \
92
+ E##gu = Bgu ^( Bga & Bge ); \
93
+ Cu ^= E##gu; \
94
+ /* lanes A##be, A##gi, A##ko, A##mu, A##sa -> E##ka..E##ku */ \
95
+ A##be ^= De; \
96
+ Bka = ROL64(A##be, 1); \
97
+ A##gi ^= Di; \
98
+ Bke = ROL64(A##gi, 6); \
99
+ A##ko ^= Do; \
100
+ Bki = ROL64(A##ko, 25); \
101
+ A##mu ^= Du; \
102
+ Bko = ROL64(A##mu, 8); \
103
+ A##sa ^= Da; \
104
+ Bku = ROL64(A##sa, 18); \
105
+ E##ka = Bka ^( Bke | Bki ); \
106
+ Ca ^= E##ka; \
107
+ E##ke = Bke ^( Bki & Bko ); \
108
+ Ce ^= E##ke; \
109
+ E##ki = Bki ^((~Bko)& Bku ); \
110
+ Ci ^= E##ki; \
111
+ E##ko = (~Bko)^( Bku | Bka ); \
112
+ Co ^= E##ko; \
113
+ E##ku = Bku ^( Bka & Bke ); \
114
+ Cu ^= E##ku; \
115
+ /* lanes A##bu, A##ga, A##ke, A##mi, A##so -> E##ma..E##mu */ \
116
+ A##bu ^= Du; \
117
+ Bma = ROL64(A##bu, 27); \
118
+ A##ga ^= Da; \
119
+ Bme = ROL64(A##ga, 36); \
120
+ A##ke ^= De; \
121
+ Bmi = ROL64(A##ke, 10); \
122
+ A##mi ^= Di; \
123
+ Bmo = ROL64(A##mi, 15); \
124
+ A##so ^= Do; \
125
+ Bmu = ROL64(A##so, 56); \
126
+ E##ma = Bma ^( Bme & Bmi ); \
127
+ Ca ^= E##ma; \
128
+ E##me = Bme ^( Bmi | Bmo ); \
129
+ Ce ^= E##me; \
130
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
131
+ Ci ^= E##mi; \
132
+ E##mo = (~Bmo)^( Bmu & Bma ); \
133
+ Co ^= E##mo; \
134
+ E##mu = Bmu ^( Bma | Bme ); \
135
+ Cu ^= E##mu; \
136
+ /* lanes A##bi, A##go, A##ku, A##ma, A##se -> E##sa..E##su */ \
137
+ A##bi ^= Di; \
138
+ Bsa = ROL64(A##bi, 62); \
139
+ A##go ^= Do; \
140
+ Bse = ROL64(A##go, 55); \
141
+ A##ku ^= Du; \
142
+ Bsi = ROL64(A##ku, 39); \
143
+ A##ma ^= Da; \
144
+ Bso = ROL64(A##ma, 41); \
145
+ A##se ^= De; \
146
+ Bsu = ROL64(A##se, 2); \
147
+ E##sa = Bsa ^((~Bse)& Bsi ); \
148
+ Ca ^= E##sa; \
149
+ E##se = (~Bse)^( Bsi | Bso ); \
150
+ Ce ^= E##se; \
151
+ E##si = Bsi ^( Bso & Bsu ); \
152
+ Ci ^= E##si; \
153
+ E##so = Bso ^( Bsu | Bsa ); \
154
+ Co ^= E##so; \
155
+ E##su = Bsu ^( Bsa & Bse ); \
156
+ Cu ^= E##su; \
157
+ \
158
+
159
+ // --- Code for one round (lane complementing pattern 'bebigokimisa'): same A-to-E lane computation as thetaRhoPiChiIotaPrepareTheta, but Ca..Cu are only read (to form Da..Du) and are not recomputed from the new E lanes
160
+ // --- 64-bit lanes mapped to 64-bit words; the A##.. lanes are modified in place (Da..Du XORed in) and the B and D variables are used as scratch
161
+ #define thetaRhoPiChiIota(i, A, E) \
162
+ Da = Cu^ROL64(Ce, 1); \
163
+ De = Ca^ROL64(Ci, 1); \
164
+ Di = Ce^ROL64(Co, 1); \
165
+ Do = Ci^ROL64(Cu, 1); \
166
+ Du = Co^ROL64(Ca, 1); \
167
+ /* lanes A##ba, A##ge, A##ki, A##mo, A##su -> E##ba..E##bu (round constant goes into E##ba) */ \
168
+ A##ba ^= Da; \
169
+ Bba = A##ba; \
170
+ A##ge ^= De; \
171
+ Bbe = ROL64(A##ge, 44); \
172
+ A##ki ^= Di; \
173
+ Bbi = ROL64(A##ki, 43); \
174
+ A##mo ^= Do; \
175
+ Bbo = ROL64(A##mo, 21); \
176
+ A##su ^= Du; \
177
+ Bbu = ROL64(A##su, 14); \
178
+ E##ba = Bba ^( Bbe | Bbi ); \
179
+ E##ba ^= KeccakF1600RoundConstants[i]; \
180
+ E##be = Bbe ^((~Bbi)| Bbo ); \
181
+ E##bi = Bbi ^( Bbo & Bbu ); \
182
+ E##bo = Bbo ^( Bbu | Bba ); \
183
+ E##bu = Bbu ^( Bba & Bbe ); \
184
+ /* lanes A##bo, A##gu, A##ka, A##me, A##si -> E##ga..E##gu */ \
185
+ A##bo ^= Do; \
186
+ Bga = ROL64(A##bo, 28); \
187
+ A##gu ^= Du; \
188
+ Bge = ROL64(A##gu, 20); \
189
+ A##ka ^= Da; \
190
+ Bgi = ROL64(A##ka, 3); \
191
+ A##me ^= De; \
192
+ Bgo = ROL64(A##me, 45); \
193
+ A##si ^= Di; \
194
+ Bgu = ROL64(A##si, 61); \
195
+ E##ga = Bga ^( Bge | Bgi ); \
196
+ E##ge = Bge ^( Bgi & Bgo ); \
197
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
198
+ E##go = Bgo ^( Bgu | Bga ); \
199
+ E##gu = Bgu ^( Bga & Bge ); \
200
+ /* lanes A##be, A##gi, A##ko, A##mu, A##sa -> E##ka..E##ku */ \
201
+ A##be ^= De; \
202
+ Bka = ROL64(A##be, 1); \
203
+ A##gi ^= Di; \
204
+ Bke = ROL64(A##gi, 6); \
205
+ A##ko ^= Do; \
206
+ Bki = ROL64(A##ko, 25); \
207
+ A##mu ^= Du; \
208
+ Bko = ROL64(A##mu, 8); \
209
+ A##sa ^= Da; \
210
+ Bku = ROL64(A##sa, 18); \
211
+ E##ka = Bka ^( Bke | Bki ); \
212
+ E##ke = Bke ^( Bki & Bko ); \
213
+ E##ki = Bki ^((~Bko)& Bku ); \
214
+ E##ko = (~Bko)^( Bku | Bka ); \
215
+ E##ku = Bku ^( Bka & Bke ); \
216
+ /* lanes A##bu, A##ga, A##ke, A##mi, A##so -> E##ma..E##mu */ \
217
+ A##bu ^= Du; \
218
+ Bma = ROL64(A##bu, 27); \
219
+ A##ga ^= Da; \
220
+ Bme = ROL64(A##ga, 36); \
221
+ A##ke ^= De; \
222
+ Bmi = ROL64(A##ke, 10); \
223
+ A##mi ^= Di; \
224
+ Bmo = ROL64(A##mi, 15); \
225
+ A##so ^= Do; \
226
+ Bmu = ROL64(A##so, 56); \
227
+ E##ma = Bma ^( Bme & Bmi ); \
228
+ E##me = Bme ^( Bmi | Bmo ); \
229
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
230
+ E##mo = (~Bmo)^( Bmu & Bma ); \
231
+ E##mu = Bmu ^( Bma | Bme ); \
232
+ /* lanes A##bi, A##go, A##ku, A##ma, A##se -> E##sa..E##su */ \
233
+ A##bi ^= Di; \
234
+ Bsa = ROL64(A##bi, 62); \
235
+ A##go ^= Do; \
236
+ Bse = ROL64(A##go, 55); \
237
+ A##ku ^= Du; \
238
+ Bsi = ROL64(A##ku, 39); \
239
+ A##ma ^= Da; \
240
+ Bso = ROL64(A##ma, 41); \
241
+ A##se ^= De; \
242
+ Bsu = ROL64(A##se, 2); \
243
+ E##sa = Bsa ^((~Bse)& Bsi ); \
244
+ E##se = (~Bse)^( Bsi | Bso ); \
245
+ E##si = Bsi ^( Bso & Bsu ); \
246
+ E##so = Bso ^( Bsu | Bsa ); \
247
+ E##su = Bsu ^( Bsa & Bse ); \
248
+ \
249
+
250
+ #else // UseBebigokimisa
251
+ // --- Code for one round, with prepare-theta (variant compiled when UseBebigokimisa is not defined; every E lane is computed as B ^ ((~B') & B'')): XORs Da..Du into the A##.. lanes in place, writes the E##.. lanes, XORs KeccakF1600RoundConstants[i] into E##ba, and leaves in Ca..Cu the XOR of the five new E lanes sharing each final letter
252
+ // --- 64-bit lanes mapped to 64-bit words; on entry Ca..Cu must already hold the same XORs for the A lanes (see prepareTheta); the B and D variables are used as scratch
253
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
254
+ Da = Cu^ROL64(Ce, 1); \
255
+ De = Ca^ROL64(Ci, 1); \
256
+ Di = Ce^ROL64(Co, 1); \
257
+ Do = Ci^ROL64(Cu, 1); \
258
+ Du = Co^ROL64(Ca, 1); \
259
+ /* lanes A##ba, A##ge, A##ki, A##mo, A##su -> E##ba..E##bu; starts the new Ca..Cu */ \
260
+ A##ba ^= Da; \
261
+ Bba = A##ba; \
262
+ A##ge ^= De; \
263
+ Bbe = ROL64(A##ge, 44); \
264
+ A##ki ^= Di; \
265
+ Bbi = ROL64(A##ki, 43); \
266
+ A##mo ^= Do; \
267
+ Bbo = ROL64(A##mo, 21); \
268
+ A##su ^= Du; \
269
+ Bbu = ROL64(A##su, 14); \
270
+ E##ba = Bba ^((~Bbe)& Bbi ); \
271
+ E##ba ^= KeccakF1600RoundConstants[i]; \
272
+ Ca = E##ba; \
273
+ E##be = Bbe ^((~Bbi)& Bbo ); \
274
+ Ce = E##be; \
275
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
276
+ Ci = E##bi; \
277
+ E##bo = Bbo ^((~Bbu)& Bba ); \
278
+ Co = E##bo; \
279
+ E##bu = Bbu ^((~Bba)& Bbe ); \
280
+ Cu = E##bu; \
281
+ /* lanes A##bo, A##gu, A##ka, A##me, A##si -> E##ga..E##gu */ \
282
+ A##bo ^= Do; \
283
+ Bga = ROL64(A##bo, 28); \
284
+ A##gu ^= Du; \
285
+ Bge = ROL64(A##gu, 20); \
286
+ A##ka ^= Da; \
287
+ Bgi = ROL64(A##ka, 3); \
288
+ A##me ^= De; \
289
+ Bgo = ROL64(A##me, 45); \
290
+ A##si ^= Di; \
291
+ Bgu = ROL64(A##si, 61); \
292
+ E##ga = Bga ^((~Bge)& Bgi ); \
293
+ Ca ^= E##ga; \
294
+ E##ge = Bge ^((~Bgi)& Bgo ); \
295
+ Ce ^= E##ge; \
296
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
297
+ Ci ^= E##gi; \
298
+ E##go = Bgo ^((~Bgu)& Bga ); \
299
+ Co ^= E##go; \
300
+ E##gu = Bgu ^((~Bga)& Bge ); \
301
+ Cu ^= E##gu; \
302
+ /* lanes A##be, A##gi, A##ko, A##mu, A##sa -> E##ka..E##ku */ \
303
+ A##be ^= De; \
304
+ Bka = ROL64(A##be, 1); \
305
+ A##gi ^= Di; \
306
+ Bke = ROL64(A##gi, 6); \
307
+ A##ko ^= Do; \
308
+ Bki = ROL64(A##ko, 25); \
309
+ A##mu ^= Du; \
310
+ Bko = ROL64(A##mu, 8); \
311
+ A##sa ^= Da; \
312
+ Bku = ROL64(A##sa, 18); \
313
+ E##ka = Bka ^((~Bke)& Bki ); \
314
+ Ca ^= E##ka; \
315
+ E##ke = Bke ^((~Bki)& Bko ); \
316
+ Ce ^= E##ke; \
317
+ E##ki = Bki ^((~Bko)& Bku ); \
318
+ Ci ^= E##ki; \
319
+ E##ko = Bko ^((~Bku)& Bka ); \
320
+ Co ^= E##ko; \
321
+ E##ku = Bku ^((~Bka)& Bke ); \
322
+ Cu ^= E##ku; \
323
+ /* lanes A##bu, A##ga, A##ke, A##mi, A##so -> E##ma..E##mu */ \
324
+ A##bu ^= Du; \
325
+ Bma = ROL64(A##bu, 27); \
326
+ A##ga ^= Da; \
327
+ Bme = ROL64(A##ga, 36); \
328
+ A##ke ^= De; \
329
+ Bmi = ROL64(A##ke, 10); \
330
+ A##mi ^= Di; \
331
+ Bmo = ROL64(A##mi, 15); \
332
+ A##so ^= Do; \
333
+ Bmu = ROL64(A##so, 56); \
334
+ E##ma = Bma ^((~Bme)& Bmi ); \
335
+ Ca ^= E##ma; \
336
+ E##me = Bme ^((~Bmi)& Bmo ); \
337
+ Ce ^= E##me; \
338
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
339
+ Ci ^= E##mi; \
340
+ E##mo = Bmo ^((~Bmu)& Bma ); \
341
+ Co ^= E##mo; \
342
+ E##mu = Bmu ^((~Bma)& Bme ); \
343
+ Cu ^= E##mu; \
344
+ /* lanes A##bi, A##go, A##ku, A##ma, A##se -> E##sa..E##su */ \
345
+ A##bi ^= Di; \
346
+ Bsa = ROL64(A##bi, 62); \
347
+ A##go ^= Do; \
348
+ Bse = ROL64(A##go, 55); \
349
+ A##ku ^= Du; \
350
+ Bsi = ROL64(A##ku, 39); \
351
+ A##ma ^= Da; \
352
+ Bso = ROL64(A##ma, 41); \
353
+ A##se ^= De; \
354
+ Bsu = ROL64(A##se, 2); \
355
+ E##sa = Bsa ^((~Bse)& Bsi ); \
356
+ Ca ^= E##sa; \
357
+ E##se = Bse ^((~Bsi)& Bso ); \
358
+ Ce ^= E##se; \
359
+ E##si = Bsi ^((~Bso)& Bsu ); \
360
+ Ci ^= E##si; \
361
+ E##so = Bso ^((~Bsu)& Bsa ); \
362
+ Co ^= E##so; \
363
+ E##su = Bsu ^((~Bsa)& Bse ); \
364
+ Cu ^= E##su; \
365
+ \
366
+
367
+ // --- Code for one round (variant compiled when UseBebigokimisa is not defined): same A-to-E lane computation as the thetaRhoPiChiIotaPrepareTheta variant in this #else branch, but Ca..Cu are only read (to form Da..Du) and are not recomputed from the new E lanes
368
+ // --- 64-bit lanes mapped to 64-bit words; the A##.. lanes are modified in place (Da..Du XORed in) and the B and D variables are used as scratch
369
+ #define thetaRhoPiChiIota(i, A, E) \
370
+ Da = Cu^ROL64(Ce, 1); \
371
+ De = Ca^ROL64(Ci, 1); \
372
+ Di = Ce^ROL64(Co, 1); \
373
+ Do = Ci^ROL64(Cu, 1); \
374
+ Du = Co^ROL64(Ca, 1); \
375
+ /* lanes A##ba, A##ge, A##ki, A##mo, A##su -> E##ba..E##bu (round constant goes into E##ba) */ \
376
+ A##ba ^= Da; \
377
+ Bba = A##ba; \
378
+ A##ge ^= De; \
379
+ Bbe = ROL64(A##ge, 44); \
380
+ A##ki ^= Di; \
381
+ Bbi = ROL64(A##ki, 43); \
382
+ A##mo ^= Do; \
383
+ Bbo = ROL64(A##mo, 21); \
384
+ A##su ^= Du; \
385
+ Bbu = ROL64(A##su, 14); \
386
+ E##ba = Bba ^((~Bbe)& Bbi ); \
387
+ E##ba ^= KeccakF1600RoundConstants[i]; \
388
+ E##be = Bbe ^((~Bbi)& Bbo ); \
389
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
390
+ E##bo = Bbo ^((~Bbu)& Bba ); \
391
+ E##bu = Bbu ^((~Bba)& Bbe ); \
392
+ /* lanes A##bo, A##gu, A##ka, A##me, A##si -> E##ga..E##gu */ \
393
+ A##bo ^= Do; \
394
+ Bga = ROL64(A##bo, 28); \
395
+ A##gu ^= Du; \
396
+ Bge = ROL64(A##gu, 20); \
397
+ A##ka ^= Da; \
398
+ Bgi = ROL64(A##ka, 3); \
399
+ A##me ^= De; \
400
+ Bgo = ROL64(A##me, 45); \
401
+ A##si ^= Di; \
402
+ Bgu = ROL64(A##si, 61); \
403
+ E##ga = Bga ^((~Bge)& Bgi ); \
404
+ E##ge = Bge ^((~Bgi)& Bgo ); \
405
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
406
+ E##go = Bgo ^((~Bgu)& Bga ); \
407
+ E##gu = Bgu ^((~Bga)& Bge ); \
408
+ /* lanes A##be, A##gi, A##ko, A##mu, A##sa -> E##ka..E##ku */ \
409
+ A##be ^= De; \
410
+ Bka = ROL64(A##be, 1); \
411
+ A##gi ^= Di; \
412
+ Bke = ROL64(A##gi, 6); \
413
+ A##ko ^= Do; \
414
+ Bki = ROL64(A##ko, 25); \
415
+ A##mu ^= Du; \
416
+ Bko = ROL64(A##mu, 8); \
417
+ A##sa ^= Da; \
418
+ Bku = ROL64(A##sa, 18); \
419
+ E##ka = Bka ^((~Bke)& Bki ); \
420
+ E##ke = Bke ^((~Bki)& Bko ); \
421
+ E##ki = Bki ^((~Bko)& Bku ); \
422
+ E##ko = Bko ^((~Bku)& Bka ); \
423
+ E##ku = Bku ^((~Bka)& Bke ); \
424
+ /* lanes A##bu, A##ga, A##ke, A##mi, A##so -> E##ma..E##mu */ \
425
+ A##bu ^= Du; \
426
+ Bma = ROL64(A##bu, 27); \
427
+ A##ga ^= Da; \
428
+ Bme = ROL64(A##ga, 36); \
429
+ A##ke ^= De; \
430
+ Bmi = ROL64(A##ke, 10); \
431
+ A##mi ^= Di; \
432
+ Bmo = ROL64(A##mi, 15); \
433
+ A##so ^= Do; \
434
+ Bmu = ROL64(A##so, 56); \
435
+ E##ma = Bma ^((~Bme)& Bmi ); \
436
+ E##me = Bme ^((~Bmi)& Bmo ); \
437
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
438
+ E##mo = Bmo ^((~Bmu)& Bma ); \
439
+ E##mu = Bmu ^((~Bma)& Bme ); \
440
+ /* lanes A##bi, A##go, A##ku, A##ma, A##se -> E##sa..E##su */ \
441
+ A##bi ^= Di; \
442
+ Bsa = ROL64(A##bi, 62); \
443
+ A##go ^= Do; \
444
+ Bse = ROL64(A##go, 55); \
445
+ A##ku ^= Du; \
446
+ Bsi = ROL64(A##ku, 39); \
447
+ A##ma ^= Da; \
448
+ Bso = ROL64(A##ma, 41); \
449
+ A##se ^= De; \
450
+ Bsu = ROL64(A##se, 2); \
451
+ E##sa = Bsa ^((~Bse)& Bsi ); \
452
+ E##se = Bse ^((~Bsi)& Bso ); \
453
+ E##si = Bsi ^((~Bso)& Bsu ); \
454
+ E##so = Bso ^((~Bsu)& Bsa ); \
455
+ E##su = Bsu ^((~Bsa)& Bse ); \
456
+ \
457
+
458
+ #endif // UseBebigokimisa
459
+ /* copyFromState(X, state): loads state[0]..state[24] into the 25 variables X##ba, X##be, ..., X##su, in the order ba, be, bi, bo, bu, ga, ..., su. */
460
+ #define copyFromState(X, state) \
461
+ X##ba = state[ 0]; \
462
+ X##be = state[ 1]; \
463
+ X##bi = state[ 2]; \
464
+ X##bo = state[ 3]; \
465
+ X##bu = state[ 4]; \
466
+ X##ga = state[ 5]; \
467
+ X##ge = state[ 6]; \
468
+ X##gi = state[ 7]; \
469
+ X##go = state[ 8]; \
470
+ X##gu = state[ 9]; \
471
+ X##ka = state[10]; \
472
+ X##ke = state[11]; \
473
+ X##ki = state[12]; \
474
+ X##ko = state[13]; \
475
+ X##ku = state[14]; \
476
+ X##ma = state[15]; \
477
+ X##me = state[16]; \
478
+ X##mi = state[17]; \
479
+ X##mo = state[18]; \
480
+ X##mu = state[19]; \
481
+ X##sa = state[20]; \
482
+ X##se = state[21]; \
483
+ X##si = state[22]; \
484
+ X##so = state[23]; \
485
+ X##su = state[24]; \
486
+ /* copyToState(state, X): stores the 25 variables X##ba, X##be, ..., X##su into state[0]..state[24], using the same index order as copyFromState. */
487
+ #define copyToState(state, X) \
488
+ state[ 0] = X##ba; \
489
+ state[ 1] = X##be; \
490
+ state[ 2] = X##bi; \
491
+ state[ 3] = X##bo; \
492
+ state[ 4] = X##bu; \
493
+ state[ 5] = X##ga; \
494
+ state[ 6] = X##ge; \
495
+ state[ 7] = X##gi; \
496
+ state[ 8] = X##go; \
497
+ state[ 9] = X##gu; \
498
+ state[10] = X##ka; \
499
+ state[11] = X##ke; \
500
+ state[12] = X##ki; \
501
+ state[13] = X##ko; \
502
+ state[14] = X##ku; \
503
+ state[15] = X##ma; \
504
+ state[16] = X##me; \
505
+ state[17] = X##mi; \
506
+ state[18] = X##mo; \
507
+ state[19] = X##mu; \
508
+ state[20] = X##sa; \
509
+ state[21] = X##se; \
510
+ state[22] = X##si; \
511
+ state[23] = X##so; \
512
+ state[24] = X##su; \
513
+ /* copyStateVariables(X, Y): assigns each of the 25 variables Y##ba..Y##su to the variable with the same suffix under prefix X. */
514
+ #define copyStateVariables(X, Y) \
515
+ X##ba = Y##ba; \
516
+ X##be = Y##be; \
517
+ X##bi = Y##bi; \
518
+ X##bo = Y##bo; \
519
+ X##bu = Y##bu; \
520
+ X##ga = Y##ga; \
521
+ X##ge = Y##ge; \
522
+ X##gi = Y##gi; \
523
+ X##go = Y##go; \
524
+ X##gu = Y##gu; \
525
+ X##ka = Y##ka; \
526
+ X##ke = Y##ke; \
527
+ X##ki = Y##ki; \
528
+ X##ko = Y##ko; \
529
+ X##ku = Y##ku; \
530
+ X##ma = Y##ma; \
531
+ X##me = Y##me; \
532
+ X##mi = Y##mi; \
533
+ X##mo = Y##mo; \
534
+ X##mu = Y##mu; \
535
+ X##sa = Y##sa; \
536
+ X##se = Y##se; \
537
+ X##si = Y##si; \
538
+ X##so = Y##so; \
539
+ X##su = Y##su; \
540
+ /* copyFromStateAndXOR(X, state, input, laneCount): loads state[0..24] into X##ba..X##su, XORing input[k] into word k for every k < laneCount (all 25 words when laneCount >= 25); words with index >= laneCount are copied from state unchanged and input[k] is not read for them. The nested ifs are a decision tree on laneCount; the macro expands to one if/else statement and evaluates laneCount several times. */
541
+ #define copyFromStateAndXOR(X, state, input, laneCount) \
542
+ if (laneCount < 16) { \
543
+ if (laneCount < 8) { \
544
+ if (laneCount < 4) { \
545
+ if (laneCount < 2) { \
546
+ if (laneCount < 1) { \
547
+ X##ba = state[ 0]; \
548
+ } \
549
+ else { \
550
+ X##ba = state[ 0]^input[ 0]; \
551
+ } \
552
+ X##be = state[ 1]; \
553
+ X##bi = state[ 2]; \
554
+ } \
555
+ else { \
556
+ X##ba = state[ 0]^input[ 0]; \
557
+ X##be = state[ 1]^input[ 1]; \
558
+ if (laneCount < 3) { \
559
+ X##bi = state[ 2]; \
560
+ } \
561
+ else { \
562
+ X##bi = state[ 2]^input[ 2]; \
563
+ } \
564
+ } \
565
+ X##bo = state[ 3]; \
566
+ X##bu = state[ 4]; \
567
+ X##ga = state[ 5]; \
568
+ X##ge = state[ 6]; \
569
+ } \
570
+ else { \
571
+ X##ba = state[ 0]^input[ 0]; \
572
+ X##be = state[ 1]^input[ 1]; \
573
+ X##bi = state[ 2]^input[ 2]; \
574
+ X##bo = state[ 3]^input[ 3]; \
575
+ if (laneCount < 6) { \
576
+ if (laneCount < 5) { \
577
+ X##bu = state[ 4]; \
578
+ } \
579
+ else { \
580
+ X##bu = state[ 4]^input[ 4]; \
581
+ } \
582
+ X##ga = state[ 5]; \
583
+ X##ge = state[ 6]; \
584
+ } \
585
+ else { \
586
+ X##bu = state[ 4]^input[ 4]; \
587
+ X##ga = state[ 5]^input[ 5]; \
588
+ if (laneCount < 7) { \
589
+ X##ge = state[ 6]; \
590
+ } \
591
+ else { \
592
+ X##ge = state[ 6]^input[ 6]; \
593
+ } \
594
+ } \
595
+ } \
596
+ X##gi = state[ 7]; \
597
+ X##go = state[ 8]; \
598
+ X##gu = state[ 9]; \
599
+ X##ka = state[10]; \
600
+ X##ke = state[11]; \
601
+ X##ki = state[12]; \
602
+ X##ko = state[13]; \
603
+ X##ku = state[14]; \
604
+ } \
605
+ else { \
606
+ X##ba = state[ 0]^input[ 0]; \
607
+ X##be = state[ 1]^input[ 1]; \
608
+ X##bi = state[ 2]^input[ 2]; \
609
+ X##bo = state[ 3]^input[ 3]; \
610
+ X##bu = state[ 4]^input[ 4]; \
611
+ X##ga = state[ 5]^input[ 5]; \
612
+ X##ge = state[ 6]^input[ 6]; \
613
+ X##gi = state[ 7]^input[ 7]; \
614
+ if (laneCount < 12) { \
615
+ if (laneCount < 10) { \
616
+ if (laneCount < 9) { \
617
+ X##go = state[ 8]; \
618
+ } \
619
+ else { \
620
+ X##go = state[ 8]^input[ 8]; \
621
+ } \
622
+ X##gu = state[ 9]; \
623
+ X##ka = state[10]; \
624
+ } \
625
+ else { \
626
+ X##go = state[ 8]^input[ 8]; \
627
+ X##gu = state[ 9]^input[ 9]; \
628
+ if (laneCount < 11) { \
629
+ X##ka = state[10]; \
630
+ } \
631
+ else { \
632
+ X##ka = state[10]^input[10]; \
633
+ } \
634
+ } \
635
+ X##ke = state[11]; \
636
+ X##ki = state[12]; \
637
+ X##ko = state[13]; \
638
+ X##ku = state[14]; \
639
+ } \
640
+ else { \
641
+ X##go = state[ 8]^input[ 8]; \
642
+ X##gu = state[ 9]^input[ 9]; \
643
+ X##ka = state[10]^input[10]; \
644
+ X##ke = state[11]^input[11]; \
645
+ if (laneCount < 14) { \
646
+ if (laneCount < 13) { \
647
+ X##ki = state[12]; \
648
+ } \
649
+ else { \
650
+ X##ki = state[12]^input[12]; \
651
+ } \
652
+ X##ko = state[13]; \
653
+ X##ku = state[14]; \
654
+ } \
655
+ else { \
656
+ X##ki = state[12]^input[12]; \
657
+ X##ko = state[13]^input[13]; \
658
+ if (laneCount < 15) { \
659
+ X##ku = state[14]; \
660
+ } \
661
+ else { \
662
+ X##ku = state[14]^input[14]; \
663
+ } \
664
+ } \
665
+ } \
666
+ } \
667
+ X##ma = state[15]; \
668
+ X##me = state[16]; \
669
+ X##mi = state[17]; \
670
+ X##mo = state[18]; \
671
+ X##mu = state[19]; \
672
+ X##sa = state[20]; \
673
+ X##se = state[21]; \
674
+ X##si = state[22]; \
675
+ X##so = state[23]; \
676
+ X##su = state[24]; \
677
+ } \
678
+ else { \
679
+ X##ba = state[ 0]^input[ 0]; \
680
+ X##be = state[ 1]^input[ 1]; \
681
+ X##bi = state[ 2]^input[ 2]; \
682
+ X##bo = state[ 3]^input[ 3]; \
683
+ X##bu = state[ 4]^input[ 4]; \
684
+ X##ga = state[ 5]^input[ 5]; \
685
+ X##ge = state[ 6]^input[ 6]; \
686
+ X##gi = state[ 7]^input[ 7]; \
687
+ X##go = state[ 8]^input[ 8]; \
688
+ X##gu = state[ 9]^input[ 9]; \
689
+ X##ka = state[10]^input[10]; \
690
+ X##ke = state[11]^input[11]; \
691
+ X##ki = state[12]^input[12]; \
692
+ X##ko = state[13]^input[13]; \
693
+ X##ku = state[14]^input[14]; \
694
+ X##ma = state[15]^input[15]; \
695
+ if (laneCount < 24) { \
696
+ if (laneCount < 20) { \
697
+ if (laneCount < 18) { \
698
+ if (laneCount < 17) { \
699
+ X##me = state[16]; \
700
+ } \
701
+ else { \
702
+ X##me = state[16]^input[16]; \
703
+ } \
704
+ X##mi = state[17]; \
705
+ X##mo = state[18]; \
706
+ } \
707
+ else { \
708
+ X##me = state[16]^input[16]; \
709
+ X##mi = state[17]^input[17]; \
710
+ if (laneCount < 19) { \
711
+ X##mo = state[18]; \
712
+ } \
713
+ else { \
714
+ X##mo = state[18]^input[18]; \
715
+ } \
716
+ } \
717
+ X##mu = state[19]; \
718
+ X##sa = state[20]; \
719
+ X##se = state[21]; \
720
+ X##si = state[22]; \
721
+ } \
722
+ else { \
723
+ X##me = state[16]^input[16]; \
724
+ X##mi = state[17]^input[17]; \
725
+ X##mo = state[18]^input[18]; \
726
+ X##mu = state[19]^input[19]; \
727
+ if (laneCount < 22) { \
728
+ if (laneCount < 21) { \
729
+ X##sa = state[20]; \
730
+ } \
731
+ else { \
732
+ X##sa = state[20]^input[20]; \
733
+ } \
734
+ X##se = state[21]; \
735
+ X##si = state[22]; \
736
+ } \
737
+ else { \
738
+ X##sa = state[20]^input[20]; \
739
+ X##se = state[21]^input[21]; \
740
+ if (laneCount < 23) { \
741
+ X##si = state[22]; \
742
+ } \
743
+ else { \
744
+ X##si = state[22]^input[22]; \
745
+ } \
746
+ } \
747
+ } \
748
+ X##so = state[23]; \
749
+ X##su = state[24]; \
750
+ } \
751
+ else { \
752
+ X##me = state[16]^input[16]; \
753
+ X##mi = state[17]^input[17]; \
754
+ X##mo = state[18]^input[18]; \
755
+ X##mu = state[19]^input[19]; \
756
+ X##sa = state[20]^input[20]; \
757
+ X##se = state[21]^input[21]; \
758
+ X##si = state[22]^input[22]; \
759
+ X##so = state[23]^input[23]; \
760
+ if (laneCount < 25) { \
761
+ X##su = state[24]; \
762
+ } \
763
+ else { \
764
+ X##su = state[24]^input[24]; \
765
+ } \
766
+ } \
767
+ }
768
+ /* XORinputAndTrailingBits(X, input, laneCount, trailingBits): XORs input[k] into X word k (order ba, be, ..., su) for every k < laneCount, then XORs trailingBits into word number laneCount, i.e. the first word after the input; when laneCount >= 25 all 25 words receive input and trailingBits is not used. The X variables are updated in place and words past the trailingBits position are left untouched. */
769
+ #define XORinputAndTrailingBits(X, input, laneCount, trailingBits) \
770
+ if (laneCount < 16) { \
771
+ if (laneCount < 8) { \
772
+ if (laneCount < 4) { \
773
+ if (laneCount < 2) { \
774
+ if (laneCount < 1) { \
775
+ X##ba ^= trailingBits; \
776
+ } \
777
+ else { \
778
+ X##ba ^= input[ 0]; \
779
+ X##be ^= trailingBits; \
780
+ } \
781
+ } \
782
+ else { \
783
+ X##ba ^= input[ 0]; \
784
+ X##be ^= input[ 1]; \
785
+ if (laneCount < 3) { \
786
+ X##bi ^= trailingBits; \
787
+ } \
788
+ else { \
789
+ X##bi ^= input[ 2]; \
790
+ X##bo ^= trailingBits; \
791
+ } \
792
+ } \
793
+ } \
794
+ else { \
795
+ X##ba ^= input[ 0]; \
796
+ X##be ^= input[ 1]; \
797
+ X##bi ^= input[ 2]; \
798
+ X##bo ^= input[ 3]; \
799
+ if (laneCount < 6) { \
800
+ if (laneCount < 5) { \
801
+ X##bu ^= trailingBits; \
802
+ } \
803
+ else { \
804
+ X##bu ^= input[ 4]; \
805
+ X##ga ^= trailingBits; \
806
+ } \
807
+ } \
808
+ else { \
809
+ X##bu ^= input[ 4]; \
810
+ X##ga ^= input[ 5]; \
811
+ if (laneCount < 7) { \
812
+ X##ge ^= trailingBits; \
813
+ } \
814
+ else { \
815
+ X##ge ^= input[ 6]; \
816
+ X##gi ^= trailingBits; \
817
+ } \
818
+ } \
819
+ } \
820
+ } \
821
+ else { \
822
+ X##ba ^= input[ 0]; \
823
+ X##be ^= input[ 1]; \
824
+ X##bi ^= input[ 2]; \
825
+ X##bo ^= input[ 3]; \
826
+ X##bu ^= input[ 4]; \
827
+ X##ga ^= input[ 5]; \
828
+ X##ge ^= input[ 6]; \
829
+ X##gi ^= input[ 7]; \
830
+ if (laneCount < 12) { \
831
+ if (laneCount < 10) { \
832
+ if (laneCount < 9) { \
833
+ X##go ^= trailingBits; \
834
+ } \
835
+ else { \
836
+ X##go ^= input[ 8]; \
837
+ X##gu ^= trailingBits ; \
838
+ } \
839
+ } \
840
+ else { \
841
+ X##go ^= input[ 8]; \
842
+ X##gu ^= input[ 9]; \
843
+ if (laneCount < 11) { \
844
+ X##ka ^= trailingBits; \
845
+ } \
846
+ else { \
847
+ X##ka ^= input[10]; \
848
+ X##ke ^= trailingBits; \
849
+ } \
850
+ } \
851
+ } \
852
+ else { \
853
+ X##go ^= input[ 8]; \
854
+ X##gu ^= input[ 9]; \
855
+ X##ka ^= input[10]; \
856
+ X##ke ^= input[11]; \
857
+ if (laneCount < 14) { \
858
+ if (laneCount < 13) { \
859
+ X##ki ^= trailingBits; \
860
+ } \
861
+ else { \
862
+ X##ki ^= input[12]; \
863
+ X##ko ^= trailingBits; \
864
+ } \
865
+ } \
866
+ else { \
867
+ X##ki ^= input[12]; \
868
+ X##ko ^= input[13]; \
869
+ if (laneCount < 15) { \
870
+ X##ku ^= trailingBits; \
871
+ } \
872
+ else { \
873
+ X##ku ^= input[14]; \
874
+ X##ma ^= trailingBits; \
875
+ } \
876
+ } \
877
+ } \
878
+ } \
879
+ } \
880
+ else { \
881
+ X##ba ^= input[ 0]; \
882
+ X##be ^= input[ 1]; \
883
+ X##bi ^= input[ 2]; \
884
+ X##bo ^= input[ 3]; \
885
+ X##bu ^= input[ 4]; \
886
+ X##ga ^= input[ 5]; \
887
+ X##ge ^= input[ 6]; \
888
+ X##gi ^= input[ 7]; \
889
+ X##go ^= input[ 8]; \
890
+ X##gu ^= input[ 9]; \
891
+ X##ka ^= input[10]; \
892
+ X##ke ^= input[11]; \
893
+ X##ki ^= input[12]; \
894
+ X##ko ^= input[13]; \
895
+ X##ku ^= input[14]; \
896
+ X##ma ^= input[15]; \
897
+ if (laneCount < 24) { \
898
+ if (laneCount < 20) { \
899
+ if (laneCount < 18) { \
900
+ if (laneCount < 17) { \
901
+ X##me ^= trailingBits; \
902
+ } \
903
+ else { \
904
+ X##me ^= input[16]; \
905
+ X##mi ^= trailingBits; \
906
+ } \
907
+ } \
908
+ else { \
909
+ X##me ^= input[16]; \
910
+ X##mi ^= input[17]; \
911
+ if (laneCount < 19) { \
912
+ X##mo ^= trailingBits; \
913
+ } \
914
+ else { \
915
+ X##mo ^= input[18]; \
916
+ X##mu ^= trailingBits; \
917
+ } \
918
+ } \
919
+ } \
920
+ else { \
921
+ X##me ^= input[16]; \
922
+ X##mi ^= input[17]; \
923
+ X##mo ^= input[18]; \
924
+ X##mu ^= input[19]; \
925
+ if (laneCount < 22) { \
926
+ if (laneCount < 21) { \
927
+ X##sa ^= trailingBits; \
928
+ } \
929
+ else { \
930
+ X##sa ^= input[20]; \
931
+ X##se ^= trailingBits; \
932
+ } \
933
+ } \
934
+ else { \
935
+ X##sa ^= input[20]; \
936
+ X##se ^= input[21]; \
937
+ if (laneCount < 23) { \
938
+ X##si ^= trailingBits; \
939
+ } \
940
+ else { \
941
+ X##si ^= input[22]; \
942
+ X##so ^= trailingBits; \
943
+ } \
944
+ } \
945
+ } \
946
+ } \
947
+ else { \
948
+ X##me ^= input[16]; \
949
+ X##mi ^= input[17]; \
950
+ X##mo ^= input[18]; \
951
+ X##mu ^= input[19]; \
952
+ X##sa ^= input[20]; \
953
+ X##se ^= input[21]; \
954
+ X##si ^= input[22]; \
955
+ X##so ^= input[23]; \
956
+ if (laneCount < 25) { \
957
+ X##su ^= trailingBits; \
958
+ } \
959
+ else { \
960
+ X##su ^= input[24]; \
961
+ } \
962
+ } \
963
+ }
964
+
965
+ #ifdef UseBebigokimisa
966
+
967
+ #define copyToStateAndOutput(X, state, output, laneCount) \
968
+ if (laneCount < 16) { \
969
+ if (laneCount < 8) { \
970
+ if (laneCount < 4) { \
971
+ if (laneCount < 2) { \
972
+ state[ 0] = X##ba; \
973
+ if (laneCount >= 1) { \
974
+ output[ 0] = X##ba; \
975
+ } \
976
+ state[ 1] = X##be; \
977
+ state[ 2] = X##bi; \
978
+ } \
979
+ else { \
980
+ state[ 0] = X##ba; \
981
+ output[ 0] = X##ba; \
982
+ state[ 1] = X##be; \
983
+ output[ 1] = ~X##be; \
984
+ state[ 2] = X##bi; \
985
+ if (laneCount >= 3) { \
986
+ output[ 2] = ~X##bi; \
987
+ } \
988
+ } \
989
+ state[ 3] = X##bo; \
990
+ state[ 4] = X##bu; \
991
+ state[ 5] = X##ga; \
992
+ state[ 6] = X##ge; \
993
+ } \
994
+ else { \
995
+ state[ 0] = X##ba; \
996
+ output[ 0] = X##ba; \
997
+ state[ 1] = X##be; \
998
+ output[ 1] = ~X##be; \
999
+ state[ 2] = X##bi; \
1000
+ output[ 2] = ~X##bi; \
1001
+ state[ 3] = X##bo; \
1002
+ output[ 3] = X##bo; \
1003
+ if (laneCount < 6) { \
1004
+ state[ 4] = X##bu; \
1005
+ if (laneCount >= 5) { \
1006
+ output[ 4] = X##bu; \
1007
+ } \
1008
+ state[ 5] = X##ga; \
1009
+ state[ 6] = X##ge; \
1010
+ } \
1011
+ else { \
1012
+ state[ 4] = X##bu; \
1013
+ output[ 4] = X##bu; \
1014
+ state[ 5] = X##ga; \
1015
+ output[ 5] = X##ga; \
1016
+ state[ 6] = X##ge; \
1017
+ if (laneCount >= 7) { \
1018
+ output[ 6] = X##ge; \
1019
+ } \
1020
+ } \
1021
+ } \
1022
+ state[ 7] = X##gi; \
1023
+ state[ 8] = X##go; \
1024
+ state[ 9] = X##gu; \
1025
+ state[10] = X##ka; \
1026
+ state[11] = X##ke; \
1027
+ state[12] = X##ki; \
1028
+ state[13] = X##ko; \
1029
+ state[14] = X##ku; \
1030
+ } \
1031
+ else { \
1032
+ state[ 0] = X##ba; \
1033
+ output[ 0] = X##ba; \
1034
+ state[ 1] = X##be; \
1035
+ output[ 1] = ~X##be; \
1036
+ state[ 2] = X##bi; \
1037
+ output[ 2] = ~X##bi; \
1038
+ state[ 3] = X##bo; \
1039
+ output[ 3] = X##bo; \
1040
+ state[ 4] = X##bu; \
1041
+ output[ 4] = X##bu; \
1042
+ state[ 5] = X##ga; \
1043
+ output[ 5] = X##ga; \
1044
+ state[ 6] = X##ge; \
1045
+ output[ 6] = X##ge; \
1046
+ state[ 7] = X##gi; \
1047
+ output[ 7] = X##gi; \
1048
+ if (laneCount < 12) { \
1049
+ if (laneCount < 10) { \
1050
+ state[ 8] = X##go; \
1051
+ if (laneCount >= 9) { \
1052
+ output[ 8] = ~X##go; \
1053
+ } \
1054
+ state[ 9] = X##gu; \
1055
+ state[10] = X##ka; \
1056
+ } \
1057
+ else { \
1058
+ state[ 8] = X##go; \
1059
+ output[ 8] = ~X##go; \
1060
+ state[ 9] = X##gu; \
1061
+ output[ 9] = X##gu; \
1062
+ state[10] = X##ka; \
1063
+ if (laneCount >= 11) { \
1064
+ output[10] = X##ka; \
1065
+ } \
1066
+ } \
1067
+ state[11] = X##ke; \
1068
+ state[12] = X##ki; \
1069
+ state[13] = X##ko; \
1070
+ state[14] = X##ku; \
1071
+ } \
1072
+ else { \
1073
+ state[ 8] = X##go; \
1074
+ output[ 8] = ~X##go; \
1075
+ state[ 9] = X##gu; \
1076
+ output[ 9] = X##gu; \
1077
+ state[10] = X##ka; \
1078
+ output[10] = X##ka; \
1079
+ state[11] = X##ke; \
1080
+ output[11] = X##ke; \
1081
+ if (laneCount < 14) { \
1082
+ state[12] = X##ki; \
1083
+ if (laneCount >= 13) { \
1084
+ output[12] = ~X##ki; \
1085
+ } \
1086
+ state[13] = X##ko; \
1087
+ state[14] = X##ku; \
1088
+ } \
1089
+ else { \
1090
+ state[12] = X##ki; \
1091
+ output[12] = ~X##ki; \
1092
+ state[13] = X##ko; \
1093
+ output[13] = X##ko; \
1094
+ state[14] = X##ku; \
1095
+ if (laneCount >= 15) { \
1096
+ output[14] = X##ku; \
1097
+ } \
1098
+ } \
1099
+ } \
1100
+ } \
1101
+ state[15] = X##ma; \
1102
+ state[16] = X##me; \
1103
+ state[17] = X##mi; \
1104
+ state[18] = X##mo; \
1105
+ state[19] = X##mu; \
1106
+ state[20] = X##sa; \
1107
+ state[21] = X##se; \
1108
+ state[22] = X##si; \
1109
+ state[23] = X##so; \
1110
+ state[24] = X##su; \
1111
+ } \
1112
+ else { \
1113
+ state[ 0] = X##ba; \
1114
+ output[ 0] = X##ba; \
1115
+ state[ 1] = X##be; \
1116
+ output[ 1] = ~X##be; \
1117
+ state[ 2] = X##bi; \
1118
+ output[ 2] = ~X##bi; \
1119
+ state[ 3] = X##bo; \
1120
+ output[ 3] = X##bo; \
1121
+ state[ 4] = X##bu; \
1122
+ output[ 4] = X##bu; \
1123
+ state[ 5] = X##ga; \
1124
+ output[ 5] = X##ga; \
1125
+ state[ 6] = X##ge; \
1126
+ output[ 6] = X##ge; \
1127
+ state[ 7] = X##gi; \
1128
+ output[ 7] = X##gi; \
1129
+ state[ 8] = X##go; \
1130
+ output[ 8] = ~X##go; \
1131
+ state[ 9] = X##gu; \
1132
+ output[ 9] = X##gu; \
1133
+ state[10] = X##ka; \
1134
+ output[10] = X##ka; \
1135
+ state[11] = X##ke; \
1136
+ output[11] = X##ke; \
1137
+ state[12] = X##ki; \
1138
+ output[12] = ~X##ki; \
1139
+ state[13] = X##ko; \
1140
+ output[13] = X##ko; \
1141
+ state[14] = X##ku; \
1142
+ output[14] = X##ku; \
1143
+ state[15] = X##ma; \
1144
+ output[15] = X##ma; \
1145
+ if (laneCount < 24) { \
1146
+ if (laneCount < 20) { \
1147
+ if (laneCount < 18) { \
1148
+ state[16] = X##me; \
1149
+ if (laneCount >= 17) { \
1150
+ output[16] = X##me; \
1151
+ } \
1152
+ state[17] = X##mi; \
1153
+ state[18] = X##mo; \
1154
+ } \
1155
+ else { \
1156
+ state[16] = X##me; \
1157
+ output[16] = X##me; \
1158
+ state[17] = X##mi; \
1159
+ output[17] = ~X##mi; \
1160
+ state[18] = X##mo; \
1161
+ if (laneCount >= 19) { \
1162
+ output[18] = X##mo; \
1163
+ } \
1164
+ } \
1165
+ state[19] = X##mu; \
1166
+ state[20] = X##sa; \
1167
+ state[21] = X##se; \
1168
+ state[22] = X##si; \
1169
+ } \
1170
+ else { \
1171
+ state[16] = X##me; \
1172
+ output[16] = X##me; \
1173
+ state[17] = X##mi; \
1174
+ output[17] = ~X##mi; \
1175
+ state[18] = X##mo; \
1176
+ output[18] = X##mo; \
1177
+ state[19] = X##mu; \
1178
+ output[19] = X##mu; \
1179
+ if (laneCount < 22) { \
1180
+ state[20] = X##sa; \
1181
+ if (laneCount >= 21) { \
1182
+ output[20] = ~X##sa; \
1183
+ } \
1184
+ state[21] = X##se; \
1185
+ state[22] = X##si; \
1186
+ } \
1187
+ else { \
1188
+ state[20] = X##sa; \
1189
+ output[20] = ~X##sa; \
1190
+ state[21] = X##se; \
1191
+ output[21] = X##se; \
1192
+ state[22] = X##si; \
1193
+ if (laneCount >= 23) { \
1194
+ output[22] = X##si; \
1195
+ } \
1196
+ } \
1197
+ } \
1198
+ state[23] = X##so; \
1199
+ state[24] = X##su; \
1200
+ } \
1201
+ else { \
1202
+ state[16] = X##me; \
1203
+ output[16] = X##me; \
1204
+ state[17] = X##mi; \
1205
+ output[17] = ~X##mi; \
1206
+ state[18] = X##mo; \
1207
+ output[18] = X##mo; \
1208
+ state[19] = X##mu; \
1209
+ output[19] = X##mu; \
1210
+ state[20] = X##sa; \
1211
+ output[20] = ~X##sa; \
1212
+ state[21] = X##se; \
1213
+ output[21] = X##se; \
1214
+ state[22] = X##si; \
1215
+ output[22] = X##si; \
1216
+ state[23] = X##so; \
1217
+ output[23] = X##so; \
1218
+ state[24] = X##su; \
1219
+ if (laneCount >= 25) { \
1220
+ output[24] = X##su; \
1221
+ } \
1222
+ } \
1223
+ }
1224
+
1225
/*
 * output(X, output, laneCount) -- variant in the UseBebigokimisa branch.
 *
 * Copies the first min(laneCount, 25) lanes held in the variables
 * X##ba .. X##su to output[0], output[1], ...  Lanes be, bi, go, ki, mi and
 * sa (indices 1, 2, 8, 12, 17 and 20) are written bitwise-complemented; all
 * other lanes are written as-is.  A laneCount below 1 writes nothing and a
 * laneCount above 25 behaves like 25.
 * NOTE(review): the complemented indices presumably undo the lane
 * complementing applied elsewhere in this branch -- confirm against the
 * round macros.
 *
 *   X         - prefix pasted onto the 25 lane names (ba, be, ..., su).
 *   output    - array (or pointer expression) receiving the lanes.
 *   laneCount - number of lanes to write; evaluated more than once, so it
 *               must not have side effects.
 *
 * Every case deliberately falls through to the one below it, so entering at
 * "case k" writes lanes k-1 down to 0.  The expansion is a single statement
 * that needs no trailing semicolon.
 */
#define output(X, output, laneCount) \
    switch ((laneCount) >= 25 ? 25 : (laneCount)) { \
    case 25: (output)[24] =  X##su; \
    case 24: (output)[23] =  X##so; \
    case 23: (output)[22] =  X##si; \
    case 22: (output)[21] =  X##se; \
    case 21: (output)[20] = ~X##sa; \
    case 20: (output)[19] =  X##mu; \
    case 19: (output)[18] =  X##mo; \
    case 18: (output)[17] = ~X##mi; \
    case 17: (output)[16] =  X##me; \
    case 16: (output)[15] =  X##ma; \
    case 15: (output)[14] =  X##ku; \
    case 14: (output)[13] =  X##ko; \
    case 13: (output)[12] = ~X##ki; \
    case 12: (output)[11] =  X##ke; \
    case 11: (output)[10] =  X##ka; \
    case 10: (output)[ 9] =  X##gu; \
    case  9: (output)[ 8] = ~X##go; \
    case  8: (output)[ 7] =  X##gi; \
    case  7: (output)[ 6] =  X##ge; \
    case  6: (output)[ 5] =  X##ga; \
    case  5: (output)[ 4] =  X##bu; \
    case  4: (output)[ 3] =  X##bo; \
    case  3: (output)[ 2] = ~X##bi; \
    case  2: (output)[ 1] = ~X##be; \
    case  1: (output)[ 0] =  X##ba; \
    default: break; \
    }
1369
+
1370
/*
 * wrapOne(X, input, output, index, name) -- UseBebigokimisa branch.
 *
 * XORs input[index] into the lane variable X##name, then stores the updated
 * lane to output[index].
 *
 * The body is braced so the macro is one statement (safe under an unbraced
 * "if"), and the array arguments are parenthesized so pointer expressions
 * such as "buf + 1" index correctly.  No trailing semicolon is required.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1373
+
1374
/*
 * wrapOneInvert(X, input, output, index, name) -- UseBebigokimisa branch.
 *
 * XORs input[index] into the lane variable X##name, then stores the bitwise
 * complement of the updated lane to output[index].  The lane variable itself
 * is left uncomplemented.
 *
 * Braced into a single statement and with the array arguments parenthesized
 * so that pointer expressions can be passed.  No trailing semicolon is
 * required.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = ~X##name; \
    }
1377
+
1378
/*
 * unwrapOne(X, input, output, index, name) -- UseBebigokimisa branch.
 *
 * Stores input[index] XOR X##name to output[index], then XORs the value just
 * stored back into X##name.  The store happens first, so the lane update
 * reads output[index] after it has been written.
 *
 * Braced into a single statement and with the array arguments parenthesized
 * so that pointer expressions can be passed.  No trailing semicolon is
 * required.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1381
+
1382
/*
 * unwrapOneInvert(X, input, output, index, name) -- UseBebigokimisa branch.
 *
 * Stores the bitwise complement of (input[index] XOR X##name) to
 * output[index], then XORs the value just stored back into X##name.
 *
 * The previous definition ended with a line-continuation backslash after its
 * last statement, which spliced whatever line followed into the macro; that
 * continuation is removed.  The body is also braced into a single statement
 * and the array arguments are parenthesized so that pointer expressions can
 * be passed.  No trailing semicolon is required.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = ~((input)[index] ^ X##name); \
        X##name ^= (output)[index]; \
    }
1385
+
1386
+ #else // UseBebigokimisa
1387
+
1388
/*
 * copyToStateAndOutput(X, state, output, laneCount) -- variant in the #else
 * branch of UseBebigokimisa (no lanes are complemented).
 *
 * Stores all 25 lanes held in the variables X##ba .. X##su to state[0..24],
 * and additionally copies the first min(laneCount, 25) of them to
 * output[0], output[1], ...  A laneCount below 1 writes nothing to output; a
 * laneCount above 25 behaves like 25.
 *
 *   X         - prefix pasted onto the 25 lane names (ba, be, ..., su).
 *   state     - array (or pointer expression) receiving all 25 lanes.
 *   output    - array (or pointer expression) receiving the leading lanes.
 *   laneCount - number of lanes to copy to output; evaluated more than once,
 *               so it must not have side effects.
 *
 * The state stores are unconditional.  The output stores use a switch whose
 * cases deliberately fall through, so entering at "case k" writes lanes k-1
 * down to 0.
 * NOTE(review): state and output are assumed not to overlap at an offset;
 * writing both to the same address is harmless because the values match.
 * The expansion is a single braced statement that needs no trailing
 * semicolon.
 */
#define copyToStateAndOutput(X, state, output, laneCount) \
    { \
        (state)[ 0] = X##ba; \
        (state)[ 1] = X##be; \
        (state)[ 2] = X##bi; \
        (state)[ 3] = X##bo; \
        (state)[ 4] = X##bu; \
        (state)[ 5] = X##ga; \
        (state)[ 6] = X##ge; \
        (state)[ 7] = X##gi; \
        (state)[ 8] = X##go; \
        (state)[ 9] = X##gu; \
        (state)[10] = X##ka; \
        (state)[11] = X##ke; \
        (state)[12] = X##ki; \
        (state)[13] = X##ko; \
        (state)[14] = X##ku; \
        (state)[15] = X##ma; \
        (state)[16] = X##me; \
        (state)[17] = X##mi; \
        (state)[18] = X##mo; \
        (state)[19] = X##mu; \
        (state)[20] = X##sa; \
        (state)[21] = X##se; \
        (state)[22] = X##si; \
        (state)[23] = X##so; \
        (state)[24] = X##su; \
        switch ((laneCount) >= 25 ? 25 : (laneCount)) { \
        case 25: (output)[24] = X##su; \
        case 24: (output)[23] = X##so; \
        case 23: (output)[22] = X##si; \
        case 22: (output)[21] = X##se; \
        case 21: (output)[20] = X##sa; \
        case 20: (output)[19] = X##mu; \
        case 19: (output)[18] = X##mo; \
        case 18: (output)[17] = X##mi; \
        case 17: (output)[16] = X##me; \
        case 16: (output)[15] = X##ma; \
        case 15: (output)[14] = X##ku; \
        case 14: (output)[13] = X##ko; \
        case 13: (output)[12] = X##ki; \
        case 12: (output)[11] = X##ke; \
        case 11: (output)[10] = X##ka; \
        case 10: (output)[ 9] = X##gu; \
        case  9: (output)[ 8] = X##go; \
        case  8: (output)[ 7] = X##gi; \
        case  7: (output)[ 6] = X##ge; \
        case  6: (output)[ 5] = X##ga; \
        case  5: (output)[ 4] = X##bu; \
        case  4: (output)[ 3] = X##bo; \
        case  3: (output)[ 2] = X##bi; \
        case  2: (output)[ 1] = X##be; \
        case  1: (output)[ 0] = X##ba; \
        default: break; \
        } \
    }
1645
+
1646
/*
 * output(X, output, laneCount) -- variant in the #else branch of
 * UseBebigokimisa (no lanes are complemented).
 *
 * Copies the first min(laneCount, 25) lanes held in the variables
 * X##ba .. X##su to output[0], output[1], ...  A laneCount below 1 writes
 * nothing and a laneCount above 25 behaves like 25.
 *
 *   X         - prefix pasted onto the 25 lane names (ba, be, ..., su).
 *   output    - array (or pointer expression) receiving the lanes.
 *   laneCount - number of lanes to write; evaluated more than once, so it
 *               must not have side effects.
 *
 * Every case deliberately falls through to the one below it, so entering at
 * "case k" writes lanes k-1 down to 0.  The expansion is a single statement
 * that needs no trailing semicolon.
 */
#define output(X, output, laneCount) \
    switch ((laneCount) >= 25 ? 25 : (laneCount)) { \
    case 25: (output)[24] = X##su; \
    case 24: (output)[23] = X##so; \
    case 23: (output)[22] = X##si; \
    case 22: (output)[21] = X##se; \
    case 21: (output)[20] = X##sa; \
    case 20: (output)[19] = X##mu; \
    case 19: (output)[18] = X##mo; \
    case 18: (output)[17] = X##mi; \
    case 17: (output)[16] = X##me; \
    case 16: (output)[15] = X##ma; \
    case 15: (output)[14] = X##ku; \
    case 14: (output)[13] = X##ko; \
    case 13: (output)[12] = X##ki; \
    case 12: (output)[11] = X##ke; \
    case 11: (output)[10] = X##ka; \
    case 10: (output)[ 9] = X##gu; \
    case  9: (output)[ 8] = X##go; \
    case  8: (output)[ 7] = X##gi; \
    case  7: (output)[ 6] = X##ge; \
    case  6: (output)[ 5] = X##ga; \
    case  5: (output)[ 4] = X##bu; \
    case  4: (output)[ 3] = X##bo; \
    case  3: (output)[ 2] = X##bi; \
    case  2: (output)[ 1] = X##be; \
    case  1: (output)[ 0] = X##ba; \
    default: break; \
    }
1790
+
1791
/*
 * wrapOne(X, input, output, index, name) -- #else branch of UseBebigokimisa.
 *
 * XORs input[index] into the lane variable X##name, then stores the updated
 * lane to output[index].
 *
 * The body is braced so the macro is one statement (safe under an unbraced
 * "if"), and the array arguments are parenthesized so pointer expressions
 * such as "buf + 1" index correctly.  No trailing semicolon is required.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1794
+
1795
/*
 * wrapOneInvert(X, input, output, index, name) -- #else branch of
 * UseBebigokimisa.
 *
 * In this branch nothing is complemented: the macro XORs input[index] into
 * the lane variable X##name and stores the updated lane to output[index],
 * exactly as wrapOne does here.
 *
 * Braced into a single statement and with the array arguments parenthesized
 * so that pointer expressions can be passed.  No trailing semicolon is
 * required.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1798
+
1799
/*
 * unwrapOne(X, input, output, index, name) -- #else branch of
 * UseBebigokimisa.
 *
 * Stores input[index] XOR X##name to output[index], then XORs the value just
 * stored back into X##name.  The store happens first, so the lane update
 * reads output[index] after it has been written.
 *
 * Braced into a single statement and with the array arguments parenthesized
 * so that pointer expressions can be passed.  No trailing semicolon is
 * required.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1802
+
1803
/*
 * unwrapOneInvert(X, input, output, index, name) -- #else branch of
 * UseBebigokimisa.
 *
 * In this branch nothing is complemented: the macro stores
 * input[index] XOR X##name to output[index] and then XORs the stored value
 * back into X##name, exactly as unwrapOne does here.
 *
 * Braced into a single statement and with the array arguments parenthesized
 * so that pointer expressions can be passed.  No trailing semicolon is
 * required.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1806
+
1807
+ #endif
1808
+
1809
/*
 * wrap(X, input, output, laneCount, trailingBits)
 *
 * For every lane index i below min(laneCount, 25), applies wrapOne -- or
 * wrapOneInvert for indices 1, 2, 8, 12, 17 and 20 (lanes be, bi, go, ki, mi,
 * sa) -- to lane i with input[i] and output[i].  Then, when laneCount is
 * below 25, XORs trailingBits into the first lane that was not wrapped: lane
 * laneCount, or lane 0 (X##ba) when laneCount is below 1.  With laneCount of
 * 25 or more, all 25 lanes are wrapped and trailingBits is not used.
 *
 *   X            - prefix pasted onto the 25 lane names (ba, be, ..., su).
 *   input        - array (or pointer expression) read per wrapped lane.
 *   output       - array (or pointer expression) written per wrapped lane.
 *   laneCount    - number of lanes to wrap; evaluated more than once, so it
 *                  must not have side effects.
 *   trailingBits - value XORed into the lane following the wrapped ones.
 *
 * The first switch deliberately falls through from case to case, so entering
 * at "case k" wraps lanes k-1 down to 0; each lane is processed
 * independently of the others.  input and output are forwarded in
 * parentheses so that pointer expressions work whichever wrapOne /
 * wrapOneInvert definition is active.  The expansion is a single braced
 * statement that needs no trailing semicolon.
 */
#define wrap(X, input, output, laneCount, trailingBits) \
    { \
        switch ((laneCount) >= 25 ? 25 : (laneCount)) { \
        case 25: wrapOne(X, (input), (output), 24, su) \
        case 24: wrapOne(X, (input), (output), 23, so) \
        case 23: wrapOne(X, (input), (output), 22, si) \
        case 22: wrapOne(X, (input), (output), 21, se) \
        case 21: wrapOneInvert(X, (input), (output), 20, sa) \
        case 20: wrapOne(X, (input), (output), 19, mu) \
        case 19: wrapOne(X, (input), (output), 18, mo) \
        case 18: wrapOneInvert(X, (input), (output), 17, mi) \
        case 17: wrapOne(X, (input), (output), 16, me) \
        case 16: wrapOne(X, (input), (output), 15, ma) \
        case 15: wrapOne(X, (input), (output), 14, ku) \
        case 14: wrapOne(X, (input), (output), 13, ko) \
        case 13: wrapOneInvert(X, (input), (output), 12, ki) \
        case 12: wrapOne(X, (input), (output), 11, ke) \
        case 11: wrapOne(X, (input), (output), 10, ka) \
        case 10: wrapOne(X, (input), (output), 9, gu) \
        case  9: wrapOneInvert(X, (input), (output), 8, go) \
        case  8: wrapOne(X, (input), (output), 7, gi) \
        case  7: wrapOne(X, (input), (output), 6, ge) \
        case  6: wrapOne(X, (input), (output), 5, ga) \
        case  5: wrapOne(X, (input), (output), 4, bu) \
        case  4: wrapOne(X, (input), (output), 3, bo) \
        case  3: wrapOneInvert(X, (input), (output), 2, bi) \
        case  2: wrapOneInvert(X, (input), (output), 1, be) \
        case  1: wrapOne(X, (input), (output), 0, ba) \
        default: break; \
        } \
        switch ((laneCount) < 1 ? 0 : (laneCount)) { \
        case  0: X##ba ^= (trailingBits); break; \
        case  1: X##be ^= (trailingBits); break; \
        case  2: X##bi ^= (trailingBits); break; \
        case  3: X##bo ^= (trailingBits); break; \
        case  4: X##bu ^= (trailingBits); break; \
        case  5: X##ga ^= (trailingBits); break; \
        case  6: X##ge ^= (trailingBits); break; \
        case  7: X##gi ^= (trailingBits); break; \
        case  8: X##go ^= (trailingBits); break; \
        case  9: X##gu ^= (trailingBits); break; \
        case 10: X##ka ^= (trailingBits); break; \
        case 11: X##ke ^= (trailingBits); break; \
        case 12: X##ki ^= (trailingBits); break; \
        case 13: X##ko ^= (trailingBits); break; \
        case 14: X##ku ^= (trailingBits); break; \
        case 15: X##ma ^= (trailingBits); break; \
        case 16: X##me ^= (trailingBits); break; \
        case 17: X##mi ^= (trailingBits); break; \
        case 18: X##mo ^= (trailingBits); break; \
        case 19: X##mu ^= (trailingBits); break; \
        case 20: X##sa ^= (trailingBits); break; \
        case 21: X##se ^= (trailingBits); break; \
        case 22: X##si ^= (trailingBits); break; \
        case 23: X##so ^= (trailingBits); break; \
        case 24: X##su ^= (trailingBits); break; \
        default: break; \
        } \
    }
2004
+
2005
/*
 * unwrap(X, input, output, laneCount, trailingBits)
 *
 * For every lane index i below min(laneCount, 25), applies unwrapOne -- or
 * unwrapOneInvert for indices 1, 2, 8, 12, 17 and 20 (lanes be, bi, go, ki,
 * mi, sa) -- to lane i with input[i] and output[i].  Then, when laneCount is
 * below 25, XORs trailingBits into the first lane that was not unwrapped:
 * lane laneCount, or lane 0 (X##ba) when laneCount is below 1.  With
 * laneCount of 25 or more, all 25 lanes are unwrapped and trailingBits is
 * not used.
 *
 *   X            - prefix pasted onto the 25 lane names (ba, be, ..., su).
 *   input        - array (or pointer expression) read per unwrapped lane.
 *   output       - array (or pointer expression) written per unwrapped lane.
 *   laneCount    - number of lanes to unwrap; evaluated more than once, so
 *                  it must not have side effects.
 *   trailingBits - value XORed into the lane following the unwrapped ones.
 *
 * The first switch deliberately falls through from case to case, so entering
 * at "case k" unwraps lanes k-1 down to 0; each lane is processed
 * independently of the others.  input and output are forwarded in
 * parentheses so that pointer expressions work whichever unwrapOne /
 * unwrapOneInvert definition is active.  The expansion is a single braced
 * statement that needs no trailing semicolon.
 */
#define unwrap(X, input, output, laneCount, trailingBits) \
    { \
        switch ((laneCount) >= 25 ? 25 : (laneCount)) { \
        case 25: unwrapOne(X, (input), (output), 24, su) \
        case 24: unwrapOne(X, (input), (output), 23, so) \
        case 23: unwrapOne(X, (input), (output), 22, si) \
        case 22: unwrapOne(X, (input), (output), 21, se) \
        case 21: unwrapOneInvert(X, (input), (output), 20, sa) \
        case 20: unwrapOne(X, (input), (output), 19, mu) \
        case 19: unwrapOne(X, (input), (output), 18, mo) \
        case 18: unwrapOneInvert(X, (input), (output), 17, mi) \
        case 17: unwrapOne(X, (input), (output), 16, me) \
        case 16: unwrapOne(X, (input), (output), 15, ma) \
        case 15: unwrapOne(X, (input), (output), 14, ku) \
        case 14: unwrapOne(X, (input), (output), 13, ko) \
        case 13: unwrapOneInvert(X, (input), (output), 12, ki) \
        case 12: unwrapOne(X, (input), (output), 11, ke) \
        case 11: unwrapOne(X, (input), (output), 10, ka) \
        case 10: unwrapOne(X, (input), (output), 9, gu) \
        case  9: unwrapOneInvert(X, (input), (output), 8, go) \
        case  8: unwrapOne(X, (input), (output), 7, gi) \
        case  7: unwrapOne(X, (input), (output), 6, ge) \
        case  6: unwrapOne(X, (input), (output), 5, ga) \
        case  5: unwrapOne(X, (input), (output), 4, bu) \
        case  4: unwrapOne(X, (input), (output), 3, bo) \
        case  3: unwrapOneInvert(X, (input), (output), 2, bi) \
        case  2: unwrapOneInvert(X, (input), (output), 1, be) \
        case  1: unwrapOne(X, (input), (output), 0, ba) \
        default: break; \
        } \
        switch ((laneCount) < 1 ? 0 : (laneCount)) { \
        case  0: X##ba ^= (trailingBits); break; \
        case  1: X##be ^= (trailingBits); break; \
        case  2: X##bi ^= (trailingBits); break; \
        case  3: X##bo ^= (trailingBits); break; \
        case  4: X##bu ^= (trailingBits); break; \
        case  5: X##ga ^= (trailingBits); break; \
        case  6: X##ge ^= (trailingBits); break; \
        case  7: X##gi ^= (trailingBits); break; \
        case  8: X##go ^= (trailingBits); break; \
        case  9: X##gu ^= (trailingBits); break; \
        case 10: X##ka ^= (trailingBits); break; \
        case 11: X##ke ^= (trailingBits); break; \
        case 12: X##ki ^= (trailingBits); break; \
        case 13: X##ko ^= (trailingBits); break; \
        case 14: X##ku ^= (trailingBits); break; \
        case 15: X##ma ^= (trailingBits); break; \
        case 16: X##me ^= (trailingBits); break; \
        case 17: X##mi ^= (trailingBits); break; \
        case 18: X##mo ^= (trailingBits); break; \
        case 19: X##mu ^= (trailingBits); break; \
        case 20: X##sa ^= (trailingBits); break; \
        case 21: X##se ^= (trailingBits); break; \
        case 22: X##si ^= (trailingBits); break; \
        case 23: X##so ^= (trailingBits); break; \
        case 24: X##su ^= (trailingBits); break; \
        default: break; \
        } \
    }