sha3 0.2.2 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sha3 might be problematic; more details are available on the original diff page.

Files changed (62):
  1. checksums.yaml +7 -0
  2. data/.gitignore +232 -17
  3. data/.travis.yml +21 -12
  4. data/.yardopts +1 -1
  5. data/ChangeLog.rdoc +16 -0
  6. data/Gemfile +1 -1
  7. data/Gemfile.ci +5 -5
  8. data/LICENSE.txt +1 -1
  9. data/README.md +120 -0
  10. data/Rakefile +15 -18
  11. data/ext/sha3/KeccakF-1600-interface.h +28 -34
  12. data/ext/sha3/KeccakHash.c +80 -0
  13. data/ext/sha3/KeccakHash.h +110 -0
  14. data/ext/sha3/KeccakSponge.c +127 -201
  15. data/ext/sha3/KeccakSponge.h +74 -37
  16. data/ext/sha3/Optimized64/KeccakF-1600-64.macros +2199 -0
  17. data/ext/sha3/Optimized64/KeccakF-1600-opt64-settings.h +3 -0
  18. data/ext/sha3/Optimized64/KeccakF-1600-opt64.c +508 -0
  19. data/ext/sha3/{KeccakF-1600-unrolling.macros → Optimized64/KeccakF-1600-unrolling.macros} +16 -14
  20. data/ext/sha3/Optimized64/SnP-interface.h +47 -0
  21. data/ext/sha3/Reference/KeccakF-1600-reference.c +311 -0
  22. data/ext/sha3/Reference/KeccakF-reference.h +26 -0
  23. data/ext/sha3/Reference/SnP-FBWL-default.c +96 -0
  24. data/ext/sha3/Reference/SnP-FBWL-default.h +26 -0
  25. data/ext/sha3/Reference/SnP-interface.h +42 -0
  26. data/ext/sha3/{displayIntermediateValues.c → Reference/displayIntermediateValues.c} +52 -11
  27. data/ext/sha3/{displayIntermediateValues.h → Reference/displayIntermediateValues.h} +11 -6
  28. data/ext/sha3/SnP-Relaned.h +249 -0
  29. data/ext/sha3/brg_endian.h +0 -0
  30. data/ext/sha3/digest.c +270 -0
  31. data/ext/sha3/digest.h +48 -0
  32. data/ext/sha3/extconf.rb +16 -9
  33. data/ext/sha3/sha3.c +62 -0
  34. data/ext/sha3/sha3.h +26 -0
  35. data/lib/sha3.rb +1 -1
  36. data/lib/sha3/doc.rb +121 -0
  37. data/lib/sha3/version.rb +6 -5
  38. data/sha3.gemspec +13 -15
  39. data/spec/generate_tests.rb +22 -56
  40. data/spec/sha3_core_spec.rb +113 -133
  41. data/spec/spec_helper.rb +2 -2
  42. data/tests.sh +11 -9
  43. metadata +53 -65
  44. data/README.rdoc +0 -133
  45. data/ext/sha3/KeccakF-1600-32-rvk.macros +0 -555
  46. data/ext/sha3/KeccakF-1600-32-s1.macros +0 -1187
  47. data/ext/sha3/KeccakF-1600-32-s2.macros +0 -1187
  48. data/ext/sha3/KeccakF-1600-32.macros +0 -26
  49. data/ext/sha3/KeccakF-1600-64.macros +0 -728
  50. data/ext/sha3/KeccakF-1600-int-set.h +0 -6
  51. data/ext/sha3/KeccakF-1600-opt.c +0 -504
  52. data/ext/sha3/KeccakF-1600-opt32-settings.h +0 -4
  53. data/ext/sha3/KeccakF-1600-opt32.c-arch +0 -524
  54. data/ext/sha3/KeccakF-1600-opt64-settings.h +0 -7
  55. data/ext/sha3/KeccakF-1600-opt64.c-arch +0 -504
  56. data/ext/sha3/KeccakF-1600-reference.c-arch +0 -300
  57. data/ext/sha3/KeccakF-1600-x86-64-gas.s +0 -766
  58. data/ext/sha3/KeccakF-1600-x86-64-shld-gas.s +0 -766
  59. data/ext/sha3/KeccakNISTInterface.c +0 -81
  60. data/ext/sha3/KeccakNISTInterface.h +0 -70
  61. data/ext/sha3/_sha3.c +0 -309
  62. data/ext/sha3/_sha3.h +0 -32
@@ -1,10 +1,12 @@
1
1
  /*
2
- The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3
- Michaël Peeters and Gilles Van Assche. For more information, feedback or
4
- questions, please refer to our website: http://keccak.noekeon.org/
2
+ Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3
+ Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4
+ denoted as "the implementer".
5
5
 
6
- Implementation by the designers,
7
- hereby denoted as "the implementer".
6
+ For more information, feedback or questions, please refer to our websites:
7
+ http://keccak.noekeon.org/
8
+ http://keyak.noekeon.org/
9
+ http://ketje.noekeon.org/
8
10
 
9
11
  To the extent possible under law, the implementer has waived all copyright
10
12
  and related or neighboring rights to the source code in this file.
@@ -14,10 +16,13 @@ http://creativecommons.org/publicdomain/zero/1.0/
14
16
  #ifndef _KeccakSponge_h_
15
17
  #define _KeccakSponge_h_
16
18
 
17
- #define KeccakPermutationSize 1600
18
- #define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
19
- #define KeccakMaximumRate 1536
20
- #define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
19
+ #include "SnP-interface.h"
20
+ #include <string.h>
21
+
22
+ // on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef.
23
+ #ifdef ALIGN
24
+ #undef ALIGN
25
+ #endif
21
26
 
22
27
  #if defined(__GNUC__)
23
28
  #define ALIGN __attribute__ ((aligned(32)))
@@ -27,50 +32,82 @@ http://creativecommons.org/publicdomain/zero/1.0/
27
32
  #define ALIGN
28
33
  #endif
29
34
 
30
- ALIGN typedef struct spongeStateStruct {
31
- ALIGN unsigned char state[KeccakPermutationSizeInBytes];
32
- ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
35
+ /**
36
+ * Structure that contains the sponge instance attributes for use with the
37
+ * Keccak_Sponge* functions.
38
+ * It gathers the state processed by the permutation as well as the rate,
39
+ * the position of input/output bytes in the state and the phase
40
+ * (absorbing or squeezing).
41
+ */
42
+ ALIGN typedef struct Keccak_SpongeInstanceStruct {
43
+ /** The state processed by the permutation. */
44
+ ALIGN unsigned char state[SnP_stateSizeInBytes];
45
+ /** The value of the rate in bits.*/
33
46
  unsigned int rate;
34
- unsigned int capacity;
35
- unsigned int bitsInQueue;
36
- unsigned int fixedOutputLength;
47
+ /** The position in the state of the next byte to be input (when absorbing) or output (when squeezing). */
48
+ unsigned int byteIOIndex;
49
+ /** If set to 0, in the absorbing phase; otherwise, in the squeezing phase. */
37
50
  int squeezing;
38
- unsigned int bitsAvailableForSqueezing;
39
- } spongeState;
51
+ } Keccak_SpongeInstance;
40
52
 
41
53
  /**
42
54
  * Function to initialize the state of the Keccak[r, c] sponge function.
43
- * The sponge function is set to the absorbing phase.
44
- * @param state Pointer to the state of the sponge function to be initialized.
55
+ * The phase of the sponge function is set to absorbing.
56
+ * @param spongeInstance Pointer to the sponge instance to be initialized.
45
57
  * @param rate The value of the rate r.
46
58
  * @param capacity The value of the capacity c.
47
- * @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation.
59
+ * @pre One must have r+c equal to the supported width of this implementation
60
+ * and the rate a multiple of 8 bits (one byte) in this implementation.
48
61
  * @return Zero if successful, 1 otherwise.
49
62
  */
50
- int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity);
63
+ int Keccak_SpongeInitialize(Keccak_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity);
64
+
51
65
  /**
52
- * Function to give input data for the sponge function to absorb.
53
- * @param state Pointer to the state of the sponge function initialized by InitSponge().
54
- * @param data Pointer to the input data.
55
- * When @a databitLen is not a multiple of 8, the last bits of data must be
56
- * in the least significant bits of the last byte.
57
- * @param databitLen The number of input bits provided in the input data.
58
- * @pre In the previous call to Absorb(), databitLen was a multiple of 8.
66
+ * Function to give input data bytes for the sponge function to absorb.
67
+ * @param spongeInstance Pointer to the sponge instance initialized by Keccak_SpongeInitialize().
68
+ * @param data Pointer to the input data.
69
+ * @param dataByteLen The number of input bytes provided in the input data.
59
70
  * @pre The sponge function must be in the absorbing phase,
60
- * i.e., Squeeze() must not have been called before.
71
+ * i.e., Keccak_SpongeSqueeze() or Keccak_SpongeAbsorbLastFewBits()
72
+ * must not have been called before.
61
73
  * @return Zero if successful, 1 otherwise.
62
74
  */
63
- int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen);
75
+ int Keccak_SpongeAbsorb(Keccak_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen);
76
+
77
+ /**
78
+ * Function to give input data bits for the sponge function to absorb
79
+ * and then to switch to the squeezing phase.
80
+ * @param spongeInstance Pointer to the sponge instance initialized by Keccak_SpongeInitialize().
81
+ * @param delimitedData Byte containing from 0 to 7 trailing bits
82
+ * that must be absorbed.
83
+ * These <i>n</i> bits must be in the least significant bit positions.
84
+ * These bits must be delimited with a bit 1 at position <i>n</i>
85
+ * (counting from 0=LSB to 7=MSB) and followed by bits 0
86
+ * from position <i>n</i>+1 to position 7.
87
+ * Some examples:
88
+ * - If no bits are to be absorbed, then @a delimitedData must be 0x01.
89
+ * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04.
90
+ * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32.
91
+ * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B.
92
+ * .
93
+ * @pre The sponge function must be in the absorbing phase,
94
+ * i.e., Keccak_SpongeSqueeze() or Keccak_SpongeAbsorbLastFewBits()
95
+ * must not have been called before.
96
+ * @pre @a delimitedData ≠ 0x00
97
+ * @return Zero if successful, 1 otherwise.
98
+ */
99
+ int Keccak_SpongeAbsorbLastFewBits(Keccak_SpongeInstance *spongeInstance, unsigned char delimitedData);
100
+
64
101
  /**
65
102
  * Function to squeeze output data from the sponge function.
66
- * If the sponge function was in the absorbing phase, this function
67
- * switches it to the squeezing phase.
68
- * @param state Pointer to the state of the sponge function initialized by InitSponge().
69
- * @param output Pointer to the buffer where to store the output data.
70
- * @param outputLength The number of output bits desired.
71
- * It must be a multiple of 8.
103
+ * If the sponge function was in the absorbing phase, this function
104
+ * switches it to the squeezing phase
105
+ * as if Keccak_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called.
106
+ * @param spongeInstance Pointer to the sponge instance initialized by Keccak_SpongeInitialize().
107
+ * @param data Pointer to the buffer where to store the output data.
108
+ * @param dataByteLen The number of output bytes desired.
72
109
  * @return Zero if successful, 1 otherwise.
73
110
  */
74
- int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength);
111
+ int Keccak_SpongeSqueeze(Keccak_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
75
112
 
76
113
  #endif
@@ -0,0 +1,2199 @@
1
+ /*
2
+ Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
3
+ Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
4
+ denoted as "the implementer".
5
+
6
+ For more information, feedback or questions, please refer to our websites:
7
+ http://keccak.noekeon.org/
8
+ http://keyak.noekeon.org/
9
+ http://ketje.noekeon.org/
10
+
11
+ To the extent possible under law, the implementer has waived all copyright
12
+ and related or neighboring rights to the source code in this file.
13
+ http://creativecommons.org/publicdomain/zero/1.0/
14
+ */
15
+
16
+ #define declareABCDE \
17
+ UINT64 Aba, Abe, Abi, Abo, Abu; \
18
+ UINT64 Aga, Age, Agi, Ago, Agu; \
19
+ UINT64 Aka, Ake, Aki, Ako, Aku; \
20
+ UINT64 Ama, Ame, Ami, Amo, Amu; \
21
+ UINT64 Asa, Ase, Asi, Aso, Asu; \
22
+ UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
23
+ UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
24
+ UINT64 Bka, Bke, Bki, Bko, Bku; \
25
+ UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
26
+ UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
27
+ UINT64 Ca, Ce, Ci, Co, Cu; \
28
+ UINT64 Da, De, Di, Do, Du; \
29
+ UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
30
+ UINT64 Ega, Ege, Egi, Ego, Egu; \
31
+ UINT64 Eka, Eke, Eki, Eko, Eku; \
32
+ UINT64 Ema, Eme, Emi, Emo, Emu; \
33
+ UINT64 Esa, Ese, Esi, Eso, Esu; \
34
+
35
+ #define prepareTheta \
36
+ Ca = Aba^Aga^Aka^Ama^Asa; \
37
+ Ce = Abe^Age^Ake^Ame^Ase; \
38
+ Ci = Abi^Agi^Aki^Ami^Asi; \
39
+ Co = Abo^Ago^Ako^Amo^Aso; \
40
+ Cu = Abu^Agu^Aku^Amu^Asu; \
41
+
42
+ #ifdef UseBebigokimisa
43
+ // --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa')
44
+ // --- 64-bit lanes mapped to 64-bit words
45
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
46
+ Da = Cu^ROL64(Ce, 1); \
47
+ De = Ca^ROL64(Ci, 1); \
48
+ Di = Ce^ROL64(Co, 1); \
49
+ Do = Ci^ROL64(Cu, 1); \
50
+ Du = Co^ROL64(Ca, 1); \
51
+ \
52
+ A##ba ^= Da; \
53
+ Bba = A##ba; \
54
+ A##ge ^= De; \
55
+ Bbe = ROL64(A##ge, 44); \
56
+ A##ki ^= Di; \
57
+ Bbi = ROL64(A##ki, 43); \
58
+ A##mo ^= Do; \
59
+ Bbo = ROL64(A##mo, 21); \
60
+ A##su ^= Du; \
61
+ Bbu = ROL64(A##su, 14); \
62
+ E##ba = Bba ^( Bbe | Bbi ); \
63
+ E##ba ^= KeccakF1600RoundConstants[i]; \
64
+ Ca = E##ba; \
65
+ E##be = Bbe ^((~Bbi)| Bbo ); \
66
+ Ce = E##be; \
67
+ E##bi = Bbi ^( Bbo & Bbu ); \
68
+ Ci = E##bi; \
69
+ E##bo = Bbo ^( Bbu | Bba ); \
70
+ Co = E##bo; \
71
+ E##bu = Bbu ^( Bba & Bbe ); \
72
+ Cu = E##bu; \
73
+ \
74
+ A##bo ^= Do; \
75
+ Bga = ROL64(A##bo, 28); \
76
+ A##gu ^= Du; \
77
+ Bge = ROL64(A##gu, 20); \
78
+ A##ka ^= Da; \
79
+ Bgi = ROL64(A##ka, 3); \
80
+ A##me ^= De; \
81
+ Bgo = ROL64(A##me, 45); \
82
+ A##si ^= Di; \
83
+ Bgu = ROL64(A##si, 61); \
84
+ E##ga = Bga ^( Bge | Bgi ); \
85
+ Ca ^= E##ga; \
86
+ E##ge = Bge ^( Bgi & Bgo ); \
87
+ Ce ^= E##ge; \
88
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
89
+ Ci ^= E##gi; \
90
+ E##go = Bgo ^( Bgu | Bga ); \
91
+ Co ^= E##go; \
92
+ E##gu = Bgu ^( Bga & Bge ); \
93
+ Cu ^= E##gu; \
94
+ \
95
+ A##be ^= De; \
96
+ Bka = ROL64(A##be, 1); \
97
+ A##gi ^= Di; \
98
+ Bke = ROL64(A##gi, 6); \
99
+ A##ko ^= Do; \
100
+ Bki = ROL64(A##ko, 25); \
101
+ A##mu ^= Du; \
102
+ Bko = ROL64(A##mu, 8); \
103
+ A##sa ^= Da; \
104
+ Bku = ROL64(A##sa, 18); \
105
+ E##ka = Bka ^( Bke | Bki ); \
106
+ Ca ^= E##ka; \
107
+ E##ke = Bke ^( Bki & Bko ); \
108
+ Ce ^= E##ke; \
109
+ E##ki = Bki ^((~Bko)& Bku ); \
110
+ Ci ^= E##ki; \
111
+ E##ko = (~Bko)^( Bku | Bka ); \
112
+ Co ^= E##ko; \
113
+ E##ku = Bku ^( Bka & Bke ); \
114
+ Cu ^= E##ku; \
115
+ \
116
+ A##bu ^= Du; \
117
+ Bma = ROL64(A##bu, 27); \
118
+ A##ga ^= Da; \
119
+ Bme = ROL64(A##ga, 36); \
120
+ A##ke ^= De; \
121
+ Bmi = ROL64(A##ke, 10); \
122
+ A##mi ^= Di; \
123
+ Bmo = ROL64(A##mi, 15); \
124
+ A##so ^= Do; \
125
+ Bmu = ROL64(A##so, 56); \
126
+ E##ma = Bma ^( Bme & Bmi ); \
127
+ Ca ^= E##ma; \
128
+ E##me = Bme ^( Bmi | Bmo ); \
129
+ Ce ^= E##me; \
130
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
131
+ Ci ^= E##mi; \
132
+ E##mo = (~Bmo)^( Bmu & Bma ); \
133
+ Co ^= E##mo; \
134
+ E##mu = Bmu ^( Bma | Bme ); \
135
+ Cu ^= E##mu; \
136
+ \
137
+ A##bi ^= Di; \
138
+ Bsa = ROL64(A##bi, 62); \
139
+ A##go ^= Do; \
140
+ Bse = ROL64(A##go, 55); \
141
+ A##ku ^= Du; \
142
+ Bsi = ROL64(A##ku, 39); \
143
+ A##ma ^= Da; \
144
+ Bso = ROL64(A##ma, 41); \
145
+ A##se ^= De; \
146
+ Bsu = ROL64(A##se, 2); \
147
+ E##sa = Bsa ^((~Bse)& Bsi ); \
148
+ Ca ^= E##sa; \
149
+ E##se = (~Bse)^( Bsi | Bso ); \
150
+ Ce ^= E##se; \
151
+ E##si = Bsi ^( Bso & Bsu ); \
152
+ Ci ^= E##si; \
153
+ E##so = Bso ^( Bsu | Bsa ); \
154
+ Co ^= E##so; \
155
+ E##su = Bsu ^( Bsa & Bse ); \
156
+ Cu ^= E##su; \
157
+ \
158
+
159
+ // --- Code for round (lane complementing pattern 'bebigokimisa')
160
+ // --- 64-bit lanes mapped to 64-bit words
161
+ #define thetaRhoPiChiIota(i, A, E) \
162
+ Da = Cu^ROL64(Ce, 1); \
163
+ De = Ca^ROL64(Ci, 1); \
164
+ Di = Ce^ROL64(Co, 1); \
165
+ Do = Ci^ROL64(Cu, 1); \
166
+ Du = Co^ROL64(Ca, 1); \
167
+ \
168
+ A##ba ^= Da; \
169
+ Bba = A##ba; \
170
+ A##ge ^= De; \
171
+ Bbe = ROL64(A##ge, 44); \
172
+ A##ki ^= Di; \
173
+ Bbi = ROL64(A##ki, 43); \
174
+ A##mo ^= Do; \
175
+ Bbo = ROL64(A##mo, 21); \
176
+ A##su ^= Du; \
177
+ Bbu = ROL64(A##su, 14); \
178
+ E##ba = Bba ^( Bbe | Bbi ); \
179
+ E##ba ^= KeccakF1600RoundConstants[i]; \
180
+ E##be = Bbe ^((~Bbi)| Bbo ); \
181
+ E##bi = Bbi ^( Bbo & Bbu ); \
182
+ E##bo = Bbo ^( Bbu | Bba ); \
183
+ E##bu = Bbu ^( Bba & Bbe ); \
184
+ \
185
+ A##bo ^= Do; \
186
+ Bga = ROL64(A##bo, 28); \
187
+ A##gu ^= Du; \
188
+ Bge = ROL64(A##gu, 20); \
189
+ A##ka ^= Da; \
190
+ Bgi = ROL64(A##ka, 3); \
191
+ A##me ^= De; \
192
+ Bgo = ROL64(A##me, 45); \
193
+ A##si ^= Di; \
194
+ Bgu = ROL64(A##si, 61); \
195
+ E##ga = Bga ^( Bge | Bgi ); \
196
+ E##ge = Bge ^( Bgi & Bgo ); \
197
+ E##gi = Bgi ^( Bgo |(~Bgu)); \
198
+ E##go = Bgo ^( Bgu | Bga ); \
199
+ E##gu = Bgu ^( Bga & Bge ); \
200
+ \
201
+ A##be ^= De; \
202
+ Bka = ROL64(A##be, 1); \
203
+ A##gi ^= Di; \
204
+ Bke = ROL64(A##gi, 6); \
205
+ A##ko ^= Do; \
206
+ Bki = ROL64(A##ko, 25); \
207
+ A##mu ^= Du; \
208
+ Bko = ROL64(A##mu, 8); \
209
+ A##sa ^= Da; \
210
+ Bku = ROL64(A##sa, 18); \
211
+ E##ka = Bka ^( Bke | Bki ); \
212
+ E##ke = Bke ^( Bki & Bko ); \
213
+ E##ki = Bki ^((~Bko)& Bku ); \
214
+ E##ko = (~Bko)^( Bku | Bka ); \
215
+ E##ku = Bku ^( Bka & Bke ); \
216
+ \
217
+ A##bu ^= Du; \
218
+ Bma = ROL64(A##bu, 27); \
219
+ A##ga ^= Da; \
220
+ Bme = ROL64(A##ga, 36); \
221
+ A##ke ^= De; \
222
+ Bmi = ROL64(A##ke, 10); \
223
+ A##mi ^= Di; \
224
+ Bmo = ROL64(A##mi, 15); \
225
+ A##so ^= Do; \
226
+ Bmu = ROL64(A##so, 56); \
227
+ E##ma = Bma ^( Bme & Bmi ); \
228
+ E##me = Bme ^( Bmi | Bmo ); \
229
+ E##mi = Bmi ^((~Bmo)| Bmu ); \
230
+ E##mo = (~Bmo)^( Bmu & Bma ); \
231
+ E##mu = Bmu ^( Bma | Bme ); \
232
+ \
233
+ A##bi ^= Di; \
234
+ Bsa = ROL64(A##bi, 62); \
235
+ A##go ^= Do; \
236
+ Bse = ROL64(A##go, 55); \
237
+ A##ku ^= Du; \
238
+ Bsi = ROL64(A##ku, 39); \
239
+ A##ma ^= Da; \
240
+ Bso = ROL64(A##ma, 41); \
241
+ A##se ^= De; \
242
+ Bsu = ROL64(A##se, 2); \
243
+ E##sa = Bsa ^((~Bse)& Bsi ); \
244
+ E##se = (~Bse)^( Bsi | Bso ); \
245
+ E##si = Bsi ^( Bso & Bsu ); \
246
+ E##so = Bso ^( Bsu | Bsa ); \
247
+ E##su = Bsu ^( Bsa & Bse ); \
248
+ \
249
+
250
+ #else // UseBebigokimisa
251
+ // --- Code for round, with prepare-theta
252
+ // --- 64-bit lanes mapped to 64-bit words
253
+ #define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
254
+ Da = Cu^ROL64(Ce, 1); \
255
+ De = Ca^ROL64(Ci, 1); \
256
+ Di = Ce^ROL64(Co, 1); \
257
+ Do = Ci^ROL64(Cu, 1); \
258
+ Du = Co^ROL64(Ca, 1); \
259
+ \
260
+ A##ba ^= Da; \
261
+ Bba = A##ba; \
262
+ A##ge ^= De; \
263
+ Bbe = ROL64(A##ge, 44); \
264
+ A##ki ^= Di; \
265
+ Bbi = ROL64(A##ki, 43); \
266
+ A##mo ^= Do; \
267
+ Bbo = ROL64(A##mo, 21); \
268
+ A##su ^= Du; \
269
+ Bbu = ROL64(A##su, 14); \
270
+ E##ba = Bba ^((~Bbe)& Bbi ); \
271
+ E##ba ^= KeccakF1600RoundConstants[i]; \
272
+ Ca = E##ba; \
273
+ E##be = Bbe ^((~Bbi)& Bbo ); \
274
+ Ce = E##be; \
275
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
276
+ Ci = E##bi; \
277
+ E##bo = Bbo ^((~Bbu)& Bba ); \
278
+ Co = E##bo; \
279
+ E##bu = Bbu ^((~Bba)& Bbe ); \
280
+ Cu = E##bu; \
281
+ \
282
+ A##bo ^= Do; \
283
+ Bga = ROL64(A##bo, 28); \
284
+ A##gu ^= Du; \
285
+ Bge = ROL64(A##gu, 20); \
286
+ A##ka ^= Da; \
287
+ Bgi = ROL64(A##ka, 3); \
288
+ A##me ^= De; \
289
+ Bgo = ROL64(A##me, 45); \
290
+ A##si ^= Di; \
291
+ Bgu = ROL64(A##si, 61); \
292
+ E##ga = Bga ^((~Bge)& Bgi ); \
293
+ Ca ^= E##ga; \
294
+ E##ge = Bge ^((~Bgi)& Bgo ); \
295
+ Ce ^= E##ge; \
296
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
297
+ Ci ^= E##gi; \
298
+ E##go = Bgo ^((~Bgu)& Bga ); \
299
+ Co ^= E##go; \
300
+ E##gu = Bgu ^((~Bga)& Bge ); \
301
+ Cu ^= E##gu; \
302
+ \
303
+ A##be ^= De; \
304
+ Bka = ROL64(A##be, 1); \
305
+ A##gi ^= Di; \
306
+ Bke = ROL64(A##gi, 6); \
307
+ A##ko ^= Do; \
308
+ Bki = ROL64(A##ko, 25); \
309
+ A##mu ^= Du; \
310
+ Bko = ROL64(A##mu, 8); \
311
+ A##sa ^= Da; \
312
+ Bku = ROL64(A##sa, 18); \
313
+ E##ka = Bka ^((~Bke)& Bki ); \
314
+ Ca ^= E##ka; \
315
+ E##ke = Bke ^((~Bki)& Bko ); \
316
+ Ce ^= E##ke; \
317
+ E##ki = Bki ^((~Bko)& Bku ); \
318
+ Ci ^= E##ki; \
319
+ E##ko = Bko ^((~Bku)& Bka ); \
320
+ Co ^= E##ko; \
321
+ E##ku = Bku ^((~Bka)& Bke ); \
322
+ Cu ^= E##ku; \
323
+ \
324
+ A##bu ^= Du; \
325
+ Bma = ROL64(A##bu, 27); \
326
+ A##ga ^= Da; \
327
+ Bme = ROL64(A##ga, 36); \
328
+ A##ke ^= De; \
329
+ Bmi = ROL64(A##ke, 10); \
330
+ A##mi ^= Di; \
331
+ Bmo = ROL64(A##mi, 15); \
332
+ A##so ^= Do; \
333
+ Bmu = ROL64(A##so, 56); \
334
+ E##ma = Bma ^((~Bme)& Bmi ); \
335
+ Ca ^= E##ma; \
336
+ E##me = Bme ^((~Bmi)& Bmo ); \
337
+ Ce ^= E##me; \
338
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
339
+ Ci ^= E##mi; \
340
+ E##mo = Bmo ^((~Bmu)& Bma ); \
341
+ Co ^= E##mo; \
342
+ E##mu = Bmu ^((~Bma)& Bme ); \
343
+ Cu ^= E##mu; \
344
+ \
345
+ A##bi ^= Di; \
346
+ Bsa = ROL64(A##bi, 62); \
347
+ A##go ^= Do; \
348
+ Bse = ROL64(A##go, 55); \
349
+ A##ku ^= Du; \
350
+ Bsi = ROL64(A##ku, 39); \
351
+ A##ma ^= Da; \
352
+ Bso = ROL64(A##ma, 41); \
353
+ A##se ^= De; \
354
+ Bsu = ROL64(A##se, 2); \
355
+ E##sa = Bsa ^((~Bse)& Bsi ); \
356
+ Ca ^= E##sa; \
357
+ E##se = Bse ^((~Bsi)& Bso ); \
358
+ Ce ^= E##se; \
359
+ E##si = Bsi ^((~Bso)& Bsu ); \
360
+ Ci ^= E##si; \
361
+ E##so = Bso ^((~Bsu)& Bsa ); \
362
+ Co ^= E##so; \
363
+ E##su = Bsu ^((~Bsa)& Bse ); \
364
+ Cu ^= E##su; \
365
+ \
366
+
367
+ // --- Code for round
368
+ // --- 64-bit lanes mapped to 64-bit words
369
+ #define thetaRhoPiChiIota(i, A, E) \
370
+ Da = Cu^ROL64(Ce, 1); \
371
+ De = Ca^ROL64(Ci, 1); \
372
+ Di = Ce^ROL64(Co, 1); \
373
+ Do = Ci^ROL64(Cu, 1); \
374
+ Du = Co^ROL64(Ca, 1); \
375
+ \
376
+ A##ba ^= Da; \
377
+ Bba = A##ba; \
378
+ A##ge ^= De; \
379
+ Bbe = ROL64(A##ge, 44); \
380
+ A##ki ^= Di; \
381
+ Bbi = ROL64(A##ki, 43); \
382
+ A##mo ^= Do; \
383
+ Bbo = ROL64(A##mo, 21); \
384
+ A##su ^= Du; \
385
+ Bbu = ROL64(A##su, 14); \
386
+ E##ba = Bba ^((~Bbe)& Bbi ); \
387
+ E##ba ^= KeccakF1600RoundConstants[i]; \
388
+ E##be = Bbe ^((~Bbi)& Bbo ); \
389
+ E##bi = Bbi ^((~Bbo)& Bbu ); \
390
+ E##bo = Bbo ^((~Bbu)& Bba ); \
391
+ E##bu = Bbu ^((~Bba)& Bbe ); \
392
+ \
393
+ A##bo ^= Do; \
394
+ Bga = ROL64(A##bo, 28); \
395
+ A##gu ^= Du; \
396
+ Bge = ROL64(A##gu, 20); \
397
+ A##ka ^= Da; \
398
+ Bgi = ROL64(A##ka, 3); \
399
+ A##me ^= De; \
400
+ Bgo = ROL64(A##me, 45); \
401
+ A##si ^= Di; \
402
+ Bgu = ROL64(A##si, 61); \
403
+ E##ga = Bga ^((~Bge)& Bgi ); \
404
+ E##ge = Bge ^((~Bgi)& Bgo ); \
405
+ E##gi = Bgi ^((~Bgo)& Bgu ); \
406
+ E##go = Bgo ^((~Bgu)& Bga ); \
407
+ E##gu = Bgu ^((~Bga)& Bge ); \
408
+ \
409
+ A##be ^= De; \
410
+ Bka = ROL64(A##be, 1); \
411
+ A##gi ^= Di; \
412
+ Bke = ROL64(A##gi, 6); \
413
+ A##ko ^= Do; \
414
+ Bki = ROL64(A##ko, 25); \
415
+ A##mu ^= Du; \
416
+ Bko = ROL64(A##mu, 8); \
417
+ A##sa ^= Da; \
418
+ Bku = ROL64(A##sa, 18); \
419
+ E##ka = Bka ^((~Bke)& Bki ); \
420
+ E##ke = Bke ^((~Bki)& Bko ); \
421
+ E##ki = Bki ^((~Bko)& Bku ); \
422
+ E##ko = Bko ^((~Bku)& Bka ); \
423
+ E##ku = Bku ^((~Bka)& Bke ); \
424
+ \
425
+ A##bu ^= Du; \
426
+ Bma = ROL64(A##bu, 27); \
427
+ A##ga ^= Da; \
428
+ Bme = ROL64(A##ga, 36); \
429
+ A##ke ^= De; \
430
+ Bmi = ROL64(A##ke, 10); \
431
+ A##mi ^= Di; \
432
+ Bmo = ROL64(A##mi, 15); \
433
+ A##so ^= Do; \
434
+ Bmu = ROL64(A##so, 56); \
435
+ E##ma = Bma ^((~Bme)& Bmi ); \
436
+ E##me = Bme ^((~Bmi)& Bmo ); \
437
+ E##mi = Bmi ^((~Bmo)& Bmu ); \
438
+ E##mo = Bmo ^((~Bmu)& Bma ); \
439
+ E##mu = Bmu ^((~Bma)& Bme ); \
440
+ \
441
+ A##bi ^= Di; \
442
+ Bsa = ROL64(A##bi, 62); \
443
+ A##go ^= Do; \
444
+ Bse = ROL64(A##go, 55); \
445
+ A##ku ^= Du; \
446
+ Bsi = ROL64(A##ku, 39); \
447
+ A##ma ^= Da; \
448
+ Bso = ROL64(A##ma, 41); \
449
+ A##se ^= De; \
450
+ Bsu = ROL64(A##se, 2); \
451
+ E##sa = Bsa ^((~Bse)& Bsi ); \
452
+ E##se = Bse ^((~Bsi)& Bso ); \
453
+ E##si = Bsi ^((~Bso)& Bsu ); \
454
+ E##so = Bso ^((~Bsu)& Bsa ); \
455
+ E##su = Bsu ^((~Bsa)& Bse ); \
456
+ \
457
+
458
+ #endif // UseBebigokimisa
459
+
460
+ #define copyFromState(X, state) \
461
+ X##ba = state[ 0]; \
462
+ X##be = state[ 1]; \
463
+ X##bi = state[ 2]; \
464
+ X##bo = state[ 3]; \
465
+ X##bu = state[ 4]; \
466
+ X##ga = state[ 5]; \
467
+ X##ge = state[ 6]; \
468
+ X##gi = state[ 7]; \
469
+ X##go = state[ 8]; \
470
+ X##gu = state[ 9]; \
471
+ X##ka = state[10]; \
472
+ X##ke = state[11]; \
473
+ X##ki = state[12]; \
474
+ X##ko = state[13]; \
475
+ X##ku = state[14]; \
476
+ X##ma = state[15]; \
477
+ X##me = state[16]; \
478
+ X##mi = state[17]; \
479
+ X##mo = state[18]; \
480
+ X##mu = state[19]; \
481
+ X##sa = state[20]; \
482
+ X##se = state[21]; \
483
+ X##si = state[22]; \
484
+ X##so = state[23]; \
485
+ X##su = state[24]; \
486
+
487
+ #define copyToState(state, X) \
488
+ state[ 0] = X##ba; \
489
+ state[ 1] = X##be; \
490
+ state[ 2] = X##bi; \
491
+ state[ 3] = X##bo; \
492
+ state[ 4] = X##bu; \
493
+ state[ 5] = X##ga; \
494
+ state[ 6] = X##ge; \
495
+ state[ 7] = X##gi; \
496
+ state[ 8] = X##go; \
497
+ state[ 9] = X##gu; \
498
+ state[10] = X##ka; \
499
+ state[11] = X##ke; \
500
+ state[12] = X##ki; \
501
+ state[13] = X##ko; \
502
+ state[14] = X##ku; \
503
+ state[15] = X##ma; \
504
+ state[16] = X##me; \
505
+ state[17] = X##mi; \
506
+ state[18] = X##mo; \
507
+ state[19] = X##mu; \
508
+ state[20] = X##sa; \
509
+ state[21] = X##se; \
510
+ state[22] = X##si; \
511
+ state[23] = X##so; \
512
+ state[24] = X##su; \
513
+
514
+ #define copyStateVariables(X, Y) \
515
+ X##ba = Y##ba; \
516
+ X##be = Y##be; \
517
+ X##bi = Y##bi; \
518
+ X##bo = Y##bo; \
519
+ X##bu = Y##bu; \
520
+ X##ga = Y##ga; \
521
+ X##ge = Y##ge; \
522
+ X##gi = Y##gi; \
523
+ X##go = Y##go; \
524
+ X##gu = Y##gu; \
525
+ X##ka = Y##ka; \
526
+ X##ke = Y##ke; \
527
+ X##ki = Y##ki; \
528
+ X##ko = Y##ko; \
529
+ X##ku = Y##ku; \
530
+ X##ma = Y##ma; \
531
+ X##me = Y##me; \
532
+ X##mi = Y##mi; \
533
+ X##mo = Y##mo; \
534
+ X##mu = Y##mu; \
535
+ X##sa = Y##sa; \
536
+ X##se = Y##se; \
537
+ X##si = Y##si; \
538
+ X##so = Y##so; \
539
+ X##su = Y##su; \
540
+
541
+ #define copyFromStateAndXOR(X, state, input, laneCount) \
542
+ if (laneCount < 16) { \
543
+ if (laneCount < 8) { \
544
+ if (laneCount < 4) { \
545
+ if (laneCount < 2) { \
546
+ if (laneCount < 1) { \
547
+ X##ba = state[ 0]; \
548
+ } \
549
+ else { \
550
+ X##ba = state[ 0]^input[ 0]; \
551
+ } \
552
+ X##be = state[ 1]; \
553
+ X##bi = state[ 2]; \
554
+ } \
555
+ else { \
556
+ X##ba = state[ 0]^input[ 0]; \
557
+ X##be = state[ 1]^input[ 1]; \
558
+ if (laneCount < 3) { \
559
+ X##bi = state[ 2]; \
560
+ } \
561
+ else { \
562
+ X##bi = state[ 2]^input[ 2]; \
563
+ } \
564
+ } \
565
+ X##bo = state[ 3]; \
566
+ X##bu = state[ 4]; \
567
+ X##ga = state[ 5]; \
568
+ X##ge = state[ 6]; \
569
+ } \
570
+ else { \
571
+ X##ba = state[ 0]^input[ 0]; \
572
+ X##be = state[ 1]^input[ 1]; \
573
+ X##bi = state[ 2]^input[ 2]; \
574
+ X##bo = state[ 3]^input[ 3]; \
575
+ if (laneCount < 6) { \
576
+ if (laneCount < 5) { \
577
+ X##bu = state[ 4]; \
578
+ } \
579
+ else { \
580
+ X##bu = state[ 4]^input[ 4]; \
581
+ } \
582
+ X##ga = state[ 5]; \
583
+ X##ge = state[ 6]; \
584
+ } \
585
+ else { \
586
+ X##bu = state[ 4]^input[ 4]; \
587
+ X##ga = state[ 5]^input[ 5]; \
588
+ if (laneCount < 7) { \
589
+ X##ge = state[ 6]; \
590
+ } \
591
+ else { \
592
+ X##ge = state[ 6]^input[ 6]; \
593
+ } \
594
+ } \
595
+ } \
596
+ X##gi = state[ 7]; \
597
+ X##go = state[ 8]; \
598
+ X##gu = state[ 9]; \
599
+ X##ka = state[10]; \
600
+ X##ke = state[11]; \
601
+ X##ki = state[12]; \
602
+ X##ko = state[13]; \
603
+ X##ku = state[14]; \
604
+ } \
605
+ else { \
606
+ X##ba = state[ 0]^input[ 0]; \
607
+ X##be = state[ 1]^input[ 1]; \
608
+ X##bi = state[ 2]^input[ 2]; \
609
+ X##bo = state[ 3]^input[ 3]; \
610
+ X##bu = state[ 4]^input[ 4]; \
611
+ X##ga = state[ 5]^input[ 5]; \
612
+ X##ge = state[ 6]^input[ 6]; \
613
+ X##gi = state[ 7]^input[ 7]; \
614
+ if (laneCount < 12) { \
615
+ if (laneCount < 10) { \
616
+ if (laneCount < 9) { \
617
+ X##go = state[ 8]; \
618
+ } \
619
+ else { \
620
+ X##go = state[ 8]^input[ 8]; \
621
+ } \
622
+ X##gu = state[ 9]; \
623
+ X##ka = state[10]; \
624
+ } \
625
+ else { \
626
+ X##go = state[ 8]^input[ 8]; \
627
+ X##gu = state[ 9]^input[ 9]; \
628
+ if (laneCount < 11) { \
629
+ X##ka = state[10]; \
630
+ } \
631
+ else { \
632
+ X##ka = state[10]^input[10]; \
633
+ } \
634
+ } \
635
+ X##ke = state[11]; \
636
+ X##ki = state[12]; \
637
+ X##ko = state[13]; \
638
+ X##ku = state[14]; \
639
+ } \
640
+ else { \
641
+ X##go = state[ 8]^input[ 8]; \
642
+ X##gu = state[ 9]^input[ 9]; \
643
+ X##ka = state[10]^input[10]; \
644
+ X##ke = state[11]^input[11]; \
645
+ if (laneCount < 14) { \
646
+ if (laneCount < 13) { \
647
+ X##ki = state[12]; \
648
+ } \
649
+ else { \
650
+ X##ki = state[12]^input[12]; \
651
+ } \
652
+ X##ko = state[13]; \
653
+ X##ku = state[14]; \
654
+ } \
655
+ else { \
656
+ X##ki = state[12]^input[12]; \
657
+ X##ko = state[13]^input[13]; \
658
+ if (laneCount < 15) { \
659
+ X##ku = state[14]; \
660
+ } \
661
+ else { \
662
+ X##ku = state[14]^input[14]; \
663
+ } \
664
+ } \
665
+ } \
666
+ } \
667
+ X##ma = state[15]; \
668
+ X##me = state[16]; \
669
+ X##mi = state[17]; \
670
+ X##mo = state[18]; \
671
+ X##mu = state[19]; \
672
+ X##sa = state[20]; \
673
+ X##se = state[21]; \
674
+ X##si = state[22]; \
675
+ X##so = state[23]; \
676
+ X##su = state[24]; \
677
+ } \
678
+ else { \
679
+ X##ba = state[ 0]^input[ 0]; \
680
+ X##be = state[ 1]^input[ 1]; \
681
+ X##bi = state[ 2]^input[ 2]; \
682
+ X##bo = state[ 3]^input[ 3]; \
683
+ X##bu = state[ 4]^input[ 4]; \
684
+ X##ga = state[ 5]^input[ 5]; \
685
+ X##ge = state[ 6]^input[ 6]; \
686
+ X##gi = state[ 7]^input[ 7]; \
687
+ X##go = state[ 8]^input[ 8]; \
688
+ X##gu = state[ 9]^input[ 9]; \
689
+ X##ka = state[10]^input[10]; \
690
+ X##ke = state[11]^input[11]; \
691
+ X##ki = state[12]^input[12]; \
692
+ X##ko = state[13]^input[13]; \
693
+ X##ku = state[14]^input[14]; \
694
+ X##ma = state[15]^input[15]; \
695
+ if (laneCount < 24) { \
696
+ if (laneCount < 20) { \
697
+ if (laneCount < 18) { \
698
+ if (laneCount < 17) { \
699
+ X##me = state[16]; \
700
+ } \
701
+ else { \
702
+ X##me = state[16]^input[16]; \
703
+ } \
704
+ X##mi = state[17]; \
705
+ X##mo = state[18]; \
706
+ } \
707
+ else { \
708
+ X##me = state[16]^input[16]; \
709
+ X##mi = state[17]^input[17]; \
710
+ if (laneCount < 19) { \
711
+ X##mo = state[18]; \
712
+ } \
713
+ else { \
714
+ X##mo = state[18]^input[18]; \
715
+ } \
716
+ } \
717
+ X##mu = state[19]; \
718
+ X##sa = state[20]; \
719
+ X##se = state[21]; \
720
+ X##si = state[22]; \
721
+ } \
722
+ else { \
723
+ X##me = state[16]^input[16]; \
724
+ X##mi = state[17]^input[17]; \
725
+ X##mo = state[18]^input[18]; \
726
+ X##mu = state[19]^input[19]; \
727
+ if (laneCount < 22) { \
728
+ if (laneCount < 21) { \
729
+ X##sa = state[20]; \
730
+ } \
731
+ else { \
732
+ X##sa = state[20]^input[20]; \
733
+ } \
734
+ X##se = state[21]; \
735
+ X##si = state[22]; \
736
+ } \
737
+ else { \
738
+ X##sa = state[20]^input[20]; \
739
+ X##se = state[21]^input[21]; \
740
+ if (laneCount < 23) { \
741
+ X##si = state[22]; \
742
+ } \
743
+ else { \
744
+ X##si = state[22]^input[22]; \
745
+ } \
746
+ } \
747
+ } \
748
+ X##so = state[23]; \
749
+ X##su = state[24]; \
750
+ } \
751
+ else { \
752
+ X##me = state[16]^input[16]; \
753
+ X##mi = state[17]^input[17]; \
754
+ X##mo = state[18]^input[18]; \
755
+ X##mu = state[19]^input[19]; \
756
+ X##sa = state[20]^input[20]; \
757
+ X##se = state[21]^input[21]; \
758
+ X##si = state[22]^input[22]; \
759
+ X##so = state[23]^input[23]; \
760
+ if (laneCount < 25) { \
761
+ X##su = state[24]; \
762
+ } \
763
+ else { \
764
+ X##su = state[24]^input[24]; \
765
+ } \
766
+ } \
767
+ }
768
+
769
/*
 * XORinputAndTrailingBits(X, input, laneCount, trailingBits)
 *
 * XORs words of `input` into the lane variables X##ba, X##be, X##bi, X##bo,
 * X##bu, X##ga, ... X##su (lane index 0..24 in that order) and XORs
 * `trailingBits` into the lane that follows the last input lane:
 *
 *   laneCount < 1        only X##ba ^= trailingBits
 *   1 <= laneCount < 25  lanes 0..laneCount-1 ^= input[i],
 *                        lane laneCount ^= trailingBits
 *   laneCount >= 25      all 25 lanes ^= input[i]; trailingBits is not used
 *
 * The nested comparisons are a binary decision tree on laneCount, so only a
 * handful of tests are executed per call.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments such as `buf + 1`, `n & 31` or `a | b` expand as written.
 * `laneCount` is evaluated several times and must have no side effects.
 * The expansion is a single if/else statement without a trailing semicolon.
 */
#define XORinputAndTrailingBits(X, input, laneCount, trailingBits) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) < 1) { \
                        X##ba ^= (trailingBits); \
                    } else { \
                        X##ba ^= (input)[ 0]; \
                        X##be ^= (trailingBits); \
                    } \
                } else { \
                    X##ba ^= (input)[ 0]; \
                    X##be ^= (input)[ 1]; \
                    if ((laneCount) < 3) { \
                        X##bi ^= (trailingBits); \
                    } else { \
                        X##bi ^= (input)[ 2]; \
                        X##bo ^= (trailingBits); \
                    } \
                } \
            } else { \
                X##ba ^= (input)[ 0]; \
                X##be ^= (input)[ 1]; \
                X##bi ^= (input)[ 2]; \
                X##bo ^= (input)[ 3]; \
                if ((laneCount) < 6) { \
                    if ((laneCount) < 5) { \
                        X##bu ^= (trailingBits); \
                    } else { \
                        X##bu ^= (input)[ 4]; \
                        X##ga ^= (trailingBits); \
                    } \
                } else { \
                    X##bu ^= (input)[ 4]; \
                    X##ga ^= (input)[ 5]; \
                    if ((laneCount) < 7) { \
                        X##ge ^= (trailingBits); \
                    } else { \
                        X##ge ^= (input)[ 6]; \
                        X##gi ^= (trailingBits); \
                    } \
                } \
            } \
        } else { \
            X##ba ^= (input)[ 0]; \
            X##be ^= (input)[ 1]; \
            X##bi ^= (input)[ 2]; \
            X##bo ^= (input)[ 3]; \
            X##bu ^= (input)[ 4]; \
            X##ga ^= (input)[ 5]; \
            X##ge ^= (input)[ 6]; \
            X##gi ^= (input)[ 7]; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) < 9) { \
                        X##go ^= (trailingBits); \
                    } else { \
                        X##go ^= (input)[ 8]; \
                        X##gu ^= (trailingBits); \
                    } \
                } else { \
                    X##go ^= (input)[ 8]; \
                    X##gu ^= (input)[ 9]; \
                    if ((laneCount) < 11) { \
                        X##ka ^= (trailingBits); \
                    } else { \
                        X##ka ^= (input)[10]; \
                        X##ke ^= (trailingBits); \
                    } \
                } \
            } else { \
                X##go ^= (input)[ 8]; \
                X##gu ^= (input)[ 9]; \
                X##ka ^= (input)[10]; \
                X##ke ^= (input)[11]; \
                if ((laneCount) < 14) { \
                    if ((laneCount) < 13) { \
                        X##ki ^= (trailingBits); \
                    } else { \
                        X##ki ^= (input)[12]; \
                        X##ko ^= (trailingBits); \
                    } \
                } else { \
                    X##ki ^= (input)[12]; \
                    X##ko ^= (input)[13]; \
                    if ((laneCount) < 15) { \
                        X##ku ^= (trailingBits); \
                    } else { \
                        X##ku ^= (input)[14]; \
                        X##ma ^= (trailingBits); \
                    } \
                } \
            } \
        } \
    } else { \
        X##ba ^= (input)[ 0]; \
        X##be ^= (input)[ 1]; \
        X##bi ^= (input)[ 2]; \
        X##bo ^= (input)[ 3]; \
        X##bu ^= (input)[ 4]; \
        X##ga ^= (input)[ 5]; \
        X##ge ^= (input)[ 6]; \
        X##gi ^= (input)[ 7]; \
        X##go ^= (input)[ 8]; \
        X##gu ^= (input)[ 9]; \
        X##ka ^= (input)[10]; \
        X##ke ^= (input)[11]; \
        X##ki ^= (input)[12]; \
        X##ko ^= (input)[13]; \
        X##ku ^= (input)[14]; \
        X##ma ^= (input)[15]; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) < 17) { \
                        X##me ^= (trailingBits); \
                    } else { \
                        X##me ^= (input)[16]; \
                        X##mi ^= (trailingBits); \
                    } \
                } else { \
                    X##me ^= (input)[16]; \
                    X##mi ^= (input)[17]; \
                    if ((laneCount) < 19) { \
                        X##mo ^= (trailingBits); \
                    } else { \
                        X##mo ^= (input)[18]; \
                        X##mu ^= (trailingBits); \
                    } \
                } \
            } else { \
                X##me ^= (input)[16]; \
                X##mi ^= (input)[17]; \
                X##mo ^= (input)[18]; \
                X##mu ^= (input)[19]; \
                if ((laneCount) < 22) { \
                    if ((laneCount) < 21) { \
                        X##sa ^= (trailingBits); \
                    } else { \
                        X##sa ^= (input)[20]; \
                        X##se ^= (trailingBits); \
                    } \
                } else { \
                    X##sa ^= (input)[20]; \
                    X##se ^= (input)[21]; \
                    if ((laneCount) < 23) { \
                        X##si ^= (trailingBits); \
                    } else { \
                        X##si ^= (input)[22]; \
                        X##so ^= (trailingBits); \
                    } \
                } \
            } \
        } else { \
            X##me ^= (input)[16]; \
            X##mi ^= (input)[17]; \
            X##mo ^= (input)[18]; \
            X##mu ^= (input)[19]; \
            X##sa ^= (input)[20]; \
            X##se ^= (input)[21]; \
            X##si ^= (input)[22]; \
            X##so ^= (input)[23]; \
            if ((laneCount) < 25) { \
                X##su ^= (trailingBits); \
            } else { \
                X##su ^= (input)[24]; \
            } \
        } \
    }
964
+
965
+ #ifdef UseBebigokimisa
966
+
967
/*
 * copyToStateAndOutput(X, state, output, laneCount)   [UseBebigokimisa variant]
 *
 * Stores all 25 lane variables X##ba .. X##su into state[0..24] (always, for
 * any laneCount) and additionally stores the first laneCount lanes into
 * output[0..laneCount-1]; output lanes at index >= laneCount are not touched.
 *
 * In this variant the values written to `output` for lanes be(1), bi(2),
 * go(8), ki(12), mi(17) and sa(20) are bitwise complemented (~), while
 * `state` always receives the variable unchanged.
 * NOTE(review): presumably those six lanes are held in complemented form by
 * the round macros when UseBebigokimisa is defined - confirm there.
 *
 * For each lane the state store precedes the output store, as in the
 * original.  All arguments are parenthesized so expression arguments
 * (`st + 1`, `n & 31`) expand correctly; `laneCount` is evaluated several
 * times and must have no side effects.  The expansion is a single if/else
 * statement without a trailing semicolon.
 */
#define copyToStateAndOutput(X, state, output, laneCount) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    (state)[ 0] = X##ba; \
                    if ((laneCount) >= 1) { \
                        (output)[ 0] = X##ba; \
                    } \
                    (state)[ 1] = X##be; \
                    (state)[ 2] = X##bi; \
                } else { \
                    (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
                    (state)[ 1] = X##be; (output)[ 1] = ~X##be; \
                    (state)[ 2] = X##bi; \
                    if ((laneCount) >= 3) { \
                        (output)[ 2] = ~X##bi; \
                    } \
                } \
                (state)[ 3] = X##bo; \
                (state)[ 4] = X##bu; \
                (state)[ 5] = X##ga; \
                (state)[ 6] = X##ge; \
            } else { \
                (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
                (state)[ 1] = X##be; (output)[ 1] = ~X##be; \
                (state)[ 2] = X##bi; (output)[ 2] = ~X##bi; \
                (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
                if ((laneCount) < 6) { \
                    (state)[ 4] = X##bu; \
                    if ((laneCount) >= 5) { \
                        (output)[ 4] = X##bu; \
                    } \
                    (state)[ 5] = X##ga; \
                    (state)[ 6] = X##ge; \
                } else { \
                    (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
                    (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
                    (state)[ 6] = X##ge; \
                    if ((laneCount) >= 7) { \
                        (output)[ 6] = X##ge; \
                    } \
                } \
            } \
            (state)[ 7] = X##gi; \
            (state)[ 8] = X##go; \
            (state)[ 9] = X##gu; \
            (state)[10] = X##ka; \
            (state)[11] = X##ke; \
            (state)[12] = X##ki; \
            (state)[13] = X##ko; \
            (state)[14] = X##ku; \
        } else { \
            (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
            (state)[ 1] = X##be; (output)[ 1] = ~X##be; \
            (state)[ 2] = X##bi; (output)[ 2] = ~X##bi; \
            (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
            (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
            (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
            (state)[ 6] = X##ge; (output)[ 6] = X##ge; \
            (state)[ 7] = X##gi; (output)[ 7] = X##gi; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    (state)[ 8] = X##go; \
                    if ((laneCount) >= 9) { \
                        (output)[ 8] = ~X##go; \
                    } \
                    (state)[ 9] = X##gu; \
                    (state)[10] = X##ka; \
                } else { \
                    (state)[ 8] = X##go; (output)[ 8] = ~X##go; \
                    (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
                    (state)[10] = X##ka; \
                    if ((laneCount) >= 11) { \
                        (output)[10] = X##ka; \
                    } \
                } \
                (state)[11] = X##ke; \
                (state)[12] = X##ki; \
                (state)[13] = X##ko; \
                (state)[14] = X##ku; \
            } else { \
                (state)[ 8] = X##go; (output)[ 8] = ~X##go; \
                (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
                (state)[10] = X##ka; (output)[10] = X##ka; \
                (state)[11] = X##ke; (output)[11] = X##ke; \
                if ((laneCount) < 14) { \
                    (state)[12] = X##ki; \
                    if ((laneCount) >= 13) { \
                        (output)[12] = ~X##ki; \
                    } \
                    (state)[13] = X##ko; \
                    (state)[14] = X##ku; \
                } else { \
                    (state)[12] = X##ki; (output)[12] = ~X##ki; \
                    (state)[13] = X##ko; (output)[13] = X##ko; \
                    (state)[14] = X##ku; \
                    if ((laneCount) >= 15) { \
                        (output)[14] = X##ku; \
                    } \
                } \
            } \
        } \
        (state)[15] = X##ma; \
        (state)[16] = X##me; \
        (state)[17] = X##mi; \
        (state)[18] = X##mo; \
        (state)[19] = X##mu; \
        (state)[20] = X##sa; \
        (state)[21] = X##se; \
        (state)[22] = X##si; \
        (state)[23] = X##so; \
        (state)[24] = X##su; \
    } else { \
        (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
        (state)[ 1] = X##be; (output)[ 1] = ~X##be; \
        (state)[ 2] = X##bi; (output)[ 2] = ~X##bi; \
        (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
        (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
        (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
        (state)[ 6] = X##ge; (output)[ 6] = X##ge; \
        (state)[ 7] = X##gi; (output)[ 7] = X##gi; \
        (state)[ 8] = X##go; (output)[ 8] = ~X##go; \
        (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
        (state)[10] = X##ka; (output)[10] = X##ka; \
        (state)[11] = X##ke; (output)[11] = X##ke; \
        (state)[12] = X##ki; (output)[12] = ~X##ki; \
        (state)[13] = X##ko; (output)[13] = X##ko; \
        (state)[14] = X##ku; (output)[14] = X##ku; \
        (state)[15] = X##ma; (output)[15] = X##ma; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    (state)[16] = X##me; \
                    if ((laneCount) >= 17) { \
                        (output)[16] = X##me; \
                    } \
                    (state)[17] = X##mi; \
                    (state)[18] = X##mo; \
                } else { \
                    (state)[16] = X##me; (output)[16] = X##me; \
                    (state)[17] = X##mi; (output)[17] = ~X##mi; \
                    (state)[18] = X##mo; \
                    if ((laneCount) >= 19) { \
                        (output)[18] = X##mo; \
                    } \
                } \
                (state)[19] = X##mu; \
                (state)[20] = X##sa; \
                (state)[21] = X##se; \
                (state)[22] = X##si; \
            } else { \
                (state)[16] = X##me; (output)[16] = X##me; \
                (state)[17] = X##mi; (output)[17] = ~X##mi; \
                (state)[18] = X##mo; (output)[18] = X##mo; \
                (state)[19] = X##mu; (output)[19] = X##mu; \
                if ((laneCount) < 22) { \
                    (state)[20] = X##sa; \
                    if ((laneCount) >= 21) { \
                        (output)[20] = ~X##sa; \
                    } \
                    (state)[21] = X##se; \
                    (state)[22] = X##si; \
                } else { \
                    (state)[20] = X##sa; (output)[20] = ~X##sa; \
                    (state)[21] = X##se; (output)[21] = X##se; \
                    (state)[22] = X##si; \
                    if ((laneCount) >= 23) { \
                        (output)[22] = X##si; \
                    } \
                } \
            } \
            (state)[23] = X##so; \
            (state)[24] = X##su; \
        } else { \
            (state)[16] = X##me; (output)[16] = X##me; \
            (state)[17] = X##mi; (output)[17] = ~X##mi; \
            (state)[18] = X##mo; (output)[18] = X##mo; \
            (state)[19] = X##mu; (output)[19] = X##mu; \
            (state)[20] = X##sa; (output)[20] = ~X##sa; \
            (state)[21] = X##se; (output)[21] = X##se; \
            (state)[22] = X##si; (output)[22] = X##si; \
            (state)[23] = X##so; (output)[23] = X##so; \
            (state)[24] = X##su; \
            if ((laneCount) >= 25) { \
                (output)[24] = X##su; \
            } \
        } \
    }
1224
+
1225
/*
 * output(X, output, laneCount)   [UseBebigokimisa variant]
 *
 * Stores the first laneCount lane variables (X##ba, X##be, ... in lane order)
 * into output[0..laneCount-1].  Nothing is written for laneCount < 1, and at
 * most 25 lanes are written for laneCount >= 25.
 *
 * In this variant lanes be(1), bi(2), go(8), ki(12), mi(17) and sa(20) are
 * written bitwise complemented (~); the lane variables themselves are not
 * modified.
 *
 * `output` and `laneCount` are parenthesized in the expansion so expression
 * arguments (`out + 1`, `n & 31`) expand correctly; `laneCount` is evaluated
 * several times and must have no side effects.  The expansion is a single
 * if/else statement without a trailing semicolon.
 */
#define output(X, output, laneCount) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) >= 1) { \
                        (output)[ 0] = X##ba; \
                    } \
                } else { \
                    (output)[ 0] = X##ba; \
                    (output)[ 1] = ~X##be; \
                    if ((laneCount) >= 3) { \
                        (output)[ 2] = ~X##bi; \
                    } \
                } \
            } else { \
                (output)[ 0] = X##ba; \
                (output)[ 1] = ~X##be; \
                (output)[ 2] = ~X##bi; \
                (output)[ 3] = X##bo; \
                if ((laneCount) < 6) { \
                    if ((laneCount) >= 5) { \
                        (output)[ 4] = X##bu; \
                    } \
                } else { \
                    (output)[ 4] = X##bu; \
                    (output)[ 5] = X##ga; \
                    if ((laneCount) >= 7) { \
                        (output)[ 6] = X##ge; \
                    } \
                } \
            } \
        } else { \
            (output)[ 0] = X##ba; \
            (output)[ 1] = ~X##be; \
            (output)[ 2] = ~X##bi; \
            (output)[ 3] = X##bo; \
            (output)[ 4] = X##bu; \
            (output)[ 5] = X##ga; \
            (output)[ 6] = X##ge; \
            (output)[ 7] = X##gi; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) >= 9) { \
                        (output)[ 8] = ~X##go; \
                    } \
                } else { \
                    (output)[ 8] = ~X##go; \
                    (output)[ 9] = X##gu; \
                    if ((laneCount) >= 11) { \
                        (output)[10] = X##ka; \
                    } \
                } \
            } else { \
                (output)[ 8] = ~X##go; \
                (output)[ 9] = X##gu; \
                (output)[10] = X##ka; \
                (output)[11] = X##ke; \
                if ((laneCount) < 14) { \
                    if ((laneCount) >= 13) { \
                        (output)[12] = ~X##ki; \
                    } \
                } else { \
                    (output)[12] = ~X##ki; \
                    (output)[13] = X##ko; \
                    if ((laneCount) >= 15) { \
                        (output)[14] = X##ku; \
                    } \
                } \
            } \
        } \
    } else { \
        (output)[ 0] = X##ba; \
        (output)[ 1] = ~X##be; \
        (output)[ 2] = ~X##bi; \
        (output)[ 3] = X##bo; \
        (output)[ 4] = X##bu; \
        (output)[ 5] = X##ga; \
        (output)[ 6] = X##ge; \
        (output)[ 7] = X##gi; \
        (output)[ 8] = ~X##go; \
        (output)[ 9] = X##gu; \
        (output)[10] = X##ka; \
        (output)[11] = X##ke; \
        (output)[12] = ~X##ki; \
        (output)[13] = X##ko; \
        (output)[14] = X##ku; \
        (output)[15] = X##ma; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) >= 17) { \
                        (output)[16] = X##me; \
                    } \
                } else { \
                    (output)[16] = X##me; \
                    (output)[17] = ~X##mi; \
                    if ((laneCount) >= 19) { \
                        (output)[18] = X##mo; \
                    } \
                } \
            } else { \
                (output)[16] = X##me; \
                (output)[17] = ~X##mi; \
                (output)[18] = X##mo; \
                (output)[19] = X##mu; \
                if ((laneCount) < 22) { \
                    if ((laneCount) >= 21) { \
                        (output)[20] = ~X##sa; \
                    } \
                } else { \
                    (output)[20] = ~X##sa; \
                    (output)[21] = X##se; \
                    if ((laneCount) >= 23) { \
                        (output)[22] = X##si; \
                    } \
                } \
            } \
        } else { \
            (output)[16] = X##me; \
            (output)[17] = ~X##mi; \
            (output)[18] = X##mo; \
            (output)[19] = X##mu; \
            (output)[20] = ~X##sa; \
            (output)[21] = X##se; \
            (output)[22] = X##si; \
            (output)[23] = X##so; \
            if ((laneCount) >= 25) { \
                (output)[24] = X##su; \
            } \
        } \
    }
1369
+
1370
/*
 * wrapOne(X, input, output, index, name)   [UseBebigokimisa variant]
 *
 * XORs input[index] into the lane variable X##name, then stores the updated
 * lane into output[index].
 *
 * The two statements are grouped in braces so the macro behaves as one
 * statement (e.g. under an unbraced `if`); it is meant to be invoked without
 * a trailing semicolon, as the wrap() macro does.  `input` and `output` are
 * parenthesized so pointer expressions such as `buf + 1` expand correctly.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1373
+
1374
/*
 * wrapOneInvert(X, input, output, index, name)   [UseBebigokimisa variant]
 *
 * XORs input[index] into the lane variable X##name, then stores the bitwise
 * complement of the updated lane into output[index].  The lane variable
 * itself keeps the uncomplemented XOR result.
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = ~X##name; \
    }
1377
+
1378
/*
 * unwrapOne(X, input, output, index, name)   [UseBebigokimisa variant]
 *
 * Stores input[index] XOR X##name into output[index], then XORs that output
 * word back into X##name (which leaves X##name equal to input[index] when
 * input and output are distinct objects).
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1381
+
1382
/*
 * unwrapOneInvert(X, input, output, index, name)   [UseBebigokimisa variant]
 *
 * Stores the bitwise complement of (input[index] XOR X##name) into
 * output[index], then XORs that output word back into X##name (which leaves
 * X##name equal to ~input[index] when input and output are distinct objects).
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.  The
 * definition no longer ends in a line-continuation backslash, so the line
 * following the macro can never be spliced into it.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = ~((input)[index] ^ X##name); \
        X##name ^= (output)[index]; \
    }
1385
+
1386
+ #else // UseBebigokimisa
1387
+
1388
/*
 * copyToStateAndOutput(X, state, output, laneCount)   [variant without UseBebigokimisa]
 *
 * Stores all 25 lane variables X##ba .. X##su into state[0..24] (always, for
 * any laneCount) and additionally stores the first laneCount lanes, unchanged,
 * into output[0..laneCount-1]; output lanes at index >= laneCount are not
 * touched.
 *
 * For each lane the state store precedes the output store, as in the
 * original.  All arguments are parenthesized so expression arguments
 * (`st + 1`, `n & 31`) expand correctly; `laneCount` is evaluated several
 * times and must have no side effects.  The expansion is a single if/else
 * statement without a trailing semicolon.
 */
#define copyToStateAndOutput(X, state, output, laneCount) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    (state)[ 0] = X##ba; \
                    if ((laneCount) >= 1) { \
                        (output)[ 0] = X##ba; \
                    } \
                    (state)[ 1] = X##be; \
                    (state)[ 2] = X##bi; \
                } else { \
                    (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
                    (state)[ 1] = X##be; (output)[ 1] = X##be; \
                    (state)[ 2] = X##bi; \
                    if ((laneCount) >= 3) { \
                        (output)[ 2] = X##bi; \
                    } \
                } \
                (state)[ 3] = X##bo; \
                (state)[ 4] = X##bu; \
                (state)[ 5] = X##ga; \
                (state)[ 6] = X##ge; \
            } else { \
                (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
                (state)[ 1] = X##be; (output)[ 1] = X##be; \
                (state)[ 2] = X##bi; (output)[ 2] = X##bi; \
                (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
                if ((laneCount) < 6) { \
                    (state)[ 4] = X##bu; \
                    if ((laneCount) >= 5) { \
                        (output)[ 4] = X##bu; \
                    } \
                    (state)[ 5] = X##ga; \
                    (state)[ 6] = X##ge; \
                } else { \
                    (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
                    (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
                    (state)[ 6] = X##ge; \
                    if ((laneCount) >= 7) { \
                        (output)[ 6] = X##ge; \
                    } \
                } \
            } \
            (state)[ 7] = X##gi; \
            (state)[ 8] = X##go; \
            (state)[ 9] = X##gu; \
            (state)[10] = X##ka; \
            (state)[11] = X##ke; \
            (state)[12] = X##ki; \
            (state)[13] = X##ko; \
            (state)[14] = X##ku; \
        } else { \
            (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
            (state)[ 1] = X##be; (output)[ 1] = X##be; \
            (state)[ 2] = X##bi; (output)[ 2] = X##bi; \
            (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
            (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
            (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
            (state)[ 6] = X##ge; (output)[ 6] = X##ge; \
            (state)[ 7] = X##gi; (output)[ 7] = X##gi; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    (state)[ 8] = X##go; \
                    if ((laneCount) >= 9) { \
                        (output)[ 8] = X##go; \
                    } \
                    (state)[ 9] = X##gu; \
                    (state)[10] = X##ka; \
                } else { \
                    (state)[ 8] = X##go; (output)[ 8] = X##go; \
                    (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
                    (state)[10] = X##ka; \
                    if ((laneCount) >= 11) { \
                        (output)[10] = X##ka; \
                    } \
                } \
                (state)[11] = X##ke; \
                (state)[12] = X##ki; \
                (state)[13] = X##ko; \
                (state)[14] = X##ku; \
            } else { \
                (state)[ 8] = X##go; (output)[ 8] = X##go; \
                (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
                (state)[10] = X##ka; (output)[10] = X##ka; \
                (state)[11] = X##ke; (output)[11] = X##ke; \
                if ((laneCount) < 14) { \
                    (state)[12] = X##ki; \
                    if ((laneCount) >= 13) { \
                        (output)[12] = X##ki; \
                    } \
                    (state)[13] = X##ko; \
                    (state)[14] = X##ku; \
                } else { \
                    (state)[12] = X##ki; (output)[12] = X##ki; \
                    (state)[13] = X##ko; (output)[13] = X##ko; \
                    (state)[14] = X##ku; \
                    if ((laneCount) >= 15) { \
                        (output)[14] = X##ku; \
                    } \
                } \
            } \
        } \
        (state)[15] = X##ma; \
        (state)[16] = X##me; \
        (state)[17] = X##mi; \
        (state)[18] = X##mo; \
        (state)[19] = X##mu; \
        (state)[20] = X##sa; \
        (state)[21] = X##se; \
        (state)[22] = X##si; \
        (state)[23] = X##so; \
        (state)[24] = X##su; \
    } else { \
        (state)[ 0] = X##ba; (output)[ 0] = X##ba; \
        (state)[ 1] = X##be; (output)[ 1] = X##be; \
        (state)[ 2] = X##bi; (output)[ 2] = X##bi; \
        (state)[ 3] = X##bo; (output)[ 3] = X##bo; \
        (state)[ 4] = X##bu; (output)[ 4] = X##bu; \
        (state)[ 5] = X##ga; (output)[ 5] = X##ga; \
        (state)[ 6] = X##ge; (output)[ 6] = X##ge; \
        (state)[ 7] = X##gi; (output)[ 7] = X##gi; \
        (state)[ 8] = X##go; (output)[ 8] = X##go; \
        (state)[ 9] = X##gu; (output)[ 9] = X##gu; \
        (state)[10] = X##ka; (output)[10] = X##ka; \
        (state)[11] = X##ke; (output)[11] = X##ke; \
        (state)[12] = X##ki; (output)[12] = X##ki; \
        (state)[13] = X##ko; (output)[13] = X##ko; \
        (state)[14] = X##ku; (output)[14] = X##ku; \
        (state)[15] = X##ma; (output)[15] = X##ma; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    (state)[16] = X##me; \
                    if ((laneCount) >= 17) { \
                        (output)[16] = X##me; \
                    } \
                    (state)[17] = X##mi; \
                    (state)[18] = X##mo; \
                } else { \
                    (state)[16] = X##me; (output)[16] = X##me; \
                    (state)[17] = X##mi; (output)[17] = X##mi; \
                    (state)[18] = X##mo; \
                    if ((laneCount) >= 19) { \
                        (output)[18] = X##mo; \
                    } \
                } \
                (state)[19] = X##mu; \
                (state)[20] = X##sa; \
                (state)[21] = X##se; \
                (state)[22] = X##si; \
            } else { \
                (state)[16] = X##me; (output)[16] = X##me; \
                (state)[17] = X##mi; (output)[17] = X##mi; \
                (state)[18] = X##mo; (output)[18] = X##mo; \
                (state)[19] = X##mu; (output)[19] = X##mu; \
                if ((laneCount) < 22) { \
                    (state)[20] = X##sa; \
                    if ((laneCount) >= 21) { \
                        (output)[20] = X##sa; \
                    } \
                    (state)[21] = X##se; \
                    (state)[22] = X##si; \
                } else { \
                    (state)[20] = X##sa; (output)[20] = X##sa; \
                    (state)[21] = X##se; (output)[21] = X##se; \
                    (state)[22] = X##si; \
                    if ((laneCount) >= 23) { \
                        (output)[22] = X##si; \
                    } \
                } \
            } \
            (state)[23] = X##so; \
            (state)[24] = X##su; \
        } else { \
            (state)[16] = X##me; (output)[16] = X##me; \
            (state)[17] = X##mi; (output)[17] = X##mi; \
            (state)[18] = X##mo; (output)[18] = X##mo; \
            (state)[19] = X##mu; (output)[19] = X##mu; \
            (state)[20] = X##sa; (output)[20] = X##sa; \
            (state)[21] = X##se; (output)[21] = X##se; \
            (state)[22] = X##si; (output)[22] = X##si; \
            (state)[23] = X##so; (output)[23] = X##so; \
            (state)[24] = X##su; \
            if ((laneCount) >= 25) { \
                (output)[24] = X##su; \
            } \
        } \
    }
1645
+
1646
/*
 * output(X, output, laneCount)   [variant without UseBebigokimisa]
 *
 * Stores the first laneCount lane variables (X##ba, X##be, ... in lane order),
 * unchanged, into output[0..laneCount-1].  Nothing is written for
 * laneCount < 1, and at most 25 lanes are written for laneCount >= 25.
 *
 * `output` and `laneCount` are parenthesized in the expansion so expression
 * arguments (`out + 1`, `n & 31`) expand correctly; `laneCount` is evaluated
 * several times and must have no side effects.  The expansion is a single
 * if/else statement without a trailing semicolon.
 */
#define output(X, output, laneCount) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) >= 1) { \
                        (output)[ 0] = X##ba; \
                    } \
                } else { \
                    (output)[ 0] = X##ba; \
                    (output)[ 1] = X##be; \
                    if ((laneCount) >= 3) { \
                        (output)[ 2] = X##bi; \
                    } \
                } \
            } else { \
                (output)[ 0] = X##ba; \
                (output)[ 1] = X##be; \
                (output)[ 2] = X##bi; \
                (output)[ 3] = X##bo; \
                if ((laneCount) < 6) { \
                    if ((laneCount) >= 5) { \
                        (output)[ 4] = X##bu; \
                    } \
                } else { \
                    (output)[ 4] = X##bu; \
                    (output)[ 5] = X##ga; \
                    if ((laneCount) >= 7) { \
                        (output)[ 6] = X##ge; \
                    } \
                } \
            } \
        } else { \
            (output)[ 0] = X##ba; \
            (output)[ 1] = X##be; \
            (output)[ 2] = X##bi; \
            (output)[ 3] = X##bo; \
            (output)[ 4] = X##bu; \
            (output)[ 5] = X##ga; \
            (output)[ 6] = X##ge; \
            (output)[ 7] = X##gi; \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) >= 9) { \
                        (output)[ 8] = X##go; \
                    } \
                } else { \
                    (output)[ 8] = X##go; \
                    (output)[ 9] = X##gu; \
                    if ((laneCount) >= 11) { \
                        (output)[10] = X##ka; \
                    } \
                } \
            } else { \
                (output)[ 8] = X##go; \
                (output)[ 9] = X##gu; \
                (output)[10] = X##ka; \
                (output)[11] = X##ke; \
                if ((laneCount) < 14) { \
                    if ((laneCount) >= 13) { \
                        (output)[12] = X##ki; \
                    } \
                } else { \
                    (output)[12] = X##ki; \
                    (output)[13] = X##ko; \
                    if ((laneCount) >= 15) { \
                        (output)[14] = X##ku; \
                    } \
                } \
            } \
        } \
    } else { \
        (output)[ 0] = X##ba; \
        (output)[ 1] = X##be; \
        (output)[ 2] = X##bi; \
        (output)[ 3] = X##bo; \
        (output)[ 4] = X##bu; \
        (output)[ 5] = X##ga; \
        (output)[ 6] = X##ge; \
        (output)[ 7] = X##gi; \
        (output)[ 8] = X##go; \
        (output)[ 9] = X##gu; \
        (output)[10] = X##ka; \
        (output)[11] = X##ke; \
        (output)[12] = X##ki; \
        (output)[13] = X##ko; \
        (output)[14] = X##ku; \
        (output)[15] = X##ma; \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) >= 17) { \
                        (output)[16] = X##me; \
                    } \
                } else { \
                    (output)[16] = X##me; \
                    (output)[17] = X##mi; \
                    if ((laneCount) >= 19) { \
                        (output)[18] = X##mo; \
                    } \
                } \
            } else { \
                (output)[16] = X##me; \
                (output)[17] = X##mi; \
                (output)[18] = X##mo; \
                (output)[19] = X##mu; \
                if ((laneCount) < 22) { \
                    if ((laneCount) >= 21) { \
                        (output)[20] = X##sa; \
                    } \
                } else { \
                    (output)[20] = X##sa; \
                    (output)[21] = X##se; \
                    if ((laneCount) >= 23) { \
                        (output)[22] = X##si; \
                    } \
                } \
            } \
        } else { \
            (output)[16] = X##me; \
            (output)[17] = X##mi; \
            (output)[18] = X##mo; \
            (output)[19] = X##mu; \
            (output)[20] = X##sa; \
            (output)[21] = X##se; \
            (output)[22] = X##si; \
            (output)[23] = X##so; \
            if ((laneCount) >= 25) { \
                (output)[24] = X##su; \
            } \
        } \
    }
1790
+
1791
/*
 * wrapOne(X, input, output, index, name)   [variant without UseBebigokimisa]
 *
 * XORs input[index] into the lane variable X##name, then stores the updated
 * lane into output[index].
 *
 * The two statements are grouped in braces so the macro behaves as one
 * statement (e.g. under an unbraced `if`); it is meant to be invoked without
 * a trailing semicolon, as the wrap() macro does.  `input` and `output` are
 * parenthesized so pointer expressions such as `buf + 1` expand correctly.
 */
#define wrapOne(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1794
+
1795
/*
 * wrapOneInvert(X, input, output, index, name)   [variant without UseBebigokimisa]
 *
 * In this variant no inversion takes place: the macro XORs input[index] into
 * the lane variable X##name and stores the updated lane into output[index],
 * exactly like wrapOne in the same branch.
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.
 */
#define wrapOneInvert(X, input, output, index, name) \
    { \
        X##name ^= (input)[index]; \
        (output)[index] = X##name; \
    }
1798
+
1799
/*
 * unwrapOne(X, input, output, index, name)   [variant without UseBebigokimisa]
 *
 * Stores input[index] XOR X##name into output[index], then XORs that output
 * word back into X##name (which leaves X##name equal to input[index] when
 * input and output are distinct objects).
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.
 */
#define unwrapOne(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1802
+
1803
/*
 * unwrapOneInvert(X, input, output, index, name)   [variant without UseBebigokimisa]
 *
 * In this variant no inversion takes place: the macro stores
 * input[index] XOR X##name into output[index] and then XORs that output word
 * back into X##name, exactly like unwrapOne in the same branch.
 *
 * The statements are grouped in braces so the macro behaves as one statement;
 * it is meant to be invoked without a trailing semicolon.  `input` and
 * `output` are parenthesized so pointer expressions expand correctly.
 */
#define unwrapOneInvert(X, input, output, index, name) \
    { \
        (output)[index] = (input)[index] ^ X##name; \
        X##name ^= (output)[index]; \
    }
1806
+
1807
+ #endif
1808
+
1809
/*
 * wrap(X, input, output, laneCount, trailingBits)
 *
 * Applies wrapOne / wrapOneInvert to the first laneCount lanes (lane order
 * ba, be, bi, bo, bu, ga, ... su) and XORs `trailingBits` into the lane that
 * follows the last wrapped lane:
 *
 *   laneCount < 1        only X##ba ^= trailingBits
 *   1 <= laneCount < 25  lanes 0..laneCount-1 are wrapped,
 *                        lane laneCount ^= trailingBits
 *   laneCount >= 25      all 25 lanes are wrapped; trailingBits is not used
 *
 * wrapOneInvert is used for lanes be(1), bi(2), go(8), ki(12), mi(17) and
 * sa(20); every other lane uses wrapOne.  Both helpers are whichever
 * definitions are active for the current UseBebigokimisa setting and are
 * invoked without a trailing semicolon.
 *
 * `laneCount` and `trailingBits` are parenthesized, and `input` / `output`
 * are forwarded to the helpers already parenthesized, so expression arguments
 * (`buf + 1`, `n & 31`, `a | b`) expand correctly regardless of how the
 * helpers are written.  `laneCount` is evaluated several times and must have
 * no side effects.  The expansion is a single if/else statement without a
 * trailing semicolon.
 */
#define wrap(X, input, output, laneCount, trailingBits) \
    if ((laneCount) < 16) { \
        if ((laneCount) < 8) { \
            if ((laneCount) < 4) { \
                if ((laneCount) < 2) { \
                    if ((laneCount) < 1) { \
                        X##ba ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 0, ba) \
                        X##be ^= (trailingBits); \
                    } \
                } else { \
                    wrapOne(X, (input), (output), 0, ba) \
                    wrapOneInvert(X, (input), (output), 1, be) \
                    if ((laneCount) < 3) { \
                        X##bi ^= (trailingBits); \
                    } else { \
                        wrapOneInvert(X, (input), (output), 2, bi) \
                        X##bo ^= (trailingBits); \
                    } \
                } \
            } else { \
                wrapOne(X, (input), (output), 0, ba) \
                wrapOneInvert(X, (input), (output), 1, be) \
                wrapOneInvert(X, (input), (output), 2, bi) \
                wrapOne(X, (input), (output), 3, bo) \
                if ((laneCount) < 6) { \
                    if ((laneCount) < 5) { \
                        X##bu ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 4, bu) \
                        X##ga ^= (trailingBits); \
                    } \
                } else { \
                    wrapOne(X, (input), (output), 4, bu) \
                    wrapOne(X, (input), (output), 5, ga) \
                    if ((laneCount) < 7) { \
                        X##ge ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 6, ge) \
                        X##gi ^= (trailingBits); \
                    } \
                } \
            } \
        } else { \
            wrapOne(X, (input), (output), 0, ba) \
            wrapOneInvert(X, (input), (output), 1, be) \
            wrapOneInvert(X, (input), (output), 2, bi) \
            wrapOne(X, (input), (output), 3, bo) \
            wrapOne(X, (input), (output), 4, bu) \
            wrapOne(X, (input), (output), 5, ga) \
            wrapOne(X, (input), (output), 6, ge) \
            wrapOne(X, (input), (output), 7, gi) \
            if ((laneCount) < 12) { \
                if ((laneCount) < 10) { \
                    if ((laneCount) < 9) { \
                        X##go ^= (trailingBits); \
                    } else { \
                        wrapOneInvert(X, (input), (output), 8, go) \
                        X##gu ^= (trailingBits); \
                    } \
                } else { \
                    wrapOneInvert(X, (input), (output), 8, go) \
                    wrapOne(X, (input), (output), 9, gu) \
                    if ((laneCount) < 11) { \
                        X##ka ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 10, ka) \
                        X##ke ^= (trailingBits); \
                    } \
                } \
            } else { \
                wrapOneInvert(X, (input), (output), 8, go) \
                wrapOne(X, (input), (output), 9, gu) \
                wrapOne(X, (input), (output), 10, ka) \
                wrapOne(X, (input), (output), 11, ke) \
                if ((laneCount) < 14) { \
                    if ((laneCount) < 13) { \
                        X##ki ^= (trailingBits); \
                    } else { \
                        wrapOneInvert(X, (input), (output), 12, ki) \
                        X##ko ^= (trailingBits); \
                    } \
                } else { \
                    wrapOneInvert(X, (input), (output), 12, ki) \
                    wrapOne(X, (input), (output), 13, ko) \
                    if ((laneCount) < 15) { \
                        X##ku ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 14, ku) \
                        X##ma ^= (trailingBits); \
                    } \
                } \
            } \
        } \
    } else { \
        wrapOne(X, (input), (output), 0, ba) \
        wrapOneInvert(X, (input), (output), 1, be) \
        wrapOneInvert(X, (input), (output), 2, bi) \
        wrapOne(X, (input), (output), 3, bo) \
        wrapOne(X, (input), (output), 4, bu) \
        wrapOne(X, (input), (output), 5, ga) \
        wrapOne(X, (input), (output), 6, ge) \
        wrapOne(X, (input), (output), 7, gi) \
        wrapOneInvert(X, (input), (output), 8, go) \
        wrapOne(X, (input), (output), 9, gu) \
        wrapOne(X, (input), (output), 10, ka) \
        wrapOne(X, (input), (output), 11, ke) \
        wrapOneInvert(X, (input), (output), 12, ki) \
        wrapOne(X, (input), (output), 13, ko) \
        wrapOne(X, (input), (output), 14, ku) \
        wrapOne(X, (input), (output), 15, ma) \
        if ((laneCount) < 24) { \
            if ((laneCount) < 20) { \
                if ((laneCount) < 18) { \
                    if ((laneCount) < 17) { \
                        X##me ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 16, me) \
                        X##mi ^= (trailingBits); \
                    } \
                } else { \
                    wrapOne(X, (input), (output), 16, me) \
                    wrapOneInvert(X, (input), (output), 17, mi) \
                    if ((laneCount) < 19) { \
                        X##mo ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 18, mo) \
                        X##mu ^= (trailingBits); \
                    } \
                } \
            } else { \
                wrapOne(X, (input), (output), 16, me) \
                wrapOneInvert(X, (input), (output), 17, mi) \
                wrapOne(X, (input), (output), 18, mo) \
                wrapOne(X, (input), (output), 19, mu) \
                if ((laneCount) < 22) { \
                    if ((laneCount) < 21) { \
                        X##sa ^= (trailingBits); \
                    } else { \
                        wrapOneInvert(X, (input), (output), 20, sa) \
                        X##se ^= (trailingBits); \
                    } \
                } else { \
                    wrapOneInvert(X, (input), (output), 20, sa) \
                    wrapOne(X, (input), (output), 21, se) \
                    if ((laneCount) < 23) { \
                        X##si ^= (trailingBits); \
                    } else { \
                        wrapOne(X, (input), (output), 22, si) \
                        X##so ^= (trailingBits); \
                    } \
                } \
            } \
        } else { \
            wrapOne(X, (input), (output), 16, me) \
            wrapOneInvert(X, (input), (output), 17, mi) \
            wrapOne(X, (input), (output), 18, mo) \
            wrapOne(X, (input), (output), 19, mu) \
            wrapOneInvert(X, (input), (output), 20, sa) \
            wrapOne(X, (input), (output), 21, se) \
            wrapOne(X, (input), (output), 22, si) \
            wrapOne(X, (input), (output), 23, so) \
            if ((laneCount) < 25) { \
                X##su ^= (trailingBits); \
            } else { \
                wrapOne(X, (input), (output), 24, su) \
            } \
        } \
    }
2004
+
2005
/*
 * unwrap(X, input, output, laneCount, trailingBits)
 *
 * Expands to a single nested if/else statement that picks one straight-line
 * sequence by comparing laneCount against constants (16, 8, 4, 2, 1, ...).
 *
 * For 1 <= laneCount <= 24 the selected path invokes unwrapOne or
 * unwrapOneInvert on lane indices 0 .. laneCount-1 in ascending order and
 * then XORs trailingBits into the state variable of the next lane, i.e. the
 * lane whose index equals laneCount (X##ba is index 0, X##be index 1, ...,
 * X##su index 24). For laneCount < 1 only X##ba ^= trailingBits is executed.
 * For laneCount >= 25 all 25 lanes are processed and trailingBits is never
 * read.
 *
 * Parameters:
 *   X            - token prefix pasted onto the lane suffixes (ba .. su) to
 *                  form the names of the state variables.
 *   input,output - forwarded unchanged, together with the lane index and the
 *                  lane suffix, to unwrapOne / unwrapOneInvert.
 *   laneCount    - number of leading lanes to process.
 *   trailingBits - value XORed into the first lane that is not processed.
 *
 * Lane indices 1, 2, 8, 12, 17 and 20 always go through unwrapOneInvert; all
 * other lanes go through unwrapOne. Both helpers are defined outside this
 * section and are invoked here without a trailing semicolon, so they have to
 * expand to complete statements.
 * NOTE(review): the Invert lanes are presumably the ones kept complemented by
 * a lane-complementing optimization - confirm against the helper definitions.
 *
 * NOTE(review): laneCount and trailingBits are expanded without parentheses
 * and laneCount is expanded several times, so pass simple, side-effect-free
 * expressions. The expansion is a bare if/else rather than do { } while (0).
 */
+ #define unwrap(X, input, output, laneCount, trailingBits) \
2006
+ if (laneCount < 16) { \
2007
+ if (laneCount < 8) { \
2008
+ if (laneCount < 4) { \
2009
+ if (laneCount < 2) { \
2010
+ if (laneCount < 1) { /* no lane is processed; trailingBits lands in lane 0 */ \
2011
+ X##ba ^= trailingBits; \
2012
+ } \
2013
+ else { \
2014
+ unwrapOne(X, input, output, 0, ba) \
2015
+ X##be ^= trailingBits; \
2016
+ } \
2017
+ } \
2018
+ else { /* 2 <= laneCount < 4 */ \
2019
+ unwrapOne(X, input, output, 0, ba) \
2020
+ unwrapOneInvert(X, input, output, 1, be) \
2021
+ if (laneCount < 3) { \
2022
+ X##bi ^= trailingBits; \
2023
+ } \
2024
+ else { \
2025
+ unwrapOneInvert(X, input, output, 2, bi) \
2026
+ X##bo ^= trailingBits; \
2027
+ } \
2028
+ } \
2029
+ } \
2030
+ else { /* 4 <= laneCount < 8 */ \
2031
+ unwrapOne(X, input, output, 0, ba) \
2032
+ unwrapOneInvert(X, input, output, 1, be) \
2033
+ unwrapOneInvert(X, input, output, 2, bi) \
2034
+ unwrapOne(X, input, output, 3, bo) \
2035
+ if (laneCount < 6) { \
2036
+ if (laneCount < 5) { \
2037
+ X##bu ^= trailingBits; \
2038
+ } \
2039
+ else { \
2040
+ unwrapOne(X, input, output, 4, bu) \
2041
+ X##ga ^= trailingBits; \
2042
+ } \
2043
+ } \
2044
+ else { \
2045
+ unwrapOne(X, input, output, 4, bu) \
2046
+ unwrapOne(X, input, output, 5, ga) \
2047
+ if (laneCount < 7) { \
2048
+ X##ge ^= trailingBits; \
2049
+ } \
2050
+ else { \
2051
+ unwrapOne(X, input, output, 6, ge) \
2052
+ X##gi ^= trailingBits; \
2053
+ } \
2054
+ } \
2055
+ } \
2056
+ } \
2057
+ else { /* 8 <= laneCount < 16 */ \
2058
+ unwrapOne(X, input, output, 0, ba) \
2059
+ unwrapOneInvert(X, input, output, 1, be) \
2060
+ unwrapOneInvert(X, input, output, 2, bi) \
2061
+ unwrapOne(X, input, output, 3, bo) \
2062
+ unwrapOne(X, input, output, 4, bu) \
2063
+ unwrapOne(X, input, output, 5, ga) \
2064
+ unwrapOne(X, input, output, 6, ge) \
2065
+ unwrapOne(X, input, output, 7, gi) \
2066
+ if (laneCount < 12) { \
2067
+ if (laneCount < 10) { \
2068
+ if (laneCount < 9) { \
2069
+ X##go ^= trailingBits; \
2070
+ } \
2071
+ else { \
2072
+ unwrapOneInvert(X, input, output, 8, go) \
2073
+ X##gu ^= trailingBits; \
2074
+ } \
2075
+ } \
2076
+ else { \
2077
+ unwrapOneInvert(X, input, output, 8, go) \
2078
+ unwrapOne(X, input, output, 9, gu) \
2079
+ if (laneCount < 11) { \
2080
+ X##ka ^= trailingBits; \
2081
+ } \
2082
+ else { \
2083
+ unwrapOne(X, input, output, 10, ka) \
2084
+ X##ke ^= trailingBits; \
2085
+ } \
2086
+ } \
2087
+ } \
2088
+ else { /* 12 <= laneCount < 16 */ \
2089
+ unwrapOneInvert(X, input, output, 8, go) \
2090
+ unwrapOne(X, input, output, 9, gu) \
2091
+ unwrapOne(X, input, output, 10, ka) \
2092
+ unwrapOne(X, input, output, 11, ke) \
2093
+ if (laneCount < 14) { \
2094
+ if (laneCount < 13) { \
2095
+ X##ki ^= trailingBits; \
2096
+ } \
2097
+ else { \
2098
+ unwrapOneInvert(X, input, output, 12, ki) \
2099
+ X##ko ^= trailingBits; \
2100
+ } \
2101
+ } \
2102
+ else { \
2103
+ unwrapOneInvert(X, input, output, 12, ki) \
2104
+ unwrapOne(X, input, output, 13, ko) \
2105
+ if (laneCount < 15) { \
2106
+ X##ku ^= trailingBits; \
2107
+ } \
2108
+ else { \
2109
+ unwrapOne(X, input, output, 14, ku) \
2110
+ X##ma ^= trailingBits; \
2111
+ } \
2112
+ } \
2113
+ } \
2114
+ } \
2115
+ } \
2116
+ else { /* laneCount >= 16 */ \
2117
+ unwrapOne(X, input, output, 0, ba) \
2118
+ unwrapOneInvert(X, input, output, 1, be) \
2119
+ unwrapOneInvert(X, input, output, 2, bi) \
2120
+ unwrapOne(X, input, output, 3, bo) \
2121
+ unwrapOne(X, input, output, 4, bu) \
2122
+ unwrapOne(X, input, output, 5, ga) \
2123
+ unwrapOne(X, input, output, 6, ge) \
2124
+ unwrapOne(X, input, output, 7, gi) \
2125
+ unwrapOneInvert(X, input, output, 8, go) \
2126
+ unwrapOne(X, input, output, 9, gu) \
2127
+ unwrapOne(X, input, output, 10, ka) \
2128
+ unwrapOne(X, input, output, 11, ke) \
2129
+ unwrapOneInvert(X, input, output, 12, ki) \
2130
+ unwrapOne(X, input, output, 13, ko) \
2131
+ unwrapOne(X, input, output, 14, ku) \
2132
+ unwrapOne(X, input, output, 15, ma) \
2133
+ if (laneCount < 24) { \
2134
+ if (laneCount < 20) { \
2135
+ if (laneCount < 18) { \
2136
+ if (laneCount < 17) { \
2137
+ X##me ^= trailingBits; \
2138
+ } \
2139
+ else { \
2140
+ unwrapOne(X, input, output, 16, me) \
2141
+ X##mi ^= trailingBits; \
2142
+ } \
2143
+ } \
2144
+ else { \
2145
+ unwrapOne(X, input, output, 16, me) \
2146
+ unwrapOneInvert(X, input, output, 17, mi) \
2147
+ if (laneCount < 19) { \
2148
+ X##mo ^= trailingBits; \
2149
+ } \
2150
+ else { \
2151
+ unwrapOne(X, input, output, 18, mo) \
2152
+ X##mu ^= trailingBits; \
2153
+ } \
2154
+ } \
2155
+ } \
2156
+ else { /* 20 <= laneCount < 24 */ \
2157
+ unwrapOne(X, input, output, 16, me) \
2158
+ unwrapOneInvert(X, input, output, 17, mi) \
2159
+ unwrapOne(X, input, output, 18, mo) \
2160
+ unwrapOne(X, input, output, 19, mu) \
2161
+ if (laneCount < 22) { \
2162
+ if (laneCount < 21) { \
2163
+ X##sa ^= trailingBits; \
2164
+ } \
2165
+ else { \
2166
+ unwrapOneInvert(X, input, output, 20, sa) \
2167
+ X##se ^= trailingBits; \
2168
+ } \
2169
+ } \
2170
+ else { \
2171
+ unwrapOneInvert(X, input, output, 20, sa) \
2172
+ unwrapOne(X, input, output, 21, se) \
2173
+ if (laneCount < 23) { \
2174
+ X##si ^= trailingBits; \
2175
+ } \
2176
+ else { \
2177
+ unwrapOne(X, input, output, 22, si) \
2178
+ X##so ^= trailingBits; \
2179
+ } \
2180
+ } \
2181
+ } \
2182
+ } \
2183
+ else { /* laneCount >= 24 */ \
2184
+ unwrapOne(X, input, output, 16, me) \
2185
+ unwrapOneInvert(X, input, output, 17, mi) \
2186
+ unwrapOne(X, input, output, 18, mo) \
2187
+ unwrapOne(X, input, output, 19, mu) \
2188
+ unwrapOneInvert(X, input, output, 20, sa) \
2189
+ unwrapOne(X, input, output, 21, se) \
2190
+ unwrapOne(X, input, output, 22, si) \
2191
+ unwrapOne(X, input, output, 23, so) \
2192
+ if (laneCount < 25) { \
2193
+ X##su ^= trailingBits; \
2194
+ } \
2195
+ else { /* laneCount >= 25: every lane is processed, trailingBits is unused */ \
2196
+ unwrapOne(X, input, output, 24, su) \
2197
+ } \
2198
+ } \
2199
+ }