@pinkparrot/qsafe-mayo-wasm 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/.gitmodules +3 -0
  2. package/.vscode/launch.json +12 -0
  3. package/LICENSE +201 -0
  4. package/bridge/mayo1_bridge.c +26 -0
  5. package/bridge/mayo2_bridge.c +26 -0
  6. package/bridge/randombytes_inject.c +44 -0
  7. package/build_mayo1.ps1 +36 -0
  8. package/build_mayo2.ps1 +36 -0
  9. package/dist/mayo.browser.min.js +216 -0
  10. package/dist/mayo1.js +0 -0
  11. package/dist/mayo2.js +0 -0
  12. package/dist/mayo_api.js +139 -0
  13. package/dist/package.json +1 -0
  14. package/gitignore +2 -0
  15. package/index.mjs +1 -0
  16. package/mayo-c/.astylerc +16 -0
  17. package/mayo-c/.cmake/flags.cmake +45 -0
  18. package/mayo-c/.cmake/sanitizers.cmake +81 -0
  19. package/mayo-c/.cmake/target.cmake +71 -0
  20. package/mayo-c/.github/workflows/ci_clang.yml +61 -0
  21. package/mayo-c/.github/workflows/ci_gcc.yml +60 -0
  22. package/mayo-c/.github/workflows/cmake.yml +160 -0
  23. package/mayo-c/.github/workflows/macos_m1.yml +68 -0
  24. package/mayo-c/CMakeLists.txt +35 -0
  25. package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.req +900 -0
  26. package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.rsp +902 -0
  27. package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.req +900 -0
  28. package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.rsp +902 -0
  29. package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.req +900 -0
  30. package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.rsp +902 -0
  31. package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.req +900 -0
  32. package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.rsp +902 -0
  33. package/mayo-c/LICENSE +202 -0
  34. package/mayo-c/META/MAYO-1_META.yml +52 -0
  35. package/mayo-c/META/MAYO-2_META.yml +52 -0
  36. package/mayo-c/META/MAYO-3_META.yml +52 -0
  37. package/mayo-c/META/MAYO-5_META.yml +52 -0
  38. package/mayo-c/NOTICE +13 -0
  39. package/mayo-c/README.md +183 -0
  40. package/mayo-c/apps/CMakeLists.txt +31 -0
  41. package/mayo-c/apps/PQCgenKAT_sign.c +281 -0
  42. package/mayo-c/apps/example.c +151 -0
  43. package/mayo-c/apps/example_nistapi.c +124 -0
  44. package/mayo-c/include/mayo.h +442 -0
  45. package/mayo-c/include/mem.h +25 -0
  46. package/mayo-c/include/randombytes.h +31 -0
  47. package/mayo-c/scripts/contstants.py +141 -0
  48. package/mayo-c/scripts/find_irred_poly.sage +39 -0
  49. package/mayo-c/src/AVX2/arithmetic_common.h +159 -0
  50. package/mayo-c/src/AVX2/echelon_form.h +91 -0
  51. package/mayo-c/src/AVX2/echelon_form_loop.h +58 -0
  52. package/mayo-c/src/AVX2/shuffle_arithmetic.h +442 -0
  53. package/mayo-c/src/CMakeLists.txt +98 -0
  54. package/mayo-c/src/arithmetic.c +128 -0
  55. package/mayo-c/src/arithmetic.h +124 -0
  56. package/mayo-c/src/common/aes128ctr.c +293 -0
  57. package/mayo-c/src/common/aes_c.c +741 -0
  58. package/mayo-c/src/common/aes_ctr.h +32 -0
  59. package/mayo-c/src/common/aes_neon.c +201 -0
  60. package/mayo-c/src/common/debug_bench_tools.h +69 -0
  61. package/mayo-c/src/common/fips202.c +1093 -0
  62. package/mayo-c/src/common/fips202.h +12 -0
  63. package/mayo-c/src/common/mem.c +19 -0
  64. package/mayo-c/src/common/randombytes_ctrdrbg.c +141 -0
  65. package/mayo-c/src/common/randombytes_system.c +399 -0
  66. package/mayo-c/src/generic/arithmetic_dynamic.h +68 -0
  67. package/mayo-c/src/generic/arithmetic_fixed.h +84 -0
  68. package/mayo-c/src/generic/echelon_form.h +152 -0
  69. package/mayo-c/src/generic/ef_inner_loop.h +56 -0
  70. package/mayo-c/src/generic/generic_arithmetic.h +294 -0
  71. package/mayo-c/src/mayo.c +675 -0
  72. package/mayo-c/src/mayo_1/api.c +46 -0
  73. package/mayo-c/src/mayo_1/api.h +43 -0
  74. package/mayo-c/src/mayo_2/api.c +46 -0
  75. package/mayo-c/src/mayo_2/api.h +43 -0
  76. package/mayo-c/src/mayo_3/api.c +46 -0
  77. package/mayo-c/src/mayo_3/api.h +43 -0
  78. package/mayo-c/src/mayo_5/api.c +46 -0
  79. package/mayo-c/src/mayo_5/api.h +43 -0
  80. package/mayo-c/src/neon/arithmetic_common.h +132 -0
  81. package/mayo-c/src/neon/echelon_form.h +55 -0
  82. package/mayo-c/src/neon/echelon_form_loop.h +58 -0
  83. package/mayo-c/src/neon/shuffle_arithmetic.h +462 -0
  84. package/mayo-c/src/params.c +42 -0
  85. package/mayo-c/src/simple_arithmetic.h +138 -0
  86. package/mayo-c/test/CMakeLists.txt +51 -0
  87. package/mayo-c/test/bench.c +166 -0
  88. package/mayo-c/test/m1cycles.c +155 -0
  89. package/mayo-c/test/m1cycles.h +13 -0
  90. package/mayo-c/test/test_kat.c +271 -0
  91. package/mayo-c/test/test_mayo.c +139 -0
  92. package/mayo-c/test/test_sample_solution.c +75 -0
  93. package/mayo-c/test/test_various.c +680 -0
  94. package/package.json +39 -0
  95. package/publish.bat +22 -0
  96. package/readme.md +80 -0
  97. package/test/test.mjs +42 -0
@@ -0,0 +1,124 @@
1
+
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ #ifndef ARITHMETIC_H
5
+ #define ARITHMETIC_H
6
+
7
+ #include <stdint.h>
8
+ #include <mayo.h>
9
+ #include <stdint.h>
10
+ #include <stddef.h>
11
+
12
+ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
13
+ #ifndef TARGET_BIG_ENDIAN
14
+ #define TARGET_BIG_ENDIAN
15
+ #endif
16
+ #endif
17
+
18
+ #define uint32_t_blocker MAYO_NAMESPACE(uint32_t_blocker)
19
+ extern volatile uint32_t uint32_t_blocker;
20
+ #define uint64_t_blocker MAYO_NAMESPACE(uint64_t_blocker)
21
+ extern volatile uint64_t uint64_t_blocker;
22
+ #define unsigned_char_blocker MAYO_NAMESPACE(unsigned_char_blocker)
23
+ extern volatile unsigned char unsigned_char_blocker;
24
+
25
+ #if !(((!defined(__clang__) && defined(__GNUC__) && __GNUC__ <= 12)) && (defined(__x86_64__) || defined(_M_X64)))
26
+ // a > b -> b - a is negative
27
+ // returns 0xFFFFFFFF if true, 0x00000000 if false
28
+ static inline uint32_t ct_is_greater_than(int a, int b) {
29
+ int32_t diff = b - a;
30
+ return ((uint32_t) (diff >> (8*sizeof(uint32_t)-1)) ^ uint32_t_blocker);
31
+ }
32
+
33
+ // a > b -> b - a is negative
34
+ // returns 0xFFFFFFFF if true, 0x00000000 if false
35
+ static inline uint64_t ct_64_is_greater_than(int a, int b) {
36
+ int64_t diff = ((int64_t) b) - ((int64_t) a);
37
+ return ((uint64_t) (diff >> (8*sizeof(uint64_t)-1)) ^ uint64_t_blocker);
38
+ }
39
+
40
+ // if a == b -> 0x00000000, else 0xFFFFFFFF
41
+ static inline uint32_t ct_compare_32(int a, int b) {
42
+ return ((uint32_t)((-(int32_t)(a ^ b)) >> (8*sizeof(uint32_t)-1)) ^ uint32_t_blocker);
43
+ }
44
+
45
+ // if a == b -> 0x0000000000000000, else 0xFFFFFFFFFFFFFFFF
46
+ static inline uint64_t ct_compare_64(int a, int b) {
47
+ return ((uint64_t)((-(int64_t)(a ^ b)) >> (8*sizeof(uint64_t)-1)) ^ uint64_t_blocker);
48
+ }
49
+
50
+ // if a == b -> 0x00, else 0xFF
51
+ static inline unsigned char ct_compare_8(unsigned char a, unsigned char b) {
52
+ return ((int8_t)((-(int32_t)(a ^ b)) >> (8*sizeof(uint32_t)-1)) ^ unsigned_char_blocker);
53
+ }
54
+ #else
55
// Branch-free test for a > b.
// Returns 0xFFFFFFFF if a > b, 0x00000000 otherwise.
//
// The difference is formed in 64 bits: the former 32-bit "b - a" overflowed
// (undefined behaviour) for operands that are far apart, e.g. a = INT_MAX,
// b = INT_MIN, and then reported the wrong result.
static inline uint32_t ct_is_greater_than(int a, int b) {
    int64_t diff = ((int64_t) b) - ((int64_t) a);
    // diff is negative exactly when a > b; the arithmetic shift spreads the sign bit.
    return ((uint32_t) (diff >> (8*sizeof(int64_t)-1)));
}
61
+
62
// Branch-free test for a > b, producing a 64-bit mask.
// Returns all ones when a > b and zero otherwise.
static inline uint64_t ct_64_is_greater_than(int a, int b) {
    const int64_t wide_a = (int64_t) a;
    const int64_t wide_b = (int64_t) b;
    // Negative (sign bit set) exactly when a > b.
    const int64_t delta = wide_b - wide_a;
    const int64_t spread = delta >> (8*sizeof(uint64_t)-1);
    return (uint64_t) spread;
}
68
+
69
// Branch-free inequality test.
// Returns 0x00000000 if a == b, 0xFFFFFFFF otherwise.
//
// The previous form negated the signed value (a ^ b) and took its sign bit.
// That reported "equal" whenever (a ^ b) was negative (e.g. a = -1, b = 0) and
// was undefined for (a ^ b) == INT_MIN. Working on the unsigned bit pattern
// with (x | -x) sets the top bit for every non-zero x.
static inline uint32_t ct_compare_32(int a, int b) {
    uint32_t x = (uint32_t) (a ^ b);
    uint32_t nonzero = (uint32_t) (x | (uint32_t) ((uint32_t) 0 - x)) >> (8*sizeof(uint32_t)-1);
    // 0 - 1 wraps to all ones, 0 - 0 stays zero.
    return (uint32_t) ((uint32_t) 0 - nonzero);
}
73
+
74
// Branch-free inequality test with a 64-bit result.
// Returns 0x0000000000000000 if a == b, 0xFFFFFFFFFFFFFFFF otherwise.
//
// The previous form negated the sign-extended value (a ^ b) and took its sign
// bit, which reported "equal" whenever (a ^ b) was negative (e.g. a = -1,
// b = 0). Working on the unsigned bit pattern with (x | -x) sets the top bit
// for every non-zero x.
static inline uint64_t ct_compare_64(int a, int b) {
    uint64_t x = (uint64_t) (int64_t) (a ^ b);
    uint64_t nonzero = (x | ((uint64_t) 0 - x)) >> (8*sizeof(uint64_t)-1);
    // 0 - 1 wraps to all ones, 0 - 0 stays zero.
    return (uint64_t) 0 - nonzero;
}
78
+
79
// Branch-free byte inequality test.
// Returns 0x00 if a == b, 0xFF otherwise.
static inline unsigned char ct_compare_8(unsigned char a, unsigned char b) {
    // a ^ b lies in 0..255, so its negation is negative exactly when a != b.
    const int32_t delta = (int32_t) (a ^ b);
    const int32_t spread = (-delta) >> (8*sizeof(uint32_t)-1);
    return (int8_t) spread;
}
83
+ #endif
84
+
85
+ #if defined(MAYO_AVX) || defined(MAYO_NEON)
86
+ #include <shuffle_arithmetic.h>
87
+ #elif defined(MAYO_M4)
88
+ #include <m4_arithmetic.h>
89
+ #else
90
+ #include <generic_arithmetic.h>
91
+ #endif
92
+
93
// Multiply-accumulate over `legs` 64-bit words: acc[i] ^= in[i] * a.
// mul_table() is provided by the arithmetic header selected above; its result
// is consumed as four fields at bit offsets 0, 8, 16 and 24.
// NOTE(review): presumably each word packs sixteen 4-bit field elements and
// the fields are the products of `a` with 1, x, x^2, x^3 -- confirm against
// mul_table().
static
inline void vec_mul_add_u64(const int legs, const uint64_t *in, unsigned char a, uint64_t *acc) {
    const uint32_t tab = mul_table(a);

    // Selects the lowest bit of every 4-bit group.
    const uint64_t lsb_mask = 0x1111111111111111ULL;

    for (int leg = 0; leg < legs; leg++) {
        const uint64_t word = in[leg];
        uint64_t prod;

        // One partial product per bit position inside the 4-bit groups.
        prod  = ( word       & lsb_mask) * ( tab        & 0xff);
        prod ^= ((word >> 1) & lsb_mask) * ((tab >> 8)  & 0xf);
        prod ^= ((word >> 2) & lsb_mask) * ((tab >> 16) & 0xf);
        prod ^= ((word >> 3) & lsb_mask) * ((tab >> 24) & 0xf);

        acc[leg] ^= prod;
    }
}
106
+
107
+ // Calculate Upper in KeyGen
108
+ #define m_upper MAYO_NAMESPACE(m_upper)
109
+ void m_upper(const mayo_params_t* p, const uint64_t *in, uint64_t *out, int size);
110
+
111
+ // Sample solution in Sign
112
+ #define sample_solution MAYO_NAMESPACE(sample_solution)
113
+ int sample_solution(const mayo_params_t *p, unsigned char *A, const unsigned char *y, const unsigned char *r, unsigned char *x, int k, int o, int m, int A_cols);
114
+
115
// Byte-swap helpers.
// GCC/Clang: map straight to the compiler builtins.
// Other compilers: portable shift/mask fallback. The operand is converted to
// the exact width first so that the fallback, like the builtins, ignores any
// bits above the 32-bit (resp. 64-bit) value and never shifts past the width
// of its type. The fallback evaluates its argument more than once, so do not
// pass expressions with side effects.
#if defined(__GNUC__) || defined(__clang__)
#define BSWAP32(i) __builtin_bswap32((i))
#define BSWAP64(i) __builtin_bswap64((i))
#else
#define BSWAP32(i) ((uint32_t)((((uint32_t)(i)) >> 24) | ((((uint32_t)(i)) >> 8) & 0xff00u) | ((((uint32_t)(i)) & 0xff00u) << 8) | (((uint32_t)(i)) << 24)))
#define BSWAP64(i) (((uint64_t)BSWAP32(((uint64_t)(i)) >> 32)) | (((uint64_t)BSWAP32(i)) << 32))
#endif
122
+
123
+ #endif
124
+
@@ -0,0 +1,293 @@
1
+ // SPDX-License-Identifier: Apache-2.0 and MIT and Public Domain
2
+
3
+ #ifdef ENABLE_AESNI
4
+
5
+ #include <mem.h>
6
+ #include <stdint.h>
7
+ #include <string.h>
8
+ #include <tmmintrin.h>
9
+ #include <wmmintrin.h>
10
+
11
+ // Adapted from liboqs/src/common/aes which in turn takes it from:
12
+ // crypto_core/aes128encrypt/dolbeau/aesenc-int
13
+ // (https://bench.cr.yp.to/supercop.html)
14
// Expands a 16-byte AES-128 key into the 11 round keys rkeys[0..10] using the
// AES-NI key-generation instruction. rkeys[0] is the key itself.
static inline void aes128ni_setkey_encrypt(const unsigned char *key,
                                           __m128i rkeys[11]) {
    __m128i cur = _mm_loadu_si128((const __m128i *)key);
    __m128i assist;
    int slot = 0;

    /* One expansion step: store the current round key, then derive the next.
     * The round constant has to be a compile-time immediate, hence a macro. */
#define AES128NI_EXPAND_STEP(RCON)                                 \
    do {                                                           \
        assist = _mm_aeskeygenassist_si128(cur, RCON);             \
        rkeys[slot++] = cur;                                       \
        cur = _mm_xor_si128(cur, _mm_slli_si128(cur, 4));          \
        cur = _mm_xor_si128(cur, _mm_slli_si128(cur, 8));          \
        cur = _mm_xor_si128(cur, _mm_shuffle_epi32(assist, 0xff)); \
    } while (0)

    AES128NI_EXPAND_STEP(0x01);
    AES128NI_EXPAND_STEP(0x02);
    AES128NI_EXPAND_STEP(0x04);
    AES128NI_EXPAND_STEP(0x08);
    AES128NI_EXPAND_STEP(0x10);
    AES128NI_EXPAND_STEP(0x20);
    AES128NI_EXPAND_STEP(0x40);
    AES128NI_EXPAND_STEP(0x80);
    AES128NI_EXPAND_STEP(0x1b);
    AES128NI_EXPAND_STEP(0x36);

#undef AES128NI_EXPAND_STEP

    /* The tenth derived key is the final round key. */
    rkeys[slot++] = cur;
}
44
+
45
// Allocates an 11-entry AES-128 round-key schedule and fills it from `key`
// (16 bytes are read). The pointer is returned through *_schedule and must be
// released with oqs_aes128_free_schedule_ni().
// On allocation failure *_schedule is set to NULL and nothing else is done;
// previously the NULL pointer was handed straight to the key expansion.
void oqs_aes128_load_schedule_ni(const uint8_t *key, void **_schedule) {
    __m128i *round_keys = malloc(11 * sizeof *round_keys);
    *_schedule = round_keys;
    if (round_keys == NULL) {
        return;
    }
    aes128ni_setkey_encrypt(key, round_keys);
}
51
+
52
// Releases a schedule obtained from oqs_aes128_load_schedule_ni().
// A NULL schedule is ignored. The 11 round keys are handed to
// mayo_secure_free(), which is declared outside this file.
void oqs_aes128_free_schedule_ni(void *schedule) {
    if (schedule == NULL) {
        return;
    }
    mayo_secure_free(schedule, 11 * sizeof(__m128i));
}
57
+
58
// Encrypts the single 16-byte block `nv` with the 11 round keys in `rkeys`
// and stores the result to `out` (unaligned store, 16 bytes written).
static inline void aes128ni_encrypt(const __m128i rkeys[11], __m128i nv,
                                    unsigned char *out) {
    // Initial key whitening.
    __m128i state = _mm_xor_si128(nv, rkeys[0]);

    // Nine full rounds.
    for (int round = 1; round <= 9; round++) {
        state = _mm_aesenc_si128(state, rkeys[round]);
    }

    // Final round.
    state = _mm_aesenclast_si128(state, rkeys[10]);
    _mm_storeu_si128((__m128i *)out, state);
}
74
+
75
// Encrypts four 16-byte blocks n0..n3 with the same 11 round keys, keeping the
// four states interleaved round by round. The results are stored back to back
// in `out` (64 bytes written, unaligned stores).
static inline void aes128ni_encrypt_x4(const __m128i rkeys[11], __m128i n0,
                                       __m128i n1, __m128i n2, __m128i n3,
                                       unsigned char *out) {
    const __m128i first = rkeys[0];
    __m128i s0 = _mm_xor_si128(n0, first);
    __m128i s1 = _mm_xor_si128(n1, first);
    __m128i s2 = _mm_xor_si128(n2, first);
    __m128i s3 = _mm_xor_si128(n3, first);

    // Nine full rounds on all four lanes.
    for (int round = 1; round <= 9; round++) {
        const __m128i rk = rkeys[round];
        s0 = _mm_aesenc_si128(s0, rk);
        s1 = _mm_aesenc_si128(s1, rk);
        s2 = _mm_aesenc_si128(s2, rk);
        s3 = _mm_aesenc_si128(s3, rk);
    }

    // Final round.
    const __m128i last = rkeys[10];
    s0 = _mm_aesenclast_si128(s0, last);
    s1 = _mm_aesenclast_si128(s1, last);
    s2 = _mm_aesenclast_si128(s2, last);
    s3 = _mm_aesenclast_si128(s3, last);

    _mm_storeu_si128((__m128i *)(out + 0), s0);
    _mm_storeu_si128((__m128i *)(out + 16), s1);
    _mm_storeu_si128((__m128i *)(out + 32), s2);
    _mm_storeu_si128((__m128i *)(out + 48), s3);
}
110
+
111
// Adds `step` to the 64-bit counter stored big-endian in bytes 8..15 of `ctr`:
// `swap_hi` reverses those bytes, the addition happens in the upper 64-bit
// lane, and the same shuffle restores the byte order.
static inline __m128i aes128ni_ctr_step(__m128i ctr, __m128i swap_hi,
                                        long long step) {
    const __m128i native = _mm_shuffle_epi8(ctr, swap_hi);
    const __m128i bumped = _mm_add_epi64(native, _mm_set_epi64x(step, 0));
    return _mm_shuffle_epi8(bumped, swap_hi);
}

// Writes out_len bytes of AES-128 counter-mode keystream to `out`, i.e. the
// encryptions of successive counter blocks starting from the all-zero block.
// Not for general use: IV = 0, nonce = 0
static void oqs_aes128_ctr_enc_sch_ni(const void *schedule, uint8_t *out,
                                      size_t out_len) {
    const __m128i *rkeys = schedule;
    // Leaves bytes 0..7 in place and reverses bytes 8..15.
    const __m128i swap_hi =
        _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0);
    __m128i ctr = _mm_setzero_si128();

    // Bulk: four counter blocks (64 bytes) per iteration.
    for (; out_len >= 64; out += 64, out_len -= 64) {
        const __m128i c1 = aes128ni_ctr_step(ctr, swap_hi, 1);
        const __m128i c2 = aes128ni_ctr_step(ctr, swap_hi, 2);
        const __m128i c3 = aes128ni_ctr_step(ctr, swap_hi, 3);
        aes128ni_encrypt_x4(rkeys, ctr, c1, c2, c3, out);
        ctr = aes128ni_ctr_step(ctr, swap_hi, 4);
    }

    // Remaining whole blocks, one at a time.
    for (; out_len >= 16; out += 16, out_len -= 16) {
        aes128ni_encrypt(rkeys, ctr, out);
        ctr = aes128ni_ctr_step(ctr, swap_hi, 1);
    }

    // Trailing partial block: encrypt into scratch and copy only what is needed.
    if (out_len > 0) {
        uint8_t last[16];
        aes128ni_encrypt(rkeys, ctr, last);
        memcpy(out, last, out_len);
    }
}
151
+
152
// Fills `output` with outputByteLen bytes of AES-128-CTR keystream (zero
// IV/nonce) keyed by the first 16 bytes of `input`.
//
// inputByteLen is accepted for interface compatibility but not used.
//
// Returns outputByteLen converted to int on success, or -1 if the key
// schedule could not be allocated (in which case `output` is left untouched).
// Previously a failed allocation led to the NULL schedule being dereferenced.
int AES_128_CTR_NI(unsigned char *output, size_t outputByteLen,
                   const unsigned char *input, size_t inputByteLen) {
    void *schedule = NULL;
    (void)inputByteLen;

    oqs_aes128_load_schedule_ni(input, &schedule);
    if (schedule == NULL) {
        return -1;
    }
    oqs_aes128_ctr_enc_sch_ni(schedule, output, outputByteLen);
    oqs_aes128_free_schedule_ni(schedule);
    return (int)outputByteLen;
}
160
+
161
+ // 4-Round AES...
162
+
163
+ // From crypto_core/aes128encrypt/dolbeau/aesenc-int
164
// Expands a 16-byte key into the 5 round keys rkeys[0..4] needed by the
// 4-round AES variant. These are the first five keys of the regular AES-128
// schedule; rkeys[0] is the key itself.
static inline void aes128r4ni_setkey_encrypt(const unsigned char *key,
                                             __m128i rkeys[5]) {
    __m128i cur = _mm_loadu_si128((const __m128i *)key);
    __m128i assist;
    int slot = 0;

    /* One expansion step (blockshift-based block by Cedric Bourrasset):
     * store the current round key, then derive the next. The round constant
     * has to be a compile-time immediate, hence a macro. */
#define AES128R4NI_EXPAND_STEP(RCON)                               \
    do {                                                           \
        assist = _mm_aeskeygenassist_si128(cur, RCON);             \
        rkeys[slot++] = cur;                                       \
        cur = _mm_xor_si128(cur, _mm_slli_si128(cur, 4));          \
        cur = _mm_xor_si128(cur, _mm_slli_si128(cur, 8));          \
        cur = _mm_xor_si128(cur, _mm_shuffle_epi32(assist, 0xff)); \
    } while (0)

    AES128R4NI_EXPAND_STEP(0x01);
    AES128R4NI_EXPAND_STEP(0x02);
    AES128R4NI_EXPAND_STEP(0x04);
    AES128R4NI_EXPAND_STEP(0x08);

#undef AES128R4NI_EXPAND_STEP

    /* The fourth derived key is the final round key. */
    rkeys[slot++] = cur;
}
189
+
190
// Allocates a 5-entry round-key schedule for the 4-round AES variant and fills
// it from `key` (16 bytes are read). The pointer is returned through
// *_schedule and must be released with oqs_aes128r4_free_schedule_ni().
// On allocation failure *_schedule is set to NULL and nothing else is done;
// previously the NULL pointer was handed straight to the key expansion.
void oqs_aes128r4_load_schedule_ni(const uint8_t *key, void **_schedule) {
    __m128i *round_keys = malloc(5 * sizeof *round_keys);
    *_schedule = round_keys;
    if (round_keys == NULL) {
        return;
    }
    aes128r4ni_setkey_encrypt(key, round_keys);
}
196
+
197
// Releases a schedule obtained from oqs_aes128r4_load_schedule_ni().
// A NULL schedule is ignored. The 5 round keys are handed to
// mayo_secure_free(), which is declared outside this file.
void oqs_aes128r4_free_schedule_ni(void *schedule) {
    if (schedule == NULL) {
        return;
    }
    mayo_secure_free(schedule, 5 * sizeof(__m128i));
}
202
+
203
// Encrypts the single 16-byte block `nv` with the 4-round AES variant (5 round
// keys in `rkeys`) and stores the result to `out` (unaligned store, 16 bytes).
static inline void aes128r4ni_encrypt(const __m128i rkeys[5], __m128i nv,
                                      unsigned char *out) {
    // Initial key whitening.
    __m128i state = _mm_xor_si128(nv, rkeys[0]);

    // Three full rounds.
    for (int round = 1; round <= 3; round++) {
        state = _mm_aesenc_si128(state, rkeys[round]);
    }

    // Final round.
    state = _mm_aesenclast_si128(state, rkeys[4]);
    _mm_storeu_si128((__m128i *)out, state);
}
213
+
214
// Encrypts four 16-byte blocks n0..n3 with the 4-round AES variant, keeping
// the four states interleaved round by round. The results are stored back to
// back in `out` (64 bytes written, unaligned stores).
static inline void aes128r4ni_encrypt_x4(const __m128i rkeys[5], __m128i n0,
                                         __m128i n1, __m128i n2, __m128i n3,
                                         unsigned char *out) {
    const __m128i first = rkeys[0];
    __m128i s0 = _mm_xor_si128(n0, first);
    __m128i s1 = _mm_xor_si128(n1, first);
    __m128i s2 = _mm_xor_si128(n2, first);
    __m128i s3 = _mm_xor_si128(n3, first);

    // Three full rounds on all four lanes.
    for (int round = 1; round <= 3; round++) {
        const __m128i rk = rkeys[round];
        s0 = _mm_aesenc_si128(s0, rk);
        s1 = _mm_aesenc_si128(s1, rk);
        s2 = _mm_aesenc_si128(s2, rk);
        s3 = _mm_aesenc_si128(s3, rk);
    }

    // Final round.
    const __m128i last = rkeys[4];
    s0 = _mm_aesenclast_si128(s0, last);
    s1 = _mm_aesenclast_si128(s1, last);
    s2 = _mm_aesenclast_si128(s2, last);
    s3 = _mm_aesenclast_si128(s3, last);

    _mm_storeu_si128((__m128i *)(out + 0), s0);
    _mm_storeu_si128((__m128i *)(out + 16), s1);
    _mm_storeu_si128((__m128i *)(out + 32), s2);
    _mm_storeu_si128((__m128i *)(out + 48), s3);
}
243
+
244
// Adds `step` to the 64-bit counter stored big-endian in bytes 8..15 of `ctr`:
// `swap_hi` reverses those bytes, the addition happens in the upper 64-bit
// lane, and the same shuffle restores the byte order.
static inline __m128i aes128r4ni_ctr_step(__m128i ctr, __m128i swap_hi,
                                          long long step) {
    const __m128i native = _mm_shuffle_epi8(ctr, swap_hi);
    const __m128i bumped = _mm_add_epi64(native, _mm_set_epi64x(step, 0));
    return _mm_shuffle_epi8(bumped, swap_hi);
}

// Writes out_len bytes of counter-mode keystream from the 4-round AES variant
// to `out`, i.e. the encryptions of successive counter blocks starting from
// the all-zero block.
// Not for general use: IV = 0, nonce = 0
static void oqs_aes128r4_ctr_enc_sch_ni(const void *schedule, uint8_t *out,
                                        size_t out_len) {
    const __m128i *rkeys = schedule;
    // Leaves bytes 0..7 in place and reverses bytes 8..15.
    const __m128i swap_hi =
        _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0);
    __m128i ctr = _mm_setzero_si128();

    // Bulk: four counter blocks (64 bytes) per iteration.
    for (; out_len >= 64; out += 64, out_len -= 64) {
        const __m128i c1 = aes128r4ni_ctr_step(ctr, swap_hi, 1);
        const __m128i c2 = aes128r4ni_ctr_step(ctr, swap_hi, 2);
        const __m128i c3 = aes128r4ni_ctr_step(ctr, swap_hi, 3);
        aes128r4ni_encrypt_x4(rkeys, ctr, c1, c2, c3, out);
        ctr = aes128r4ni_ctr_step(ctr, swap_hi, 4);
    }

    // Remaining whole blocks, one at a time.
    for (; out_len >= 16; out += 16, out_len -= 16) {
        aes128r4ni_encrypt(rkeys, ctr, out);
        ctr = aes128r4ni_ctr_step(ctr, swap_hi, 1);
    }

    // Trailing partial block: encrypt into scratch and copy only what is needed.
    if (out_len > 0) {
        uint8_t last[16];
        aes128r4ni_encrypt(rkeys, ctr, last);
        memcpy(out, last, out_len);
    }
}
283
+
284
// Fills `output` with outputByteLen bytes of counter-mode keystream from the
// 4-round AES variant (zero IV/nonce), keyed by the first 16 bytes of `input`.
//
// inputByteLen is accepted for interface compatibility but not used.
//
// Returns outputByteLen converted to int on success, or -1 if the key
// schedule could not be allocated (in which case `output` is left untouched).
// Previously a failed allocation led to the NULL schedule being dereferenced.
int AES_128_CTR_4R_NI(unsigned char *output, size_t outputByteLen,
                      const unsigned char *input, size_t inputByteLen) {
    void *schedule = NULL;
    (void)inputByteLen;

    oqs_aes128r4_load_schedule_ni(input, &schedule);
    if (schedule == NULL) {
        return -1;
    }
    oqs_aes128r4_ctr_enc_sch_ni(schedule, output, outputByteLen);
    oqs_aes128r4_free_schedule_ni(schedule);
    return (int)outputByteLen;
}
292
+ #endif
293
+