@pinkparrot/qsafe-mayo-wasm 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/.vscode/launch.json +12 -0
- package/LICENSE +201 -0
- package/bridge/mayo1_bridge.c +26 -0
- package/bridge/mayo2_bridge.c +26 -0
- package/bridge/randombytes_inject.c +44 -0
- package/build_mayo1.ps1 +36 -0
- package/build_mayo2.ps1 +36 -0
- package/dist/mayo.browser.min.js +216 -0
- package/dist/mayo1.js +0 -0
- package/dist/mayo2.js +0 -0
- package/dist/mayo_api.js +139 -0
- package/dist/package.json +1 -0
- package/gitignore +2 -0
- package/index.mjs +1 -0
- package/mayo-c/.astylerc +16 -0
- package/mayo-c/.cmake/flags.cmake +45 -0
- package/mayo-c/.cmake/sanitizers.cmake +81 -0
- package/mayo-c/.cmake/target.cmake +71 -0
- package/mayo-c/.github/workflows/ci_clang.yml +61 -0
- package/mayo-c/.github/workflows/ci_gcc.yml +60 -0
- package/mayo-c/.github/workflows/cmake.yml +160 -0
- package/mayo-c/.github/workflows/macos_m1.yml +68 -0
- package/mayo-c/CMakeLists.txt +35 -0
- package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.req +900 -0
- package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.rsp +902 -0
- package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.req +900 -0
- package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.rsp +902 -0
- package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.req +900 -0
- package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.rsp +902 -0
- package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.req +900 -0
- package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.rsp +902 -0
- package/mayo-c/LICENSE +202 -0
- package/mayo-c/META/MAYO-1_META.yml +52 -0
- package/mayo-c/META/MAYO-2_META.yml +52 -0
- package/mayo-c/META/MAYO-3_META.yml +52 -0
- package/mayo-c/META/MAYO-5_META.yml +52 -0
- package/mayo-c/NOTICE +13 -0
- package/mayo-c/README.md +183 -0
- package/mayo-c/apps/CMakeLists.txt +31 -0
- package/mayo-c/apps/PQCgenKAT_sign.c +281 -0
- package/mayo-c/apps/example.c +151 -0
- package/mayo-c/apps/example_nistapi.c +124 -0
- package/mayo-c/include/mayo.h +442 -0
- package/mayo-c/include/mem.h +25 -0
- package/mayo-c/include/randombytes.h +31 -0
- package/mayo-c/scripts/contstants.py +141 -0
- package/mayo-c/scripts/find_irred_poly.sage +39 -0
- package/mayo-c/src/AVX2/arithmetic_common.h +159 -0
- package/mayo-c/src/AVX2/echelon_form.h +91 -0
- package/mayo-c/src/AVX2/echelon_form_loop.h +58 -0
- package/mayo-c/src/AVX2/shuffle_arithmetic.h +442 -0
- package/mayo-c/src/CMakeLists.txt +98 -0
- package/mayo-c/src/arithmetic.c +128 -0
- package/mayo-c/src/arithmetic.h +124 -0
- package/mayo-c/src/common/aes128ctr.c +293 -0
- package/mayo-c/src/common/aes_c.c +741 -0
- package/mayo-c/src/common/aes_ctr.h +32 -0
- package/mayo-c/src/common/aes_neon.c +201 -0
- package/mayo-c/src/common/debug_bench_tools.h +69 -0
- package/mayo-c/src/common/fips202.c +1093 -0
- package/mayo-c/src/common/fips202.h +12 -0
- package/mayo-c/src/common/mem.c +19 -0
- package/mayo-c/src/common/randombytes_ctrdrbg.c +141 -0
- package/mayo-c/src/common/randombytes_system.c +399 -0
- package/mayo-c/src/generic/arithmetic_dynamic.h +68 -0
- package/mayo-c/src/generic/arithmetic_fixed.h +84 -0
- package/mayo-c/src/generic/echelon_form.h +152 -0
- package/mayo-c/src/generic/ef_inner_loop.h +56 -0
- package/mayo-c/src/generic/generic_arithmetic.h +294 -0
- package/mayo-c/src/mayo.c +675 -0
- package/mayo-c/src/mayo_1/api.c +46 -0
- package/mayo-c/src/mayo_1/api.h +43 -0
- package/mayo-c/src/mayo_2/api.c +46 -0
- package/mayo-c/src/mayo_2/api.h +43 -0
- package/mayo-c/src/mayo_3/api.c +46 -0
- package/mayo-c/src/mayo_3/api.h +43 -0
- package/mayo-c/src/mayo_5/api.c +46 -0
- package/mayo-c/src/mayo_5/api.h +43 -0
- package/mayo-c/src/neon/arithmetic_common.h +132 -0
- package/mayo-c/src/neon/echelon_form.h +55 -0
- package/mayo-c/src/neon/echelon_form_loop.h +58 -0
- package/mayo-c/src/neon/shuffle_arithmetic.h +462 -0
- package/mayo-c/src/params.c +42 -0
- package/mayo-c/src/simple_arithmetic.h +138 -0
- package/mayo-c/test/CMakeLists.txt +51 -0
- package/mayo-c/test/bench.c +166 -0
- package/mayo-c/test/m1cycles.c +155 -0
- package/mayo-c/test/m1cycles.h +13 -0
- package/mayo-c/test/test_kat.c +271 -0
- package/mayo-c/test/test_mayo.c +139 -0
- package/mayo-c/test/test_sample_solution.c +75 -0
- package/mayo-c/test/test_various.c +680 -0
- package/package.json +39 -0
- package/publish.bat +22 -0
- package/readme.md +80 -0
- package/test/test.mjs +42 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
#ifndef ARITHMETIC_H
|
|
5
|
+
#define ARITHMETIC_H
|
|
6
|
+
|
|
7
|
+
#include <stdint.h>
|
|
8
|
+
#include <mayo.h>
|
|
9
|
+
#include <stdint.h>
|
|
10
|
+
#include <stddef.h>
|
|
11
|
+
|
|
12
|
+
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
|
13
|
+
#ifndef TARGET_BIG_ENDIAN
|
|
14
|
+
#define TARGET_BIG_ENDIAN
|
|
15
|
+
#endif
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#define uint32_t_blocker MAYO_NAMESPACE(uint32_t_blocker)
|
|
19
|
+
extern volatile uint32_t uint32_t_blocker;
|
|
20
|
+
#define uint64_t_blocker MAYO_NAMESPACE(uint64_t_blocker)
|
|
21
|
+
extern volatile uint64_t uint64_t_blocker;
|
|
22
|
+
#define unsigned_char_blocker MAYO_NAMESPACE(unsigned_char_blocker)
|
|
23
|
+
extern volatile unsigned char unsigned_char_blocker;
|
|
24
|
+
|
|
25
|
+
#if !(((!defined(__clang__) && defined(__GNUC__) && __GNUC__ <= 12)) && (defined(__x86_64__) || defined(_M_X64)))
|
|
26
|
+
// a > b -> b - a is negative
|
|
27
|
+
// returns 0xFFFFFFFF if true, 0x00000000 if false
|
|
28
|
+
static inline uint32_t ct_is_greater_than(int a, int b) {
|
|
29
|
+
int32_t diff = b - a;
|
|
30
|
+
return ((uint32_t) (diff >> (8*sizeof(uint32_t)-1)) ^ uint32_t_blocker);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// a > b -> b - a is negative
|
|
34
|
+
// returns 0xFFFFFFFFFFFFFFFF if true, 0x0000000000000000 if false
|
|
35
|
+
static inline uint64_t ct_64_is_greater_than(int a, int b) {
|
|
36
|
+
int64_t diff = ((int64_t) b) - ((int64_t) a);
|
|
37
|
+
return ((uint64_t) (diff >> (8*sizeof(uint64_t)-1)) ^ uint64_t_blocker);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// if a == b -> 0x00000000, else 0xFFFFFFFF
|
|
41
|
+
static inline uint32_t ct_compare_32(int a, int b) {
|
|
42
|
+
return ((uint32_t)((-(int32_t)(a ^ b)) >> (8*sizeof(uint32_t)-1)) ^ uint32_t_blocker);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// if a == b -> 0x0000000000000000, else 0xFFFFFFFFFFFFFFFF
|
|
46
|
+
static inline uint64_t ct_compare_64(int a, int b) {
|
|
47
|
+
return ((uint64_t)((-(int64_t)(a ^ b)) >> (8*sizeof(uint64_t)-1)) ^ uint64_t_blocker);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// if a == b -> 0x00, else 0xFF
|
|
51
|
+
static inline unsigned char ct_compare_8(unsigned char a, unsigned char b) {
|
|
52
|
+
return ((int8_t)((-(int32_t)(a ^ b)) >> (8*sizeof(uint32_t)-1)) ^ unsigned_char_blocker);
|
|
53
|
+
}
|
|
54
|
+
#else
|
|
55
|
+
// a > b -> b - a is negative
|
|
56
|
+
// returns 0xFFFFFFFF if true, 0x00000000 if false
|
|
57
|
+
// Branch-free test for a > b.
//
// Returns 0xFFFFFFFF when a > b and 0x00000000 otherwise.
//
// The difference b - a is formed in 64 bits: a 32-bit signed subtraction
// overflows (undefined behaviour) when the operands are far apart, e.g.
// a = INT_MAX, b = INT_MIN. The sign bit is then expanded into a full mask
// with unsigned arithmetic, so no right shift of a negative value is needed.
static inline uint32_t ct_is_greater_than(int a, int b) {
    const int64_t diff = (int64_t) b - (int64_t) a;
    // 1 when diff is negative (a > b), 0 otherwise.
    const uint32_t negative = (uint32_t) ((uint64_t) diff >> 63);
    return (uint32_t) (0u - negative);
}
|
|
61
|
+
|
|
62
|
+
// a > b -> b - a is negative
|
|
63
|
+
// returns 0xFFFFFFFFFFFFFFFF if true, 0x0000000000000000 if false
|
|
64
|
+
// 64-bit mask variant of the a > b test.
//
// Returns 0xFFFFFFFFFFFFFFFF when a > b and 0 otherwise. Both operands are
// widened to 64 bits before subtracting, so the difference cannot overflow;
// the sign of the difference is then smeared over the whole word by a right
// shift (this relies on the compiler shifting negative values arithmetically).
static inline uint64_t ct_64_is_greater_than(int a, int b) {
    const int64_t wide_a = (int64_t) a;
    const int64_t wide_b = (int64_t) b;
    const int64_t delta = wide_b - wide_a;
    const uint64_t mask = (uint64_t) (delta >> (8 * sizeof(uint64_t) - 1));
    return mask;
}
|
|
68
|
+
|
|
69
|
+
// if a == b -> 0x00000000, else 0xFFFFFFFF
|
|
70
|
+
// Branch-free inequality test.
//
// Returns 0x00000000 when a == b and 0xFFFFFFFF otherwise.
//
// The XOR of the operands is handled as an unsigned value x; for any non-zero
// x either x or (0 - x) has its top bit set, so (x | -x) >> 31 is 1 exactly
// when a != b. Negating the signed XOR instead reports "equal" whenever the
// operands differ in their sign bit (e.g. 0 and -1) and is undefined for
// INT_MIN.
static inline uint32_t ct_compare_32(int a, int b) {
    const uint32_t x = (uint32_t) (a ^ b);
    const uint32_t differs = (x | (0u - x)) >> 31;
    return (uint32_t) (0u - differs);
}
|
|
73
|
+
|
|
74
|
+
// if a == b -> 0x0000000000000000, else 0xFFFFFFFFFFFFFFFF
|
|
75
|
+
// Branch-free inequality test with a 64-bit mask.
//
// Returns 0x0000000000000000 when a == b and 0xFFFFFFFFFFFFFFFF otherwise.
//
// The XOR of the operands is zero-extended to an unsigned 64-bit value x; for
// any non-zero x, (x | -x) has its top bit set, so the shifted value is 1
// exactly when a != b. Negating the sign-extended XOR instead reports "equal"
// whenever the operands differ in their sign bit (e.g. 0 and -1).
static inline uint64_t ct_compare_64(int a, int b) {
    const uint64_t x = (uint64_t) (uint32_t) (a ^ b);
    const uint64_t differs = (x | ((uint64_t) 0 - x)) >> 63;
    return (uint64_t) 0 - differs;
}
|
|
78
|
+
|
|
79
|
+
// if a == b -> 0x00, else 0xFF
|
|
80
|
+
// Byte-wide branch-free inequality test.
//
// Returns 0x00 when a == b and 0xFF otherwise. a ^ b lies in 0..255, so its
// negation is zero or negative; shifting the sign bit across the word yields
// 0 or -1, which is narrowed to 8 bits and returned as an unsigned char.
static inline unsigned char ct_compare_8(unsigned char a, unsigned char b) {
    const int32_t negated_xor = -(int32_t) (a ^ b);
    const int8_t spread = (int8_t) (negated_xor >> (8 * sizeof(uint32_t) - 1));
    return spread;
}
|
|
83
|
+
#endif
|
|
84
|
+
|
|
85
|
+
#if defined(MAYO_AVX) || defined(MAYO_NEON)
|
|
86
|
+
#include <shuffle_arithmetic.h>
|
|
87
|
+
#elif defined(MAYO_M4)
|
|
88
|
+
#include <m4_arithmetic.h>
|
|
89
|
+
#else
|
|
90
|
+
#include <generic_arithmetic.h>
|
|
91
|
+
#endif
|
|
92
|
+
|
|
93
|
+
// XOR-accumulates a scaled copy of `in` into `acc`, word by word.
//
//   legs - number of 64-bit words processed in both arrays
//   in   - source words (read only)
//   a    - scalar handed to mul_table() to obtain the multiplier table
//   acc  - accumulator words, updated in place
//
// mul_table() comes from the arithmetic backend header included above. Its
// 32-bit result is split into four fields (bits 0-7, 8-11, 16-19, 24-27).
// Field k multiplies the word made of bit k of every 4-bit group of the
// input, and the four products are XORed together into acc[i].
// NOTE(review): this looks like a packed GF(16) scalar multiply with sixteen
// nibbles per word - confirm against mul_table's definition.
static
inline void vec_mul_add_u64(const int legs, const uint64_t *in, unsigned char a, uint64_t *acc) {
    const uint64_t nibble_lsb = 0x1111111111111111ULL;
    const uint32_t tab = mul_table(a);

    const uint32_t factor0 = tab & 0xff;
    const uint32_t factor1 = (tab >> 8) & 0xf;
    const uint32_t factor2 = (tab >> 16) & 0xf;
    const uint32_t factor3 = (tab >> 24) & 0xf;

    for (int w = 0; w < legs; w++) {
        const uint64_t word = in[w];
        uint64_t sum = (word & nibble_lsb) * factor0;
        sum ^= ((word >> 1) & nibble_lsb) * factor1;
        sum ^= ((word >> 2) & nibble_lsb) * factor2;
        sum ^= ((word >> 3) & nibble_lsb) * factor3;
        acc[w] ^= sum;
    }
}
|
|
106
|
+
|
|
107
|
+
// Calculate Upper in KeyGen
|
|
108
|
+
#define m_upper MAYO_NAMESPACE(m_upper)
|
|
109
|
+
void m_upper(const mayo_params_t* p, const uint64_t *in, uint64_t *out, int size);
|
|
110
|
+
|
|
111
|
+
// Sample solution in Sign
|
|
112
|
+
#define sample_solution MAYO_NAMESPACE(sample_solution)
|
|
113
|
+
int sample_solution(const mayo_params_t *p, unsigned char *A, const unsigned char *y, const unsigned char *r, unsigned char *x, int k, int o, int m, int A_cols);
|
|
114
|
+
|
|
115
|
+
// Byte-order reversal of 32-bit and 64-bit values.
//
// GCC and Clang use the compiler builtins. Other compilers use the portable
// shift/mask fallback, which converts its operand to the exact unsigned width
// first so that signed, narrower or wider arguments give the same result as
// the builtins (no shifting of signed values, no bits above the result width).
// The fallback macros evaluate their argument more than once, so do not pass
// expressions with side effects.
#if defined(__GNUC__) || defined(__clang__)
#define BSWAP32(i) __builtin_bswap32((i))
#define BSWAP64(i) __builtin_bswap64((i))
#else
#define BSWAP32(i) ((uint32_t) ((((uint32_t) (i)) >> 24) | ((((uint32_t) (i)) >> 8) & 0xff00u) | ((((uint32_t) (i)) & 0xff00u) << 8) | (((uint32_t) (i)) << 24)))
#define BSWAP64(i) (((uint64_t) BSWAP32(((uint64_t) (i)) >> 32)) | (((uint64_t) BSWAP32(i)) << 32))
#endif
|
|
122
|
+
|
|
123
|
+
#endif
|
|
124
|
+
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
// SPDX-License-Identifier: Apache-2.0 and MIT and Public Domain
|
|
2
|
+
|
|
3
|
+
#ifdef ENABLE_AESNI
|
|
4
|
+
|
|
5
|
+
#include <mem.h>
|
|
6
|
+
#include <stdint.h>
|
|
7
|
+
#include <string.h>
|
|
8
|
+
#include <tmmintrin.h>
|
|
9
|
+
#include <wmmintrin.h>
|
|
10
|
+
|
|
11
|
+
// Adapted from liboqs/src/common/aes which in turn takes it from:
|
|
12
|
+
// crypto_core/aes128ncrypt/dolbeau/aesenc-int
|
|
13
|
+
// (https://bench.cr.yp.to/supercop.html)
|
|
14
|
+
// Combines the previous round key with the output of
// _mm_aeskeygenassist_si128 (for that same key) to form the next round key:
// the key is XORed with itself shifted left by 4 bytes, the result is XORed
// with itself shifted left by 8 bytes, and finally with the top 32-bit word
// of `assist` broadcast to all four words.
static inline __m128i aes128ni_mix_round_key(__m128i prev, __m128i assist) {
    __m128i next = _mm_xor_si128(prev, _mm_slli_si128(prev, 4));
    next = _mm_xor_si128(next, _mm_slli_si128(next, 8));
    return _mm_xor_si128(next, _mm_shuffle_epi32(assist, 0xff));
}

// Expands the 16-byte AES-128 key at `key` into the 11 round keys rkeys[0..10].
// rkeys[0] is the key itself; each later entry is derived from the previous
// one using the round constants 0x01, 0x02, ..., 0x80, 0x1b, 0x36.
static inline void aes128ni_setkey_encrypt(const unsigned char *key,
                                           __m128i rkeys[11]) {
    __m128i current = _mm_loadu_si128((const __m128i *)(key + 0));
    int slot = 0;

// The round constant has to be a compile-time immediate, hence a macro.
#define AES128NI_EMIT_RKEY(RCON)                                        \
    do {                                                                \
        __m128i assist_ = _mm_aeskeygenassist_si128(current, RCON);     \
        rkeys[slot++] = current;                                        \
        current = aes128ni_mix_round_key(current, assist_);             \
    } while (0)

    AES128NI_EMIT_RKEY(0x01);
    AES128NI_EMIT_RKEY(0x02);
    AES128NI_EMIT_RKEY(0x04);
    AES128NI_EMIT_RKEY(0x08);
    AES128NI_EMIT_RKEY(0x10);
    AES128NI_EMIT_RKEY(0x20);
    AES128NI_EMIT_RKEY(0x40);
    AES128NI_EMIT_RKEY(0x80);
    AES128NI_EMIT_RKEY(0x1b);
    AES128NI_EMIT_RKEY(0x36);

#undef AES128NI_EMIT_RKEY

    // Last round key (index 10).
    rkeys[slot++] = current;
}
|
|
44
|
+
|
|
45
|
+
// Allocates an 11-entry round-key array on the heap, stores its address in
// *_schedule and fills it from the 16-byte key at `key`.
//
// The array is expected to be released with oqs_aes128_free_schedule_ni().
//
// NOTE(review): the malloc() result is not checked; if allocation fails the
// key expansion writes through a null pointer. Callers in this file pass the
// pointer straight on, so a fix needs an agreed failure policy (abort vs.
// error return) across load/encrypt/free.
void oqs_aes128_load_schedule_ni(const uint8_t *key, void **_schedule) {
    __m128i *round_keys = malloc(11 * sizeof(__m128i));
    *_schedule = round_keys;
    aes128ni_setkey_encrypt(key, round_keys);
}
|
|
51
|
+
|
|
52
|
+
// Releases a round-key array obtained from oqs_aes128_load_schedule_ni().
// A NULL pointer is ignored; otherwise the 11-entry array is handed to
// mayo_secure_free() together with its size in bytes.
void oqs_aes128_free_schedule_ni(void *schedule) {
    if (schedule == NULL) {
        return;
    }
    mayo_secure_free(schedule, 11 * sizeof(__m128i));
}
|
|
57
|
+
|
|
58
|
+
// Single encryption
|
|
59
|
+
// Encrypts the single 16-byte block `nv` with the round keys rkeys[0..10]
// and stores the 16-byte result at `out` (unaligned store).
//
// rkeys[0] is XORed in first, rkeys[1..9] drive nine full AES rounds and
// rkeys[10] the final round.
static inline void aes128ni_encrypt(const __m128i rkeys[11], __m128i nv,
                                    unsigned char *out) {
    __m128i state = _mm_xor_si128(nv, rkeys[0]);

    for (int round = 1; round < 10; round++) {
        state = _mm_aesenc_si128(state, rkeys[round]);
    }

    state = _mm_aesenclast_si128(state, rkeys[10]);
    _mm_storeu_si128((__m128i *)(out), state);
}
|
|
74
|
+
|
|
75
|
+
// 4x interleaved encryption
|
|
76
|
+
// Encrypts the four 16-byte blocks n0..n3 with the same round keys and writes
// the results back to back at out[0..63] (unaligned stores, n0 first).
//
// The four blocks go through each round together: rkeys[0] is XORed in,
// rkeys[1..9] drive nine full rounds and rkeys[10] the final round.
static inline void aes128ni_encrypt_x4(const __m128i rkeys[11], __m128i n0,
                                       __m128i n1, __m128i n2, __m128i n3,
                                       unsigned char *out) {
    __m128i lane[4] = { n0, n1, n2, n3 };

    for (int j = 0; j < 4; j++) {
        lane[j] = _mm_xor_si128(lane[j], rkeys[0]);
    }

    for (int round = 1; round < 10; round++) {
        for (int j = 0; j < 4; j++) {
            lane[j] = _mm_aesenc_si128(lane[j], rkeys[round]);
        }
    }

    for (int j = 0; j < 4; j++) {
        lane[j] = _mm_aesenclast_si128(lane[j], rkeys[10]);
    }

    for (int j = 0; j < 4; j++) {
        _mm_storeu_si128((__m128i *)(out + 16 * j), lane[j]);
    }
}
|
|
110
|
+
|
|
111
|
+
// Not for general use: IV = 0, nonce = 0
|
|
112
|
+
// Returns `block` with `step` added to the counter held in bytes 8..15.
// `mask` reverses those eight bytes, so the addition is done on the
// byte-swapped value and the result is swapped back afterwards.
static inline __m128i aes128ni_ctr_step(__m128i block, __m128i mask,
                                        long long step) {
    __m128i swapped = _mm_shuffle_epi8(block, mask);
    swapped = _mm_add_epi64(swapped, _mm_set_epi64x(step, 0));
    return _mm_shuffle_epi8(swapped, mask);
}

// Fills out[0..out_len) with the encryptions of successive counter blocks,
// starting from the all-zero block, using the 11 round keys at `schedule`.
//
// Output is produced 64 bytes at a time while possible, then 16 bytes at a
// time; a final partial block is encrypted into a stack buffer and only the
// remaining out_len bytes are copied out.
static void oqs_aes128_ctr_enc_sch_ni(const void *schedule, uint8_t *out,
                                      size_t out_len) {
    const __m128i *rkeys = schedule;
    // Shuffle control: bytes 0..7 stay in place, bytes 8..15 are reversed.
    const __m128i mask =
        _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0);
    __m128i block = _mm_set_epi64x(0, 0);

    // Four counter blocks per iteration.
    for (; out_len >= 64; out += 64, out_len -= 64) {
        const __m128i c0 = block;
        const __m128i c1 = aes128ni_ctr_step(block, mask, 1);
        const __m128i c2 = aes128ni_ctr_step(block, mask, 2);
        const __m128i c3 = aes128ni_ctr_step(block, mask, 3);
        aes128ni_encrypt_x4(rkeys, c0, c1, c2, c3, out);
        block = aes128ni_ctr_step(block, mask, 4);
    }

    // Remaining whole blocks, one at a time.
    for (; out_len >= 16; out += 16, out_len -= 16) {
        aes128ni_encrypt(rkeys, block, out);
        block = aes128ni_ctr_step(block, mask, 1);
    }

    // Trailing partial block.
    if (out_len > 0) {
        uint8_t tail[16];
        aes128ni_encrypt(rkeys, block, tail);
        memcpy(out, tail, out_len);
    }
}
|
|
151
|
+
|
|
152
|
+
// Writes outputByteLen bytes of AES-128 counter-mode output to `output`.
//
//   output        - destination buffer, at least outputByteLen bytes
//   outputByteLen - number of bytes to produce
//   input         - AES-128 key; the key schedule reads 16 bytes from it
//   inputByteLen  - not used by this implementation
//
// A round-key schedule is built on the heap, used, and released again before
// returning. Returns outputByteLen converted to int.
int AES_128_CTR_NI(unsigned char *output, size_t outputByteLen,
                   const unsigned char *input, size_t inputByteLen) {
    void *round_keys = NULL;

    (void) inputByteLen;

    oqs_aes128_load_schedule_ni(input, &round_keys);
    oqs_aes128_ctr_enc_sch_ni(round_keys, output, outputByteLen);
    oqs_aes128_free_schedule_ni(round_keys);

    return (int)outputByteLen;
}
|
|
160
|
+
|
|
161
|
+
// 4-Round AES...
|
|
162
|
+
|
|
163
|
+
// From crypto_core/aes128ncrypt/dolbeau/aesenc-int
|
|
164
|
+
/* blockshift-based block by Cedric Bourrasset */
// Combines the previous round key with the output of
// _mm_aeskeygenassist_si128 (for that same key) to form the next round key:
// the key is XORed with itself shifted left by 4 bytes, the result is XORed
// with itself shifted left by 8 bytes, and finally with the top 32-bit word
// of `assist` broadcast to all four words.
static inline __m128i aes128r4ni_mix_round_key(__m128i prev, __m128i assist) {
    __m128i next = _mm_xor_si128(prev, _mm_slli_si128(prev, 4));
    next = _mm_xor_si128(next, _mm_slli_si128(next, 8));
    return _mm_xor_si128(next, _mm_shuffle_epi32(assist, 0xff));
}

// Expands the 16-byte key at `key` into the 5 round keys rkeys[0..4] used by
// the 4-round AES variant. rkeys[0] is the key itself; each later entry is
// derived from the previous one using the round constants 0x01, 0x02, 0x04,
// 0x08.
static inline void aes128r4ni_setkey_encrypt(const unsigned char *key,
                                             __m128i rkeys[5]) {
    __m128i current = _mm_loadu_si128((const __m128i *)(key + 0));
    int slot = 0;

// The round constant has to be a compile-time immediate, hence a macro.
#define AES128R4NI_EMIT_RKEY(RCON)                                      \
    do {                                                                \
        __m128i assist_ = _mm_aeskeygenassist_si128(current, RCON);     \
        rkeys[slot++] = current;                                        \
        current = aes128r4ni_mix_round_key(current, assist_);           \
    } while (0)

    AES128R4NI_EMIT_RKEY(0x01);
    AES128R4NI_EMIT_RKEY(0x02);
    AES128R4NI_EMIT_RKEY(0x04);
    AES128R4NI_EMIT_RKEY(0x08);

#undef AES128R4NI_EMIT_RKEY

    // Last round key (index 4).
    rkeys[slot++] = current;
}
|
|
189
|
+
|
|
190
|
+
// Allocates a 5-entry round-key array on the heap, stores its address in
// *_schedule and fills it from the 16-byte key at `key` (4-round variant).
//
// The array is expected to be released with oqs_aes128r4_free_schedule_ni().
//
// NOTE(review): the malloc() result is not checked; if allocation fails the
// key expansion writes through a null pointer. Callers in this file pass the
// pointer straight on, so a fix needs an agreed failure policy (abort vs.
// error return) across load/encrypt/free.
void oqs_aes128r4_load_schedule_ni(const uint8_t *key, void **_schedule) {
    __m128i *round_keys = malloc(5 * sizeof(__m128i));
    *_schedule = round_keys;
    aes128r4ni_setkey_encrypt(key, round_keys);
}
|
|
196
|
+
|
|
197
|
+
// Releases a round-key array obtained from oqs_aes128r4_load_schedule_ni().
// A NULL pointer is ignored; otherwise the 5-entry array is handed to
// mayo_secure_free() together with its size in bytes.
void oqs_aes128r4_free_schedule_ni(void *schedule) {
    if (schedule == NULL) {
        return;
    }
    mayo_secure_free(schedule, 5 * sizeof(__m128i));
}
|
|
202
|
+
|
|
203
|
+
// Single encryption
|
|
204
|
+
// Encrypts the single 16-byte block `nv` with the 4-round variant and stores
// the 16-byte result at `out` (unaligned store).
//
// rkeys[0] is XORed in first, rkeys[1..3] drive three full AES rounds and
// rkeys[4] the final round.
static inline void aes128r4ni_encrypt(const __m128i rkeys[5], __m128i nv,
                                      unsigned char *out) {
    __m128i state = _mm_xor_si128(nv, rkeys[0]);

    for (int round = 1; round < 4; round++) {
        state = _mm_aesenc_si128(state, rkeys[round]);
    }

    state = _mm_aesenclast_si128(state, rkeys[4]);
    _mm_storeu_si128((__m128i *)(out), state);
}
|
|
213
|
+
|
|
214
|
+
// 4x interleaved encryption
|
|
215
|
+
// Encrypts the four 16-byte blocks n0..n3 with the 4-round variant and writes
// the results back to back at out[0..63] (unaligned stores, n0 first).
//
// The four blocks go through each round together: rkeys[0] is XORed in,
// rkeys[1..3] drive three full rounds and rkeys[4] the final round.
static inline void aes128r4ni_encrypt_x4(const __m128i rkeys[5], __m128i n0,
                                         __m128i n1, __m128i n2, __m128i n3,
                                         unsigned char *out) {
    __m128i lane[4] = { n0, n1, n2, n3 };

    for (int j = 0; j < 4; j++) {
        lane[j] = _mm_xor_si128(lane[j], rkeys[0]);
    }

    for (int round = 1; round < 4; round++) {
        for (int j = 0; j < 4; j++) {
            lane[j] = _mm_aesenc_si128(lane[j], rkeys[round]);
        }
    }

    for (int j = 0; j < 4; j++) {
        lane[j] = _mm_aesenclast_si128(lane[j], rkeys[4]);
    }

    for (int j = 0; j < 4; j++) {
        _mm_storeu_si128((__m128i *)(out + 16 * j), lane[j]);
    }
}
|
|
243
|
+
|
|
244
|
+
// Not for general use: IV = 0, nonce = 0
|
|
245
|
+
// Returns `block` with `step` added to the counter held in bytes 8..15.
// `mask` reverses those eight bytes, so the addition is done on the
// byte-swapped value and the result is swapped back afterwards.
static inline __m128i aes128r4ni_ctr_step(__m128i block, __m128i mask,
                                          long long step) {
    __m128i swapped = _mm_shuffle_epi8(block, mask);
    swapped = _mm_add_epi64(swapped, _mm_set_epi64x(step, 0));
    return _mm_shuffle_epi8(swapped, mask);
}

// Fills out[0..out_len) with the 4-round encryptions of successive counter
// blocks, starting from the all-zero block, using the 5 round keys at
// `schedule`.
//
// Output is produced 64 bytes at a time while possible, then 16 bytes at a
// time; a final partial block is encrypted into a stack buffer and only the
// remaining out_len bytes are copied out.
static void oqs_aes128r4_ctr_enc_sch_ni(const void *schedule, uint8_t *out,
                                        size_t out_len) {
    const __m128i *rkeys = schedule;
    // Shuffle control: bytes 0..7 stay in place, bytes 8..15 are reversed.
    const __m128i mask =
        _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0);
    __m128i block = _mm_set_epi64x(0, 0);

    // Four counter blocks per iteration.
    for (; out_len >= 64; out += 64, out_len -= 64) {
        const __m128i c0 = block;
        const __m128i c1 = aes128r4ni_ctr_step(block, mask, 1);
        const __m128i c2 = aes128r4ni_ctr_step(block, mask, 2);
        const __m128i c3 = aes128r4ni_ctr_step(block, mask, 3);
        aes128r4ni_encrypt_x4(rkeys, c0, c1, c2, c3, out);
        block = aes128r4ni_ctr_step(block, mask, 4);
    }

    // Remaining whole blocks, one at a time.
    for (; out_len >= 16; out += 16, out_len -= 16) {
        aes128r4ni_encrypt(rkeys, block, out);
        block = aes128r4ni_ctr_step(block, mask, 1);
    }

    // Trailing partial block.
    if (out_len > 0) {
        uint8_t tail[16];
        aes128r4ni_encrypt(rkeys, block, tail);
        memcpy(out, tail, out_len);
    }
}
|
|
283
|
+
|
|
284
|
+
// Writes outputByteLen bytes of 4-round AES-128 counter-mode output to
// `output`.
//
//   output        - destination buffer, at least outputByteLen bytes
//   outputByteLen - number of bytes to produce
//   input         - key; the key schedule reads 16 bytes from it
//   inputByteLen  - not used by this implementation
//
// A round-key schedule is built on the heap, used, and released again before
// returning. Returns outputByteLen converted to int.
int AES_128_CTR_4R_NI(unsigned char *output, size_t outputByteLen,
                      const unsigned char *input, size_t inputByteLen) {
    void *round_keys = NULL;

    (void) inputByteLen;

    oqs_aes128r4_load_schedule_ni(input, &round_keys);
    oqs_aes128r4_ctr_enc_sch_ni(round_keys, output, outputByteLen);
    oqs_aes128r4_free_schedule_ni(round_keys);

    return (int)outputByteLen;
}
|
|
292
|
+
#endif
|
|
293
|
+
|