@pinkparrot/qsafe-mayo-wasm 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/.gitmodules +3 -0
  2. package/.vscode/launch.json +12 -0
  3. package/LICENSE +201 -0
  4. package/bridge/mayo1_bridge.c +26 -0
  5. package/bridge/mayo2_bridge.c +26 -0
  6. package/bridge/randombytes_inject.c +44 -0
  7. package/build_mayo1.ps1 +36 -0
  8. package/build_mayo2.ps1 +36 -0
  9. package/dist/mayo.browser.min.js +216 -0
  10. package/dist/mayo1.js +0 -0
  11. package/dist/mayo2.js +0 -0
  12. package/dist/mayo_api.js +139 -0
  13. package/dist/package.json +1 -0
  14. package/gitignore +2 -0
  15. package/index.mjs +1 -0
  16. package/mayo-c/.astylerc +16 -0
  17. package/mayo-c/.cmake/flags.cmake +45 -0
  18. package/mayo-c/.cmake/sanitizers.cmake +81 -0
  19. package/mayo-c/.cmake/target.cmake +71 -0
  20. package/mayo-c/.github/workflows/ci_clang.yml +61 -0
  21. package/mayo-c/.github/workflows/ci_gcc.yml +60 -0
  22. package/mayo-c/.github/workflows/cmake.yml +160 -0
  23. package/mayo-c/.github/workflows/macos_m1.yml +68 -0
  24. package/mayo-c/CMakeLists.txt +35 -0
  25. package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.req +900 -0
  26. package/mayo-c/KAT/PQCsignKAT_24_MAYO_1.rsp +902 -0
  27. package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.req +900 -0
  28. package/mayo-c/KAT/PQCsignKAT_24_MAYO_2.rsp +902 -0
  29. package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.req +900 -0
  30. package/mayo-c/KAT/PQCsignKAT_32_MAYO_3.rsp +902 -0
  31. package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.req +900 -0
  32. package/mayo-c/KAT/PQCsignKAT_40_MAYO_5.rsp +902 -0
  33. package/mayo-c/LICENSE +202 -0
  34. package/mayo-c/META/MAYO-1_META.yml +52 -0
  35. package/mayo-c/META/MAYO-2_META.yml +52 -0
  36. package/mayo-c/META/MAYO-3_META.yml +52 -0
  37. package/mayo-c/META/MAYO-5_META.yml +52 -0
  38. package/mayo-c/NOTICE +13 -0
  39. package/mayo-c/README.md +183 -0
  40. package/mayo-c/apps/CMakeLists.txt +31 -0
  41. package/mayo-c/apps/PQCgenKAT_sign.c +281 -0
  42. package/mayo-c/apps/example.c +151 -0
  43. package/mayo-c/apps/example_nistapi.c +124 -0
  44. package/mayo-c/include/mayo.h +442 -0
  45. package/mayo-c/include/mem.h +25 -0
  46. package/mayo-c/include/randombytes.h +31 -0
  47. package/mayo-c/scripts/contstants.py +141 -0
  48. package/mayo-c/scripts/find_irred_poly.sage +39 -0
  49. package/mayo-c/src/AVX2/arithmetic_common.h +159 -0
  50. package/mayo-c/src/AVX2/echelon_form.h +91 -0
  51. package/mayo-c/src/AVX2/echelon_form_loop.h +58 -0
  52. package/mayo-c/src/AVX2/shuffle_arithmetic.h +442 -0
  53. package/mayo-c/src/CMakeLists.txt +98 -0
  54. package/mayo-c/src/arithmetic.c +128 -0
  55. package/mayo-c/src/arithmetic.h +124 -0
  56. package/mayo-c/src/common/aes128ctr.c +293 -0
  57. package/mayo-c/src/common/aes_c.c +741 -0
  58. package/mayo-c/src/common/aes_ctr.h +32 -0
  59. package/mayo-c/src/common/aes_neon.c +201 -0
  60. package/mayo-c/src/common/debug_bench_tools.h +69 -0
  61. package/mayo-c/src/common/fips202.c +1093 -0
  62. package/mayo-c/src/common/fips202.h +12 -0
  63. package/mayo-c/src/common/mem.c +19 -0
  64. package/mayo-c/src/common/randombytes_ctrdrbg.c +141 -0
  65. package/mayo-c/src/common/randombytes_system.c +399 -0
  66. package/mayo-c/src/generic/arithmetic_dynamic.h +68 -0
  67. package/mayo-c/src/generic/arithmetic_fixed.h +84 -0
  68. package/mayo-c/src/generic/echelon_form.h +152 -0
  69. package/mayo-c/src/generic/ef_inner_loop.h +56 -0
  70. package/mayo-c/src/generic/generic_arithmetic.h +294 -0
  71. package/mayo-c/src/mayo.c +675 -0
  72. package/mayo-c/src/mayo_1/api.c +46 -0
  73. package/mayo-c/src/mayo_1/api.h +43 -0
  74. package/mayo-c/src/mayo_2/api.c +46 -0
  75. package/mayo-c/src/mayo_2/api.h +43 -0
  76. package/mayo-c/src/mayo_3/api.c +46 -0
  77. package/mayo-c/src/mayo_3/api.h +43 -0
  78. package/mayo-c/src/mayo_5/api.c +46 -0
  79. package/mayo-c/src/mayo_5/api.h +43 -0
  80. package/mayo-c/src/neon/arithmetic_common.h +132 -0
  81. package/mayo-c/src/neon/echelon_form.h +55 -0
  82. package/mayo-c/src/neon/echelon_form_loop.h +58 -0
  83. package/mayo-c/src/neon/shuffle_arithmetic.h +462 -0
  84. package/mayo-c/src/params.c +42 -0
  85. package/mayo-c/src/simple_arithmetic.h +138 -0
  86. package/mayo-c/test/CMakeLists.txt +51 -0
  87. package/mayo-c/test/bench.c +166 -0
  88. package/mayo-c/test/m1cycles.c +155 -0
  89. package/mayo-c/test/m1cycles.h +13 -0
  90. package/mayo-c/test/test_kat.c +271 -0
  91. package/mayo-c/test/test_mayo.c +139 -0
  92. package/mayo-c/test/test_sample_solution.c +75 -0
  93. package/mayo-c/test/test_various.c +680 -0
  94. package/package.json +39 -0
  95. package/publish.bat +22 -0
  96. package/readme.md +80 -0
  97. package/test/test.mjs +42 -0
@@ -0,0 +1,84 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+
3
+ #ifndef ARITHMETIC_FIXED_H
4
+ #define ARITHMETIC_FIXED_H
5
+
6
+ #include <stdint.h>
7
+ #include <mayo.h>
8
+ #include <simple_arithmetic.h>
9
+
10
+ // This implements arithmetic for vectors of X field elements in Z_2[x]/(x^4+x+1)
11
+
12
+ static
13
+ inline void m_vec_copy (int m_vec_limbs, const uint64_t *in, uint64_t *out) {
14
+ (void) m_vec_limbs;
15
+ for (size_t i = 0; i < M_VEC_LIMBS_MAX; i++)
16
+ {
17
+ out[i] = in[i];
18
+ }
19
+ }
20
+
21
+ static
22
+ inline void m_vec_add (int m_vec_limbs, const uint64_t *in, uint64_t *acc) {
23
+ (void) m_vec_limbs;
24
+ for (size_t i = 0; i < M_VEC_LIMBS_MAX; i++)
25
+ {
26
+ acc[i] ^= in[i];
27
+ }
28
+ }
29
+
30
+ static
31
+ inline void m_vec_mul_add (int m_vec_limbs, const uint64_t *in, unsigned char a, uint64_t *acc) {
32
+ (void) m_vec_limbs;
33
+ uint32_t tab = mul_table(a);
34
+
35
+ uint64_t lsb_ask = 0x1111111111111111ULL;
36
+
37
+ for(int i=0; i < M_VEC_LIMBS_MAX ;i++){
38
+ acc[i] ^= ( in[i] & lsb_ask) * (tab & 0xff)
39
+ ^ ((in[i] >> 1) & lsb_ask) * ((tab >> 8) & 0xf)
40
+ ^ ((in[i] >> 2) & lsb_ask) * ((tab >> 16) & 0xf)
41
+ ^ ((in[i] >> 3) & lsb_ask) * ((tab >> 24) & 0xf);
42
+ }
43
+ }
44
+
45
+ inline
46
+ static void m_vec_mul_add_x (int m_vec_limbs, const uint64_t *in, uint64_t *acc) {
47
+ (void) m_vec_limbs;
48
+ uint64_t mask_msb = 0x8888888888888888ULL;
49
+ for(int i=0; i < M_VEC_LIMBS_MAX; i++){
50
+ uint64_t t = in[i] & mask_msb;
51
+ acc[i] ^= ((in[i] ^ t) << 1) ^ ((t >> 3) * 3);
52
+ }
53
+ }
54
+
55
+ inline
56
+ static void m_vec_mul_add_x_inv (int m_vec_limbs, const uint64_t *in, uint64_t *acc) {
57
+ (void) m_vec_limbs;
58
+ uint64_t mask_lsb = 0x1111111111111111ULL;
59
+ for(int i=0; i < M_VEC_LIMBS_MAX; i++){
60
+ uint64_t t = in[i] & mask_lsb;
61
+ acc[i] ^= ((in[i] ^ t) >> 1) ^ (t * 9);
62
+ }
63
+ }
64
+
65
+ static
66
+ inline void m_vec_multiply_bins (int m_vec_limbs, uint64_t *bins, uint64_t *out) {
67
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 5 * M_VEC_LIMBS_MAX, bins + 10 * M_VEC_LIMBS_MAX);
68
+ m_vec_mul_add_x (m_vec_limbs, bins + 11 * M_VEC_LIMBS_MAX, bins + 12 * M_VEC_LIMBS_MAX);
69
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 10 * M_VEC_LIMBS_MAX, bins + 7 * M_VEC_LIMBS_MAX);
70
+ m_vec_mul_add_x (m_vec_limbs, bins + 12 * M_VEC_LIMBS_MAX, bins + 6 * M_VEC_LIMBS_MAX);
71
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 7 * M_VEC_LIMBS_MAX, bins + 14 * M_VEC_LIMBS_MAX);
72
+ m_vec_mul_add_x (m_vec_limbs, bins + 6 * M_VEC_LIMBS_MAX, bins + 3 * M_VEC_LIMBS_MAX);
73
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 14 * M_VEC_LIMBS_MAX, bins + 15 * M_VEC_LIMBS_MAX);
74
+ m_vec_mul_add_x (m_vec_limbs, bins + 3 * M_VEC_LIMBS_MAX, bins + 8 * M_VEC_LIMBS_MAX);
75
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 15 * M_VEC_LIMBS_MAX, bins + 13 * M_VEC_LIMBS_MAX);
76
+ m_vec_mul_add_x (m_vec_limbs, bins + 8 * M_VEC_LIMBS_MAX, bins + 4 * M_VEC_LIMBS_MAX);
77
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 13 * M_VEC_LIMBS_MAX, bins + 9 * M_VEC_LIMBS_MAX);
78
+ m_vec_mul_add_x (m_vec_limbs, bins + 4 * M_VEC_LIMBS_MAX, bins + 2 * M_VEC_LIMBS_MAX);
79
+ m_vec_mul_add_x_inv (m_vec_limbs, bins + 9 * M_VEC_LIMBS_MAX, bins + 1 * M_VEC_LIMBS_MAX);
80
+ m_vec_mul_add_x (m_vec_limbs, bins + 2 * M_VEC_LIMBS_MAX, bins + 1 * M_VEC_LIMBS_MAX);
81
+ m_vec_copy (m_vec_limbs, bins + M_VEC_LIMBS_MAX, out);
82
+ }
83
+
84
+ #endif
@@ -0,0 +1,152 @@
1
+
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ #ifndef ECHELON_FORM_H
5
+ #define ECHELON_FORM_H
6
+
7
+ #include <stdalign.h>
8
+ #include <stdint.h>
9
+ #include <mem.h>
10
+ #include <arithmetic.h>
11
+
12
+ #define MAYO_MAX(x, y) (((x) > (y)) ? (x) : (y))
13
+ #define MAYO_MIN(x, y) (((x) < (y)) ? (x) : (y))
14
+
15
// Return the 4-bit element at position `index` of a packed vector.
// Sixteen elements are stored per 64-bit limb, lowest nibble first.
static inline unsigned char
m_extract_element(const uint64_t *in, int index) {
    const int limb = index / 16;
    const int shift = (index % 16) * 4;
    const uint64_t limb_value = in[limb];

    return (unsigned char)((limb_value >> shift) & 0xF);
}
22
+
23
// Pack `ncols` field elements (one per byte in `in`) into nibbles of `out`,
// two elements per byte, the even-indexed element in the low nibble.
// With TARGET_BIG_ENDIAN the byte position inside each 8-byte limb is mirrored
// so that the resulting uint64_t limbs have the same value on both byte orders.
// For odd `ncols` the last byte receives only a low nibble (high nibble 0).
// Bytes of `out` beyond the packed elements are not written.
static inline void
ef_pack_m_vec(const unsigned char *in, uint64_t *out, int ncols) {
    unsigned char *bytes = (unsigned char *)out;
    int col = 0;

    // Full bytes: elements col and col+1 share one output byte.
    while (col + 1 < ncols) {
#ifdef TARGET_BIG_ENDIAN
        const int dst = (((col/2 + 8) / 8) * 8) - 1 - (col/2)%8;
#else
        const int dst = col/2;
#endif
        bytes[dst] = (unsigned char)(in[col] | (in[col + 1] << 4));
        col += 2;
    }

    // Trailing element when the count is odd.
    if (ncols % 2 == 1) {
#ifdef TARGET_BIG_ENDIAN
        const int dst = (((col/2 + 8) / 8) * 8) - 1 - (col/2)%8;
#else
        const int dst = col/2;
#endif
        bytes[dst] = in[col];
    }
}
42
+
43
// Inverse of ef_pack_m_vec: expand `legs` 64-bit limbs of packed nibbles into
// legs * 16 bytes of `out`, one field element per byte (low nibble first).
// With TARGET_BIG_ENDIAN the byte position inside each limb is mirrored.
static inline void
ef_unpack_m_vec(int legs, const uint64_t *in, unsigned char *out) {
    const unsigned char *bytes = (const unsigned char *)in;
    const int byte_count = legs * 8;

    for (int b = 0; b < byte_count; b++) {
#ifdef TARGET_BIG_ENDIAN
        const int src = (((b + 8) / 8) * 8) - 1 - b%8;
#else
        const int src = b;
#endif
        out[2 * b]     = bytes[src] & 0xF;
        out[2 * b + 1] = bytes[src] >> 4;
    }
}
56
+
57
+
58
+ // put matrix in row echelon form with ones on first nonzero entries *in
59
+ // constant time*
60
+ static inline void EF(unsigned char *A, int nrows, int ncols) {
61
+
62
+ alignas (32) uint64_t _pivot_row[(K_MAX * O_MAX + 1 + 15) / 16];
63
+ alignas (32) uint64_t _pivot_row2[(K_MAX * O_MAX + 1 + 15) / 16];
64
+ alignas (32) uint64_t packed_A[((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX] = {0};
65
+
66
+ int row_len = (ncols + 15) / 16;
67
+
68
+ // nibbleslice the matrix A
69
+ for (int i = 0; i < nrows; i++) {
70
+ ef_pack_m_vec(A + i * ncols, packed_A + i * row_len, ncols);
71
+ }
72
+
73
+ // pivot row is secret, pivot col is not
74
+
75
+ unsigned char inverse;
76
+ int pivot_row = 0;
77
+ for (int pivot_col = 0; pivot_col < ncols; pivot_col++) {
78
+
79
+ int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols);
80
+ int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col);
81
+ // the pivot row is guaranteed to be between these lower and upper bounds if
82
+ // A has full rank
83
+
84
+ // zero out pivot row
85
+ for (int i = 0; i < row_len; i++) {
86
+ _pivot_row[i] = 0;
87
+ _pivot_row2[i] = 0;
88
+ }
89
+
90
+ // try to get a pivot row in constant time
91
+ unsigned char pivot = 0;
92
+ uint64_t pivot_is_zero = -1;
93
+ for (int row = pivot_row_lower_bound;
94
+ row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) {
95
+
96
+ uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row);
97
+ uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row);
98
+
99
+ for (int j = 0; j < row_len; j++) {
100
+ _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) &
101
+ packed_A[row * row_len + j];
102
+ }
103
+ pivot = m_extract_element(_pivot_row, pivot_col);
104
+ pivot_is_zero = ~ct_compare_64((int) pivot, 0);
105
+ }
106
+
107
+ // multiply pivot row by inverse of pivot
108
+ inverse = inverse_f(pivot);
109
+ vec_mul_add_u64(row_len, _pivot_row, inverse, _pivot_row2);
110
+
111
+ // conditionally write pivot row to the correct row, if there is a nonzero
112
+ // pivot
113
+ for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) {
114
+ uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero;
115
+ uint64_t do_not_copy = ~do_copy;
116
+ for (int col = 0; col < row_len; col++) {
117
+ packed_A[row * row_len + col] =
118
+ (do_not_copy & packed_A[row * row_len + col]) +
119
+ (do_copy & _pivot_row2[col]);
120
+ }
121
+ }
122
+
123
+ // eliminate entries below pivot
124
+ for (int row = pivot_row_lower_bound; row < nrows; row++) {
125
+ unsigned char below_pivot = (row > pivot_row);
126
+ unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col);
127
+
128
+ vec_mul_add_u64(row_len, _pivot_row2, below_pivot * elt_to_elim,
129
+ packed_A + row * row_len);
130
+ }
131
+
132
+ pivot_row += (-(int64_t)(~pivot_is_zero));
133
+ }
134
+
135
+ unsigned char temp[(O_MAX * K_MAX + 1 + 15)];
136
+
137
+ // unbitslice the matrix A
138
+ for (int i = 0; i < nrows; i++) {
139
+ ef_unpack_m_vec(row_len, packed_A + i * row_len, temp);
140
+ for (int j = 0; j < ncols; j++) {
141
+ A[i * ncols + j] = temp[j];
142
+ }
143
+ }
144
+
145
+ mayo_secure_clear(temp, K_MAX * O_MAX + 1 + 15);
146
+ mayo_secure_clear(_pivot_row, (K_MAX * O_MAX + 1 + 15) / 16 * 8);
147
+ mayo_secure_clear(_pivot_row2, (K_MAX * O_MAX + 1 + 15) / 16 * 8);
148
+ mayo_secure_clear(packed_A, ((K_MAX * O_MAX + 1 + 15) / 16) * M_MAX * 8);
149
+ }
150
+
151
+ #endif
152
+
@@ -0,0 +1,56 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+
3
+ int pivot_row_lower_bound = MAYO_MAX(0, pivot_col + nrows - ncols);
4
+ int pivot_row_upper_bound = MAYO_MIN(nrows - 1, pivot_col);
5
+ // the pivot row is guaranteed to be between these lower and upper bounds if
6
+ // A has full rank
7
+ // NOTE(review): this fragment is one iteration of the echelon-form column loop; it relies on nrows, ncols, row_len, offset, pivot_col, pivot_row, inverse, packed_A, _pivot_row and _pivot_row2 from the including scope
8
+ // zero out the pivot-row scratch buffers from limb `offset` onwards (presumably limbs below `offset` are already known to be zero -- confirm at the include site)
9
+ for (int i = offset; i < row_len; i++) {
10
+ _pivot_row[i] = 0;
11
+ _pivot_row2[i] = 0;
12
+ }
13
+
14
+ // try to get a pivot row in constant time: masked accumulation of candidate rows while the entry in pivot_col is still zero
15
+ unsigned char pivot = 0;
16
+ uint64_t pivot_is_zero = -1; // all-ones mask: no nonzero pivot found yet
17
+ for (int row = pivot_row_lower_bound;
18
+ row <= MAYO_MIN(nrows - 1, pivot_row_upper_bound + 32); row++) {
19
+
20
+ uint64_t is_pivot_row = ~ct_compare_64(row, pivot_row); // used as a mask selecting row == pivot_row
21
+ uint64_t below_pivot_row = ct_64_is_greater_than(row, pivot_row); // used as a mask selecting row > pivot_row
22
+
23
+ for (int j = offset; j < row_len; j++) {
24
+ _pivot_row[j] ^= (is_pivot_row | (below_pivot_row & pivot_is_zero)) &
25
+ packed_A[row * row_len + j];
26
+ }
27
+ pivot = m_extract_element(_pivot_row, pivot_col);
28
+ pivot_is_zero = ~ct_compare_64((int) pivot, 0);
29
+ }
30
+
31
+ // multiply pivot row by inverse of pivot (_pivot_row2 was zeroed above, so the multiply-accumulate yields the product)
32
+ inverse = inverse_f(pivot);
33
+ vec_mul_add_u64(row_len - offset, _pivot_row + offset, inverse, _pivot_row2 + offset);
34
+
35
+ // conditionally write the normalized pivot row to row `pivot_row`, but only if a nonzero
36
+ // pivot was found; every candidate row is touched so the access pattern is data-independent
37
+ for (int row = pivot_row_lower_bound; row <= pivot_row_upper_bound; row++) {
38
+ uint64_t do_copy = ~ct_compare_64(row, pivot_row) & ~pivot_is_zero;
39
+ uint64_t do_not_copy = ~do_copy;
40
+ for (int col = offset; col < row_len; col++) {
41
+ packed_A[row * row_len + col] =
42
+ (do_not_copy & packed_A[row * row_len + col]) +
43
+ (do_copy & _pivot_row2[col]);
44
+ }
45
+ }
46
+
47
+ // eliminate entries below pivot
48
+ for (int row = pivot_row_lower_bound; row < nrows; row++) {
49
+ unsigned char below_pivot = (row > pivot_row); // NOTE(review): plain comparison on pivot_row, which the generic EF treats as secret; consider ct_64_is_greater_than(row, pivot_row) & 1
50
+ unsigned char elt_to_elim = m_extract_element(packed_A + row * row_len, pivot_col);
51
+
52
+ vec_mul_add_u64(row_len - offset, _pivot_row2 + offset, below_pivot * elt_to_elim,
53
+ packed_A + row * row_len + offset);
54
+ }
55
+ // advance pivot_row by one only when a nonzero pivot was found (~pivot_is_zero is then all-ones, i.e. -1 after the cast)
56
+ pivot_row += (-(int32_t)(~pivot_is_zero));
@@ -0,0 +1,294 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+
3
+ #ifndef GENERIC_ARITHMETIC_H
4
+ #define GENERIC_ARITHMETIC_H
5
+
6
+ #include <simple_arithmetic.h>
7
+
8
+ #ifdef ENABLE_PARAMS_DYNAMIC
9
+ #include <arithmetic_dynamic.h>
10
+ #else
11
+ #include <arithmetic_fixed.h>
12
+ #endif
13
+
14
// acc += bs_mat * mat, where bs_mat is a (bs_mat_rows x bs_mat_cols) matrix whose
// entries are m-vectors of m_vec_limbs limbs, and mat is a (bs_mat_cols x mat_cols)
// matrix of single field elements (one per byte, row-major).
// If `triangular` is 1, only the entries on or above the diagonal of bs_mat are
// stored (row by row) and used; if 0, all entries are stored.
// acc is a (bs_mat_rows x mat_cols) row-major matrix of m-vectors.
static inline
void mul_add_m_upper_triangular_mat_x_mat(const int m_vec_limbs, const uint64_t *bs_mat, const unsigned char *mat, uint64_t *acc,
        const int bs_mat_rows, const int bs_mat_cols, const int mat_cols, const int triangular) {

    const uint64_t *entry = bs_mat;   // next stored entry of bs_mat
    for (int row = 0; row < bs_mat_rows; row++) {
        uint64_t *acc_row = acc + m_vec_limbs * (row * mat_cols);
        for (int col = triangular * row; col < bs_mat_cols; col++) {
            for (int k = 0; k < mat_cols; k++) {
                m_vec_mul_add(m_vec_limbs, entry, mat[col * mat_cols + k], acc_row + m_vec_limbs * k);
            }
            entry += m_vec_limbs;
        }
    }
}
29
+
30
// acc += bs_mat * mat^t, where bs_mat is a (bs_mat_rows x bs_mat_cols) matrix whose
// entries are m-vectors of m_vec_limbs limbs, and mat is a (mat_rows x bs_mat_cols)
// matrix of single field elements (one per byte, row-major).
// If `triangular` is 1, only the entries on or above the diagonal of bs_mat are
// stored (row by row) and used; if 0, all entries are stored.
// acc is a (bs_mat_rows x mat_rows) row-major matrix of m-vectors.
static inline
void mul_add_m_upper_triangular_mat_x_mat_trans(const int m_vec_limbs, const uint64_t *bs_mat, const unsigned char *mat, uint64_t *acc,
        const int bs_mat_rows, const int bs_mat_cols, const int mat_rows, const int triangular) {

    const uint64_t *entry = bs_mat;   // next stored entry of bs_mat
    for (int row = 0; row < bs_mat_rows; row++) {
        uint64_t *acc_row = acc + m_vec_limbs * (row * mat_rows);
        for (int col = triangular * row; col < bs_mat_cols; col++) {
            for (int k = 0; k < mat_rows; k++) {
                m_vec_mul_add(m_vec_limbs, entry, mat[k * bs_mat_cols + col], acc_row + m_vec_limbs * k);
            }
            entry += m_vec_limbs;
        }
    }
}
44
+
45
// acc += mat^t * bs_mat, where mat is a (mat_rows x mat_cols) matrix of single
// field elements (one per byte, row-major) and bs_mat is a (mat_rows x bs_mat_cols)
// row-major matrix whose entries are m-vectors of m_vec_limbs limbs.
// acc is a (mat_cols x bs_mat_cols) row-major matrix of m-vectors.
static inline
void mul_add_mat_trans_x_m_mat(const int m_vec_limbs, const unsigned char *mat, const uint64_t *bs_mat, uint64_t *acc,
                               const int mat_rows, const int mat_cols, const int bs_mat_cols) {

    for (int out_row = 0; out_row < mat_cols; out_row++) {
        for (int inner = 0; inner < mat_rows; inner++) {
            for (int k = 0; k < bs_mat_cols; k++) {
                const uint64_t *src = bs_mat + m_vec_limbs * (inner * bs_mat_cols + k);
                uint64_t *dst = acc + m_vec_limbs * (out_row * bs_mat_cols + k);
                // element (inner, out_row) of mat is element (out_row, inner) of mat^t
                m_vec_mul_add(m_vec_limbs, src, mat[inner * mat_cols + out_row], dst);
            }
        }
    }
}
58
+
59
// acc += mat * bs_mat, where mat is a (mat_rows x mat_cols) matrix of single
// field elements (one per byte, row-major) and bs_mat is a (mat_cols x bs_mat_cols)
// row-major matrix whose entries are m-vectors of m_vec_limbs limbs.
// acc is a (mat_rows x bs_mat_cols) row-major matrix of m-vectors.
static inline
void mul_add_mat_x_m_mat(const int m_vec_limbs, const unsigned char *mat, const uint64_t *bs_mat, uint64_t *acc,
                         const int mat_rows, const int mat_cols, const int bs_mat_cols) {

    for (int out_row = 0; out_row < mat_rows; out_row++) {
        for (int inner = 0; inner < mat_cols; inner++) {
            for (int k = 0; k < bs_mat_cols; k++) {
                const uint64_t *src = bs_mat + m_vec_limbs * (inner * bs_mat_cols + k);
                uint64_t *dst = acc + m_vec_limbs * (out_row * bs_mat_cols + k);
                m_vec_mul_add(m_vec_limbs, src, mat[out_row * mat_cols + inner], dst);
            }
        }
    }
}
72
+
73
+ static inline
74
+ void P1_times_O(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* acc){
75
+ #ifndef ENABLE_PARAMS_DYNAMIC
76
+ (void) p;
77
+ #endif
78
+ mul_add_m_upper_triangular_mat_x_mat(PARAM_m_vec_limbs(p), P1, O, acc, PARAM_v(p), PARAM_v(p), PARAM_o(p), 1);
79
+ }
80
+
81
+ static inline
82
+ void P1_times_Vt(const mayo_params_t* p, const uint64_t* P1, const unsigned char* V, uint64_t* acc){
83
+ #ifndef ENABLE_PARAMS_DYNAMIC
84
+ (void) p;
85
+ #endif
86
+ mul_add_m_upper_triangular_mat_x_mat_trans(PARAM_m_vec_limbs(p), P1, V, acc, PARAM_v(p), PARAM_v(p), PARAM_k(p), 1);
87
+ }
88
+
89
+ #if defined(HAVE_STACKEFFICIENT) || defined(PQM4)
90
+ // compute P * S^t = [ P1 P2 ] * [S1] = [P1*S1 + P2*S2]
91
+ // [ 0 P3 ] [S2] [ P3*S2]
92
+ // compute S * PS = [ S1 S2 ] * [ P1*S1 + P2*S2 = P1 ] = [ S1*P1 + S2*P2 ]
93
+ // [ P3*S2 = P2 ]
94
+ static inline void mayo_generic_m_calculate_PS_SPS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S,
95
+ const int m, const int v, const int o, const int k, uint64_t *SPS) {
96
+
97
+ const int n = o + v;
98
+ const int m_vec_limbs = (m + 15)/16;
99
+
100
+ uint64_t PS[(N_MAX + K_MAX) * M_VEC_LIMBS_MAX] = { 0 };
101
+ uint64_t accumulator[16 * ((M_MAX+15)/16) * N_MAX] = {0};
102
+ int P1_used;
103
+ int P3_used;
104
+
105
+ for (int col = 0; col < k; col++) {
106
+ for(unsigned int i = 0; i < sizeof(accumulator)/8; i++) {
107
+ accumulator[i] = 0;
108
+ }
109
+ P1_used = 0;
110
+ for (int row = 0; row < v; row++) {
111
+ for (int j = row; j < v; j++) {
112
+ m_vec_add(m_vec_limbs, P1 + (P1_used * m_vec_limbs), accumulator + ( row * 16 + S[col * n + j] )*m_vec_limbs);
113
+
114
+ P1_used ++;
115
+ }
116
+
117
+ for (int j = 0; j < o; j++) {
118
+ m_vec_add(m_vec_limbs, P2 + (row * o + j)*m_vec_limbs, accumulator + ( row * 16 + S[(col * n) + j + v] )* m_vec_limbs);
119
+ }
120
+ }
121
+
122
+ P3_used = 0;
123
+ for (int row = v; row < n; row++) {
124
+ for (int j = row; j < n; j++) {
125
+ m_vec_add(m_vec_limbs, P3 + P3_used * m_vec_limbs, accumulator + ( row * 16 + S[col * n + j] )* m_vec_limbs );
126
+ P3_used ++;
127
+ }
128
+ }
129
+
130
+ for (int row = 0; row < n; row++) {
131
+ m_vec_multiply_bins(m_vec_limbs, accumulator + row * 16 * m_vec_limbs, PS + (row + col) * m_vec_limbs);
132
+ }
133
+
134
+ for (int row = 0; row < k; row++) {
135
+ for (unsigned int i = 0; i < 16*((M_MAX+15)/16); ++i)
136
+ accumulator[i] = 0;
137
+ for (int j = 0; j < n; j++) {
138
+ m_vec_add(m_vec_limbs, PS + (j + col) * m_vec_limbs, accumulator + S[row * n + j]*m_vec_limbs);
139
+ }
140
+ m_vec_multiply_bins(m_vec_limbs, accumulator, SPS + (row * k + col) * m_vec_limbs);
141
+ }
142
+
143
+ }
144
+
145
+ }
146
+
147
+ #else
148
+
149
+ // compute P * S^t = [ P1 P2 ] * [S1] = [P1*S1 + P2*S2]
150
+ // [ 0 P3 ] [S2] [ P3*S2]
151
+ static inline void mayo_generic_m_calculate_PS(const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *S,
152
+ const int m, const int v, const int o, const int k, uint64_t *PS) {
153
+
154
+ const int n = o + v;
155
+ const int m_vec_limbs = (m + 15)/16;
156
+
157
+ uint64_t accumulator[16 * ((M_MAX+15)/16) * K_MAX * N_MAX] = {0};
158
+ int P1_used = 0;
159
+ for (int row = 0; row < v; row++) {
160
+ for (int j = row; j < v; j++) {
161
+ for (int col = 0; col < k; col++) {
162
+ m_vec_add(m_vec_limbs, P1 + (P1_used * m_vec_limbs), accumulator + ( (row * k + col) * 16 + S[col * n + j] )*m_vec_limbs);
163
+ }
164
+ P1_used ++;
165
+ }
166
+
167
+ for (int j = 0; j < o; j++) {
168
+ for (int col = 0; col < k; col++) {
169
+ m_vec_add(m_vec_limbs, P2 + (row * o + j)*m_vec_limbs, accumulator + ( (row * k + col) * 16 + S[(col * n) + j + v] )* m_vec_limbs);
170
+ }
171
+ }
172
+ }
173
+
174
+ int P3_used = 0;
175
+ for (int row = v; row < n; row++) {
176
+ for (int j = row; j < n; j++) {
177
+ for (int col = 0; col < k; col++) {
178
+ m_vec_add(m_vec_limbs, P3 + P3_used * m_vec_limbs, accumulator + ( (row * k + col) * 16 + S[col * n + j] )* m_vec_limbs );
179
+ }
180
+ P3_used ++;
181
+ }
182
+ }
183
+
184
+ // multiply stuff according to the bins of the accumulator and add to PS.
185
+ int i = 0;
186
+ while (i < n * k) {
187
+ m_vec_multiply_bins(m_vec_limbs, accumulator + i * 16 * m_vec_limbs, PS + i * m_vec_limbs);
188
+ i++;
189
+ }
190
+
191
+ }
192
+
193
+ // compute S * PS = [ S1 S2 ] * [ P1*S1 + P2*S2 = P1 ] = [ S1*P1 + S2*P2 ]
194
+ // [ P3*S2 = P2 ]
195
+ static inline void mayo_generic_m_calculate_SPS(const uint64_t *PS, const unsigned char *S, int m, int k, int n, uint64_t *SPS){
196
+ uint64_t accumulator[16*((M_MAX+15)/16)*K_MAX*K_MAX] = {0};
197
+ const int m_vec_limbs = (m + 15)/ 16;
198
+ for (int row = 0; row < k; row++) {
199
+ for (int j = 0; j < n; j++) {
200
+ for (int col = 0; col < k; col += 1) {
201
+ m_vec_add(m_vec_limbs, PS + (j * k + col) * m_vec_limbs, accumulator + ( (row * k + col) * 16 + S[row * n + j] )*m_vec_limbs);
202
+ }
203
+ }
204
+ }
205
+
206
+ // multiply stuff according to the bins of the accumulator and add to PS.
207
+ int i = 0;
208
+ while (i < k*k) {
209
+ m_vec_multiply_bins(m_vec_limbs, accumulator + i * 16 * m_vec_limbs, SPS + i * m_vec_limbs);
210
+ i++;
211
+ }
212
+ }
213
+
214
+ #endif
215
+
216
+
217
+ static inline
218
+ void P1P1t_times_O(const mayo_params_t* p, const uint64_t* P1, const unsigned char* O, uint64_t* acc){
219
+ #ifndef ENABLE_PARAMS_DYNAMIC
220
+ (void) p;
221
+ #endif
222
+ const int param_o = PARAM_o(p);
223
+ const int param_v = PARAM_v(p);
224
+ const int m_vec_limbs = PARAM_m_vec_limbs(p);
225
+
226
+ int bs_mat_entries_used = 0;
227
+ for (int r = 0; r < param_v; r++) {
228
+ for (int c = r; c < param_v; c++) {
229
+ if(c==r) {
230
+ bs_mat_entries_used += 1;
231
+ continue;
232
+ }
233
+ for (int k = 0; k < param_o; k += 1) {
234
+ m_vec_mul_add(m_vec_limbs, P1 + m_vec_limbs * bs_mat_entries_used, O[c * param_o + k], acc + m_vec_limbs * (r * param_o + k));
235
+ m_vec_mul_add(m_vec_limbs, P1 + m_vec_limbs * bs_mat_entries_used, O[r * param_o + k], acc + m_vec_limbs * (c * param_o + k));
236
+ }
237
+ bs_mat_entries_used += 1;
238
+ }
239
+ }
240
+ }
241
+
242
+
243
+ static inline
244
+ void compute_M_and_VPV(const mayo_params_t* p, const unsigned char* Vdec, const uint64_t *L, const uint64_t *P1, uint64_t *VL, uint64_t *VP1V){
245
+
246
+ const int param_k = PARAM_k(p);
247
+ const int param_v = PARAM_v(p);
248
+ const int param_o = PARAM_o(p);
249
+
250
+ //VL
251
+ mul_add_mat_x_m_mat(PARAM_m_vec_limbs(p), Vdec, L, VL, param_k, param_v, param_o);
252
+
253
+ //VP1V
254
+ uint64_t Pv[V_MAX * K_MAX * M_VEC_LIMBS_MAX] = {0};
255
+ P1_times_Vt(p, P1, Vdec, Pv);
256
+ mul_add_mat_x_m_mat(PARAM_m_vec_limbs(p), Vdec, Pv, VP1V, param_k, param_v, param_k);
257
+ }
258
+
259
+ static inline
260
+ void compute_P3(const mayo_params_t* p, const uint64_t* P1, uint64_t *P2, const unsigned char *O, uint64_t *P3){
261
+
262
+ const int m_vec_limbs = PARAM_m_vec_limbs(p);
263
+ const int param_v = PARAM_v(p);
264
+ const int param_o = PARAM_o(p);
265
+
266
+ // compute P1*O + P2
267
+ P1_times_O(p, P1, O, P2);
268
+
269
+ // compute P3 = O^t * (P1*O + P2)
270
+ mul_add_mat_trans_x_m_mat(m_vec_limbs, O, P2, P3, param_v, param_o, param_o);
271
+ }
272
+
273
+ // compute P * S^t = [ P1 P2 ] * [S1] = [P1*S1 + P2*S2]
274
+ // [ 0 P3 ] [S2] [ P3*S2]
275
+ // compute S * PS = [ S1 S2 ] * [ P1*S1 + P2*S2 = P1 ] = [ S1*P1 + S2*P2 ]
276
+ // [ P3*S2 = P2 ]
277
+ static inline void m_calculate_PS_SPS(const mayo_params_t *p, const uint64_t *P1, const uint64_t *P2, const uint64_t *P3, const unsigned char *s,
278
+ uint64_t *SPS) {
279
+ // compute P * S^t = {(P1, P2), (0, P3)} * S^t = {(P1*S1 + P2*S2), (P3 * S2)}
280
+ #ifndef ENABLE_PARAMS_DYNAMIC
281
+ (void) p;
282
+ #endif
283
+ #if defined(HAVE_STACKEFFICIENT) || defined(PQM4)
284
+ mayo_generic_m_calculate_PS_SPS(P1, P2, P3, s, PARAM_m(p), PARAM_v(p), PARAM_o(p), PARAM_k(p), SPS);
285
+ #else
286
+ uint64_t PS[N_MAX * K_MAX * M_VEC_LIMBS_MAX] = { 0 };
287
+ mayo_generic_m_calculate_PS(P1, P2, P3, s, PARAM_m(p), PARAM_v(p), PARAM_o(p), PARAM_k(p), PS);
288
+
289
+ // compute S * P * S = S* (P*S)
290
+ mayo_generic_m_calculate_SPS(PS, s, PARAM_m(p), PARAM_k(p), PARAM_n(p), SPS);
291
+ #endif
292
+ }
293
+
294
+ #endif