pq_crypto 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mlkem-native/README.md +6 -3
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +22 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +77 -36
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +135 -146
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +116 -72
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +351 -415
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +43 -20
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +16 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +57 -31
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +260 -349
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +35 -37
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +43 -57
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +14 -15
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +5 -4
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +42 -6
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +31 -20
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x1_scalar_asm.S → keccak_f1600_x1_scalar_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x1_v84a_asm.S → keccak_f1600_x1_v84a_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x2_v84a_asm.S → keccak_f1600_x2_v84a_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x4_v8a_scalar_hybrid_asm.S → keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S → keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +10 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +2 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +55 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +58 -14
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +57 -16
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +2 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +2 -2
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +10 -7
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/{keccak_f1600_x4_avx2.S → keccak_f1600_x4_avx2_asm.S} +13 -11
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +12 -11
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +167 -136
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +75 -68
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +135 -157
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +15 -13
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +143 -135
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +52 -46
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{intt.S → intt_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{ntt.S → ntt_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_mulcache_compute_asm.S → poly_mulcache_compute_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_reduce_asm.S → poly_reduce_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_tobytes_asm.S → poly_tobytes_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_tomont_asm.S → poly_tomont_aarch64_asm.S} +10 -12
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k2.S → polyvec_basemul_acc_montgomery_cached_k2_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k3.S → polyvec_basemul_acc_montgomery_cached_k3_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k4.S → polyvec_basemul_acc_montgomery_cached_k4_aarch64_asm.S} +10 -10
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{rej_uniform_asm.S → rej_uniform_aarch64_asm.S} +12 -12
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +514 -513
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +254 -253
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +6 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/README.md +6 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/meta.h +77 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/arith_native_ppc64le.h +24 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/consts.c +299 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/consts.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/intt_ppc_asm.S +3222 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/ntt_ppc_asm.S +1651 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/poly_tomont_ppc_asm.S +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/reduce_ppc_asm.S +710 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +5 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +18 -16
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +19 -24
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +53 -65
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +20 -20
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +106 -88
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +45 -35
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{intt.S → intt_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{ntt.S → ntt_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{nttfrombytes.S → nttfrombytes_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{ntttobytes.S → ntttobytes_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{nttunpack.S → nttunpack_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d10.S → poly_compress_d10_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d11.S → poly_compress_d11_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d4.S → poly_compress_d4_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d5.S → poly_compress_d5_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d10.S → poly_decompress_d10_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d11.S → poly_decompress_d11_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d4.S → poly_decompress_d4_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d5.S → poly_decompress_d5_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{mulcache_compute.S → poly_mulcache_compute_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k2.S → polyvec_basemul_acc_montgomery_cached_k2_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k3.S → polyvec_basemul_acc_montgomery_cached_k3_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k4.S → polyvec_basemul_acc_montgomery_cached_k4_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{reduce.S → reduce_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{rej_uniform_asm.S → rej_uniform_avx2_asm.S} +9 -9
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +514 -513
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{tomont.S → tomont_avx2_asm.S} +8 -8
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +61 -57
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +89 -116
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +31 -32
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +226 -301
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +21 -29
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +68 -63
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +37 -48
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +44 -2
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +141 -159
- data/lib/pq_crypto/version.rb +1 -1
- data/script/vendor_libs.rb +3 -3
- metadata +47 -38
|
@@ -18,15 +18,16 @@
|
|
|
18
18
|
|
|
19
19
|
#define MLK_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] "
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
*
|
|
21
|
+
/**
|
|
22
|
+
* Check whether values in a vint16m1_t vector are within specified bounds.
|
|
23
23
|
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
|
|
24
|
+
* @param[in] file Filename.
|
|
25
|
+
* @param line Line number.
|
|
26
|
+
* @param vec RISC-V vector to be checked.
|
|
27
|
+
* @param vl Vector length (number of active elements).
|
|
28
|
+
* @param lower_bound_exclusive Exclusive lower bound.
|
|
29
|
+
* @param upper_bound_exclusive Exclusive upper bound.
|
|
30
|
+
*/
|
|
30
31
|
void mlk_debug_check_bounds_int16m1(const char *file, int line, vint16m1_t vec,
|
|
31
32
|
size_t vl, int lower_bound_exclusive,
|
|
32
33
|
int upper_bound_exclusive)
|
|
@@ -43,15 +44,16 @@ void mlk_debug_check_bounds_int16m1(const char *file, int line, vint16m1_t vec,
|
|
|
43
44
|
lower_bound_exclusive, upper_bound_exclusive);
|
|
44
45
|
}
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
*
|
|
47
|
+
/**
|
|
48
|
+
* Check whether values in a vint16m2_t vector are within specified bounds.
|
|
48
49
|
*
|
|
49
|
-
*
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
|
|
50
|
+
* @param[in] file Filename.
|
|
51
|
+
* @param line Line number.
|
|
52
|
+
* @param vec RISC-V vector to be checked.
|
|
53
|
+
* @param vl Vector length (active elements per m1 half).
|
|
54
|
+
* @param lower_bound_exclusive Exclusive lower bound.
|
|
55
|
+
* @param upper_bound_exclusive Exclusive upper bound.
|
|
56
|
+
*/
|
|
55
57
|
void mlk_debug_check_bounds_int16m2(const char *file, int line, vint16m2_t vec,
|
|
56
58
|
size_t vl, int lower_bound_exclusive,
|
|
57
59
|
int upper_bound_exclusive)
|
|
@@ -22,38 +22,33 @@
|
|
|
22
22
|
|
|
23
23
|
#if defined(MLKEM_DEBUG)
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
* Description: Check whether values in a vint16m1_t vector
|
|
29
|
-
* are within specified bounds.
|
|
25
|
+
/**
|
|
26
|
+
* Check whether values in a vint16m1_t vector are within specified bounds.
|
|
30
27
|
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
|
|
28
|
+
* @param[in] file Filename.
|
|
29
|
+
* @param line Line number.
|
|
30
|
+
* @param vec RISC-V vector to be checked.
|
|
31
|
+
* @param vl Vector length (number of active elements).
|
|
32
|
+
* @param lower_bound_exclusive Exclusive lower bound.
|
|
33
|
+
* @param upper_bound_exclusive Exclusive upper bound.
|
|
34
|
+
*/
|
|
38
35
|
#define mlk_debug_check_bounds_int16m1 \
|
|
39
36
|
MLK_NAMESPACE(mlkem_debug_check_bounds_int16m1)
|
|
40
37
|
void mlk_debug_check_bounds_int16m1(const char *file, int line, vint16m1_t vec,
|
|
41
38
|
size_t vl, int lower_bound_exclusive,
|
|
42
39
|
int upper_bound_exclusive);
|
|
43
40
|
|
|
44
|
-
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
* Description: Check whether values in a vint16m2_t vector
|
|
48
|
-
* are within specified bounds by splitting into m1 vectors.
|
|
41
|
+
/**
|
|
42
|
+
* Check whether values in a vint16m2_t vector are within specified bounds
|
|
43
|
+
* by splitting into m1 vectors.
|
|
49
44
|
*
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
|
|
45
|
+
* @param[in] file Filename.
|
|
46
|
+
* @param line Line number.
|
|
47
|
+
* @param vec RISC-V vector to be checked.
|
|
48
|
+
* @param vl Vector length (active elements per m1 half).
|
|
49
|
+
* @param lower_bound_exclusive Exclusive lower bound.
|
|
50
|
+
* @param upper_bound_exclusive Exclusive upper bound.
|
|
51
|
+
*/
|
|
57
52
|
#define mlk_debug_check_bounds_int16m2 \
|
|
58
53
|
MLK_NAMESPACE(mlkem_debug_check_bounds_int16m2)
|
|
59
54
|
void mlk_debug_check_bounds_int16m2(const char *file, int line, vint16m2_t vec,
|
|
@@ -28,7 +28,7 @@
|
|
|
28
28
|
/* check-magic: 1441 == pow(2,32-7,MLKEM_Q) */
|
|
29
29
|
#define MLK_RVV_MONT_NR 1441
|
|
30
30
|
|
|
31
|
-
static
|
|
31
|
+
static MLK_INLINE vint16m1_t fq_redc(vint16m1_t rh, vint16m1_t rl, size_t vl)
|
|
32
32
|
{
|
|
33
33
|
vint16m1_t t;
|
|
34
34
|
|
|
@@ -41,7 +41,7 @@ static inline vint16m1_t fq_redc(vint16m1_t rh, vint16m1_t rl, size_t vl)
|
|
|
41
41
|
|
|
42
42
|
/* Narrowing reduction */
|
|
43
43
|
|
|
44
|
-
static
|
|
44
|
+
static MLK_INLINE vint16m1_t fq_redc2(vint32m2_t z, size_t vl)
|
|
45
45
|
{
|
|
46
46
|
vint16m1_t t;
|
|
47
47
|
|
|
@@ -56,7 +56,7 @@ static inline vint16m1_t fq_redc2(vint32m2_t z, size_t vl)
|
|
|
56
56
|
|
|
57
57
|
/* Narrowing Barrett */
|
|
58
58
|
|
|
59
|
-
static
|
|
59
|
+
static MLK_INLINE vint16m1_t fq_barrett(vint16m1_t a, size_t vl)
|
|
60
60
|
{
|
|
61
61
|
vint16m1_t t;
|
|
62
62
|
const int16_t v = ((1 << 26) + MLKEM_Q / 2) / MLKEM_Q;
|
|
@@ -71,31 +71,29 @@ static inline vint16m1_t fq_barrett(vint16m1_t a, size_t vl)
|
|
|
71
71
|
return t;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
/* Conditionally add Q (if negative)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
/* Conditionally subtract Q (if Q or above) */
|
|
86
|
-
|
|
87
|
-
static inline vint16m1_t fq_csub(vint16m1_t rx, size_t vl)
|
|
74
|
+
/* Conditionally add Q (if negative).
|
|
75
|
+
*
|
|
76
|
+
* Constant-time note: We deliberately avoid a `vmslt` + masked `vadd_mu`
|
|
77
|
+
* here. RVV 1.0 does not mandate mask-population-independent latency for
|
|
78
|
+
* masked ops, and Zvkt's DIEL guarantee does not cover the v0 mask
|
|
79
|
+
* register. Using an arithmetic-shift sign mask keeps all operands in the
|
|
80
|
+
* Zvkt-covered set and matches the idiom used in the portable C
|
|
81
|
+
* (mlk_ct_cmask_neg_i16, verify.h) and AArch64 (poly_reduce_aarch64_asm.S)
|
|
82
|
+
* implementations. */
|
|
83
|
+
|
|
84
|
+
static MLK_INLINE vint16m1_t fq_cadd(vint16m1_t rx, size_t vl)
|
|
88
85
|
{
|
|
89
|
-
|
|
86
|
+
vint16m1_t m;
|
|
90
87
|
|
|
91
|
-
|
|
92
|
-
|
|
88
|
+
m = __riscv_vsra_vx_i16m1(rx, 15, vl); /* m = (x < 0) ? -1 : 0 */
|
|
89
|
+
m = __riscv_vand_vx_i16m1(m, MLKEM_Q, vl); /* m = (x < 0) ? Q : 0 */
|
|
90
|
+
rx = __riscv_vadd_vv_i16m1(rx, m, vl); /* x += m */
|
|
93
91
|
return rx;
|
|
94
92
|
}
|
|
95
93
|
|
|
96
94
|
/* Montgomery multiply: vector-vector */
|
|
97
95
|
|
|
98
|
-
static
|
|
96
|
+
static MLK_INLINE vint16m1_t fq_mul_vv(vint16m1_t rx, vint16m1_t ry, size_t vl)
|
|
99
97
|
{
|
|
100
98
|
vint16m1_t rl, rh;
|
|
101
99
|
|
|
@@ -106,7 +104,7 @@ static inline vint16m1_t fq_mul_vv(vint16m1_t rx, vint16m1_t ry, size_t vl)
|
|
|
106
104
|
|
|
107
105
|
/* Montgomery multiply: vector-scalar */
|
|
108
106
|
|
|
109
|
-
static
|
|
107
|
+
static MLK_INLINE vint16m1_t fq_mul_vx(vint16m1_t rx, int16_t ry, size_t vl)
|
|
110
108
|
{
|
|
111
109
|
vint16m1_t rl, rh;
|
|
112
110
|
|
|
@@ -117,7 +115,7 @@ static inline vint16m1_t fq_mul_vx(vint16m1_t rx, int16_t ry, size_t vl)
|
|
|
117
115
|
|
|
118
116
|
/* full normalization */
|
|
119
117
|
|
|
120
|
-
static
|
|
118
|
+
static MLK_INLINE vint16m1_t fq_mulq_vx(vint16m1_t rx, int16_t ry, size_t vl)
|
|
121
119
|
{
|
|
122
120
|
vint16m1_t result;
|
|
123
121
|
|
|
@@ -143,16 +141,12 @@ static vuint16m2_t bitswap_perm(unsigned a, unsigned b, size_t vl)
|
|
|
143
141
|
return xa;
|
|
144
142
|
}
|
|
145
143
|
|
|
146
|
-
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
* Description: Computes negacyclic number-theoretic transform (NTT) of
|
|
150
|
-
* a polynomial in place;
|
|
151
|
-
* inputs assumed to be in normal order, output in
|
|
152
|
-
* bitreversed order
|
|
144
|
+
/**
|
|
145
|
+
* Compute negacyclic number-theoretic transform (NTT) of a polynomial in
|
|
146
|
+
* place; input assumed to be in normal order, output in bitreversed order.
|
|
153
147
|
*
|
|
154
|
-
*
|
|
155
|
-
|
|
148
|
+
* @param[in,out] r Input/output polynomial.
|
|
149
|
+
*/
|
|
156
150
|
|
|
157
151
|
/* Forward / Cooley-Tukey butterfly operation */
|
|
158
152
|
|
|
@@ -330,17 +324,6 @@ void mlk_rv64v_poly_ntt(int16_t *r)
|
|
|
330
324
|
&r[0xe0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(ve, vf), ze), vl2);
|
|
331
325
|
}
|
|
332
326
|
|
|
333
|
-
/*************************************************
|
|
334
|
-
* Name: poly_invntt_tomont
|
|
335
|
-
*
|
|
336
|
-
* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
|
|
337
|
-
* of a polynomial in place;
|
|
338
|
-
* inputs assumed to be in bitreversed order,
|
|
339
|
-
* output in normal order
|
|
340
|
-
*
|
|
341
|
-
* Arguments: - uint16_t *r: pointer to in/output polynomial
|
|
342
|
-
**************************************************/
|
|
343
|
-
|
|
344
327
|
/* Reverse / Gentleman-Sande butterfly operation */
|
|
345
328
|
|
|
346
329
|
#define MLK_RVV_GS_BFLY_RX(u0, u1, ut, uc, vl) \
|
|
@@ -461,6 +444,13 @@ static vint16m2_t mlk_rv64v_intt2(vint16m2_t vp, vint16m1_t cz)
|
|
|
461
444
|
} while (0)
|
|
462
445
|
|
|
463
446
|
|
|
447
|
+
/**
|
|
448
|
+
* Compute the inverse negacyclic number-theoretic transform (NTT) of a
|
|
449
|
+
* polynomial in place; input assumed to be in bitreversed order, output in
|
|
450
|
+
* normal order.
|
|
451
|
+
*
|
|
452
|
+
* @param[in,out] r Input/output polynomial.
|
|
453
|
+
*/
|
|
464
454
|
/* Only for VLEN=256 for now */
|
|
465
455
|
void mlk_rv64v_poly_invntt_tomont(int16_t *r)
|
|
466
456
|
{
|
|
@@ -617,10 +607,10 @@ void mlk_rv64v_poly_invntt_tomont(int16_t *r)
|
|
|
617
607
|
|
|
618
608
|
/* ML-KEM's middle field GF(3329)[X]/(X^2) multiplication */
|
|
619
609
|
|
|
620
|
-
static
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
610
|
+
static MLK_INLINE void mlk_rv64v_poly_basemul_mont_add_k(int16_t *r,
|
|
611
|
+
const int16_t *a,
|
|
612
|
+
const int16_t *b,
|
|
613
|
+
unsigned kn)
|
|
624
614
|
{
|
|
625
615
|
#include "rv64v_zetas_basemul.inc"
|
|
626
616
|
|
|
@@ -692,19 +682,18 @@ void mlk_rv64v_poly_basemul_mont_add_k4(int16_t *r, const int16_t *a,
|
|
|
692
682
|
}
|
|
693
683
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
|
|
694
684
|
|
|
695
|
-
|
|
696
|
-
*
|
|
697
|
-
*
|
|
698
|
-
* Description: Inplace conversion of all coefficients of a polynomial
|
|
699
|
-
* from normal domain to Montgomery domain
|
|
685
|
+
/**
|
|
686
|
+
* In-place conversion of all coefficients of a polynomial from the normal
|
|
687
|
+
* domain to the Montgomery domain.
|
|
700
688
|
*
|
|
701
|
-
*
|
|
702
|
-
|
|
689
|
+
* @param[in,out] r Input/output polynomial.
|
|
690
|
+
*/
|
|
703
691
|
void mlk_rv64v_poly_tomont(int16_t *r)
|
|
704
692
|
{
|
|
705
693
|
size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
|
|
694
|
+
size_t i;
|
|
706
695
|
|
|
707
|
-
for (
|
|
696
|
+
for (i = 0; i < MLKEM_N; i += vl)
|
|
708
697
|
{
|
|
709
698
|
vint16m1_t vec = __riscv_vle16_v_i16m1(&r[i], vl);
|
|
710
699
|
vec = fq_mul_vx(vec, MLK_RVV_MONT_R2, vl);
|
|
@@ -712,21 +701,19 @@ void mlk_rv64v_poly_tomont(int16_t *r)
|
|
|
712
701
|
}
|
|
713
702
|
}
|
|
714
703
|
|
|
715
|
-
|
|
716
|
-
*
|
|
717
|
-
*
|
|
718
|
-
* Description: Applies Barrett reduction to all coefficients of a polynomial
|
|
719
|
-
* for details of the Barrett reduction see
|
|
720
|
-
* comments in poly.c
|
|
704
|
+
/**
|
|
705
|
+
* Apply Barrett reduction to all coefficients of a polynomial. For details
|
|
706
|
+
* of the Barrett reduction see the comments in poly.c.
|
|
721
707
|
*
|
|
722
|
-
*
|
|
723
|
-
|
|
708
|
+
* @param[in,out] r Input/output polynomial.
|
|
709
|
+
*/
|
|
724
710
|
void mlk_rv64v_poly_reduce(int16_t *r)
|
|
725
711
|
{
|
|
726
712
|
size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
|
|
727
713
|
vint16m1_t vt;
|
|
714
|
+
size_t i;
|
|
728
715
|
|
|
729
|
-
for (
|
|
716
|
+
for (i = 0; i < MLKEM_N; i += vl)
|
|
730
717
|
{
|
|
731
718
|
vt = __riscv_vle16_v_i16m1(&r[i], vl);
|
|
732
719
|
vt = fq_barrett(vt, vl);
|
|
@@ -735,7 +722,8 @@ void mlk_rv64v_poly_reduce(int16_t *r)
|
|
|
735
722
|
}
|
|
736
723
|
}
|
|
737
724
|
|
|
738
|
-
/* Run rejection sampling to get uniform random integers mod q
|
|
725
|
+
/* Run rejection sampling to get uniform random integers mod q.
|
|
726
|
+
* buflen must be a multiple of 12. */
|
|
739
727
|
|
|
740
728
|
unsigned int mlk_rv64v_rej_uniform(int16_t *r, unsigned int len,
|
|
741
729
|
const uint8_t *buf, unsigned int buflen)
|
|
@@ -39,7 +39,7 @@ static MLK_INLINE void mlk_poly_permute_bitrev_to_custom(int16_t data[MLKEM_N])
|
|
|
39
39
|
{
|
|
40
40
|
if (mlk_sys_check_capability(MLK_SYS_CAP_AVX2))
|
|
41
41
|
{
|
|
42
|
-
|
|
42
|
+
mlk_nttunpack_avx2_asm(data);
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
|
|
@@ -53,7 +53,7 @@ static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
|
|
|
53
53
|
{
|
|
54
54
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
55
55
|
}
|
|
56
|
-
return (int)
|
|
56
|
+
return (int)mlk_rej_uniform_avx2_asm(r, buf, buflen, mlk_rej_uniform_table);
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
MLK_MUST_CHECK_RETURN_VALUE
|
|
@@ -64,7 +64,7 @@ static MLK_INLINE int mlk_ntt_native(int16_t data[MLKEM_N])
|
|
|
64
64
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
|
|
67
|
+
mlk_ntt_avx2_asm(data, mlk_qdata);
|
|
68
68
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
69
69
|
}
|
|
70
70
|
|
|
@@ -76,7 +76,7 @@ static MLK_INLINE int mlk_intt_native(int16_t data[MLKEM_N])
|
|
|
76
76
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
mlk_invntt_avx2_asm(data, mlk_qdata);
|
|
80
80
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
81
81
|
}
|
|
82
82
|
|
|
@@ -88,7 +88,7 @@ static MLK_INLINE int mlk_poly_reduce_native(int16_t data[MLKEM_N])
|
|
|
88
88
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
-
|
|
91
|
+
mlk_reduce_avx2_asm(data);
|
|
92
92
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -100,7 +100,7 @@ static MLK_INLINE int mlk_poly_tomont_native(int16_t data[MLKEM_N])
|
|
|
100
100
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
mlk_tomont_avx2_asm(data);
|
|
104
104
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
105
105
|
}
|
|
106
106
|
|
|
@@ -113,7 +113,7 @@ static MLK_INLINE int mlk_poly_mulcache_compute_native(int16_t x[MLKEM_N / 2],
|
|
|
113
113
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
-
|
|
116
|
+
mlk_poly_mulcache_compute_avx2_asm(x, y, mlk_qdata);
|
|
117
117
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
118
118
|
}
|
|
119
119
|
|
|
@@ -128,7 +128,7 @@ static MLK_INLINE int mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
|
|
|
128
128
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
-
|
|
131
|
+
mlk_polyvec_basemul_acc_montgomery_cached_k2_avx2_asm(r, a, b, b_cache);
|
|
132
132
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
133
133
|
}
|
|
134
134
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 */
|
|
@@ -144,7 +144,7 @@ static MLK_INLINE int mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
|
|
|
144
144
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
-
|
|
147
|
+
mlk_polyvec_basemul_acc_montgomery_cached_k3_avx2_asm(r, a, b, b_cache);
|
|
148
148
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
149
149
|
}
|
|
150
150
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 3 */
|
|
@@ -160,7 +160,7 @@ static MLK_INLINE int mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
|
|
|
160
160
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
-
|
|
163
|
+
mlk_polyvec_basemul_acc_montgomery_cached_k4_avx2_asm(r, a, b, b_cache);
|
|
164
164
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
165
165
|
}
|
|
166
166
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
|
|
@@ -174,7 +174,7 @@ static MLK_INLINE int mlk_poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES],
|
|
|
174
174
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
175
175
|
}
|
|
176
176
|
|
|
177
|
-
|
|
177
|
+
mlk_ntttobytes_avx2_asm(r, a);
|
|
178
178
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
179
179
|
}
|
|
180
180
|
|
|
@@ -187,7 +187,7 @@ static MLK_INLINE int mlk_poly_frombytes_native(
|
|
|
187
187
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
188
188
|
}
|
|
189
189
|
|
|
190
|
-
|
|
190
|
+
mlk_nttfrombytes_avx2_asm(r, a);
|
|
191
191
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
192
192
|
}
|
|
193
193
|
|
|
@@ -201,7 +201,7 @@ static MLK_INLINE int mlk_poly_compress_d4_native(
|
|
|
201
201
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
|
|
204
|
+
mlk_poly_compress_d4_avx2_asm(r, a, mlk_compress_d4_data);
|
|
205
205
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
206
206
|
}
|
|
207
207
|
|
|
@@ -214,7 +214,7 @@ static MLK_INLINE int mlk_poly_compress_d10_native(
|
|
|
214
214
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
215
215
|
}
|
|
216
216
|
|
|
217
|
-
|
|
217
|
+
mlk_poly_compress_d10_avx2_asm(r, a, mlk_compress_d10_data);
|
|
218
218
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
219
219
|
}
|
|
220
220
|
|
|
@@ -227,7 +227,7 @@ static MLK_INLINE int mlk_poly_decompress_d4_native(
|
|
|
227
227
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
228
228
|
}
|
|
229
229
|
|
|
230
|
-
|
|
230
|
+
mlk_poly_decompress_d4_avx2_asm(r, a, mlk_decompress_d4_data);
|
|
231
231
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
232
232
|
}
|
|
233
233
|
|
|
@@ -240,7 +240,7 @@ static MLK_INLINE int mlk_poly_decompress_d10_native(
|
|
|
240
240
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
-
|
|
243
|
+
mlk_poly_decompress_d10_avx2_asm(r, a, mlk_decompress_d10_data);
|
|
244
244
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
245
245
|
}
|
|
246
246
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */
|
|
@@ -255,7 +255,7 @@ static MLK_INLINE int mlk_poly_compress_d5_native(
|
|
|
255
255
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
256
256
|
}
|
|
257
257
|
|
|
258
|
-
|
|
258
|
+
mlk_poly_compress_d5_avx2_asm(r, a, mlk_compress_d5_data);
|
|
259
259
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
260
260
|
}
|
|
261
261
|
|
|
@@ -268,7 +268,7 @@ static MLK_INLINE int mlk_poly_compress_d11_native(
|
|
|
268
268
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
269
269
|
}
|
|
270
270
|
|
|
271
|
-
|
|
271
|
+
mlk_poly_compress_d11_avx2_asm(r, a, mlk_compress_d11_data);
|
|
272
272
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
273
273
|
}
|
|
274
274
|
|
|
@@ -281,7 +281,7 @@ static MLK_INLINE int mlk_poly_decompress_d5_native(
|
|
|
281
281
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
282
282
|
}
|
|
283
283
|
|
|
284
|
-
|
|
284
|
+
mlk_poly_decompress_d5_avx2_asm(r, a, mlk_decompress_d5_data);
|
|
285
285
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
286
286
|
}
|
|
287
287
|
|
|
@@ -294,7 +294,7 @@ static MLK_INLINE int mlk_poly_decompress_d11_native(
|
|
|
294
294
|
return MLK_NATIVE_FUNC_FALLBACK;
|
|
295
295
|
}
|
|
296
296
|
|
|
297
|
-
|
|
297
|
+
mlk_poly_decompress_d11_avx2_asm(r, a, mlk_decompress_d11_data);
|
|
298
298
|
return MLK_NATIVE_FUNC_SUCCESS;
|
|
299
299
|
}
|
|
300
300
|
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
|