pq_crypto 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,805 @@
|
|
|
1
|
+
/*
 * Copyright (c) The mlkem-native project authors
 * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
 */

/* === ML-KEM NTT using RISC-V Vector intrinsics */

#include "../../../common.h"

/* Entire backend is compiled out unless the RISC-V 64 arithmetic backend
 * is selected and shared-code compilation is enabled. */
#if defined(MLK_ARITH_BACKEND_RISCV64) && \
    !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)

#include <riscv_vector.h>

#include "arith_native_riscv64.h"
#include "rv64v_debug.h"

/* Montgomery reduction constants (R = 2^16 unless noted otherwise).
 * The check-magic annotations below are machine-verified recomputations. */

/* check-magic: -3327 == signed_mod(pow(MLKEM_Q,-1,2^16), 2^16) */
#define MLK_RVV_QI -3327

/* check-magic: 2285 == unsigned_mod(2^16, MLKEM_Q) */
#define MLK_RVV_MONT_R1 2285

/* check-magic: 1353 == pow(2, 32, MLKEM_Q) */
#define MLK_RVV_MONT_R2 1353

/* check-magic: 1441 == pow(2,32-7,MLKEM_Q) */
#define MLK_RVV_MONT_NR 1441
/* Montgomery reduction of a 32-bit product given as separate 16-bit
 * halves: rh = high half, rl = low half. Returns (product * R^-1) mod q
 * up to a multiple of q (standard signed Montgomery reduction). */
static inline vint16m1_t fq_redc(vint16m1_t rh, vint16m1_t rl, size_t vl)
{
  vint16m1_t quot;

  /* quot = rl * q^-1 (mod R) */
  quot = __riscv_vmul_vx_i16m1(rl, MLK_RVV_QI, vl);
  /* quot = floor(quot * q / R) */
  quot = __riscv_vmulh_vx_i16m1(quot, MLKEM_Q, vl);
  /* result = rh - quot */
  return __riscv_vsub_vv_i16m1(rh, quot, vl);
}
/* Narrowing reduction */
|
|
43
|
+
|
|
44
|
+
static inline vint16m1_t fq_redc2(vint32m2_t z, size_t vl)
|
|
45
|
+
{
|
|
46
|
+
vint16m1_t t;
|
|
47
|
+
|
|
48
|
+
t = __riscv_vmul_vx_i16m1(__riscv_vncvt_x_x_w_i16m1(z, vl), MLK_RVV_QI,
|
|
49
|
+
vl); /* t = l * Q^-1 */
|
|
50
|
+
z = __riscv_vsub_vv_i32m2(z, __riscv_vwmul_vx_i32m2(t, MLKEM_Q, vl),
|
|
51
|
+
vl); /* x = (x - (t*Q)) */
|
|
52
|
+
t = __riscv_vnsra_wx_i16m1(z, 16, vl);
|
|
53
|
+
|
|
54
|
+
return t;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/* Narrowing Barrett */
|
|
58
|
+
|
|
59
|
+
static inline vint16m1_t fq_barrett(vint16m1_t a, size_t vl)
|
|
60
|
+
{
|
|
61
|
+
vint16m1_t t;
|
|
62
|
+
const int16_t v = ((1 << 26) + MLKEM_Q / 2) / MLKEM_Q;
|
|
63
|
+
|
|
64
|
+
t = __riscv_vmulh_vx_i16m1(a, v, vl);
|
|
65
|
+
t = __riscv_vadd_vx_i16m1(t, 1 << (25 - 16), vl);
|
|
66
|
+
t = __riscv_vsra_vx_i16m1(t, 26 - 16, vl);
|
|
67
|
+
t = __riscv_vmul_vx_i16m1(t, MLKEM_Q, vl);
|
|
68
|
+
t = __riscv_vsub_vv_i16m1(a, t, vl);
|
|
69
|
+
|
|
70
|
+
mlk_assert_abs_bound_int16m1(t, vl, MLKEM_Q_HALF);
|
|
71
|
+
return t;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/* Conditionally add Q (if negative) */
|
|
75
|
+
|
|
76
|
+
static inline vint16m1_t fq_cadd(vint16m1_t rx, size_t vl)
|
|
77
|
+
{
|
|
78
|
+
vbool16_t bn;
|
|
79
|
+
|
|
80
|
+
bn = __riscv_vmslt_vx_i16m1_b16(rx, 0, vl); /* if x < 0: */
|
|
81
|
+
rx = __riscv_vadd_vx_i16m1_mu(bn, rx, rx, MLKEM_Q, vl); /* x += Q */
|
|
82
|
+
return rx;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/* Conditionally subtract Q (if Q or above) */
|
|
86
|
+
|
|
87
|
+
static inline vint16m1_t fq_csub(vint16m1_t rx, size_t vl)
|
|
88
|
+
{
|
|
89
|
+
vbool16_t bn;
|
|
90
|
+
|
|
91
|
+
bn = __riscv_vmsge_vx_i16m1_b16(rx, MLKEM_Q, vl); /* if x >= Q: */
|
|
92
|
+
rx = __riscv_vsub_vx_i16m1_mu(bn, rx, rx, MLKEM_Q, vl); /* x -= Q */
|
|
93
|
+
return rx;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/* Montgomery multiply: vector-vector */
|
|
97
|
+
|
|
98
|
+
static inline vint16m1_t fq_mul_vv(vint16m1_t rx, vint16m1_t ry, size_t vl)
|
|
99
|
+
{
|
|
100
|
+
vint16m1_t rl, rh;
|
|
101
|
+
|
|
102
|
+
rh = __riscv_vmulh_vv_i16m1(rx, ry, vl); /* h = (x * y) / R */
|
|
103
|
+
rl = __riscv_vmul_vv_i16m1(rx, ry, vl); /* l = (x * y) % R */
|
|
104
|
+
return fq_redc(rh, rl, vl);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/* Montgomery multiply: vector-scalar */
|
|
108
|
+
|
|
109
|
+
static inline vint16m1_t fq_mul_vx(vint16m1_t rx, int16_t ry, size_t vl)
|
|
110
|
+
{
|
|
111
|
+
vint16m1_t rl, rh;
|
|
112
|
+
|
|
113
|
+
rh = __riscv_vmulh_vx_i16m1(rx, ry, vl); /* h = (x * y) / R */
|
|
114
|
+
rl = __riscv_vmul_vx_i16m1(rx, ry, vl); /* l = (x * y) % R */
|
|
115
|
+
return fq_redc(rh, rl, vl);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/* full normalization */
|
|
119
|
+
|
|
120
|
+
static inline vint16m1_t fq_mulq_vx(vint16m1_t rx, int16_t ry, size_t vl)
|
|
121
|
+
{
|
|
122
|
+
vint16m1_t result;
|
|
123
|
+
|
|
124
|
+
result = fq_mul_vx(rx, ry, vl);
|
|
125
|
+
|
|
126
|
+
mlk_assert_abs_bound_int16m1(result, vl, MLKEM_Q);
|
|
127
|
+
return result;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/* create a permutation for swapping index bits a and b, a < b */

/*
 * Returns an index vector perm with perm[i] = i with bits a and b of i
 * exchanged; feeding it to vrgather permutes vector elements accordingly.
 * This is the classic "delta swap": xa detects (at position a) whether
 * bits a and b of the index differ, xb mirrors that at position b, and
 * XORing both toggle bits into the identity index swaps the two bits.
 */
static vuint16m2_t bitswap_perm(unsigned a, unsigned b, size_t vl)
{
  const vuint16m2_t v2id = __riscv_vid_v_u16m2(vl); /* identity indices */

  vuint16m2_t xa, xb;
  xa = __riscv_vsrl_vx_u16m2(v2id, b - a, vl);   /* bring bit b down to a */
  xa = __riscv_vxor_vv_u16m2(xa, v2id, vl);      /* compare with bit a    */
  xa = __riscv_vand_vx_u16m2(xa, (1 << a), vl);  /* keep the difference   */
  xb = __riscv_vsll_vx_u16m2(xa, b - a, vl);     /* same toggle at bit b  */
  xa = __riscv_vxor_vv_u16m2(xa, xb, vl);        /* combined toggle mask  */
  xa = __riscv_vxor_vv_u16m2(v2id, xa, vl);      /* apply to identity     */
  return xa;
}
|
|
145
|
+
|
|
146
|
+
/*************************************************
|
|
147
|
+
* Name: poly_ntt
|
|
148
|
+
*
|
|
149
|
+
* Description: Computes negacyclic number-theoretic transform (NTT) of
|
|
150
|
+
* a polynomial in place;
|
|
151
|
+
* inputs assumed to be in normal order, output in
|
|
152
|
+
* bitreversed order
|
|
153
|
+
*
|
|
154
|
+
* Arguments:   - int16_t *r: pointer to in/output polynomial
|
|
155
|
+
**************************************************/
|
|
156
|
+
|
|
157
|
+
/* Forward / Cooley-Tukey butterfly operation */

/*
 * CT butterfly, scalar twiddle uc:
 *   (u0, u1) <- (u0 + uc*u1, u0 - uc*u1), with uc*u1 Montgomery-reduced.
 * ut is a caller-provided temporary; `layer` only parameterizes the
 * debug bound: each forward layer may grow coefficients by up to Q.
 */
#define MLK_RVV_CT_BFLY_FX(u0, u1, ut, uc, vl, layer)              \
  {                                                                \
    mlk_assert_abs_bound(&uc, 1, MLKEM_Q_HALF);                    \
                                                                   \
    ut = fq_mul_vx(u1, uc, vl);                                    \
    mlk_assert_abs_bound_int16m1(ut, vl, MLKEM_Q);                 \
                                                                   \
    u1 = __riscv_vsub_vv_i16m1(u0, ut, vl);                        \
    u0 = __riscv_vadd_vv_i16m1(u0, ut, vl);                        \
    mlk_assert_abs_bound_int16m1(u0, vl, (layer + 1) * MLKEM_Q);   \
    mlk_assert_abs_bound_int16m1(u1, vl, (layer + 1) * MLKEM_Q);   \
  }
|
|
171
|
+
|
|
172
|
+
/*
 * CT butterfly, per-lane vector twiddle uc:
 * identical to MLK_RVV_CT_BFLY_FX except each lane has its own zeta,
 * as needed once butterflies operate within a vector register.
 */
#define MLK_RVV_CT_BFLY_FV(u0, u1, ut, uc, vl, layer)              \
  {                                                                \
    mlk_assert_abs_bound_int16m1(uc, vl, MLKEM_Q_HALF);            \
                                                                   \
    ut = fq_mul_vv(u1, uc, vl);                                    \
    mlk_assert_abs_bound_int16m1(ut, vl, MLKEM_Q);                 \
                                                                   \
    u1 = __riscv_vsub_vv_i16m1(u0, ut, vl);                        \
    u0 = __riscv_vadd_vv_i16m1(u0, ut, vl);                        \
    mlk_assert_abs_bound_int16m1(u0, vl, (layer + 1) * MLKEM_Q);   \
    mlk_assert_abs_bound_int16m1(u1, vl, (layer + 1) * MLKEM_Q);   \
  }
|
|
184
|
+
|
|
185
|
+
/*
 * Performs the in-register tail of the forward NTT (layers 5..7, per the
 * `layer` arguments below) on a pair of 16-element vectors held in vp.
 * Butterfly partners are brought adjacent via vrgather permutations built
 * by bitswap_perm; cz holds the per-pair zetas, and cs8/cs4/cs2 select
 * which zeta each lane uses at each layer (offsets 2, 4, 8 into cz --
 * presumably matching the layout of rv64v_zetas.inc; verify against it).
 */
static vint16m2_t mlk_rv64v_ntt2(vint16m2_t vp, vint16m1_t cz)
{
  size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
  size_t vl2 = 2 * vl;

  /* permutations swapping index bit 4 with bits 3, 2, 1 respectively */
  const vuint16m2_t v2p8 = bitswap_perm(3, 4, vl2);
  const vuint16m2_t v2p4 = bitswap_perm(2, 4, vl2);
  const vuint16m2_t v2p2 = bitswap_perm(1, 4, vl2);

  /* p1 = p8(p4(p2)) -- composition restoring the final element order */
  const vuint16m2_t v2p1 = __riscv_vrgather_vv_u16m2(
      __riscv_vrgather_vv_u16m2(v2p2, v2p4, vl2), v2p8, vl2);

  /* zeta-selection indices: lane i picks cz[(i >> k) + base] */
  const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
  const vuint16m1_t cs8 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 3, vl), 2, vl);
  const vuint16m1_t cs4 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 2, vl), 2 + 2, vl);
  const vuint16m1_t cs2 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 1, vl), 2 + 2 + 4, vl);

  vint16m1_t vt, c0, t0, t1;

  /* swap 8: pair elements at distance 8, then butterfly (layer 5) */
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p8, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  c0 = __riscv_vrgather_vv_i16m1(cz, cs8, vl);
  MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 5);

  /* swap 4: pair elements at distance 4, then butterfly (layer 6) */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p4, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  c0 = __riscv_vrgather_vv_i16m1(cz, cs4, vl);
  MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 6);

  /* swap 2: pair elements at distance 2, then butterfly (layer 7) */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p2, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  c0 = __riscv_vrgather_vv_i16m1(cz, cs2, vl);
  MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 7);

  /* reorganize: undo the accumulated permutations in one gather */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p1, vl2);

  return vp;
}
|
|
240
|
+
|
|
241
|
+
/* Only for VLEN=256 for now */

/*
 * Forward negacyclic NTT over the 256-coefficient polynomial r, kept as
 * sixteen 16-element registers v0..vf. Layers 1..4 run across whole
 * registers with scalar twiddles (BFLY_FX); layers 5..7 run inside
 * register pairs via mlk_rv64v_ntt2 with the per-block zeta vectors
 * z0..ze. Input in normal order, output in bitreversed order.
 */
void mlk_rv64v_poly_ntt(int16_t *r)
{
/* zetas can be compiled into vector constants; don't pass as a pointer */
#include "rv64v_zetas.inc"

  size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
  size_t vl2 = 2 * vl;

  vint16m1_t vt;
  vint16m1_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf;

  /* per-32-coefficient zeta tables for the in-register layers */
  const vint16m1_t z0 = __riscv_vle16_v_i16m1(&zeta[0x00], vl);
  const vint16m1_t z2 = __riscv_vle16_v_i16m1(&zeta[0x10], vl);
  const vint16m1_t z4 = __riscv_vle16_v_i16m1(&zeta[0x20], vl);
  const vint16m1_t z6 = __riscv_vle16_v_i16m1(&zeta[0x30], vl);
  const vint16m1_t z8 = __riscv_vle16_v_i16m1(&zeta[0x40], vl);
  const vint16m1_t za = __riscv_vle16_v_i16m1(&zeta[0x50], vl);
  const vint16m1_t zc = __riscv_vle16_v_i16m1(&zeta[0x60], vl);
  const vint16m1_t ze = __riscv_vle16_v_i16m1(&zeta[0x70], vl);

  /* load the whole polynomial into registers */
  v0 = __riscv_vle16_v_i16m1(&r[0x00], vl);
  v1 = __riscv_vle16_v_i16m1(&r[0x10], vl);
  v2 = __riscv_vle16_v_i16m1(&r[0x20], vl);
  v3 = __riscv_vle16_v_i16m1(&r[0x30], vl);
  v4 = __riscv_vle16_v_i16m1(&r[0x40], vl);
  v5 = __riscv_vle16_v_i16m1(&r[0x50], vl);
  v6 = __riscv_vle16_v_i16m1(&r[0x60], vl);
  v7 = __riscv_vle16_v_i16m1(&r[0x70], vl);
  v8 = __riscv_vle16_v_i16m1(&r[0x80], vl);
  v9 = __riscv_vle16_v_i16m1(&r[0x90], vl);
  va = __riscv_vle16_v_i16m1(&r[0xa0], vl);
  vb = __riscv_vle16_v_i16m1(&r[0xb0], vl);
  vc = __riscv_vle16_v_i16m1(&r[0xc0], vl);
  vd = __riscv_vle16_v_i16m1(&r[0xd0], vl);
  ve = __riscv_vle16_v_i16m1(&r[0xe0], vl);
  vf = __riscv_vle16_v_i16m1(&r[0xf0], vl);

  /* layer 1: stride 128 (register distance 8), single zeta */
  MLK_RVV_CT_BFLY_FX(v0, v8, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v1, v9, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v2, va, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v3, vb, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v4, vc, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v5, vd, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v6, ve, vt, zeta[0x01], vl, 1);
  MLK_RVV_CT_BFLY_FX(v7, vf, vt, zeta[0x01], vl, 1);

  /* layer 2: stride 64 (register distance 4), two zetas */
  MLK_RVV_CT_BFLY_FX(v0, v4, vt, zeta[0x10], vl, 2);
  MLK_RVV_CT_BFLY_FX(v1, v5, vt, zeta[0x10], vl, 2);
  MLK_RVV_CT_BFLY_FX(v2, v6, vt, zeta[0x10], vl, 2);
  MLK_RVV_CT_BFLY_FX(v3, v7, vt, zeta[0x10], vl, 2);
  MLK_RVV_CT_BFLY_FX(v8, vc, vt, zeta[0x11], vl, 2);
  MLK_RVV_CT_BFLY_FX(v9, vd, vt, zeta[0x11], vl, 2);
  MLK_RVV_CT_BFLY_FX(va, ve, vt, zeta[0x11], vl, 2);
  MLK_RVV_CT_BFLY_FX(vb, vf, vt, zeta[0x11], vl, 2);

  /* layer 3: stride 32 (register distance 2), four zetas */
  MLK_RVV_CT_BFLY_FX(v0, v2, vt, zeta[0x20], vl, 3);
  MLK_RVV_CT_BFLY_FX(v1, v3, vt, zeta[0x20], vl, 3);
  MLK_RVV_CT_BFLY_FX(v4, v6, vt, zeta[0x21], vl, 3);
  MLK_RVV_CT_BFLY_FX(v5, v7, vt, zeta[0x21], vl, 3);
  MLK_RVV_CT_BFLY_FX(v8, va, vt, zeta[0x30], vl, 3);
  MLK_RVV_CT_BFLY_FX(v9, vb, vt, zeta[0x30], vl, 3);
  MLK_RVV_CT_BFLY_FX(vc, ve, vt, zeta[0x31], vl, 3);
  MLK_RVV_CT_BFLY_FX(vd, vf, vt, zeta[0x31], vl, 3);

  /* layer 4: stride 16 (adjacent registers), eight zetas */
  MLK_RVV_CT_BFLY_FX(v0, v1, vt, zeta[0x40], vl, 4);
  MLK_RVV_CT_BFLY_FX(v2, v3, vt, zeta[0x41], vl, 4);
  MLK_RVV_CT_BFLY_FX(v4, v5, vt, zeta[0x50], vl, 4);
  MLK_RVV_CT_BFLY_FX(v6, v7, vt, zeta[0x51], vl, 4);
  MLK_RVV_CT_BFLY_FX(v8, v9, vt, zeta[0x60], vl, 4);
  MLK_RVV_CT_BFLY_FX(va, vb, vt, zeta[0x61], vl, 4);
  MLK_RVV_CT_BFLY_FX(vc, vd, vt, zeta[0x70], vl, 4);
  MLK_RVV_CT_BFLY_FX(ve, vf, vt, zeta[0x71], vl, 4);

  /* layers 5..7: in-register butterflies per 32-coefficient block,
   * then store the finished block back to memory */
  __riscv_vse16_v_i16m2(
      &r[0x00], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v0, v1), z0), vl2);
  __riscv_vse16_v_i16m2(
      &r[0x20], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v2, v3), z2), vl2);
  __riscv_vse16_v_i16m2(
      &r[0x40], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v4, v5), z4), vl2);
  __riscv_vse16_v_i16m2(
      &r[0x60], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v6, v7), z6), vl2);
  __riscv_vse16_v_i16m2(
      &r[0x80], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v8, v9), z8), vl2);
  __riscv_vse16_v_i16m2(
      &r[0xa0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(va, vb), za), vl2);
  __riscv_vse16_v_i16m2(
      &r[0xc0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(vc, vd), zc), vl2);
  __riscv_vse16_v_i16m2(
      &r[0xe0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(ve, vf), ze), vl2);
}
|
|
332
|
+
|
|
333
|
+
/*************************************************
|
|
334
|
+
* Name: poly_invntt_tomont
|
|
335
|
+
*
|
|
336
|
+
* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
|
|
337
|
+
* of a polynomial in place;
|
|
338
|
+
* inputs assumed to be in bitreversed order,
|
|
339
|
+
* output in normal order
|
|
340
|
+
*
|
|
341
|
+
* Arguments:   - int16_t *r: pointer to in/output polynomial
|
|
342
|
+
**************************************************/
|
|
343
|
+
|
|
344
|
+
/* Reverse / Gentleman-Sande butterfly operation */

/*
 * GS butterfly, scalar twiddle uc:
 *   (u0, u1) <- (u0 + u1, uc*(u0 - u1)), difference Montgomery-reduced.
 * ut is a caller-provided temporary.
 */
#define MLK_RVV_GS_BFLY_RX(u0, u1, ut, uc, vl)  \
  {                                             \
    ut = __riscv_vsub_vv_i16m1(u0, u1, vl);     \
    u0 = __riscv_vadd_vv_i16m1(u0, u1, vl);     \
    u1 = fq_mul_vx(ut, uc, vl);                 \
  }
|
|
352
|
+
|
|
353
|
+
/*
 * GS butterfly, per-lane vector twiddle uc: same as MLK_RVV_GS_BFLY_RX
 * but each lane multiplies by its own zeta.
 */
#define MLK_RVV_GS_BFLY_RV(u0, u1, ut, uc, vl)  \
  {                                             \
    ut = __riscv_vsub_vv_i16m1(u0, u1, vl);     \
    u0 = __riscv_vadd_vv_i16m1(u0, u1, vl);     \
    u1 = fq_mul_vv(ut, uc, vl);                 \
  }
|
|
359
|
+
|
|
360
|
+
/*
 * In-register head of the inverse NTT: undoes the three innermost layers
 * on a pair of 16-element vectors (mirror image of mlk_rv64v_ntt2).
 * cz holds the per-pair inverse zetas; cs2/cs4/cs8 pick each lane's zeta.
 * The pre-scale by MLK_RVV_MONT_NR keeps intermediate values bounded
 * (assertions below document the growth at each step); outputs are
 * fully normalized to |x| < q.
 */
static vint16m2_t mlk_rv64v_intt2(vint16m2_t vp, vint16m1_t cz)
{
  size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
  size_t vl2 = 2 * vl;

  const vuint16m2_t v2p8 = bitswap_perm(3, 4, vl2);
  const vuint16m2_t v2p4 = bitswap_perm(2, 4, vl2);
  const vuint16m2_t v2p2 = bitswap_perm(1, 4, vl2);

  /* p0 = p2(p4(p8)) -- inverse composition of the forward transform */
  const vuint16m2_t v2p0 = __riscv_vrgather_vv_u16m2(
      __riscv_vrgather_vv_u16m2(v2p8, v2p4, vl2), v2p2, vl2);

  const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
  const vuint16m1_t cs8 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 3, vl), 2, vl);
  const vuint16m1_t cs4 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 2, vl), 2 + 2, vl);
  const vuint16m1_t cs2 =
      __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 1, vl), 2 + 2 + 4, vl);

  vint16m1_t t0, t1, c0, vt;

  /* initial permute */
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p0, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  /* pre-scale */
  t0 = fq_mulq_vx(t0, MLK_RVV_MONT_NR, vl);
  t1 = fq_mulq_vx(t1, MLK_RVV_MONT_NR, vl);

  /* absolute bounds: t0 < q, t1 < q */
  mlk_assert_abs_bound_int16m1(t0, vl, MLKEM_Q);
  mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);

  c0 = __riscv_vrgather_vv_i16m1(cz, cs2, vl);
  MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);

  /* absolute bounds: t0 < 2*q, t1 < q */
  mlk_assert_abs_bound_int16m1(t0, vl, 2 * MLKEM_Q);
  mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);

  /* swap 2 */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p2, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
  c0 = __riscv_vrgather_vv_i16m1(cz, cs4, vl);
  MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);

  /* absolute bounds: t0 < 4*q, t1 < q */
  mlk_assert_abs_bound_int16m1(t0, vl, 4 * MLKEM_Q);
  mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);

  /* swap 4 */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p4, vl2);
  t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
  c0 = __riscv_vrgather_vv_i16m1(cz, cs8, vl);
  MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);

  /* absolute bounds: < 8*q */
  mlk_assert_abs_bound_int16m1(t0, vl, 8 * MLKEM_Q);
  mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);

  /* re-normalize t0 (grown to < 8*q) by multiplying with MONT_R1 */
  t0 = fq_mulq_vx(t0, MLK_RVV_MONT_R1, vl);

  /* absolute bounds: < q */
  mlk_assert_abs_bound_int16m1(t0, vl, MLKEM_Q);
  mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);

  /* swap 8 */
  vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
  vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p8, vl2);

  return vp;
}
|
|
439
|
+
|
|
440
|
+
/*
 * Debug-only helper: asserts |v{0..f}| < b{0..f} * MLKEM_Q lane-wise for
 * all sixteen registers at once. Used to document coefficient growth
 * between butterfly layers; compiles away in release builds (assuming
 * mlk_assert_abs_bound_int16m1 does -- verify in debug.h).
 */
#define MLK_RV64V_ABS_BOUNDS16(vl, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, \
                               vb, vc, vd, ve, vf, b0, b1, b2, b3, b4, b5, b6, \
                               b7, b8, b9, ba, bb, bc, bd, be, bf)             \
  do                                                                           \
  {                                                                            \
    mlk_assert_abs_bound_int16m1(v0, vl, (b0) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v1, vl, (b1) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v2, vl, (b2) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v3, vl, (b3) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v4, vl, (b4) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v5, vl, (b5) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v6, vl, (b6) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v7, vl, (b7) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v8, vl, (b8) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(v9, vl, (b9) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(va, vl, (ba) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(vb, vl, (bb) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(vc, vl, (bc) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(vd, vl, (bd) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(ve, vl, (be) * MLKEM_Q);                      \
    mlk_assert_abs_bound_int16m1(vf, vl, (bf) * MLKEM_Q);                      \
  } while (0)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
/* Only for VLEN=256 for now */

/*
 * Inverse negacyclic NTT (with Montgomery-domain output), mirror of
 * mlk_rv64v_poly_ntt: first the in-register layers per 32-coefficient
 * block (mlk_rv64v_intt2), then three cross-register GS layers with
 * scalar inverse zetas. Bound assertions between layers document that
 * no intermediate exceeds 8*q, i.e. no int16 overflow occurs.
 */
void mlk_rv64v_poly_invntt_tomont(int16_t *r)
{
/* zetas can be compiled into vector constants; don't pass as a pointer */
#include "rv64v_izetas.inc"

  size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
  size_t vl2 = 2 * vl;

  /* per-32-coefficient inverse-zeta tables */
  const vint16m1_t z0 = __riscv_vle16_v_i16m1(&izeta[0x00], vl);
  const vint16m1_t z2 = __riscv_vle16_v_i16m1(&izeta[0x10], vl);
  const vint16m1_t z4 = __riscv_vle16_v_i16m1(&izeta[0x20], vl);
  const vint16m1_t z6 = __riscv_vle16_v_i16m1(&izeta[0x30], vl);
  const vint16m1_t z8 = __riscv_vle16_v_i16m1(&izeta[0x40], vl);
  const vint16m1_t za = __riscv_vle16_v_i16m1(&izeta[0x50], vl);
  const vint16m1_t zc = __riscv_vle16_v_i16m1(&izeta[0x60], vl);
  const vint16m1_t ze = __riscv_vle16_v_i16m1(&izeta[0x70], vl);

  vint16m1_t vt;
  vint16m1_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf;
  vint16m2_t vp;

  /* inverse in-register layers, one 32-coefficient block at a time */
  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x00], vl2), z0);
  v0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  v1 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x20], vl2), z2);
  v2 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  v3 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x40], vl2), z4);
  v4 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  v5 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x60], vl2), z6);
  v6 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  v7 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x80], vl2), z8);
  v8 = __riscv_vget_v_i16m2_i16m1(vp, 0);
  v9 = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xa0], vl2), za);
  va = __riscv_vget_v_i16m2_i16m1(vp, 0);
  vb = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xc0], vl2), zc);
  vc = __riscv_vget_v_i16m2_i16m1(vp, 0);
  vd = __riscv_vget_v_i16m2_i16m1(vp, 1);

  vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xe0], vl2), ze);
  ve = __riscv_vget_v_i16m2_i16m1(vp, 0);
  vf = __riscv_vget_v_i16m2_i16m1(vp, 1);

  /* absolute bounds < q (see mlk_rv64v_intt2) */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

  /* GS layer: adjacent registers */
  MLK_RVV_GS_BFLY_RX(v0, v1, vt, izeta[0x40], vl);
  MLK_RVV_GS_BFLY_RX(v2, v3, vt, izeta[0x41], vl);
  MLK_RVV_GS_BFLY_RX(v4, v5, vt, izeta[0x50], vl);
  MLK_RVV_GS_BFLY_RX(v6, v7, vt, izeta[0x51], vl);
  MLK_RVV_GS_BFLY_RX(v8, v9, vt, izeta[0x60], vl);
  MLK_RVV_GS_BFLY_RX(va, vb, vt, izeta[0x61], vl);
  MLK_RVV_GS_BFLY_RX(vc, vd, vt, izeta[0x70], vl);
  MLK_RVV_GS_BFLY_RX(ve, vf, vt, izeta[0x71], vl);

  /* absolute bounds:
   * - v{0,2,4,6,8,a,c,e}: < 2*q
   * - v{1,3,5,7,9,b,d,f}: < 1*q
   */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1);

  /* GS layer: register distance 2 */
  MLK_RVV_GS_BFLY_RX(v0, v2, vt, izeta[0x20], vl);
  MLK_RVV_GS_BFLY_RX(v1, v3, vt, izeta[0x20], vl);
  MLK_RVV_GS_BFLY_RX(v4, v6, vt, izeta[0x21], vl);
  MLK_RVV_GS_BFLY_RX(v5, v7, vt, izeta[0x21], vl);
  MLK_RVV_GS_BFLY_RX(v8, va, vt, izeta[0x30], vl);
  MLK_RVV_GS_BFLY_RX(v9, vb, vt, izeta[0x30], vl);
  MLK_RVV_GS_BFLY_RX(vc, ve, vt, izeta[0x31], vl);
  MLK_RVV_GS_BFLY_RX(vd, vf, vt, izeta[0x31], vl);

  /* absolute bounds:
   * - v{0,4,8,c}: < 4*q
   * - v{1,5,9,d}: < 2*q
   * - v{2,3,6,7,a,b,e,f}: < 1*q
   */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      4, 2, 1, 1, 4, 2, 1, 1, 4, 2, 1, 1, 4, 2, 1, 1);

  /* GS layer: register distance 4 */
  MLK_RVV_GS_BFLY_RX(v0, v4, vt, izeta[0x10], vl);
  MLK_RVV_GS_BFLY_RX(v1, v5, vt, izeta[0x10], vl);
  MLK_RVV_GS_BFLY_RX(v2, v6, vt, izeta[0x10], vl);
  MLK_RVV_GS_BFLY_RX(v3, v7, vt, izeta[0x10], vl);
  MLK_RVV_GS_BFLY_RX(v8, vc, vt, izeta[0x11], vl);
  MLK_RVV_GS_BFLY_RX(v9, vd, vt, izeta[0x11], vl);
  MLK_RVV_GS_BFLY_RX(va, ve, vt, izeta[0x11], vl);
  MLK_RVV_GS_BFLY_RX(vb, vf, vt, izeta[0x11], vl);

  /* absolute bounds:
   * - v{0,8}: < 8*q
   * - v{1,9}: < 4*q
   * - v{2,3,a,b}: < 2*q
   * - v{4,5,6,7,c,d,e,f}: < 1*q
   */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      8, 4, 2, 2, 1, 1, 1, 1, 8, 4, 2, 2, 1, 1, 1, 1);

  /* Reduce v0, v8 to avoid overflow */
  v0 = fq_mulq_vx(v0, MLK_RVV_MONT_R1, vl);
  v8 = fq_mulq_vx(v8, MLK_RVV_MONT_R1, vl);

  /* absolute bounds: < 4*q */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4);

  /* final GS layer: register distance 8, single zeta */
  MLK_RVV_GS_BFLY_RX(v0, v8, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v1, v9, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v2, va, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v3, vb, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v4, vc, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v5, vd, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v6, ve, vt, izeta[0x01], vl);
  MLK_RVV_GS_BFLY_RX(v7, vf, vt, izeta[0x01], vl);

  /* absolute bounds: < 8*q */
  MLK_RV64V_ABS_BOUNDS16(vl,
      v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
      8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);

  __riscv_vse16_v_i16m1(&r[0x00], v0, vl);
  __riscv_vse16_v_i16m1(&r[0x10], v1, vl);
  __riscv_vse16_v_i16m1(&r[0x20], v2, vl);
  __riscv_vse16_v_i16m1(&r[0x30], v3, vl);
  __riscv_vse16_v_i16m1(&r[0x40], v4, vl);
  __riscv_vse16_v_i16m1(&r[0x50], v5, vl);
  __riscv_vse16_v_i16m1(&r[0x60], v6, vl);
  __riscv_vse16_v_i16m1(&r[0x70], v7, vl);
  __riscv_vse16_v_i16m1(&r[0x80], v8, vl);
  __riscv_vse16_v_i16m1(&r[0x90], v9, vl);
  __riscv_vse16_v_i16m1(&r[0xa0], va, vl);
  __riscv_vse16_v_i16m1(&r[0xb0], vb, vl);
  __riscv_vse16_v_i16m1(&r[0xc0], vc, vl);
  __riscv_vse16_v_i16m1(&r[0xd0], vd, vl);
  __riscv_vse16_v_i16m1(&r[0xe0], ve, vl);
  __riscv_vse16_v_i16m1(&r[0xf0], vf, vl);
}
|
|
617
|
+
|
|
618
|
+
/* ML-KEM's middle field GF(3329)[X]/(X^2) multiplication */

/*
 * Accumulating base multiplication: for each coefficient pair (a0+a1*X),
 * (b0+b1*X) in GF(q)[X]/(X^2 - zeta), computes the degree-1 product and
 * sums it over kn/MLKEM_N polynomial pairs, reducing once at the end.
 *
 * Lane layout (even/odd lanes hold the pair's two coefficients):
 *  - sw0 swaps each even lane with its odd neighbor;
 *  - sb0 is true on even lane indices;
 *  - wa holds widened zeta * (a*b) terms; pairwise-summed into even lanes,
 *    giving a0*b0 + zeta*a1*b1 (constant term) -- NOTE(review): relies on
 *    roots[] carrying the per-pair zeta pattern; verify against
 *    rv64v_zetas_basemul.inc;
 *  - wb holds widened a * (b swapped) terms; pairwise-summed into odd
 *    lanes, giving a0*b1 + a1*b0 (X term);
 *  - vmerge interleaves the two back into one vector.
 */
static inline void mlk_rv64v_poly_basemul_mont_add_k(int16_t *r,
                                                     const int16_t *a,
                                                     const int16_t *b,
                                                     unsigned kn)
{
#include "rv64v_zetas_basemul.inc"

  size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
  size_t i, j;

  const vuint16m1_t sw0 = __riscv_vxor_vx_u16m1(__riscv_vid_v_u16m1(vl), 1, vl);
  const vbool16_t sb0 = __riscv_vmseq_vx_u16m1_b16(
      __riscv_vand_vx_u16m1(__riscv_vid_v_u16m1(vl), 1, vl), 0, vl);

  vint16m1_t vt, vu;
  vint32m2_t wa, wb, ws;

  for (i = 0; i < MLKEM_N; i += vl)
  {
    const vint16m1_t vz = __riscv_vle16_v_i16m1(&roots[i], vl);

    /* accumulate over the k polynomial pairs at this coefficient offset */
    for (j = 0; j < kn; j += MLKEM_N)
    {
      vt = __riscv_vle16_v_i16m1(&a[i + j], vl);
      vu = __riscv_vle16_v_i16m1(&b[i + j], vl);

      wa = __riscv_vwmul_vv_i32m2(vz, fq_mul_vv(vt, vu, vl), vl);
      wb = __riscv_vwmul_vv_i32m2(vt, __riscv_vrgather_vv_i16m1(vu, sw0, vl),
                                  vl);

      wa =
          __riscv_vadd_vv_i32m2(wa, __riscv_vslidedown_vx_i32m2(wa, 1, vl), vl);
      wb = __riscv_vadd_vv_i32m2(wb, __riscv_vslideup_vx_i32m2(wb, wb, 1, vl),
                                 vl);

      /* even lanes take the constant term (wa), odd lanes the X term (wb) */
      wa = __riscv_vmerge_vvm_i32m2(wb, wa, sb0, vl);

      if (j == 0)
      {
        ws = wa;
      }
      else
      {
        ws = __riscv_vadd_vv_i32m2(ws, wa, vl);
      }
    }
    /* the idea is to keep 32-bit intermediate result, reduce in the end */
    __riscv_vse16_v_i16m1(&r[i], fq_redc2(ws, vl), vl);
  }
}
|
|
670
|
+
|
|
671
|
+
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 2
|
|
672
|
+
void mlk_rv64v_poly_basemul_mont_add_k2(int16_t *r, const int16_t *a,
|
|
673
|
+
const int16_t *b)
|
|
674
|
+
{
|
|
675
|
+
mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 2 * MLKEM_N);
|
|
676
|
+
}
|
|
677
|
+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 */
|
|
678
|
+
|
|
679
|
+
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 3
|
|
680
|
+
void mlk_rv64v_poly_basemul_mont_add_k3(int16_t *r, const int16_t *a,
|
|
681
|
+
const int16_t *b)
|
|
682
|
+
{
|
|
683
|
+
mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 3 * MLKEM_N);
|
|
684
|
+
}
|
|
685
|
+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 3 */
|
|
686
|
+
|
|
687
|
+
#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
|
|
688
|
+
void mlk_rv64v_poly_basemul_mont_add_k4(int16_t *r, const int16_t *a,
|
|
689
|
+
const int16_t *b)
|
|
690
|
+
{
|
|
691
|
+
mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 4 * MLKEM_N);
|
|
692
|
+
}
|
|
693
|
+
#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
|
|
694
|
+
|
|
695
|
+
/*************************************************
|
|
696
|
+
* Name: poly_tomont
|
|
697
|
+
*
|
|
698
|
+
* Description: Inplace conversion of all coefficients of a polynomial
|
|
699
|
+
* from normal domain to Montgomery domain
|
|
700
|
+
*
|
|
701
|
+
* Arguments: - int16_t *r: pointer to input/output polynomial
|
|
702
|
+
**************************************************/
|
|
703
|
+
void mlk_rv64v_poly_tomont(int16_t *r)
|
|
704
|
+
{
|
|
705
|
+
size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
|
|
706
|
+
|
|
707
|
+
for (size_t i = 0; i < MLKEM_N; i += vl)
|
|
708
|
+
{
|
|
709
|
+
vint16m1_t vec = __riscv_vle16_v_i16m1(&r[i], vl);
|
|
710
|
+
vec = fq_mul_vx(vec, MLK_RVV_MONT_R2, vl);
|
|
711
|
+
__riscv_vse16_v_i16m1(&r[i], vec, vl);
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/*************************************************
|
|
716
|
+
* Name: poly_reduce
|
|
717
|
+
*
|
|
718
|
+
* Description: Applies Barrett reduction to all coefficients of a polynomial
|
|
719
|
+
* for details of the Barrett reduction see
|
|
720
|
+
* comments in poly.c
|
|
721
|
+
*
|
|
722
|
+
* Arguments: - int16_t *r: pointer to input/output polynomial
|
|
723
|
+
**************************************************/
|
|
724
|
+
void mlk_rv64v_poly_reduce(int16_t *r)
|
|
725
|
+
{
|
|
726
|
+
size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
|
|
727
|
+
vint16m1_t vt;
|
|
728
|
+
|
|
729
|
+
for (size_t i = 0; i < MLKEM_N; i += vl)
|
|
730
|
+
{
|
|
731
|
+
vt = __riscv_vle16_v_i16m1(&r[i], vl);
|
|
732
|
+
vt = fq_barrett(vt, vl);
|
|
733
|
+
vt = fq_cadd(vt, vl);
|
|
734
|
+
__riscv_vse16_v_i16m1(&r[i], vt, vl);
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/* Run rejection sampling to get uniform random integers mod q */

/*
 * Unpacks buf into 12-bit candidates and keeps those < q, writing up to
 * `len` accepted values to r. Returns the number of accepted values;
 * the caller detects an exhausted buffer via a return < len.
 *
 * Vector pipeline per iteration: load 16-bit words, gather so each lane
 * sees the word containing the start of its 12-bit field, align via
 * per-lane shifts, mask to 12 bits, then vcompress the lanes that pass
 * the < q test into a dense output.
 *
 * NOTE(review): the load casts buf to uint16_t* -- assumes little-endian
 * and that unaligned vector loads are acceptable on the target; confirm.
 */
unsigned int mlk_rv64v_rej_uniform(int16_t *r, unsigned int len,
                                   const uint8_t *buf, unsigned int buflen)
{
  unsigned n, ctr, pos;
  vuint16m1_t x, y;
  vbool16_t lt;

  pos = 0;
  ctr = 0;

  while (ctr < len && pos < buflen)
  {
    /* vl lanes of output need vl*12 bits = vl23 16-bit words of input */
    const unsigned vl = (unsigned)__riscv_vsetvl_e16m1((buflen - pos) * 8 / 12);
    const unsigned vl23 = (vl * 24) / 32;

    const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
    const vuint16m1_t srl12v = __riscv_vmul_vx_u16m1(vid, 12, vl);  /* bit offset */
    const vuint16m1_t sel12v = __riscv_vsrl_vx_u16m1(srl12v, 4, vl); /* word index */
    const vuint16m1_t sll12v = __riscv_vsll_vx_u16m1(vid, 2, vl);   /* carry shift */

    /* Functionally, this loop is not necessary, but it avoids re-evaluating
     * the VL too many times. In particular, in the first outer iteration,
     * the inner loop will process the bulk of the data with fixed VL. */
    while (ctr < len && vl23 * 2 <= buflen - pos)
    {
      x = __riscv_vle16_v_u16m1((uint16_t *)&buf[pos], vl23);
      pos += vl23 * 2;
      /* each lane fetches the word holding the low bits of its field,
       * then ORs in the spill-over bits from the following word */
      x = __riscv_vrgather_vv_u16m1(x, sel12v, vl);
      x = __riscv_vor_vv_u16m1(
          __riscv_vsrl_vv_u16m1(x, srl12v, vl),
          __riscv_vsll_vv_u16m1(__riscv_vslidedown(x, 1, vl), sll12v, vl), vl);
      x = __riscv_vand_vx_u16m1(x, 0xFFF, vl); /* isolate the 12-bit value */

      /* rejection: pack accepted (< q) candidates densely */
      lt = __riscv_vmsltu_vx_u16m1_b16(x, MLKEM_Q, vl);
      y = __riscv_vcompress_vm_u16m1(x, lt, vl);
      n = (unsigned)__riscv_vcpop_m_b16(lt, vl);

      /* don't write past the requested count */
      if (ctr + n > len)
      {
        n = len - ctr;
      }
      __riscv_vse16_v_u16m1((uint16_t *)&r[ctr], y, n);
      ctr += n;
    }
  }

  return ctr;
}
|
|
788
|
+
|
|
789
|
+
#else /* MLK_ARITH_BACKEND_RISCV64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|
|
790
|
+
|
|
791
|
+
MLK_EMPTY_CU(rv64v_poly)
|
|
792
|
+
|
|
793
|
+
#endif /* !(MLK_ARITH_BACKEND_RISCV64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED) */
|
|
794
|
+
|
|
795
|
+
/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
|
|
796
|
+
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
|
|
797
|
+
#undef MLK_RVV_QI
|
|
798
|
+
#undef MLK_RVV_MONT_R1
|
|
799
|
+
#undef MLK_RVV_MONT_R2
|
|
800
|
+
#undef MLK_RVV_MONT_NR
|
|
801
|
+
#undef MLK_RVV_CT_BFLY_FX
|
|
802
|
+
#undef MLK_RVV_CT_BFLY_FV
|
|
803
|
+
#undef MLK_RVV_GS_BFLY_RX
|
|
804
|
+
#undef MLK_RVV_GS_BFLY_RV
|
|
805
|
+
#undef MLK_RV64V_ABS_BOUNDS16
|