pq_crypto 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/*yaml
|
|
7
|
+
Name: poly_reduce_asm
|
|
8
|
+
Description: Barrett reduction of polynomial coefficients
|
|
9
|
+
Signature: void mlk_poly_reduce_asm(int16_t p[256])
|
|
10
|
+
ABI:
|
|
11
|
+
x0:
|
|
12
|
+
type: buffer
|
|
13
|
+
size_bytes: 512
|
|
14
|
+
permissions: read/write
|
|
15
|
+
c_parameter: int16_t p[256]
|
|
16
|
+
description: Input/output polynomial
|
|
17
|
+
Stack:
|
|
18
|
+
bytes: 0
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
#include "../../../common.h"
|
|
22
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
23
|
+
|
|
24
|
+
/*
|
|
25
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
26
|
+
* dev/aarch64_opt/src/poly_reduce_asm.S using scripts/simpasm. Do not modify it directly.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
#if defined(__ELF__)
|
|
30
|
+
.section .note.GNU-stack,"",@progbits
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
.text
|
|
34
|
+
.balign 4
|
|
35
|
+
.global MLK_ASM_NAMESPACE(poly_reduce_asm)
|
|
36
|
+
/*
 * poly_reduce_asm: reduces, in place, the 256 int16 coefficients in the
 * 512-byte buffer addressed by x0 (see the ABI header of this file).
 *
 * Arithmetic applied to every 8-lane halfword vector a:
 *   t  = sqdmulh(a, 20159)    -- high half of 2 * a * 20159
 *   t  = srshr(t, 11)         -- rounding shift, so t ~= a * 20159 / 2^26 ~= a / 3329
 *   a -= t * 3329             -- mls
 *   a += 3329 & (a >> 15)     -- adds 3329 to the lanes that came out negative
 * NOTE(review): this presumably leaves every coefficient in [0, 3329);
 * confirm the exact output range against the C reference implementation.
 *
 * Register roles:
 *   x0  running pointer into the buffer, advanced by 0x40 per post-indexed ldr
 *   x1  count of 64-byte groups (4 vectors each) still to be started
 *   w2  scratch used only to materialise the two constants
 *   v3  3329 in every halfword lane
 *   v4  20159 in every halfword lane (only lane 0 is read)
 * All other data lives in v0-v2, v5-v7 and v16-v31; v8-v15 and sp are never
 * touched, so nothing is saved or restored.
 *
 * Schedule: the work on consecutive groups is interleaved. The prologue loads
 * group 0 plus the first vector of group 1, each loop iteration stores one
 * finished group while loading and reducing the next, and the epilogue
 * finishes and stores the last two groups.
 */
MLK_ASM_FN_SYMBOL(poly_reduce_asm)
|
|
37
|
+
|
|
38
|
+
.cfi_startproc
|
|
39
|
+
mov w2, #0xd01 // =3329: the modulus subtracted by the mls steps
|
|
40
|
+
dup v3.8h, w2 // v3 = 3329 in all 8 halfword lanes
|
|
41
|
+
mov w2, #0x4ebf // =20159: multiplier, round(2^26 / 3329)
|
|
42
|
+
dup v4.8h, w2 // v4 = 20159 in all 8 halfword lanes
|
|
43
|
+
mov x1, #0x8 // =8: number of 64-byte groups (8 * 64 = 512 bytes)
|
|
44
|
+
ldr q21, [x0], #0x40 // group 0, vector 0; x0 += 64
|
|
45
|
+
ldur q18, [x0, #-0x20] // group 0, vector 2
|
|
46
|
+
ldur q0, [x0, #-0x30] // group 0, vector 1
|
|
47
|
+
ldur q5, [x0, #-0x10] // group 0, vector 3
|
|
48
|
+
ldr q26, [x0], #0x40 // group 1, vector 0; x0 += 64
|
|
49
|
+
sqdmulh v17.8h, v21.8h, v4.h[0] // quotient estimate, step 1 (multiply by 20159)
|
|
50
|
+
sqdmulh v27.8h, v18.8h, v4.h[0]
|
|
51
|
+
sqdmulh v22.8h, v0.8h, v4.h[0]
|
|
52
|
+
srshr v17.8h, v17.8h, #0xb // quotient estimate, step 2 (rounding shift by 11)
|
|
53
|
+
sqdmulh v23.8h, v5.8h, v4.h[0]
|
|
54
|
+
srshr v29.8h, v27.8h, #0xb
|
|
55
|
+
mls v21.8h, v17.8h, v3.h[0] // a -= quotient * 3329
|
|
56
|
+
srshr v17.8h, v22.8h, #0xb
|
|
57
|
+
mls v18.8h, v29.8h, v3.h[0]
|
|
58
|
+
srshr v22.8h, v23.8h, #0xb
|
|
59
|
+
mls v0.8h, v17.8h, v3.h[0]
|
|
60
|
+
sshr v2.8h, v21.8h, #0xf // all-ones in lanes where the result is negative
|
|
61
|
+
mls v5.8h, v22.8h, v3.h[0]
|
|
62
|
+
sshr v29.8h, v18.8h, #0xf
|
|
63
|
+
and v19.16b, v3.16b, v2.16b // 3329 in negative lanes, 0 elsewhere
|
|
64
|
+
sqdmulh v2.8h, v26.8h, v4.h[0] // start on group 1, vector 0
|
|
65
|
+
sshr v31.8h, v0.8h, #0xf
|
|
66
|
+
add v17.8h, v21.8h, v19.8h // v17 = finished group 0, vector 0
|
|
67
|
+
and v21.16b, v3.16b, v29.16b // correction for v18, added at the top of the loop
|
|
68
|
+
and v31.16b, v3.16b, v31.16b // correction for v0, added at the top of the loop
|
|
69
|
+
sub x1, x1, #0x2 // loop body runs 8 - 2 = 6 times
|
|
70
|
+
|
|
71
|
+
// On entry to each iteration x0 is 0x80 past the start of the group being
// stored and 0x40 past the start of the group being loaded. v17 holds the
// finished vector 0 of the group being stored; v26/v2 hold vector 0 of the
// group being loaded and its multiply result.
Lpoly_reduce_loop_start:
|
|
72
|
+
add v21.8h, v18.8h, v21.8h // finish vector 2 of the group being stored
|
|
73
|
+
ldur q18, [x0, #-0x20] // load vector 2 of the next group
|
|
74
|
+
add v25.8h, v0.8h, v31.8h // finish vector 1 of the group being stored
|
|
75
|
+
ldur q0, [x0, #-0x30] // load vector 1 of the next group
|
|
76
|
+
stur q21, [x0, #-0x60] // store vector 2
|
|
77
|
+
sshr v28.8h, v5.8h, #0xf
|
|
78
|
+
stur q17, [x0, #-0x80] // store vector 0
|
|
79
|
+
srshr v23.8h, v2.8h, #0xb
|
|
80
|
+
sqdmulh v30.8h, v18.8h, v4.h[0]
|
|
81
|
+
stur q25, [x0, #-0x70] // store vector 1
|
|
82
|
+
and v22.16b, v3.16b, v28.16b
|
|
83
|
+
sqdmulh v7.8h, v0.8h, v4.h[0]
|
|
84
|
+
add v16.8h, v5.8h, v22.8h // finish vector 3 of the group being stored
|
|
85
|
+
ldur q5, [x0, #-0x10] // load vector 3 of the next group
|
|
86
|
+
mls v26.8h, v23.8h, v3.h[0]
|
|
87
|
+
stur q16, [x0, #-0x50] // store vector 3
|
|
88
|
+
srshr v6.8h, v30.8h, #0xb
|
|
89
|
+
srshr v1.8h, v7.8h, #0xb
|
|
90
|
+
sqdmulh v19.8h, v5.8h, v4.h[0]
|
|
91
|
+
mls v18.8h, v6.8h, v3.h[0]
|
|
92
|
+
sshr v24.8h, v26.8h, #0xf
|
|
93
|
+
mls v0.8h, v1.8h, v3.h[0]
|
|
94
|
+
and v27.16b, v3.16b, v24.16b
|
|
95
|
+
srshr v29.8h, v19.8h, #0xb
|
|
96
|
+
add v17.8h, v26.8h, v27.8h // v17 = finished vector 0, stored by the next iteration or the epilogue
|
|
97
|
+
ldr q26, [x0], #0x40 // vector 0 of the group after next; x0 += 64
|
|
98
|
+
sshr v1.8h, v18.8h, #0xf
|
|
99
|
+
mls v5.8h, v29.8h, v3.h[0]
|
|
100
|
+
sshr v20.8h, v0.8h, #0xf
|
|
101
|
+
and v21.16b, v3.16b, v1.16b
|
|
102
|
+
and v31.16b, v3.16b, v20.16b
|
|
103
|
+
sqdmulh v2.8h, v26.8h, v4.h[0]
|
|
104
|
+
subs x1, x1, #0x1 // flags are also updated here but nothing in this routine reads them
|
|
105
|
+
cbnz x1, Lpoly_reduce_loop_start // tests x1 directly
|
|
106
|
+
add v28.8h, v0.8h, v31.8h // epilogue: x0 is now 512 bytes past the start of the buffer
|
|
107
|
+
ldur q29, [x0, #-0x10] // last group, vector 3
|
|
108
|
+
add v21.8h, v18.8h, v21.8h
|
|
109
|
+
srshr v18.8h, v2.8h, #0xb
|
|
110
|
+
sshr v2.8h, v5.8h, #0xf
|
|
111
|
+
ldur q16, [x0, #-0x20] // last group, vector 2
|
|
112
|
+
stur q17, [x0, #-0x80] // second-to-last group, vector 0
|
|
113
|
+
ldur q0, [x0, #-0x30] // last group, vector 1
|
|
114
|
+
and v2.16b, v3.16b, v2.16b
|
|
115
|
+
sqdmulh v24.8h, v29.8h, v4.h[0]
|
|
116
|
+
stur q28, [x0, #-0x70] // second-to-last group, vector 1
|
|
117
|
+
stur q21, [x0, #-0x60] // second-to-last group, vector 2
|
|
118
|
+
add v31.8h, v5.8h, v2.8h
|
|
119
|
+
sqdmulh v6.8h, v16.8h, v4.h[0]
|
|
120
|
+
stur q31, [x0, #-0x50] // second-to-last group, vector 3
|
|
121
|
+
sqdmulh v17.8h, v0.8h, v4.h[0]
|
|
122
|
+
srshr v22.8h, v24.8h, #0xb
|
|
123
|
+
mls v26.8h, v18.8h, v3.h[0]
|
|
124
|
+
srshr v31.8h, v6.8h, #0xb
|
|
125
|
+
mls v29.8h, v22.8h, v3.h[0]
|
|
126
|
+
srshr v19.8h, v17.8h, #0xb
|
|
127
|
+
mls v16.8h, v31.8h, v3.h[0]
|
|
128
|
+
sshr v7.8h, v26.8h, #0xf
|
|
129
|
+
mls v0.8h, v19.8h, v3.h[0]
|
|
130
|
+
and v5.16b, v3.16b, v7.16b
|
|
131
|
+
sshr v22.8h, v29.8h, #0xf
|
|
132
|
+
add v27.8h, v26.8h, v5.8h
|
|
133
|
+
and v26.16b, v3.16b, v22.16b
|
|
134
|
+
sshr v20.8h, v16.8h, #0xf
|
|
135
|
+
stur q27, [x0, #-0x40] // last group, vector 0
|
|
136
|
+
and v2.16b, v3.16b, v20.16b
|
|
137
|
+
sshr v23.8h, v0.8h, #0xf
|
|
138
|
+
add v18.8h, v29.8h, v26.8h
|
|
139
|
+
add v31.8h, v16.8h, v2.8h
|
|
140
|
+
and v29.16b, v3.16b, v23.16b
|
|
141
|
+
stur q18, [x0, #-0x10] // last group, vector 3
|
|
142
|
+
add v25.8h, v0.8h, v29.8h
|
|
143
|
+
stur q31, [x0, #-0x20] // last group, vector 2
|
|
144
|
+
stur q25, [x0, #-0x30] // last group, vector 1
|
|
145
|
+
ret
|
|
146
|
+
.cfi_endproc
|
|
147
|
+
|
|
148
|
+
MLK_ASM_FN_SIZE(poly_reduce_asm)
|
|
149
|
+
|
|
150
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/*yaml
|
|
7
|
+
Name: poly_tobytes_asm
|
|
8
|
+
Description: Convert polynomial to byte representation
|
|
9
|
+
Signature: void mlk_poly_tobytes_asm(uint8_t r[384], const int16_t a[256])
|
|
10
|
+
ABI:
|
|
11
|
+
x0:
|
|
12
|
+
type: buffer
|
|
13
|
+
size_bytes: 384
|
|
14
|
+
permissions: write-only
|
|
15
|
+
c_parameter: uint8_t r[384]
|
|
16
|
+
description: Output byte array
|
|
17
|
+
x1:
|
|
18
|
+
type: buffer
|
|
19
|
+
size_bytes: 512
|
|
20
|
+
permissions: read-only
|
|
21
|
+
c_parameter: const int16_t a[256]
|
|
22
|
+
description: Input polynomial
|
|
23
|
+
Stack:
|
|
24
|
+
bytes: 0
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
#include "../../../common.h"
|
|
28
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
29
|
+
|
|
30
|
+
/*
|
|
31
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
32
|
+
* dev/aarch64_opt/src/poly_tobytes_asm.S using scripts/simpasm. Do not modify it directly.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
#if defined(__ELF__)
|
|
36
|
+
.section .note.GNU-stack,"",@progbits
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
.text
|
|
40
|
+
.balign 4
|
|
41
|
+
.global MLK_ASM_NAMESPACE(poly_tobytes_asm)
|
|
42
|
+
MLK_ASM_FN_SYMBOL(poly_tobytes_asm)
|
|
43
|
+
|
|
44
|
+
.cfi_startproc // poly_tobytes_asm: x0 = r (384-byte output), x1 = a (256 x int16 input); each pair of coefficients is packed into 3 output bytes
|
|
45
|
+
mov x2, #0x10 // =16; seed for the loop counter (reduced to 6 below)
|
|
46
|
+
ldr q5, [x1, #0x10] // a[8..15]
|
|
47
|
+
ldr q3, [x1], #0x20 // a[0..7]; x1 += 32 bytes
|
|
48
|
+
ldr q29, [x1], #0x20 // a[16..23]; x1 += 32 bytes
|
|
49
|
+
ldur q2, [x1, #-0x10] // a[24..31]
|
|
50
|
+
ldr q27, [x1, #0x10] // a[40..47], preloaded for the first loop iteration
|
|
51
|
+
ldr q23, [x1, #0x30] // a[56..63], preloaded for the first loop iteration
|
|
52
|
+
ldr q17, [x1], #0x20 // a[32..39]; x1 += 32 bytes
|
|
53
|
+
ldr q16, [x1], #0x20 // a[48..55]; x1 += 32 bytes
|
|
54
|
+
uzp2 v26.8h, v3.8h, v5.8h // odd-indexed coefficients of a[0..15]
|
|
55
|
+
uzp1 v19.8h, v3.8h, v5.8h // even-indexed coefficients of a[0..15]
|
|
56
|
+
uzp2 v0.8h, v29.8h, v2.8h // odd-indexed coefficients of a[16..31]
|
|
57
|
+
uzp1 v1.8h, v29.8h, v2.8h // even-indexed coefficients of a[16..31]
|
|
58
|
+
xtn v5.8b, v26.8h // low 8 bits of each odd coefficient
|
|
59
|
+
shrn v3.8b, v19.8h, #0x8 // bits 8..15 of each even coefficient
|
|
60
|
+
shrn v4.8b, v26.8h, #0x4 // byte 2 of each triple: bits 4..11 of the odd coefficient
|
|
61
|
+
xtn v18.8b, v0.8h // second group: low 8 bits of each odd coefficient
|
|
62
|
+
shrn v30.8b, v0.8h, #0x4 // second group, byte 2
|
|
63
|
+
xtn v28.8b, v1.8h // second group, byte 0: low 8 bits of the even coefficient
|
|
64
|
+
shrn v29.8b, v1.8h, #0x8 // second group: bits 8..15 of each even coefficient
|
|
65
|
+
sli v3.8b, v5.8b, #0x4 // byte 1: low nibble kept from (even >> 8), high nibble = low 4 bits of odd
|
|
66
|
+
xtn v2.8b, v19.8h // byte 0: low 8 bits of the even coefficient
|
|
67
|
+
sli v29.8b, v18.8b, #0x4 // second group, byte 1
|
|
68
|
+
lsr x2, x2, #1 // x2 = 8
|
|
69
|
+
sub x2, x2, #0x2 // x2 = 6 loop iterations; the last 96 output bytes are stored after the loop
|
|
70
|
+
|
|
71
|
+
Lpoly_tobytes_loop_start: // per iteration: store 48 packed bytes, pack the 32 coefficients already loaded, load the next 32
|
|
72
|
+
uzp1 v25.8h, v17.8h, v27.8h // even-indexed coefficients of the first 16 loaded values
|
|
73
|
+
uzp2 v31.8h, v17.8h, v27.8h // odd-indexed coefficients of the first 16 loaded values
|
|
74
|
+
uzp1 v24.8h, v16.8h, v23.8h // even-indexed coefficients of the second 16 loaded values
|
|
75
|
+
uzp2 v6.8h, v16.8h, v23.8h // odd-indexed coefficients of the second 16 loaded values
|
|
76
|
+
st3 { v2.8b, v3.8b, v4.8b }, [x0], #24 // interleave bytes 0/1/2 -> 24 output bytes; must precede the rewrites of v2-v4 below
|
|
77
|
+
shrn v3.8b, v25.8h, #0x8
|
|
78
|
+
ldr q17, [x1], #0x20 // next block, coefficients 0..7; x1 += 32 bytes
|
|
79
|
+
shrn v4.8b, v31.8h, #0x4
|
|
80
|
+
xtn v21.8b, v6.8h
|
|
81
|
+
ldr q23, [x1, #0x10] // next block, coefficients 24..31
|
|
82
|
+
st3 { v28.8b, v29.8b, v30.8b }, [x0], #24 // second 24 output bytes; must precede the rewrites of v28-v30 below
|
|
83
|
+
shrn v29.8b, v24.8h, #0x8
|
|
84
|
+
ldur q27, [x1, #-0x10] // next block, coefficients 8..15
|
|
85
|
+
xtn v20.8b, v31.8h
|
|
86
|
+
ldr q16, [x1], #0x20 // next block, coefficients 16..23; x1 += 32 bytes
|
|
87
|
+
sli v29.8b, v21.8b, #0x4
|
|
88
|
+
xtn v2.8b, v25.8h
|
|
89
|
+
sli v3.8b, v20.8b, #0x4
|
|
90
|
+
xtn v28.8b, v24.8h
|
|
91
|
+
shrn v30.8b, v6.8h, #0x4
|
|
92
|
+
subs x2, x2, #0x1 // decrement the iteration counter
|
|
93
|
+
cbnz x2, Lpoly_tobytes_loop_start // repeat until the counter reaches zero
|
|
94
|
+
uzp2 v7.8h, v17.8h, v27.8h // tail: split the last 32 loaded coefficients into odd/even lanes
|
|
95
|
+
uzp1 v25.8h, v17.8h, v27.8h
|
|
96
|
+
uzp2 v0.8h, v16.8h, v23.8h
|
|
97
|
+
st3 { v2.8b, v3.8b, v4.8b }, [x0], #24 // store the triples packed in the final loop iteration
|
|
98
|
+
st3 { v28.8b, v29.8b, v30.8b }, [x0], #24
|
|
99
|
+
shrn v21.8b, v25.8h, #0x8
|
|
100
|
+
uzp1 v2.8h, v16.8h, v23.8h
|
|
101
|
+
shrn v22.8b, v7.8h, #0x4
|
|
102
|
+
shrn v4.8b, v0.8h, #0x4
|
|
103
|
+
xtn v28.8b, v7.8h
|
|
104
|
+
xtn v27.8b, v0.8h
|
|
105
|
+
shrn v3.8b, v2.8h, #0x8
|
|
106
|
+
sli v21.8b, v28.8b, #0x4
|
|
107
|
+
xtn v2.8b, v2.8h
|
|
108
|
+
sli v3.8b, v27.8b, #0x4
|
|
109
|
+
xtn v20.8b, v25.8h
|
|
110
|
+
st3 { v20.8b, v21.8b, v22.8b }, [x0], #24 // final 48 output bytes
|
|
111
|
+
st3 { v2.8b, v3.8b, v4.8b }, [x0], #24
|
|
112
|
+
ret
|
|
113
|
+
.cfi_endproc
|
|
114
|
+
|
|
115
|
+
MLK_ASM_FN_SIZE(poly_tobytes_asm)
|
|
116
|
+
|
|
117
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/*yaml
|
|
7
|
+
Name: poly_tomont_asm
|
|
8
|
+
Description: Convert polynomial to Montgomery domain
|
|
9
|
+
Signature: void mlk_poly_tomont_asm(int16_t p[256])
|
|
10
|
+
ABI:
|
|
11
|
+
x0:
|
|
12
|
+
type: buffer
|
|
13
|
+
size_bytes: 512
|
|
14
|
+
permissions: read/write
|
|
15
|
+
c_parameter: int16_t p[256]
|
|
16
|
+
description: Input/output polynomial
|
|
17
|
+
Stack:
|
|
18
|
+
bytes: 0
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
#include "../../../common.h"
|
|
22
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
23
|
+
|
|
24
|
+
/*
|
|
25
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
26
|
+
* dev/aarch64_opt/src/poly_tomont_asm.S using scripts/simpasm. Do not modify it directly.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
#if defined(__ELF__)
|
|
30
|
+
.section .note.GNU-stack,"",@progbits
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
.text
|
|
34
|
+
.balign 4
|
|
35
|
+
.global MLK_ASM_NAMESPACE(poly_tomont_asm)
|
|
36
|
+
MLK_ASM_FN_SYMBOL(poly_tomont_asm)
|
|
37
|
+
|
|
38
|
+
.cfi_startproc // poly_tomont_asm: x0 = p (256 x int16, rewritten in place); per 16-bit lane: c <- c*(-1044) - sqrdmulh(c, -10276)*3329
|
|
39
|
+
mov w2, #0xd01 // =3329; multiplier used by the mls correction steps
|
|
40
|
+
dup v4.8h, w2 // v4 = 3329 in every lane
|
|
41
|
+
mov w2, #0x4ebf // =20159
|
|
42
|
+
dup v5.8h, w2 // v5 = 20159 in every lane; NOTE(review): v5 is never read in this function
|
|
43
|
+
mov w2, #-0x414 // =-1044; equals 2^16 mod 3329 as a signed representative (2285 - 3329)
|
|
44
|
+
dup v2.8h, w2 // v2 = -1044 in every lane
|
|
45
|
+
mov w2, #-0x2824 // =-10276; approximately -1044 * 2^15 / 3329, used with sqrdmulh to estimate the quotient
|
|
46
|
+
dup v3.8h, w2 // v3 = -10276 in every lane
|
|
47
|
+
mov x1, #0x8 // =8; eight 64-byte blocks cover the 512-byte polynomial
|
|
48
|
+
ldr q18, [x0, #0x20] // p[16..23]
|
|
49
|
+
ldr q0, [x0, #0x10] // p[8..15]
|
|
50
|
+
ldr q16, [x0], #0x40 // p[0..7]; x0 += 64 bytes
|
|
51
|
+
sqrdmulh v23.8h, v0.8h, v3.8h // quotient estimate for p[8..15]
|
|
52
|
+
mul v26.8h, v0.8h, v2.8h // low 16 bits of p[8..15] * -1044
|
|
53
|
+
sqrdmulh v19.8h, v16.8h, v3.8h
|
|
54
|
+
mls v26.8h, v23.8h, v4.h[0] // subtract estimate * 3329
|
|
55
|
+
mul v29.8h, v16.8h, v2.8h
|
|
56
|
+
ldur q16, [x0, #-0x10] // p[24..31]; reuses v16 after its last read above
|
|
57
|
+
mls v29.8h, v19.8h, v4.h[0]
|
|
58
|
+
stur q26, [x0, #-0x30] // write back p[8..15]
|
|
59
|
+
sqrdmulh v26.8h, v18.8h, v3.8h
|
|
60
|
+
mul v18.8h, v18.8h, v2.8h
|
|
61
|
+
stur q29, [x0, #-0x40] // write back p[0..7]
|
|
62
|
+
sqrdmulh v29.8h, v16.8h, v3.8h // estimate for p[24..31]; consumed in the loop (or tail)
|
|
63
|
+
mls v18.8h, v26.8h, v4.h[0] // result for p[16..23]; stored in the loop (or tail)
|
|
64
|
+
sub x1, x1, #0x1 // x1 = 7 loop iterations; block 0 was started above
|
|
65
|
+
|
|
66
|
+
Lpoly_tomont_loop: // per iteration: finish the previous 64-byte block and start the next one
|
|
67
|
+
ldr q19, [x0, #0x10] // next block, coefficients 8..15
|
|
68
|
+
mul v26.8h, v16.8h, v2.8h // previous block, last vector * -1044
|
|
69
|
+
ldr q23, [x0, #0x20] // next block, coefficients 16..23
|
|
70
|
+
ldr q17, [x0], #0x40 // next block, coefficients 0..7; x0 += 64 bytes
|
|
71
|
+
mls v26.8h, v29.8h, v4.h[0]
|
|
72
|
+
ldur q16, [x0, #-0x10] // next block, coefficients 24..31
|
|
73
|
+
sqrdmulh v28.8h, v19.8h, v3.8h
|
|
74
|
+
stur q18, [x0, #-0x60] // previous block, third vector
|
|
75
|
+
mul v0.8h, v19.8h, v2.8h
|
|
76
|
+
stur q26, [x0, #-0x50] // previous block, fourth vector
|
|
77
|
+
sqrdmulh v24.8h, v23.8h, v3.8h
|
|
78
|
+
mul v18.8h, v23.8h, v2.8h
|
|
79
|
+
sqrdmulh v22.8h, v17.8h, v3.8h
|
|
80
|
+
mul v26.8h, v17.8h, v2.8h
|
|
81
|
+
mls v0.8h, v28.8h, v4.h[0]
|
|
82
|
+
mls v26.8h, v22.8h, v4.h[0]
|
|
83
|
+
sqrdmulh v29.8h, v16.8h, v3.8h // estimate for the fourth vector; consumed next iteration (or in the tail)
|
|
84
|
+
stur q0, [x0, #-0x30] // current block, second vector
|
|
85
|
+
mls v18.8h, v24.8h, v4.h[0] // current block, third vector; stored next iteration (or in the tail)
|
|
86
|
+
stur q26, [x0, #-0x40] // current block, first vector
|
|
87
|
+
sub x1, x1, #0x1 // decrement the iteration counter
|
|
88
|
+
cbnz x1, Lpoly_tomont_loop // repeat until the counter reaches zero
|
|
89
|
+
mul v16.8h, v16.8h, v2.8h // tail: finish the last vector of the final block
|
|
90
|
+
stur q18, [x0, #-0x20] // final block, third vector
|
|
91
|
+
mls v16.8h, v29.8h, v4.h[0]
|
|
92
|
+
stur q16, [x0, #-0x10] // final block, fourth vector
|
|
93
|
+
ret
|
|
94
|
+
.cfi_endproc
|
|
95
|
+
|
|
96
|
+
MLK_ASM_FN_SIZE(poly_tomont_asm)
|
|
97
|
+
|
|
98
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [NeonNTT]
|
|
10
|
+
* Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
|
|
11
|
+
* Becker, Hwang, Kannwischer, Yang, Yang
|
|
12
|
+
* https://eprint.iacr.org/2021/986
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*yaml
|
|
16
|
+
Name: polyvec_basemul_acc_montgomery_cached_asm_k2
|
|
17
|
+
Description: Re-implementation of asymmetric base multiplication following @[NeonNTT] for k=2
|
|
18
|
+
Signature: void mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(int16_t r[256], const int16_t a[512], const int16_t b[512], const int16_t b_cache[256])
|
|
19
|
+
ABI:
|
|
20
|
+
x0:
|
|
21
|
+
type: buffer
|
|
22
|
+
size_bytes: 512
|
|
23
|
+
permissions: write-only
|
|
24
|
+
c_parameter: int16_t r[256]
|
|
25
|
+
description: Output polynomial
|
|
26
|
+
x1:
|
|
27
|
+
type: buffer
|
|
28
|
+
size_bytes: 1024
|
|
29
|
+
permissions: read-only
|
|
30
|
+
c_parameter: const int16_t a[512]
|
|
31
|
+
description: Input polynomial vector a
|
|
32
|
+
x2:
|
|
33
|
+
type: buffer
|
|
34
|
+
size_bytes: 1024
|
|
35
|
+
permissions: read-only
|
|
36
|
+
c_parameter: const int16_t b[512]
|
|
37
|
+
description: Input polynomial vector b
|
|
38
|
+
x3:
|
|
39
|
+
type: buffer
|
|
40
|
+
size_bytes: 512
|
|
41
|
+
permissions: read-only
|
|
42
|
+
c_parameter: const int16_t b_cache[256]
|
|
43
|
+
description: Cached values for b
|
|
44
|
+
Stack:
|
|
45
|
+
bytes: 64
|
|
46
|
+
description: saving callee-saved Neon registers
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
/* Re-implementation of asymmetric base multiplication following @[NeonNTT] */
|
|
50
|
+
|
|
51
|
+
#include "../../../common.h"
|
|
52
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 2)
|
|
53
|
+
|
|
54
|
+
/*
|
|
55
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
56
|
+
* dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S using scripts/simpasm. Do not modify it directly.
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
#if defined(__ELF__)
|
|
60
|
+
.section .note.GNU-stack,"",@progbits
|
|
61
|
+
#endif
|
|
62
|
+
|
|
63
|
+
.text
|
|
64
|
+
.balign 4
|
|
65
|
+
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2)
|
|
66
|
+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k2)
|
|
67
|
+
|
|
68
|
+
.cfi_startproc // x0 = r (output, 256 x int16), x1 = a, x2 = b (two polynomials each), x3 = b_cache; accumulates products over both polynomials, then reduces
|
|
69
|
+
sub sp, sp, #0x40 // reserve 64 bytes for the callee-saved d8-d15
|
|
70
|
+
.cfi_adjust_cfa_offset 0x40
|
|
71
|
+
stp d8, d9, [sp] // save low halves of v8/v9; v8-v15 are used as scratch below
|
|
72
|
+
.cfi_rel_offset d8, 0x0
|
|
73
|
+
.cfi_rel_offset d9, 0x8
|
|
74
|
+
stp d10, d11, [sp, #0x10]
|
|
75
|
+
.cfi_rel_offset d10, 0x10
|
|
76
|
+
.cfi_rel_offset d11, 0x18
|
|
77
|
+
stp d12, d13, [sp, #0x20]
|
|
78
|
+
.cfi_rel_offset d12, 0x20
|
|
79
|
+
.cfi_rel_offset d13, 0x28
|
|
80
|
+
stp d14, d15, [sp, #0x30]
|
|
81
|
+
.cfi_rel_offset d14, 0x30
|
|
82
|
+
.cfi_rel_offset d15, 0x38
|
|
83
|
+
mov w14, #0xd01 // =3329
|
|
84
|
+
dup v0.8h, w14 // v0 = 3329 in every lane (multiplier in the reduction step)
|
|
85
|
+
mov w14, #0xcff // =3327; note 3329 * 3327 == -1 (mod 2^16), i.e. the negated inverse used for Montgomery-style reduction
|
|
86
|
+
dup v2.8h, w14 // v2 = 3327 in every lane
|
|
87
|
+
add x4, x1, #0x200 // x4 = second polynomial of a (a + 512 bytes)
|
|
88
|
+
add x5, x2, #0x200 // x5 = second polynomial of b (b + 512 bytes)
|
|
89
|
+
add x6, x3, #0x100 // x6 = second half of b_cache (b_cache + 256 bytes)
|
|
90
|
+
mov x13, #0x10 // =16; sixteen 32-byte output blocks in total
|
|
91
|
+
ldr q12, [x1], #0x20
|
|
92
|
+
ldur q9, [x1, #-0x10]
|
|
93
|
+
ldr q22, [x2], #0x20
|
|
94
|
+
ldur q30, [x2, #-0x10]
|
|
95
|
+
ldr q6, [x5], #0x20
|
|
96
|
+
ldr q7, [x4, #0x10]
|
|
97
|
+
ldr q8, [x4], #0x20
|
|
98
|
+
ldur q23, [x5, #-0x10]
|
|
99
|
+
uzp1 v16.8h, v12.8h, v9.8h // even-indexed coefficients of 16 values of a (first polynomial)
|
|
100
|
+
uzp2 v14.8h, v12.8h, v9.8h // odd-indexed coefficients of the same 16 values
|
|
101
|
+
uzp2 v13.8h, v22.8h, v30.8h // odd-indexed coefficients of b (first polynomial)
|
|
102
|
+
uzp1 v18.8h, v22.8h, v30.8h // even-indexed coefficients of b (first polynomial)
|
|
103
|
+
ld1 { v27.8h }, [x3], #16 // 8 cached values for the first polynomial of b
|
|
104
|
+
ld1 { v17.8h }, [x6], #16 // 8 cached values for the second polynomial of b
|
|
105
|
+
smull2 v4.4s, v16.8h, v18.8h // 32-bit accumulator (upper lanes): a_even * b_even
|
|
106
|
+
ldr q31, [x1, #0x10]
|
|
107
|
+
smull v19.4s, v16.4h, v13.4h // 32-bit accumulator (lower lanes): a_even * b_odd
|
|
108
|
+
ldr q24, [x1], #0x20
|
|
109
|
+
smlal v19.4s, v14.4h, v18.4h // += a_odd * b_even
|
|
110
|
+
ldr q22, [x2], #0x20
|
|
111
|
+
smlal2 v4.4s, v14.8h, v27.8h // += a_odd * cached value
|
|
112
|
+
uzp2 v5.8h, v6.8h, v23.8h
|
|
113
|
+
smull2 v29.4s, v16.8h, v13.8h
|
|
114
|
+
uzp2 v26.8h, v8.8h, v7.8h
|
|
115
|
+
smlal2 v29.4s, v14.8h, v18.8h
|
|
116
|
+
uzp1 v30.8h, v24.8h, v31.8h
|
|
117
|
+
uzp1 v8.8h, v8.8h, v7.8h
|
|
118
|
+
smull v11.4s, v16.4h, v18.4h
|
|
119
|
+
smlal v11.4s, v14.4h, v27.4h
|
|
120
|
+
ldur q1, [x2, #-0x10]
|
|
121
|
+
uzp1 v28.8h, v6.8h, v23.8h
|
|
122
|
+
smlal2 v29.4s, v8.8h, v5.8h // accumulate the second polynomial's contribution
|
|
123
|
+
ldr q25, [x5], #0x20
|
|
124
|
+
smlal v19.4s, v8.4h, v5.4h
|
|
125
|
+
ldr q3, [x4, #0x10]
|
|
126
|
+
smlal2 v29.4s, v26.8h, v28.8h
|
|
127
|
+
uzp1 v27.8h, v22.8h, v1.8h
|
|
128
|
+
smlal v19.4s, v26.4h, v28.4h
|
|
129
|
+
ldr q12, [x4], #0x20
|
|
130
|
+
smlal2 v4.4s, v8.8h, v28.8h
|
|
131
|
+
ldur q21, [x5, #-0x10]
|
|
132
|
+
smlal2 v4.4s, v26.8h, v17.8h
|
|
133
|
+
smlal v11.4s, v8.4h, v28.4h
|
|
134
|
+
ld1 { v15.8h }, [x6], #16
|
|
135
|
+
smlal v11.4s, v26.4h, v17.4h
|
|
136
|
+
ld1 { v20.8h }, [x3], #16
|
|
137
|
+
uzp1 v28.8h, v19.8h, v29.8h // low 16 bits of each 32-bit accumulator
|
|
138
|
+
smull2 v23.4s, v30.8h, v27.8h
|
|
139
|
+
smull v26.4s, v30.4h, v27.4h
|
|
140
|
+
uzp2 v16.8h, v22.8h, v1.8h
|
|
141
|
+
mul v28.8h, v28.8h, v2.8h // reduction factor: low half * 3327 (mod 2^16)
|
|
142
|
+
uzp1 v10.8h, v11.8h, v4.8h
|
|
143
|
+
smull2 v8.4s, v30.8h, v16.8h
|
|
144
|
+
mul v13.8h, v10.8h, v2.8h
|
|
145
|
+
smlal v19.4s, v28.4h, v0.4h // accumulator += factor * 3329
|
|
146
|
+
smlal2 v29.4s, v28.8h, v0.8h
|
|
147
|
+
smull v18.4s, v30.4h, v16.4h
|
|
148
|
+
uzp1 v30.8h, v25.8h, v21.8h
|
|
149
|
+
smlal v11.4s, v13.4h, v0.4h
|
|
150
|
+
uzp2 v6.8h, v24.8h, v31.8h
|
|
151
|
+
uzp1 v16.8h, v12.8h, v3.8h
|
|
152
|
+
smlal2 v4.4s, v13.8h, v0.8h
|
|
153
|
+
uzp2 v17.8h, v25.8h, v21.8h
|
|
154
|
+
smlal2 v8.4s, v6.8h, v27.8h
|
|
155
|
+
uzp2 v12.8h, v12.8h, v3.8h
|
|
156
|
+
smlal v18.4s, v6.4h, v27.4h
|
|
157
|
+
uzp2 v9.8h, v19.8h, v29.8h // high 16 bits = reduced odd-position results
|
|
158
|
+
smlal2 v8.4s, v16.8h, v17.8h
|
|
159
|
+
smlal2 v8.4s, v12.8h, v30.8h
|
|
160
|
+
uzp2 v19.8h, v11.8h, v4.8h // high 16 bits = reduced even-position results
|
|
161
|
+
sub x13, x13, #0x2 // x13 = 14 loop iterations; the last two output blocks are stored after the loop
|
|
162
|
+
|
|
163
|
+
Lpolyvec_basemul_acc_montgomery_cached_k2_loop_start: // per iteration: store one finished 32-byte block, reduce the next, start accumulating a third
|
|
164
|
+
smlal v18.4s, v16.4h, v17.4h
|
|
165
|
+
ldr q7, [x4], #0x20
|
|
166
|
+
ldr q10, [x2, #0x10]
|
|
167
|
+
smlal v18.4s, v12.4h, v30.4h
|
|
168
|
+
smlal2 v23.4s, v6.8h, v20.8h
|
|
169
|
+
ldr q14, [x2], #0x20
|
|
170
|
+
smlal2 v23.4s, v16.8h, v30.8h
|
|
171
|
+
zip1 v25.8h, v19.8h, v9.8h // re-interleave even/odd results (first 8 coefficients)
|
|
172
|
+
zip2 v3.8h, v19.8h, v9.8h // re-interleave even/odd results (next 8 coefficients)
|
|
173
|
+
smlal2 v23.4s, v12.8h, v15.8h
|
|
174
|
+
smlal v26.4s, v6.4h, v20.4h
|
|
175
|
+
uzp1 v5.8h, v18.8h, v8.8h // low 16 bits of the accumulators
|
|
176
|
+
uzp2 v21.8h, v14.8h, v10.8h
|
|
177
|
+
smlal v26.4s, v16.4h, v30.4h
|
|
178
|
+
str q25, [x0], #0x20 // store 8 output coefficients; x0 += 32 bytes
|
|
179
|
+
mul v29.8h, v5.8h, v2.8h // reduction factor: low half * 3327
|
|
180
|
+
uzp1 v24.8h, v14.8h, v10.8h
|
|
181
|
+
stur q3, [x0, #-0x10] // store the following 8 output coefficients
|
|
182
|
+
smlal v26.4s, v12.4h, v15.4h
|
|
183
|
+
ld1 { v15.8h }, [x6], #16
|
|
184
|
+
ldr q28, [x1, #0x10]
|
|
185
|
+
ldr q11, [x1], #0x20
|
|
186
|
+
ldr q13, [x5], #0x20
|
|
187
|
+
ldur q27, [x4, #-0x10]
|
|
188
|
+
smlal2 v8.4s, v29.8h, v0.8h // accumulator += factor * 3329
|
|
189
|
+
ldur q22, [x5, #-0x10]
|
|
190
|
+
smlal v18.4s, v29.4h, v0.4h
|
|
191
|
+
uzp1 v4.8h, v26.8h, v23.8h
|
|
192
|
+
uzp1 v1.8h, v11.8h, v28.8h
|
|
193
|
+
uzp2 v6.8h, v11.8h, v28.8h
|
|
194
|
+
uzp1 v16.8h, v7.8h, v27.8h
|
|
195
|
+
mul v31.8h, v4.8h, v2.8h
|
|
196
|
+
uzp2 v17.8h, v13.8h, v22.8h
|
|
197
|
+
ld1 { v20.8h }, [x3], #16
|
|
198
|
+
uzp2 v9.8h, v18.8h, v8.8h // high 16 bits = reduced odd-position results; read before v8/v18 are restarted below
|
|
199
|
+
smull2 v8.4s, v1.8h, v21.8h // start the next block's accumulators
|
|
200
|
+
uzp1 v30.8h, v13.8h, v22.8h
|
|
201
|
+
smlal2 v8.4s, v6.8h, v24.8h
|
|
202
|
+
smlal2 v8.4s, v16.8h, v17.8h
|
|
203
|
+
uzp2 v12.8h, v7.8h, v27.8h
|
|
204
|
+
smlal v26.4s, v31.4h, v0.4h
|
|
205
|
+
smlal2 v23.4s, v31.8h, v0.8h
|
|
206
|
+
smull v18.4s, v1.4h, v21.4h
|
|
207
|
+
smlal v18.4s, v6.4h, v24.4h
|
|
208
|
+
smlal2 v8.4s, v12.8h, v30.8h
|
|
209
|
+
uzp2 v19.8h, v26.8h, v23.8h // high 16 bits = reduced even-position results; read before v23/v26 are restarted below
|
|
210
|
+
smull2 v23.4s, v1.8h, v24.8h
|
|
211
|
+
smull v26.4s, v1.4h, v24.4h
|
|
212
|
+
subs x13, x13, #0x1 // decrement the iteration counter
|
|
213
|
+
cbnz x13, Lpolyvec_basemul_acc_montgomery_cached_k2_loop_start // repeat until the counter reaches zero
|
|
214
|
+
smlal v26.4s, v6.4h, v20.4h // tail: finish the accumulators of the final block
|
|
215
|
+
smlal2 v23.4s, v6.8h, v20.8h
|
|
216
|
+
smlal v26.4s, v16.4h, v30.4h
|
|
217
|
+
smlal2 v23.4s, v16.8h, v30.8h
|
|
218
|
+
smlal v26.4s, v12.4h, v15.4h
|
|
219
|
+
smlal2 v23.4s, v12.8h, v15.8h
|
|
220
|
+
smlal v18.4s, v16.4h, v17.4h
|
|
221
|
+
smlal v18.4s, v12.4h, v30.4h
|
|
222
|
+
zip1 v12.8h, v19.8h, v9.8h // v12 is free from here on; re-interleave the second-to-last block
|
|
223
|
+
str q12, [x0], #0x20 // store 8 output coefficients; x0 += 32 bytes
|
|
224
|
+
uzp1 v12.8h, v26.8h, v23.8h
|
|
225
|
+
mul v6.8h, v12.8h, v2.8h
|
|
226
|
+
uzp1 v12.8h, v18.8h, v8.8h
|
|
227
|
+
mul v12.8h, v12.8h, v2.8h
|
|
228
|
+
smlal v26.4s, v6.4h, v0.4h
|
|
229
|
+
smlal2 v23.4s, v6.8h, v0.8h
|
|
230
|
+
smlal2 v8.4s, v12.8h, v0.8h
|
|
231
|
+
smlal v18.4s, v12.4h, v0.4h
|
|
232
|
+
zip2 v12.8h, v19.8h, v9.8h
|
|
233
|
+
uzp2 v6.8h, v26.8h, v23.8h
|
|
234
|
+
stur q12, [x0, #-0x10] // second half of the second-to-last block
|
|
235
|
+
uzp2 v12.8h, v18.8h, v8.8h
|
|
236
|
+
zip2 v1.8h, v6.8h, v12.8h
|
|
237
|
+
zip1 v12.8h, v6.8h, v12.8h
|
|
238
|
+
str q1, [x0, #0x10] // last block, second half
|
|
239
|
+
str q12, [x0], #0x20 // last block, first half; x0 += 32 bytes
|
|
240
|
+
ldp d8, d9, [sp] // restore the callee-saved registers saved on entry
|
|
241
|
+
.cfi_restore d8
|
|
242
|
+
.cfi_restore d9
|
|
243
|
+
ldp d10, d11, [sp, #0x10]
|
|
244
|
+
.cfi_restore d10
|
|
245
|
+
.cfi_restore d11
|
|
246
|
+
ldp d12, d13, [sp, #0x20]
|
|
247
|
+
.cfi_restore d12
|
|
248
|
+
.cfi_restore d13
|
|
249
|
+
ldp d14, d15, [sp, #0x30]
|
|
250
|
+
.cfi_restore d14
|
|
251
|
+
.cfi_restore d15
|
|
252
|
+
add sp, sp, #0x40 // release the 64-byte save area
|
|
253
|
+
.cfi_adjust_cfa_offset -0x40
|
|
254
|
+
ret
|
|
255
|
+
.cfi_endproc
|
|
256
|
+
|
|
257
|
+
MLK_ASM_FN_SIZE(polyvec_basemul_acc_montgomery_cached_asm_k2)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
261
|
+
(MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2) */
|