pq_crypto 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [NeonNTT]
|
|
10
|
+
* Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
|
|
11
|
+
* Becker, Hwang, Kannwischer, Yang, Yang
|
|
12
|
+
* https://eprint.iacr.org/2021/986
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*yaml
|
|
16
|
+
Name: polyvec_basemul_acc_montgomery_cached_asm_k3
|
|
17
|
+
Description: Re-implementation of asymmetric base multiplication following @[NeonNTT] for k=3
|
|
18
|
+
Signature: void mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(int16_t r[256], const int16_t a[768], const int16_t b[768], const int16_t b_cache[384])
|
|
19
|
+
ABI:
|
|
20
|
+
x0:
|
|
21
|
+
type: buffer
|
|
22
|
+
size_bytes: 512
|
|
23
|
+
permissions: write-only
|
|
24
|
+
c_parameter: int16_t r[256]
|
|
25
|
+
description: Output polynomial
|
|
26
|
+
x1:
|
|
27
|
+
type: buffer
|
|
28
|
+
size_bytes: 1536
|
|
29
|
+
permissions: read-only
|
|
30
|
+
c_parameter: const int16_t a[768]
|
|
31
|
+
description: Input polynomial vector a
|
|
32
|
+
x2:
|
|
33
|
+
type: buffer
|
|
34
|
+
size_bytes: 1536
|
|
35
|
+
permissions: read-only
|
|
36
|
+
c_parameter: const int16_t b[768]
|
|
37
|
+
description: Input polynomial vector b
|
|
38
|
+
x3:
|
|
39
|
+
type: buffer
|
|
40
|
+
size_bytes: 768
|
|
41
|
+
permissions: read-only
|
|
42
|
+
c_parameter: const int16_t b_cache[384]
|
|
43
|
+
description: Cached values for b
|
|
44
|
+
Stack:
|
|
45
|
+
bytes: 64
|
|
46
|
+
description: saving callee-saved Neon registers
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
/* Re-implementation of asymmetric base multiplication following @[NeonNTT] */
|
|
50
|
+
|
|
51
|
+
#include "../../../common.h"
|
|
52
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 3)
|
|
53
|
+
|
|
54
|
+
/*
|
|
55
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
56
|
+
* dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S using scripts/simpasm. Do not modify it directly.
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
/* ELF targets only: emit an empty .note.GNU-stack section (no flags, in
 * particular no "x") so that this object does not request an executable
 * stack from the linker. */
#if defined(__ELF__)
|
|
60
|
+
.section .note.GNU-stack,"",@progbits
|
|
61
|
+
#endif
|
|
62
|
+
|
|
63
|
+
.text
|
|
64
|
+
.balign 4
|
|
65
|
+
/*
 * Entry point, prologue and loop preamble of
 * polyvec_basemul_acc_montgomery_cached_asm_k3.
 *
 * Registers as used below (buffer sizes are the ones given in the yaml
 * header of this file):
 *   x0       output r (first written inside the loop)
 *   x1       input vector a; x4 = x1 + 0x200 and x7 = x1 + 0x400 point
 *            512 and 1024 bytes into it
 *   x2       input vector b; x5 = x2 + 0x200 and x8 = x2 + 0x400
 *   x3       b_cache; x6 = x3 + 0x100 and x9 = x3 + 0x200
 *   x13      loop counter
 *   w14      scratch for building the two vector constants
 *   v0, v2   constants 3329 and 3327 in every 16-bit lane
 * v8-v15 are overwritten by this routine, so d8-d15 are spilled first.
 */
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3)
|
|
66
|
+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k3)
|
|
67
|
+
|
|
68
|
+
.cfi_startproc
|
|
69
|
+
/* Reserve 64 bytes of stack and save d8-d15 there; they are reloaded
   right before ret. */
sub sp, sp, #0x40
|
|
70
|
+
.cfi_adjust_cfa_offset 0x40
|
|
71
|
+
stp d8, d9, [sp]
|
|
72
|
+
.cfi_rel_offset d8, 0x0
|
|
73
|
+
.cfi_rel_offset d9, 0x8
|
|
74
|
+
stp d10, d11, [sp, #0x10]
|
|
75
|
+
.cfi_rel_offset d10, 0x10
|
|
76
|
+
.cfi_rel_offset d11, 0x18
|
|
77
|
+
stp d12, d13, [sp, #0x20]
|
|
78
|
+
.cfi_rel_offset d12, 0x20
|
|
79
|
+
.cfi_rel_offset d13, 0x28
|
|
80
|
+
stp d14, d15, [sp, #0x30]
|
|
81
|
+
.cfi_rel_offset d14, 0x30
|
|
82
|
+
.cfi_rel_offset d15, 0x38
|
|
83
|
+
/* v0 = 3329 and v2 = 3327 in all eight 16-bit lanes.
   3329 * 3327 = 3328^2 - 1, which is -1 modulo 2^16. The later
   "mul by v2, smlal/smlal2 by v0, uzp2" sequences use this to zero the
   low 16 bits of each 32-bit accumulator lane before keeping the high
   16 bits. */
mov w14, #0xd01 // =3329
|
|
84
|
+
dup v0.8h, w14
|
|
85
|
+
mov w14, #0xcff // =3327
|
|
86
|
+
dup v2.8h, w14
|
|
87
|
+
/* Secondary read pointers into a (x4, x7), b (x5, x8) and
   b_cache (x6, x9). */
add x4, x1, #0x200
|
|
88
|
+
add x5, x2, #0x200
|
|
89
|
+
add x6, x3, #0x100
|
|
90
|
+
add x7, x1, #0x400
|
|
91
|
+
add x8, x2, #0x400
|
|
92
|
+
add x9, x3, #0x200
|
|
93
|
+
/* x13 = 16; lowered by 2 at the end of this preamble and by 1 per loop
   pass. */
mov x13, #0x10 // =16
|
|
94
|
+
/* Preamble: loads and multiply-accumulates issued ahead of the loop.
   Each uzp1/uzp2 pair splits two consecutive 16-byte loads into their
   even-indexed and odd-indexed 16-bit elements. smull/smlal work on the
   low four lanes, smull2/smlal2 on the high four lanes. Loads are
   interleaved with arithmetic; the file is marked auto-derived, so keep
   the instruction order untouched. */
ldr q6, [x7], #0x20
|
|
95
|
+
ldr q19, [x2, #0x10]
|
|
96
|
+
ldr q23, [x1], #0x20
|
|
97
|
+
ldur q14, [x1, #-0x10]
|
|
98
|
+
ldr q17, [x2], #0x20
|
|
99
|
+
ldr q11, [x4, #0x10]
|
|
100
|
+
ldur q28, [x7, #-0x10]
|
|
101
|
+
ld1 { v30.8h }, [x3], #16
|
|
102
|
+
ldr q26, [x4], #0x20
|
|
103
|
+
ldr q16, [x8, #0x10]
|
|
104
|
+
uzp1 v8.8h, v23.8h, v14.8h
|
|
105
|
+
ldr q22, [x5, #0x10]
|
|
106
|
+
ldr q18, [x5], #0x20
|
|
107
|
+
uzp1 v20.8h, v17.8h, v19.8h
|
|
108
|
+
uzp2 v24.8h, v23.8h, v14.8h
|
|
109
|
+
ldr q31, [x8], #0x20
|
|
110
|
+
smull2 v4.4s, v8.8h, v20.8h
|
|
111
|
+
uzp1 v25.8h, v26.8h, v11.8h
|
|
112
|
+
smull v13.4s, v8.4h, v20.4h
|
|
113
|
+
ld1 { v23.8h }, [x6], #16
|
|
114
|
+
uzp1 v1.8h, v18.8h, v22.8h
|
|
115
|
+
smlal v13.4s, v24.4h, v30.4h
|
|
116
|
+
smlal2 v4.4s, v24.8h, v30.8h
|
|
117
|
+
uzp2 v5.8h, v26.8h, v11.8h
|
|
118
|
+
smlal2 v4.4s, v25.8h, v1.8h
|
|
119
|
+
uzp1 v29.8h, v6.8h, v28.8h
|
|
120
|
+
smlal2 v4.4s, v5.8h, v23.8h
|
|
121
|
+
ld1 { v7.8h }, [x9], #16
|
|
122
|
+
smlal v13.4s, v25.4h, v1.4h
|
|
123
|
+
uzp2 v17.8h, v17.8h, v19.8h
|
|
124
|
+
uzp1 v27.8h, v31.8h, v16.8h
|
|
125
|
+
smlal v13.4s, v5.4h, v23.4h
|
|
126
|
+
uzp2 v22.8h, v18.8h, v22.8h
|
|
127
|
+
smull v18.4s, v8.4h, v17.4h
|
|
128
|
+
uzp2 v28.8h, v6.8h, v28.8h
|
|
129
|
+
smlal v13.4s, v29.4h, v27.4h
|
|
130
|
+
smlal2 v4.4s, v29.8h, v27.8h
|
|
131
|
+
uzp2 v26.8h, v31.8h, v16.8h
|
|
132
|
+
smlal2 v4.4s, v28.8h, v7.8h
|
|
133
|
+
ldr q3, [x7, #0x10]
|
|
134
|
+
smlal v13.4s, v28.4h, v7.4h
|
|
135
|
+
ldr q7, [x1], #0x20
|
|
136
|
+
smlal v18.4s, v24.4h, v20.4h
|
|
137
|
+
ldr q15, [x2], #0x20
|
|
138
|
+
smlal v18.4s, v25.4h, v22.4h
|
|
139
|
+
smull2 v8.4s, v8.8h, v17.8h
|
|
140
|
+
ldur q17, [x1, #-0x10]
|
|
141
|
+
/* Low 16 bits of the accumulator pair v13/v4, then multiplied by v2
   (3327) as the first step of the reduction. */
uzp1 v23.8h, v13.8h, v4.8h
|
|
142
|
+
smlal v18.4s, v5.4h, v1.4h
|
|
143
|
+
smlal2 v8.4s, v24.8h, v20.8h
|
|
144
|
+
ld1 { v16.8h }, [x3], #16
|
|
145
|
+
mul v23.8h, v23.8h, v2.8h
|
|
146
|
+
ldr q19, [x5, #0x10]
|
|
147
|
+
ldr q14, [x4, #0x10]
|
|
148
|
+
ldr q11, [x4], #0x20
|
|
149
|
+
ldur q20, [x2, #-0x10]
|
|
150
|
+
smlal2 v8.4s, v25.8h, v22.8h
|
|
151
|
+
smlal2 v8.4s, v5.8h, v1.8h
|
|
152
|
+
ldr q22, [x5], #0x20
|
|
153
|
+
uzp1 v1.8h, v7.8h, v17.8h
|
|
154
|
+
smlal v18.4s, v29.4h, v26.4h
|
|
155
|
+
smlal v13.4s, v23.4h, v0.4h
|
|
156
|
+
uzp2 v31.8h, v11.8h, v14.8h
|
|
157
|
+
uzp1 v21.8h, v15.8h, v20.8h
|
|
158
|
+
smlal2 v4.4s, v23.8h, v0.8h
|
|
159
|
+
ld1 { v9.8h }, [x6], #16
|
|
160
|
+
smlal v18.4s, v28.4h, v27.4h
|
|
161
|
+
smlal2 v8.4s, v29.8h, v26.8h
|
|
162
|
+
ldr q25, [x7], #0x20
|
|
163
|
+
smull v26.4s, v1.4h, v21.4h
|
|
164
|
+
uzp1 v24.8h, v22.8h, v19.8h
|
|
165
|
+
smlal2 v8.4s, v28.8h, v27.8h
|
|
166
|
+
uzp2 v28.8h, v7.8h, v17.8h
|
|
167
|
+
uzp1 v29.8h, v11.8h, v14.8h
|
|
168
|
+
smull2 v23.4s, v1.8h, v21.8h
|
|
169
|
+
ldr q27, [x8], #0x20
|
|
170
|
+
smlal2 v23.4s, v28.8h, v16.8h
|
|
171
|
+
ldur q11, [x8, #-0x10]
|
|
172
|
+
smlal2 v23.4s, v29.8h, v24.8h
|
|
173
|
+
/* High 16 bits of the reduced accumulator pair v13/v4. */
uzp2 v7.8h, v13.8h, v4.8h
|
|
174
|
+
uzp2 v19.8h, v22.8h, v19.8h
|
|
175
|
+
ld1 { v4.8h }, [x9], #16
|
|
176
|
+
smlal2 v23.4s, v31.8h, v9.8h
|
|
177
|
+
uzp1 v13.8h, v25.8h, v3.8h
|
|
178
|
+
uzp1 v14.8h, v18.8h, v8.8h
|
|
179
|
+
smlal v26.4s, v28.4h, v16.4h
|
|
180
|
+
uzp2 v17.8h, v27.8h, v11.8h
|
|
181
|
+
uzp2 v20.8h, v15.8h, v20.8h
|
|
182
|
+
mul v14.8h, v14.8h, v2.8h
|
|
183
|
+
/* Counter becomes 14 before the loop label; the code after the loop
   stores the remaining two 32-byte output chunks. */
sub x13, x13, #0x2
|
|
184
|
+
|
|
185
|
+
/*
 * Main loop, tail and epilogue of
 * polyvec_basemul_acc_montgomery_cached_asm_k3.
 *
 * Every pass stores one 32-byte chunk through x0 (str with post-increment
 * plus stur at -0x10) and repeats while x13 is non-zero after being
 * decremented. On entry some accumulators already hold partial sums, and
 * the loads in a pass feed later passes: work for neighbouring output
 * chunks is interleaved (presumably by an instruction scheduler -- TODO
 * confirm against the generator named in the file header). Do not reorder
 * instructions by hand.
 */
Lpolyvec_basemul_acc_montgomery_cached_k3_loop_start:
|
|
186
|
+
uzp1 v6.8h, v27.8h, v11.8h
|
|
187
|
+
smlal v26.4s, v29.4h, v24.4h
|
|
188
|
+
uzp2 v16.8h, v25.8h, v3.8h
|
|
189
|
+
smlal v26.4s, v31.4h, v9.4h
|
|
190
|
+
ldr q3, [x7, #0x10]
|
|
191
|
+
smlal v26.4s, v13.4h, v6.4h
|
|
192
|
+
/* Add (value prepared in v14) * 3329 to the accumulator pair v18/v8;
   uzp2 below then keeps the high 16 bits of every lane. */
smlal2 v8.4s, v14.8h, v0.8h
|
|
193
|
+
ldr q27, [x8], #0x20
|
|
194
|
+
smlal v18.4s, v14.4h, v0.4h
|
|
195
|
+
ldr q25, [x7], #0x20
|
|
196
|
+
smlal2 v23.4s, v13.8h, v6.8h
|
|
197
|
+
ldr q11, [x1], #0x20
|
|
198
|
+
smlal2 v23.4s, v16.8h, v4.8h
|
|
199
|
+
smlal v26.4s, v16.4h, v4.4h
|
|
200
|
+
ldur q22, [x1, #-0x10]
|
|
201
|
+
uzp2 v30.8h, v18.8h, v8.8h
|
|
202
|
+
smull v18.4s, v1.4h, v20.4h
|
|
203
|
+
smlal v18.4s, v28.4h, v21.4h
|
|
204
|
+
ldr q14, [x2], #0x20
|
|
205
|
+
smlal v18.4s, v29.4h, v19.4h
|
|
206
|
+
/* zip1/zip2 interleave the two reduced result vectors (v7, v30) into
   the 32 bytes stored further down from v5 and v10. */
zip1 v5.8h, v7.8h, v30.8h
|
|
207
|
+
uzp1 v4.8h, v26.8h, v23.8h
|
|
208
|
+
smull2 v8.4s, v1.8h, v20.8h
|
|
209
|
+
zip2 v10.8h, v7.8h, v30.8h
|
|
210
|
+
smlal v18.4s, v31.4h, v24.4h
|
|
211
|
+
mul v12.8h, v4.8h, v2.8h
|
|
212
|
+
ldr q4, [x5, #0x10]
|
|
213
|
+
ldr q20, [x4, #0x10]
|
|
214
|
+
ldr q1, [x4], #0x20
|
|
215
|
+
ldur q30, [x2, #-0x10]
|
|
216
|
+
smlal2 v8.4s, v28.8h, v21.8h
|
|
217
|
+
smlal2 v8.4s, v29.8h, v19.8h
|
|
218
|
+
ldr q19, [x5], #0x20
|
|
219
|
+
smlal2 v8.4s, v31.8h, v24.8h
|
|
220
|
+
ld1 { v15.8h }, [x3], #16
|
|
221
|
+
uzp2 v31.8h, v1.8h, v20.8h
|
|
222
|
+
smlal v26.4s, v12.4h, v0.4h
|
|
223
|
+
smlal2 v23.4s, v12.8h, v0.8h
|
|
224
|
+
uzp1 v21.8h, v14.8h, v30.8h
|
|
225
|
+
uzp1 v29.8h, v1.8h, v20.8h
|
|
226
|
+
uzp1 v1.8h, v11.8h, v22.8h
|
|
227
|
+
smlal2 v8.4s, v13.8h, v17.8h
|
|
228
|
+
ld1 { v9.8h }, [x6], #16
|
|
229
|
+
smlal v18.4s, v13.4h, v17.4h
|
|
230
|
+
uzp1 v24.8h, v19.8h, v4.8h
|
|
231
|
+
uzp2 v7.8h, v26.8h, v23.8h
|
|
232
|
+
smull v26.4s, v1.4h, v21.4h
|
|
233
|
+
smlal v18.4s, v16.4h, v6.4h
|
|
234
|
+
uzp2 v19.8h, v19.8h, v4.8h
|
|
235
|
+
smlal2 v8.4s, v16.8h, v6.8h
|
|
236
|
+
uzp2 v28.8h, v11.8h, v22.8h
|
|
237
|
+
smull2 v23.4s, v1.8h, v21.8h
|
|
238
|
+
uzp1 v13.8h, v25.8h, v3.8h
|
|
239
|
+
smlal2 v23.4s, v28.8h, v15.8h
|
|
240
|
+
ldur q11, [x8, #-0x10]
|
|
241
|
+
smlal2 v23.4s, v29.8h, v24.8h
|
|
242
|
+
ld1 { v4.8h }, [x9], #16
|
|
243
|
+
smlal2 v23.4s, v31.8h, v9.8h
|
|
244
|
+
uzp1 v12.8h, v18.8h, v8.8h
|
|
245
|
+
uzp2 v20.8h, v14.8h, v30.8h
|
|
246
|
+
smlal v26.4s, v28.4h, v15.4h
|
|
247
|
+
/* Store the 32-byte output chunk: lower half with post-increment of x0,
   upper half at the new x0 - 0x10. */
str q5, [x0], #0x20
|
|
248
|
+
mul v14.8h, v12.8h, v2.8h
|
|
249
|
+
stur q10, [x0, #-0x10]
|
|
250
|
+
uzp2 v17.8h, v27.8h, v11.8h
|
|
251
|
+
/* subs also updates the flags, but the branch below tests x13 itself. */
subs x13, x13, #0x1
|
|
252
|
+
cbnz x13, Lpolyvec_basemul_acc_montgomery_cached_k3_loop_start
|
|
253
|
+
/* Tail: no further loads. Finishes the accumulations still in flight,
   reduces them and stores the last two 32-byte output chunks. */
uzp2 v3.8h, v25.8h, v3.8h
|
|
254
|
+
smull2 v16.4s, v1.8h, v20.8h
|
|
255
|
+
smull v25.4s, v1.4h, v20.4h
|
|
256
|
+
uzp1 v22.8h, v27.8h, v11.8h
|
|
257
|
+
smlal2 v16.4s, v28.8h, v21.8h
|
|
258
|
+
smlal v25.4s, v28.4h, v21.4h
|
|
259
|
+
smlal2 v16.4s, v29.8h, v19.8h
|
|
260
|
+
smlal v25.4s, v29.4h, v19.4h
|
|
261
|
+
smlal2 v16.4s, v31.8h, v24.8h
|
|
262
|
+
smlal v25.4s, v31.4h, v24.4h
|
|
263
|
+
smlal v25.4s, v13.4h, v17.4h
|
|
264
|
+
smlal2 v16.4s, v13.8h, v17.8h
|
|
265
|
+
smlal2 v16.4s, v3.8h, v22.8h
|
|
266
|
+
smlal v25.4s, v3.4h, v22.4h
|
|
267
|
+
smlal2 v23.4s, v13.8h, v22.8h
|
|
268
|
+
smlal v26.4s, v29.4h, v24.4h
|
|
269
|
+
smlal v26.4s, v31.4h, v9.4h
|
|
270
|
+
smlal v26.4s, v13.4h, v22.4h
|
|
271
|
+
uzp1 v10.8h, v25.8h, v16.8h
|
|
272
|
+
smlal2 v23.4s, v3.8h, v4.8h
|
|
273
|
+
smlal v26.4s, v3.4h, v4.4h
|
|
274
|
+
mul v13.8h, v10.8h, v2.8h
|
|
275
|
+
smlal v18.4s, v14.4h, v0.4h
|
|
276
|
+
smlal2 v8.4s, v14.8h, v0.8h
|
|
277
|
+
uzp1 v3.8h, v26.8h, v23.8h
|
|
278
|
+
mul v24.8h, v3.8h, v2.8h
|
|
279
|
+
uzp2 v17.8h, v18.8h, v8.8h
|
|
280
|
+
smlal v25.4s, v13.4h, v0.4h
|
|
281
|
+
smlal2 v16.4s, v13.8h, v0.8h
|
|
282
|
+
zip1 v21.8h, v7.8h, v17.8h
|
|
283
|
+
zip2 v20.8h, v7.8h, v17.8h
|
|
284
|
+
smlal2 v23.4s, v24.8h, v0.8h
|
|
285
|
+
str q21, [x0], #0x20
|
|
286
|
+
smlal v26.4s, v24.4h, v0.4h
|
|
287
|
+
uzp2 v13.8h, v25.8h, v16.8h
|
|
288
|
+
stur q20, [x0, #-0x10]
|
|
289
|
+
uzp2 v23.8h, v26.8h, v23.8h
|
|
290
|
+
zip1 v18.8h, v23.8h, v13.8h
|
|
291
|
+
zip2 v13.8h, v23.8h, v13.8h
|
|
292
|
+
str q18, [x0], #0x20
|
|
293
|
+
stur q13, [x0, #-0x10]
|
|
294
|
+
/* Epilogue: reload d8-d15 from the 64-byte frame, release it, return. */
ldp d8, d9, [sp]
|
|
295
|
+
.cfi_restore d8
|
|
296
|
+
.cfi_restore d9
|
|
297
|
+
ldp d10, d11, [sp, #0x10]
|
|
298
|
+
.cfi_restore d10
|
|
299
|
+
.cfi_restore d11
|
|
300
|
+
ldp d12, d13, [sp, #0x20]
|
|
301
|
+
.cfi_restore d12
|
|
302
|
+
.cfi_restore d13
|
|
303
|
+
ldp d14, d15, [sp, #0x30]
|
|
304
|
+
.cfi_restore d14
|
|
305
|
+
.cfi_restore d15
|
|
306
|
+
add sp, sp, #0x40
|
|
307
|
+
.cfi_adjust_cfa_offset -0x40
|
|
308
|
+
ret
|
|
309
|
+
.cfi_endproc
|
|
310
|
+
|
|
311
|
+
MLK_ASM_FN_SIZE(polyvec_basemul_acc_montgomery_cached_asm_k3)
|
|
312
|
+
|
|
313
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
314
|
+
(MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 3) */
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [NeonNTT]
|
|
10
|
+
* Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
|
|
11
|
+
* Becker, Hwang, Kannwischer, Yang, Yang
|
|
12
|
+
* https://eprint.iacr.org/2021/986
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*yaml
|
|
16
|
+
Name: polyvec_basemul_acc_montgomery_cached_asm_k4
|
|
17
|
+
Description: Re-implementation of asymmetric base multiplication following @[NeonNTT] for k=4
|
|
18
|
+
Signature: void mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(int16_t r[256], const int16_t a[1024], const int16_t b[1024], const int16_t b_cache[512])
|
|
19
|
+
ABI:
|
|
20
|
+
x0:
|
|
21
|
+
type: buffer
|
|
22
|
+
size_bytes: 512
|
|
23
|
+
permissions: write-only
|
|
24
|
+
c_parameter: int16_t r[256]
|
|
25
|
+
description: Output polynomial
|
|
26
|
+
x1:
|
|
27
|
+
type: buffer
|
|
28
|
+
size_bytes: 2048
|
|
29
|
+
permissions: read-only
|
|
30
|
+
c_parameter: const int16_t a[1024]
|
|
31
|
+
description: Input polynomial vector a
|
|
32
|
+
x2:
|
|
33
|
+
type: buffer
|
|
34
|
+
size_bytes: 2048
|
|
35
|
+
permissions: read-only
|
|
36
|
+
c_parameter: const int16_t b[1024]
|
|
37
|
+
description: Input polynomial vector b
|
|
38
|
+
x3:
|
|
39
|
+
type: buffer
|
|
40
|
+
size_bytes: 1024
|
|
41
|
+
permissions: read-only
|
|
42
|
+
c_parameter: const int16_t b_cache[512]
|
|
43
|
+
description: Cached values for b
|
|
44
|
+
Stack:
|
|
45
|
+
bytes: 64
|
|
46
|
+
description: saving callee-saved Neon registers
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
/* Re-implementation of asymmetric base multiplication following @[NeonNTT] */
|
|
50
|
+
|
|
51
|
+
#include "../../../common.h"
|
|
52
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4)
|
|
53
|
+
|
|
54
|
+
/*
|
|
55
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
56
|
+
* dev/aarch64_opt/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S using scripts/simpasm. Do not modify it directly.
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
#if defined(__ELF__)
|
|
60
|
+
.section .note.GNU-stack,"",@progbits
|
|
61
|
+
#endif
|
|
62
|
+
|
|
63
|
+
.text
|
|
64
|
+
.balign 4
|
|
65
|
+
/*
 * Entry point, prologue and loop preamble of
 * polyvec_basemul_acc_montgomery_cached_asm_k4.
 *
 * Registers as used below (buffer sizes are the ones given in the yaml
 * header of this file):
 *   x0       output r (first written inside the loop)
 *   x1       input vector a; x4, x7, x10 = x1 + 0x200 / 0x400 / 0x600
 *   x2       input vector b; x5, x8, x11 = x2 + 0x200 / 0x400 / 0x600
 *   x3       b_cache; x6, x9, x12 = x3 + 0x100 / 0x200 / 0x300
 *   x13      loop counter
 *   w14      scratch for building the two vector constants
 *   v0, v2   constants 3329 and 3327 in every 16-bit lane
 * v8-v15 are overwritten by this routine, so d8-d15 are spilled first.
 */
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4)
|
|
66
|
+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k4)
|
|
67
|
+
|
|
68
|
+
.cfi_startproc
|
|
69
|
+
/* Reserve 64 bytes of stack and save d8-d15 there; they are reloaded
   right before ret. */
sub sp, sp, #0x40
|
|
70
|
+
.cfi_adjust_cfa_offset 0x40
|
|
71
|
+
stp d8, d9, [sp]
|
|
72
|
+
.cfi_rel_offset d8, 0x0
|
|
73
|
+
.cfi_rel_offset d9, 0x8
|
|
74
|
+
stp d10, d11, [sp, #0x10]
|
|
75
|
+
.cfi_rel_offset d10, 0x10
|
|
76
|
+
.cfi_rel_offset d11, 0x18
|
|
77
|
+
stp d12, d13, [sp, #0x20]
|
|
78
|
+
.cfi_rel_offset d12, 0x20
|
|
79
|
+
.cfi_rel_offset d13, 0x28
|
|
80
|
+
stp d14, d15, [sp, #0x30]
|
|
81
|
+
.cfi_rel_offset d14, 0x30
|
|
82
|
+
.cfi_rel_offset d15, 0x38
|
|
83
|
+
/* v0 = 3329 and v2 = 3327 in all eight 16-bit lanes.
   3329 * 3327 = 3328^2 - 1, which is -1 modulo 2^16. The later
   "mul by v2, smlal/smlal2 by v0, uzp2" sequences use this to zero the
   low 16 bits of each 32-bit accumulator lane before keeping the high
   16 bits. */
mov w14, #0xd01 // =3329
|
|
84
|
+
dup v0.8h, w14
|
|
85
|
+
mov w14, #0xcff // =3327
|
|
86
|
+
dup v2.8h, w14
|
|
87
|
+
/* Secondary read pointers into a (x4, x7, x10), b (x5, x8, x11) and
   b_cache (x6, x9, x12). */
add x4, x1, #0x200
|
|
88
|
+
add x5, x2, #0x200
|
|
89
|
+
add x6, x3, #0x100
|
|
90
|
+
add x7, x1, #0x400
|
|
91
|
+
add x8, x2, #0x400
|
|
92
|
+
add x9, x3, #0x200
|
|
93
|
+
add x10, x1, #0x600
|
|
94
|
+
add x11, x2, #0x600
|
|
95
|
+
add x12, x3, #0x300
|
|
96
|
+
/* x13 = 16; lowered by 2 at the end of this preamble and by 1 per loop
   pass. */
mov x13, #0x10 // =16
|
|
97
|
+
/* Preamble: loads and multiply-accumulates issued ahead of the loop.
   Each uzp1/uzp2 pair splits two consecutive 16-byte loads into their
   even-indexed and odd-indexed 16-bit elements. smull/smlal work on the
   low four lanes, smull2/smlal2 on the high four lanes. Loads are
   interleaved with arithmetic; the file is marked auto-derived, so keep
   the instruction order untouched. */
ldr q28, [x1], #0x20
|
|
98
|
+
ldur q5, [x1, #-0x10]
|
|
99
|
+
ldr q31, [x2], #0x20
|
|
100
|
+
ldur q27, [x2, #-0x10]
|
|
101
|
+
ldr q7, [x5], #0x20
|
|
102
|
+
ldr q10, [x4], #0x20
|
|
103
|
+
ldur q18, [x5, #-0x10]
|
|
104
|
+
ldur q9, [x4, #-0x10]
|
|
105
|
+
uzp1 v11.8h, v28.8h, v5.8h
|
|
106
|
+
uzp2 v19.8h, v28.8h, v5.8h
|
|
107
|
+
uzp2 v4.8h, v31.8h, v27.8h
|
|
108
|
+
uzp1 v1.8h, v31.8h, v27.8h
|
|
109
|
+
ldr q29, [x7], #0x20
|
|
110
|
+
ldr q28, [x8, #0x10]
|
|
111
|
+
uzp1 v24.8h, v10.8h, v9.8h
|
|
112
|
+
uzp1 v17.8h, v7.8h, v18.8h
|
|
113
|
+
uzp2 v7.8h, v7.8h, v18.8h
|
|
114
|
+
ldr q21, [x8], #0x20
|
|
115
|
+
uzp2 v27.8h, v10.8h, v9.8h
|
|
116
|
+
ldur q6, [x7, #-0x10]
|
|
117
|
+
smull v18.4s, v11.4h, v4.4h
|
|
118
|
+
ld1 { v9.8h }, [x3], #16
|
|
119
|
+
smull2 v8.4s, v11.8h, v4.8h
|
|
120
|
+
ldr q16, [x11], #0x20
|
|
121
|
+
smlal2 v8.4s, v19.8h, v1.8h
|
|
122
|
+
ldur q14, [x11, #-0x10]
|
|
123
|
+
smlal v18.4s, v19.4h, v1.4h
|
|
124
|
+
uzp1 v10.8h, v21.8h, v28.8h
|
|
125
|
+
smlal v18.4s, v24.4h, v7.4h
|
|
126
|
+
ldr q4, [x10], #0x20
|
|
127
|
+
smlal2 v8.4s, v24.8h, v7.8h
|
|
128
|
+
ld1 { v12.8h }, [x6], #16
|
|
129
|
+
smull2 v23.4s, v11.8h, v1.8h
|
|
130
|
+
uzp2 v13.8h, v29.8h, v6.8h
|
|
131
|
+
smull v26.4s, v11.4h, v1.4h
|
|
132
|
+
uzp1 v29.8h, v29.8h, v6.8h
|
|
133
|
+
smlal v26.4s, v19.4h, v9.4h
|
|
134
|
+
ldur q15, [x10, #-0x10]
|
|
135
|
+
smlal2 v23.4s, v19.8h, v9.8h
|
|
136
|
+
uzp2 v9.8h, v21.8h, v28.8h
|
|
137
|
+
smlal v18.4s, v27.4h, v17.4h
|
|
138
|
+
uzp2 v6.8h, v16.8h, v14.8h
|
|
139
|
+
uzp1 v21.8h, v16.8h, v14.8h
|
|
140
|
+
smlal2 v8.4s, v27.8h, v17.8h
|
|
141
|
+
smlal2 v8.4s, v29.8h, v9.8h
|
|
142
|
+
uzp1 v30.8h, v4.8h, v15.8h
|
|
143
|
+
uzp2 v16.8h, v4.8h, v15.8h
|
|
144
|
+
smlal v18.4s, v29.4h, v9.4h
|
|
145
|
+
smlal2 v8.4s, v13.8h, v10.8h
|
|
146
|
+
ld1 { v15.8h }, [x9], #16
|
|
147
|
+
smlal v18.4s, v13.4h, v10.4h
|
|
148
|
+
ldr q11, [x4], #0x20
|
|
149
|
+
smlal v18.4s, v30.4h, v6.4h
|
|
150
|
+
ldr q7, [x2], #0x20
|
|
151
|
+
smlal2 v8.4s, v30.8h, v6.8h
|
|
152
|
+
ld1 { v9.8h }, [x12], #16
|
|
153
|
+
smlal2 v23.4s, v24.8h, v17.8h
|
|
154
|
+
ldur q4, [x2, #-0x10]
|
|
155
|
+
smlal v26.4s, v24.4h, v17.4h
|
|
156
|
+
ldur q25, [x4, #-0x10]
|
|
157
|
+
smlal2 v8.4s, v16.8h, v21.8h
|
|
158
|
+
ldr q5, [x5], #0x20
|
|
159
|
+
smlal v18.4s, v16.4h, v21.4h
|
|
160
|
+
ldur q22, [x5, #-0x10]
|
|
161
|
+
smlal v26.4s, v27.4h, v12.4h
|
|
162
|
+
ldr q19, [x1, #0x10]
|
|
163
|
+
smlal v26.4s, v29.4h, v10.4h
|
|
164
|
+
ld1 { v20.8h }, [x3], #16
|
|
165
|
+
smlal v26.4s, v13.4h, v15.4h
|
|
166
|
+
uzp1 v24.8h, v7.8h, v4.8h
|
|
167
|
+
smlal2 v23.4s, v27.8h, v12.8h
|
|
168
|
+
/* Low 16 bits of the accumulator pair v18/v8; multiplied by v2 (3327)
   at the end of this preamble as the first step of the reduction. */
uzp1 v28.8h, v18.8h, v8.8h
|
|
169
|
+
smlal v26.4s, v30.4h, v21.4h
|
|
170
|
+
uzp2 v27.8h, v11.8h, v25.8h
|
|
171
|
+
smlal2 v23.4s, v29.8h, v10.8h
|
|
172
|
+
uzp2 v31.8h, v7.8h, v4.8h
|
|
173
|
+
smlal2 v23.4s, v13.8h, v15.8h
|
|
174
|
+
uzp1 v14.8h, v5.8h, v22.8h
|
|
175
|
+
uzp1 v17.8h, v11.8h, v25.8h
|
|
176
|
+
smlal v26.4s, v16.4h, v9.4h
|
|
177
|
+
mul v29.8h, v28.8h, v2.8h
|
|
178
|
+
/* Counter becomes 14 before the loop label; the code after the loop
   stores the remaining two 32-byte output chunks. */
sub x13, x13, #0x2
|
|
179
|
+
|
|
180
|
+
/*
 * Main loop, tail and epilogue of
 * polyvec_basemul_acc_montgomery_cached_asm_k4.
 *
 * Every pass stores one 32-byte chunk through x0 (str with post-increment
 * plus stur at -0x10) and repeats while x13 is non-zero after being
 * decremented. On entry some accumulators already hold partial sums, and
 * the loads in a pass feed later passes: work for neighbouring output
 * chunks is interleaved (presumably by an instruction scheduler -- TODO
 * confirm against the generator named in the file header). Do not reorder
 * instructions by hand.
 */
Lpolyvec_basemul_acc_montgomery_cached_k4_loop_start:
|
|
181
|
+
smlal2 v23.4s, v30.8h, v21.8h
|
|
182
|
+
ldr q11, [x1], #0x20
|
|
183
|
+
uzp2 v15.8h, v5.8h, v22.8h
|
|
184
|
+
/* Add (value prepared in v29) * 3329 to the accumulator pair v18/v8;
   uzp2 below then keeps the high 16 bits of every lane. */
smlal v18.4s, v29.4h, v0.4h
|
|
185
|
+
ldr q12, [x7], #0x20
|
|
186
|
+
smlal2 v8.4s, v29.8h, v0.8h
|
|
187
|
+
ldur q3, [x7, #-0x10]
|
|
188
|
+
ldr q21, [x8], #0x20
|
|
189
|
+
uzp1 v29.8h, v11.8h, v19.8h
|
|
190
|
+
ldur q13, [x8, #-0x10]
|
|
191
|
+
uzp2 v5.8h, v11.8h, v19.8h
|
|
192
|
+
smlal2 v23.4s, v16.8h, v9.8h
|
|
193
|
+
uzp2 v28.8h, v18.8h, v8.8h
|
|
194
|
+
smull2 v8.4s, v29.8h, v31.8h
|
|
195
|
+
smlal2 v8.4s, v5.8h, v24.8h
|
|
196
|
+
uzp1 v7.8h, v12.8h, v3.8h
|
|
197
|
+
smlal2 v8.4s, v17.8h, v15.8h
|
|
198
|
+
uzp2 v11.8h, v21.8h, v13.8h
|
|
199
|
+
uzp1 v4.8h, v26.8h, v23.8h
|
|
200
|
+
smlal2 v8.4s, v27.8h, v14.8h
|
|
201
|
+
smlal2 v8.4s, v7.8h, v11.8h
|
|
202
|
+
mul v6.8h, v4.8h, v2.8h
|
|
203
|
+
ldr q19, [x11], #0x20
|
|
204
|
+
uzp2 v25.8h, v12.8h, v3.8h
|
|
205
|
+
ldr q12, [x10], #0x20
|
|
206
|
+
smull v18.4s, v29.4h, v31.4h
|
|
207
|
+
ldur q3, [x10, #-0x10]
|
|
208
|
+
smlal v18.4s, v5.4h, v24.4h
|
|
209
|
+
uzp1 v4.8h, v21.8h, v13.8h
|
|
210
|
+
smlal v18.4s, v17.4h, v15.4h
|
|
211
|
+
ldur q13, [x11, #-0x10]
|
|
212
|
+
ld1 { v1.8h }, [x6], #16
|
|
213
|
+
smlal v26.4s, v6.4h, v0.4h
|
|
214
|
+
smlal2 v23.4s, v6.8h, v0.8h
|
|
215
|
+
ld1 { v10.8h }, [x9], #16
|
|
216
|
+
smlal v18.4s, v27.4h, v14.4h
|
|
217
|
+
uzp1 v30.8h, v12.8h, v3.8h
|
|
218
|
+
smlal2 v8.4s, v25.8h, v4.8h
|
|
219
|
+
uzp2 v31.8h, v19.8h, v13.8h
|
|
220
|
+
smlal v18.4s, v7.4h, v11.4h
|
|
221
|
+
ld1 { v9.8h }, [x12], #16
|
|
222
|
+
smlal v18.4s, v25.4h, v4.4h
|
|
223
|
+
uzp1 v21.8h, v19.8h, v13.8h
|
|
224
|
+
uzp2 v16.8h, v12.8h, v3.8h
|
|
225
|
+
smlal v18.4s, v30.4h, v31.4h
|
|
226
|
+
smlal2 v8.4s, v30.8h, v31.8h
|
|
227
|
+
uzp2 v31.8h, v26.8h, v23.8h
|
|
228
|
+
smlal2 v8.4s, v16.8h, v21.8h
|
|
229
|
+
smlal v18.4s, v16.4h, v21.4h
|
|
230
|
+
/* zip1/zip2 interleave the two reduced result vectors (v31, v28) into
   the 32 bytes stored further down from v15 and v13. */
zip1 v15.8h, v31.8h, v28.8h
|
|
231
|
+
ldr q19, [x1, #0x10]
|
|
232
|
+
smull2 v23.4s, v29.8h, v24.8h
|
|
233
|
+
smull v26.4s, v29.4h, v24.4h
|
|
234
|
+
ldr q3, [x2, #0x10]
|
|
235
|
+
smlal v26.4s, v5.4h, v20.4h
|
|
236
|
+
ldr q11, [x2], #0x20
|
|
237
|
+
uzp1 v6.8h, v18.8h, v8.8h
|
|
238
|
+
smlal v26.4s, v17.4h, v14.4h
|
|
239
|
+
smlal v26.4s, v27.4h, v1.4h
|
|
240
|
+
zip2 v13.8h, v31.8h, v28.8h
|
|
241
|
+
smlal v26.4s, v7.4h, v4.4h
|
|
242
|
+
/* Store the 32-byte output chunk: lower half with post-increment of x0,
   upper half (two instructions later) at the new x0 - 0x10. */
str q15, [x0], #0x20
|
|
243
|
+
smlal v26.4s, v25.4h, v10.4h
|
|
244
|
+
stur q13, [x0, #-0x10]
|
|
245
|
+
mul v29.8h, v6.8h, v2.8h
|
|
246
|
+
uzp1 v24.8h, v11.8h, v3.8h
|
|
247
|
+
uzp2 v31.8h, v11.8h, v3.8h
|
|
248
|
+
ldr q11, [x4], #0x20
|
|
249
|
+
smlal2 v23.4s, v5.8h, v20.8h
|
|
250
|
+
ldur q28, [x4, #-0x10]
|
|
251
|
+
smlal2 v23.4s, v17.8h, v14.8h
|
|
252
|
+
ldr q5, [x5], #0x20
|
|
253
|
+
smlal2 v23.4s, v27.8h, v1.8h
|
|
254
|
+
ldur q22, [x5, #-0x10]
|
|
255
|
+
smlal v26.4s, v30.4h, v21.4h
|
|
256
|
+
ld1 { v20.8h }, [x3], #16
|
|
257
|
+
smlal v26.4s, v16.4h, v9.4h
|
|
258
|
+
uzp1 v17.8h, v11.8h, v28.8h
|
|
259
|
+
smlal2 v23.4s, v7.8h, v4.8h
|
|
260
|
+
uzp2 v27.8h, v11.8h, v28.8h
|
|
261
|
+
smlal2 v23.4s, v25.8h, v10.8h
|
|
262
|
+
uzp1 v14.8h, v5.8h, v22.8h
|
|
263
|
+
/* subs also updates the flags, but the branch below tests x13 itself. */
subs x13, x13, #0x1
|
|
264
|
+
cbnz x13, Lpolyvec_basemul_acc_montgomery_cached_k4_loop_start
|
|
265
|
+
/* Tail: performs the last loads (through x1, x6-x12), finishes the
   accumulations still in flight, reduces them and stores the last two
   32-byte output chunks. */
smlal v18.4s, v29.4h, v0.4h
|
|
266
|
+
ldr q11, [x1], #0x20
|
|
267
|
+
uzp2 v28.8h, v5.8h, v22.8h
|
|
268
|
+
smlal2 v23.4s, v30.8h, v21.8h
|
|
269
|
+
smlal2 v8.4s, v29.8h, v0.8h
|
|
270
|
+
ldr q15, [x8, #0x10]
|
|
271
|
+
smlal2 v23.4s, v16.8h, v9.8h
|
|
272
|
+
ldr q21, [x8], #0x20
|
|
273
|
+
uzp1 v22.8h, v11.8h, v19.8h
|
|
274
|
+
uzp2 v12.8h, v11.8h, v19.8h
|
|
275
|
+
ldr q1, [x7, #0x10]
|
|
276
|
+
ld1 { v6.8h }, [x6], #16
|
|
277
|
+
uzp2 v3.8h, v18.8h, v8.8h
|
|
278
|
+
smull v9.4s, v22.4h, v31.4h
|
|
279
|
+
smull2 v18.4s, v22.8h, v31.8h
|
|
280
|
+
ldr q16, [x7], #0x20
|
|
281
|
+
smull v19.4s, v22.4h, v24.4h
|
|
282
|
+
uzp1 v30.8h, v21.8h, v15.8h
|
|
283
|
+
uzp2 v25.8h, v21.8h, v15.8h
|
|
284
|
+
smull2 v8.4s, v22.8h, v24.8h
|
|
285
|
+
smlal v19.4s, v12.4h, v20.4h
|
|
286
|
+
ldr q13, [x10, #0x10]
|
|
287
|
+
smlal2 v8.4s, v12.8h, v20.8h
|
|
288
|
+
uzp1 v29.8h, v16.8h, v1.8h
|
|
289
|
+
smlal2 v18.4s, v12.8h, v24.8h
|
|
290
|
+
ldr q5, [x10], #0x20
|
|
291
|
+
smlal v9.4s, v12.4h, v24.4h
|
|
292
|
+
ldr q4, [x11], #0x20
|
|
293
|
+
smlal v9.4s, v17.4h, v28.4h
|
|
294
|
+
ldur q22, [x11, #-0x10]
|
|
295
|
+
smlal2 v18.4s, v17.8h, v28.8h
|
|
296
|
+
uzp2 v16.8h, v16.8h, v1.8h
|
|
297
|
+
smlal v19.4s, v17.4h, v14.4h
|
|
298
|
+
ld1 { v28.8h }, [x9], #16
|
|
299
|
+
smlal2 v8.4s, v17.8h, v14.8h
|
|
300
|
+
uzp1 v7.8h, v5.8h, v13.8h
|
|
301
|
+
smlal v9.4s, v27.4h, v14.4h
|
|
302
|
+
uzp1 v17.8h, v4.8h, v22.8h
|
|
303
|
+
smlal2 v18.4s, v27.8h, v14.8h
|
|
304
|
+
uzp2 v12.8h, v5.8h, v13.8h
|
|
305
|
+
uzp2 v21.8h, v4.8h, v22.8h
|
|
306
|
+
smlal v19.4s, v27.4h, v6.4h
|
|
307
|
+
smlal2 v8.4s, v27.8h, v6.8h
|
|
308
|
+
ld1 { v15.8h }, [x12], #16
|
|
309
|
+
smlal v19.4s, v29.4h, v30.4h
|
|
310
|
+
uzp1 v20.8h, v26.8h, v23.8h
|
|
311
|
+
smlal v9.4s, v29.4h, v25.4h
|
|
312
|
+
smlal2 v18.4s, v29.8h, v25.8h
|
|
313
|
+
smlal2 v8.4s, v29.8h, v30.8h
|
|
314
|
+
smlal v19.4s, v16.4h, v28.4h
|
|
315
|
+
smlal2 v8.4s, v16.8h, v28.8h
|
|
316
|
+
smlal2 v18.4s, v16.8h, v30.8h
|
|
317
|
+
smlal v9.4s, v16.4h, v30.4h
|
|
318
|
+
smlal v9.4s, v7.4h, v21.4h
|
|
319
|
+
smlal2 v18.4s, v7.8h, v21.8h
|
|
320
|
+
smlal2 v8.4s, v7.8h, v17.8h
|
|
321
|
+
smlal v19.4s, v7.4h, v17.4h
|
|
322
|
+
smlal v19.4s, v12.4h, v15.4h
|
|
323
|
+
smlal2 v8.4s, v12.8h, v15.8h
|
|
324
|
+
smlal2 v18.4s, v12.8h, v17.8h
|
|
325
|
+
smlal v9.4s, v12.4h, v17.4h
|
|
326
|
+
mul v6.8h, v20.8h, v2.8h
|
|
327
|
+
uzp1 v4.8h, v19.8h, v8.8h
|
|
328
|
+
mul v17.8h, v4.8h, v2.8h
|
|
329
|
+
uzp1 v12.8h, v9.8h, v18.8h
|
|
330
|
+
smlal v26.4s, v6.4h, v0.4h
|
|
331
|
+
mul v21.8h, v12.8h, v2.8h
|
|
332
|
+
smlal2 v23.4s, v6.8h, v0.8h
|
|
333
|
+
smlal2 v8.4s, v17.8h, v0.8h
|
|
334
|
+
smlal v19.4s, v17.4h, v0.4h
|
|
335
|
+
smlal2 v18.4s, v21.8h, v0.8h
|
|
336
|
+
uzp2 v23.8h, v26.8h, v23.8h
|
|
337
|
+
smlal v9.4s, v21.4h, v0.4h
|
|
338
|
+
zip2 v12.8h, v23.8h, v3.8h
|
|
339
|
+
zip1 v22.8h, v23.8h, v3.8h
|
|
340
|
+
uzp2 v14.8h, v19.8h, v8.8h
|
|
341
|
+
uzp2 v18.8h, v9.8h, v18.8h
|
|
342
|
+
/* Here the upper 16 bytes of each chunk go out first at [x0 + 0x10];
   the lower 16 bytes follow with post-increment of x0. */
str q12, [x0, #0x10]
|
|
343
|
+
str q22, [x0], #0x20
|
|
344
|
+
zip2 v24.8h, v14.8h, v18.8h
|
|
345
|
+
zip1 v21.8h, v14.8h, v18.8h
|
|
346
|
+
str q24, [x0, #0x10]
|
|
347
|
+
str q21, [x0], #0x20
|
|
348
|
+
/* Epilogue: reload d8-d15 from the 64-byte frame, release it, return. */
ldp d8, d9, [sp]
|
|
349
|
+
.cfi_restore d8
|
|
350
|
+
.cfi_restore d9
|
|
351
|
+
ldp d10, d11, [sp, #0x10]
|
|
352
|
+
.cfi_restore d10
|
|
353
|
+
.cfi_restore d11
|
|
354
|
+
ldp d12, d13, [sp, #0x20]
|
|
355
|
+
.cfi_restore d12
|
|
356
|
+
.cfi_restore d13
|
|
357
|
+
ldp d14, d15, [sp, #0x30]
|
|
358
|
+
.cfi_restore d14
|
|
359
|
+
.cfi_restore d15
|
|
360
|
+
add sp, sp, #0x40
|
|
361
|
+
.cfi_adjust_cfa_offset -0x40
|
|
362
|
+
ret
|
|
363
|
+
.cfi_endproc
|
|
364
|
+
|
|
365
|
+
MLK_ASM_FN_SIZE(polyvec_basemul_acc_montgomery_cached_asm_k4)
|
|
366
|
+
|
|
367
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
368
|
+
(MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4) */
|