pq_crypto 0.3.2 → 0.5.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
/* Copyright (c) 2022 Arm Limited
|
|
2
|
+
* Copyright (c) 2022 Hanno Becker
|
|
3
|
+
* Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer
|
|
4
|
+
* Copyright (c) The mlkem-native project authors
|
|
5
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/* References
|
|
9
|
+
* ==========
|
|
10
|
+
*
|
|
11
|
+
* - [NeonNTT]
|
|
12
|
+
* Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
|
|
13
|
+
* Becker, Hwang, Kannwischer, Yang, Yang
|
|
14
|
+
* https://eprint.iacr.org/2021/986
|
|
15
|
+
*
|
|
16
|
+
* - [SLOTHY_Paper]
|
|
17
|
+
* Fast and Clean: Auditable high-performance assembly via constraint solving
|
|
18
|
+
* Abdulrahman, Becker, Kannwischer, Klein
|
|
19
|
+
* https://eprint.iacr.org/2022/1303
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/*yaml
|
|
23
|
+
Name: ntt_asm
|
|
24
|
+
Description: AArch64 ML-KEM forward NTT following @[NeonNTT] and @[SLOTHY_Paper]
|
|
25
|
+
Signature: void mlk_ntt_asm(int16_t p[256], const int16_t twiddles12345[80], const int16_t twiddles56[384])
|
|
26
|
+
ABI:
|
|
27
|
+
x0:
|
|
28
|
+
type: buffer
|
|
29
|
+
size_bytes: 512
|
|
30
|
+
permissions: read/write
|
|
31
|
+
c_parameter: int16_t p[256]
|
|
32
|
+
description: Input/output polynomial
|
|
33
|
+
x1:
|
|
34
|
+
type: buffer
|
|
35
|
+
size_bytes: 160
|
|
36
|
+
permissions: read-only
|
|
37
|
+
c_parameter: const int16_t twiddles12345[80]
|
|
38
|
+
description: Twiddle factors for layers 1-5
|
|
39
|
+
x2:
|
|
40
|
+
type: buffer
|
|
41
|
+
size_bytes: 768
|
|
42
|
+
permissions: read-only
|
|
43
|
+
c_parameter: const int16_t twiddles56[384]
|
|
44
|
+
description: Twiddle factors for layers 6-7
|
|
45
|
+
Stack:
|
|
46
|
+
bytes: 64
|
|
47
|
+
description: saving callee-saved Neon registers
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
#include "../../../common.h"
|
|
51
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
52
|
+
|
|
53
|
+
/*
|
|
54
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
55
|
+
* dev/aarch64_opt/src/ntt.S using scripts/simpasm. Do not modify it directly.
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
#if defined(__ELF__)
|
|
59
|
+
.section .note.GNU-stack,"",@progbits
|
|
60
|
+
#endif
|
|
61
|
+
|
|
62
|
+
.text
|
|
63
|
+
.balign 4
|
|
64
|
+
.global MLK_ASM_NAMESPACE(ntt_asm) // make the macro-namespaced ntt_asm symbol visible to the linker
|
|
65
|
+
MLK_ASM_FN_SYMBOL(ntt_asm) // function entry; macro is defined outside this chunk (presumably via common.h)
|
|
66
|
+
|
|
67
|
+
.cfi_startproc
|
|
68
|
+
sub sp, sp, #0x40 // reserve 64 bytes of stack for d8-d15
|
|
69
|
+
.cfi_adjust_cfa_offset 0x40
|
|
70
|
+
stp d8, d9, [sp] // spill d8-d15 (low halves of v8-v15, callee-saved under AAPCS64); v8-v15 are used as scratch below
|
|
71
|
+
.cfi_rel_offset d8, 0x0
|
|
72
|
+
.cfi_rel_offset d9, 0x8
|
|
73
|
+
stp d10, d11, [sp, #0x10]
|
|
74
|
+
.cfi_rel_offset d10, 0x10
|
|
75
|
+
.cfi_rel_offset d11, 0x18
|
|
76
|
+
stp d12, d13, [sp, #0x20]
|
|
77
|
+
.cfi_rel_offset d12, 0x20
|
|
78
|
+
.cfi_rel_offset d13, 0x28
|
|
79
|
+
stp d14, d15, [sp, #0x30]
|
|
80
|
+
.cfi_rel_offset d14, 0x30
|
|
81
|
+
.cfi_rel_offset d15, 0x38
|
|
82
|
+
mov w5, #0xd01 // =3329
|
|
83
|
+
mov v7.h[0], w5 // v7.h[0] = 3329 (presumably the ML-KEM modulus); every mls below multiplies by this lane
|
|
84
|
+
mov w5, #0x4ebf // =20159
|
|
85
|
+
mov v7.h[1], w5 // v7.h[1] = 20159; no later read of lane 1 appears in this function
|
|
86
|
+
mov x3, x0 // keep the original p pointer; x0 is post-incremented during the first loop
|
|
87
|
+
mov x4, #0x4 // =4; first-loop counter, reduced by 2 just before the loop label
|
|
88
|
+
ldr q0, [x1], #0x20 // v0 = first 16 bytes of the x1 table; x1 += 32
|
|
89
|
+
ldur q1, [x1, #-0x10] // v1 = second 16 bytes of the x1 table
|
|
90
|
+
ldr q21, [x0, #0x40]
|
|
91
|
+
ldr q5, [x0, #0x1c0]
|
|
92
|
+
ldr q30, [x0, #0x110]
|
|
93
|
+
ldr q24, [x0, #0x140]
|
|
94
|
+
ldr q12, [x0, #0x80]
|
|
95
|
+
sqrdmulh v9.8h, v5.8h, v0.h[1] // recurring triple: sqrdmulh by one lane, mul by the neighbouring lane, then mls by v7.h[0]
|
|
96
|
+
mul v23.8h, v5.8h, v0.h[0]
|
|
97
|
+
sqrdmulh v17.8h, v24.8h, v0.h[1]
|
|
98
|
+
ldr q13, [x0, #0xc0]
|
|
99
|
+
mls v23.8h, v9.8h, v7.h[0] // v23 = v5*v0.h[0] - v9*3329 (per 16-bit lane)
|
|
100
|
+
mul v8.8h, v24.8h, v0.h[0]
|
|
101
|
+
mls v8.8h, v17.8h, v7.h[0]
|
|
102
|
+
add v9.8h, v13.8h, v23.8h // add/sub pair on (v13, v23): sum ...
|
|
103
|
+
sub v10.8h, v13.8h, v23.8h // ... and difference
|
|
104
|
+
mul v11.8h, v30.8h, v0.h[0]
|
|
105
|
+
ldr q13, [x0, #0x180]
|
|
106
|
+
sqrdmulh v28.8h, v9.8h, v0.h[3]
|
|
107
|
+
sub v29.8h, v21.8h, v8.8h
|
|
108
|
+
mul v26.8h, v9.8h, v0.h[2]
|
|
109
|
+
add v8.8h, v21.8h, v8.8h
|
|
110
|
+
mul v2.8h, v13.8h, v0.h[0]
|
|
111
|
+
mls v26.8h, v28.8h, v7.h[0]
|
|
112
|
+
mul v28.8h, v10.8h, v0.h[4]
|
|
113
|
+
sqrdmulh v23.8h, v10.8h, v0.h[5]
|
|
114
|
+
add v22.8h, v8.8h, v26.8h
|
|
115
|
+
sqrdmulh v10.8h, v13.8h, v0.h[1]
|
|
116
|
+
sqrdmulh v21.8h, v22.8h, v0.h[7]
|
|
117
|
+
ldr q13, [x0, #0x100]
|
|
118
|
+
mul v16.8h, v22.8h, v0.h[6]
|
|
119
|
+
mls v28.8h, v23.8h, v7.h[0]
|
|
120
|
+
mls v2.8h, v10.8h, v7.h[0]
|
|
121
|
+
sqrdmulh v23.8h, v13.8h, v0.h[1]
|
|
122
|
+
sub v10.8h, v29.8h, v28.8h
|
|
123
|
+
add v17.8h, v29.8h, v28.8h
|
|
124
|
+
mls v16.8h, v21.8h, v7.h[0]
|
|
125
|
+
sub v18.8h, v12.8h, v2.8h
|
|
126
|
+
ldr q29, [x0]
|
|
127
|
+
sqrdmulh v14.8h, v17.8h, v1.h[3]
|
|
128
|
+
add v22.8h, v12.8h, v2.8h
|
|
129
|
+
sqrdmulh v9.8h, v18.8h, v0.h[5]
|
|
130
|
+
mul v21.8h, v13.8h, v0.h[0]
|
|
131
|
+
ldr q13, [x0, #0x150]
|
|
132
|
+
mul v5.8h, v18.8h, v0.h[4]
|
|
133
|
+
mls v5.8h, v9.8h, v7.h[0]
|
|
134
|
+
mul v18.8h, v13.8h, v0.h[0]
|
|
135
|
+
mls v21.8h, v23.8h, v7.h[0]
|
|
136
|
+
sqrdmulh v2.8h, v13.8h, v0.h[1]
|
|
137
|
+
mul v13.8h, v17.8h, v1.h[2]
|
|
138
|
+
sub v4.8h, v29.8h, v21.8h
|
|
139
|
+
mls v13.8h, v14.8h, v7.h[0]
|
|
140
|
+
add v25.8h, v29.8h, v21.8h
|
|
141
|
+
add v6.8h, v4.8h, v5.8h
|
|
142
|
+
sqrdmulh v15.8h, v22.8h, v0.h[3]
|
|
143
|
+
sub v21.8h, v4.8h, v5.8h
|
|
144
|
+
sub v5.8h, v8.8h, v26.8h
|
|
145
|
+
mul v23.8h, v22.8h, v0.h[2]
|
|
146
|
+
add v28.8h, v6.8h, v13.8h
|
|
147
|
+
sub v13.8h, v6.8h, v13.8h
|
|
148
|
+
mul v4.8h, v5.8h, v1.h[0]
|
|
149
|
+
sub x4, x4, #0x2 // 4 - 2 = 2 passes through the loop; x0 also advances twice more in the code after it
|
|
150
|
+
|
|
151
|
+
Lntt_layer123_start: // first loop: multipliers are lanes of v0/v1; x0 += 16 per pass
|
|
152
|
+
mls v23.8h, v15.8h, v7.h[0]
|
|
153
|
+
ldr q6, [x0, #0x190]
|
|
154
|
+
ldr q15, [x0, #0x90]
|
|
155
|
+
ldr q19, [x0, #0x10]
|
|
156
|
+
mul v22.8h, v10.8h, v1.h[4]
|
|
157
|
+
ldr q24, [x0, #0x50]
|
|
158
|
+
str q13, [x0, #0x140]
|
|
159
|
+
sqrdmulh v13.8h, v6.8h, v0.h[1]
|
|
160
|
+
sub v20.8h, v25.8h, v23.8h
|
|
161
|
+
sqrdmulh v3.8h, v30.8h, v0.h[1]
|
|
162
|
+
str q28, [x0, #0x100]
|
|
163
|
+
ldr q30, [x0, #0x120]
|
|
164
|
+
mul v8.8h, v6.8h, v0.h[0]
|
|
165
|
+
sqrdmulh v27.8h, v10.8h, v1.h[5]
|
|
166
|
+
mls v11.8h, v3.8h, v7.h[0]
|
|
167
|
+
mls v18.8h, v2.8h, v7.h[0]
|
|
168
|
+
ldr q31, [x0, #0x160]
|
|
169
|
+
sqrdmulh v10.8h, v5.8h, v1.h[1]
|
|
170
|
+
mls v8.8h, v13.8h, v7.h[0]
|
|
171
|
+
ldr q13, [x0, #0x1d0]
|
|
172
|
+
sub v14.8h, v24.8h, v18.8h
|
|
173
|
+
add v9.8h, v24.8h, v18.8h
|
|
174
|
+
sqrdmulh v2.8h, v31.8h, v0.h[1]
|
|
175
|
+
mls v4.8h, v10.8h, v7.h[0]
|
|
176
|
+
add v10.8h, v25.8h, v23.8h
|
|
177
|
+
sub v24.8h, v19.8h, v11.8h
|
|
178
|
+
add v25.8h, v19.8h, v11.8h
|
|
179
|
+
sqrdmulh v28.8h, v13.8h, v0.h[1]
|
|
180
|
+
mul v11.8h, v30.8h, v0.h[0]
|
|
181
|
+
mul v17.8h, v13.8h, v0.h[0]
|
|
182
|
+
sub v13.8h, v10.8h, v16.8h
|
|
183
|
+
sub v6.8h, v15.8h, v8.8h
|
|
184
|
+
mls v17.8h, v28.8h, v7.h[0]
|
|
185
|
+
str q13, [x0, #0x40]
|
|
186
|
+
mls v22.8h, v27.8h, v7.h[0]
|
|
187
|
+
ldr q13, [x0, #0xd0]
|
|
188
|
+
add v26.8h, v20.8h, v4.8h
|
|
189
|
+
mul v18.8h, v31.8h, v0.h[0]
|
|
190
|
+
add v27.8h, v10.8h, v16.8h
|
|
191
|
+
str q26, [x0, #0x80]
|
|
192
|
+
sqrdmulh v31.8h, v6.8h, v0.h[5]
|
|
193
|
+
add v3.8h, v21.8h, v22.8h
|
|
194
|
+
str q27, [x0], #0x10 // x0 += 16; stores after this line use offsets relative to the advanced pointer
|
|
195
|
+
mul v26.8h, v6.8h, v0.h[4]
|
|
196
|
+
add v6.8h, v13.8h, v17.8h
|
|
197
|
+
sub v5.8h, v13.8h, v17.8h
|
|
198
|
+
str q3, [x0, #0x170]
|
|
199
|
+
sub v17.8h, v21.8h, v22.8h
|
|
200
|
+
sqrdmulh v10.8h, v6.8h, v0.h[3]
|
|
201
|
+
sub v13.8h, v20.8h, v4.8h
|
|
202
|
+
add v20.8h, v15.8h, v8.8h
|
|
203
|
+
sqrdmulh v12.8h, v5.8h, v0.h[5]
|
|
204
|
+
str q13, [x0, #0xb0]
|
|
205
|
+
mul v8.8h, v6.8h, v0.h[2]
|
|
206
|
+
str q17, [x0, #0x1b0]
|
|
207
|
+
mls v8.8h, v10.8h, v7.h[0]
|
|
208
|
+
mul v29.8h, v5.8h, v0.h[4]
|
|
209
|
+
mls v29.8h, v12.8h, v7.h[0]
|
|
210
|
+
sub v5.8h, v9.8h, v8.8h
|
|
211
|
+
add v3.8h, v9.8h, v8.8h
|
|
212
|
+
sqrdmulh v15.8h, v20.8h, v0.h[3]
|
|
213
|
+
mul v4.8h, v5.8h, v1.h[0]
|
|
214
|
+
add v6.8h, v14.8h, v29.8h
|
|
215
|
+
sqrdmulh v9.8h, v3.8h, v0.h[7]
|
|
216
|
+
sqrdmulh v12.8h, v6.8h, v1.h[3]
|
|
217
|
+
sub v10.8h, v14.8h, v29.8h
|
|
218
|
+
mul v23.8h, v6.8h, v1.h[2]
|
|
219
|
+
mls v26.8h, v31.8h, v7.h[0]
|
|
220
|
+
mls v23.8h, v12.8h, v7.h[0]
|
|
221
|
+
mul v16.8h, v3.8h, v0.h[6]
|
|
222
|
+
add v13.8h, v24.8h, v26.8h
|
|
223
|
+
sub v21.8h, v24.8h, v26.8h
|
|
224
|
+
mls v16.8h, v9.8h, v7.h[0]
|
|
225
|
+
add v28.8h, v13.8h, v23.8h
|
|
226
|
+
sub v13.8h, v13.8h, v23.8h
|
|
227
|
+
mul v23.8h, v20.8h, v0.h[2]
|
|
228
|
+
subs x4, x4, #0x1 // count down the first loop
|
|
229
|
+
cbnz x4, Lntt_layer123_start // repeat while x4 != 0
|
|
230
|
+
sqrdmulh v3.8h, v5.8h, v1.h[1] // code after the first loop: finishes values still held in registers
|
|
231
|
+
mls v23.8h, v15.8h, v7.h[0]
|
|
232
|
+
ldr q5, [x0, #0x190]
|
|
233
|
+
mul v29.8h, v10.8h, v1.h[4]
|
|
234
|
+
mls v4.8h, v3.8h, v7.h[0]
|
|
235
|
+
sub v19.8h, v25.8h, v23.8h
|
|
236
|
+
sqrdmulh v31.8h, v5.8h, v0.h[1]
|
|
237
|
+
sqrdmulh v6.8h, v30.8h, v0.h[1]
|
|
238
|
+
sub v3.8h, v19.8h, v4.8h
|
|
239
|
+
mul v5.8h, v5.8h, v0.h[0]
|
|
240
|
+
str q3, [x0, #0xc0]
|
|
241
|
+
sqrdmulh v12.8h, v10.8h, v1.h[5]
|
|
242
|
+
mls v18.8h, v2.8h, v7.h[0]
|
|
243
|
+
ldr q3, [x0, #0x1d0]
|
|
244
|
+
mls v5.8h, v31.8h, v7.h[0]
|
|
245
|
+
sqrdmulh v10.8h, v3.8h, v0.h[1]
|
|
246
|
+
mls v11.8h, v6.8h, v7.h[0]
|
|
247
|
+
ldr q31, [x0, #0x90]
|
|
248
|
+
mul v30.8h, v3.8h, v0.h[0]
|
|
249
|
+
mls v30.8h, v10.8h, v7.h[0]
|
|
250
|
+
sub v10.8h, v31.8h, v5.8h
|
|
251
|
+
mls v29.8h, v12.8h, v7.h[0]
|
|
252
|
+
ldr q6, [x0, #0xd0]
|
|
253
|
+
sqrdmulh v15.8h, v10.8h, v0.h[5]
|
|
254
|
+
mul v17.8h, v10.8h, v0.h[4]
|
|
255
|
+
add v10.8h, v6.8h, v30.8h
|
|
256
|
+
sub v6.8h, v6.8h, v30.8h
|
|
257
|
+
sqrdmulh v12.8h, v10.8h, v0.h[3]
|
|
258
|
+
sub v27.8h, v21.8h, v29.8h
|
|
259
|
+
sqrdmulh v3.8h, v6.8h, v0.h[5]
|
|
260
|
+
mul v10.8h, v10.8h, v0.h[2]
|
|
261
|
+
ldr q20, [x0, #0x50]
|
|
262
|
+
mls v10.8h, v12.8h, v7.h[0]
|
|
263
|
+
mul v2.8h, v6.8h, v0.h[4]
|
|
264
|
+
add v6.8h, v20.8h, v18.8h
|
|
265
|
+
add v5.8h, v31.8h, v5.8h
|
|
266
|
+
mls v2.8h, v3.8h, v7.h[0]
|
|
267
|
+
sub v31.8h, v6.8h, v10.8h
|
|
268
|
+
sqrdmulh v12.8h, v5.8h, v0.h[3]
|
|
269
|
+
sub v22.8h, v20.8h, v18.8h
|
|
270
|
+
add v6.8h, v6.8h, v10.8h
|
|
271
|
+
mul v20.8h, v31.8h, v1.h[0]
|
|
272
|
+
add v30.8h, v22.8h, v2.8h
|
|
273
|
+
sqrdmulh v3.8h, v6.8h, v0.h[7]
|
|
274
|
+
sqrdmulh v10.8h, v30.8h, v1.h[3]
|
|
275
|
+
mul v9.8h, v30.8h, v1.h[2]
|
|
276
|
+
ldr q30, [x0, #0x10]
|
|
277
|
+
mls v17.8h, v15.8h, v7.h[0]
|
|
278
|
+
mls v9.8h, v10.8h, v7.h[0]
|
|
279
|
+
mul v15.8h, v6.8h, v0.h[6]
|
|
280
|
+
add v24.8h, v30.8h, v11.8h
|
|
281
|
+
sub v10.8h, v22.8h, v2.8h
|
|
282
|
+
mls v15.8h, v3.8h, v7.h[0]
|
|
283
|
+
add v6.8h, v19.8h, v4.8h
|
|
284
|
+
add v22.8h, v25.8h, v23.8h
|
|
285
|
+
sqrdmulh v3.8h, v10.8h, v1.h[5]
|
|
286
|
+
str q13, [x0, #0x140]
|
|
287
|
+
sub v19.8h, v30.8h, v11.8h
|
|
288
|
+
add v25.8h, v22.8h, v16.8h
|
|
289
|
+
mul v5.8h, v5.8h, v0.h[2]
|
|
290
|
+
sub v13.8h, v22.8h, v16.8h
|
|
291
|
+
str q28, [x0, #0x100]
|
|
292
|
+
mls v5.8h, v12.8h, v7.h[0]
|
|
293
|
+
str q13, [x0, #0x40]
|
|
294
|
+
str q6, [x0, #0x80]
|
|
295
|
+
add v21.8h, v21.8h, v29.8h
|
|
296
|
+
sqrdmulh v13.8h, v31.8h, v1.h[1]
|
|
297
|
+
str q25, [x0], #0x10 // x0 += 16 (third of four advances in the first phase)
|
|
298
|
+
add v12.8h, v19.8h, v17.8h
|
|
299
|
+
sub v31.8h, v19.8h, v17.8h
|
|
300
|
+
mul v30.8h, v10.8h, v1.h[4]
|
|
301
|
+
str q21, [x0, #0x170]
|
|
302
|
+
add v21.8h, v24.8h, v5.8h
|
|
303
|
+
add v6.8h, v12.8h, v9.8h
|
|
304
|
+
mls v30.8h, v3.8h, v7.h[0]
|
|
305
|
+
str q27, [x0, #0x1b0]
|
|
306
|
+
sub v10.8h, v21.8h, v15.8h
|
|
307
|
+
sub v12.8h, v12.8h, v9.8h
|
|
308
|
+
mls v20.8h, v13.8h, v7.h[0]
|
|
309
|
+
str q6, [x0, #0x100]
|
|
310
|
+
str q10, [x0, #0x40]
|
|
311
|
+
sub v13.8h, v24.8h, v5.8h
|
|
312
|
+
add v3.8h, v21.8h, v15.8h
|
|
313
|
+
str q12, [x0, #0x140]
|
|
314
|
+
sub v10.8h, v31.8h, v30.8h
|
|
315
|
+
add v21.8h, v31.8h, v30.8h
|
|
316
|
+
str q3, [x0], #0x10 // x0 += 16 (fourth advance: 64 bytes in total since entry)
|
|
317
|
+
add v12.8h, v13.8h, v20.8h
|
|
318
|
+
sub v13.8h, v13.8h, v20.8h
|
|
319
|
+
str q21, [x0, #0x170]
|
|
320
|
+
str q10, [x0, #0x1b0]
|
|
321
|
+
str q12, [x0, #0x70]
|
|
322
|
+
str q13, [x0, #0xb0]
|
|
323
|
+
mov x0, x3 // rewind x0 to the start of p for the second loop
|
|
324
|
+
mov x4, #0x8 // =8; second-loop counter, reduced by 2 just before the loop label
|
|
325
|
+
ldr q2, [x0, #0x20]
|
|
326
|
+
ldr q13, [x1], #0x10 // next 16 bytes of the x1 table; x1 += 16
|
|
327
|
+
ldr q30, [x0, #0x30]
|
|
328
|
+
ldr q25, [x2, #0x40]
|
|
329
|
+
ldr q5, [x0]
|
|
330
|
+
ldr q18, [x0, #0x60]
|
|
331
|
+
ldr q12, [x0, #0x70]
|
|
332
|
+
sqrdmulh v17.8h, v2.8h, v13.h[1]
|
|
333
|
+
ldr q4, [x1], #0x10 // x1 += 16
|
|
334
|
+
ldr q23, [x0, #0x10]
|
|
335
|
+
sqrdmulh v21.8h, v30.8h, v13.h[1]
|
|
336
|
+
ldr q24, [x2, #0x20]
|
|
337
|
+
ldr q9, [x2], #0x60 // x2 += 0x60; later x2 loads use negative offsets into the block just passed
|
|
338
|
+
mul v10.8h, v30.8h, v13.h[0]
|
|
339
|
+
mul v11.8h, v2.8h, v13.h[0]
|
|
340
|
+
mls v10.8h, v21.8h, v7.h[0]
|
|
341
|
+
sqrdmulh v29.8h, v12.8h, v4.h[1]
|
|
342
|
+
mul v1.8h, v12.8h, v4.h[0]
|
|
343
|
+
add v21.8h, v23.8h, v10.8h
|
|
344
|
+
sub v10.8h, v23.8h, v10.8h
|
|
345
|
+
mul v8.8h, v18.8h, v4.h[0]
|
|
346
|
+
sqrdmulh v23.8h, v21.8h, v13.h[3]
|
|
347
|
+
mul v2.8h, v21.8h, v13.h[2]
|
|
348
|
+
mls v1.8h, v29.8h, v7.h[0]
|
|
349
|
+
mls v2.8h, v23.8h, v7.h[0]
|
|
350
|
+
ldur q15, [x2, #-0x50]
|
|
351
|
+
sqrdmulh v0.8h, v10.8h, v13.h[5]
|
|
352
|
+
mls v11.8h, v17.8h, v7.h[0]
|
|
353
|
+
ldr q29, [x0, #0x50]
|
|
354
|
+
mul v23.8h, v10.8h, v13.h[4]
|
|
355
|
+
mls v23.8h, v0.8h, v7.h[0]
|
|
356
|
+
sub v16.8h, v29.8h, v1.8h
|
|
357
|
+
add v3.8h, v5.8h, v11.8h
|
|
358
|
+
sub v31.8h, v5.8h, v11.8h
|
|
359
|
+
sqrdmulh v22.8h, v16.8h, v4.h[5]
|
|
360
|
+
add v30.8h, v3.8h, v2.8h
|
|
361
|
+
sub v0.8h, v3.8h, v2.8h
|
|
362
|
+
sqrdmulh v28.8h, v18.8h, v4.h[1]
|
|
363
|
+
add v21.8h, v31.8h, v23.8h
|
|
364
|
+
sub v19.8h, v31.8h, v23.8h
|
|
365
|
+
mul v26.8h, v16.8h, v4.h[4]
|
|
366
|
+
trn2 v3.4s, v30.4s, v0.4s // trn1/trn2 on 32-bit then 64-bit lanes regroup elements across registers
|
|
367
|
+
ldur q23, [x2, #-0x10]
|
|
368
|
+
trn2 v18.4s, v21.4s, v19.4s
|
|
369
|
+
mls v26.8h, v22.8h, v7.h[0]
|
|
370
|
+
trn1 v13.4s, v30.4s, v0.4s
|
|
371
|
+
mls v8.8h, v28.8h, v7.h[0]
|
|
372
|
+
trn2 v31.2d, v3.2d, v18.2d
|
|
373
|
+
trn1 v11.4s, v21.4s, v19.4s
|
|
374
|
+
add v27.8h, v29.8h, v1.8h
|
|
375
|
+
sqrdmulh v6.8h, v31.8h, v15.8h // from here multipliers are full vectors loaded from x2 rather than single lanes
|
|
376
|
+
trn1 v2.2d, v13.2d, v11.2d
|
|
377
|
+
trn2 v13.2d, v13.2d, v11.2d
|
|
378
|
+
mul v1.8h, v31.8h, v9.8h
|
|
379
|
+
ldr q11, [x0, #0x40]
|
|
380
|
+
sqrdmulh v29.8h, v13.8h, v15.8h
|
|
381
|
+
mls v1.8h, v6.8h, v7.h[0]
|
|
382
|
+
trn1 v6.2d, v3.2d, v18.2d
|
|
383
|
+
mul v17.8h, v13.8h, v9.8h
|
|
384
|
+
sub v13.8h, v11.8h, v8.8h
|
|
385
|
+
sqrdmulh v10.8h, v27.8h, v4.h[3]
|
|
386
|
+
sub v12.8h, v13.8h, v26.8h
|
|
387
|
+
sub v18.8h, v6.8h, v1.8h
|
|
388
|
+
mls v17.8h, v29.8h, v7.h[0]
|
|
389
|
+
add v30.8h, v6.8h, v1.8h
|
|
390
|
+
add v6.8h, v13.8h, v26.8h
|
|
391
|
+
ldur q13, [x2, #-0x30]
|
|
392
|
+
sqrdmulh v16.8h, v18.8h, v23.8h
|
|
393
|
+
trn1 v28.4s, v6.4s, v12.4s
|
|
394
|
+
mul v23.8h, v18.8h, v25.8h
|
|
395
|
+
ldr q25, [x2, #0x10]
|
|
396
|
+
add v20.8h, v2.8h, v17.8h
|
|
397
|
+
mul v0.8h, v30.8h, v24.8h
|
|
398
|
+
sqrdmulh v29.8h, v30.8h, v13.8h
|
|
399
|
+
sub v30.8h, v2.8h, v17.8h
|
|
400
|
+
mls v23.8h, v16.8h, v7.h[0]
|
|
401
|
+
sub x4, x4, #0x2 // 8 - 2 = 6 passes through the loop; the first and last 0x40-byte chunks are handled outside it
|
|
402
|
+
|
|
403
|
+
Lntt_layer4567_start: // second loop: per pass x0 += 0x40, x1 += 0x10, x2 += 0x60
|
|
404
|
+
ldr q19, [x2, #0x50]
|
|
405
|
+
sub v31.8h, v30.8h, v23.8h
|
|
406
|
+
mls v0.8h, v29.8h, v7.h[0]
|
|
407
|
+
add v16.8h, v11.8h, v8.8h
|
|
408
|
+
ldr q18, [x0, #0xa0]
|
|
409
|
+
trn2 v14.4s, v6.4s, v12.4s
|
|
410
|
+
mul v26.8h, v27.8h, v4.h[2]
|
|
411
|
+
ldr q4, [x1], #0x10 // x1 += 16
|
|
412
|
+
ldr q24, [x2, #0x40]
|
|
413
|
+
ldr q21, [x0, #0xb0]
|
|
414
|
+
mls v26.8h, v10.8h, v7.h[0]
|
|
415
|
+
add v23.8h, v30.8h, v23.8h
|
|
416
|
+
sub v15.8h, v20.8h, v0.8h
|
|
417
|
+
ldr q9, [x0, #0x90]
|
|
418
|
+
add v10.8h, v20.8h, v0.8h
|
|
419
|
+
mul v8.8h, v18.8h, v4.h[0]
|
|
420
|
+
ldr q1, [x2], #0x60 // x2 += 0x60
|
|
421
|
+
trn1 v27.4s, v23.4s, v31.4s
|
|
422
|
+
sqrdmulh v12.8h, v18.8h, v4.h[1]
|
|
423
|
+
trn1 v5.4s, v10.4s, v15.4s
|
|
424
|
+
sub v30.8h, v16.8h, v26.8h
|
|
425
|
+
trn2 v13.2d, v5.2d, v27.2d
|
|
426
|
+
sqrdmulh v2.8h, v21.8h, v4.h[1]
|
|
427
|
+
add v29.8h, v16.8h, v26.8h
|
|
428
|
+
mul v0.8h, v21.8h, v4.h[0]
|
|
429
|
+
str q13, [x0, #0x20]
|
|
430
|
+
trn1 v11.4s, v29.4s, v30.4s
|
|
431
|
+
mls v8.8h, v12.8h, v7.h[0]
|
|
432
|
+
trn2 v26.4s, v29.4s, v30.4s
|
|
433
|
+
trn2 v6.2d, v11.2d, v28.2d
|
|
434
|
+
mls v0.8h, v2.8h, v7.h[0]
|
|
435
|
+
trn2 v16.2d, v26.2d, v14.2d
|
|
436
|
+
trn1 v26.2d, v26.2d, v14.2d
|
|
437
|
+
trn1 v20.2d, v5.2d, v27.2d
|
|
438
|
+
sqrdmulh v29.8h, v6.8h, v25.8h
|
|
439
|
+
trn2 v15.4s, v10.4s, v15.4s
|
|
440
|
+
sqrdmulh v13.8h, v16.8h, v25.8h
|
|
441
|
+
str q20, [x0], #0x40 // x0 += 64; the stur stores below use negative offsets to finish the chunk just passed
|
|
442
|
+
sub v30.8h, v9.8h, v0.8h
|
|
443
|
+
add v27.8h, v9.8h, v0.8h
|
|
444
|
+
mul v17.8h, v6.8h, v1.8h
|
|
445
|
+
sqrdmulh v22.8h, v30.8h, v4.h[5]
|
|
446
|
+
mul v18.8h, v16.8h, v1.8h
|
|
447
|
+
mls v18.8h, v13.8h, v7.h[0]
|
|
448
|
+
mul v2.8h, v30.8h, v4.h[4]
|
|
449
|
+
mls v2.8h, v22.8h, v7.h[0]
|
|
450
|
+
trn2 v22.4s, v23.4s, v31.4s
|
|
451
|
+
sub v3.8h, v26.8h, v18.8h
|
|
452
|
+
ldur q25, [x2, #-0x30]
|
|
453
|
+
mls v17.8h, v29.8h, v7.h[0]
|
|
454
|
+
trn2 v31.2d, v15.2d, v22.2d
|
|
455
|
+
trn1 v20.2d, v15.2d, v22.2d
|
|
456
|
+
add v16.8h, v26.8h, v18.8h
|
|
457
|
+
sqrdmulh v26.8h, v3.8h, v19.8h
|
|
458
|
+
trn1 v21.2d, v11.2d, v28.2d
|
|
459
|
+
ldr q11, [x0, #0x40]
|
|
460
|
+
sqrdmulh v29.8h, v16.8h, v25.8h
|
|
461
|
+
stur q20, [x0, #-0x30]
|
|
462
|
+
add v20.8h, v21.8h, v17.8h
|
|
463
|
+
stur q31, [x0, #-0x10]
|
|
464
|
+
mul v23.8h, v3.8h, v24.8h
|
|
465
|
+
ldr q25, [x2, #0x10]
|
|
466
|
+
sub v13.8h, v11.8h, v8.8h
|
|
467
|
+
mls v23.8h, v26.8h, v7.h[0]
|
|
468
|
+
ldur q1, [x2, #-0x40]
|
|
469
|
+
sub v12.8h, v13.8h, v2.8h
|
|
470
|
+
add v6.8h, v13.8h, v2.8h
|
|
471
|
+
sqrdmulh v10.8h, v27.8h, v4.h[3]
|
|
472
|
+
sub v30.8h, v21.8h, v17.8h
|
|
473
|
+
mul v0.8h, v16.8h, v1.8h
|
|
474
|
+
trn1 v28.4s, v6.4s, v12.4s
|
|
475
|
+
subs x4, x4, #0x1 // count down the second loop
|
|
476
|
+
cbnz x4, Lntt_layer4567_start // repeat while x4 != 0
|
|
477
|
+
add v22.8h, v11.8h, v8.8h // code after the second loop: completes the last two 0x40-byte chunks
|
|
478
|
+
mul v27.8h, v27.8h, v4.h[2]
|
|
479
|
+
trn2 v17.4s, v6.4s, v12.4s
|
|
480
|
+
ldr q15, [x2], #0x60 // final x2 advance: 8 * 0x60 = 768 bytes in total
|
|
481
|
+
mls v27.8h, v10.8h, v7.h[0]
|
|
482
|
+
add v4.8h, v30.8h, v23.8h
|
|
483
|
+
sub v18.8h, v30.8h, v23.8h
|
|
484
|
+
ldur q6, [x2, #-0x30]
|
|
485
|
+
mls v0.8h, v29.8h, v7.h[0]
|
|
486
|
+
ldur q12, [x2, #-0x40]
|
|
487
|
+
ldur q24, [x2, #-0x20]
|
|
488
|
+
ldur q2, [x2, #-0x10]
|
|
489
|
+
trn1 v9.4s, v4.4s, v18.4s
|
|
490
|
+
add v10.8h, v22.8h, v27.8h
|
|
491
|
+
sub v13.8h, v22.8h, v27.8h
|
|
492
|
+
sub v1.8h, v20.8h, v0.8h
|
|
493
|
+
trn2 v21.4s, v10.4s, v13.4s
|
|
494
|
+
add v27.8h, v20.8h, v0.8h
|
|
495
|
+
trn2 v3.2d, v21.2d, v17.2d
|
|
496
|
+
trn1 v13.4s, v10.4s, v13.4s
|
|
497
|
+
trn1 v31.4s, v27.4s, v1.4s
|
|
498
|
+
sqrdmulh v10.8h, v3.8h, v25.8h
|
|
499
|
+
trn2 v5.2d, v13.2d, v28.2d
|
|
500
|
+
trn1 v13.2d, v13.2d, v28.2d
|
|
501
|
+
trn1 v21.2d, v21.2d, v17.2d
|
|
502
|
+
sqrdmulh v17.8h, v5.8h, v25.8h
|
|
503
|
+
trn2 v30.2d, v31.2d, v9.2d
|
|
504
|
+
mul v25.8h, v3.8h, v15.8h
|
|
505
|
+
str q30, [x0, #0x20]
|
|
506
|
+
trn2 v30.4s, v4.4s, v18.4s
|
|
507
|
+
mls v25.8h, v10.8h, v7.h[0]
|
|
508
|
+
trn2 v3.4s, v27.4s, v1.4s
|
|
509
|
+
mul v20.8h, v5.8h, v15.8h
|
|
510
|
+
trn2 v10.2d, v3.2d, v30.2d
|
|
511
|
+
mls v20.8h, v17.8h, v7.h[0]
|
|
512
|
+
str q10, [x0, #0x30]
|
|
513
|
+
sub v18.8h, v21.8h, v25.8h
|
|
514
|
+
add v10.8h, v21.8h, v25.8h
|
|
515
|
+
trn1 v3.2d, v3.2d, v30.2d
|
|
516
|
+
sqrdmulh v30.8h, v18.8h, v2.8h
|
|
517
|
+
mul v12.8h, v10.8h, v12.8h
|
|
518
|
+
sqrdmulh v6.8h, v10.8h, v6.8h
|
|
519
|
+
str q3, [x0, #0x10]
|
|
520
|
+
add v21.8h, v13.8h, v20.8h
|
|
521
|
+
mul v10.8h, v18.8h, v24.8h
|
|
522
|
+
sub v13.8h, v13.8h, v20.8h
|
|
523
|
+
mls v10.8h, v30.8h, v7.h[0]
|
|
524
|
+
mls v12.8h, v6.8h, v7.h[0]
|
|
525
|
+
trn1 v30.2d, v31.2d, v9.2d
|
|
526
|
+
sub v3.8h, v13.8h, v10.8h
|
|
527
|
+
add v6.8h, v13.8h, v10.8h
|
|
528
|
+
add v10.8h, v21.8h, v12.8h
|
|
529
|
+
sub v21.8h, v21.8h, v12.8h
|
|
530
|
+
trn2 v13.4s, v6.4s, v3.4s
|
|
531
|
+
trn1 v12.4s, v10.4s, v21.4s
|
|
532
|
+
trn2 v21.4s, v10.4s, v21.4s
|
|
533
|
+
trn1 v3.4s, v6.4s, v3.4s
|
|
534
|
+
str q30, [x0], #0x40 // x0 += 64 (seventh chunk written)
|
|
535
|
+
trn2 v10.2d, v21.2d, v13.2d
|
|
536
|
+
trn1 v13.2d, v21.2d, v13.2d
|
|
537
|
+
trn2 v21.2d, v12.2d, v3.2d
|
|
538
|
+
trn1 v3.2d, v12.2d, v3.2d
|
|
539
|
+
str q10, [x0, #0x30]
|
|
540
|
+
str q13, [x0, #0x10]
|
|
541
|
+
str q3, [x0], #0x40 // x0 += 64: 8 * 0x40 = 512 bytes, the end of p
|
|
542
|
+
stur q21, [x0, #-0x20] // last store, addressed back from the end of p
|
|
543
|
+
ldp d8, d9, [sp] // restore the callee-saved d8-d15 spilled on entry
|
|
544
|
+
.cfi_restore d8
|
|
545
|
+
.cfi_restore d9
|
|
546
|
+
ldp d10, d11, [sp, #0x10]
|
|
547
|
+
.cfi_restore d10
|
|
548
|
+
.cfi_restore d11
|
|
549
|
+
ldp d12, d13, [sp, #0x20]
|
|
550
|
+
.cfi_restore d12
|
|
551
|
+
.cfi_restore d13
|
|
552
|
+
ldp d14, d15, [sp, #0x30]
|
|
553
|
+
.cfi_restore d14
|
|
554
|
+
.cfi_restore d15
|
|
555
|
+
add sp, sp, #0x40 // release the 64-byte frame
|
|
556
|
+
.cfi_adjust_cfa_offset -0x40
|
|
557
|
+
ret
|
|
558
|
+
.cfi_endproc
|
|
559
|
+
|
|
560
|
+
MLK_ASM_FN_SIZE(ntt_asm) // macro defined outside this chunk; by its name it records the symbol size (TODO confirm)
|
|
561
|
+
|
|
562
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|
data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/*yaml
|
|
7
|
+
Name: poly_mulcache_compute_asm
|
|
8
|
+
Description: Compute multiplication cache for polynomial
|
|
9
|
+
Signature: void mlk_poly_mulcache_compute_asm(int16_t cache[128], const int16_t mlk_poly[256], const int16_t zetas[128], const int16_t zetas_twisted[128])
|
|
10
|
+
ABI:
|
|
11
|
+
x0:
|
|
12
|
+
type: buffer
|
|
13
|
+
size_bytes: 256
|
|
14
|
+
permissions: write-only
|
|
15
|
+
c_parameter: int16_t cache[128]
|
|
16
|
+
description: Output cache
|
|
17
|
+
x1:
|
|
18
|
+
type: buffer
|
|
19
|
+
size_bytes: 512
|
|
20
|
+
permissions: read-only
|
|
21
|
+
c_parameter: const int16_t mlk_poly[256]
|
|
22
|
+
description: Input polynomial
|
|
23
|
+
x2:
|
|
24
|
+
type: buffer
|
|
25
|
+
size_bytes: 256
|
|
26
|
+
permissions: read-only
|
|
27
|
+
c_parameter: const int16_t zetas[128]
|
|
28
|
+
description: Zeta values
|
|
29
|
+
x3:
|
|
30
|
+
type: buffer
|
|
31
|
+
size_bytes: 256
|
|
32
|
+
permissions: read-only
|
|
33
|
+
c_parameter: const int16_t zetas_twisted[128]
|
|
34
|
+
description: Twisted zeta values
|
|
35
|
+
Stack:
|
|
36
|
+
bytes: 0
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
#include "../../../common.h"
|
|
40
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
41
|
+
|
|
42
|
+
/*
|
|
43
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
44
|
+
* dev/aarch64_opt/src/poly_mulcache_compute_asm.S using scripts/simpasm. Do not modify it directly.
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
#if defined(__ELF__)
|
|
48
|
+
.section .note.GNU-stack,"",@progbits
|
|
49
|
+
#endif
|
|
50
|
+
|
|
51
|
+
.text
|
|
52
|
+
.balign 4
|
|
53
|
+
.global MLK_ASM_NAMESPACE(poly_mulcache_compute_asm) // make the macro-namespaced symbol visible to the linker
|
|
54
|
+
MLK_ASM_FN_SYMBOL(poly_mulcache_compute_asm) // function entry; macro is defined outside this chunk (presumably via common.h)
|
|
55
|
+
|
|
56
|
+
.cfi_startproc
|
|
57
|
+
mov w5, #0xd01 // =3329; no stack frame: only v0-v7 and v16-v29 are written, none callee-saved
|
|
58
|
+
dup v6.8h, w5 // v6 = 3329 in every lane (presumably the ML-KEM modulus); read as v6.h[0] by each mls
|
|
59
|
+
mov w5, #0x4ebf // =20159
|
|
60
|
+
dup v7.8h, w5 // v7 = 20159 in every lane; overwritten by the ldur q7 below before any read
|
|
61
|
+
mov x4, #0x10 // =16; halved and then reduced by 2 before the loop
|
|
62
|
+
ldr q0, [x1], #0x20 // v0 = polynomial coefficients 0-7; x1 += 32
|
|
63
|
+
ldur q2, [x1, #-0x10] // v2 = coefficients 8-15 of the same 32-byte group
|
|
64
|
+
ldr q19, [x1], #0x20
|
|
65
|
+
ldr q29, [x3], #0x10 // 16 bytes from the x3 table (zetas_twisted per the header); x3 += 16
|
|
66
|
+
ldur q16, [x1, #-0x10]
|
|
67
|
+
ldr q18, [x2], #0x10 // 16 bytes from the x2 table (zetas per the header); x2 += 16
|
|
68
|
+
ldr q26, [x1], #0x20
|
|
69
|
+
ldr q25, [x2], #0x10
|
|
70
|
+
uzp2 v5.8h, v0.8h, v2.8h // v5 = the eight odd-indexed 16-bit elements of v0:v2
|
|
71
|
+
ldr q28, [x3], #0x10
|
|
72
|
+
ldur q7, [x1, #-0x10] // reuses v7 as a data register
|
|
73
|
+
ldr q2, [x1], #0x20
|
|
74
|
+
uzp2 v27.8h, v19.8h, v16.8h
|
|
75
|
+
sqrdmulh v16.8h, v5.8h, v29.8h // recurring triple: sqrdmulh by the x3 vector, mul by the x2 vector, mls by 3329
|
|
76
|
+
ldr q17, [x3], #0x10
|
|
77
|
+
ldr q19, [x3], #0x10
|
|
78
|
+
mul v5.8h, v5.8h, v18.8h
|
|
79
|
+
uzp2 v29.8h, v26.8h, v7.8h
|
|
80
|
+
mul v26.8h, v27.8h, v25.8h
|
|
81
|
+
sqrdmulh v4.8h, v27.8h, v28.8h
|
|
82
|
+
mls v5.8h, v16.8h, v6.h[0] // v5 = odd*x2_vec - sqrdmulh(odd, x3_vec)*3329: first 16 bytes of the cache
|
|
83
|
+
lsr x4, x4, #1 // 16 >> 1 = 8
|
|
84
|
+
sub x4, x4, #0x2 // 8 - 2 = 6 loop passes; the code before and after the loop covers the rest
|
|
85
|
+
|
|
86
|
+
Lpoly_mulcache_compute_loop_start: // per pass: two 16-byte stores to x0, 64 bytes read from x1, 32 each from x2 and x3
|
|
87
|
+
str q5, [x0], #0x10 // store a finished result; x0 += 16
|
|
88
|
+
sqrdmulh v22.8h, v29.8h, v17.8h
|
|
89
|
+
ldr q28, [x2], #0x10
|
|
90
|
+
ldur q24, [x1, #-0x10]
|
|
91
|
+
ldr q0, [x1], #0x20
|
|
92
|
+
mls v26.8h, v4.8h, v6.h[0]
|
|
93
|
+
ldur q16, [x1, #-0x10]
|
|
94
|
+
ldr q17, [x3], #0x10
|
|
95
|
+
mul v5.8h, v29.8h, v28.8h
|
|
96
|
+
uzp2 v23.8h, v2.8h, v24.8h
|
|
97
|
+
ldr q18, [x2], #0x10
|
|
98
|
+
mls v5.8h, v22.8h, v6.h[0]
|
|
99
|
+
uzp2 v29.8h, v0.8h, v16.8h
|
|
100
|
+
sqrdmulh v4.8h, v23.8h, v19.8h
|
|
101
|
+
ldr q2, [x1], #0x20
|
|
102
|
+
ldr q19, [x3], #0x10
|
|
103
|
+
str q26, [x0], #0x10 // second store of the pass; x0 += 16
|
|
104
|
+
mul v26.8h, v23.8h, v18.8h
|
|
105
|
+
subs x4, x4, #0x1 // count down the loop
|
|
106
|
+
cbnz x4, Lpoly_mulcache_compute_loop_start // repeat while x4 != 0
|
|
107
|
+
mls v26.8h, v4.8h, v6.h[0] // code after the loop: finishes and stores the last four 16-byte results
|
|
108
|
+
str q5, [x0], #0x10
|
|
109
|
+
ldr q5, [x2], #0x10
|
|
110
|
+
ldur q4, [x1, #-0x10] // second half of the last 32-byte group (x1 already sits at the end of the input)
|
|
111
|
+
sqrdmulh v16.8h, v29.8h, v17.8h
|
|
112
|
+
ldr q0, [x2], #0x10
|
|
113
|
+
mul v29.8h, v29.8h, v5.8h
|
|
114
|
+
uzp2 v18.8h, v2.8h, v4.8h
|
|
115
|
+
str q26, [x0], #0x10
|
|
116
|
+
sqrdmulh v17.8h, v18.8h, v19.8h
|
|
117
|
+
mls v29.8h, v16.8h, v6.h[0]
|
|
118
|
+
mul v26.8h, v18.8h, v0.8h
|
|
119
|
+
mls v26.8h, v17.8h, v6.h[0]
|
|
120
|
+
str q29, [x0], #0x10
|
|
121
|
+
str q26, [x0], #0x10 // 16th store: 16 * 16 = 256 bytes written to the cache in total
|
|
122
|
+
ret
|
|
123
|
+
.cfi_endproc
|
|
124
|
+
|
|
125
|
+
MLK_ASM_FN_SIZE(poly_mulcache_compute_asm) // macro defined outside this chunk; by its name it records the symbol size (TODO confirm)
|
|
126
|
+
|
|
127
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|