pq_crypto 0.3.2 → 0.5.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,628 @@
|
|
|
1
|
+
/* Copyright (c) 2022 Arm Limited
|
|
2
|
+
* Copyright (c) 2022 Hanno Becker
|
|
3
|
+
* Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer
|
|
4
|
+
* Copyright (c) The mlkem-native project authors
|
|
5
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/* References
|
|
9
|
+
* ==========
|
|
10
|
+
*
|
|
11
|
+
* - [NeonNTT]
|
|
12
|
+
* Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
|
|
13
|
+
* Becker, Hwang, Kannwischer, Yang, Yang
|
|
14
|
+
* https://eprint.iacr.org/2021/986
|
|
15
|
+
*
|
|
16
|
+
* - [SLOTHY_Paper]
|
|
17
|
+
* Fast and Clean: Auditable high-performance assembly via constraint solving
|
|
18
|
+
* Abdulrahman, Becker, Kannwischer, Klein
|
|
19
|
+
* https://eprint.iacr.org/2022/1303
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/*yaml
|
|
23
|
+
Name: intt_asm
|
|
24
|
+
Description: AArch64 ML-KEM inverse NTT following @[NeonNTT] and @[SLOTHY_Paper]
|
|
25
|
+
Signature: void mlk_intt_asm(int16_t p[256], const int16_t twiddles12345[80], const int16_t twiddles56[384])
|
|
26
|
+
ABI:
|
|
27
|
+
x0:
|
|
28
|
+
type: buffer
|
|
29
|
+
size_bytes: 512
|
|
30
|
+
permissions: read/write
|
|
31
|
+
c_parameter: int16_t p[256]
|
|
32
|
+
description: Input/output polynomial
|
|
33
|
+
x1:
|
|
34
|
+
type: buffer
|
|
35
|
+
size_bytes: 160
|
|
36
|
+
permissions: read-only
|
|
37
|
+
c_parameter: const int16_t twiddles12345[80]
|
|
38
|
+
description: Twiddle factors for layers 1-5
|
|
39
|
+
x2:
|
|
40
|
+
type: buffer
|
|
41
|
+
size_bytes: 768
|
|
42
|
+
permissions: read-only
|
|
43
|
+
c_parameter: const int16_t twiddles56[384]
|
|
44
|
+
description: Twiddle factors for layers 6-7
|
|
45
|
+
Stack:
|
|
46
|
+
bytes: 64
|
|
47
|
+
description: saving callee-saved Neon registers
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
#include "../../../common.h"
|
|
51
|
+
#if defined(MLK_ARITH_BACKEND_AARCH64) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
52
|
+
|
|
53
|
+
/*
|
|
54
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
55
|
+
* dev/aarch64_opt/src/intt.S using scripts/simpasm. Do not modify it directly.
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
#if defined(__ELF__)
|
|
59
|
+
.section .note.GNU-stack,"",@progbits
|
|
60
|
+
#endif
|
|
61
|
+
|
|
62
|
+
.text
|
|
63
|
+
.balign 4
|
|
64
|
+
.global MLK_ASM_NAMESPACE(intt_asm)
|
|
65
|
+
MLK_ASM_FN_SYMBOL(intt_asm) // function entry; per the ABI block above: x0 = p, x1 = twiddles12345, x2 = twiddles56
|
|
66
|
+
|
|
67
|
+
.cfi_startproc
|
|
68
|
+
sub sp, sp, #0x40 // reserve the 64-byte frame used to save d8-d15
|
|
69
|
+
.cfi_adjust_cfa_offset 0x40
|
|
70
|
+
stp d8, d9, [sp] // d8-d15 (low halves of v8-v15) are callee-saved under AAPCS64 and v8-v15 are overwritten below
|
|
71
|
+
.cfi_rel_offset d8, 0x0
|
|
72
|
+
.cfi_rel_offset d9, 0x8
|
|
73
|
+
stp d10, d11, [sp, #0x10]
|
|
74
|
+
.cfi_rel_offset d10, 0x10
|
|
75
|
+
.cfi_rel_offset d11, 0x18
|
|
76
|
+
stp d12, d13, [sp, #0x20]
|
|
77
|
+
.cfi_rel_offset d12, 0x20
|
|
78
|
+
.cfi_rel_offset d13, 0x28
|
|
79
|
+
stp d14, d15, [sp, #0x30]
|
|
80
|
+
.cfi_rel_offset d14, 0x30
|
|
81
|
+
.cfi_rel_offset d15, 0x38
|
|
82
|
+
mov w5, #0xd01 // =3329
|
|
83
|
+
mov v7.h[0], w5 // v7.h[0] = 3329 (the ML-KEM modulus q); it is the multiplier of every mls reduction step below
|
|
84
|
+
mov w5, #0x4ebf // =20159
|
|
85
|
+
mov v7.h[1], w5 // v7.h[1] = 20159 = round(2^26 / 3329); multiplier of the sqdmulh + srshr #11 + mls reductions below
|
|
86
|
+
mov w5, #0x200 // =512
|
|
87
|
+
dup v29.8h, w5 // v29 = 512 in all eight 16-bit lanes
|
|
88
|
+
mov w5, #0x13b0 // =5040
|
|
89
|
+
dup v30.8h, w5 // v30 = 5040 = round(512 * 2^15 / 3329) in all lanes; v29/v30 are the operand pair for the mul/sqrdmulh/mls scaling of each loaded vector (presumably the inverse-NTT normalisation factor - confirm against upstream)
|
|
90
|
+
mov x3, x0 // x3 = moving coefficient pointer for the first pass; x0 keeps the start of p for the second pass
|
|
91
|
+
mov x4, #0x8 // =8
|
|
92
|
+
ldr q13, [x3, #0x20] // this preamble loads the first 128 bytes of p (offsets 0x00-0x70), interleaved with arithmetic
|
|
93
|
+
ldr q8, [x3, #0x30]
|
|
94
|
+
ldr q6, [x3]
|
|
95
|
+
ldr q16, [x3, #0x10]
|
|
96
|
+
ldr q4, [x3, #0x50]
|
|
97
|
+
ldr q11, [x3, #0x40]
|
|
98
|
+
ldr q3, [x3, #0x70]
|
|
99
|
+
trn1 v23.4s, v13.4s, v8.4s // trn1/trn2 on .4s and then .2d lanes transpose the four loaded vectors of a block
|
|
100
|
+
ldr q0, [x3, #0x60]
|
|
101
|
+
trn2 v19.4s, v6.4s, v16.4s
|
|
102
|
+
trn2 v21.4s, v13.4s, v8.4s
|
|
103
|
+
trn1 v6.4s, v6.4s, v16.4s
|
|
104
|
+
ldr q24, [x2, #0x20] // full-width twiddle vectors for the current block are read from x2
|
|
105
|
+
trn1 v10.2d, v19.2d, v21.2d
|
|
106
|
+
ldr q16, [x2], #0x60 // post-increment: x2 advances 0x60 bytes per 64-byte block of p
|
|
107
|
+
trn1 v5.2d, v6.2d, v23.2d
|
|
108
|
+
trn1 v28.4s, v0.4s, v3.4s
|
|
109
|
+
trn2 v18.2d, v6.2d, v23.2d
|
|
110
|
+
mul v31.8h, v10.8h, v29.8h
|
|
111
|
+
trn2 v13.4s, v0.4s, v3.4s
|
|
112
|
+
ldur q14, [x2, #-0x50] // negative offsets: x2 was already advanced past this block above
|
|
113
|
+
sqrdmulh v26.8h, v18.8h, v30.8h
|
|
114
|
+
ldur q20, [x2, #-0x20]
|
|
115
|
+
mul v17.8h, v18.8h, v29.8h
|
|
116
|
+
trn2 v18.2d, v19.2d, v21.2d
|
|
117
|
+
mul v9.8h, v18.8h, v29.8h
|
|
118
|
+
trn1 v12.4s, v11.4s, v4.4s
|
|
119
|
+
sqrdmulh v22.8h, v18.8h, v30.8h
|
|
120
|
+
sqrdmulh v3.8h, v10.8h, v30.8h
|
|
121
|
+
sqrdmulh v25.8h, v5.8h, v30.8h
|
|
122
|
+
mls v9.8h, v22.8h, v7.h[0] // v9 = v18*512 - sqrdmulh(v18, 5040)*3329 in 16-bit lanes; this mul/sqrdmulh/mls triple is the constant-multiplication pattern used throughout
|
|
123
|
+
mls v17.8h, v26.8h, v7.h[0]
|
|
124
|
+
trn2 v26.4s, v11.4s, v4.4s
|
|
125
|
+
mul v8.8h, v5.8h, v29.8h
|
|
126
|
+
trn1 v10.2d, v26.2d, v13.2d
|
|
127
|
+
ldur q11, [x2, #-0x10]
|
|
128
|
+
mls v31.8h, v3.8h, v7.h[0]
|
|
129
|
+
trn1 v6.2d, v12.2d, v28.2d
|
|
130
|
+
trn2 v3.2d, v26.2d, v13.2d
|
|
131
|
+
ldur q4, [x2, #-0x30]
|
|
132
|
+
mls v8.8h, v25.8h, v7.h[0]
|
|
133
|
+
sub v19.8h, v17.8h, v9.8h
|
|
134
|
+
trn2 v13.2d, v12.2d, v28.2d
|
|
135
|
+
sqrdmulh v1.8h, v3.8h, v30.8h
|
|
136
|
+
add v9.8h, v17.8h, v9.8h
|
|
137
|
+
mul v18.8h, v19.8h, v20.8h
|
|
138
|
+
add v28.8h, v8.8h, v31.8h
|
|
139
|
+
sqrdmulh v20.8h, v19.8h, v11.8h
|
|
140
|
+
sub v12.8h, v28.8h, v9.8h
|
|
141
|
+
sub v23.8h, v8.8h, v31.8h
|
|
142
|
+
sqrdmulh v11.8h, v13.8h, v30.8h
|
|
143
|
+
sqrdmulh v5.8h, v23.8h, v4.8h
|
|
144
|
+
mul v0.8h, v23.8h, v24.8h
|
|
145
|
+
mul v2.8h, v13.8h, v29.8h
|
|
146
|
+
mls v0.8h, v5.8h, v7.h[0]
|
|
147
|
+
add v24.8h, v28.8h, v9.8h
|
|
148
|
+
mls v18.8h, v20.8h, v7.h[0]
|
|
149
|
+
sqrdmulh v15.8h, v6.8h, v30.8h
|
|
150
|
+
sqrdmulh v25.8h, v12.8h, v14.8h
|
|
151
|
+
mul v21.8h, v12.8h, v16.8h
|
|
152
|
+
sub v23.8h, v0.8h, v18.8h
|
|
153
|
+
sqrdmulh v8.8h, v23.8h, v14.8h
|
|
154
|
+
mul v23.8h, v23.8h, v16.8h
|
|
155
|
+
mls v21.8h, v25.8h, v7.h[0]
|
|
156
|
+
mls v23.8h, v8.8h, v7.h[0]
|
|
157
|
+
mul v14.8h, v3.8h, v29.8h
|
|
158
|
+
add v3.8h, v0.8h, v18.8h
|
|
159
|
+
trn2 v4.4s, v24.4s, v3.4s
|
|
160
|
+
mls v14.8h, v1.8h, v7.h[0]
|
|
161
|
+
trn1 v9.4s, v24.4s, v3.4s
|
|
162
|
+
trn2 v12.4s, v21.4s, v23.4s
|
|
163
|
+
mls v2.8h, v11.8h, v7.h[0]
|
|
164
|
+
trn1 v28.4s, v21.4s, v23.4s
|
|
165
|
+
ldr q11, [x1], #0x10 // x1 supplies one 16-byte vector per block; lanes h[0]-h[5] of it are used as by-element twiddles
|
|
166
|
+
mul v31.8h, v10.8h, v29.8h
|
|
167
|
+
trn1 v25.2d, v4.2d, v12.2d
|
|
168
|
+
trn1 v20.2d, v9.2d, v28.2d
|
|
169
|
+
ldr q23, [x2, #0x50]
|
|
170
|
+
trn2 v13.2d, v4.2d, v12.2d
|
|
171
|
+
sqrdmulh v21.8h, v10.8h, v30.8h
|
|
172
|
+
trn2 v4.2d, v9.2d, v28.2d
|
|
173
|
+
ldr q9, [x2, #0x40]
|
|
174
|
+
mul v27.8h, v6.8h, v29.8h
|
|
175
|
+
add v26.8h, v20.8h, v25.8h
|
|
176
|
+
sub v3.8h, v2.8h, v14.8h
|
|
177
|
+
sqdmulh v12.8h, v26.8h, v7.h[1]
|
|
178
|
+
add v5.8h, v4.8h, v13.8h
|
|
179
|
+
sub v8.8h, v4.8h, v13.8h
|
|
180
|
+
add v10.8h, v2.8h, v14.8h
|
|
181
|
+
sqdmulh v6.8h, v5.8h, v7.h[1]
|
|
182
|
+
ldr q2, [x2, #0x10]
|
|
183
|
+
mls v27.8h, v15.8h, v7.h[0]
|
|
184
|
+
ldr q15, [x2, #0x20]
|
|
185
|
+
srshr v12.8h, v12.8h, #0xb
|
|
186
|
+
mls v31.8h, v21.8h, v7.h[0]
|
|
187
|
+
srshr v6.8h, v6.8h, #0xb
|
|
188
|
+
sqrdmulh v23.8h, v3.8h, v23.8h
|
|
189
|
+
mls v26.8h, v12.8h, v7.h[0] // v26 -= v12*3329 with v12 ~ round(v26*20159/2^26) from the sqdmulh/srshr above: a Barrett-style reduction that subtracts a multiple of 3329
|
|
190
|
+
add v21.8h, v27.8h, v31.8h
|
|
191
|
+
mls v5.8h, v6.8h, v7.h[0]
|
|
192
|
+
sub v6.8h, v27.8h, v31.8h
|
|
193
|
+
sub v14.8h, v21.8h, v10.8h
|
|
194
|
+
ldr q27, [x2], #0x60 // second 0x60-byte step of x2 (twiddles for the second block)
|
|
195
|
+
mul v3.8h, v3.8h, v9.8h
|
|
196
|
+
mls v3.8h, v23.8h, v7.h[0]
|
|
197
|
+
ldur q13, [x2, #-0x30]
|
|
198
|
+
sub v12.8h, v26.8h, v5.8h
|
|
199
|
+
add v5.8h, v26.8h, v5.8h
|
|
200
|
+
sqrdmulh v31.8h, v8.8h, v11.h[5]
|
|
201
|
+
sqrdmulh v19.8h, v12.8h, v11.h[1]
|
|
202
|
+
mul v24.8h, v12.8h, v11.h[0]
|
|
203
|
+
sqrdmulh v13.8h, v6.8h, v13.8h
|
|
204
|
+
mls v24.8h, v19.8h, v7.h[0]
|
|
205
|
+
sub x4, x4, #0x2 // 8 - 2 = 6 loop iterations; the two blocks loaded above are completed by the loop and the tail after it
|
|
206
|
+
|
|
207
|
+
Lintt_layer4567_start: // loop body: consumes one 64-byte block of p per iteration; loads, arithmetic and stores belonging to neighbouring blocks are interleaved
|
|
208
|
+
add v16.8h, v21.8h, v10.8h
|
|
209
|
+
mul v18.8h, v6.8h, v15.8h
|
|
210
|
+
sub v19.8h, v20.8h, v25.8h
|
|
211
|
+
ldr q21, [x3, #0xa0] // coefficient loads target the block two ahead of the one being stored (0xa0 here; 0x40/0x50/0x70 once x3 has advanced)
|
|
212
|
+
str q5, [x3], #0x40 // store offset 0x00 of the finished block, then advance x3 by one 64-byte block
|
|
213
|
+
mls v18.8h, v13.8h, v7.h[0]
|
|
214
|
+
sqrdmulh v15.8h, v14.8h, v2.8h
|
|
215
|
+
ldr q10, [x3, #0x50]
|
|
216
|
+
ldr q12, [x3, #0x40]
|
|
217
|
+
stur q24, [x3, #-0x20] // offset 0x20 of the block just left behind by the post-increment
|
|
218
|
+
mul v5.8h, v8.8h, v11.h[4]
|
|
219
|
+
sub v0.8h, v18.8h, v3.8h
|
|
220
|
+
ldr q24, [x3, #0x70]
|
|
221
|
+
mls v5.8h, v31.8h, v7.h[0]
|
|
222
|
+
ldr q26, [x2, #0x50]
|
|
223
|
+
trn2 v1.4s, v12.4s, v10.4s
|
|
224
|
+
add v6.8h, v18.8h, v3.8h
|
|
225
|
+
sqrdmulh v20.8h, v0.8h, v2.8h
|
|
226
|
+
trn1 v13.4s, v12.4s, v10.4s
|
|
227
|
+
trn1 v18.4s, v16.4s, v6.4s
|
|
228
|
+
mul v22.8h, v0.8h, v27.8h
|
|
229
|
+
trn1 v17.4s, v21.4s, v24.4s
|
|
230
|
+
sqrdmulh v0.8h, v19.8h, v11.h[3]
|
|
231
|
+
trn1 v25.2d, v13.2d, v17.2d
|
|
232
|
+
mls v22.8h, v20.8h, v7.h[0]
|
|
233
|
+
trn2 v21.4s, v21.4s, v24.4s
|
|
234
|
+
mul v24.8h, v25.8h, v29.8h // freshly loaded vectors get the 512/5040 scaling (v29/v30) as in the preamble
|
|
235
|
+
trn2 v28.2d, v13.2d, v17.2d
|
|
236
|
+
sqrdmulh v4.8h, v25.8h, v30.8h
|
|
237
|
+
trn2 v3.2d, v1.2d, v21.2d
|
|
238
|
+
mul v17.8h, v28.8h, v29.8h
|
|
239
|
+
sqrdmulh v31.8h, v28.8h, v30.8h
|
|
240
|
+
ldr q2, [x2, #0x10]
|
|
241
|
+
mls v24.8h, v4.8h, v7.h[0]
|
|
242
|
+
mul v4.8h, v19.8h, v11.h[2]
|
|
243
|
+
ldr q19, [x2, #0x40]
|
|
244
|
+
mls v4.8h, v0.8h, v7.h[0]
|
|
245
|
+
mul v0.8h, v14.8h, v27.8h
|
|
246
|
+
mls v0.8h, v15.8h, v7.h[0]
|
|
247
|
+
sub v8.8h, v4.8h, v5.8h
|
|
248
|
+
mul v12.8h, v3.8h, v29.8h
|
|
249
|
+
mul v23.8h, v8.8h, v11.h[0]
|
|
250
|
+
trn2 v28.4s, v16.4s, v6.4s
|
|
251
|
+
sqrdmulh v10.8h, v8.8h, v11.h[1]
|
|
252
|
+
trn1 v9.4s, v0.4s, v22.4s
|
|
253
|
+
trn2 v22.4s, v0.4s, v22.4s
|
|
254
|
+
ldr q11, [x1], #0x10 // next 16-byte vector of by-element twiddles from x1
|
|
255
|
+
mls v17.8h, v31.8h, v7.h[0]
|
|
256
|
+
trn1 v20.2d, v18.2d, v9.2d
|
|
257
|
+
trn2 v14.2d, v18.2d, v9.2d
|
|
258
|
+
ldr q15, [x2, #0x20]
|
|
259
|
+
trn1 v6.2d, v1.2d, v21.2d
|
|
260
|
+
sqrdmulh v9.8h, v3.8h, v30.8h
|
|
261
|
+
trn1 v25.2d, v28.2d, v22.2d
|
|
262
|
+
trn2 v16.2d, v28.2d, v22.2d
|
|
263
|
+
mls v23.8h, v10.8h, v7.h[0]
|
|
264
|
+
add v1.8h, v20.8h, v25.8h
|
|
265
|
+
sqrdmulh v21.8h, v6.8h, v30.8h
|
|
266
|
+
add v8.8h, v14.8h, v16.8h
|
|
267
|
+
ldr q27, [x2], #0x60 // advance x2 to the twiddles of the following block
|
|
268
|
+
sqdmulh v28.8h, v8.8h, v7.h[1] // sqdmulh by 20159, srshr #11, mls by 3329: same reduction sequence as in the preamble
|
|
269
|
+
mls v12.8h, v9.8h, v7.h[0]
|
|
270
|
+
sqdmulh v31.8h, v1.8h, v7.h[1]
|
|
271
|
+
mul v0.8h, v6.8h, v29.8h
|
|
272
|
+
sub v10.8h, v17.8h, v12.8h
|
|
273
|
+
mls v0.8h, v21.8h, v7.h[0]
|
|
274
|
+
srshr v21.8h, v28.8h, #0xb
|
|
275
|
+
srshr v13.8h, v31.8h, #0xb
|
|
276
|
+
sqrdmulh v22.8h, v10.8h, v26.8h
|
|
277
|
+
mls v8.8h, v21.8h, v7.h[0]
|
|
278
|
+
mls v1.8h, v13.8h, v7.h[0]
|
|
279
|
+
add v21.8h, v24.8h, v0.8h
|
|
280
|
+
stur q23, [x3, #-0x10] // offset 0x30 of the block being stored
|
|
281
|
+
sub v6.8h, v24.8h, v0.8h
|
|
282
|
+
mul v3.8h, v10.8h, v19.8h
|
|
283
|
+
add v0.8h, v4.8h, v5.8h
|
|
284
|
+
sqdmulh v13.8h, v0.8h, v7.h[1]
|
|
285
|
+
ldur q10, [x2, #-0x30]
|
|
286
|
+
add v5.8h, v1.8h, v8.8h
|
|
287
|
+
mls v3.8h, v22.8h, v7.h[0]
|
|
288
|
+
sub v8.8h, v1.8h, v8.8h
|
|
289
|
+
mul v24.8h, v8.8h, v11.h[0]
|
|
290
|
+
sqrdmulh v8.8h, v8.8h, v11.h[1]
|
|
291
|
+
srshr v1.8h, v13.8h, #0xb
|
|
292
|
+
sqrdmulh v13.8h, v6.8h, v10.8h
|
|
293
|
+
mls v0.8h, v1.8h, v7.h[0]
|
|
294
|
+
add v10.8h, v17.8h, v12.8h
|
|
295
|
+
mls v24.8h, v8.8h, v7.h[0]
|
|
296
|
+
sub v8.8h, v14.8h, v16.8h
|
|
297
|
+
sqrdmulh v31.8h, v8.8h, v11.h[5]
|
|
298
|
+
sub v14.8h, v21.8h, v10.8h
|
|
299
|
+
stur q0, [x3, #-0x30] // offset 0x10 of the block being stored
|
|
300
|
+
subs x4, x4, #0x1
|
|
301
|
+
cbnz x4, Lintt_layer4567_start // repeat while x4 != 0; cbnz tests the register itself, so the flags written by subs are not consumed here
|
|
302
|
+
mul v15.8h, v6.8h, v15.8h // loop tail: completes the two blocks still in flight; nothing more is loaded from p in this pass
|
|
303
|
+
sub v22.8h, v20.8h, v25.8h
|
|
304
|
+
add v4.8h, v21.8h, v10.8h
|
|
305
|
+
str q24, [x3, #0x20]
|
|
306
|
+
mls v15.8h, v13.8h, v7.h[0]
|
|
307
|
+
str q5, [x3], #0x40 // x3 steps past the second-to-last block
|
|
308
|
+
ldr q9, [x1], #0x10 // eighth and last 16-byte twiddle vector read from x1 in this pass
|
|
309
|
+
sqrdmulh v28.8h, v14.8h, v2.8h
|
|
310
|
+
mul v16.8h, v14.8h, v27.8h
|
|
311
|
+
sub v18.8h, v15.8h, v3.8h
|
|
312
|
+
add v15.8h, v15.8h, v3.8h
|
|
313
|
+
sqrdmulh v0.8h, v18.8h, v2.8h
|
|
314
|
+
trn2 v24.4s, v4.4s, v15.4s
|
|
315
|
+
trn1 v2.4s, v4.4s, v15.4s
|
|
316
|
+
mul v18.8h, v18.8h, v27.8h
|
|
317
|
+
mls v16.8h, v28.8h, v7.h[0]
|
|
318
|
+
mls v18.8h, v0.8h, v7.h[0]
|
|
319
|
+
mul v23.8h, v8.8h, v11.h[4]
|
|
320
|
+
sqrdmulh v12.8h, v22.8h, v11.h[3]
|
|
321
|
+
trn1 v17.4s, v16.4s, v18.4s
|
|
322
|
+
trn2 v4.4s, v16.4s, v18.4s
|
|
323
|
+
mls v23.8h, v31.8h, v7.h[0]
|
|
324
|
+
trn2 v3.2d, v2.2d, v17.2d
|
|
325
|
+
trn2 v6.2d, v24.2d, v4.2d
|
|
326
|
+
mul v26.8h, v22.8h, v11.h[2]
|
|
327
|
+
trn1 v28.2d, v2.2d, v17.2d
|
|
328
|
+
mls v26.8h, v12.8h, v7.h[0]
|
|
329
|
+
add v25.8h, v3.8h, v6.8h
|
|
330
|
+
sub v18.8h, v3.8h, v6.8h
|
|
331
|
+
trn1 v24.2d, v24.2d, v4.2d
|
|
332
|
+
sqdmulh v1.8h, v25.8h, v7.h[1]
|
|
333
|
+
sub v27.8h, v28.8h, v24.8h
|
|
334
|
+
sqrdmulh v2.8h, v18.8h, v9.h[5]
|
|
335
|
+
add v28.8h, v28.8h, v24.8h
|
|
336
|
+
mul v24.8h, v27.8h, v9.h[2]
|
|
337
|
+
sqdmulh v12.8h, v28.8h, v7.h[1]
|
|
338
|
+
mul v20.8h, v18.8h, v9.h[4]
|
|
339
|
+
mls v20.8h, v2.8h, v7.h[0]
|
|
340
|
+
srshr v1.8h, v1.8h, #0xb
|
|
341
|
+
sqrdmulh v19.8h, v27.8h, v9.h[3]
|
|
342
|
+
srshr v15.8h, v12.8h, #0xb
|
|
343
|
+
mls v25.8h, v1.8h, v7.h[0]
|
|
344
|
+
add v8.8h, v26.8h, v23.8h
|
|
345
|
+
sub v4.8h, v26.8h, v23.8h
|
|
346
|
+
mls v28.8h, v15.8h, v7.h[0]
|
|
347
|
+
mls v24.8h, v19.8h, v7.h[0]
|
|
348
|
+
mul v2.8h, v4.8h, v11.h[0]
|
|
349
|
+
sub v19.8h, v28.8h, v25.8h
|
|
350
|
+
sqrdmulh v15.8h, v4.8h, v11.h[1]
|
|
351
|
+
add v25.8h, v28.8h, v25.8h
|
|
352
|
+
sub v10.8h, v24.8h, v20.8h
|
|
353
|
+
str q25, [x3], #0x40 // after this post-increment x3 = p + 512, one past the end of the polynomial
|
|
354
|
+
sqrdmulh v22.8h, v19.8h, v9.h[1]
|
|
355
|
+
add v28.8h, v24.8h, v20.8h
|
|
356
|
+
sqrdmulh v25.8h, v10.8h, v9.h[1]
|
|
357
|
+
mul v27.8h, v19.8h, v9.h[0]
|
|
358
|
+
mul v26.8h, v10.8h, v9.h[0]
|
|
359
|
+
sqdmulh v20.8h, v28.8h, v7.h[1]
|
|
360
|
+
sqdmulh v16.8h, v8.8h, v7.h[1]
|
|
361
|
+
mls v26.8h, v25.8h, v7.h[0]
|
|
362
|
+
mls v2.8h, v15.8h, v7.h[0]
|
|
363
|
+
srshr v15.8h, v20.8h, #0xb
|
|
364
|
+
srshr v1.8h, v16.8h, #0xb
|
|
365
|
+
mls v27.8h, v22.8h, v7.h[0]
|
|
366
|
+
mls v28.8h, v15.8h, v7.h[0]
|
|
367
|
+
mls v8.8h, v1.8h, v7.h[0]
|
|
368
|
+
stur q27, [x3, #-0x20] // the remaining stores address the last two blocks with negative offsets from the end pointer
|
|
369
|
+
stur q2, [x3, #-0x50]
|
|
370
|
+
stur q28, [x3, #-0x30]
|
|
371
|
+
stur q26, [x3, #-0x10]
|
|
372
|
+
stur q8, [x3, #-0x70]
|
|
373
|
+
mov x4, #0x4 // =4
|
|
374
|
+
ldr q0, [x1], #0x20 // v0/v1 = the final 32 bytes read from x1; both are used by lane (h[0]-h[7], h[0]-h[5]) for the rest of the function
|
|
375
|
+
ldur q1, [x1, #-0x10]
|
|
376
|
+
ldr q26, [x0] // second pass: reads p through x0 as eight vectors spaced 0x40 bytes apart (0x00, 0x40, ..., 0x1c0)
|
|
377
|
+
ldr q13, [x0, #0x40]
|
|
378
|
+
ldr q28, [x0, #0xc0]
|
|
379
|
+
ldr q2, [x0, #0x140]
|
|
380
|
+
ldr q6, [x0, #0x80]
|
|
381
|
+
ldr q9, [x0, #0x100]
|
|
382
|
+
ldr q29, [x0, #0x1c0] // v29 (and v30 below) are reused as data registers; the 512/5040 constants are not referenced after the first pass
|
|
383
|
+
ldr q23, [x0, #0x180]
|
|
384
|
+
sub v17.8h, v26.8h, v13.8h
|
|
385
|
+
add v4.8h, v26.8h, v13.8h
|
|
386
|
+
ldr q25, [x0, #0xd0] // loads at +0x10 already fetch the next 16-byte column
|
|
387
|
+
ldr q24, [x0, #0x50]
|
|
388
|
+
add v5.8h, v6.8h, v28.8h
|
|
389
|
+
mul v19.8h, v17.8h, v0.h[6]
|
|
390
|
+
sub v10.8h, v6.8h, v28.8h
|
|
391
|
+
ldr q30, [x0, #0x150]
|
|
392
|
+
sqrdmulh v12.8h, v17.8h, v0.h[7]
|
|
393
|
+
add v17.8h, v9.8h, v2.8h
|
|
394
|
+
sub v28.8h, v9.8h, v2.8h
|
|
395
|
+
ldr q2, [x0, #0x90]
|
|
396
|
+
sub v26.8h, v23.8h, v29.8h
|
|
397
|
+
sqrdmulh v31.8h, v10.8h, v1.h[1]
|
|
398
|
+
add v22.8h, v23.8h, v29.8h
|
|
399
|
+
ldr q3, [x0, #0x110]
|
|
400
|
+
sqrdmulh v9.8h, v28.8h, v1.h[3]
|
|
401
|
+
sub v20.8h, v4.8h, v5.8h
|
|
402
|
+
sub v27.8h, v17.8h, v22.8h
|
|
403
|
+
ldr q29, [x0, #0x10]
|
|
404
|
+
add v16.8h, v4.8h, v5.8h
|
|
405
|
+
sqrdmulh v4.8h, v26.8h, v1.h[5]
|
|
406
|
+
add v6.8h, v17.8h, v22.8h
|
|
407
|
+
ldr q22, [x0, #0x1d0]
|
|
408
|
+
mul v8.8h, v28.8h, v1.h[2]
|
|
409
|
+
sub v21.8h, v2.8h, v25.8h
|
|
410
|
+
sub v5.8h, v16.8h, v6.8h
|
|
411
|
+
mul v17.8h, v26.8h, v1.h[4]
|
|
412
|
+
mul v26.8h, v10.8h, v1.h[0]
|
|
413
|
+
mls v26.8h, v31.8h, v7.h[0] // same mul/sqrdmulh/mls pattern as the first pass, with twiddle pairs taken from adjacent lanes (h[2k], h[2k+1]) of v0/v1
|
|
414
|
+
mls v17.8h, v4.8h, v7.h[0]
|
|
415
|
+
mls v19.8h, v12.8h, v7.h[0]
|
|
416
|
+
mls v8.8h, v9.8h, v7.h[0]
|
|
417
|
+
sqrdmulh v10.8h, v27.8h, v0.h[5]
|
|
418
|
+
sub v12.8h, v19.8h, v26.8h
|
|
419
|
+
add v9.8h, v19.8h, v26.8h
|
|
420
|
+
sqrdmulh v26.8h, v20.8h, v0.h[3]
|
|
421
|
+
sub v11.8h, v8.8h, v17.8h
|
|
422
|
+
add v14.8h, v8.8h, v17.8h
|
|
423
|
+
sqrdmulh v13.8h, v12.8h, v0.h[3]
|
|
424
|
+
add v23.8h, v9.8h, v14.8h
|
|
425
|
+
sqrdmulh v28.8h, v11.8h, v0.h[5]
|
|
426
|
+
sub v19.8h, v9.8h, v14.8h
|
|
427
|
+
mul v17.8h, v27.8h, v0.h[4]
|
|
428
|
+
str q23, [x0, #0x40] // results are written back in place at the offsets they were loaded from
|
|
429
|
+
mul v14.8h, v20.8h, v0.h[2]
|
|
430
|
+
mul v8.8h, v11.8h, v0.h[4]
|
|
431
|
+
mul v4.8h, v12.8h, v0.h[2]
|
|
432
|
+
mls v14.8h, v26.8h, v7.h[0]
|
|
433
|
+
mls v17.8h, v10.8h, v7.h[0]
|
|
434
|
+
mls v8.8h, v28.8h, v7.h[0]
|
|
435
|
+
mls v4.8h, v13.8h, v7.h[0]
|
|
436
|
+
sub v10.8h, v14.8h, v17.8h
|
|
437
|
+
add v20.8h, v14.8h, v17.8h
|
|
438
|
+
sqrdmulh v28.8h, v5.8h, v0.h[1]
|
|
439
|
+
mul v18.8h, v5.8h, v0.h[0]
|
|
440
|
+
str q20, [x0, #0x80]
|
|
441
|
+
sub v13.8h, v4.8h, v8.8h
|
|
442
|
+
mul v23.8h, v10.8h, v0.h[0]
|
|
443
|
+
mul v17.8h, v19.8h, v0.h[0]
|
|
444
|
+
sqrdmulh v9.8h, v13.8h, v0.h[1]
|
|
445
|
+
mls v18.8h, v28.8h, v7.h[0]
|
|
446
|
+
sqrdmulh v10.8h, v10.8h, v0.h[1]
|
|
447
|
+
sub x4, x4, #0x2 // 4 - 2 = 2 loop iterations; the code before and after the loop handles the other two columns
|
|
448
|
+
|
|
449
|
+
Lintt_layer123_start: // loop body: x0 advances by one 16-byte column per iteration; work on the current and the next column is interleaved
|
|
450
|
+
sub v12.8h, v3.8h, v30.8h
|
|
451
|
+
mul v11.8h, v21.8h, v1.h[0]
|
|
452
|
+
add v28.8h, v4.8h, v8.8h
|
|
453
|
+
ldr q20, [x0, #0x190]
|
|
454
|
+
add v27.8h, v16.8h, v6.8h
|
|
455
|
+
sqrdmulh v8.8h, v12.8h, v1.h[3]
|
|
456
|
+
add v16.8h, v29.8h, v24.8h
|
|
457
|
+
str q28, [x0, #0xc0]
|
|
458
|
+
mls v23.8h, v10.8h, v7.h[0]
|
|
459
|
+
str q27, [x0], #0x10 // store offset 0x00 of the current column, then step x0 to the next column
|
|
460
|
+
add v15.8h, v20.8h, v22.8h
|
|
461
|
+
str q18, [x0, #0xf0] // 0xf0 after the increment = offset 0x100 of the column just left
|
|
462
|
+
mul v14.8h, v13.8h, v0.h[0]
|
|
463
|
+
add v2.8h, v2.8h, v25.8h
|
|
464
|
+
sub v26.8h, v20.8h, v22.8h
|
|
465
|
+
mul v4.8h, v12.8h, v1.h[2]
|
|
466
|
+
sub v5.8h, v16.8h, v2.8h
|
|
467
|
+
str q23, [x0, #0x170]
|
|
468
|
+
add v20.8h, v3.8h, v30.8h
|
|
469
|
+
sqrdmulh v27.8h, v26.8h, v1.h[5]
|
|
470
|
+
add v16.8h, v16.8h, v2.8h
|
|
471
|
+
mul v18.8h, v26.8h, v1.h[4]
|
|
472
|
+
sub v31.8h, v20.8h, v15.8h
|
|
473
|
+
mls v4.8h, v8.8h, v7.h[0]
|
|
474
|
+
sub v28.8h, v29.8h, v24.8h
|
|
475
|
+
mls v18.8h, v27.8h, v7.h[0]
|
|
476
|
+
ldr q22, [x0, #0x1d0] // loads at 0x..0 + 0x10 prefetch the column after the one now being computed
|
|
477
|
+
mul v26.8h, v28.8h, v0.h[6]
|
|
478
|
+
mul v2.8h, v5.8h, v0.h[2]
|
|
479
|
+
sub v12.8h, v4.8h, v18.8h
|
|
480
|
+
sqrdmulh v24.8h, v28.8h, v0.h[7]
|
|
481
|
+
mls v14.8h, v9.8h, v7.h[0]
|
|
482
|
+
sqrdmulh v10.8h, v12.8h, v0.h[5]
|
|
483
|
+
mls v26.8h, v24.8h, v7.h[0]
|
|
484
|
+
ldr q24, [x0, #0x50]
|
|
485
|
+
mul v8.8h, v12.8h, v0.h[4]
|
|
486
|
+
str q14, [x0, #0x1b0]
|
|
487
|
+
add v28.8h, v4.8h, v18.8h
|
|
488
|
+
sqrdmulh v5.8h, v5.8h, v0.h[3]
|
|
489
|
+
add v6.8h, v20.8h, v15.8h
|
|
490
|
+
sqrdmulh v3.8h, v19.8h, v0.h[1]
|
|
491
|
+
sub v13.8h, v16.8h, v6.8h
|
|
492
|
+
sqrdmulh v12.8h, v21.8h, v1.h[1]
|
|
493
|
+
sqrdmulh v21.8h, v13.8h, v0.h[1]
|
|
494
|
+
sqrdmulh v27.8h, v31.8h, v0.h[5]
|
|
495
|
+
ldr q25, [x0, #0xd0]
|
|
496
|
+
mls v11.8h, v12.8h, v7.h[0]
|
|
497
|
+
mul v23.8h, v31.8h, v0.h[4]
|
|
498
|
+
mul v18.8h, v13.8h, v0.h[0]
|
|
499
|
+
add v30.8h, v26.8h, v11.8h
|
|
500
|
+
sub v13.8h, v26.8h, v11.8h
|
|
501
|
+
mls v23.8h, v27.8h, v7.h[0]
|
|
502
|
+
add v12.8h, v30.8h, v28.8h
|
|
503
|
+
sub v19.8h, v30.8h, v28.8h
|
|
504
|
+
mls v2.8h, v5.8h, v7.h[0]
|
|
505
|
+
str q12, [x0, #0x40]
|
|
506
|
+
sqrdmulh v26.8h, v13.8h, v0.h[3]
|
|
507
|
+
mls v8.8h, v10.8h, v7.h[0]
|
|
508
|
+
ldr q30, [x0, #0x150]
|
|
509
|
+
sub v20.8h, v2.8h, v23.8h
|
|
510
|
+
mul v4.8h, v13.8h, v0.h[2]
|
|
511
|
+
add v13.8h, v2.8h, v23.8h
|
|
512
|
+
mls v4.8h, v26.8h, v7.h[0]
|
|
513
|
+
ldr q2, [x0, #0x90]
|
|
514
|
+
mul v23.8h, v20.8h, v0.h[0]
|
|
515
|
+
ldr q29, [x0, #0x10]
|
|
516
|
+
sqrdmulh v10.8h, v20.8h, v0.h[1]
|
|
517
|
+
str q13, [x0, #0x80]
|
|
518
|
+
sub v13.8h, v4.8h, v8.8h
|
|
519
|
+
mls v17.8h, v3.8h, v7.h[0]
|
|
520
|
+
ldr q3, [x0, #0x110]
|
|
521
|
+
mls v18.8h, v21.8h, v7.h[0]
|
|
522
|
+
sub v21.8h, v2.8h, v25.8h
|
|
523
|
+
sqrdmulh v9.8h, v13.8h, v0.h[1]
|
|
524
|
+
str q17, [x0, #0x130]
|
|
525
|
+
mul v17.8h, v19.8h, v0.h[0]
|
|
526
|
+
subs x4, x4, #0x1
|
|
527
|
+
cbnz x4, Lintt_layer123_start // repeat while x4 != 0; cbnz tests the register itself, so the flags written by subs are not consumed here
|
|
528
|
+
mls v23.8h, v10.8h, v7.h[0] // loop tail: finishes the last two 16-byte columns of p
|
|
529
|
+
ldr q11, [x0, #0x190] // last load from p; every other input of the final column is already in registers
|
|
530
|
+
str q18, [x0, #0x100]
|
|
531
|
+
add v27.8h, v3.8h, v30.8h
|
|
532
|
+
mul v13.8h, v13.8h, v0.h[0]
|
|
533
|
+
sub v5.8h, v29.8h, v24.8h
|
|
534
|
+
add v14.8h, v16.8h, v6.8h
|
|
535
|
+
mls v13.8h, v9.8h, v7.h[0]
|
|
536
|
+
add v10.8h, v11.8h, v22.8h
|
|
537
|
+
str q23, [x0, #0x180]
|
|
538
|
+
sub v20.8h, v11.8h, v22.8h
|
|
539
|
+
sub v23.8h, v27.8h, v10.8h
|
|
540
|
+
sqrdmulh v16.8h, v21.8h, v1.h[1]
|
|
541
|
+
sqrdmulh v31.8h, v23.8h, v0.h[5]
|
|
542
|
+
str q13, [x0, #0x1c0]
|
|
543
|
+
add v13.8h, v4.8h, v8.8h
|
|
544
|
+
mul v18.8h, v21.8h, v1.h[0]
|
|
545
|
+
str q13, [x0, #0xc0]
|
|
546
|
+
sqrdmulh v13.8h, v19.8h, v0.h[1]
|
|
547
|
+
sqrdmulh v28.8h, v20.8h, v1.h[5]
|
|
548
|
+
str q14, [x0], #0x10 // offset 0x00 of the third column; x0 moves on to the fourth
|
|
549
|
+
mul v4.8h, v20.8h, v1.h[4]
|
|
550
|
+
mls v17.8h, v13.8h, v7.h[0]
|
|
551
|
+
sub v13.8h, v3.8h, v30.8h
|
|
552
|
+
sqrdmulh v8.8h, v13.8h, v1.h[3]
|
|
553
|
+
mul v12.8h, v13.8h, v1.h[2]
|
|
554
|
+
mls v4.8h, v28.8h, v7.h[0]
|
|
555
|
+
mls v12.8h, v8.8h, v7.h[0]
|
|
556
|
+
mls v18.8h, v16.8h, v7.h[0]
|
|
557
|
+
str q17, [x0, #0x130]
|
|
558
|
+
sqrdmulh v15.8h, v5.8h, v0.h[7]
|
|
559
|
+
add v11.8h, v27.8h, v10.8h
|
|
560
|
+
mul v16.8h, v5.8h, v0.h[6]
|
|
561
|
+
sub v8.8h, v12.8h, v4.8h
|
|
562
|
+
sqrdmulh v28.8h, v8.8h, v0.h[5]
|
|
563
|
+
add v13.8h, v2.8h, v25.8h
|
|
564
|
+
mls v16.8h, v15.8h, v7.h[0]
|
|
565
|
+
add v26.8h, v12.8h, v4.8h
|
|
566
|
+
mul v8.8h, v8.8h, v0.h[4]
|
|
567
|
+
add v4.8h, v29.8h, v24.8h
|
|
568
|
+
mls v8.8h, v28.8h, v7.h[0]
|
|
569
|
+
sub v20.8h, v4.8h, v13.8h
|
|
570
|
+
add v14.8h, v4.8h, v13.8h
|
|
571
|
+
add v12.8h, v16.8h, v18.8h
|
|
572
|
+
sqrdmulh v22.8h, v20.8h, v0.h[3]
|
|
573
|
+
add v27.8h, v14.8h, v11.8h
|
|
574
|
+
sub v13.8h, v16.8h, v18.8h
|
|
575
|
+
mul v4.8h, v20.8h, v0.h[2]
|
|
576
|
+
str q27, [x0], #0x10 // offset 0x00 of the fourth column; x0 = p + 0x40 from here on, so the stores below use offsets reduced by 0x10
|
|
577
|
+
sub v24.8h, v12.8h, v26.8h
|
|
578
|
+
sqrdmulh v3.8h, v13.8h, v0.h[3]
|
|
579
|
+
mul v13.8h, v13.8h, v0.h[2]
|
|
580
|
+
sqrdmulh v27.8h, v24.8h, v0.h[1]
|
|
581
|
+
mls v13.8h, v3.8h, v7.h[0]
|
|
582
|
+
mul v9.8h, v24.8h, v0.h[0]
|
|
583
|
+
mls v9.8h, v27.8h, v7.h[0]
|
|
584
|
+
add v30.8h, v13.8h, v8.8h
|
|
585
|
+
sub v13.8h, v13.8h, v8.8h
|
|
586
|
+
mls v4.8h, v22.8h, v7.h[0]
|
|
587
|
+
str q30, [x0, #0xb0]
|
|
588
|
+
sqrdmulh v16.8h, v13.8h, v0.h[1]
|
|
589
|
+
str q9, [x0, #0x130]
|
|
590
|
+
mul v9.8h, v13.8h, v0.h[0]
|
|
591
|
+
add v13.8h, v12.8h, v26.8h
|
|
592
|
+
str q13, [x0, #0x30]
|
|
593
|
+
mul v13.8h, v23.8h, v0.h[4]
|
|
594
|
+
sub v23.8h, v14.8h, v11.8h
|
|
595
|
+
mls v13.8h, v31.8h, v7.h[0]
|
|
596
|
+
mls v9.8h, v16.8h, v7.h[0]
|
|
597
|
+
mul v30.8h, v23.8h, v0.h[0]
|
|
598
|
+
sub v24.8h, v4.8h, v13.8h
|
|
599
|
+
add v13.8h, v4.8h, v13.8h
|
|
600
|
+
sqrdmulh v23.8h, v23.8h, v0.h[1]
|
|
601
|
+
str q9, [x0, #0x1b0]
|
|
602
|
+
str q13, [x0, #0x70]
|
|
603
|
+
sqrdmulh v13.8h, v24.8h, v0.h[1]
|
|
604
|
+
mul v21.8h, v24.8h, v0.h[0]
|
|
605
|
+
mls v30.8h, v23.8h, v7.h[0]
|
|
606
|
+
mls v21.8h, v13.8h, v7.h[0]
|
|
607
|
+
str q30, [x0, #0xf0]
|
|
608
|
+
str q21, [x0, #0x170] // final store of the transform (p + 0x1b0)
|
|
609
|
+
ldp d8, d9, [sp] // epilogue: restore d8-d15 from the slots written in the prologue
|
|
610
|
+
.cfi_restore d8
|
|
611
|
+
.cfi_restore d9
|
|
612
|
+
ldp d10, d11, [sp, #0x10]
|
|
613
|
+
.cfi_restore d10
|
|
614
|
+
.cfi_restore d11
|
|
615
|
+
ldp d12, d13, [sp, #0x20]
|
|
616
|
+
.cfi_restore d12
|
|
617
|
+
.cfi_restore d13
|
|
618
|
+
ldp d14, d15, [sp, #0x30]
|
|
619
|
+
.cfi_restore d14
|
|
620
|
+
.cfi_restore d15
|
|
621
|
+
add sp, sp, #0x40 // release the 64-byte frame reserved on entry
|
|
622
|
+
.cfi_adjust_cfa_offset -0x40
|
|
623
|
+
ret // no return value is set up; the result is the polynomial rewritten in place through x0/x3
|
|
624
|
+
.cfi_endproc
|
|
625
|
+
|
|
626
|
+
MLK_ASM_FN_SIZE(intt_asm) // macro from common.h (not visible here); presumably emits the symbol-size directive - confirm
|
|
627
|
+
|
|
628
|
+
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
|