pq_crypto 0.3.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +56 -0
- data/CHANGELOG.md +62 -0
- data/GET_STARTED.md +366 -40
- data/README.md +76 -233
- data/SECURITY.md +107 -82
- data/ext/pqcrypto/extconf.rb +169 -87
- data/ext/pqcrypto/mldsa_api.h +1 -48
- data/ext/pqcrypto/mlkem_api.h +1 -18
- data/ext/pqcrypto/pq_externalmu.c +89 -204
- data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
- data/ext/pqcrypto/pqcrypto_secure.c +203 -78
- data/ext/pqcrypto/pqcrypto_secure.h +53 -14
- data/ext/pqcrypto/pqcrypto_version.h +7 -0
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/algorithm_registry.rb +200 -0
- data/lib/pq_crypto/hybrid_kem.rb +1 -12
- data/lib/pq_crypto/kem.rb +104 -13
- data/lib/pq_crypto/pkcs8.rb +387 -0
- data/lib/pq_crypto/serialization.rb +1 -14
- data/lib/pq_crypto/signature.rb +123 -17
- data/lib/pq_crypto/spki.rb +131 -0
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +79 -20
- data/script/vendor_libs.rb +88 -155
- metadata +241 -73
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [AVX2_NTT]
|
|
10
|
+
* Faster AVX2 optimized NTT multiplication for Ring-LWE lattice cryptography.
|
|
11
|
+
* Gregor Seiler
|
|
12
|
+
* https://eprint.iacr.org/2018/039
|
|
13
|
+
*
|
|
14
|
+
* - [REF_AVX2]
|
|
15
|
+
* CRYSTALS-Kyber optimized AVX2 implementation
|
|
16
|
+
* Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
|
|
17
|
+
* https://github.com/pq-crystals/kyber/tree/main/avx2
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/*
|
|
21
|
+
* This file is derived from the public domain
|
|
22
|
+
* AVX2 Kyber implementation @[REF_AVX2].
|
|
23
|
+
*
|
|
24
|
+
* The core ideas behind the implementation are described in @[AVX2_NTT].
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
#include "../../../common.h"
|
|
28
|
+
#if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
|
|
29
|
+
!defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
30
|
+
|
|
31
|
+
/*
|
|
32
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
33
|
+
* dev/x86_64/src/ntt.S using scripts/simpasm. Do not modify it directly.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
#if defined(__ELF__)
|
|
37
|
+
.section .note.GNU-stack,"",@progbits
|
|
38
|
+
#endif
|
|
39
|
+
|
|
40
|
+
.text
|
|
41
|
+
.balign 4
|
|
42
|
+
.global MLK_ASM_NAMESPACE(ntt_avx2)
|
|
43
|
+
MLK_ASM_FN_SYMBOL(ntt_avx2)
|
|
44
|
+
|
|
45
|
+
.cfi_startproc
|
|
46
|
+
movl $0xd010d01, %eax # imm = 0xD010D01
|
|
47
|
+
vmovd %eax, %xmm0
|
|
48
|
+
vpbroadcastd %xmm0, %ymm0
|
|
49
|
+
vpbroadcastq 0x40(%rsi), %ymm15
|
|
50
|
+
vmovdqa 0x100(%rdi), %ymm8
|
|
51
|
+
vmovdqa 0x120(%rdi), %ymm9
|
|
52
|
+
vmovdqa 0x140(%rdi), %ymm10
|
|
53
|
+
vmovdqa 0x160(%rdi), %ymm11
|
|
54
|
+
vpbroadcastq 0x48(%rsi), %ymm2
|
|
55
|
+
vpmullw %ymm15, %ymm8, %ymm12
|
|
56
|
+
vpmullw %ymm15, %ymm9, %ymm13
|
|
57
|
+
vpmullw %ymm15, %ymm10, %ymm14
|
|
58
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
59
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
60
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
61
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
62
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
63
|
+
vmovdqa (%rdi), %ymm4
|
|
64
|
+
vmovdqa 0x20(%rdi), %ymm5
|
|
65
|
+
vmovdqa 0x40(%rdi), %ymm6
|
|
66
|
+
vmovdqa 0x60(%rdi), %ymm7
|
|
67
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
68
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
69
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
70
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
71
|
+
vpaddw %ymm8, %ymm4, %ymm3
|
|
72
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
73
|
+
vpaddw %ymm9, %ymm5, %ymm4
|
|
74
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
75
|
+
vpaddw %ymm10, %ymm6, %ymm5
|
|
76
|
+
vpsubw %ymm10, %ymm6, %ymm10
|
|
77
|
+
vpaddw %ymm11, %ymm7, %ymm6
|
|
78
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
79
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
80
|
+
vpaddw %ymm12, %ymm8, %ymm8
|
|
81
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
82
|
+
vpaddw %ymm13, %ymm9, %ymm9
|
|
83
|
+
vpsubw %ymm14, %ymm5, %ymm5
|
|
84
|
+
vpaddw %ymm14, %ymm10, %ymm10
|
|
85
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
86
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
87
|
+
vmovdqa %ymm3, (%rdi)
|
|
88
|
+
vmovdqa %ymm4, 0x20(%rdi)
|
|
89
|
+
vmovdqa %ymm5, 0x40(%rdi)
|
|
90
|
+
vmovdqa %ymm6, 0x60(%rdi)
|
|
91
|
+
vmovdqa %ymm8, 0x100(%rdi)
|
|
92
|
+
vmovdqa %ymm9, 0x120(%rdi)
|
|
93
|
+
vmovdqa %ymm10, 0x140(%rdi)
|
|
94
|
+
vmovdqa %ymm11, 0x160(%rdi)
|
|
95
|
+
vpbroadcastq 0x40(%rsi), %ymm15
|
|
96
|
+
vmovdqa 0x180(%rdi), %ymm8
|
|
97
|
+
vmovdqa 0x1a0(%rdi), %ymm9
|
|
98
|
+
vmovdqa 0x1c0(%rdi), %ymm10
|
|
99
|
+
vmovdqa 0x1e0(%rdi), %ymm11
|
|
100
|
+
vpbroadcastq 0x48(%rsi), %ymm2
|
|
101
|
+
vpmullw %ymm15, %ymm8, %ymm12
|
|
102
|
+
vpmullw %ymm15, %ymm9, %ymm13
|
|
103
|
+
vpmullw %ymm15, %ymm10, %ymm14
|
|
104
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
105
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
106
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
107
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
108
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
109
|
+
vmovdqa 0x80(%rdi), %ymm4
|
|
110
|
+
vmovdqa 0xa0(%rdi), %ymm5
|
|
111
|
+
vmovdqa 0xc0(%rdi), %ymm6
|
|
112
|
+
vmovdqa 0xe0(%rdi), %ymm7
|
|
113
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
114
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
115
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
116
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
117
|
+
vpaddw %ymm8, %ymm4, %ymm3
|
|
118
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
119
|
+
vpaddw %ymm9, %ymm5, %ymm4
|
|
120
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
121
|
+
vpaddw %ymm10, %ymm6, %ymm5
|
|
122
|
+
vpsubw %ymm10, %ymm6, %ymm10
|
|
123
|
+
vpaddw %ymm11, %ymm7, %ymm6
|
|
124
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
125
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
126
|
+
vpaddw %ymm12, %ymm8, %ymm8
|
|
127
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
128
|
+
vpaddw %ymm13, %ymm9, %ymm9
|
|
129
|
+
vpsubw %ymm14, %ymm5, %ymm5
|
|
130
|
+
vpaddw %ymm14, %ymm10, %ymm10
|
|
131
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
132
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
133
|
+
vmovdqa %ymm3, 0x80(%rdi)
|
|
134
|
+
vmovdqa %ymm4, 0xa0(%rdi)
|
|
135
|
+
vmovdqa %ymm5, 0xc0(%rdi)
|
|
136
|
+
vmovdqa %ymm6, 0xe0(%rdi)
|
|
137
|
+
vmovdqa %ymm8, 0x180(%rdi)
|
|
138
|
+
vmovdqa %ymm9, 0x1a0(%rdi)
|
|
139
|
+
vmovdqa %ymm10, 0x1c0(%rdi)
|
|
140
|
+
vmovdqa %ymm11, 0x1e0(%rdi)
|
|
141
|
+
vmovdqa 0x60(%rsi), %ymm15
|
|
142
|
+
vmovdqa 0x80(%rdi), %ymm8
|
|
143
|
+
vmovdqa 0xa0(%rdi), %ymm9
|
|
144
|
+
vmovdqa 0xc0(%rdi), %ymm10
|
|
145
|
+
vmovdqa 0xe0(%rdi), %ymm11
|
|
146
|
+
vmovdqa 0x80(%rsi), %ymm2
|
|
147
|
+
vpmullw %ymm15, %ymm8, %ymm12
|
|
148
|
+
vpmullw %ymm15, %ymm9, %ymm13
|
|
149
|
+
vpmullw %ymm15, %ymm10, %ymm14
|
|
150
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
151
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
152
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
153
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
154
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
155
|
+
vmovdqa (%rdi), %ymm4
|
|
156
|
+
vmovdqa 0x20(%rdi), %ymm5
|
|
157
|
+
vmovdqa 0x40(%rdi), %ymm6
|
|
158
|
+
vmovdqa 0x60(%rdi), %ymm7
|
|
159
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
160
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
161
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
162
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
163
|
+
vpaddw %ymm8, %ymm4, %ymm3
|
|
164
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
165
|
+
vpaddw %ymm9, %ymm5, %ymm4
|
|
166
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
167
|
+
vpaddw %ymm10, %ymm6, %ymm5
|
|
168
|
+
vpsubw %ymm10, %ymm6, %ymm10
|
|
169
|
+
vpaddw %ymm11, %ymm7, %ymm6
|
|
170
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
171
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
172
|
+
vpaddw %ymm12, %ymm8, %ymm8
|
|
173
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
174
|
+
vpaddw %ymm13, %ymm9, %ymm9
|
|
175
|
+
vpsubw %ymm14, %ymm5, %ymm5
|
|
176
|
+
vpaddw %ymm14, %ymm10, %ymm10
|
|
177
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
178
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
179
|
+
vperm2i128 $0x20, %ymm10, %ymm5, %ymm7 # ymm7 = ymm5[0,1],ymm10[0,1]
|
|
180
|
+
vperm2i128 $0x31, %ymm10, %ymm5, %ymm10 # ymm10 = ymm5[2,3],ymm10[2,3]
|
|
181
|
+
vperm2i128 $0x20, %ymm11, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm11[0,1]
|
|
182
|
+
vperm2i128 $0x31, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[2,3],ymm11[2,3]
|
|
183
|
+
vmovdqa 0xa0(%rsi), %ymm15
|
|
184
|
+
vmovdqa 0xc0(%rsi), %ymm2
|
|
185
|
+
vpmullw %ymm15, %ymm7, %ymm12
|
|
186
|
+
vpmullw %ymm15, %ymm10, %ymm13
|
|
187
|
+
vpmullw %ymm15, %ymm5, %ymm14
|
|
188
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
189
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
190
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
191
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
192
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
193
|
+
vperm2i128 $0x20, %ymm8, %ymm3, %ymm6 # ymm6 = ymm3[0,1],ymm8[0,1]
|
|
194
|
+
vperm2i128 $0x31, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[2,3],ymm8[2,3]
|
|
195
|
+
vperm2i128 $0x20, %ymm9, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm9[0,1]
|
|
196
|
+
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
|
|
197
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
198
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
199
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
200
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
201
|
+
vpaddw %ymm7, %ymm6, %ymm4
|
|
202
|
+
vpsubw %ymm7, %ymm6, %ymm7
|
|
203
|
+
vpaddw %ymm10, %ymm8, %ymm6
|
|
204
|
+
vpsubw %ymm10, %ymm8, %ymm10
|
|
205
|
+
vpaddw %ymm5, %ymm3, %ymm8
|
|
206
|
+
vpsubw %ymm5, %ymm3, %ymm5
|
|
207
|
+
vpaddw %ymm11, %ymm9, %ymm3
|
|
208
|
+
vpsubw %ymm11, %ymm9, %ymm11
|
|
209
|
+
vpsubw %ymm12, %ymm4, %ymm4
|
|
210
|
+
vpaddw %ymm12, %ymm7, %ymm7
|
|
211
|
+
vpsubw %ymm13, %ymm6, %ymm6
|
|
212
|
+
vpaddw %ymm13, %ymm10, %ymm10
|
|
213
|
+
vpsubw %ymm14, %ymm8, %ymm8
|
|
214
|
+
vpaddw %ymm14, %ymm5, %ymm5
|
|
215
|
+
vpsubw %ymm15, %ymm3, %ymm3
|
|
216
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
217
|
+
vpunpcklqdq %ymm5, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm5[0],ymm8[2],ymm5[2]
|
|
218
|
+
vpunpckhqdq %ymm5, %ymm8, %ymm5 # ymm5 = ymm8[1],ymm5[1],ymm8[3],ymm5[3]
|
|
219
|
+
vpunpcklqdq %ymm11, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm11[0],ymm3[2],ymm11[2]
|
|
220
|
+
vpunpckhqdq %ymm11, %ymm3, %ymm11 # ymm11 = ymm3[1],ymm11[1],ymm3[3],ymm11[3]
|
|
221
|
+
vmovdqa 0xe0(%rsi), %ymm15
|
|
222
|
+
vmovdqa 0x100(%rsi), %ymm2
|
|
223
|
+
vpmullw %ymm15, %ymm9, %ymm12
|
|
224
|
+
vpmullw %ymm15, %ymm5, %ymm13
|
|
225
|
+
vpmullw %ymm15, %ymm8, %ymm14
|
|
226
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
227
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
228
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
229
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
230
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
231
|
+
vpunpcklqdq %ymm7, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm7[0],ymm4[2],ymm7[2]
|
|
232
|
+
vpunpckhqdq %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[1],ymm7[1],ymm4[3],ymm7[3]
|
|
233
|
+
vpunpcklqdq %ymm10, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm10[0],ymm6[2],ymm10[2]
|
|
234
|
+
vpunpckhqdq %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[1],ymm10[1],ymm6[3],ymm10[3]
|
|
235
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
236
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
237
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
238
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
239
|
+
vpaddw %ymm9, %ymm3, %ymm6
|
|
240
|
+
vpsubw %ymm9, %ymm3, %ymm9
|
|
241
|
+
vpaddw %ymm5, %ymm7, %ymm3
|
|
242
|
+
vpsubw %ymm5, %ymm7, %ymm5
|
|
243
|
+
vpaddw %ymm8, %ymm4, %ymm7
|
|
244
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
245
|
+
vpaddw %ymm11, %ymm10, %ymm4
|
|
246
|
+
vpsubw %ymm11, %ymm10, %ymm11
|
|
247
|
+
vpsubw %ymm12, %ymm6, %ymm6
|
|
248
|
+
vpaddw %ymm12, %ymm9, %ymm9
|
|
249
|
+
vpsubw %ymm13, %ymm3, %ymm3
|
|
250
|
+
vpaddw %ymm13, %ymm5, %ymm5
|
|
251
|
+
vpsubw %ymm14, %ymm7, %ymm7
|
|
252
|
+
vpaddw %ymm14, %ymm8, %ymm8
|
|
253
|
+
vpsubw %ymm15, %ymm4, %ymm4
|
|
254
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
255
|
+
vmovsldup %ymm8, %ymm10 # ymm10 = ymm8[0,0,2,2,4,4,6,6]
|
|
256
|
+
vpblendd $0xaa, %ymm10, %ymm7, %ymm10 # ymm10 = ymm7[0],ymm10[1],ymm7[2],ymm10[3],ymm7[4],ymm10[5],ymm7[6],ymm10[7]
|
|
257
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
258
|
+
vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
|
|
259
|
+
vmovsldup %ymm11, %ymm7 # ymm7 = ymm11[0,0,2,2,4,4,6,6]
|
|
260
|
+
vpblendd $0xaa, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm7[1],ymm4[2],ymm7[3],ymm4[4],ymm7[5],ymm4[6],ymm7[7]
|
|
261
|
+
vpsrlq $0x20, %ymm4, %ymm4
|
|
262
|
+
vpblendd $0xaa, %ymm11, %ymm4, %ymm11 # ymm11 = ymm4[0],ymm11[1],ymm4[2],ymm11[3],ymm4[4],ymm11[5],ymm4[6],ymm11[7]
|
|
263
|
+
vmovdqa 0x120(%rsi), %ymm15
|
|
264
|
+
vmovdqa 0x140(%rsi), %ymm2
|
|
265
|
+
vpmullw %ymm15, %ymm10, %ymm12
|
|
266
|
+
vpmullw %ymm15, %ymm8, %ymm13
|
|
267
|
+
vpmullw %ymm15, %ymm7, %ymm14
|
|
268
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
269
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
270
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
271
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
272
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
273
|
+
vmovsldup %ymm9, %ymm4 # ymm4 = ymm9[0,0,2,2,4,4,6,6]
|
|
274
|
+
vpblendd $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7]
|
|
275
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
276
|
+
vpblendd $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7]
|
|
277
|
+
vmovsldup %ymm5, %ymm6 # ymm6 = ymm5[0,0,2,2,4,4,6,6]
|
|
278
|
+
vpblendd $0xaa, %ymm6, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm6[1],ymm3[2],ymm6[3],ymm3[4],ymm6[5],ymm3[6],ymm6[7]
|
|
279
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
280
|
+
vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
|
|
281
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
282
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
283
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
284
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
285
|
+
vpaddw %ymm10, %ymm4, %ymm3
|
|
286
|
+
vpsubw %ymm10, %ymm4, %ymm10
|
|
287
|
+
vpaddw %ymm8, %ymm9, %ymm4
|
|
288
|
+
vpsubw %ymm8, %ymm9, %ymm8
|
|
289
|
+
vpaddw %ymm7, %ymm6, %ymm9
|
|
290
|
+
vpsubw %ymm7, %ymm6, %ymm7
|
|
291
|
+
vpaddw %ymm11, %ymm5, %ymm6
|
|
292
|
+
vpsubw %ymm11, %ymm5, %ymm11
|
|
293
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
294
|
+
vpaddw %ymm12, %ymm10, %ymm10
|
|
295
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
296
|
+
vpaddw %ymm13, %ymm8, %ymm8
|
|
297
|
+
vpsubw %ymm14, %ymm9, %ymm9
|
|
298
|
+
vpaddw %ymm14, %ymm7, %ymm7
|
|
299
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
300
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
301
|
+
vpslld $0x10, %ymm7, %ymm5
|
|
302
|
+
vpblendw $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7],ymm9[8],ymm5[9],ymm9[10],ymm5[11],ymm9[12],ymm5[13],ymm9[14],ymm5[15]
|
|
303
|
+
vpsrld $0x10, %ymm9, %ymm9
|
|
304
|
+
vpblendw $0xaa, %ymm7, %ymm9, %ymm7 # ymm7 = ymm9[0],ymm7[1],ymm9[2],ymm7[3],ymm9[4],ymm7[5],ymm9[6],ymm7[7],ymm9[8],ymm7[9],ymm9[10],ymm7[11],ymm9[12],ymm7[13],ymm9[14],ymm7[15]
|
|
305
|
+
vpslld $0x10, %ymm11, %ymm9
|
|
306
|
+
vpblendw $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7],ymm6[8],ymm9[9],ymm6[10],ymm9[11],ymm6[12],ymm9[13],ymm6[14],ymm9[15]
|
|
307
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
308
|
+
vpblendw $0xaa, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[0],ymm11[1],ymm6[2],ymm11[3],ymm6[4],ymm11[5],ymm6[6],ymm11[7],ymm6[8],ymm11[9],ymm6[10],ymm11[11],ymm6[12],ymm11[13],ymm6[14],ymm11[15]
|
|
309
|
+
vmovdqa 0x160(%rsi), %ymm15
|
|
310
|
+
vmovdqa 0x180(%rsi), %ymm2
|
|
311
|
+
vpmullw %ymm15, %ymm5, %ymm12
|
|
312
|
+
vpmullw %ymm15, %ymm7, %ymm13
|
|
313
|
+
vpmullw %ymm15, %ymm9, %ymm14
|
|
314
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
315
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
316
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
317
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
318
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
319
|
+
vpslld $0x10, %ymm10, %ymm6
|
|
320
|
+
vpblendw $0xaa, %ymm6, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm6[1],ymm3[2],ymm6[3],ymm3[4],ymm6[5],ymm3[6],ymm6[7],ymm3[8],ymm6[9],ymm3[10],ymm6[11],ymm3[12],ymm6[13],ymm3[14],ymm6[15]
|
|
321
|
+
vpsrld $0x10, %ymm3, %ymm3
|
|
322
|
+
vpblendw $0xaa, %ymm10, %ymm3, %ymm10 # ymm10 = ymm3[0],ymm10[1],ymm3[2],ymm10[3],ymm3[4],ymm10[5],ymm3[6],ymm10[7],ymm3[8],ymm10[9],ymm3[10],ymm10[11],ymm3[12],ymm10[13],ymm3[14],ymm10[15]
|
|
323
|
+
vpslld $0x10, %ymm8, %ymm3
|
|
324
|
+
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
|
|
325
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
326
|
+
vpblendw $0xaa, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm8[1],ymm4[2],ymm8[3],ymm4[4],ymm8[5],ymm4[6],ymm8[7],ymm4[8],ymm8[9],ymm4[10],ymm8[11],ymm4[12],ymm8[13],ymm4[14],ymm8[15]
|
|
327
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
328
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
329
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
330
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
331
|
+
vpaddw %ymm5, %ymm6, %ymm4
|
|
332
|
+
vpsubw %ymm5, %ymm6, %ymm5
|
|
333
|
+
vpaddw %ymm7, %ymm10, %ymm6
|
|
334
|
+
vpsubw %ymm7, %ymm10, %ymm7
|
|
335
|
+
vpaddw %ymm9, %ymm3, %ymm10
|
|
336
|
+
vpsubw %ymm9, %ymm3, %ymm9
|
|
337
|
+
vpaddw %ymm11, %ymm8, %ymm3
|
|
338
|
+
vpsubw %ymm11, %ymm8, %ymm11
|
|
339
|
+
vpsubw %ymm12, %ymm4, %ymm4
|
|
340
|
+
vpaddw %ymm12, %ymm5, %ymm5
|
|
341
|
+
vpsubw %ymm13, %ymm6, %ymm6
|
|
342
|
+
vpaddw %ymm13, %ymm7, %ymm7
|
|
343
|
+
vpsubw %ymm14, %ymm10, %ymm10
|
|
344
|
+
vpaddw %ymm14, %ymm9, %ymm9
|
|
345
|
+
vpsubw %ymm15, %ymm3, %ymm3
|
|
346
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
347
|
+
vmovdqa 0x1a0(%rsi), %ymm14
|
|
348
|
+
vmovdqa 0x1e0(%rsi), %ymm15
|
|
349
|
+
vmovdqa 0x1c0(%rsi), %ymm8
|
|
350
|
+
vmovdqa 0x200(%rsi), %ymm2
|
|
351
|
+
vpmullw %ymm14, %ymm10, %ymm12
|
|
352
|
+
vpmullw %ymm14, %ymm3, %ymm13
|
|
353
|
+
vpmullw %ymm15, %ymm9, %ymm14
|
|
354
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
355
|
+
vpmulhw %ymm8, %ymm10, %ymm10
|
|
356
|
+
vpmulhw %ymm8, %ymm3, %ymm3
|
|
357
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
358
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
359
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
360
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
361
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
362
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
363
|
+
vpaddw %ymm10, %ymm4, %ymm8
|
|
364
|
+
vpsubw %ymm10, %ymm4, %ymm10
|
|
365
|
+
vpaddw %ymm3, %ymm6, %ymm4
|
|
366
|
+
vpsubw %ymm3, %ymm6, %ymm3
|
|
367
|
+
vpaddw %ymm9, %ymm5, %ymm6
|
|
368
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
369
|
+
vpaddw %ymm11, %ymm7, %ymm5
|
|
370
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
371
|
+
vpsubw %ymm12, %ymm8, %ymm8
|
|
372
|
+
vpaddw %ymm12, %ymm10, %ymm10
|
|
373
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
374
|
+
vpaddw %ymm13, %ymm3, %ymm3
|
|
375
|
+
vpsubw %ymm14, %ymm6, %ymm6
|
|
376
|
+
vpaddw %ymm14, %ymm9, %ymm9
|
|
377
|
+
vpsubw %ymm15, %ymm5, %ymm5
|
|
378
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
379
|
+
vmovdqa %ymm8, (%rdi)
|
|
380
|
+
vmovdqa %ymm4, 0x20(%rdi)
|
|
381
|
+
vmovdqa %ymm10, 0x40(%rdi)
|
|
382
|
+
vmovdqa %ymm3, 0x60(%rdi)
|
|
383
|
+
vmovdqa %ymm6, 0x80(%rdi)
|
|
384
|
+
vmovdqa %ymm5, 0xa0(%rdi)
|
|
385
|
+
vmovdqa %ymm9, 0xc0(%rdi)
|
|
386
|
+
vmovdqa %ymm11, 0xe0(%rdi)
|
|
387
|
+
vmovdqa 0x220(%rsi), %ymm15
|
|
388
|
+
vmovdqa 0x180(%rdi), %ymm8
|
|
389
|
+
vmovdqa 0x1a0(%rdi), %ymm9
|
|
390
|
+
vmovdqa 0x1c0(%rdi), %ymm10
|
|
391
|
+
vmovdqa 0x1e0(%rdi), %ymm11
|
|
392
|
+
vmovdqa 0x240(%rsi), %ymm2
|
|
393
|
+
vpmullw %ymm15, %ymm8, %ymm12
|
|
394
|
+
vpmullw %ymm15, %ymm9, %ymm13
|
|
395
|
+
vpmullw %ymm15, %ymm10, %ymm14
|
|
396
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
397
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
398
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
399
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
400
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
401
|
+
vmovdqa 0x100(%rdi), %ymm4
|
|
402
|
+
vmovdqa 0x120(%rdi), %ymm5
|
|
403
|
+
vmovdqa 0x140(%rdi), %ymm6
|
|
404
|
+
vmovdqa 0x160(%rdi), %ymm7
|
|
405
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
406
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
407
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
408
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
409
|
+
vpaddw %ymm8, %ymm4, %ymm3
|
|
410
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
411
|
+
vpaddw %ymm9, %ymm5, %ymm4
|
|
412
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
413
|
+
vpaddw %ymm10, %ymm6, %ymm5
|
|
414
|
+
vpsubw %ymm10, %ymm6, %ymm10
|
|
415
|
+
vpaddw %ymm11, %ymm7, %ymm6
|
|
416
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
417
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
418
|
+
vpaddw %ymm12, %ymm8, %ymm8
|
|
419
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
420
|
+
vpaddw %ymm13, %ymm9, %ymm9
|
|
421
|
+
vpsubw %ymm14, %ymm5, %ymm5
|
|
422
|
+
vpaddw %ymm14, %ymm10, %ymm10
|
|
423
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
424
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
425
|
+
vperm2i128 $0x20, %ymm10, %ymm5, %ymm7 # ymm7 = ymm5[0,1],ymm10[0,1]
|
|
426
|
+
vperm2i128 $0x31, %ymm10, %ymm5, %ymm10 # ymm10 = ymm5[2,3],ymm10[2,3]
|
|
427
|
+
vperm2i128 $0x20, %ymm11, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm11[0,1]
|
|
428
|
+
vperm2i128 $0x31, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[2,3],ymm11[2,3]
|
|
429
|
+
vmovdqa 0x260(%rsi), %ymm15
|
|
430
|
+
vmovdqa 0x280(%rsi), %ymm2
|
|
431
|
+
vpmullw %ymm15, %ymm7, %ymm12
|
|
432
|
+
vpmullw %ymm15, %ymm10, %ymm13
|
|
433
|
+
vpmullw %ymm15, %ymm5, %ymm14
|
|
434
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
435
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
436
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
437
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
438
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
439
|
+
vperm2i128 $0x20, %ymm8, %ymm3, %ymm6 # ymm6 = ymm3[0,1],ymm8[0,1]
|
|
440
|
+
vperm2i128 $0x31, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[2,3],ymm8[2,3]
|
|
441
|
+
vperm2i128 $0x20, %ymm9, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm9[0,1]
|
|
442
|
+
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
|
|
443
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
444
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
445
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
446
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
447
|
+
vpaddw %ymm7, %ymm6, %ymm4
|
|
448
|
+
vpsubw %ymm7, %ymm6, %ymm7
|
|
449
|
+
vpaddw %ymm10, %ymm8, %ymm6
|
|
450
|
+
vpsubw %ymm10, %ymm8, %ymm10
|
|
451
|
+
vpaddw %ymm5, %ymm3, %ymm8
|
|
452
|
+
vpsubw %ymm5, %ymm3, %ymm5
|
|
453
|
+
vpaddw %ymm11, %ymm9, %ymm3
|
|
454
|
+
vpsubw %ymm11, %ymm9, %ymm11
|
|
455
|
+
vpsubw %ymm12, %ymm4, %ymm4
|
|
456
|
+
vpaddw %ymm12, %ymm7, %ymm7
|
|
457
|
+
vpsubw %ymm13, %ymm6, %ymm6
|
|
458
|
+
vpaddw %ymm13, %ymm10, %ymm10
|
|
459
|
+
vpsubw %ymm14, %ymm8, %ymm8
|
|
460
|
+
vpaddw %ymm14, %ymm5, %ymm5
|
|
461
|
+
vpsubw %ymm15, %ymm3, %ymm3
|
|
462
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
463
|
+
vpunpcklqdq %ymm5, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm5[0],ymm8[2],ymm5[2]
|
|
464
|
+
vpunpckhqdq %ymm5, %ymm8, %ymm5 # ymm5 = ymm8[1],ymm5[1],ymm8[3],ymm5[3]
|
|
465
|
+
vpunpcklqdq %ymm11, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm11[0],ymm3[2],ymm11[2]
|
|
466
|
+
vpunpckhqdq %ymm11, %ymm3, %ymm11 # ymm11 = ymm3[1],ymm11[1],ymm3[3],ymm11[3]
|
|
467
|
+
vmovdqa 0x2a0(%rsi), %ymm15
|
|
468
|
+
vmovdqa 0x2c0(%rsi), %ymm2
|
|
469
|
+
vpmullw %ymm15, %ymm9, %ymm12
|
|
470
|
+
vpmullw %ymm15, %ymm5, %ymm13
|
|
471
|
+
vpmullw %ymm15, %ymm8, %ymm14
|
|
472
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
473
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
474
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
475
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
476
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
477
|
+
vpunpcklqdq %ymm7, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm7[0],ymm4[2],ymm7[2]
|
|
478
|
+
vpunpckhqdq %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[1],ymm7[1],ymm4[3],ymm7[3]
|
|
479
|
+
vpunpcklqdq %ymm10, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm10[0],ymm6[2],ymm10[2]
|
|
480
|
+
vpunpckhqdq %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[1],ymm10[1],ymm6[3],ymm10[3]
|
|
481
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
482
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
483
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
484
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
485
|
+
vpaddw %ymm9, %ymm3, %ymm6
|
|
486
|
+
vpsubw %ymm9, %ymm3, %ymm9
|
|
487
|
+
vpaddw %ymm5, %ymm7, %ymm3
|
|
488
|
+
vpsubw %ymm5, %ymm7, %ymm5
|
|
489
|
+
vpaddw %ymm8, %ymm4, %ymm7
|
|
490
|
+
vpsubw %ymm8, %ymm4, %ymm8
|
|
491
|
+
vpaddw %ymm11, %ymm10, %ymm4
|
|
492
|
+
vpsubw %ymm11, %ymm10, %ymm11
|
|
493
|
+
vpsubw %ymm12, %ymm6, %ymm6
|
|
494
|
+
vpaddw %ymm12, %ymm9, %ymm9
|
|
495
|
+
vpsubw %ymm13, %ymm3, %ymm3
|
|
496
|
+
vpaddw %ymm13, %ymm5, %ymm5
|
|
497
|
+
vpsubw %ymm14, %ymm7, %ymm7
|
|
498
|
+
vpaddw %ymm14, %ymm8, %ymm8
|
|
499
|
+
vpsubw %ymm15, %ymm4, %ymm4
|
|
500
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
501
|
+
vmovsldup %ymm8, %ymm10 # ymm10 = ymm8[0,0,2,2,4,4,6,6]
|
|
502
|
+
vpblendd $0xaa, %ymm10, %ymm7, %ymm10 # ymm10 = ymm7[0],ymm10[1],ymm7[2],ymm10[3],ymm7[4],ymm10[5],ymm7[6],ymm10[7]
|
|
503
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
504
|
+
vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
|
|
505
|
+
vmovsldup %ymm11, %ymm7 # ymm7 = ymm11[0,0,2,2,4,4,6,6]
|
|
506
|
+
vpblendd $0xaa, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm7[1],ymm4[2],ymm7[3],ymm4[4],ymm7[5],ymm4[6],ymm7[7]
|
|
507
|
+
vpsrlq $0x20, %ymm4, %ymm4
|
|
508
|
+
vpblendd $0xaa, %ymm11, %ymm4, %ymm11 # ymm11 = ymm4[0],ymm11[1],ymm4[2],ymm11[3],ymm4[4],ymm11[5],ymm4[6],ymm11[7]
|
|
509
|
+
vmovdqa 0x2e0(%rsi), %ymm15
|
|
510
|
+
vmovdqa 0x300(%rsi), %ymm2
|
|
511
|
+
vpmullw %ymm15, %ymm10, %ymm12
|
|
512
|
+
vpmullw %ymm15, %ymm8, %ymm13
|
|
513
|
+
vpmullw %ymm15, %ymm7, %ymm14
|
|
514
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
515
|
+
vpmulhw %ymm2, %ymm10, %ymm10
|
|
516
|
+
vpmulhw %ymm2, %ymm8, %ymm8
|
|
517
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
518
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
519
|
+
vmovsldup %ymm9, %ymm4 # ymm4 = ymm9[0,0,2,2,4,4,6,6]
|
|
520
|
+
vpblendd $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7]
|
|
521
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
522
|
+
vpblendd $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7]
|
|
523
|
+
vmovsldup %ymm5, %ymm6 # ymm6 = ymm5[0,0,2,2,4,4,6,6]
|
|
524
|
+
vpblendd $0xaa, %ymm6, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm6[1],ymm3[2],ymm6[3],ymm3[4],ymm6[5],ymm3[6],ymm6[7]
|
|
525
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
526
|
+
vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
|
|
527
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
528
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
529
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
530
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
531
|
+
vpaddw %ymm10, %ymm4, %ymm3
|
|
532
|
+
vpsubw %ymm10, %ymm4, %ymm10
|
|
533
|
+
vpaddw %ymm8, %ymm9, %ymm4
|
|
534
|
+
vpsubw %ymm8, %ymm9, %ymm8
|
|
535
|
+
vpaddw %ymm7, %ymm6, %ymm9
|
|
536
|
+
vpsubw %ymm7, %ymm6, %ymm7
|
|
537
|
+
vpaddw %ymm11, %ymm5, %ymm6
|
|
538
|
+
vpsubw %ymm11, %ymm5, %ymm11
|
|
539
|
+
vpsubw %ymm12, %ymm3, %ymm3
|
|
540
|
+
vpaddw %ymm12, %ymm10, %ymm10
|
|
541
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
542
|
+
vpaddw %ymm13, %ymm8, %ymm8
|
|
543
|
+
vpsubw %ymm14, %ymm9, %ymm9
|
|
544
|
+
vpaddw %ymm14, %ymm7, %ymm7
|
|
545
|
+
vpsubw %ymm15, %ymm6, %ymm6
|
|
546
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
547
|
+
vpslld $0x10, %ymm7, %ymm5
|
|
548
|
+
vpblendw $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7],ymm9[8],ymm5[9],ymm9[10],ymm5[11],ymm9[12],ymm5[13],ymm9[14],ymm5[15]
|
|
549
|
+
vpsrld $0x10, %ymm9, %ymm9
|
|
550
|
+
vpblendw $0xaa, %ymm7, %ymm9, %ymm7 # ymm7 = ymm9[0],ymm7[1],ymm9[2],ymm7[3],ymm9[4],ymm7[5],ymm9[6],ymm7[7],ymm9[8],ymm7[9],ymm9[10],ymm7[11],ymm9[12],ymm7[13],ymm9[14],ymm7[15]
|
|
551
|
+
vpslld $0x10, %ymm11, %ymm9
|
|
552
|
+
vpblendw $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7],ymm6[8],ymm9[9],ymm6[10],ymm9[11],ymm6[12],ymm9[13],ymm6[14],ymm9[15]
|
|
553
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
554
|
+
vpblendw $0xaa, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[0],ymm11[1],ymm6[2],ymm11[3],ymm6[4],ymm11[5],ymm6[6],ymm11[7],ymm6[8],ymm11[9],ymm6[10],ymm11[11],ymm6[12],ymm11[13],ymm6[14],ymm11[15]
|
|
555
|
+
vmovdqa 0x320(%rsi), %ymm15
|
|
556
|
+
vmovdqa 0x340(%rsi), %ymm2
|
|
557
|
+
vpmullw %ymm15, %ymm5, %ymm12
|
|
558
|
+
vpmullw %ymm15, %ymm7, %ymm13
|
|
559
|
+
vpmullw %ymm15, %ymm9, %ymm14
|
|
560
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
561
|
+
vpmulhw %ymm2, %ymm5, %ymm5
|
|
562
|
+
vpmulhw %ymm2, %ymm7, %ymm7
|
|
563
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
564
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
565
|
+
vpslld $0x10, %ymm10, %ymm6
|
|
566
|
+
vpblendw $0xaa, %ymm6, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm6[1],ymm3[2],ymm6[3],ymm3[4],ymm6[5],ymm3[6],ymm6[7],ymm3[8],ymm6[9],ymm3[10],ymm6[11],ymm3[12],ymm6[13],ymm3[14],ymm6[15]
|
|
567
|
+
vpsrld $0x10, %ymm3, %ymm3
|
|
568
|
+
vpblendw $0xaa, %ymm10, %ymm3, %ymm10 # ymm10 = ymm3[0],ymm10[1],ymm3[2],ymm10[3],ymm3[4],ymm10[5],ymm3[6],ymm10[7],ymm3[8],ymm10[9],ymm3[10],ymm10[11],ymm3[12],ymm10[13],ymm3[14],ymm10[15]
|
|
569
|
+
vpslld $0x10, %ymm8, %ymm3
|
|
570
|
+
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
|
|
571
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
572
|
+
vpblendw $0xaa, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm8[1],ymm4[2],ymm8[3],ymm4[4],ymm8[5],ymm4[6],ymm8[7],ymm4[8],ymm8[9],ymm4[10],ymm8[11],ymm4[12],ymm8[13],ymm4[14],ymm8[15]
|
|
573
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
574
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
575
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
576
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
577
|
+
vpaddw %ymm5, %ymm6, %ymm4
|
|
578
|
+
vpsubw %ymm5, %ymm6, %ymm5
|
|
579
|
+
vpaddw %ymm7, %ymm10, %ymm6
|
|
580
|
+
vpsubw %ymm7, %ymm10, %ymm7
|
|
581
|
+
vpaddw %ymm9, %ymm3, %ymm10
|
|
582
|
+
vpsubw %ymm9, %ymm3, %ymm9
|
|
583
|
+
vpaddw %ymm11, %ymm8, %ymm3
|
|
584
|
+
vpsubw %ymm11, %ymm8, %ymm11
|
|
585
|
+
vpsubw %ymm12, %ymm4, %ymm4
|
|
586
|
+
vpaddw %ymm12, %ymm5, %ymm5
|
|
587
|
+
vpsubw %ymm13, %ymm6, %ymm6
|
|
588
|
+
vpaddw %ymm13, %ymm7, %ymm7
|
|
589
|
+
vpsubw %ymm14, %ymm10, %ymm10
|
|
590
|
+
vpaddw %ymm14, %ymm9, %ymm9
|
|
591
|
+
vpsubw %ymm15, %ymm3, %ymm3
|
|
592
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
593
|
+
vmovdqa 0x360(%rsi), %ymm14
|
|
594
|
+
vmovdqa 0x3a0(%rsi), %ymm15
|
|
595
|
+
vmovdqa 0x380(%rsi), %ymm8
|
|
596
|
+
vmovdqa 0x3c0(%rsi), %ymm2
|
|
597
|
+
vpmullw %ymm14, %ymm10, %ymm12
|
|
598
|
+
vpmullw %ymm14, %ymm3, %ymm13
|
|
599
|
+
vpmullw %ymm15, %ymm9, %ymm14
|
|
600
|
+
vpmullw %ymm15, %ymm11, %ymm15
|
|
601
|
+
vpmulhw %ymm8, %ymm10, %ymm10
|
|
602
|
+
vpmulhw %ymm8, %ymm3, %ymm3
|
|
603
|
+
vpmulhw %ymm2, %ymm9, %ymm9
|
|
604
|
+
vpmulhw %ymm2, %ymm11, %ymm11
|
|
605
|
+
vpmulhw %ymm0, %ymm12, %ymm12
|
|
606
|
+
vpmulhw %ymm0, %ymm13, %ymm13
|
|
607
|
+
vpmulhw %ymm0, %ymm14, %ymm14
|
|
608
|
+
vpmulhw %ymm0, %ymm15, %ymm15
|
|
609
|
+
vpaddw %ymm10, %ymm4, %ymm8
|
|
610
|
+
vpsubw %ymm10, %ymm4, %ymm10
|
|
611
|
+
vpaddw %ymm3, %ymm6, %ymm4
|
|
612
|
+
vpsubw %ymm3, %ymm6, %ymm3
|
|
613
|
+
vpaddw %ymm9, %ymm5, %ymm6
|
|
614
|
+
vpsubw %ymm9, %ymm5, %ymm9
|
|
615
|
+
vpaddw %ymm11, %ymm7, %ymm5
|
|
616
|
+
vpsubw %ymm11, %ymm7, %ymm11
|
|
617
|
+
vpsubw %ymm12, %ymm8, %ymm8
|
|
618
|
+
vpaddw %ymm12, %ymm10, %ymm10
|
|
619
|
+
vpsubw %ymm13, %ymm4, %ymm4
|
|
620
|
+
vpaddw %ymm13, %ymm3, %ymm3
|
|
621
|
+
vpsubw %ymm14, %ymm6, %ymm6
|
|
622
|
+
vpaddw %ymm14, %ymm9, %ymm9
|
|
623
|
+
vpsubw %ymm15, %ymm5, %ymm5
|
|
624
|
+
vpaddw %ymm15, %ymm11, %ymm11
|
|
625
|
+
vmovdqa %ymm8, 0x100(%rdi)
|
|
626
|
+
vmovdqa %ymm4, 0x120(%rdi)
|
|
627
|
+
vmovdqa %ymm10, 0x140(%rdi)
|
|
628
|
+
vmovdqa %ymm3, 0x160(%rdi)
|
|
629
|
+
vmovdqa %ymm6, 0x180(%rdi)
|
|
630
|
+
vmovdqa %ymm5, 0x1a0(%rdi)
|
|
631
|
+
vmovdqa %ymm9, 0x1c0(%rdi)
|
|
632
|
+
vmovdqa %ymm11, 0x1e0(%rdi)
|
|
633
|
+
retq
|
|
634
|
+
.cfi_endproc
|
|
635
|
+
|
|
636
|
+
MLK_ASM_FN_SIZE(ntt_avx2)
|
|
637
|
+
|
|
638
|
+
#endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
|
|
639
|
+
*/
|