pq_crypto 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +27 -2
- data/CHANGELOG.md +59 -0
- data/GET_STARTED.md +21 -16
- data/README.md +26 -0
- data/SECURITY.md +22 -16
- data/ext/pqcrypto/extconf.rb +183 -99
- data/ext/pqcrypto/mldsa_api.h +1 -118
- data/ext/pqcrypto/mlkem_api.h +1 -42
- data/ext/pqcrypto/pq_externalmu.c +88 -216
- data/ext/pqcrypto/pqcrypto_native_api.h +132 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +234 -12
- data/ext/pqcrypto/pqcrypto_secure.c +429 -334
- data/ext/pqcrypto/pqcrypto_secure.h +13 -45
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/randombytes.h +9 -0
- data/ext/pqcrypto/vendor/.vendored +12 -5
- data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
- data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
- data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
- data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
- data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
- data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
- data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
- data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
- data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
- data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
- data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
- data/lib/pq_crypto/hybrid_kem.rb +10 -1
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +5 -1
- data/script/vendor_libs.rb +228 -154
- metadata +236 -160
- data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
- data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
- data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
- data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
- data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
- data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
- data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
- data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
- data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
- data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
- data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.c +0 -311
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.c +0 -198
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.c +0 -108
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
- data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.c +0 -848
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/LICENSE +0 -5
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile +0 -19
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile.Microsoft_nmake +0 -23
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/api.h +0 -50
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.c +0 -98
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.h +0 -10
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.c +0 -261
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.h +0 -31
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/params.h +0 -44
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.c +0 -823
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.h +0 -52
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.c +0 -415
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.h +0 -65
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.c +0 -69
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.h +0 -17
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.c +0 -92
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.h +0 -14
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.c +0 -407
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.h +0 -47
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric-shake.c +0 -26
- data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric.h +0 -34
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [REF_AVX2]
|
|
10
|
+
* CRYSTALS-Kyber optimized AVX2 implementation
|
|
11
|
+
* Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
|
|
12
|
+
* https://github.com/pq-crystals/kyber/tree/main/avx2
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* This file is derived from the public domain
|
|
17
|
+
* AVX2 Kyber implementation @[REF_AVX2].
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include "../../../common.h"
|
|
21
|
+
|
|
22
|
+
#if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
|
|
23
|
+
!defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
24
|
+
|
|
25
|
+
/*
|
|
26
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
27
|
+
* dev/x86_64/src/nttfrombytes.S using scripts/simpasm. Do not modify it directly.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
#if defined(__ELF__)
|
|
31
|
+
.section .note.GNU-stack,"",@progbits
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
.text
|
|
35
|
+
.balign 4
|
|
36
|
+
.global MLK_ASM_NAMESPACE(nttfrombytes_avx2)
|
|
37
|
+
MLK_ASM_FN_SYMBOL(nttfrombytes_avx2)
|
|
38
|
+
|
|
39
|
+
.cfi_startproc
|
|
40
|
+
movl $0xfff0fff, %eax # imm = 0xFFF0FFF
|
|
41
|
+
vmovd %eax, %xmm0
|
|
42
|
+
vpbroadcastd %xmm0, %ymm0
|
|
43
|
+
vmovdqu (%rsi), %ymm4
|
|
44
|
+
vmovdqu 0x20(%rsi), %ymm5
|
|
45
|
+
vmovdqu 0x40(%rsi), %ymm6
|
|
46
|
+
vmovdqu 0x60(%rsi), %ymm7
|
|
47
|
+
vmovdqu 0x80(%rsi), %ymm8
|
|
48
|
+
vmovdqu 0xa0(%rsi), %ymm9
|
|
49
|
+
vperm2i128 $0x20, %ymm7, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm7[0,1]
|
|
50
|
+
vperm2i128 $0x31, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[2,3],ymm7[2,3]
|
|
51
|
+
vperm2i128 $0x20, %ymm8, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm8[0,1]
|
|
52
|
+
vperm2i128 $0x31, %ymm8, %ymm5, %ymm8 # ymm8 = ymm5[2,3],ymm8[2,3]
|
|
53
|
+
vperm2i128 $0x20, %ymm9, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm9[0,1]
|
|
54
|
+
vperm2i128 $0x31, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[2,3],ymm9[2,3]
|
|
55
|
+
vpunpcklqdq %ymm8, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm8[0],ymm3[2],ymm8[2]
|
|
56
|
+
vpunpckhqdq %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[1],ymm8[1],ymm3[3],ymm8[3]
|
|
57
|
+
vpunpcklqdq %ymm5, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
|
|
58
|
+
vpunpckhqdq %ymm5, %ymm7, %ymm5 # ymm5 = ymm7[1],ymm5[1],ymm7[3],ymm5[3]
|
|
59
|
+
vpunpcklqdq %ymm9, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm9[0],ymm4[2],ymm9[2]
|
|
60
|
+
vpunpckhqdq %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[1],ymm9[1],ymm4[3],ymm9[3]
|
|
61
|
+
vmovsldup %ymm5, %ymm4 # ymm4 = ymm5[0,0,2,2,4,4,6,6]
|
|
62
|
+
vpblendd $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7]
|
|
63
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
64
|
+
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
|
|
65
|
+
vmovsldup %ymm7, %ymm6 # ymm6 = ymm7[0,0,2,2,4,4,6,6]
|
|
66
|
+
vpblendd $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7]
|
|
67
|
+
vpsrlq $0x20, %ymm8, %ymm8
|
|
68
|
+
vpblendd $0xaa, %ymm7, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm7[1],ymm8[2],ymm7[3],ymm8[4],ymm7[5],ymm8[6],ymm7[7]
|
|
69
|
+
vmovsldup %ymm9, %ymm8 # ymm8 = ymm9[0,0,2,2,4,4,6,6]
|
|
70
|
+
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
|
|
71
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
72
|
+
vpblendd $0xaa, %ymm9, %ymm3, %ymm9 # ymm9 = ymm3[0],ymm9[1],ymm3[2],ymm9[3],ymm3[4],ymm9[5],ymm3[6],ymm9[7]
|
|
73
|
+
vpslld $0x10, %ymm7, %ymm10
|
|
74
|
+
vpblendw $0xaa, %ymm10, %ymm4, %ymm10 # ymm10 = ymm4[0],ymm10[1],ymm4[2],ymm10[3],ymm4[4],ymm10[5],ymm4[6],ymm10[7],ymm4[8],ymm10[9],ymm4[10],ymm10[11],ymm4[12],ymm10[13],ymm4[14],ymm10[15]
|
|
75
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
76
|
+
vpblendw $0xaa, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm7[1],ymm4[2],ymm7[3],ymm4[4],ymm7[5],ymm4[6],ymm7[7],ymm4[8],ymm7[9],ymm4[10],ymm7[11],ymm4[12],ymm7[13],ymm4[14],ymm7[15]
|
|
77
|
+
vpslld $0x10, %ymm8, %ymm4
|
|
78
|
+
vpblendw $0xaa, %ymm4, %ymm5, %ymm4 # ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[3],ymm5[4],ymm4[5],ymm5[6],ymm4[7],ymm5[8],ymm4[9],ymm5[10],ymm4[11],ymm5[12],ymm4[13],ymm5[14],ymm4[15]
|
|
79
|
+
vpsrld $0x10, %ymm5, %ymm5
|
|
80
|
+
vpblendw $0xaa, %ymm8, %ymm5, %ymm8 # ymm8 = ymm5[0],ymm8[1],ymm5[2],ymm8[3],ymm5[4],ymm8[5],ymm5[6],ymm8[7],ymm5[8],ymm8[9],ymm5[10],ymm8[11],ymm5[12],ymm8[13],ymm5[14],ymm8[15]
|
|
81
|
+
vpslld $0x10, %ymm9, %ymm5
|
|
82
|
+
vpblendw $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7],ymm6[8],ymm5[9],ymm6[10],ymm5[11],ymm6[12],ymm5[13],ymm6[14],ymm5[15]
|
|
83
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
84
|
+
vpblendw $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7],ymm6[8],ymm9[9],ymm6[10],ymm9[11],ymm6[12],ymm9[13],ymm6[14],ymm9[15]
|
|
85
|
+
vpsrlw $0xc, %ymm10, %ymm11
|
|
86
|
+
vpsllw $0x4, %ymm7, %ymm12
|
|
87
|
+
vpor %ymm11, %ymm12, %ymm11
|
|
88
|
+
vpand %ymm0, %ymm10, %ymm10
|
|
89
|
+
vpand %ymm0, %ymm11, %ymm11
|
|
90
|
+
vpsrlw $0x8, %ymm7, %ymm12
|
|
91
|
+
vpsllw $0x8, %ymm4, %ymm13
|
|
92
|
+
vpor %ymm12, %ymm13, %ymm12
|
|
93
|
+
vpand %ymm0, %ymm12, %ymm12
|
|
94
|
+
vpsrlw $0x4, %ymm4, %ymm13
|
|
95
|
+
vpand %ymm0, %ymm13, %ymm13
|
|
96
|
+
vpsrlw $0xc, %ymm8, %ymm14
|
|
97
|
+
vpsllw $0x4, %ymm5, %ymm15
|
|
98
|
+
vpor %ymm14, %ymm15, %ymm14
|
|
99
|
+
vpand %ymm0, %ymm8, %ymm8
|
|
100
|
+
vpand %ymm0, %ymm14, %ymm14
|
|
101
|
+
vpsrlw $0x8, %ymm5, %ymm15
|
|
102
|
+
vpsllw $0x8, %ymm9, %ymm1
|
|
103
|
+
vpor %ymm15, %ymm1, %ymm15
|
|
104
|
+
vpand %ymm0, %ymm15, %ymm15
|
|
105
|
+
vpsrlw $0x4, %ymm9, %ymm1
|
|
106
|
+
vpand %ymm0, %ymm1, %ymm1
|
|
107
|
+
vmovdqa %ymm10, (%rdi)
|
|
108
|
+
vmovdqa %ymm11, 0x20(%rdi)
|
|
109
|
+
vmovdqa %ymm12, 0x40(%rdi)
|
|
110
|
+
vmovdqa %ymm13, 0x60(%rdi)
|
|
111
|
+
vmovdqa %ymm8, 0x80(%rdi)
|
|
112
|
+
vmovdqa %ymm14, 0xa0(%rdi)
|
|
113
|
+
vmovdqa %ymm15, 0xc0(%rdi)
|
|
114
|
+
vmovdqa %ymm1, 0xe0(%rdi)
|
|
115
|
+
vmovdqu 0xc0(%rsi), %ymm4
|
|
116
|
+
vmovdqu 0xe0(%rsi), %ymm5
|
|
117
|
+
vmovdqu 0x100(%rsi), %ymm6
|
|
118
|
+
vmovdqu 0x120(%rsi), %ymm7
|
|
119
|
+
vmovdqu 0x140(%rsi), %ymm8
|
|
120
|
+
vmovdqu 0x160(%rsi), %ymm9
|
|
121
|
+
vperm2i128 $0x20, %ymm7, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm7[0,1]
|
|
122
|
+
vperm2i128 $0x31, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[2,3],ymm7[2,3]
|
|
123
|
+
vperm2i128 $0x20, %ymm8, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm8[0,1]
|
|
124
|
+
vperm2i128 $0x31, %ymm8, %ymm5, %ymm8 # ymm8 = ymm5[2,3],ymm8[2,3]
|
|
125
|
+
vperm2i128 $0x20, %ymm9, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm9[0,1]
|
|
126
|
+
vperm2i128 $0x31, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[2,3],ymm9[2,3]
|
|
127
|
+
vpunpcklqdq %ymm8, %ymm3, %ymm6 # ymm6 = ymm3[0],ymm8[0],ymm3[2],ymm8[2]
|
|
128
|
+
vpunpckhqdq %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[1],ymm8[1],ymm3[3],ymm8[3]
|
|
129
|
+
vpunpcklqdq %ymm5, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm5[0],ymm7[2],ymm5[2]
|
|
130
|
+
vpunpckhqdq %ymm5, %ymm7, %ymm5 # ymm5 = ymm7[1],ymm5[1],ymm7[3],ymm5[3]
|
|
131
|
+
vpunpcklqdq %ymm9, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm9[0],ymm4[2],ymm9[2]
|
|
132
|
+
vpunpckhqdq %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[1],ymm9[1],ymm4[3],ymm9[3]
|
|
133
|
+
vmovsldup %ymm5, %ymm4 # ymm4 = ymm5[0,0,2,2,4,4,6,6]
|
|
134
|
+
vpblendd $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7]
|
|
135
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
136
|
+
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
|
|
137
|
+
vmovsldup %ymm7, %ymm6 # ymm6 = ymm7[0,0,2,2,4,4,6,6]
|
|
138
|
+
vpblendd $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7]
|
|
139
|
+
vpsrlq $0x20, %ymm8, %ymm8
|
|
140
|
+
vpblendd $0xaa, %ymm7, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm7[1],ymm8[2],ymm7[3],ymm8[4],ymm7[5],ymm8[6],ymm7[7]
|
|
141
|
+
vmovsldup %ymm9, %ymm8 # ymm8 = ymm9[0,0,2,2,4,4,6,6]
|
|
142
|
+
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
|
|
143
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
144
|
+
vpblendd $0xaa, %ymm9, %ymm3, %ymm9 # ymm9 = ymm3[0],ymm9[1],ymm3[2],ymm9[3],ymm3[4],ymm9[5],ymm3[6],ymm9[7]
|
|
145
|
+
vpslld $0x10, %ymm7, %ymm10
|
|
146
|
+
vpblendw $0xaa, %ymm10, %ymm4, %ymm10 # ymm10 = ymm4[0],ymm10[1],ymm4[2],ymm10[3],ymm4[4],ymm10[5],ymm4[6],ymm10[7],ymm4[8],ymm10[9],ymm4[10],ymm10[11],ymm4[12],ymm10[13],ymm4[14],ymm10[15]
|
|
147
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
148
|
+
vpblendw $0xaa, %ymm7, %ymm4, %ymm7 # ymm7 = ymm4[0],ymm7[1],ymm4[2],ymm7[3],ymm4[4],ymm7[5],ymm4[6],ymm7[7],ymm4[8],ymm7[9],ymm4[10],ymm7[11],ymm4[12],ymm7[13],ymm4[14],ymm7[15]
|
|
149
|
+
vpslld $0x10, %ymm8, %ymm4
|
|
150
|
+
vpblendw $0xaa, %ymm4, %ymm5, %ymm4 # ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[3],ymm5[4],ymm4[5],ymm5[6],ymm4[7],ymm5[8],ymm4[9],ymm5[10],ymm4[11],ymm5[12],ymm4[13],ymm5[14],ymm4[15]
|
|
151
|
+
vpsrld $0x10, %ymm5, %ymm5
|
|
152
|
+
vpblendw $0xaa, %ymm8, %ymm5, %ymm8 # ymm8 = ymm5[0],ymm8[1],ymm5[2],ymm8[3],ymm5[4],ymm8[5],ymm5[6],ymm8[7],ymm5[8],ymm8[9],ymm5[10],ymm8[11],ymm5[12],ymm8[13],ymm5[14],ymm8[15]
|
|
153
|
+
vpslld $0x10, %ymm9, %ymm5
|
|
154
|
+
vpblendw $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7],ymm6[8],ymm5[9],ymm6[10],ymm5[11],ymm6[12],ymm5[13],ymm6[14],ymm5[15]
|
|
155
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
156
|
+
vpblendw $0xaa, %ymm9, %ymm6, %ymm9 # ymm9 = ymm6[0],ymm9[1],ymm6[2],ymm9[3],ymm6[4],ymm9[5],ymm6[6],ymm9[7],ymm6[8],ymm9[9],ymm6[10],ymm9[11],ymm6[12],ymm9[13],ymm6[14],ymm9[15]
|
|
157
|
+
vpsrlw $0xc, %ymm10, %ymm11
|
|
158
|
+
vpsllw $0x4, %ymm7, %ymm12
|
|
159
|
+
vpor %ymm11, %ymm12, %ymm11
|
|
160
|
+
vpand %ymm0, %ymm10, %ymm10
|
|
161
|
+
vpand %ymm0, %ymm11, %ymm11
|
|
162
|
+
vpsrlw $0x8, %ymm7, %ymm12
|
|
163
|
+
vpsllw $0x8, %ymm4, %ymm13
|
|
164
|
+
vpor %ymm12, %ymm13, %ymm12
|
|
165
|
+
vpand %ymm0, %ymm12, %ymm12
|
|
166
|
+
vpsrlw $0x4, %ymm4, %ymm13
|
|
167
|
+
vpand %ymm0, %ymm13, %ymm13
|
|
168
|
+
vpsrlw $0xc, %ymm8, %ymm14
|
|
169
|
+
vpsllw $0x4, %ymm5, %ymm15
|
|
170
|
+
vpor %ymm14, %ymm15, %ymm14
|
|
171
|
+
vpand %ymm0, %ymm8, %ymm8
|
|
172
|
+
vpand %ymm0, %ymm14, %ymm14
|
|
173
|
+
vpsrlw $0x8, %ymm5, %ymm15
|
|
174
|
+
vpsllw $0x8, %ymm9, %ymm1
|
|
175
|
+
vpor %ymm15, %ymm1, %ymm15
|
|
176
|
+
vpand %ymm0, %ymm15, %ymm15
|
|
177
|
+
vpsrlw $0x4, %ymm9, %ymm1
|
|
178
|
+
vpand %ymm0, %ymm1, %ymm1
|
|
179
|
+
vmovdqa %ymm10, 0x100(%rdi)
|
|
180
|
+
vmovdqa %ymm11, 0x120(%rdi)
|
|
181
|
+
vmovdqa %ymm12, 0x140(%rdi)
|
|
182
|
+
vmovdqa %ymm13, 0x160(%rdi)
|
|
183
|
+
vmovdqa %ymm8, 0x180(%rdi)
|
|
184
|
+
vmovdqa %ymm14, 0x1a0(%rdi)
|
|
185
|
+
vmovdqa %ymm15, 0x1c0(%rdi)
|
|
186
|
+
vmovdqa %ymm1, 0x1e0(%rdi)
|
|
187
|
+
retq
|
|
188
|
+
.cfi_endproc
|
|
189
|
+
|
|
190
|
+
MLK_ASM_FN_SIZE(nttfrombytes_avx2)
|
|
191
|
+
|
|
192
|
+
#endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
|
|
193
|
+
*/
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [REF_AVX2]
|
|
10
|
+
* CRYSTALS-Kyber optimized AVX2 implementation
|
|
11
|
+
* Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
|
|
12
|
+
* https://github.com/pq-crystals/kyber/tree/main/avx2
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* This file is derived from the public domain
|
|
17
|
+
* AVX2 Kyber implementation @[REF_AVX2].
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include "../../../common.h"
|
|
21
|
+
|
|
22
|
+
#if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
|
|
23
|
+
!defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
24
|
+
|
|
25
|
+
/*
|
|
26
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
27
|
+
* dev/x86_64/src/ntttobytes.S using scripts/simpasm. Do not modify it directly.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
#if defined(__ELF__)
|
|
31
|
+
.section .note.GNU-stack,"",@progbits
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
.text
|
|
35
|
+
.balign 4
|
|
36
|
+
.global MLK_ASM_NAMESPACE(ntttobytes_avx2)
|
|
37
|
+
MLK_ASM_FN_SYMBOL(ntttobytes_avx2)
|
|
38
|
+
|
|
39
|
+
.cfi_startproc
|
|
40
|
+
movl $0xd010d01, %eax # imm = 0xD010D01
|
|
41
|
+
vmovd %eax, %xmm0
|
|
42
|
+
vpbroadcastd %xmm0, %ymm0
|
|
43
|
+
vmovdqa (%rsi), %ymm5
|
|
44
|
+
vmovdqa 0x20(%rsi), %ymm6
|
|
45
|
+
vmovdqa 0x40(%rsi), %ymm7
|
|
46
|
+
vmovdqa 0x60(%rsi), %ymm8
|
|
47
|
+
vmovdqa 0x80(%rsi), %ymm9
|
|
48
|
+
vmovdqa 0xa0(%rsi), %ymm10
|
|
49
|
+
vmovdqa 0xc0(%rsi), %ymm11
|
|
50
|
+
vmovdqa 0xe0(%rsi), %ymm12
|
|
51
|
+
vpsllw $0xc, %ymm6, %ymm4
|
|
52
|
+
vpor %ymm4, %ymm5, %ymm4
|
|
53
|
+
vpsrlw $0x4, %ymm6, %ymm5
|
|
54
|
+
vpsllw $0x8, %ymm7, %ymm6
|
|
55
|
+
vpor %ymm5, %ymm6, %ymm5
|
|
56
|
+
vpsrlw $0x8, %ymm7, %ymm6
|
|
57
|
+
vpsllw $0x4, %ymm8, %ymm7
|
|
58
|
+
vpor %ymm6, %ymm7, %ymm6
|
|
59
|
+
vpsllw $0xc, %ymm10, %ymm7
|
|
60
|
+
vpor %ymm7, %ymm9, %ymm7
|
|
61
|
+
vpsrlw $0x4, %ymm10, %ymm8
|
|
62
|
+
vpsllw $0x8, %ymm11, %ymm9
|
|
63
|
+
vpor %ymm8, %ymm9, %ymm8
|
|
64
|
+
vpsrlw $0x8, %ymm11, %ymm9
|
|
65
|
+
vpsllw $0x4, %ymm12, %ymm10
|
|
66
|
+
vpor %ymm9, %ymm10, %ymm9
|
|
67
|
+
vpslld $0x10, %ymm5, %ymm3
|
|
68
|
+
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
|
|
69
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
70
|
+
vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
|
|
71
|
+
vpslld $0x10, %ymm7, %ymm4
|
|
72
|
+
vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
|
|
73
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
74
|
+
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
|
|
75
|
+
vpslld $0x10, %ymm9, %ymm6
|
|
76
|
+
vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
|
|
77
|
+
vpsrld $0x10, %ymm8, %ymm8
|
|
78
|
+
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
|
|
79
|
+
vmovsldup %ymm4, %ymm8 # ymm8 = ymm4[0,0,2,2,4,4,6,6]
|
|
80
|
+
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
|
|
81
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
82
|
+
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
|
|
83
|
+
vmovsldup %ymm5, %ymm3 # ymm3 = ymm5[0,0,2,2,4,4,6,6]
|
|
84
|
+
vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
|
|
85
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
86
|
+
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
|
|
87
|
+
vmovsldup %ymm9, %ymm6 # ymm6 = ymm9[0,0,2,2,4,4,6,6]
|
|
88
|
+
vpblendd $0xaa, %ymm6, %ymm7, %ymm6 # ymm6 = ymm7[0],ymm6[1],ymm7[2],ymm6[3],ymm7[4],ymm6[5],ymm7[6],ymm6[7]
|
|
89
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
90
|
+
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
|
|
91
|
+
vpunpcklqdq %ymm3, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm3[0],ymm8[2],ymm3[2]
|
|
92
|
+
vpunpckhqdq %ymm3, %ymm8, %ymm3 # ymm3 = ymm8[1],ymm3[1],ymm8[3],ymm3[3]
|
|
93
|
+
vpunpcklqdq %ymm4, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm4[0],ymm6[2],ymm4[2]
|
|
94
|
+
vpunpckhqdq %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[1],ymm4[1],ymm6[3],ymm4[3]
|
|
95
|
+
vpunpcklqdq %ymm9, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm9[0],ymm5[2],ymm9[2]
|
|
96
|
+
vpunpckhqdq %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[1],ymm9[1],ymm5[3],ymm9[3]
|
|
97
|
+
vperm2i128 $0x20, %ymm8, %ymm7, %ymm5 # ymm5 = ymm7[0,1],ymm8[0,1]
|
|
98
|
+
vperm2i128 $0x31, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[2,3],ymm8[2,3]
|
|
99
|
+
vperm2i128 $0x20, %ymm3, %ymm6, %ymm7 # ymm7 = ymm6[0,1],ymm3[0,1]
|
|
100
|
+
vperm2i128 $0x31, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[2,3],ymm3[2,3]
|
|
101
|
+
vperm2i128 $0x20, %ymm9, %ymm4, %ymm6 # ymm6 = ymm4[0,1],ymm9[0,1]
|
|
102
|
+
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
|
|
103
|
+
vmovdqu %ymm5, (%rdi)
|
|
104
|
+
vmovdqu %ymm7, 0x20(%rdi)
|
|
105
|
+
vmovdqu %ymm6, 0x40(%rdi)
|
|
106
|
+
vmovdqu %ymm8, 0x60(%rdi)
|
|
107
|
+
vmovdqu %ymm3, 0x80(%rdi)
|
|
108
|
+
vmovdqu %ymm9, 0xa0(%rdi)
|
|
109
|
+
vmovdqa 0x100(%rsi), %ymm5
|
|
110
|
+
vmovdqa 0x120(%rsi), %ymm6
|
|
111
|
+
vmovdqa 0x140(%rsi), %ymm7
|
|
112
|
+
vmovdqa 0x160(%rsi), %ymm8
|
|
113
|
+
vmovdqa 0x180(%rsi), %ymm9
|
|
114
|
+
vmovdqa 0x1a0(%rsi), %ymm10
|
|
115
|
+
vmovdqa 0x1c0(%rsi), %ymm11
|
|
116
|
+
vmovdqa 0x1e0(%rsi), %ymm12
|
|
117
|
+
vpsllw $0xc, %ymm6, %ymm4
|
|
118
|
+
vpor %ymm4, %ymm5, %ymm4
|
|
119
|
+
vpsrlw $0x4, %ymm6, %ymm5
|
|
120
|
+
vpsllw $0x8, %ymm7, %ymm6
|
|
121
|
+
vpor %ymm5, %ymm6, %ymm5
|
|
122
|
+
vpsrlw $0x8, %ymm7, %ymm6
|
|
123
|
+
vpsllw $0x4, %ymm8, %ymm7
|
|
124
|
+
vpor %ymm6, %ymm7, %ymm6
|
|
125
|
+
vpsllw $0xc, %ymm10, %ymm7
|
|
126
|
+
vpor %ymm7, %ymm9, %ymm7
|
|
127
|
+
vpsrlw $0x4, %ymm10, %ymm8
|
|
128
|
+
vpsllw $0x8, %ymm11, %ymm9
|
|
129
|
+
vpor %ymm8, %ymm9, %ymm8
|
|
130
|
+
vpsrlw $0x8, %ymm11, %ymm9
|
|
131
|
+
vpsllw $0x4, %ymm12, %ymm10
|
|
132
|
+
vpor %ymm9, %ymm10, %ymm9
|
|
133
|
+
vpslld $0x10, %ymm5, %ymm3
|
|
134
|
+
vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
|
|
135
|
+
vpsrld $0x10, %ymm4, %ymm4
|
|
136
|
+
vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
|
|
137
|
+
vpslld $0x10, %ymm7, %ymm4
|
|
138
|
+
vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
|
|
139
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
140
|
+
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
|
|
141
|
+
vpslld $0x10, %ymm9, %ymm6
|
|
142
|
+
vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
|
|
143
|
+
vpsrld $0x10, %ymm8, %ymm8
|
|
144
|
+
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
|
|
145
|
+
vmovsldup %ymm4, %ymm8 # ymm8 = ymm4[0,0,2,2,4,4,6,6]
|
|
146
|
+
vpblendd $0xaa, %ymm8, %ymm3, %ymm8 # ymm8 = ymm3[0],ymm8[1],ymm3[2],ymm8[3],ymm3[4],ymm8[5],ymm3[6],ymm8[7]
|
|
147
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
148
|
+
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
|
|
149
|
+
vmovsldup %ymm5, %ymm3 # ymm3 = ymm5[0,0,2,2,4,4,6,6]
|
|
150
|
+
vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
|
|
151
|
+
vpsrlq $0x20, %ymm6, %ymm6
|
|
152
|
+
vpblendd $0xaa, %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[0],ymm5[1],ymm6[2],ymm5[3],ymm6[4],ymm5[5],ymm6[6],ymm5[7]
|
|
153
|
+
vmovsldup %ymm9, %ymm6 # ymm6 = ymm9[0,0,2,2,4,4,6,6]
|
|
154
|
+
vpblendd $0xaa, %ymm6, %ymm7, %ymm6 # ymm6 = ymm7[0],ymm6[1],ymm7[2],ymm6[3],ymm7[4],ymm6[5],ymm7[6],ymm6[7]
|
|
155
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
156
|
+
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
|
|
157
|
+
vpunpcklqdq %ymm3, %ymm8, %ymm7 # ymm7 = ymm8[0],ymm3[0],ymm8[2],ymm3[2]
|
|
158
|
+
vpunpckhqdq %ymm3, %ymm8, %ymm3 # ymm3 = ymm8[1],ymm3[1],ymm8[3],ymm3[3]
|
|
159
|
+
vpunpcklqdq %ymm4, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm4[0],ymm6[2],ymm4[2]
|
|
160
|
+
vpunpckhqdq %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[1],ymm4[1],ymm6[3],ymm4[3]
|
|
161
|
+
vpunpcklqdq %ymm9, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm9[0],ymm5[2],ymm9[2]
|
|
162
|
+
vpunpckhqdq %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[1],ymm9[1],ymm5[3],ymm9[3]
|
|
163
|
+
vperm2i128 $0x20, %ymm8, %ymm7, %ymm5 # ymm5 = ymm7[0,1],ymm8[0,1]
|
|
164
|
+
vperm2i128 $0x31, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[2,3],ymm8[2,3]
|
|
165
|
+
vperm2i128 $0x20, %ymm3, %ymm6, %ymm7 # ymm7 = ymm6[0,1],ymm3[0,1]
|
|
166
|
+
vperm2i128 $0x31, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[2,3],ymm3[2,3]
|
|
167
|
+
vperm2i128 $0x20, %ymm9, %ymm4, %ymm6 # ymm6 = ymm4[0,1],ymm9[0,1]
|
|
168
|
+
vperm2i128 $0x31, %ymm9, %ymm4, %ymm9 # ymm9 = ymm4[2,3],ymm9[2,3]
|
|
169
|
+
vmovdqu %ymm5, 0xc0(%rdi)
|
|
170
|
+
vmovdqu %ymm7, 0xe0(%rdi)
|
|
171
|
+
vmovdqu %ymm6, 0x100(%rdi)
|
|
172
|
+
vmovdqu %ymm8, 0x120(%rdi)
|
|
173
|
+
vmovdqu %ymm3, 0x140(%rdi)
|
|
174
|
+
vmovdqu %ymm9, 0x160(%rdi)
|
|
175
|
+
retq
|
|
176
|
+
.cfi_endproc
|
|
177
|
+
|
|
178
|
+
MLK_ASM_FN_SIZE(ntttobytes_avx2)
|
|
179
|
+
|
|
180
|
+
#endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
|
|
181
|
+
*/
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/* References
|
|
7
|
+
* ==========
|
|
8
|
+
*
|
|
9
|
+
* - [REF_AVX2]
|
|
10
|
+
* CRYSTALS-Kyber optimized AVX2 implementation
|
|
11
|
+
* Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
|
|
12
|
+
* https://github.com/pq-crystals/kyber/tree/main/avx2
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* This file is derived from the public domain
|
|
17
|
+
* AVX2 Kyber implementation @[REF_AVX2].
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include "../../../common.h"
|
|
21
|
+
|
|
22
|
+
#if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
|
|
23
|
+
!defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
|
|
24
|
+
|
|
25
|
+
/*
|
|
26
|
+
* WARNING: This file is auto-derived from the mlkem-native source file
|
|
27
|
+
* dev/x86_64/src/nttunpack.S using scripts/simpasm. Do not modify it directly.
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
#if defined(__ELF__)
|
|
31
|
+
.section .note.GNU-stack,"",@progbits
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
.text
|
|
35
|
+
.balign 4
|
|
36
|
+
.global MLK_ASM_NAMESPACE(nttunpack_avx2)
|
|
37
|
+
MLK_ASM_FN_SYMBOL(nttunpack_avx2)
|
|
38
|
+
|
|
39
|
+
.cfi_startproc
|
|
40
|
+
vmovdqa (%rdi), %ymm4
|
|
41
|
+
vmovdqa 0x20(%rdi), %ymm5
|
|
42
|
+
vmovdqa 0x40(%rdi), %ymm6
|
|
43
|
+
vmovdqa 0x60(%rdi), %ymm7
|
|
44
|
+
vmovdqa 0x80(%rdi), %ymm8
|
|
45
|
+
vmovdqa 0xa0(%rdi), %ymm9
|
|
46
|
+
vmovdqa 0xc0(%rdi), %ymm10
|
|
47
|
+
vmovdqa 0xe0(%rdi), %ymm11
|
|
48
|
+
vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
|
|
49
|
+
vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
|
|
50
|
+
vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
|
|
51
|
+
vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
|
|
52
|
+
vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
|
|
53
|
+
vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
|
|
54
|
+
vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
|
|
55
|
+
vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
|
|
56
|
+
vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
|
|
57
|
+
vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
|
|
58
|
+
vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
|
|
59
|
+
vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
|
|
60
|
+
vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
|
|
61
|
+
vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
|
|
62
|
+
vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
|
|
63
|
+
vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
|
|
64
|
+
vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
|
|
65
|
+
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
|
|
66
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
67
|
+
vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
|
|
68
|
+
vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
|
|
69
|
+
vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
|
|
70
|
+
vpsrlq $0x20, %ymm5, %ymm5
|
|
71
|
+
vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
|
|
72
|
+
vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
|
|
73
|
+
vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
|
|
74
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
75
|
+
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
|
|
76
|
+
vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
|
|
77
|
+
vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
|
|
78
|
+
vpsrlq $0x20, %ymm10, %ymm10
|
|
79
|
+
vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
|
|
80
|
+
vpslld $0x10, %ymm5, %ymm10
|
|
81
|
+
vpblendw $0xaa, %ymm10, %ymm9, %ymm10 # ymm10 = ymm9[0],ymm10[1],ymm9[2],ymm10[3],ymm9[4],ymm10[5],ymm9[6],ymm10[7],ymm9[8],ymm10[9],ymm9[10],ymm10[11],ymm9[12],ymm10[13],ymm9[14],ymm10[15]
|
|
82
|
+
vpsrld $0x10, %ymm9, %ymm9
|
|
83
|
+
vpblendw $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7],ymm9[8],ymm5[9],ymm9[10],ymm5[11],ymm9[12],ymm5[13],ymm9[14],ymm5[15]
|
|
84
|
+
vpslld $0x10, %ymm4, %ymm9
|
|
85
|
+
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
|
|
86
|
+
vpsrld $0x10, %ymm8, %ymm8
|
|
87
|
+
vpblendw $0xaa, %ymm4, %ymm8, %ymm4 # ymm4 = ymm8[0],ymm4[1],ymm8[2],ymm4[3],ymm8[4],ymm4[5],ymm8[6],ymm4[7],ymm8[8],ymm4[9],ymm8[10],ymm4[11],ymm8[12],ymm4[13],ymm8[14],ymm4[15]
|
|
88
|
+
vpslld $0x10, %ymm3, %ymm8
|
|
89
|
+
vpblendw $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7],ymm7[8],ymm8[9],ymm7[10],ymm8[11],ymm7[12],ymm8[13],ymm7[14],ymm8[15]
|
|
90
|
+
vpsrld $0x10, %ymm7, %ymm7
|
|
91
|
+
vpblendw $0xaa, %ymm3, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm3[1],ymm7[2],ymm3[3],ymm7[4],ymm3[5],ymm7[6],ymm3[7],ymm7[8],ymm3[9],ymm7[10],ymm3[11],ymm7[12],ymm3[13],ymm7[14],ymm3[15]
|
|
92
|
+
vpslld $0x10, %ymm11, %ymm7
|
|
93
|
+
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
|
|
94
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
95
|
+
vpblendw $0xaa, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[0],ymm11[1],ymm6[2],ymm11[3],ymm6[4],ymm11[5],ymm6[6],ymm11[7],ymm6[8],ymm11[9],ymm6[10],ymm11[11],ymm6[12],ymm11[13],ymm6[14],ymm11[15]
|
|
96
|
+
vmovdqa %ymm10, (%rdi)
|
|
97
|
+
vmovdqa %ymm5, 0x20(%rdi)
|
|
98
|
+
vmovdqa %ymm9, 0x40(%rdi)
|
|
99
|
+
vmovdqa %ymm4, 0x60(%rdi)
|
|
100
|
+
vmovdqa %ymm8, 0x80(%rdi)
|
|
101
|
+
vmovdqa %ymm3, 0xa0(%rdi)
|
|
102
|
+
vmovdqa %ymm7, 0xc0(%rdi)
|
|
103
|
+
vmovdqa %ymm11, 0xe0(%rdi)
|
|
104
|
+
vmovdqa 0x100(%rdi), %ymm4
|
|
105
|
+
vmovdqa 0x120(%rdi), %ymm5
|
|
106
|
+
vmovdqa 0x140(%rdi), %ymm6
|
|
107
|
+
vmovdqa 0x160(%rdi), %ymm7
|
|
108
|
+
vmovdqa 0x180(%rdi), %ymm8
|
|
109
|
+
vmovdqa 0x1a0(%rdi), %ymm9
|
|
110
|
+
vmovdqa 0x1c0(%rdi), %ymm10
|
|
111
|
+
vmovdqa 0x1e0(%rdi), %ymm11
|
|
112
|
+
vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
|
|
113
|
+
vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
|
|
114
|
+
vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
|
|
115
|
+
vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
|
|
116
|
+
vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
|
|
117
|
+
vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
|
|
118
|
+
vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
|
|
119
|
+
vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
|
|
120
|
+
vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
|
|
121
|
+
vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
|
|
122
|
+
vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
|
|
123
|
+
vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
|
|
124
|
+
vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
|
|
125
|
+
vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
|
|
126
|
+
vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
|
|
127
|
+
vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
|
|
128
|
+
vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
|
|
129
|
+
vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
|
|
130
|
+
vpsrlq $0x20, %ymm7, %ymm7
|
|
131
|
+
vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
|
|
132
|
+
vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
|
|
133
|
+
vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
|
|
134
|
+
vpsrlq $0x20, %ymm5, %ymm5
|
|
135
|
+
vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
|
|
136
|
+
vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
|
|
137
|
+
vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
|
|
138
|
+
vpsrlq $0x20, %ymm3, %ymm3
|
|
139
|
+
vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
|
|
140
|
+
vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
|
|
141
|
+
vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
|
|
142
|
+
vpsrlq $0x20, %ymm10, %ymm10
|
|
143
|
+
vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
|
|
144
|
+
vpslld $0x10, %ymm5, %ymm10
|
|
145
|
+
vpblendw $0xaa, %ymm10, %ymm9, %ymm10 # ymm10 = ymm9[0],ymm10[1],ymm9[2],ymm10[3],ymm9[4],ymm10[5],ymm9[6],ymm10[7],ymm9[8],ymm10[9],ymm9[10],ymm10[11],ymm9[12],ymm10[13],ymm9[14],ymm10[15]
|
|
146
|
+
vpsrld $0x10, %ymm9, %ymm9
|
|
147
|
+
vpblendw $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7],ymm9[8],ymm5[9],ymm9[10],ymm5[11],ymm9[12],ymm5[13],ymm9[14],ymm5[15]
|
|
148
|
+
vpslld $0x10, %ymm4, %ymm9
|
|
149
|
+
vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
|
|
150
|
+
vpsrld $0x10, %ymm8, %ymm8
|
|
151
|
+
vpblendw $0xaa, %ymm4, %ymm8, %ymm4 # ymm4 = ymm8[0],ymm4[1],ymm8[2],ymm4[3],ymm8[4],ymm4[5],ymm8[6],ymm4[7],ymm8[8],ymm4[9],ymm8[10],ymm4[11],ymm8[12],ymm4[13],ymm8[14],ymm4[15]
|
|
152
|
+
vpslld $0x10, %ymm3, %ymm8
|
|
153
|
+
vpblendw $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7],ymm7[8],ymm8[9],ymm7[10],ymm8[11],ymm7[12],ymm8[13],ymm7[14],ymm8[15]
|
|
154
|
+
vpsrld $0x10, %ymm7, %ymm7
|
|
155
|
+
vpblendw $0xaa, %ymm3, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm3[1],ymm7[2],ymm3[3],ymm7[4],ymm3[5],ymm7[6],ymm3[7],ymm7[8],ymm3[9],ymm7[10],ymm3[11],ymm7[12],ymm3[13],ymm7[14],ymm3[15]
|
|
156
|
+
vpslld $0x10, %ymm11, %ymm7
|
|
157
|
+
vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
|
|
158
|
+
vpsrld $0x10, %ymm6, %ymm6
|
|
159
|
+
vpblendw $0xaa, %ymm11, %ymm6, %ymm11 # ymm11 = ymm6[0],ymm11[1],ymm6[2],ymm11[3],ymm6[4],ymm11[5],ymm6[6],ymm11[7],ymm6[8],ymm11[9],ymm6[10],ymm11[11],ymm6[12],ymm11[13],ymm6[14],ymm11[15]
|
|
160
|
+
vmovdqa %ymm10, 0x100(%rdi)
|
|
161
|
+
vmovdqa %ymm5, 0x120(%rdi)
|
|
162
|
+
vmovdqa %ymm9, 0x140(%rdi)
|
|
163
|
+
vmovdqa %ymm4, 0x160(%rdi)
|
|
164
|
+
vmovdqa %ymm8, 0x180(%rdi)
|
|
165
|
+
vmovdqa %ymm3, 0x1a0(%rdi)
|
|
166
|
+
vmovdqa %ymm7, 0x1c0(%rdi)
|
|
167
|
+
vmovdqa %ymm11, 0x1e0(%rdi)
|
|
168
|
+
retq
|
|
169
|
+
.cfi_endproc
|
|
170
|
+
|
|
171
|
+
MLK_ASM_FN_SIZE(nttunpack_avx2)
|
|
172
|
+
|
|
173
|
+
#endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
|
|
174
|
+
*/
|