pq_crypto 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/SECURITY.md +7 -0
- data/ext/pqcrypto/extconf.rb +2 -0
- data/ext/pqcrypto/pqcrypto_ruby_secure.c +139 -0
- data/ext/pqcrypto/pqcrypto_secure.c +532 -0
- data/ext/pqcrypto/pqcrypto_secure.h +20 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
- data/lib/pq_crypto/hybrid_kem.rb +1 -1
- data/lib/pq_crypto/internal.rb +23 -0
- data/lib/pq_crypto/kem.rb +27 -34
- data/lib/pq_crypto/pkcs8/der.rb +68 -0
- data/lib/pq_crypto/pkcs8/private_key_choice.rb +186 -0
- data/lib/pq_crypto/pkcs8.rb +51 -468
- data/lib/pq_crypto/serialization.rb +19 -29
- data/lib/pq_crypto/signature.rb +28 -35
- data/lib/pq_crypto/version.rb +1 -1
- data/lib/pq_crypto.rb +10 -0
- data/script/vendor_libs.rb +3 -3
- metadata +44 -35
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* Copyright (c) The mldsa-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
#include "../../../../common.h"
|
|
7
|
+
|
|
8
|
+
#if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) && \
|
|
9
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
10
|
+
|
|
11
|
+
/*
|
|
12
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
13
|
+
* dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S using scripts/simpasm. Do not modify it directly.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
.text
|
|
17
|
+
.balign 4
|
|
18
|
+
.global MLD_ASM_NAMESPACE(keccak_f1600_x4_avx2_asm)
|
|
19
|
+
/*
 * keccak_f1600_x4_avx2_asm -- entry and load phase.
 *
 * Register usage visible in this file (argument order matches the System V
 * AMD64 convention; NOTE(review): confirm against the C prototype):
 *   rdi - base of a 0x320-byte buffer that is read here and written back
 *         after the loop. It is addressed as four consecutive 0xc8-byte
 *         regions (presumably four 200-byte Keccak-f[1600] states).
 *   rsi - pointer read with vpbroadcastq and advanced by 8 per loop
 *         iteration (presumably the round-constant table).
 *   rdx, rcx - pointers to 32-byte vpshufb masks used inside the loop
 *         (presumably the rho8 / rho56 tables).
 *
 * This phase saves rsp in r11, aligns rsp down to 32 bytes and reserves
 * 0x300 bytes of scratch. It then transposes the input: for each group of
 * four quadwords it loads one 32-byte vector from each region and uses
 * vpunpck{l,h}qdq + vperm2i128 to build vectors that hold the quadword at
 * one given offset from all four regions. Sixteen of the 25 resulting
 * vectors go to stack slots 0x00..0x1e0(%rsp); the other nine stay in
 * ymm2, ymm3, ymm7, ymm8, ymm9, ymm10, ymm13, ymm14 and ymm15.
 */
MLD_ASM_FN_SYMBOL(keccak_f1600_x4_avx2_asm)
|
|
20
|
+
|
|
21
|
+
.cfi_startproc
|
|
22
|
+
/* Keep the caller's rsp in r11; it is restored from r11 before retq. */
movq %rsp, %r11
|
|
23
|
+
.cfi_def_cfa_register %r11
|
|
24
|
+
/* Align rsp down to a 32-byte boundary, then reserve 0x300 bytes of scratch. */
andq $-0x20, %rsp
|
|
25
|
+
subq $0x300, %rsp # imm = 0x300
|
|
26
|
+
/* Quadwords 0..3 of each region (offsets 0x00, 0xc8, 0x190, 0x258). */
vmovdqu (%rdi), %ymm0
|
|
27
|
+
vmovdqu 0xc8(%rdi), %ymm3
|
|
28
|
+
vmovdqu 0x190(%rdi), %ymm1
|
|
29
|
+
vmovdqu 0x258(%rdi), %ymm4
|
|
30
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
31
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
32
|
+
vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
|
|
33
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
|
|
34
|
+
vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
|
|
35
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
|
|
36
|
+
vmovdqu 0x278(%rdi), %ymm4
|
|
37
|
+
vmovdqu %ymm3, 0x40(%rsp)
|
|
38
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[2,3],ymm1[2,3]
|
|
39
|
+
vmovdqu %ymm7, (%rsp)
|
|
40
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
|
|
41
|
+
/* Quadwords 4..7 of each region (offsets 0x20, 0xe8, 0x1b0, 0x278). */
vmovdqu 0x20(%rdi), %ymm0
|
|
42
|
+
vmovdqu 0x1b0(%rdi), %ymm1
|
|
43
|
+
vmovdqu %ymm3, 0x60(%rsp)
|
|
44
|
+
vmovdqu 0xe8(%rdi), %ymm3
|
|
45
|
+
vmovdqu %ymm7, 0x20(%rsp)
|
|
46
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
47
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
48
|
+
vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
|
|
49
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
|
|
50
|
+
vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
|
|
51
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
|
|
52
|
+
vmovdqu 0x298(%rdi), %ymm4
|
|
53
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm14 # ymm14 = ymm0[2,3],ymm1[2,3]
|
|
54
|
+
vmovdqu %ymm7, 0x80(%rsp)
|
|
55
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
|
|
56
|
+
/* Quadwords 8..11 of each region (offsets 0x40, 0x108, 0x1d0, 0x298). */
vmovdqu 0x40(%rdi), %ymm0
|
|
57
|
+
vmovdqu 0x1d0(%rdi), %ymm1
|
|
58
|
+
vmovdqu %ymm3, 0xc0(%rsp)
|
|
59
|
+
vmovdqu 0x108(%rdi), %ymm3
|
|
60
|
+
vmovdqu %ymm14, %ymm10
|
|
61
|
+
vmovdqu %ymm7, 0xa0(%rsp)
|
|
62
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
63
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
64
|
+
vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
|
|
65
|
+
vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
|
|
66
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm11 # ymm11 = ymm2[0,1],ymm3[0,1]
|
|
67
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
|
|
68
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
|
|
69
|
+
vmovdqu %ymm3, 0x100(%rsp)
|
|
70
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm8 # ymm8 = ymm0[2,3],ymm1[2,3]
|
|
71
|
+
/* Quadwords 12..15 of each region (offsets 0x60, 0x128, 0x1f0, 0x2b8). */
vmovdqu 0x128(%rdi), %ymm3
|
|
72
|
+
vmovdqu 0x60(%rdi), %ymm0
|
|
73
|
+
vmovdqu 0x1f0(%rdi), %ymm1
|
|
74
|
+
vmovdqu %ymm7, 0xe0(%rsp)
|
|
75
|
+
vmovdqu %ymm11, %ymm14
|
|
76
|
+
vmovdqu 0x2b8(%rdi), %ymm4
|
|
77
|
+
vmovdqu 0x2f8(%rdi), %ymm5
|
|
78
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
79
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
80
|
+
vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
|
|
81
|
+
vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
|
|
82
|
+
vmovdqu 0x2d8(%rdi), %ymm4
|
|
83
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm15 # ymm15 = ymm2[0,1],ymm3[0,1]
|
|
84
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
|
|
85
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
|
|
86
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm9 # ymm9 = ymm0[2,3],ymm1[2,3]
|
|
87
|
+
vmovdqu %ymm3, 0x140(%rsp)
|
|
88
|
+
/* Quadwords 16..19 of each region (offsets 0x80, 0x148, 0x210, 0x2d8). */
vmovdqu 0x80(%rdi), %ymm0
|
|
89
|
+
vmovdqu 0x148(%rdi), %ymm3
|
|
90
|
+
vmovdqu 0x210(%rdi), %ymm1
|
|
91
|
+
vmovdqu %ymm7, 0x120(%rsp)
|
|
92
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
93
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
94
|
+
vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
|
|
95
|
+
vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
|
|
96
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
|
|
97
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 # ymm13 = ymm2[2,3],ymm3[2,3]
|
|
98
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[2,3],ymm1[2,3]
|
|
99
|
+
vmovdqu %ymm7, 0x160(%rsp)
|
|
100
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
|
|
101
|
+
/* Quadwords 20..23 of each region (offsets 0xa0, 0x168, 0x230, 0x2f8). */
vmovdqu 0xa0(%rdi), %ymm0
|
|
102
|
+
vmovdqu 0x230(%rdi), %ymm1
|
|
103
|
+
vmovdqu %ymm3, 0x1a0(%rsp)
|
|
104
|
+
vmovdqu 0x168(%rdi), %ymm3
|
|
105
|
+
vpunpcklqdq %ymm5, %ymm1, %ymm4 # ymm4 = ymm1[0],ymm5[0],ymm1[2],ymm5[2]
|
|
106
|
+
vpunpckhqdq %ymm5, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm5[1],ymm1[3],ymm5[3]
|
|
107
|
+
vmovdqu %ymm7, 0x180(%rsp)
|
|
108
|
+
vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
|
|
109
|
+
vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
|
|
110
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm12 # ymm12 = ymm2[0,1],ymm4[0,1]
|
|
111
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[0,1],ymm1[0,1]
|
|
112
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm7 # ymm7 = ymm2[2,3],ymm4[2,3]
|
|
113
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm4 # ymm4 = ymm0[2,3],ymm1[2,3]
|
|
114
|
+
/*
 * Quadword 24 (offset 0xc0 within each region) is gathered element by
 * element: ymm2 = { [0xc0], [0x188], [0x250], [0x318] }.
 */
vmovq 0x250(%rdi), %xmm0
|
|
115
|
+
vmovq 0xc0(%rdi), %xmm1
|
|
116
|
+
vmovdqu %ymm12, 0x1c0(%rsp)
|
|
117
|
+
vmovdqu %ymm4, 0x1e0(%rsp)
|
|
118
|
+
vpinsrq $0x1, 0x318(%rdi), %xmm0, %xmm0
|
|
119
|
+
vpinsrq $0x1, 0x188(%rdi), %xmm1, %xmm1
|
|
120
|
+
vinserti128 $0x1, %xmm0, %ymm1, %ymm2
|
|
121
|
+
/* r10 = iteration counter for the loop that follows (runs until r10 == 0x18). */
movq $0x0, %r10
|
|
122
|
+
|
|
123
|
+
/*
 * Round loop: executed 24 times (r10 counts 0..0x17; rsi advances by 8 per
 * iteration). Every ymm register / stack slot holds one 64-bit lane for
 * four inputs at once, so each instruction processes all four in parallel.
 *
 * State on entry to each iteration: sixteen lanes in 0x00..0x1e0(%rsp) and
 * nine lanes in ymm2, ymm3, ymm7, ymm8, ymm9, ymm10, ymm13, ymm14, ymm15.
 * Slots 0x200..0x2e0(%rsp) are per-iteration temporaries for lanes whose
 * register or slot is overwritten before their last use.
 *
 * Structure of one iteration, as visible in the code (the step names are
 * the usual Keccak-f[1600] ones; NOTE(review): confirm against the
 * un-simplified source this file was derived from):
 *   1. Five 5-way XORs of lanes (accumulated in ymm0, ymm1, ymm10, ymm11,
 *      ymm12), each then combined with a rotate-left-by-1 of another one
 *      (vpsrlq $0x3f / vpsllq $0x1 / vpor), giving ymm0, ymm1, ymm3, ymm4,
 *      ymm5 -- the theta step.
 *   2. Every lane is XORed with one of those five values and rotated. A
 *      rotation is a vpsrlq/vpsllq/vpor triple whose two shift counts sum
 *      to 64, or a vpshufb with the mask at (rdx) or (rcx) -- rho and pi.
 *   3. Groups of five rotated lanes are combined with vpandn + vpxor,
 *      i.e. out = a ^ (~b & c) -- chi.
 *   4. The 64-bit value at (rsi) is broadcast and XORed into the lane that
 *      is stored to (%rsp) -- iota.
 */
LLkeccak_f1600_x4_avx2_asm:
|
|
124
|
+
vmovdqu 0xa0(%rsp), %ymm4
|
|
125
|
+
vpxor 0x1c0(%rsp), %ymm9, %ymm0
|
|
126
|
+
vmovdqu %ymm9, 0x200(%rsp)
|
|
127
|
+
vmovdqu %ymm10, %ymm9
|
|
128
|
+
vmovdqu 0xc0(%rsp), %ymm11
|
|
129
|
+
vmovdqu 0x160(%rsp), %ymm12
|
|
130
|
+
vmovdqu %ymm3, 0x240(%rsp)
|
|
131
|
+
vpxor 0x100(%rsp), %ymm4, %ymm1
|
|
132
|
+
vmovdqu 0x40(%rsp), %ymm10
|
|
133
|
+
vmovdqu %ymm4, 0x220(%rsp)
|
|
134
|
+
vpxor %ymm3, %ymm12, %ymm12
|
|
135
|
+
vmovdqu 0x20(%rsp), %ymm6
|
|
136
|
+
vmovdqu 0x140(%rsp), %ymm4
|
|
137
|
+
vmovdqu %ymm14, 0x2a0(%rsp)
|
|
138
|
+
vpxor %ymm1, %ymm0, %ymm0
|
|
139
|
+
vpxor %ymm8, %ymm11, %ymm1
|
|
140
|
+
vpxor 0x180(%rsp), %ymm7, %ymm11
|
|
141
|
+
vmovdqu %ymm10, 0x280(%rsp)
|
|
142
|
+
vpxor %ymm1, %ymm12, %ymm12
|
|
143
|
+
vpxor %ymm15, %ymm9, %ymm1
|
|
144
|
+
vmovdqu 0xe0(%rsp), %ymm3
|
|
145
|
+
vmovdqu %ymm8, 0x260(%rsp)
|
|
146
|
+
vpxor %ymm1, %ymm11, %ymm11
|
|
147
|
+
vpxor 0x120(%rsp), %ymm14, %ymm1
|
|
148
|
+
vpxor %ymm6, %ymm12, %ymm12
|
|
149
|
+
vmovdqu 0x60(%rsp), %ymm8
|
|
150
|
+
vpxor %ymm10, %ymm11, %ymm11
|
|
151
|
+
vpxor 0x1e0(%rsp), %ymm13, %ymm10
|
|
152
|
+
vpxor %ymm4, %ymm3, %ymm3
|
|
153
|
+
vmovdqu %ymm4, 0x2c0(%rsp)
|
|
154
|
+
vpsrlq $0x3f, %ymm12, %ymm4
|
|
155
|
+
vpsrlq $0x3f, %ymm11, %ymm5
|
|
156
|
+
vpxor (%rsp), %ymm0, %ymm0
|
|
157
|
+
vpxor %ymm1, %ymm10, %ymm10
|
|
158
|
+
vmovdqu 0x80(%rsp), %ymm1
|
|
159
|
+
vpxor %ymm8, %ymm10, %ymm10
|
|
160
|
+
vmovdqu %ymm1, %ymm14
|
|
161
|
+
vpxor 0x1a0(%rsp), %ymm2, %ymm1
|
|
162
|
+
vmovdqu %ymm14, 0x2e0(%rsp)
|
|
163
|
+
vpxor %ymm3, %ymm1, %ymm1
|
|
164
|
+
vpsllq $0x1, %ymm12, %ymm3
|
|
165
|
+
vpor %ymm4, %ymm3, %ymm3
|
|
166
|
+
vpsllq $0x1, %ymm11, %ymm4
|
|
167
|
+
vpxor %ymm14, %ymm1, %ymm1
|
|
168
|
+
vpor %ymm5, %ymm4, %ymm4
|
|
169
|
+
vpsrlq $0x3f, %ymm10, %ymm14
|
|
170
|
+
vpxor %ymm1, %ymm3, %ymm3
|
|
171
|
+
vpsllq $0x1, %ymm10, %ymm5
|
|
172
|
+
vpxor %ymm0, %ymm4, %ymm4
|
|
173
|
+
vpor %ymm14, %ymm5, %ymm5
|
|
174
|
+
vpxor %ymm6, %ymm4, %ymm6
|
|
175
|
+
vpxor %ymm12, %ymm5, %ymm5
|
|
176
|
+
vpsrlq $0x3f, %ymm1, %ymm12
|
|
177
|
+
vpsllq $0x1, %ymm1, %ymm1
|
|
178
|
+
vpxor %ymm7, %ymm5, %ymm7
|
|
179
|
+
vpxor %ymm9, %ymm5, %ymm9
|
|
180
|
+
vpor %ymm12, %ymm1, %ymm1
|
|
181
|
+
vpxor (%rsp), %ymm3, %ymm12
|
|
182
|
+
vpxor %ymm11, %ymm1, %ymm1
|
|
183
|
+
vpsrlq $0x3f, %ymm0, %ymm11
|
|
184
|
+
vpsllq $0x1, %ymm0, %ymm0
|
|
185
|
+
vpxor %ymm13, %ymm1, %ymm13
|
|
186
|
+
vpxor %ymm8, %ymm1, %ymm8
|
|
187
|
+
vpor %ymm11, %ymm0, %ymm0
|
|
188
|
+
vpxor %ymm10, %ymm0, %ymm0
|
|
189
|
+
vpxor 0xc0(%rsp), %ymm4, %ymm10
|
|
190
|
+
vpxor %ymm2, %ymm0, %ymm2
|
|
191
|
+
vpsrlq $0x14, %ymm10, %ymm11
|
|
192
|
+
vpsllq $0x2c, %ymm10, %ymm10
|
|
193
|
+
vpor %ymm11, %ymm10, %ymm10
|
|
194
|
+
vpxor %ymm15, %ymm5, %ymm11
|
|
195
|
+
/*
 * Broadcast the 64-bit value at (rsi) to all four elements; it is XORed
 * into the result that is stored to (%rsp) a few instructions below.
 */
vpbroadcastq (%rsi), %ymm15
|
|
196
|
+
vpsrlq $0x15, %ymm11, %ymm14
|
|
197
|
+
vpsllq $0x2b, %ymm11, %ymm11
|
|
198
|
+
vpor %ymm14, %ymm11, %ymm11
|
|
199
|
+
vpandn %ymm11, %ymm10, %ymm14
|
|
200
|
+
vpxor %ymm15, %ymm14, %ymm14
|
|
201
|
+
vpxor %ymm12, %ymm14, %ymm15
|
|
202
|
+
vpsrlq $0x2b, %ymm13, %ymm14
|
|
203
|
+
vpsllq $0x15, %ymm13, %ymm13
|
|
204
|
+
vmovdqu %ymm15, (%rsp)
|
|
205
|
+
vpor %ymm14, %ymm13, %ymm13
|
|
206
|
+
vpandn %ymm13, %ymm11, %ymm14
|
|
207
|
+
vpxor %ymm10, %ymm14, %ymm15
|
|
208
|
+
vpsrlq $0x32, %ymm2, %ymm14
|
|
209
|
+
vpsllq $0xe, %ymm2, %ymm2
|
|
210
|
+
vmovdqu %ymm15, 0x20(%rsp)
|
|
211
|
+
vpor %ymm14, %ymm2, %ymm2
|
|
212
|
+
vpandn %ymm2, %ymm13, %ymm14
|
|
213
|
+
vpxor %ymm11, %ymm14, %ymm11
|
|
214
|
+
vmovdqu %ymm11, 0x40(%rsp)
|
|
215
|
+
vpandn %ymm12, %ymm2, %ymm11
|
|
216
|
+
vpandn %ymm10, %ymm12, %ymm12
|
|
217
|
+
vpxor %ymm13, %ymm11, %ymm11
|
|
218
|
+
vmovdqu %ymm11, 0x60(%rsp)
|
|
219
|
+
vpxor %ymm2, %ymm12, %ymm11
|
|
220
|
+
vpsrlq $0x24, %ymm8, %ymm2
|
|
221
|
+
vpsllq $0x1c, %ymm8, %ymm8
|
|
222
|
+
vmovdqu %ymm11, 0x80(%rsp)
|
|
223
|
+
vpor %ymm2, %ymm8, %ymm8
|
|
224
|
+
vpxor 0xe0(%rsp), %ymm0, %ymm2
|
|
225
|
+
vpsrlq $0x2c, %ymm2, %ymm10
|
|
226
|
+
vpsllq $0x14, %ymm2, %ymm2
|
|
227
|
+
vpor %ymm10, %ymm2, %ymm2
|
|
228
|
+
vpxor 0x100(%rsp), %ymm3, %ymm10
|
|
229
|
+
vpsrlq $0x3d, %ymm10, %ymm11
|
|
230
|
+
vpsllq $0x3, %ymm10, %ymm10
|
|
231
|
+
vpor %ymm11, %ymm10, %ymm10
|
|
232
|
+
vpandn %ymm10, %ymm2, %ymm11
|
|
233
|
+
vpxor %ymm8, %ymm11, %ymm11
|
|
234
|
+
vmovdqu %ymm11, 0xa0(%rsp)
|
|
235
|
+
vpxor 0x160(%rsp), %ymm4, %ymm11
|
|
236
|
+
vpsrlq $0x13, %ymm11, %ymm12
|
|
237
|
+
vpsllq $0x2d, %ymm11, %ymm11
|
|
238
|
+
vpor %ymm12, %ymm11, %ymm11
|
|
239
|
+
vpandn %ymm11, %ymm10, %ymm12
|
|
240
|
+
vpxor %ymm2, %ymm12, %ymm12
|
|
241
|
+
vmovdqu %ymm12, 0xc0(%rsp)
|
|
242
|
+
vpsrlq $0x3, %ymm7, %ymm12
|
|
243
|
+
vpsllq $0x3d, %ymm7, %ymm7
|
|
244
|
+
vpor %ymm12, %ymm7, %ymm7
|
|
245
|
+
vpandn %ymm7, %ymm11, %ymm12
|
|
246
|
+
vpxor %ymm10, %ymm12, %ymm10
|
|
247
|
+
vpandn %ymm8, %ymm7, %ymm12
|
|
248
|
+
vpandn %ymm2, %ymm8, %ymm8
|
|
249
|
+
vpsrlq $0x3f, %ymm6, %ymm2
|
|
250
|
+
vpsllq $0x1, %ymm6, %ymm6
|
|
251
|
+
vpxor %ymm11, %ymm12, %ymm14
|
|
252
|
+
vpor %ymm2, %ymm6, %ymm6
|
|
253
|
+
vpsrlq $0x3a, %ymm9, %ymm2
|
|
254
|
+
vpxor %ymm7, %ymm8, %ymm12
|
|
255
|
+
vpsllq $0x6, %ymm9, %ymm9
|
|
256
|
+
vmovdqu %ymm12, 0xe0(%rsp)
|
|
257
|
+
vpxor 0x1a0(%rsp), %ymm0, %ymm7
|
|
258
|
+
vpor %ymm2, %ymm9, %ymm9
|
|
259
|
+
vpxor 0x120(%rsp), %ymm1, %ymm2
|
|
260
|
+
/*
 * Byte shuffle using the 32-byte mask at (rdx) -- presumably the rho8
 * table, i.e. a rotate-left-by-8 of every 64-bit element; confirm
 * against the caller.
 */
vpshufb (%rdx), %ymm7, %ymm7
|
|
261
|
+
vpsrlq $0x27, %ymm2, %ymm11
|
|
262
|
+
vpsllq $0x19, %ymm2, %ymm2
|
|
263
|
+
vpor %ymm2, %ymm11, %ymm11
|
|
264
|
+
vpandn %ymm11, %ymm9, %ymm2
|
|
265
|
+
vpandn %ymm7, %ymm11, %ymm8
|
|
266
|
+
vpxor %ymm6, %ymm2, %ymm12
|
|
267
|
+
vpxor 0x1c0(%rsp), %ymm3, %ymm2
|
|
268
|
+
vpxor %ymm9, %ymm8, %ymm8
|
|
269
|
+
vmovdqu %ymm12, 0x100(%rsp)
|
|
270
|
+
vpsrlq $0x2e, %ymm2, %ymm12
|
|
271
|
+
vpsllq $0x12, %ymm2, %ymm2
|
|
272
|
+
vpor %ymm2, %ymm12, %ymm2
|
|
273
|
+
vpandn %ymm2, %ymm7, %ymm12
|
|
274
|
+
vpxor %ymm11, %ymm12, %ymm15
|
|
275
|
+
vpandn %ymm6, %ymm2, %ymm11
|
|
276
|
+
vpandn %ymm9, %ymm6, %ymm6
|
|
277
|
+
vpxor %ymm7, %ymm11, %ymm12
|
|
278
|
+
vmovdqu %ymm12, 0x120(%rsp)
|
|
279
|
+
vpxor %ymm2, %ymm6, %ymm12
|
|
280
|
+
vpxor 0x2e0(%rsp), %ymm0, %ymm6
|
|
281
|
+
vpxor 0x2c0(%rsp), %ymm0, %ymm0
|
|
282
|
+
vmovdqu %ymm12, 0x140(%rsp)
|
|
283
|
+
vpsrlq $0x25, %ymm6, %ymm2
|
|
284
|
+
vpsllq $0x1b, %ymm6, %ymm6
|
|
285
|
+
vpor %ymm6, %ymm2, %ymm2
|
|
286
|
+
vpxor 0x220(%rsp), %ymm3, %ymm6
|
|
287
|
+
vpxor 0x200(%rsp), %ymm3, %ymm3
|
|
288
|
+
vpsrlq $0x1c, %ymm6, %ymm7
|
|
289
|
+
vpsllq $0x24, %ymm6, %ymm6
|
|
290
|
+
vpor %ymm6, %ymm7, %ymm7
|
|
291
|
+
vpxor 0x260(%rsp), %ymm4, %ymm6
|
|
292
|
+
vpxor 0x240(%rsp), %ymm4, %ymm4
|
|
293
|
+
vpsrlq $0x36, %ymm6, %ymm12
|
|
294
|
+
vpsllq $0xa, %ymm6, %ymm6
|
|
295
|
+
vpor %ymm6, %ymm12, %ymm12
|
|
296
|
+
vpxor 0x180(%rsp), %ymm5, %ymm6
|
|
297
|
+
vpxor 0x280(%rsp), %ymm5, %ymm5
|
|
298
|
+
vpandn %ymm12, %ymm7, %ymm9
|
|
299
|
+
vpsrlq $0x31, %ymm6, %ymm11
|
|
300
|
+
vpsllq $0xf, %ymm6, %ymm6
|
|
301
|
+
vpxor %ymm2, %ymm9, %ymm9
|
|
302
|
+
vpor %ymm6, %ymm11, %ymm11
|
|
303
|
+
vpandn %ymm11, %ymm12, %ymm6
|
|
304
|
+
vpxor %ymm7, %ymm6, %ymm6
|
|
305
|
+
vmovdqu %ymm6, 0x160(%rsp)
|
|
306
|
+
vpxor 0x1e0(%rsp), %ymm1, %ymm6
|
|
307
|
+
vpxor 0x2a0(%rsp), %ymm1, %ymm1
|
|
308
|
+
/*
 * Byte shuffle using the 32-byte mask at (rcx) -- presumably the rho56
 * table, i.e. a rotate-left-by-56 of every 64-bit element; confirm
 * against the caller.
 */
vpshufb (%rcx), %ymm6, %ymm6
|
|
309
|
+
vpandn %ymm6, %ymm11, %ymm13
|
|
310
|
+
vpxor %ymm12, %ymm13, %ymm13
|
|
311
|
+
vmovdqu %ymm13, 0x180(%rsp)
|
|
312
|
+
vpandn %ymm2, %ymm6, %ymm13
|
|
313
|
+
vpandn %ymm7, %ymm2, %ymm2
|
|
314
|
+
vpxor %ymm6, %ymm2, %ymm2
|
|
315
|
+
vpsrlq $0x3e, %ymm4, %ymm6
|
|
316
|
+
vpxor %ymm11, %ymm13, %ymm13
|
|
317
|
+
vmovdqu %ymm2, 0x1a0(%rsp)
|
|
318
|
+
vpsrlq $0x2, %ymm5, %ymm2
|
|
319
|
+
vpsllq $0x3e, %ymm5, %ymm5
|
|
320
|
+
vpor %ymm5, %ymm2, %ymm2
|
|
321
|
+
vpsrlq $0x9, %ymm1, %ymm5
|
|
322
|
+
vpsllq $0x37, %ymm1, %ymm1
|
|
323
|
+
vpsllq $0x2, %ymm4, %ymm4
|
|
324
|
+
vpor %ymm1, %ymm5, %ymm1
|
|
325
|
+
vpsrlq $0x19, %ymm0, %ymm5
|
|
326
|
+
vpor %ymm4, %ymm6, %ymm4
|
|
327
|
+
vpsllq $0x27, %ymm0, %ymm0
|
|
328
|
+
vpor %ymm0, %ymm5, %ymm5
|
|
329
|
+
vpandn %ymm5, %ymm1, %ymm0
|
|
330
|
+
vpxor %ymm2, %ymm0, %ymm0
|
|
331
|
+
vmovdqu %ymm0, 0x1c0(%rsp)
|
|
332
|
+
vpsrlq $0x17, %ymm3, %ymm0
|
|
333
|
+
vpsllq $0x29, %ymm3, %ymm3
|
|
334
|
+
vpor %ymm3, %ymm0, %ymm0
|
|
335
|
+
vpandn %ymm4, %ymm0, %ymm7
|
|
336
|
+
vpandn %ymm0, %ymm5, %ymm3
|
|
337
|
+
vpxor %ymm5, %ymm7, %ymm7
|
|
338
|
+
vpandn %ymm2, %ymm4, %ymm5
|
|
339
|
+
vpandn %ymm1, %ymm2, %ymm2
|
|
340
|
+
vpxor %ymm0, %ymm5, %ymm5
|
|
341
|
+
vpxor %ymm1, %ymm3, %ymm3
|
|
342
|
+
vpxor %ymm4, %ymm2, %ymm2
|
|
343
|
+
vmovdqu %ymm5, 0x1e0(%rsp)
|
|
344
|
+
/*
 * Step rsi to the next 64-bit value and count the iteration; the loop
 * exits once r10 reaches 0x18 (24).
 */
addq $0x8, %rsi
|
|
345
|
+
addq $0x1, %r10
|
|
346
|
+
cmpq $0x18, %r10
|
|
347
|
+
jne LLkeccak_f1600_x4_avx2_asm
|
|
348
|
+
/*
 * Store phase (falls through from the round loop).
 *
 * The 25 lane vectors (stack slots 0x00..0x1e0(%rsp) plus ymm2, ymm3,
 * ymm7, ymm8, ymm9, ymm10, ymm13, ymm14, ymm15) are transposed back with
 * vpunpck{l,h}qdq + vperm2i128 and written to the same rdi offsets the
 * entry code loaded from, so the buffer at rdi is updated in place.
 * rsp is then restored from r11 and the function returns.
 *
 * NOTE(review): no vzeroupper is issued before retq; confirm that callers
 * do not rely on clean upper YMM halves for legacy-SSE performance.
 */
vmovdqu (%rsp), %ymm4
|
|
349
|
+
vmovdqu 0x40(%rsp), %ymm5
|
|
350
|
+
vmovdqu 0x20(%rsp), %ymm0
|
|
351
|
+
vmovdqu 0x60(%rsp), %ymm1
|
|
352
|
+
/* Slot 0x1c0 is read into ymm12 and then reused to park ymm2 until the final scatter. */
vmovdqu 0x1c0(%rsp), %ymm12
|
|
353
|
+
vmovdqu %ymm2, 0x1c0(%rsp)
|
|
354
|
+
vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
|
|
355
|
+
vpunpckhqdq %ymm0, %ymm4, %ymm0 # ymm0 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
|
|
356
|
+
vpunpcklqdq %ymm1, %ymm5, %ymm4 # ymm4 = ymm5[0],ymm1[0],ymm5[2],ymm1[2]
|
|
357
|
+
vpunpckhqdq %ymm1, %ymm5, %ymm1 # ymm1 = ymm5[1],ymm1[1],ymm5[3],ymm1[3]
|
|
358
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
|
|
359
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
|
|
360
|
+
vmovdqu 0x80(%rsp), %ymm4
|
|
361
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm5 # ymm5 = ymm0[0,1],ymm1[0,1]
|
|
362
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
|
|
363
|
+
/* Quadwords 0..3 of each region. */
vmovdqu %ymm6, (%rdi)
|
|
364
|
+
vmovdqu %ymm5, 0xc8(%rdi)
|
|
365
|
+
vmovdqu %ymm2, 0x190(%rdi)
|
|
366
|
+
vmovdqu %ymm0, 0x258(%rdi)
|
|
367
|
+
vmovdqu 0xa0(%rsp), %ymm0
|
|
368
|
+
vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
|
|
369
|
+
vpunpckhqdq %ymm0, %ymm4, %ymm1 # ymm1 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
|
|
370
|
+
vmovdqu 0xc0(%rsp), %ymm0
|
|
371
|
+
vpunpcklqdq %ymm10, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm10[0],ymm0[2],ymm10[2]
|
|
372
|
+
vpunpckhqdq %ymm10, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm10[1],ymm0[3],ymm10[3]
|
|
373
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
|
|
374
|
+
vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
|
|
375
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
|
|
376
|
+
vmovdqu 0xe0(%rsp), %ymm4
|
|
377
|
+
vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
|
|
378
|
+
vmovdqu 0x100(%rsp), %ymm0
|
|
379
|
+
vmovdqu %ymm2, 0x1b0(%rdi)
|
|
380
|
+
vmovdqu %ymm1, 0x278(%rdi)
|
|
381
|
+
vpunpcklqdq %ymm4, %ymm14, %ymm2 # ymm2 = ymm14[0],ymm4[0],ymm14[2],ymm4[2]
|
|
382
|
+
vpunpckhqdq %ymm4, %ymm14, %ymm1 # ymm1 = ymm14[1],ymm4[1],ymm14[3],ymm4[3]
|
|
383
|
+
vpunpcklqdq %ymm8, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm8[0],ymm0[2],ymm8[2]
|
|
384
|
+
vpunpckhqdq %ymm8, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm8[1],ymm0[3],ymm8[3]
|
|
385
|
+
vmovdqu %ymm6, 0x20(%rdi)
|
|
386
|
+
vmovdqu %ymm5, 0xe8(%rdi)
|
|
387
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
|
|
388
|
+
vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
|
|
389
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
|
|
390
|
+
vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
|
|
391
|
+
vmovdqu 0x120(%rsp), %ymm4
|
|
392
|
+
vmovdqu 0x140(%rsp), %ymm0
|
|
393
|
+
vmovdqu %ymm2, 0x1d0(%rdi)
|
|
394
|
+
vmovdqu %ymm1, 0x298(%rdi)
|
|
395
|
+
vpunpcklqdq %ymm4, %ymm15, %ymm2 # ymm2 = ymm15[0],ymm4[0],ymm15[2],ymm4[2]
|
|
396
|
+
vpunpckhqdq %ymm4, %ymm15, %ymm1 # ymm1 = ymm15[1],ymm4[1],ymm15[3],ymm4[3]
|
|
397
|
+
vpunpcklqdq %ymm9, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm9[0],ymm0[2],ymm9[2]
|
|
398
|
+
vmovdqu %ymm5, 0x108(%rdi)
|
|
399
|
+
vpunpckhqdq %ymm9, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm9[1],ymm0[3],ymm9[3]
|
|
400
|
+
vmovdqu %ymm6, 0x40(%rdi)
|
|
401
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
|
|
402
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
|
|
403
|
+
vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
|
|
404
|
+
vmovdqu 0x160(%rsp), %ymm4
|
|
405
|
+
vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
|
|
406
|
+
vmovdqu 0x180(%rsp), %ymm0
|
|
407
|
+
vmovdqu %ymm5, 0x128(%rdi)
|
|
408
|
+
vmovdqu 0x1a0(%rsp), %ymm5
|
|
409
|
+
vmovdqu %ymm2, 0x1f0(%rdi)
|
|
410
|
+
vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
|
|
411
|
+
vpunpckhqdq %ymm0, %ymm4, %ymm0 # ymm0 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
|
|
412
|
+
vpunpcklqdq %ymm5, %ymm13, %ymm4 # ymm4 = ymm13[0],ymm5[0],ymm13[2],ymm5[2]
|
|
413
|
+
vmovdqu %ymm6, 0x60(%rdi)
|
|
414
|
+
vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
|
|
415
|
+
vmovdqu %ymm1, 0x2b8(%rdi)
|
|
416
|
+
vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
|
|
417
|
+
vpunpckhqdq %ymm5, %ymm13, %ymm1 # ymm1 = ymm13[1],ymm5[1],ymm13[3],ymm5[3]
|
|
418
|
+
vmovdqu %ymm6, 0x80(%rdi)
|
|
419
|
+
vmovdqu 0x1e0(%rsp), %ymm4
|
|
420
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm5 # ymm5 = ymm0[0,1],ymm1[0,1]
|
|
421
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
|
|
422
|
+
vmovdqu %ymm2, 0x210(%rdi)
|
|
423
|
+
vpunpcklqdq %ymm3, %ymm12, %ymm2 # ymm2 = ymm12[0],ymm3[0],ymm12[2],ymm3[2]
|
|
424
|
+
vmovdqu %ymm0, 0x2d8(%rdi)
|
|
425
|
+
vpunpckhqdq %ymm3, %ymm12, %ymm0 # ymm0 = ymm12[1],ymm3[1],ymm12[3],ymm3[3]
|
|
426
|
+
vpunpcklqdq %ymm4, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm4[0],ymm7[2],ymm4[2]
|
|
427
|
+
vpunpckhqdq %ymm4, %ymm7, %ymm1 # ymm1 = ymm7[1],ymm4[1],ymm7[3],ymm4[3]
|
|
428
|
+
vmovdqu %ymm5, 0x148(%rdi)
|
|
429
|
+
vperm2i128 $0x20, %ymm3, %ymm2, %ymm5 # ymm5 = ymm2[0,1],ymm3[0,1]
|
|
430
|
+
vperm2i128 $0x31, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm3[2,3]
|
|
431
|
+
/* Reload the lane that was parked in slot 0x1c0 at the top of this phase. */
vmovdqu 0x1c0(%rsp), %ymm3
|
|
432
|
+
vperm2i128 $0x20, %ymm1, %ymm0, %ymm4 # ymm4 = ymm0[0,1],ymm1[0,1]
|
|
433
|
+
vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
|
|
434
|
+
vmovdqu %ymm5, 0xa0(%rdi)
|
|
435
|
+
vextracti128 $0x1, %ymm3, %xmm15
|
|
436
|
+
vmovdqu %ymm4, 0x168(%rdi)
|
|
437
|
+
vmovdqu %ymm2, 0x230(%rdi)
|
|
438
|
+
vmovdqu %ymm0, 0x2f8(%rdi)
|
|
439
|
+
/*
 * Scatter the four quadwords of ymm3 (upper half already copied to xmm15)
 * to offset 0xc0 within each region: 0xc0, 0x188, 0x250, 0x318.
 */
vmovq %xmm3, 0xc0(%rdi)
|
|
440
|
+
vmovhpd %xmm3, 0x188(%rdi)
|
|
441
|
+
vmovq %xmm15, 0x250(%rdi)
|
|
442
|
+
vmovhpd %xmm15, 0x318(%rdi)
|
|
443
|
+
/* Restore the stack pointer that was saved in r11 at function entry. */
movq %r11, %rsp
|
|
444
|
+
.cfi_def_cfa_register %rsp
|
|
445
|
+
retq
|
|
446
|
+
.cfi_endproc
|
|
447
|
+
|
|
448
|
+
MLD_ASM_FN_SIZE(keccak_f1600_x4_avx2_asm)
|
|
449
|
+
|
|
450
|
+
#endif /* MLD_FIPS202_X86_64_NEED_X4_AVX2 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
451
|
+
|
|
452
|
+
#if defined(__ELF__)
|
|
453
|
+
.section .note.GNU-stack,"",%progbits
|
|
454
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* WARNING: This file is auto-generated from scripts/autogen
|
|
8
|
+
* in the mldsa-native repository.
|
|
9
|
+
* Do not modify it directly.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#include "../../../../common.h"
|
|
13
|
+
#if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) && \
|
|
14
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
15
|
+
|
|
16
|
+
#include <stdint.h>
|
|
17
|
+
|
|
18
|
+
#include "fips202_native_x86_64.h"
|
|
19
|
+
|
|
20
|
+
/*
 * Table of 24 64-bit constants. The values are the Keccak-f[1600] round
 * constants RC[0]..RC[23] as listed in FIPS 202, stored in round order.
 * NOTE(review): the consumer is not visible in this file; presumably the
 * AVX2 x4 Keccak assembly reads this table -- confirm.
 */
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t
|
|
21
|
+
mld_keccakf1600_round_constants[24] = {
|
|
22
|
+
0x0000000000000001, 0x0000000000008082, 0x800000000000808a, /* [0..2] */
|
|
23
|
+
0x8000000080008000, 0x000000000000808b, 0x0000000080000001, /* [3..5] */
|
|
24
|
+
0x8000000080008081, 0x8000000000008009, 0x000000000000008a, /* [6..8] */
|
|
25
|
+
0x0000000000000088, 0x0000000080008009, 0x000000008000000a, /* [9..11] */
|
|
26
|
+
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, /* [12..14] */
|
|
27
|
+
0x8000000000008003, 0x8000000000008002, 0x8000000000000080, /* [15..17] */
|
|
28
|
+
0x000000000000800a, 0x800000008000000a, 0x8000000080008081, /* [18..20] */
|
|
29
|
+
0x8000000000008080, 0x0000000080000001, 0x8000000080008008, /* [21..23] */
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/*
 * Four 64-bit words forming a 32-entry table of byte indices. Read in
 * little-endian byte order, entry k of each 8-byte group holds the index
 * of byte (k + 7) mod 8 of the same group, i.e. the byte permutation that
 * rotates a 64-bit lane left by 8 bits.
 * NOTE(review): presumably used as a byte-shuffle (vpshufb-style) control
 * by the AVX2 Keccak assembly for the rho rotation by 8 -- confirm against
 * the consumer, which is not visible here.
 */
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t mld_keccak_rho8[4] = {
|
|
33
|
+
0x0605040302010007, /* indices for bytes 0..7 */
|
|
34
|
+
0x0e0d0c0b0a09080f, /* indices for bytes 8..15 */
|
|
35
|
+
0x1615141312111017, /* indices for bytes 16..23 */
|
|
36
|
+
0x1e1d1c1b1a19181f, /* indices for bytes 24..31 */
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
/*
 * Four 64-bit words forming a 32-entry table of byte indices. Read in
 * little-endian byte order, entry k of each 8-byte group holds the index
 * of byte (k + 1) mod 8 of the same group, i.e. the byte permutation that
 * rotates a 64-bit lane left by 56 bits (equivalently right by 8).
 * NOTE(review): presumably used as a byte-shuffle (vpshufb-style) control
 * by the AVX2 Keccak assembly for the rho rotation by 56 -- confirm against
 * the consumer, which is not visible here.
 */
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t mld_keccak_rho56[4] = {
|
|
40
|
+
0x0007060504030201, /* indices for bytes 0..7 */
|
|
41
|
+
0x080f0e0d0c0b0a09, /* indices for bytes 8..15 */
|
|
42
|
+
0x1017161514131211, /* indices for bytes 16..23 */
|
|
43
|
+
0x181f1e1d1c1b1a19, /* indices for bytes 24..31 */
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
#else /* MLD_FIPS202_X86_64_NEED_X4_AVX2 && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
|
|
47
|
+
*/
|
|
48
|
+
|
|
49
|
+
MLD_EMPTY_CU(fips202_x86_64_constants)
|
|
50
|
+
|
|
51
|
+
#endif /* !(MLD_FIPS202_X86_64_NEED_X4_AVX2 && \
|
|
52
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED) */
|