pq_crypto 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/SECURITY.md +7 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
- data/lib/pq_crypto/version.rb +1 -1
- data/script/vendor_libs.rb +3 -3
- metadata +41 -35
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/* Copyright (c) The mldsa-native project authors
|
|
2
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
7
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4)
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
11
|
+
* dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
.text
|
|
15
|
+
.balign 4
|
|
16
|
+
.global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
|
|
17
|
+
/*
 * polyvecl_pointwise_acc_montgomery_l4_aarch64_asm
 *
 * For each of 256 32-bit coefficient positions: multiplies the matching
 * coefficients of 4 polynomial pairs read from x1 and x2, sums the four
 * signed 64-bit products, applies one Montgomery reduction
 *   (sum - ((sum * q^-1) mod 2^32) * q) >> 32,  q = 8380417,
 * and stores the 32-bit result through x0.
 *
 * In:    x0 = output, 256 32-bit words
 *        x1, x2 = inputs, 4 polynomials each, consecutive at 0x400-byte stride
 * Clobb: x0-x3 (x0-x2 end up advanced by 0x400), v0, v1, v16-v31, flags
 * Stack: not used
 * NOTE(review): value-range preconditions on the inputs (so that the 64-bit
 * sums do not overflow) are not visible here; confirm against the C
 * declaration.
 */
MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
|
|
18
|
+
|
|
19
|
+
.cfi_startproc
|
|
20
|
+
mov w3, #0xe001 // =57345; low 16 bits of 0x007fe001
|
|
21
|
+
movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (modulus q)
|
|
22
|
+
dup v0.4s, w3 // v0 = q in all four 32-bit lanes
|
|
23
|
+
mov w3, #0x2001 // =8193; low 16 bits of 0x03802001
|
|
24
|
+
movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; q * w3 == 1 (mod 2^32)
|
|
25
|
+
dup v1.4s, w3 // v1 = q^-1 mod 2^32 in all four lanes
|
|
26
|
+
mov x3, #0x40 // =64; loop counter, reduced by 4 per iteration (16 iterations)
|
|
27
|
+
|
|
28
|
+
Lpolyvecl_pointwise_acc_montgomery_l4_loop_start: // one iteration = 16 coefficients (64 bytes)
|
|
29
|
+
ldr q17, [x1, #0x10] // v16..v19 = 16 words of polynomial 1 at x1
|
|
30
|
+
ldr q18, [x1, #0x20]
|
|
31
|
+
ldr q19, [x1, #0x30]
|
|
32
|
+
ldr q16, [x1], #0x40 // post-increment: x1 += 64 bytes
|
|
33
|
+
ldr q21, [x2, #0x10] // v20..v23 = matching 16 words of polynomial 1 at x2
|
|
34
|
+
ldr q22, [x2, #0x20]
|
|
35
|
+
ldr q23, [x2, #0x30]
|
|
36
|
+
ldr q20, [x2], #0x40 // post-increment: x2 += 64 bytes
|
|
37
|
+
smull v24.2d, v16.2s, v20.2s // start 64-bit sums v24..v31 with signed 32x32 products (lanes 0-1)
|
|
38
|
+
smull2 v25.2d, v16.4s, v20.4s // lanes 2-3
|
|
39
|
+
smull v26.2d, v17.2s, v21.2s
|
|
40
|
+
smull2 v27.2d, v17.4s, v21.4s
|
|
41
|
+
smull v28.2d, v18.2s, v22.2s
|
|
42
|
+
smull2 v29.2d, v18.4s, v22.4s
|
|
43
|
+
smull v30.2d, v19.2s, v23.2s
|
|
44
|
+
smull2 v31.2d, v19.4s, v23.4s
|
|
45
|
+
ldr q16, [x1, #0x3c0] // polynomial 2: 0x400 stride minus the 0x40 already added to x1
|
|
46
|
+
ldr q17, [x1, #0x3d0]
|
|
47
|
+
ldr q18, [x1, #0x3e0]
|
|
48
|
+
ldr q19, [x1, #0x3f0]
|
|
49
|
+
ldr q20, [x2, #0x3c0] // same offset into polynomial 2 at x2
|
|
50
|
+
ldr q21, [x2, #0x3d0]
|
|
51
|
+
ldr q22, [x2, #0x3e0]
|
|
52
|
+
ldr q23, [x2, #0x3f0]
|
|
53
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 2
|
|
54
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
55
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
56
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
57
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
58
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
59
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
60
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
61
|
+
ldr q16, [x1, #0x7c0] // polynomial 3 (2 * 0x400 - 0x40)
|
|
62
|
+
ldr q17, [x1, #0x7d0]
|
|
63
|
+
ldr q18, [x1, #0x7e0]
|
|
64
|
+
ldr q19, [x1, #0x7f0]
|
|
65
|
+
ldr q20, [x2, #0x7c0]
|
|
66
|
+
ldr q21, [x2, #0x7d0]
|
|
67
|
+
ldr q22, [x2, #0x7e0]
|
|
68
|
+
ldr q23, [x2, #0x7f0]
|
|
69
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 3
|
|
70
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
71
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
72
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
73
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
74
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
75
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
76
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
77
|
+
ldr q16, [x1, #0xbc0] // polynomial 4 (3 * 0x400 - 0x40)
|
|
78
|
+
ldr q17, [x1, #0xbd0]
|
|
79
|
+
ldr q18, [x1, #0xbe0]
|
|
80
|
+
ldr q19, [x1, #0xbf0]
|
|
81
|
+
ldr q20, [x2, #0xbc0]
|
|
82
|
+
ldr q21, [x2, #0xbd0]
|
|
83
|
+
ldr q22, [x2, #0xbe0]
|
|
84
|
+
ldr q23, [x2, #0xbf0]
|
|
85
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 4
|
|
86
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
87
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
88
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
89
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
90
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
91
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
92
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
93
|
+
uzp1 v16.4s, v24.4s, v25.4s // low 32 bits of each of the four 64-bit sums
|
|
94
|
+
mul v16.4s, v16.4s, v1.4s // t = low * q^-1 mod 2^32
|
|
95
|
+
smlsl v24.2d, v16.2s, v0.2s // sum -= t * q (lanes 0-1); low 32 bits cancel to zero
|
|
96
|
+
smlsl2 v25.2d, v16.4s, v0.4s // lanes 2-3
|
|
97
|
+
uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the Montgomery-reduced result
|
|
98
|
+
uzp1 v17.4s, v26.4s, v27.4s // same reduction for coefficients 4-7
|
|
99
|
+
mul v17.4s, v17.4s, v1.4s
|
|
100
|
+
smlsl v26.2d, v17.2s, v0.2s
|
|
101
|
+
smlsl2 v27.2d, v17.4s, v0.4s
|
|
102
|
+
uzp2 v17.4s, v26.4s, v27.4s
|
|
103
|
+
uzp1 v18.4s, v28.4s, v29.4s // same reduction for coefficients 8-11
|
|
104
|
+
mul v18.4s, v18.4s, v1.4s
|
|
105
|
+
smlsl v28.2d, v18.2s, v0.2s
|
|
106
|
+
smlsl2 v29.2d, v18.4s, v0.4s
|
|
107
|
+
uzp2 v18.4s, v28.4s, v29.4s
|
|
108
|
+
uzp1 v19.4s, v30.4s, v31.4s // same reduction for coefficients 12-15
|
|
109
|
+
mul v19.4s, v19.4s, v1.4s
|
|
110
|
+
smlsl v30.2d, v19.2s, v0.2s
|
|
111
|
+
smlsl2 v31.2d, v19.4s, v0.4s
|
|
112
|
+
uzp2 v19.4s, v30.4s, v31.4s
|
|
113
|
+
str q17, [x0, #0x10] // store the 16 reduced coefficients to the output
|
|
114
|
+
str q18, [x0, #0x20]
|
|
115
|
+
str q19, [x0, #0x30]
|
|
116
|
+
str q16, [x0], #0x40 // post-increment: x0 += 64 bytes
|
|
117
|
+
subs x3, x3, #0x4 // counter -= 4 (flags are set but not read in this function)
|
|
118
|
+
cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l4_loop_start // repeat until the counter reaches zero
|
|
119
|
+
ret
|
|
120
|
+
.cfi_endproc
|
|
121
|
+
|
|
122
|
+
MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
|
|
123
|
+
|
|
124
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
125
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4) */
|
|
126
|
+
|
|
127
|
+
#if defined(__ELF__)
|
|
128
|
+
.section .note.GNU-stack,"",%progbits
|
|
129
|
+
#endif
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/* Copyright (c) The mldsa-native project authors
|
|
2
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
7
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5)
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
11
|
+
* dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
.text
|
|
15
|
+
.balign 4
|
|
16
|
+
.global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
|
|
17
|
+
/*
 * polyvecl_pointwise_acc_montgomery_l5_aarch64_asm
 *
 * For each of 256 32-bit coefficient positions: multiplies the matching
 * coefficients of 5 polynomial pairs read from x1 and x2, sums the five
 * signed 64-bit products, applies one Montgomery reduction
 *   (sum - ((sum * q^-1) mod 2^32) * q) >> 32,  q = 8380417,
 * and stores the 32-bit result through x0.
 *
 * In:    x0 = output, 256 32-bit words
 *        x1, x2 = inputs, 5 polynomials each, consecutive at 0x400-byte stride
 * Clobb: x0-x3 (x0-x2 end up advanced by 0x400), v0, v1, v16-v31, flags
 * Stack: not used
 * NOTE(review): value-range preconditions on the inputs (so that the 64-bit
 * sums do not overflow) are not visible here; confirm against the C
 * declaration.
 */
MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
|
|
18
|
+
|
|
19
|
+
.cfi_startproc
|
|
20
|
+
mov w3, #0xe001 // =57345; low 16 bits of 0x007fe001
|
|
21
|
+
movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (modulus q)
|
|
22
|
+
dup v0.4s, w3 // v0 = q in all four 32-bit lanes
|
|
23
|
+
mov w3, #0x2001 // =8193; low 16 bits of 0x03802001
|
|
24
|
+
movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; q * w3 == 1 (mod 2^32)
|
|
25
|
+
dup v1.4s, w3 // v1 = q^-1 mod 2^32 in all four lanes
|
|
26
|
+
mov x3, #0x40 // =64; loop counter, reduced by 4 per iteration (16 iterations)
|
|
27
|
+
|
|
28
|
+
Lpolyvecl_pointwise_acc_montgomery_l5_loop_start: // one iteration = 16 coefficients (64 bytes)
|
|
29
|
+
ldr q17, [x1, #0x10] // v16..v19 = 16 words of polynomial 1 at x1
|
|
30
|
+
ldr q18, [x1, #0x20]
|
|
31
|
+
ldr q19, [x1, #0x30]
|
|
32
|
+
ldr q16, [x1], #0x40 // post-increment: x1 += 64 bytes
|
|
33
|
+
ldr q21, [x2, #0x10] // v20..v23 = matching 16 words of polynomial 1 at x2
|
|
34
|
+
ldr q22, [x2, #0x20]
|
|
35
|
+
ldr q23, [x2, #0x30]
|
|
36
|
+
ldr q20, [x2], #0x40 // post-increment: x2 += 64 bytes
|
|
37
|
+
smull v24.2d, v16.2s, v20.2s // start 64-bit sums v24..v31 with signed 32x32 products (lanes 0-1)
|
|
38
|
+
smull2 v25.2d, v16.4s, v20.4s // lanes 2-3
|
|
39
|
+
smull v26.2d, v17.2s, v21.2s
|
|
40
|
+
smull2 v27.2d, v17.4s, v21.4s
|
|
41
|
+
smull v28.2d, v18.2s, v22.2s
|
|
42
|
+
smull2 v29.2d, v18.4s, v22.4s
|
|
43
|
+
smull v30.2d, v19.2s, v23.2s
|
|
44
|
+
smull2 v31.2d, v19.4s, v23.4s
|
|
45
|
+
ldr q16, [x1, #0x3c0] // polynomial 2: 0x400 stride minus the 0x40 already added to x1
|
|
46
|
+
ldr q17, [x1, #0x3d0]
|
|
47
|
+
ldr q18, [x1, #0x3e0]
|
|
48
|
+
ldr q19, [x1, #0x3f0]
|
|
49
|
+
ldr q20, [x2, #0x3c0] // same offset into polynomial 2 at x2
|
|
50
|
+
ldr q21, [x2, #0x3d0]
|
|
51
|
+
ldr q22, [x2, #0x3e0]
|
|
52
|
+
ldr q23, [x2, #0x3f0]
|
|
53
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 2
|
|
54
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
55
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
56
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
57
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
58
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
59
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
60
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
61
|
+
ldr q16, [x1, #0x7c0] // polynomial 3 (2 * 0x400 - 0x40)
|
|
62
|
+
ldr q17, [x1, #0x7d0]
|
|
63
|
+
ldr q18, [x1, #0x7e0]
|
|
64
|
+
ldr q19, [x1, #0x7f0]
|
|
65
|
+
ldr q20, [x2, #0x7c0]
|
|
66
|
+
ldr q21, [x2, #0x7d0]
|
|
67
|
+
ldr q22, [x2, #0x7e0]
|
|
68
|
+
ldr q23, [x2, #0x7f0]
|
|
69
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 3
|
|
70
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
71
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
72
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
73
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
74
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
75
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
76
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
77
|
+
ldr q16, [x1, #0xbc0] // polynomial 4 (3 * 0x400 - 0x40)
|
|
78
|
+
ldr q17, [x1, #0xbd0]
|
|
79
|
+
ldr q18, [x1, #0xbe0]
|
|
80
|
+
ldr q19, [x1, #0xbf0]
|
|
81
|
+
ldr q20, [x2, #0xbc0]
|
|
82
|
+
ldr q21, [x2, #0xbd0]
|
|
83
|
+
ldr q22, [x2, #0xbe0]
|
|
84
|
+
ldr q23, [x2, #0xbf0]
|
|
85
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 4
|
|
86
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
87
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
88
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
89
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
90
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
91
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
92
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
93
|
+
ldr q16, [x1, #0xfc0] // polynomial 5 (4 * 0x400 - 0x40)
|
|
94
|
+
ldr q17, [x1, #0xfd0]
|
|
95
|
+
ldr q18, [x1, #0xfe0]
|
|
96
|
+
ldr q19, [x1, #0xff0]
|
|
97
|
+
ldr q20, [x2, #0xfc0]
|
|
98
|
+
ldr q21, [x2, #0xfd0]
|
|
99
|
+
ldr q22, [x2, #0xfe0]
|
|
100
|
+
ldr q23, [x2, #0xff0]
|
|
101
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 5
|
|
102
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
103
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
104
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
105
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
106
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
107
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
108
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
109
|
+
uzp1 v16.4s, v24.4s, v25.4s // low 32 bits of each of the four 64-bit sums
|
|
110
|
+
mul v16.4s, v16.4s, v1.4s // t = low * q^-1 mod 2^32
|
|
111
|
+
smlsl v24.2d, v16.2s, v0.2s // sum -= t * q (lanes 0-1); low 32 bits cancel to zero
|
|
112
|
+
smlsl2 v25.2d, v16.4s, v0.4s // lanes 2-3
|
|
113
|
+
uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the Montgomery-reduced result
|
|
114
|
+
uzp1 v17.4s, v26.4s, v27.4s // same reduction for coefficients 4-7
|
|
115
|
+
mul v17.4s, v17.4s, v1.4s
|
|
116
|
+
smlsl v26.2d, v17.2s, v0.2s
|
|
117
|
+
smlsl2 v27.2d, v17.4s, v0.4s
|
|
118
|
+
uzp2 v17.4s, v26.4s, v27.4s
|
|
119
|
+
uzp1 v18.4s, v28.4s, v29.4s // same reduction for coefficients 8-11
|
|
120
|
+
mul v18.4s, v18.4s, v1.4s
|
|
121
|
+
smlsl v28.2d, v18.2s, v0.2s
|
|
122
|
+
smlsl2 v29.2d, v18.4s, v0.4s
|
|
123
|
+
uzp2 v18.4s, v28.4s, v29.4s
|
|
124
|
+
uzp1 v19.4s, v30.4s, v31.4s // same reduction for coefficients 12-15
|
|
125
|
+
mul v19.4s, v19.4s, v1.4s
|
|
126
|
+
smlsl v30.2d, v19.2s, v0.2s
|
|
127
|
+
smlsl2 v31.2d, v19.4s, v0.4s
|
|
128
|
+
uzp2 v19.4s, v30.4s, v31.4s
|
|
129
|
+
str q17, [x0, #0x10] // store the 16 reduced coefficients to the output
|
|
130
|
+
str q18, [x0, #0x20]
|
|
131
|
+
str q19, [x0, #0x30]
|
|
132
|
+
str q16, [x0], #0x40 // post-increment: x0 += 64 bytes
|
|
133
|
+
subs x3, x3, #0x4 // counter -= 4 (flags are set but not read in this function)
|
|
134
|
+
cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l5_loop_start // repeat until the counter reaches zero
|
|
135
|
+
ret
|
|
136
|
+
.cfi_endproc
|
|
137
|
+
|
|
138
|
+
MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
|
|
139
|
+
|
|
140
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
141
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5) */
|
|
142
|
+
|
|
143
|
+
#if defined(__ELF__)
|
|
144
|
+
.section .note.GNU-stack,"",%progbits
|
|
145
|
+
#endif
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/* Copyright (c) The mldsa-native project authors
|
|
2
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
7
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7)
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
11
|
+
* dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
.text
|
|
15
|
+
.balign 4
|
|
16
|
+
.global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
|
|
17
|
+
/*
 * polyvecl_pointwise_acc_montgomery_l7_aarch64_asm
 *
 * For each of 256 32-bit coefficient positions: multiplies the matching
 * coefficients of 7 polynomial pairs read from x1 and x2, sums the seven
 * signed 64-bit products, applies one Montgomery reduction
 *   (sum - ((sum * q^-1) mod 2^32) * q) >> 32,  q = 8380417,
 * and stores the 32-bit result through x0.
 *
 * In:    x0 = output, 256 32-bit words
 *        x1, x2 = inputs, 7 polynomials each, consecutive at 0x400-byte stride
 * Clobb: x0-x3 (x0-x2 end up advanced by 0x400), v0, v1, v16-v31, flags
 * Stack: not used
 * NOTE(review): value-range preconditions on the inputs (so that the 64-bit
 * sums do not overflow) are not visible here; confirm against the C
 * declaration.
 */
MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
|
|
18
|
+
|
|
19
|
+
.cfi_startproc
|
|
20
|
+
mov w3, #0xe001 // =57345; low 16 bits of 0x007fe001
|
|
21
|
+
movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (modulus q)
|
|
22
|
+
dup v0.4s, w3 // v0 = q in all four 32-bit lanes
|
|
23
|
+
mov w3, #0x2001 // =8193; low 16 bits of 0x03802001
|
|
24
|
+
movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; q * w3 == 1 (mod 2^32)
|
|
25
|
+
dup v1.4s, w3 // v1 = q^-1 mod 2^32 in all four lanes
|
|
26
|
+
mov x3, #0x40 // =64; loop counter, reduced by 4 per iteration (16 iterations)
|
|
27
|
+
|
|
28
|
+
Lpolyvecl_pointwise_acc_montgomery_l7_loop_start: // one iteration = 16 coefficients (64 bytes)
|
|
29
|
+
ldr q17, [x1, #0x10] // v16..v19 = 16 words of polynomial 1 at x1
|
|
30
|
+
ldr q18, [x1, #0x20]
|
|
31
|
+
ldr q19, [x1, #0x30]
|
|
32
|
+
ldr q16, [x1], #0x40 // post-increment: x1 += 64 bytes
|
|
33
|
+
ldr q21, [x2, #0x10] // v20..v23 = matching 16 words of polynomial 1 at x2
|
|
34
|
+
ldr q22, [x2, #0x20]
|
|
35
|
+
ldr q23, [x2, #0x30]
|
|
36
|
+
ldr q20, [x2], #0x40 // post-increment: x2 += 64 bytes
|
|
37
|
+
smull v24.2d, v16.2s, v20.2s // start 64-bit sums v24..v31 with signed 32x32 products (lanes 0-1)
|
|
38
|
+
smull2 v25.2d, v16.4s, v20.4s // lanes 2-3
|
|
39
|
+
smull v26.2d, v17.2s, v21.2s
|
|
40
|
+
smull2 v27.2d, v17.4s, v21.4s
|
|
41
|
+
smull v28.2d, v18.2s, v22.2s
|
|
42
|
+
smull2 v29.2d, v18.4s, v22.4s
|
|
43
|
+
smull v30.2d, v19.2s, v23.2s
|
|
44
|
+
smull2 v31.2d, v19.4s, v23.4s
|
|
45
|
+
ldr q16, [x1, #0x3c0] // polynomial 2: 0x400 stride minus the 0x40 already added to x1
|
|
46
|
+
ldr q17, [x1, #0x3d0]
|
|
47
|
+
ldr q18, [x1, #0x3e0]
|
|
48
|
+
ldr q19, [x1, #0x3f0]
|
|
49
|
+
ldr q20, [x2, #0x3c0] // same offset into polynomial 2 at x2
|
|
50
|
+
ldr q21, [x2, #0x3d0]
|
|
51
|
+
ldr q22, [x2, #0x3e0]
|
|
52
|
+
ldr q23, [x2, #0x3f0]
|
|
53
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 2
|
|
54
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
55
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
56
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
57
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
58
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
59
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
60
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
61
|
+
ldr q16, [x1, #0x7c0] // polynomial 3 (2 * 0x400 - 0x40)
|
|
62
|
+
ldr q17, [x1, #0x7d0]
|
|
63
|
+
ldr q18, [x1, #0x7e0]
|
|
64
|
+
ldr q19, [x1, #0x7f0]
|
|
65
|
+
ldr q20, [x2, #0x7c0]
|
|
66
|
+
ldr q21, [x2, #0x7d0]
|
|
67
|
+
ldr q22, [x2, #0x7e0]
|
|
68
|
+
ldr q23, [x2, #0x7f0]
|
|
69
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 3
|
|
70
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
71
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
72
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
73
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
74
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
75
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
76
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
77
|
+
ldr q16, [x1, #0xbc0] // polynomial 4 (3 * 0x400 - 0x40)
|
|
78
|
+
ldr q17, [x1, #0xbd0]
|
|
79
|
+
ldr q18, [x1, #0xbe0]
|
|
80
|
+
ldr q19, [x1, #0xbf0]
|
|
81
|
+
ldr q20, [x2, #0xbc0]
|
|
82
|
+
ldr q21, [x2, #0xbd0]
|
|
83
|
+
ldr q22, [x2, #0xbe0]
|
|
84
|
+
ldr q23, [x2, #0xbf0]
|
|
85
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 4
|
|
86
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
87
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
88
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
89
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
90
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
91
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
92
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
93
|
+
ldr q16, [x1, #0xfc0] // polynomial 5 (4 * 0x400 - 0x40)
|
|
94
|
+
ldr q17, [x1, #0xfd0]
|
|
95
|
+
ldr q18, [x1, #0xfe0]
|
|
96
|
+
ldr q19, [x1, #0xff0]
|
|
97
|
+
ldr q20, [x2, #0xfc0]
|
|
98
|
+
ldr q21, [x2, #0xfd0]
|
|
99
|
+
ldr q22, [x2, #0xfe0]
|
|
100
|
+
ldr q23, [x2, #0xff0]
|
|
101
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 5
|
|
102
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
103
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
104
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
105
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
106
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
107
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
108
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
109
|
+
ldr q16, [x1, #0x13c0] // polynomial 6 (5 * 0x400 - 0x40)
|
|
110
|
+
ldr q17, [x1, #0x13d0]
|
|
111
|
+
ldr q18, [x1, #0x13e0]
|
|
112
|
+
ldr q19, [x1, #0x13f0]
|
|
113
|
+
ldr q20, [x2, #0x13c0]
|
|
114
|
+
ldr q21, [x2, #0x13d0]
|
|
115
|
+
ldr q22, [x2, #0x13e0]
|
|
116
|
+
ldr q23, [x2, #0x13f0]
|
|
117
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 6
|
|
118
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
119
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
120
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
121
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
122
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
123
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
124
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
125
|
+
ldr q16, [x1, #0x17c0] // polynomial 7 (6 * 0x400 - 0x40)
|
|
126
|
+
ldr q17, [x1, #0x17d0]
|
|
127
|
+
ldr q18, [x1, #0x17e0]
|
|
128
|
+
ldr q19, [x1, #0x17f0]
|
|
129
|
+
ldr q20, [x2, #0x17c0]
|
|
130
|
+
ldr q21, [x2, #0x17d0]
|
|
131
|
+
ldr q22, [x2, #0x17e0]
|
|
132
|
+
ldr q23, [x2, #0x17f0]
|
|
133
|
+
smlal v24.2d, v16.2s, v20.2s // accumulate products of pair 7
|
|
134
|
+
smlal2 v25.2d, v16.4s, v20.4s
|
|
135
|
+
smlal v26.2d, v17.2s, v21.2s
|
|
136
|
+
smlal2 v27.2d, v17.4s, v21.4s
|
|
137
|
+
smlal v28.2d, v18.2s, v22.2s
|
|
138
|
+
smlal2 v29.2d, v18.4s, v22.4s
|
|
139
|
+
smlal v30.2d, v19.2s, v23.2s
|
|
140
|
+
smlal2 v31.2d, v19.4s, v23.4s
|
|
141
|
+
uzp1 v16.4s, v24.4s, v25.4s // low 32 bits of each of the four 64-bit sums
|
|
142
|
+
mul v16.4s, v16.4s, v1.4s // t = low * q^-1 mod 2^32
|
|
143
|
+
smlsl v24.2d, v16.2s, v0.2s // sum -= t * q (lanes 0-1); low 32 bits cancel to zero
|
|
144
|
+
smlsl2 v25.2d, v16.4s, v0.4s // lanes 2-3
|
|
145
|
+
uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the Montgomery-reduced result
|
|
146
|
+
uzp1 v17.4s, v26.4s, v27.4s // same reduction for coefficients 4-7
|
|
147
|
+
mul v17.4s, v17.4s, v1.4s
|
|
148
|
+
smlsl v26.2d, v17.2s, v0.2s
|
|
149
|
+
smlsl2 v27.2d, v17.4s, v0.4s
|
|
150
|
+
uzp2 v17.4s, v26.4s, v27.4s
|
|
151
|
+
uzp1 v18.4s, v28.4s, v29.4s // same reduction for coefficients 8-11
|
|
152
|
+
mul v18.4s, v18.4s, v1.4s
|
|
153
|
+
smlsl v28.2d, v18.2s, v0.2s
|
|
154
|
+
smlsl2 v29.2d, v18.4s, v0.4s
|
|
155
|
+
uzp2 v18.4s, v28.4s, v29.4s
|
|
156
|
+
uzp1 v19.4s, v30.4s, v31.4s // same reduction for coefficients 12-15
|
|
157
|
+
mul v19.4s, v19.4s, v1.4s
|
|
158
|
+
smlsl v30.2d, v19.2s, v0.2s
|
|
159
|
+
smlsl2 v31.2d, v19.4s, v0.4s
|
|
160
|
+
uzp2 v19.4s, v30.4s, v31.4s
|
|
161
|
+
str q17, [x0, #0x10] // store the 16 reduced coefficients to the output
|
|
162
|
+
str q18, [x0, #0x20]
|
|
163
|
+
str q19, [x0, #0x30]
|
|
164
|
+
str q16, [x0], #0x40 // post-increment: x0 += 64 bytes
|
|
165
|
+
subs x3, x3, #0x4 // counter -= 4 (flags are set but not read in this function)
|
|
166
|
+
cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l7_loop_start // repeat until the counter reaches zero
|
|
167
|
+
ret
|
|
168
|
+
.cfi_endproc
|
|
169
|
+
|
|
170
|
+
MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
|
|
171
|
+
|
|
172
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
173
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7) */
|
|
174
|
+
|
|
175
|
+
#if defined(__ELF__)
|
|
176
|
+
.section .note.GNU-stack,"",%progbits
|
|
177
|
+
#endif
|