pq_crypto 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/SECURITY.md +7 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
- data/lib/pq_crypto/version.rb +1 -1
- data/script/vendor_libs.rb +3 -3
- metadata +41 -35
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
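data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S
ADDED
|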
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/* Copyright (c) The mldsa-native project authors
|
|
2
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
7
|
+
(!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API) || \
|
|
8
|
+
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)) && \
|
|
9
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
10
|
+
|
|
11
|
+
/*
|
|
12
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
13
|
+
* dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
.text
|
|
17
|
+
.balign 4
|
|
18
|
+
.global MLD_ASM_NAMESPACE(poly_pointwise_montgomery_aarch64_asm)
|
|
19
|
+
MLD_ASM_FN_SYMBOL(poly_pointwise_montgomery_aarch64_asm)
|
|
20
|
+
|
|
21
|
+
.cfi_startproc
|
|
22
|
+
mov w3, #0xe001 // =57345
|
|
23
|
+
movk w3, #0x7f, lsl #16
|
|
24
|
+
dup v0.4s, w3
|
|
25
|
+
mov w3, #0x2001 // =8193
|
|
26
|
+
movk w3, #0x380, lsl #16
|
|
27
|
+
dup v1.4s, w3
|
|
28
|
+
mov x3, #0x40 // =64
|
|
29
|
+
|
|
30
|
+
Lpoly_pointwise_montgomery_loop_start:
|
|
31
|
+
ldr q16, [x0]
|
|
32
|
+
ldr q17, [x0, #0x10]
|
|
33
|
+
ldr q18, [x0, #0x20]
|
|
34
|
+
ldr q19, [x0, #0x30]
|
|
35
|
+
ldr q21, [x1, #0x10]
|
|
36
|
+
ldr q22, [x1, #0x20]
|
|
37
|
+
ldr q23, [x1, #0x30]
|
|
38
|
+
ldr q20, [x1], #0x40
|
|
39
|
+
smull v24.2d, v16.2s, v20.2s
|
|
40
|
+
smull2 v25.2d, v16.4s, v20.4s
|
|
41
|
+
smull v26.2d, v17.2s, v21.2s
|
|
42
|
+
smull2 v27.2d, v17.4s, v21.4s
|
|
43
|
+
smull v28.2d, v18.2s, v22.2s
|
|
44
|
+
smull2 v29.2d, v18.4s, v22.4s
|
|
45
|
+
smull v30.2d, v19.2s, v23.2s
|
|
46
|
+
smull2 v31.2d, v19.4s, v23.4s
|
|
47
|
+
uzp1 v16.4s, v24.4s, v25.4s
|
|
48
|
+
mul v16.4s, v16.4s, v1.4s
|
|
49
|
+
smlsl v24.2d, v16.2s, v0.2s
|
|
50
|
+
smlsl2 v25.2d, v16.4s, v0.4s
|
|
51
|
+
uzp2 v16.4s, v24.4s, v25.4s
|
|
52
|
+
uzp1 v17.4s, v26.4s, v27.4s
|
|
53
|
+
mul v17.4s, v17.4s, v1.4s
|
|
54
|
+
smlsl v26.2d, v17.2s, v0.2s
|
|
55
|
+
smlsl2 v27.2d, v17.4s, v0.4s
|
|
56
|
+
uzp2 v17.4s, v26.4s, v27.4s
|
|
57
|
+
uzp1 v18.4s, v28.4s, v29.4s
|
|
58
|
+
mul v18.4s, v18.4s, v1.4s
|
|
59
|
+
smlsl v28.2d, v18.2s, v0.2s
|
|
60
|
+
smlsl2 v29.2d, v18.4s, v0.4s
|
|
61
|
+
uzp2 v18.4s, v28.4s, v29.4s
|
|
62
|
+
uzp1 v19.4s, v30.4s, v31.4s
|
|
63
|
+
mul v19.4s, v19.4s, v1.4s
|
|
64
|
+
smlsl v30.2d, v19.2s, v0.2s
|
|
65
|
+
smlsl2 v31.2d, v19.4s, v0.4s
|
|
66
|
+
uzp2 v19.4s, v30.4s, v31.4s
|
|
67
|
+
str q17, [x0, #0x10]
|
|
68
|
+
str q18, [x0, #0x20]
|
|
69
|
+
str q19, [x0, #0x30]
|
|
70
|
+
str q16, [x0], #0x40
|
|
71
|
+
subs x3, x3, #0x4
|
|
72
|
+
cbnz x3, Lpoly_pointwise_montgomery_loop_start
|
|
73
|
+
ret
|
|
74
|
+
.cfi_endproc
|
|
75
|
+
|
|
76
|
+
MLD_ASM_FN_SIZE(poly_pointwise_montgomery_aarch64_asm)
|
|
77
|
+
|
|
78
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && (!MLD_CONFIG_NO_SIGN_API || \
|
|
79
|
+
!MLD_CONFIG_NO_VERIFY_API || MLD_CONFIG_REDUCE_RAM || MLD_UNIT_TEST) \
|
|
80
|
+
&& !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
81
|
+
|
|
82
|
+
#if defined(__ELF__)
|
|
83
|
+
.section .note.GNU-stack,"",%progbits
|
|
84
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
11
|
+
* dev/aarch64_opt/src/poly_caddq_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
.text
|
|
15
|
+
.balign 4
|
|
16
|
+
.global MLD_ASM_NAMESPACE(poly_caddq_aarch64_asm)
|
|
17
|
+
MLD_ASM_FN_SYMBOL(poly_caddq_aarch64_asm)
|
|
18
|
+
|
|
19
|
+
.cfi_startproc
|
|
20
|
+
mov w9, #0xe001 // =57345
|
|
21
|
+
movk w9, #0x7f, lsl #16
|
|
22
|
+
dup v4.4s, w9
|
|
23
|
+
mov x1, #0x10 // =16
|
|
24
|
+
|
|
25
|
+
Lpoly_caddq_loop:
|
|
26
|
+
ldr q0, [x0]
|
|
27
|
+
ldr q1, [x0, #0x10]
|
|
28
|
+
ldr q2, [x0, #0x20]
|
|
29
|
+
ldr q3, [x0, #0x30]
|
|
30
|
+
ushr v5.4s, v0.4s, #0x1f
|
|
31
|
+
mla v0.4s, v5.4s, v4.4s
|
|
32
|
+
ushr v5.4s, v1.4s, #0x1f
|
|
33
|
+
mla v1.4s, v5.4s, v4.4s
|
|
34
|
+
ushr v5.4s, v2.4s, #0x1f
|
|
35
|
+
mla v2.4s, v5.4s, v4.4s
|
|
36
|
+
ushr v5.4s, v3.4s, #0x1f
|
|
37
|
+
mla v3.4s, v5.4s, v4.4s
|
|
38
|
+
str q1, [x0, #0x10]
|
|
39
|
+
str q2, [x0, #0x20]
|
|
40
|
+
str q3, [x0, #0x30]
|
|
41
|
+
str q0, [x0], #0x40
|
|
42
|
+
subs x1, x1, #0x1
|
|
43
|
+
b.ne Lpoly_caddq_loop
|
|
44
|
+
ret
|
|
45
|
+
.cfi_endproc
|
|
46
|
+
|
|
47
|
+
MLD_ASM_FN_SIZE(poly_caddq_aarch64_asm)
|
|
48
|
+
|
|
49
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
50
|
+
|
|
51
|
+
#if defined(__ELF__)
|
|
52
|
+
.section .note.GNU-stack,"",%progbits
|
|
53
|
+
#endif
|
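data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S
ADDED
|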
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
8
|
+
|
|
9
|
+
/*
|
|
10
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
11
|
+
* dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
.text
|
|
15
|
+
.balign 4
|
|
16
|
+
.global MLD_ASM_NAMESPACE(poly_chknorm_aarch64_asm)
|
|
17
|
+
MLD_ASM_FN_SYMBOL(poly_chknorm_aarch64_asm)
|
|
18
|
+
|
|
19
|
+
.cfi_startproc
|
|
20
|
+
dup v20.4s, w1
|
|
21
|
+
eor v21.16b, v21.16b, v21.16b
|
|
22
|
+
mov x2, #0x10 // =16
|
|
23
|
+
|
|
24
|
+
Lpoly_chknorm_loop:
|
|
25
|
+
ldr q1, [x0, #0x10]
|
|
26
|
+
ldr q2, [x0, #0x20]
|
|
27
|
+
ldr q3, [x0, #0x30]
|
|
28
|
+
ldr q0, [x0], #0x40
|
|
29
|
+
abs v1.4s, v1.4s
|
|
30
|
+
cmge v1.4s, v1.4s, v20.4s
|
|
31
|
+
orr v21.16b, v21.16b, v1.16b
|
|
32
|
+
abs v2.4s, v2.4s
|
|
33
|
+
cmge v2.4s, v2.4s, v20.4s
|
|
34
|
+
orr v21.16b, v21.16b, v2.16b
|
|
35
|
+
abs v3.4s, v3.4s
|
|
36
|
+
cmge v3.4s, v3.4s, v20.4s
|
|
37
|
+
orr v21.16b, v21.16b, v3.16b
|
|
38
|
+
abs v0.4s, v0.4s
|
|
39
|
+
cmge v0.4s, v0.4s, v20.4s
|
|
40
|
+
orr v21.16b, v21.16b, v0.16b
|
|
41
|
+
subs x2, x2, #0x1
|
|
42
|
+
b.ne Lpoly_chknorm_loop
|
|
43
|
+
umaxv s21, v21.4s
|
|
44
|
+
fmov w0, s21
|
|
45
|
+
and w0, w0, #0x1
|
|
46
|
+
ret
|
|
47
|
+
.cfi_endproc
|
|
48
|
+
|
|
49
|
+
MLD_ASM_FN_SIZE(poly_chknorm_aarch64_asm)
|
|
50
|
+
|
|
51
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
52
|
+
|
|
53
|
+
#if defined(__ELF__)
|
|
54
|
+
.section .note.GNU-stack,"",%progbits
|
|
55
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
8
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || (MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
12
|
+
* dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
.text
|
|
16
|
+
.balign 4
|
|
17
|
+
.global MLD_ASM_NAMESPACE(poly_decompose_32_aarch64_asm)
|
|
18
|
+
MLD_ASM_FN_SYMBOL(poly_decompose_32_aarch64_asm)
|
|
19
|
+
|
|
20
|
+
.cfi_startproc
|
|
21
|
+
mov w4, #0xe001 // =57345
|
|
22
|
+
movk w4, #0x7f, lsl #16
|
|
23
|
+
dup v20.4s, w4
|
|
24
|
+
mov w5, #0xe100 // =57600
|
|
25
|
+
movk w5, #0x7b, lsl #16
|
|
26
|
+
dup v21.4s, w5
|
|
27
|
+
mov w7, #0xfe00 // =65024
|
|
28
|
+
movk w7, #0x7, lsl #16
|
|
29
|
+
dup v22.4s, w7
|
|
30
|
+
mov w11, #0x401 // =1025
|
|
31
|
+
movk w11, #0x4010, lsl #16
|
|
32
|
+
dup v23.4s, w11
|
|
33
|
+
mov x3, #0x10 // =16
|
|
34
|
+
|
|
35
|
+
Lpoly_decompose_32_loop:
|
|
36
|
+
ldr q0, [x1]
|
|
37
|
+
ldr q1, [x1, #0x10]
|
|
38
|
+
ldr q2, [x1, #0x20]
|
|
39
|
+
ldr q3, [x1, #0x30]
|
|
40
|
+
sqdmulh v5.4s, v1.4s, v23.4s
|
|
41
|
+
srshr v5.4s, v5.4s, #0x12
|
|
42
|
+
cmgt v24.4s, v1.4s, v21.4s
|
|
43
|
+
mls v1.4s, v5.4s, v22.4s
|
|
44
|
+
bic v5.16b, v5.16b, v24.16b
|
|
45
|
+
add v1.4s, v1.4s, v24.4s
|
|
46
|
+
sqdmulh v6.4s, v2.4s, v23.4s
|
|
47
|
+
srshr v6.4s, v6.4s, #0x12
|
|
48
|
+
cmgt v24.4s, v2.4s, v21.4s
|
|
49
|
+
mls v2.4s, v6.4s, v22.4s
|
|
50
|
+
bic v6.16b, v6.16b, v24.16b
|
|
51
|
+
add v2.4s, v2.4s, v24.4s
|
|
52
|
+
sqdmulh v7.4s, v3.4s, v23.4s
|
|
53
|
+
srshr v7.4s, v7.4s, #0x12
|
|
54
|
+
cmgt v24.4s, v3.4s, v21.4s
|
|
55
|
+
mls v3.4s, v7.4s, v22.4s
|
|
56
|
+
bic v7.16b, v7.16b, v24.16b
|
|
57
|
+
add v3.4s, v3.4s, v24.4s
|
|
58
|
+
sqdmulh v4.4s, v0.4s, v23.4s
|
|
59
|
+
srshr v4.4s, v4.4s, #0x12
|
|
60
|
+
cmgt v24.4s, v0.4s, v21.4s
|
|
61
|
+
mls v0.4s, v4.4s, v22.4s
|
|
62
|
+
bic v4.16b, v4.16b, v24.16b
|
|
63
|
+
add v0.4s, v0.4s, v24.4s
|
|
64
|
+
str q5, [x0, #0x10]
|
|
65
|
+
str q6, [x0, #0x20]
|
|
66
|
+
str q7, [x0, #0x30]
|
|
67
|
+
str q4, [x0], #0x40
|
|
68
|
+
str q1, [x1, #0x10]
|
|
69
|
+
str q2, [x1, #0x20]
|
|
70
|
+
str q3, [x1, #0x30]
|
|
71
|
+
str q0, [x1], #0x40
|
|
72
|
+
subs x3, x3, #0x1
|
|
73
|
+
b.ne Lpoly_decompose_32_loop
|
|
74
|
+
ret
|
|
75
|
+
.cfi_endproc
|
|
76
|
+
|
|
77
|
+
MLD_ASM_FN_SIZE(poly_decompose_32_aarch64_asm)
|
|
78
|
+
|
|
79
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_SIGN_API && \
|
|
80
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
81
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
|
|
82
|
+
|| MLD_CONFIG_PARAMETER_SET == 87) */
|
|
83
|
+
|
|
84
|
+
#if defined(__ELF__)
|
|
85
|
+
.section .note.GNU-stack,"",%progbits
|
|
86
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
8
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
12
|
+
* dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
.text
|
|
16
|
+
.balign 4
|
|
17
|
+
.global MLD_ASM_NAMESPACE(poly_decompose_88_aarch64_asm)
|
|
18
|
+
MLD_ASM_FN_SYMBOL(poly_decompose_88_aarch64_asm)
|
|
19
|
+
|
|
20
|
+
.cfi_startproc
|
|
21
|
+
mov w4, #0xe001 // =57345
|
|
22
|
+
movk w4, #0x7f, lsl #16
|
|
23
|
+
dup v20.4s, w4
|
|
24
|
+
mov w5, #0x6c00 // =27648
|
|
25
|
+
movk w5, #0x7e, lsl #16
|
|
26
|
+
dup v21.4s, w5
|
|
27
|
+
mov w7, #0xe800 // =59392
|
|
28
|
+
movk w7, #0x2, lsl #16
|
|
29
|
+
dup v22.4s, w7
|
|
30
|
+
mov w11, #0x581 // =1409
|
|
31
|
+
movk w11, #0x5816, lsl #16
|
|
32
|
+
dup v23.4s, w11
|
|
33
|
+
mov x3, #0x10 // =16
|
|
34
|
+
|
|
35
|
+
Lpoly_decompose_88_loop:
|
|
36
|
+
ldr q0, [x1]
|
|
37
|
+
ldr q1, [x1, #0x10]
|
|
38
|
+
ldr q2, [x1, #0x20]
|
|
39
|
+
ldr q3, [x1, #0x30]
|
|
40
|
+
sqdmulh v5.4s, v1.4s, v23.4s
|
|
41
|
+
srshr v5.4s, v5.4s, #0x11
|
|
42
|
+
cmgt v24.4s, v1.4s, v21.4s
|
|
43
|
+
mls v1.4s, v5.4s, v22.4s
|
|
44
|
+
bic v5.16b, v5.16b, v24.16b
|
|
45
|
+
add v1.4s, v1.4s, v24.4s
|
|
46
|
+
sqdmulh v6.4s, v2.4s, v23.4s
|
|
47
|
+
srshr v6.4s, v6.4s, #0x11
|
|
48
|
+
cmgt v24.4s, v2.4s, v21.4s
|
|
49
|
+
mls v2.4s, v6.4s, v22.4s
|
|
50
|
+
bic v6.16b, v6.16b, v24.16b
|
|
51
|
+
add v2.4s, v2.4s, v24.4s
|
|
52
|
+
sqdmulh v7.4s, v3.4s, v23.4s
|
|
53
|
+
srshr v7.4s, v7.4s, #0x11
|
|
54
|
+
cmgt v24.4s, v3.4s, v21.4s
|
|
55
|
+
mls v3.4s, v7.4s, v22.4s
|
|
56
|
+
bic v7.16b, v7.16b, v24.16b
|
|
57
|
+
add v3.4s, v3.4s, v24.4s
|
|
58
|
+
sqdmulh v4.4s, v0.4s, v23.4s
|
|
59
|
+
srshr v4.4s, v4.4s, #0x11
|
|
60
|
+
cmgt v24.4s, v0.4s, v21.4s
|
|
61
|
+
mls v0.4s, v4.4s, v22.4s
|
|
62
|
+
bic v4.16b, v4.16b, v24.16b
|
|
63
|
+
add v0.4s, v0.4s, v24.4s
|
|
64
|
+
str q5, [x0, #0x10]
|
|
65
|
+
str q6, [x0, #0x20]
|
|
66
|
+
str q7, [x0, #0x30]
|
|
67
|
+
str q4, [x0], #0x40
|
|
68
|
+
str q1, [x1, #0x10]
|
|
69
|
+
str q2, [x1, #0x20]
|
|
70
|
+
str q3, [x1, #0x30]
|
|
71
|
+
str q0, [x1], #0x40
|
|
72
|
+
subs x3, x3, #0x1
|
|
73
|
+
b.ne Lpoly_decompose_88_loop
|
|
74
|
+
ret
|
|
75
|
+
.cfi_endproc
|
|
76
|
+
|
|
77
|
+
MLD_ASM_FN_SIZE(poly_decompose_88_aarch64_asm)
|
|
78
|
+
|
|
79
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_SIGN_API && \
|
|
80
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
81
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
|
|
82
|
+
*/
|
|
83
|
+
|
|
84
|
+
#if defined(__ELF__)
|
|
85
|
+
.section .note.GNU-stack,"",%progbits
|
|
86
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
8
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || (MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
12
|
+
* dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
.text
|
|
16
|
+
.balign 4
|
|
17
|
+
.global MLD_ASM_NAMESPACE(poly_use_hint_32_aarch64_asm)
|
|
18
|
+
MLD_ASM_FN_SYMBOL(poly_use_hint_32_aarch64_asm)
|
|
19
|
+
|
|
20
|
+
.cfi_startproc
|
|
21
|
+
mov w4, #0xe001 // =57345
|
|
22
|
+
movk w4, #0x7f, lsl #16
|
|
23
|
+
dup v20.4s, w4
|
|
24
|
+
mov w5, #0xe100 // =57600
|
|
25
|
+
movk w5, #0x7b, lsl #16
|
|
26
|
+
dup v21.4s, w5
|
|
27
|
+
mov w7, #0xfe00 // =65024
|
|
28
|
+
movk w7, #0x7, lsl #16
|
|
29
|
+
dup v22.4s, w7
|
|
30
|
+
mov w11, #0x401 // =1025
|
|
31
|
+
movk w11, #0x4010, lsl #16
|
|
32
|
+
dup v23.4s, w11
|
|
33
|
+
movi v24.4s, #0xf
|
|
34
|
+
mov x3, #0x10 // =16
|
|
35
|
+
|
|
36
|
+
Lpoly_use_hint_32_loop:
|
|
37
|
+
ldr q1, [x0, #0x10]
|
|
38
|
+
ldr q2, [x0, #0x20]
|
|
39
|
+
ldr q3, [x0, #0x30]
|
|
40
|
+
ldr q0, [x0]
|
|
41
|
+
ldr q5, [x1, #0x10]
|
|
42
|
+
ldr q6, [x1, #0x20]
|
|
43
|
+
ldr q7, [x1, #0x30]
|
|
44
|
+
ldr q4, [x1], #0x40
|
|
45
|
+
sqdmulh v17.4s, v1.4s, v23.4s
|
|
46
|
+
srshr v17.4s, v17.4s, #0x12
|
|
47
|
+
cmgt v25.4s, v1.4s, v21.4s
|
|
48
|
+
mls v1.4s, v17.4s, v22.4s
|
|
49
|
+
bic v17.16b, v17.16b, v25.16b
|
|
50
|
+
add v1.4s, v1.4s, v25.4s
|
|
51
|
+
cmle v1.4s, v1.4s, #0
|
|
52
|
+
orr v1.4s, #0x1
|
|
53
|
+
mla v17.4s, v1.4s, v5.4s
|
|
54
|
+
and v17.16b, v17.16b, v24.16b
|
|
55
|
+
sqdmulh v18.4s, v2.4s, v23.4s
|
|
56
|
+
srshr v18.4s, v18.4s, #0x12
|
|
57
|
+
cmgt v25.4s, v2.4s, v21.4s
|
|
58
|
+
mls v2.4s, v18.4s, v22.4s
|
|
59
|
+
bic v18.16b, v18.16b, v25.16b
|
|
60
|
+
add v2.4s, v2.4s, v25.4s
|
|
61
|
+
cmle v2.4s, v2.4s, #0
|
|
62
|
+
orr v2.4s, #0x1
|
|
63
|
+
mla v18.4s, v2.4s, v6.4s
|
|
64
|
+
and v18.16b, v18.16b, v24.16b
|
|
65
|
+
sqdmulh v19.4s, v3.4s, v23.4s
|
|
66
|
+
srshr v19.4s, v19.4s, #0x12
|
|
67
|
+
cmgt v25.4s, v3.4s, v21.4s
|
|
68
|
+
mls v3.4s, v19.4s, v22.4s
|
|
69
|
+
bic v19.16b, v19.16b, v25.16b
|
|
70
|
+
add v3.4s, v3.4s, v25.4s
|
|
71
|
+
cmle v3.4s, v3.4s, #0
|
|
72
|
+
orr v3.4s, #0x1
|
|
73
|
+
mla v19.4s, v3.4s, v7.4s
|
|
74
|
+
and v19.16b, v19.16b, v24.16b
|
|
75
|
+
sqdmulh v16.4s, v0.4s, v23.4s
|
|
76
|
+
srshr v16.4s, v16.4s, #0x12
|
|
77
|
+
cmgt v25.4s, v0.4s, v21.4s
|
|
78
|
+
mls v0.4s, v16.4s, v22.4s
|
|
79
|
+
bic v16.16b, v16.16b, v25.16b
|
|
80
|
+
add v0.4s, v0.4s, v25.4s
|
|
81
|
+
cmle v0.4s, v0.4s, #0
|
|
82
|
+
orr v0.4s, #0x1
|
|
83
|
+
mla v16.4s, v0.4s, v4.4s
|
|
84
|
+
and v16.16b, v16.16b, v24.16b
|
|
85
|
+
str q17, [x0, #0x10]
|
|
86
|
+
str q18, [x0, #0x20]
|
|
87
|
+
str q19, [x0, #0x30]
|
|
88
|
+
str q16, [x0], #0x40
|
|
89
|
+
subs x3, x3, #0x1
|
|
90
|
+
b.ne Lpoly_use_hint_32_loop
|
|
91
|
+
ret
|
|
92
|
+
.cfi_endproc
|
|
93
|
+
|
|
94
|
+
MLD_ASM_FN_SIZE(poly_use_hint_32_aarch64_asm)
|
|
95
|
+
|
|
96
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_VERIFY_API && \
|
|
97
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
98
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
|
|
99
|
+
|| MLD_CONFIG_PARAMETER_SET == 87) */
|
|
100
|
+
|
|
101
|
+
#if defined(__ELF__)
|
|
102
|
+
.section .note.GNU-stack,"",%progbits
|
|
103
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
4
|
+
*/
|
|
5
|
+
#include "../../../common.h"
|
|
6
|
+
|
|
7
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
8
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
12
|
+
* dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/*
 * poly_use_hint_88_aarch64_asm
 *
 * Register use as visible in this routine:
 *   x0      - pointer to 32-bit lanes that are loaded, transformed and
 *             stored back in place; advanced by 64 bytes per iteration.
 *   x1      - pointer to 32-bit lanes that are only read; advanced by
 *             64 bytes per iteration. Presumably the hint values, going
 *             by the routine name - confirm against the C prototype.
 *   x3      - loop counter: 16 iterations, i.e. 16 * 64 = 1024 bytes are
 *             processed through each pointer.
 *   w4, w5, w7, w11 - scratch used to build the constants.
 *   v20-v24 - broadcast constants; v25 - per-lane mask scratch.
 *   v0-v3   - input lanes, later reused for the remainder / sign step.
 *   v4-v7   - lanes read from x1.
 *   v16-v19 - results.
 *
 * Per lane, with D = 190464 and T = 8285184:
 *   quot = round-shifted fixed-point estimate of in / D
 *   rem  = in - quot * D
 *   if (in > T) { quot = 0; rem -= 1; }
 *   quot += (rem <= 0 ? -1 : +1) * lane_from_x1
 *   if (quot > 43) quot = 0;          (signed compare)
 *   quot = umin(quot, 43);            (so -1 becomes 43)
 *
 * The routine does not touch the stack.
 */
.text
|
|
16
|
+
.balign 4
|
|
17
|
+
.global MLD_ASM_NAMESPACE(poly_use_hint_88_aarch64_asm)
|
|
18
|
+
MLD_ASM_FN_SYMBOL(poly_use_hint_88_aarch64_asm)
|
|
19
|
+
|
|
20
|
+
.cfi_startproc
|
|
21
|
+
mov w4, #0xe001 // =57345
|
|
22
|
+
movk w4, #0x7f, lsl #16 // w4 = 0x7fe001 = 8380417
|
|
23
|
+
dup v20.4s, w4 // v20 = 8380417 in every lane (matches the ML-DSA modulus q); not read again in this routine
|
|
24
|
+
mov w5, #0x6c00 // =27648
|
|
25
|
+
movk w5, #0x7e, lsl #16 // w5 = 0x7e6c00 = 8285184
|
|
26
|
+
dup v21.4s, w5 // v21 = threshold T; inputs above it take the "quotient = 0" path
|
|
27
|
+
mov w7, #0xe800 // =59392
|
|
28
|
+
movk w7, #0x2, lsl #16 // w7 = 0x2e800 = 190464
|
|
29
|
+
dup v22.4s, w7 // v22 = divisor D = 190464 = (8380417 - 1) / 44
|
|
30
|
+
mov w11, #0x581 // =1409
|
|
31
|
+
movk w11, #0x5816, lsl #16 // w11 = 0x58160581 = 1477838209
|
|
32
|
+
dup v23.4s, w11 // v23 = fixed-point reciprocal of D (1477838209 * 190464 is just below 2^48)
|
|
33
|
+
movi v24.4s, #0x2b // v24 = 43, the largest value the final clamp lets through
|
|
34
|
+
mov x3, #0x10 // =16
|
|
35
|
+
|
|
36
|
+
Lpoly_use_hint_88_loop: // one iteration handles 16 lanes (64 bytes) from x0 and from x1
|
|
37
|
+
ldr q1, [x0, #0x10] // in/out lanes 4..7
|
|
38
|
+
ldr q2, [x0, #0x20] // in/out lanes 8..11
|
|
39
|
+
ldr q3, [x0, #0x30] // in/out lanes 12..15
|
|
40
|
+
ldr q0, [x0] // in/out lanes 0..3 (x0 is advanced by the last store, not here)
|
|
41
|
+
ldr q5, [x1, #0x10] // x1 lanes 4..7
|
|
42
|
+
ldr q6, [x1, #0x20] // x1 lanes 8..11
|
|
43
|
+
ldr q7, [x1, #0x30] // x1 lanes 12..15
|
|
44
|
+
ldr q4, [x1], #0x40 // x1 lanes 0..3, then x1 += 64
|
|
45
|
+
sqdmulh v17.4s, v1.4s, v23.4s // lanes 4..7: v17 = high 32 bits of 2 * in * reciprocal
|
|
46
|
+
srshr v17.4s, v17.4s, #0x11 // rounding shift right by 17 -> quotient estimate in / D
|
|
47
|
+
cmgt v25.4s, v1.4s, v21.4s // mask = all-ones where in > T (signed)
|
|
48
|
+
mls v1.4s, v17.4s, v22.4s // v1 = in - quot * D (remainder)
|
|
49
|
+
bic v17.16b, v17.16b, v25.16b // quot = 0 in masked lanes
|
|
50
|
+
add v1.4s, v1.4s, v25.4s // remainder -= 1 in masked lanes (mask value is -1)
|
|
51
|
+
cmle v1.4s, v1.4s, #0 // all-ones where remainder <= 0
|
|
52
|
+
orr v1.4s, #0x1 // -> -1 where remainder <= 0, +1 otherwise
|
|
53
|
+
mla v17.4s, v1.4s, v5.4s // quot += (+/-1) * x1 lane
|
|
54
|
+
cmgt v25.4s, v17.4s, v24.4s // mask = all-ones where quot > 43 (signed)
|
|
55
|
+
bic v17.16b, v17.16b, v25.16b // values above 43 become 0
|
|
56
|
+
umin v17.4s, v17.4s, v24.4s // unsigned min with 43: -1 (0xffffffff) becomes 43
|
|
57
|
+
sqdmulh v18.4s, v2.4s, v23.4s // lanes 8..11: same sequence with in = v2, x1 lanes = v6, result = v18
|
|
58
|
+
srshr v18.4s, v18.4s, #0x11
|
|
59
|
+
cmgt v25.4s, v2.4s, v21.4s
|
|
60
|
+
mls v2.4s, v18.4s, v22.4s
|
|
61
|
+
bic v18.16b, v18.16b, v25.16b
|
|
62
|
+
add v2.4s, v2.4s, v25.4s
|
|
63
|
+
cmle v2.4s, v2.4s, #0
|
|
64
|
+
orr v2.4s, #0x1
|
|
65
|
+
mla v18.4s, v2.4s, v6.4s
|
|
66
|
+
cmgt v25.4s, v18.4s, v24.4s
|
|
67
|
+
bic v18.16b, v18.16b, v25.16b
|
|
68
|
+
umin v18.4s, v18.4s, v24.4s
|
|
69
|
+
sqdmulh v19.4s, v3.4s, v23.4s // lanes 12..15: same sequence with in = v3, x1 lanes = v7, result = v19
|
|
70
|
+
srshr v19.4s, v19.4s, #0x11
|
|
71
|
+
cmgt v25.4s, v3.4s, v21.4s
|
|
72
|
+
mls v3.4s, v19.4s, v22.4s
|
|
73
|
+
bic v19.16b, v19.16b, v25.16b
|
|
74
|
+
add v3.4s, v3.4s, v25.4s
|
|
75
|
+
cmle v3.4s, v3.4s, #0
|
|
76
|
+
orr v3.4s, #0x1
|
|
77
|
+
mla v19.4s, v3.4s, v7.4s
|
|
78
|
+
cmgt v25.4s, v19.4s, v24.4s
|
|
79
|
+
bic v19.16b, v19.16b, v25.16b
|
|
80
|
+
umin v19.4s, v19.4s, v24.4s
|
|
81
|
+
sqdmulh v16.4s, v0.4s, v23.4s // lanes 0..3: same sequence with in = v0, x1 lanes = v4, result = v16
|
|
82
|
+
srshr v16.4s, v16.4s, #0x11
|
|
83
|
+
cmgt v25.4s, v0.4s, v21.4s
|
|
84
|
+
mls v0.4s, v16.4s, v22.4s
|
|
85
|
+
bic v16.16b, v16.16b, v25.16b
|
|
86
|
+
add v0.4s, v0.4s, v25.4s
|
|
87
|
+
cmle v0.4s, v0.4s, #0
|
|
88
|
+
orr v0.4s, #0x1
|
|
89
|
+
mla v16.4s, v0.4s, v4.4s
|
|
90
|
+
cmgt v25.4s, v16.4s, v24.4s
|
|
91
|
+
bic v16.16b, v16.16b, v25.16b
|
|
92
|
+
umin v16.4s, v16.4s, v24.4s
|
|
93
|
+
str q17, [x0, #0x10] // results overwrite the lanes that were loaded from x0
|
|
94
|
+
str q18, [x0, #0x20]
|
|
95
|
+
str q19, [x0, #0x30]
|
|
96
|
+
str q16, [x0], #0x40 // lanes 0..3, then x0 += 64
|
|
97
|
+
subs x3, x3, #0x1 // one 16-lane group done
|
|
98
|
+
b.ne Lpoly_use_hint_88_loop // repeat until the counter reaches 0
|
|
99
|
+
ret
|
|
100
|
+
.cfi_endproc
|
|
101
|
+
|
|
102
|
+
MLD_ASM_FN_SIZE(poly_use_hint_88_aarch64_asm)
|
|
103
|
+
|
|
104
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_VERIFY_API && \
|
|
105
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
106
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
|
|
107
|
+
*/
|
|
108
|
+
|
|
109
|
+
/* ELF targets only: emit an empty .note.GNU-stack section. GNU toolchains
 * treat this as the marker for an object that does not need an executable
 * stack. */
#if defined(__ELF__)
|
|
110
|
+
.section .note.GNU-stack,"",%progbits
|
|
111
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* Copyright (c) The mlkem-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#include "../../../common.h"
|
|
8
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
9
|
+
(!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \
|
|
10
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
11
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
|
|
12
|
+
|
|
13
|
+
/*
|
|
14
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
15
|
+
* dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/*
 * polyz_unpack_17_aarch64_asm
 *
 * Register use as visible in this routine:
 *   x0      - output pointer; 64 bytes (16 32-bit lanes) are stored per
 *             iteration and the pointer is advanced by 64.
 *   x1      - input byte pointer; advanced by 20 + 16 = 36 bytes per
 *             iteration.
 *   x2      - pointer to four 16-byte TBL index vectors, loaded once into
 *             v24-v27 (their contents are defined by the caller and are
 *             not visible here).
 *   x3      - scratch for building v28; x9 - loop counter (16 iterations).
 *   v28     - per-lane shift counts for USHL, v29 - 18-bit mask,
 *             v30 - the constant 0x20000.
 *   v0-v2   - raw input bytes, v4-v7 - unpacked lanes.
 *
 * Per iteration 36 input bytes become 16 outputs (16 * 18 bits = 36 bytes).
 * Each output lane is computed as
 *   0x20000 - ((shuffled_bytes >> shift) & 0x3ffff)
 * with shift = 0, 2, 4, 6 for the four lanes of a vector.
 *
 * Over 16 iterations the routine reads 16 * 36 = 576 bytes from x1 and
 * writes 16 * 64 = 1024 bytes to x0. It does not touch the stack.
 */
.text
|
|
19
|
+
.balign 4
|
|
20
|
+
.global MLD_ASM_NAMESPACE(polyz_unpack_17_aarch64_asm)
|
|
21
|
+
MLD_ASM_FN_SYMBOL(polyz_unpack_17_aarch64_asm)
|
|
22
|
+
|
|
23
|
+
.cfi_startproc
|
|
24
|
+
ldr q24, [x2] // v24 = TBL indices used for output lanes 0..3
|
|
25
|
+
ldr q25, [x2, #0x10] // v25 = TBL indices used for output lanes 4..7
|
|
26
|
+
ldr q26, [x2, #0x20] // v26 = TBL indices used for output lanes 8..11
|
|
27
|
+
ldr q27, [x2, #0x30] // v27 = TBL indices used for output lanes 12..15
|
|
28
|
+
mov x3, #0xfe00000000 // =1090921693184
|
|
29
|
+
mov v28.d[0], x3 // v28 32-bit lanes 0,1 = 0x00, 0xfe
|
|
30
|
+
mov x3, #0xfc // =252
|
|
31
|
+
movk x3, #0xfa, lsl #32 // x3 = 0x000000fa000000fc
|
|
32
|
+
mov v28.d[1], x3 // v28 lanes 2,3 = 0xfc, 0xfa; USHL reads the low byte as signed, so the counts are 0, -2, -4, -6
|
|
33
|
+
movi v29.4s, #0x3, msl #16 // v29 = 0x3ffff in every lane (18-bit mask)
|
|
34
|
+
movi v30.4s, #0x2, lsl #16 // v30 = 0x20000 in every lane
|
|
35
|
+
mov x9, #0x10 // =16
|
|
36
|
+
|
|
37
|
+
Lpolyz_unpack_17_loop: // one iteration: 36 input bytes -> 16 output lanes
|
|
38
|
+
ld1 { v0.16b, v1.16b }, [x1] // v0:v1 = input bytes 0..31 of this group (no writeback)
|
|
39
|
+
add x1, x1, #0x14 // x1 += 20
|
|
40
|
+
ld1 { v2.16b }, [x1], #16 // v2 = input bytes 20..35 of this group; x1 now points at the next group
|
|
41
|
+
tbl v4.16b, { v0.16b }, v24.16b // byte shuffle from v0 -> raw lanes 0..3
|
|
42
|
+
tbl v5.16b, { v0.16b, v1.16b }, v25.16b // byte shuffle from v0:v1 -> raw lanes 4..7
|
|
43
|
+
tbl v6.16b, { v1.16b }, v26.16b // byte shuffle from v1 -> raw lanes 8..11
|
|
44
|
+
tbl v7.16b, { v1.16b, v2.16b }, v27.16b // byte shuffle from v1:v2 -> raw lanes 12..15
|
|
45
|
+
ushl v4.4s, v4.4s, v28.4s // negative counts: shift lanes right by 0, 2, 4, 6 bits
|
|
46
|
+
and v4.16b, v4.16b, v29.16b // keep the low 18 bits
|
|
47
|
+
sub v4.4s, v30.4s, v4.4s // lane = 0x20000 - field
|
|
48
|
+
ushl v5.4s, v5.4s, v28.4s // same shift / mask / subtract for lanes 4..7
|
|
49
|
+
and v5.16b, v5.16b, v29.16b
|
|
50
|
+
sub v5.4s, v30.4s, v5.4s
|
|
51
|
+
ushl v6.4s, v6.4s, v28.4s // same for lanes 8..11
|
|
52
|
+
and v6.16b, v6.16b, v29.16b
|
|
53
|
+
sub v6.4s, v30.4s, v6.4s
|
|
54
|
+
ushl v7.4s, v7.4s, v28.4s // same for lanes 12..15
|
|
55
|
+
and v7.16b, v7.16b, v29.16b
|
|
56
|
+
sub v7.4s, v30.4s, v7.4s
|
|
57
|
+
str q5, [x0, #0x10] // output lanes 4..7
|
|
58
|
+
str q6, [x0, #0x20] // output lanes 8..11
|
|
59
|
+
str q7, [x0, #0x30] // output lanes 12..15
|
|
60
|
+
str q4, [x0], #0x40 // output lanes 0..3, then x0 += 64
|
|
61
|
+
subs x9, x9, #0x1 // one 16-lane group done
|
|
62
|
+
b.ne Lpolyz_unpack_17_loop // repeat until the counter reaches 0
|
|
63
|
+
ret
|
|
64
|
+
.cfi_endproc
|
|
65
|
+
|
|
66
|
+
MLD_ASM_FN_SIZE(polyz_unpack_17_aarch64_asm)
|
|
67
|
+
|
|
68
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && (!MLD_CONFIG_NO_SIGN_API || \
|
|
69
|
+
!MLD_CONFIG_NO_VERIFY_API) && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
70
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
|
|
71
|
+
*/
|
|
72
|
+
|
|
73
|
+
/* ELF targets only: emit an empty .note.GNU-stack section. GNU toolchains
 * treat this as the marker for an object that does not need an executable
 * stack. */
#if defined(__ELF__)
|
|
74
|
+
.section .note.GNU-stack,"",%progbits
|
|
75
|
+
#endif
|