pq_crypto 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/SECURITY.md +7 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
- data/lib/pq_crypto/version.rb +1 -1
- data/script/vendor_libs.rb +3 -3
- metadata +41 -35
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* Copyright (c) The mlkem-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#include "../../../common.h"
|
|
8
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
9
|
+
(!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \
|
|
10
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
11
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || (MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))
|
|
12
|
+
|
|
13
|
+
/*
|
|
14
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
15
|
+
* dev/aarch64_opt/src/polyz_unpack_19_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
.text
|
|
19
|
+
.balign 4
|
|
20
|
+
.global MLD_ASM_NAMESPACE(polyz_unpack_19_aarch64_asm)
|
|
21
|
+
MLD_ASM_FN_SYMBOL(polyz_unpack_19_aarch64_asm)
|
|
22
|
+
|
|
23
|
+
.cfi_startproc
|
|
24
|
+
ldr q24, [x2]
|
|
25
|
+
ldr q25, [x2, #0x10]
|
|
26
|
+
ldr q26, [x2, #0x20]
|
|
27
|
+
ldr q27, [x2, #0x30]
|
|
28
|
+
mov x3, #0xfc00000000 // =1082331758592
|
|
29
|
+
dup v28.2d, x3
|
|
30
|
+
movi v29.4s, #0xf, msl #16
|
|
31
|
+
movi v30.4s, #0x8, lsl #16
|
|
32
|
+
mov x9, #0x10 // =16
|
|
33
|
+
|
|
34
|
+
Lpolyz_unpack_19_loop:
|
|
35
|
+
ld1 { v0.16b, v1.16b }, [x1]
|
|
36
|
+
add x1, x1, #0x18
|
|
37
|
+
ld1 { v2.16b }, [x1], #16
|
|
38
|
+
tbl v4.16b, { v0.16b }, v24.16b
|
|
39
|
+
tbl v5.16b, { v0.16b, v1.16b }, v25.16b
|
|
40
|
+
tbl v6.16b, { v1.16b }, v26.16b
|
|
41
|
+
tbl v7.16b, { v1.16b, v2.16b }, v27.16b
|
|
42
|
+
ushl v4.4s, v4.4s, v28.4s
|
|
43
|
+
and v4.16b, v4.16b, v29.16b
|
|
44
|
+
sub v4.4s, v30.4s, v4.4s
|
|
45
|
+
ushl v5.4s, v5.4s, v28.4s
|
|
46
|
+
and v5.16b, v5.16b, v29.16b
|
|
47
|
+
sub v5.4s, v30.4s, v5.4s
|
|
48
|
+
ushl v6.4s, v6.4s, v28.4s
|
|
49
|
+
and v6.16b, v6.16b, v29.16b
|
|
50
|
+
sub v6.4s, v30.4s, v6.4s
|
|
51
|
+
ushl v7.4s, v7.4s, v28.4s
|
|
52
|
+
and v7.16b, v7.16b, v29.16b
|
|
53
|
+
sub v7.4s, v30.4s, v7.4s
|
|
54
|
+
str q5, [x0, #0x10]
|
|
55
|
+
str q6, [x0, #0x20]
|
|
56
|
+
str q7, [x0, #0x30]
|
|
57
|
+
str q4, [x0], #0x40
|
|
58
|
+
subs x9, x9, #0x1
|
|
59
|
+
b.ne Lpolyz_unpack_19_loop
|
|
60
|
+
ret
|
|
61
|
+
.cfi_endproc
|
|
62
|
+
|
|
63
|
+
MLD_ASM_FN_SIZE(polyz_unpack_19_aarch64_asm)
|
|
64
|
+
|
|
65
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && (!MLD_CONFIG_NO_SIGN_API || \
|
|
66
|
+
!MLD_CONFIG_NO_VERIFY_API) && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
67
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
|
|
68
|
+
|| MLD_CONFIG_PARAMETER_SET == 87) */
|
|
69
|
+
|
|
70
|
+
#if defined(__ELF__)
|
|
71
|
+
.section .note.GNU-stack,"",%progbits
|
|
72
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c
CHANGED
|
@@ -16,22 +16,34 @@
|
|
|
16
16
|
|
|
17
17
|
#include "arith_native_aarch64.h"
|
|
18
18
|
|
|
19
|
+
#if !defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)
|
|
19
20
|
/* Table of indices used for tbl instructions in polyz_unpack_{17,19}.
|
|
20
21
|
* See autogen for details. */
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
#if defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44
|
|
24
|
+
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint8_t
|
|
25
|
+
mld_polyz_unpack_17_indices[64] = {
|
|
26
|
+
0, 1, 2, 255, 2, 3, 4, 255, 4, 5, 6, 255, 6, 7, 8, 255,
|
|
27
|
+
9, 10, 11, 255, 11, 12, 13, 255, 13, 14, 15, 255, 15, 16, 17, 255,
|
|
28
|
+
2, 3, 4, 255, 4, 5, 6, 255, 6, 7, 8, 255, 8, 9, 10, 255,
|
|
29
|
+
11, 12, 13, 255, 13, 14, 15, 255, 15, 28, 29, 255, 29, 30, 31, 255,
|
|
27
30
|
};
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
#endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44 \
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
#if defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || \
|
|
35
|
+
(MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87)
|
|
36
|
+
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint8_t
|
|
37
|
+
mld_polyz_unpack_19_indices[64] = {
|
|
38
|
+
0, 1, 2, 255, 2, 3, 4, 255, 5, 6, 7, 255, 7, 8, 9, 255,
|
|
39
|
+
10, 11, 12, 255, 12, 13, 14, 255, 15, 16, 17, 255, 17, 18, 19, 255,
|
|
40
|
+
4, 5, 6, 255, 6, 7, 8, 255, 9, 10, 11, 255, 11, 12, 13, 255,
|
|
41
|
+
14, 15, 24, 255, 24, 25, 26, 255, 27, 28, 29, 255, 29, 30, 31, 255,
|
|
34
42
|
};
|
|
43
|
+
#endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
|
|
44
|
+
|| MLD_CONFIG_PARAMETER_SET == 87 */
|
|
45
|
+
|
|
46
|
+
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API */
|
|
35
47
|
|
|
36
48
|
#else /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
37
49
|
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* Copyright (c) The mlkem-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#include "../../../common.h"
|
|
8
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
9
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
10
|
+
|
|
11
|
+
/*
|
|
12
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
13
|
+
* dev/aarch64_opt/src/rej_uniform_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
.text
|
|
17
|
+
.balign 4
|
|
18
|
+
.global MLD_ASM_NAMESPACE(rej_uniform_aarch64_asm)
|
|
19
|
+
MLD_ASM_FN_SYMBOL(rej_uniform_aarch64_asm)
|
|
20
|
+
|
|
21
|
+
.cfi_startproc
|
|
22
|
+
sub sp, sp, #0x440
|
|
23
|
+
.cfi_adjust_cfa_offset 0x440
|
|
24
|
+
mov x7, #0x1 // =1
|
|
25
|
+
movk x7, #0x2, lsl #32
|
|
26
|
+
mov v31.d[0], x7
|
|
27
|
+
mov x7, #0x4 // =4
|
|
28
|
+
movk x7, #0x8, lsl #32
|
|
29
|
+
mov v31.d[1], x7
|
|
30
|
+
mov w7, #0xe001 // =57345
|
|
31
|
+
movk w7, #0x7f, lsl #16
|
|
32
|
+
dup v30.4s, w7
|
|
33
|
+
mov x8, sp
|
|
34
|
+
mov x7, x8
|
|
35
|
+
mov x11, #0x0 // =0
|
|
36
|
+
eor v16.16b, v16.16b, v16.16b
|
|
37
|
+
|
|
38
|
+
Lrej_uniform_initial_zero:
|
|
39
|
+
str q16, [x7], #0x40
|
|
40
|
+
stur q16, [x7, #-0x30]
|
|
41
|
+
stur q16, [x7, #-0x20]
|
|
42
|
+
stur q16, [x7, #-0x10]
|
|
43
|
+
add x11, x11, #0x10
|
|
44
|
+
cmp x11, #0x100
|
|
45
|
+
b.lt Lrej_uniform_initial_zero
|
|
46
|
+
mov x7, x8
|
|
47
|
+
mov x9, #0x0 // =0
|
|
48
|
+
mov x4, #0x100 // =256
|
|
49
|
+
cmp x2, #0x30
|
|
50
|
+
b.lo Lrej_uniform_loop48_end
|
|
51
|
+
|
|
52
|
+
Lrej_uniform_loop48:
|
|
53
|
+
cmp x9, x4
|
|
54
|
+
b.hs Lrej_uniform_memory_copy
|
|
55
|
+
sub x2, x2, #0x30
|
|
56
|
+
ld3 { v0.16b, v1.16b, v2.16b }, [x1], #48
|
|
57
|
+
movi v4.16b, #0x80
|
|
58
|
+
bic v2.16b, v2.16b, v4.16b
|
|
59
|
+
zip1 v4.16b, v0.16b, v1.16b
|
|
60
|
+
zip2 v5.16b, v0.16b, v1.16b
|
|
61
|
+
ushll v6.8h, v2.8b, #0x0
|
|
62
|
+
ushll2 v7.8h, v2.16b, #0x0
|
|
63
|
+
zip1 v16.8h, v4.8h, v6.8h
|
|
64
|
+
zip2 v17.8h, v4.8h, v6.8h
|
|
65
|
+
zip1 v18.8h, v5.8h, v7.8h
|
|
66
|
+
zip2 v19.8h, v5.8h, v7.8h
|
|
67
|
+
cmhi v4.4s, v30.4s, v16.4s
|
|
68
|
+
cmhi v5.4s, v30.4s, v17.4s
|
|
69
|
+
cmhi v6.4s, v30.4s, v18.4s
|
|
70
|
+
cmhi v7.4s, v30.4s, v19.4s
|
|
71
|
+
and v4.16b, v4.16b, v31.16b
|
|
72
|
+
and v5.16b, v5.16b, v31.16b
|
|
73
|
+
and v6.16b, v6.16b, v31.16b
|
|
74
|
+
and v7.16b, v7.16b, v31.16b
|
|
75
|
+
uaddlv d20, v4.4s
|
|
76
|
+
uaddlv d21, v5.4s
|
|
77
|
+
uaddlv d22, v6.4s
|
|
78
|
+
uaddlv d23, v7.4s
|
|
79
|
+
fmov x12, d20
|
|
80
|
+
fmov x13, d21
|
|
81
|
+
fmov x14, d22
|
|
82
|
+
fmov x15, d23
|
|
83
|
+
ldr q24, [x3, x12, lsl #4]
|
|
84
|
+
ldr q25, [x3, x13, lsl #4]
|
|
85
|
+
ldr q26, [x3, x14, lsl #4]
|
|
86
|
+
ldr q27, [x3, x15, lsl #4]
|
|
87
|
+
cnt v4.16b, v4.16b
|
|
88
|
+
cnt v5.16b, v5.16b
|
|
89
|
+
cnt v6.16b, v6.16b
|
|
90
|
+
cnt v7.16b, v7.16b
|
|
91
|
+
uaddlv d20, v4.4s
|
|
92
|
+
uaddlv d21, v5.4s
|
|
93
|
+
uaddlv d22, v6.4s
|
|
94
|
+
uaddlv d23, v7.4s
|
|
95
|
+
fmov x12, d20
|
|
96
|
+
fmov x13, d21
|
|
97
|
+
fmov x14, d22
|
|
98
|
+
fmov x15, d23
|
|
99
|
+
tbl v16.16b, { v16.16b }, v24.16b
|
|
100
|
+
tbl v17.16b, { v17.16b }, v25.16b
|
|
101
|
+
tbl v18.16b, { v18.16b }, v26.16b
|
|
102
|
+
tbl v19.16b, { v19.16b }, v27.16b
|
|
103
|
+
st1 { v16.4s }, [x7]
|
|
104
|
+
add x7, x7, x12, lsl #2
|
|
105
|
+
st1 { v17.4s }, [x7]
|
|
106
|
+
add x7, x7, x13, lsl #2
|
|
107
|
+
st1 { v18.4s }, [x7]
|
|
108
|
+
add x7, x7, x14, lsl #2
|
|
109
|
+
st1 { v19.4s }, [x7]
|
|
110
|
+
add x7, x7, x15, lsl #2
|
|
111
|
+
add x12, x12, x13
|
|
112
|
+
add x14, x14, x15
|
|
113
|
+
add x9, x9, x12
|
|
114
|
+
add x9, x9, x14
|
|
115
|
+
cmp x2, #0x30
|
|
116
|
+
b.hs Lrej_uniform_loop48
|
|
117
|
+
|
|
118
|
+
Lrej_uniform_loop48_end:
|
|
119
|
+
cmp x9, x4
|
|
120
|
+
b.hs Lrej_uniform_memory_copy
|
|
121
|
+
cmp x2, #0x18
|
|
122
|
+
b.lo Lrej_uniform_memory_copy
|
|
123
|
+
sub x2, x2, #0x18
|
|
124
|
+
ld3 { v0.8b, v1.8b, v2.8b }, [x1], #24
|
|
125
|
+
movi v4.16b, #0x80
|
|
126
|
+
bic v2.16b, v2.16b, v4.16b
|
|
127
|
+
zip1 v4.16b, v0.16b, v1.16b
|
|
128
|
+
ushll v6.8h, v2.8b, #0x0
|
|
129
|
+
zip1 v16.8h, v4.8h, v6.8h
|
|
130
|
+
zip2 v17.8h, v4.8h, v6.8h
|
|
131
|
+
cmhi v4.4s, v30.4s, v16.4s
|
|
132
|
+
cmhi v5.4s, v30.4s, v17.4s
|
|
133
|
+
and v4.16b, v4.16b, v31.16b
|
|
134
|
+
and v5.16b, v5.16b, v31.16b
|
|
135
|
+
uaddlv d20, v4.4s
|
|
136
|
+
uaddlv d21, v5.4s
|
|
137
|
+
fmov x12, d20
|
|
138
|
+
fmov x13, d21
|
|
139
|
+
ldr q24, [x3, x12, lsl #4]
|
|
140
|
+
ldr q25, [x3, x13, lsl #4]
|
|
141
|
+
cnt v4.16b, v4.16b
|
|
142
|
+
cnt v5.16b, v5.16b
|
|
143
|
+
uaddlv d20, v4.4s
|
|
144
|
+
uaddlv d21, v5.4s
|
|
145
|
+
fmov x12, d20
|
|
146
|
+
fmov x13, d21
|
|
147
|
+
tbl v16.16b, { v16.16b }, v24.16b
|
|
148
|
+
tbl v17.16b, { v17.16b }, v25.16b
|
|
149
|
+
st1 { v16.4s }, [x7]
|
|
150
|
+
add x7, x7, x12, lsl #2
|
|
151
|
+
st1 { v17.4s }, [x7]
|
|
152
|
+
add x7, x7, x13, lsl #2
|
|
153
|
+
add x9, x9, x12
|
|
154
|
+
add x9, x9, x13
|
|
155
|
+
|
|
156
|
+
Lrej_uniform_memory_copy:
|
|
157
|
+
cmp x9, x4
|
|
158
|
+
csel x9, x9, x4, lo
|
|
159
|
+
mov x11, #0x0 // =0
|
|
160
|
+
mov x7, x8
|
|
161
|
+
|
|
162
|
+
Lrej_uniform_final_copy:
|
|
163
|
+
ldr q16, [x7], #0x40
|
|
164
|
+
ldur q17, [x7, #-0x30]
|
|
165
|
+
ldur q18, [x7, #-0x20]
|
|
166
|
+
ldur q19, [x7, #-0x10]
|
|
167
|
+
str q16, [x0], #0x40
|
|
168
|
+
stur q17, [x0, #-0x30]
|
|
169
|
+
stur q18, [x0, #-0x20]
|
|
170
|
+
stur q19, [x0, #-0x10]
|
|
171
|
+
add x11, x11, #0x10
|
|
172
|
+
cmp x11, #0x100
|
|
173
|
+
b.lt Lrej_uniform_final_copy
|
|
174
|
+
mov x0, x9
|
|
175
|
+
b Lrej_uniform_return
|
|
176
|
+
|
|
177
|
+
Lrej_uniform_return:
|
|
178
|
+
add sp, sp, #0x440
|
|
179
|
+
.cfi_adjust_cfa_offset -0x440
|
|
180
|
+
ret
|
|
181
|
+
.cfi_endproc
|
|
182
|
+
|
|
183
|
+
MLD_ASM_FN_SIZE(rej_uniform_aarch64_asm)
|
|
184
|
+
|
|
185
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
186
|
+
|
|
187
|
+
#if defined(__ELF__)
|
|
188
|
+
.section .note.GNU-stack,"",%progbits
|
|
189
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* Copyright (c) The mlkem-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#include "../../../common.h"
|
|
8
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
9
|
+
!defined(MLD_CONFIG_NO_KEYPAIR_API) && \
|
|
10
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
11
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_ETA == 2)
|
|
12
|
+
|
|
13
|
+
/*
|
|
14
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
15
|
+
* dev/aarch64_opt/src/rej_uniform_eta2_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
.text
|
|
19
|
+
.balign 4
|
|
20
|
+
.global MLD_ASM_NAMESPACE(rej_uniform_eta2_aarch64_asm)
|
|
21
|
+
MLD_ASM_FN_SYMBOL(rej_uniform_eta2_aarch64_asm)
|
|
22
|
+
|
|
23
|
+
.cfi_startproc
|
|
24
|
+
sub sp, sp, #0x240
|
|
25
|
+
.cfi_adjust_cfa_offset 0x240
|
|
26
|
+
mov x7, #0x1 // =1
|
|
27
|
+
movk x7, #0x2, lsl #16
|
|
28
|
+
movk x7, #0x4, lsl #32
|
|
29
|
+
movk x7, #0x8, lsl #48
|
|
30
|
+
mov v31.d[0], x7
|
|
31
|
+
mov x7, #0x10 // =16
|
|
32
|
+
movk x7, #0x20, lsl #16
|
|
33
|
+
movk x7, #0x40, lsl #32
|
|
34
|
+
movk x7, #0x80, lsl #48
|
|
35
|
+
mov v31.d[1], x7
|
|
36
|
+
movi v30.8h, #0xf
|
|
37
|
+
mov x8, sp
|
|
38
|
+
mov x7, x8
|
|
39
|
+
mov x11, #0x0 // =0
|
|
40
|
+
eor v16.16b, v16.16b, v16.16b
|
|
41
|
+
|
|
42
|
+
Lrej_uniform_eta2_initial_zero:
|
|
43
|
+
str q16, [x7], #0x40
|
|
44
|
+
stur q16, [x7, #-0x30]
|
|
45
|
+
stur q16, [x7, #-0x20]
|
|
46
|
+
stur q16, [x7, #-0x10]
|
|
47
|
+
add x11, x11, #0x20
|
|
48
|
+
cmp x11, #0x100
|
|
49
|
+
b.lt Lrej_uniform_eta2_initial_zero
|
|
50
|
+
mov x7, x8
|
|
51
|
+
mov x9, #0x0 // =0
|
|
52
|
+
mov x4, #0x100 // =256
|
|
53
|
+
|
|
54
|
+
Lrej_uniform_eta2_loop8:
|
|
55
|
+
cmp x9, x4
|
|
56
|
+
b.hs Lrej_uniform_eta2_memory_copy
|
|
57
|
+
sub x2, x2, #0x8
|
|
58
|
+
ld1 { v0.8b }, [x1], #8
|
|
59
|
+
movi v26.8b, #0xf
|
|
60
|
+
and v27.8b, v0.8b, v26.8b
|
|
61
|
+
ushr v28.8b, v0.8b, #0x4
|
|
62
|
+
zip1 v26.8b, v27.8b, v28.8b
|
|
63
|
+
zip2 v29.8b, v27.8b, v28.8b
|
|
64
|
+
ushll v16.8h, v26.8b, #0x0
|
|
65
|
+
ushll v17.8h, v29.8b, #0x0
|
|
66
|
+
cmhi v4.8h, v30.8h, v16.8h
|
|
67
|
+
cmhi v5.8h, v30.8h, v17.8h
|
|
68
|
+
and v4.16b, v4.16b, v31.16b
|
|
69
|
+
and v5.16b, v5.16b, v31.16b
|
|
70
|
+
uaddlv s20, v4.8h
|
|
71
|
+
uaddlv s21, v5.8h
|
|
72
|
+
fmov w12, s20
|
|
73
|
+
fmov w13, s21
|
|
74
|
+
ldr q24, [x3, x12, lsl #4]
|
|
75
|
+
ldr q25, [x3, x13, lsl #4]
|
|
76
|
+
cnt v4.16b, v4.16b
|
|
77
|
+
cnt v5.16b, v5.16b
|
|
78
|
+
uaddlv s20, v4.8h
|
|
79
|
+
uaddlv s21, v5.8h
|
|
80
|
+
fmov w12, s20
|
|
81
|
+
fmov w13, s21
|
|
82
|
+
tbl v16.16b, { v16.16b }, v24.16b
|
|
83
|
+
tbl v17.16b, { v17.16b }, v25.16b
|
|
84
|
+
st1 { v16.8h }, [x7]
|
|
85
|
+
add x7, x7, x12, lsl #1
|
|
86
|
+
st1 { v17.8h }, [x7]
|
|
87
|
+
add x7, x7, x13, lsl #1
|
|
88
|
+
add x12, x12, x13
|
|
89
|
+
add x9, x9, x12
|
|
90
|
+
cmp x2, #0x8
|
|
91
|
+
b.hs Lrej_uniform_eta2_loop8
|
|
92
|
+
|
|
93
|
+
Lrej_uniform_eta2_memory_copy:
|
|
94
|
+
cmp x9, x4
|
|
95
|
+
csel x9, x9, x4, lo
|
|
96
|
+
mov w7, #0x199a // =6554
|
|
97
|
+
dup v26.8h, w7
|
|
98
|
+
movi v27.8h, #0x5
|
|
99
|
+
movi v7.8h, #0x2
|
|
100
|
+
mov x11, #0x0 // =0
|
|
101
|
+
mov x7, x8
|
|
102
|
+
|
|
103
|
+
Lrej_uniform_eta2_final_copy:
|
|
104
|
+
ldr q16, [x7], #0x20
|
|
105
|
+
ldur q18, [x7, #-0x10]
|
|
106
|
+
sqdmulh v28.8h, v16.8h, v26.8h
|
|
107
|
+
mls v16.8h, v28.8h, v27.8h
|
|
108
|
+
sqdmulh v28.8h, v18.8h, v26.8h
|
|
109
|
+
mls v18.8h, v28.8h, v27.8h
|
|
110
|
+
sub v16.8h, v7.8h, v16.8h
|
|
111
|
+
sub v18.8h, v7.8h, v18.8h
|
|
112
|
+
sshll2 v17.4s, v16.8h, #0x0
|
|
113
|
+
sshll v16.4s, v16.4h, #0x0
|
|
114
|
+
sshll2 v19.4s, v18.8h, #0x0
|
|
115
|
+
sshll v18.4s, v18.4h, #0x0
|
|
116
|
+
str q16, [x0], #0x40
|
|
117
|
+
stur q17, [x0, #-0x30]
|
|
118
|
+
stur q18, [x0, #-0x20]
|
|
119
|
+
stur q19, [x0, #-0x10]
|
|
120
|
+
add x11, x11, #0x10
|
|
121
|
+
cmp x11, #0x100
|
|
122
|
+
b.lt Lrej_uniform_eta2_final_copy
|
|
123
|
+
mov x0, x9
|
|
124
|
+
add sp, sp, #0x240
|
|
125
|
+
.cfi_adjust_cfa_offset -0x240
|
|
126
|
+
ret
|
|
127
|
+
.cfi_endproc
|
|
128
|
+
|
|
129
|
+
MLD_ASM_FN_SIZE(rej_uniform_eta2_aarch64_asm)
|
|
130
|
+
|
|
131
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_KEYPAIR_API && \
|
|
132
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
133
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_ETA == 2) */
|
|
134
|
+
|
|
135
|
+
#if defined(__ELF__)
|
|
136
|
+
.section .note.GNU-stack,"",%progbits
|
|
137
|
+
#endif
|
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mldsa-native project authors
|
|
3
|
+
* Copyright (c) The mlkem-native project authors
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
#include "../../../common.h"
|
|
8
|
+
#if defined(MLD_ARITH_BACKEND_AARCH64) && \
|
|
9
|
+
!defined(MLD_CONFIG_NO_KEYPAIR_API) && \
|
|
10
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
|
|
11
|
+
(defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_ETA == 4)
|
|
12
|
+
|
|
13
|
+
/*
|
|
14
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
15
|
+
* dev/aarch64_opt/src/rej_uniform_eta4_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
.text
|
|
19
|
+
.balign 4
|
|
20
|
+
.global MLD_ASM_NAMESPACE(rej_uniform_eta4_aarch64_asm)
|
|
21
|
+
MLD_ASM_FN_SYMBOL(rej_uniform_eta4_aarch64_asm)
|
|
22
|
+
|
|
23
|
+
.cfi_startproc
|
|
24
|
+
sub sp, sp, #0x240
|
|
25
|
+
.cfi_adjust_cfa_offset 0x240
|
|
26
|
+
mov x7, #0x1 // =1
|
|
27
|
+
movk x7, #0x2, lsl #16
|
|
28
|
+
movk x7, #0x4, lsl #32
|
|
29
|
+
movk x7, #0x8, lsl #48
|
|
30
|
+
mov v31.d[0], x7
|
|
31
|
+
mov x7, #0x10 // =16
|
|
32
|
+
movk x7, #0x20, lsl #16
|
|
33
|
+
movk x7, #0x40, lsl #32
|
|
34
|
+
movk x7, #0x80, lsl #48
|
|
35
|
+
mov v31.d[1], x7
|
|
36
|
+
movi v30.8h, #0x9
|
|
37
|
+
movi v7.8h, #0x4
|
|
38
|
+
mov x8, sp
|
|
39
|
+
mov x7, x8
|
|
40
|
+
mov x11, #0x0 // =0
|
|
41
|
+
eor v16.16b, v16.16b, v16.16b
|
|
42
|
+
|
|
43
|
+
Lrej_uniform_eta4_initial_zero:
|
|
44
|
+
str q16, [x7], #0x40
|
|
45
|
+
stur q16, [x7, #-0x30]
|
|
46
|
+
stur q16, [x7, #-0x20]
|
|
47
|
+
stur q16, [x7, #-0x10]
|
|
48
|
+
add x11, x11, #0x20
|
|
49
|
+
cmp x11, #0x100
|
|
50
|
+
b.lt Lrej_uniform_eta4_initial_zero
|
|
51
|
+
mov x7, x8
|
|
52
|
+
mov x9, #0x0 // =0
|
|
53
|
+
mov x4, #0x100 // =256
|
|
54
|
+
|
|
55
|
+
Lrej_uniform_eta4_loop8:
|
|
56
|
+
cmp x9, x4
|
|
57
|
+
b.hs Lrej_uniform_eta4_memory_copy
|
|
58
|
+
sub x2, x2, #0x8
|
|
59
|
+
ld1 { v0.8b }, [x1], #8
|
|
60
|
+
movi v26.8b, #0xf
|
|
61
|
+
and v27.8b, v0.8b, v26.8b
|
|
62
|
+
ushr v28.8b, v0.8b, #0x4
|
|
63
|
+
zip1 v26.8b, v27.8b, v28.8b
|
|
64
|
+
zip2 v29.8b, v27.8b, v28.8b
|
|
65
|
+
ushll v16.8h, v26.8b, #0x0
|
|
66
|
+
ushll v17.8h, v29.8b, #0x0
|
|
67
|
+
cmhi v4.8h, v30.8h, v16.8h
|
|
68
|
+
cmhi v5.8h, v30.8h, v17.8h
|
|
69
|
+
and v4.16b, v4.16b, v31.16b
|
|
70
|
+
and v5.16b, v5.16b, v31.16b
|
|
71
|
+
uaddlv s20, v4.8h
|
|
72
|
+
uaddlv s21, v5.8h
|
|
73
|
+
fmov w12, s20
|
|
74
|
+
fmov w13, s21
|
|
75
|
+
ldr q24, [x3, x12, lsl #4]
|
|
76
|
+
ldr q25, [x3, x13, lsl #4]
|
|
77
|
+
cnt v4.16b, v4.16b
|
|
78
|
+
cnt v5.16b, v5.16b
|
|
79
|
+
uaddlv s20, v4.8h
|
|
80
|
+
uaddlv s21, v5.8h
|
|
81
|
+
fmov w12, s20
|
|
82
|
+
fmov w13, s21
|
|
83
|
+
tbl v16.16b, { v16.16b }, v24.16b
|
|
84
|
+
tbl v17.16b, { v17.16b }, v25.16b
|
|
85
|
+
st1 { v16.8h }, [x7]
|
|
86
|
+
add x7, x7, x12, lsl #1
|
|
87
|
+
st1 { v17.8h }, [x7]
|
|
88
|
+
add x7, x7, x13, lsl #1
|
|
89
|
+
add x12, x12, x13
|
|
90
|
+
add x9, x9, x12
|
|
91
|
+
cmp x2, #0x8
|
|
92
|
+
b.hs Lrej_uniform_eta4_loop8
|
|
93
|
+
|
|
94
|
+
Lrej_uniform_eta4_memory_copy:
|
|
95
|
+
cmp x9, x4
|
|
96
|
+
csel x9, x9, x4, lo
|
|
97
|
+
mov x11, #0x0 // =0
|
|
98
|
+
mov x7, x8
|
|
99
|
+
|
|
100
|
+
Lrej_uniform_eta4_final_copy:
|
|
101
|
+
ldr q16, [x7], #0x20
|
|
102
|
+
ldur q18, [x7, #-0x10]
|
|
103
|
+
sub v16.8h, v7.8h, v16.8h
|
|
104
|
+
sub v18.8h, v7.8h, v18.8h
|
|
105
|
+
sshll2 v17.4s, v16.8h, #0x0
|
|
106
|
+
sshll v16.4s, v16.4h, #0x0
|
|
107
|
+
sshll2 v19.4s, v18.8h, #0x0
|
|
108
|
+
sshll v18.4s, v18.4h, #0x0
|
|
109
|
+
str q16, [x0], #0x40
|
|
110
|
+
stur q17, [x0, #-0x30]
|
|
111
|
+
stur q18, [x0, #-0x20]
|
|
112
|
+
stur q19, [x0, #-0x10]
|
|
113
|
+
add x11, x11, #0x10
|
|
114
|
+
cmp x11, #0x100
|
|
115
|
+
b.lt Lrej_uniform_eta4_final_copy
|
|
116
|
+
mov x0, x9
|
|
117
|
+
add sp, sp, #0x240
|
|
118
|
+
.cfi_adjust_cfa_offset -0x240
|
|
119
|
+
ret
|
|
120
|
+
.cfi_endproc
|
|
121
|
+
|
|
122
|
+
MLD_ASM_FN_SIZE(rej_uniform_eta4_aarch64_asm)
|
|
123
|
+
|
|
124
|
+
#endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_KEYPAIR_API && \
|
|
125
|
+
!MLD_CONFIG_MULTILEVEL_NO_SHARED && \
|
|
126
|
+
(MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_ETA == 4) */
|
|
127
|
+
|
|
128
|
+
#if defined(__ELF__)
|
|
129
|
+
.section .note.GNU-stack,"",%progbits
|
|
130
|
+
#endif
|