pq_crypto 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/SECURITY.md +7 -0
- data/ext/pqcrypto/pqcrypto_version.h +1 -1
- data/ext/pqcrypto/vendor/.vendored +4 -4
- data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
- data/lib/pq_crypto/version.rb +1 -1
- data/script/vendor_libs.rb +3 -3
- metadata +41 -35
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
- data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c
CHANGED
|
@@ -12,114 +12,19 @@
|
|
|
12
12
|
|
|
13
13
|
#include "fips202_native_armv81m.h"
|
|
14
14
|
|
|
15
|
-
/*
|
|
16
|
-
* TEMPORARY: Bit-interleaving using efficient shift-and-mask operations.
|
|
17
|
-
* TODO: Replace with optimized MVE assembly implementations
|
|
18
|
-
* (as a part of XORBytes and ExtractBytes)
|
|
19
|
-
*/
|
|
20
|
-
|
|
21
|
-
/* Extract even-indexed bits from 64-bit value into lower 32 bits */
|
|
22
|
-
static uint32_t bitinterleave_even(uint64_t x)
|
|
23
|
-
{
|
|
24
|
-
uint64_t t;
|
|
25
|
-
t = x & 0x5555555555555555ULL;
|
|
26
|
-
t = (t | (t >> 1)) & 0x3333333333333333ULL;
|
|
27
|
-
t = (t | (t >> 2)) & 0x0f0f0f0f0f0f0f0fULL;
|
|
28
|
-
t = (t | (t >> 4)) & 0x00ff00ff00ff00ffULL;
|
|
29
|
-
t = (t | (t >> 8)) & 0x0000ffff0000ffffULL;
|
|
30
|
-
t = (t | (t >> 16)) & 0x00000000ffffffffULL;
|
|
31
|
-
return (uint32_t)t;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/* Extract odd-indexed bits from 64-bit value into lower 32 bits */
|
|
35
|
-
static uint32_t bitinterleave_odd(uint64_t x)
|
|
36
|
-
{
|
|
37
|
-
return bitinterleave_even(x >> 1);
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/* Spread 32-bit value across even bit positions of 64-bit result */
|
|
41
|
-
static uint64_t spread_even(uint32_t x)
|
|
42
|
-
{
|
|
43
|
-
uint64_t t = x;
|
|
44
|
-
t = (t | (t << 16)) & 0x0000ffff0000ffffULL;
|
|
45
|
-
t = (t | (t << 8)) & 0x00ff00ff00ff00ffULL;
|
|
46
|
-
t = (t | (t << 4)) & 0x0f0f0f0f0f0f0f0fULL;
|
|
47
|
-
t = (t | (t << 2)) & 0x3333333333333333ULL;
|
|
48
|
-
t = (t | (t << 1)) & 0x5555555555555555ULL;
|
|
49
|
-
return t;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
/* Combine even and odd 32-bit halves into interleaved 64-bit value */
|
|
53
|
-
static uint64_t bitdeinterleave(uint32_t even, uint32_t odd)
|
|
54
|
-
{
|
|
55
|
-
return spread_even(even) | (spread_even(odd) << 1);
|
|
56
|
-
}
|
|
57
15
|
|
|
58
16
|
/*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
17
|
+
* Keccak-f1600 x4 permutation (on bit-interleaved state)
|
|
18
|
+
* State is expected to already be in bit-interleaved format.
|
|
61
19
|
*/
|
|
62
|
-
static void interleave_4fold(uint64_t *state_4x, const uint64_t *state0,
|
|
63
|
-
const uint64_t *state1, const uint64_t *state2,
|
|
64
|
-
const uint64_t *state3)
|
|
65
|
-
{
|
|
66
|
-
uint32_t *state_4xl = (uint32_t *)state_4x;
|
|
67
|
-
uint32_t *state_4xh = (uint32_t *)state_4x + 100;
|
|
68
|
-
|
|
69
|
-
for (size_t i = 0; i < 25; i++)
|
|
70
|
-
{
|
|
71
|
-
state_4xl[i * 4 + 0] = bitinterleave_even(state0[i]);
|
|
72
|
-
state_4xl[i * 4 + 1] = bitinterleave_even(state1[i]);
|
|
73
|
-
state_4xl[i * 4 + 2] = bitinterleave_even(state2[i]);
|
|
74
|
-
state_4xl[i * 4 + 3] = bitinterleave_even(state3[i]);
|
|
75
|
-
|
|
76
|
-
state_4xh[i * 4 + 0] = bitinterleave_odd(state0[i]);
|
|
77
|
-
state_4xh[i * 4 + 1] = bitinterleave_odd(state1[i]);
|
|
78
|
-
state_4xh[i * 4 + 2] = bitinterleave_odd(state2[i]);
|
|
79
|
-
state_4xh[i * 4 + 3] = bitinterleave_odd(state3[i]);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
static void deinterleave_4fold(uint64_t *state_4x, uint64_t *state0,
|
|
84
|
-
uint64_t *state1, uint64_t *state2,
|
|
85
|
-
uint64_t *state3)
|
|
86
|
-
{
|
|
87
|
-
uint32_t *state_4xl = (uint32_t *)state_4x;
|
|
88
|
-
uint32_t *state_4xh = (uint32_t *)state_4x + 100;
|
|
89
|
-
|
|
90
|
-
for (size_t i = 0; i < 25; i++)
|
|
91
|
-
{
|
|
92
|
-
state0[i] = bitdeinterleave(state_4xl[i * 4 + 0], state_4xh[i * 4 + 0]);
|
|
93
|
-
state1[i] = bitdeinterleave(state_4xl[i * 4 + 1], state_4xh[i * 4 + 1]);
|
|
94
|
-
state2[i] = bitdeinterleave(state_4xl[i * 4 + 2], state_4xh[i * 4 + 2]);
|
|
95
|
-
state3[i] = bitdeinterleave(state_4xl[i * 4 + 3], state_4xh[i * 4 + 3]);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
|
|
99
20
|
#define mld_keccak_f1600_x4_native_impl \
|
|
100
21
|
MLD_NAMESPACE(keccak_f1600_x4_native_impl)
|
|
101
22
|
int mld_keccak_f1600_x4_native_impl(uint64_t *state)
|
|
102
23
|
{
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
* TODO: Replace with optimized MVE assembly implementations
|
|
106
|
-
* (as a part of XORBytes and ExtractBytes)
|
|
107
|
-
*/
|
|
108
|
-
MLD_ALIGN uint64_t state_4x[100];
|
|
109
|
-
MLD_ALIGN uint64_t state_4x_tmp[100];
|
|
110
|
-
|
|
111
|
-
/* Interleave the 4 states into bit-interleaved format */
|
|
112
|
-
interleave_4fold(state_4x, &state[0], &state[25], &state[50], &state[75]);
|
|
113
|
-
|
|
114
|
-
/* Run the permutation */
|
|
115
|
-
mld_keccak_f1600_x4_mve_asm(state_4x, state_4x_tmp,
|
|
24
|
+
MLD_ALIGN uint64_t state_tmp[100];
|
|
25
|
+
mld_keccak_f1600_x4_mve_asm(state, state_tmp,
|
|
116
26
|
mld_keccakf1600_round_constants);
|
|
117
|
-
|
|
118
|
-
/* Deinterleave back to 4 separate states */
|
|
119
|
-
deinterleave_4fold(state_4x, &state[0], &state[25], &state[50], &state[75]);
|
|
120
|
-
|
|
121
|
-
mld_zeroize(state_4x, sizeof(state_4x));
|
|
122
|
-
mld_zeroize(state_4x_tmp, sizeof(state_4x_tmp));
|
|
27
|
+
mld_zeroize(state_tmp, sizeof(state_tmp));
|
|
123
28
|
return MLD_NATIVE_FUNC_SUCCESS;
|
|
124
29
|
}
|
|
125
30
|
|
|
@@ -17,31 +17,32 @@
|
|
|
17
17
|
* - low word contains even-indexed bits
|
|
18
18
|
* - high word contains odd-indexed bits
|
|
19
19
|
*/
|
|
20
|
-
MLD_ALIGN const uint32_t
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
20
|
+
MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint32_t
|
|
21
|
+
mld_keccakf1600_round_constants[48] = {
|
|
22
|
+
0x00000001, 0x00000000, /* RC0 */
|
|
23
|
+
0x00000000, 0x00000089, /* RC1 */
|
|
24
|
+
0x00000000, 0x8000008b, /* RC2 */
|
|
25
|
+
0x00000000, 0x80008080, /* RC3 */
|
|
26
|
+
0x00000001, 0x0000008b, /* RC4 */
|
|
27
|
+
0x00000001, 0x00008000, /* RC5 */
|
|
28
|
+
0x00000001, 0x80008088, /* RC6 */
|
|
29
|
+
0x00000001, 0x80000082, /* RC7 */
|
|
30
|
+
0x00000000, 0x0000000b, /* RC8 */
|
|
31
|
+
0x00000000, 0x0000000a, /* RC9 */
|
|
32
|
+
0x00000001, 0x00008082, /* RC10 */
|
|
33
|
+
0x00000000, 0x00008003, /* RC11 */
|
|
34
|
+
0x00000001, 0x0000808b, /* RC12 */
|
|
35
|
+
0x00000001, 0x8000000b, /* RC13 */
|
|
36
|
+
0x00000001, 0x8000008a, /* RC14 */
|
|
37
|
+
0x00000001, 0x80000081, /* RC15 */
|
|
38
|
+
0x00000000, 0x80000081, /* RC16 */
|
|
39
|
+
0x00000000, 0x80000008, /* RC17 */
|
|
40
|
+
0x00000000, 0x00000083, /* RC18 */
|
|
41
|
+
0x00000000, 0x80008003, /* RC19 */
|
|
42
|
+
0x00000001, 0x80008088, /* RC20 */
|
|
43
|
+
0x00000000, 0x80000088, /* RC21 */
|
|
44
|
+
0x00000001, 0x00008000, /* RC22 */
|
|
45
|
+
0x00000000, 0x80008082, /* RC23 */
|
|
45
46
|
};
|
|
46
47
|
|
|
47
48
|
#else /* MLD_FIPS202_ARMV81M_NEED_X4 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) The mlkem-native project authors
|
|
3
|
+
* Copyright (c) The mldsa-native project authors
|
|
4
|
+
* Copyright (c) 2026 Arm Limited
|
|
5
|
+
* SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Overview
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// MVE/Helium implementation of KeccakF1600x4_StateExtractBytes
|
|
12
|
+
// (inverse of state_xor_bytes_x4_mve.S).
|
|
13
|
+
//
|
|
14
|
+
// void KeccakF1600x4_StateExtractBytes(state, d0, d1, d2, d3, offset, length)
|
|
15
|
+
//
|
|
16
|
+
// Reads 'length' bytes from the bit-interleaved Keccak state starting at
|
|
17
|
+
// byte 'offset', recombines the even and odd halves of each lane back
|
|
18
|
+
// into plain bytes, and writes them to four output buffers (d0..d3).
|
|
19
|
+
//
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Bit-interleaving background
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Each 64-bit Keccak lane is stored as two 32-bit words:
|
|
24
|
+
// even half -- bits 0, 2, 4, ..., 62 of the lane
|
|
25
|
+
// odd half -- bits 1, 3, 5, ..., 63 of the lane
|
|
26
|
+
// This representation allows 64-bit lane rotations (used in the Keccak
|
|
27
|
+
// round function) to be implemented as pairs of 32-bit rotations.
|
|
28
|
+
//
|
|
29
|
+
// Batched (x4) processing:
|
|
30
|
+
// Four Keccak instances are processed as a batch. Their states are
|
|
31
|
+
// stored interleaved in a single 800-byte buffer: first the even
|
|
32
|
+
// halves of all 25 lanes (400 bytes), then the odd halves (400 bytes).
|
|
33
|
+
// Within each 16-byte row, the four u32 words correspond to
|
|
34
|
+
// instances 0..3 of the same lane, enabling SIMD-parallel operations
|
|
35
|
+
// across all four instances.
|
|
36
|
+
//
|
|
37
|
+
// State memory layout (25 lanes x 4 instances x 2 halves):
|
|
38
|
+
// S[i][l]_even/odd = even/odd half of lane l, instance i (u32)
|
|
39
|
+
// Each row is 16 bytes (one Q-register).
|
|
40
|
+
// Offset Contents
|
|
41
|
+
// 0 S[0][ 0]_even, S[1][ 0]_even, S[2][ 0]_even, S[3][ 0]_even
|
|
42
|
+
// 16 S[0][ 1]_even, S[1][ 1]_even, S[2][ 1]_even, S[3][ 1]_even
|
|
43
|
+
// ...
|
|
44
|
+
// 384 S[0][24]_even, S[1][24]_even, S[2][24]_even, S[3][24]_even
|
|
45
|
+
// 400 S[0][ 0]_odd, S[1][ 0]_odd, S[2][ 0]_odd, S[3][ 0]_odd
|
|
46
|
+
// 416 S[0][ 1]_odd, S[1][ 1]_odd, S[2][ 1]_odd, S[3][ 1]_odd
|
|
47
|
+
// ...
|
|
48
|
+
// 784 S[0][24]_odd, S[1][24]_odd, S[2][24]_odd, S[3][24]_odd
|
|
49
|
+
//
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Three-phase structure
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Prologue -- if offset is not 8-byte aligned, extract
|
|
54
|
+
// min(length, 8-(offset%8)) bytes via predicated byte stores.
|
|
55
|
+
// Main -- process full 8-byte groups: load even/odd lane pair,
|
|
56
|
+
// de-interleave, scatter-store to output buffers.
|
|
57
|
+
// Tail -- extract remaining <8 bytes via predicated byte stores.
|
|
58
|
+
|
|
59
|
+
#include "../../../../common.h"
|
|
60
|
+
#if defined(MLD_FIPS202_ARMV81M_NEED_X4) && \
|
|
61
|
+
!defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
|
|
62
|
+
|
|
63
|
+
/*
|
|
64
|
+
* WARNING: This file is auto-derived from the mldsa-native source file
|
|
65
|
+
* dev/fips202/armv81m/src/state_extract_bytes_x4_mve.S using scripts/simpasm. Do not modify it directly.
|
|
66
|
+
*/
|
|
67
|
+
|
|
68
|
+
.thumb
|
|
69
|
+
.syntax unified
|
|
70
|
+
|
|
71
|
+
.text
|
|
72
|
+
.balign 4
|
|
73
|
+
.global MLD_ASM_NAMESPACE(keccak_f1600_x4_state_extract_bytes_asm)
|
|
74
|
+
MLD_ASM_FN_SYMBOL(keccak_f1600_x4_state_extract_bytes_asm)
|
|
75
|
+
|
|
76
|
+
.cfi_startproc
|
|
77
|
+
push.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
|
78
|
+
.cfi_adjust_cfa_offset 0x28
|
|
79
|
+
.cfi_rel_offset r4, 0x0
|
|
80
|
+
.cfi_rel_offset r5, 0x4
|
|
81
|
+
.cfi_rel_offset r6, 0x8
|
|
82
|
+
.cfi_rel_offset r7, 0xc
|
|
83
|
+
.cfi_rel_offset r8, 0x10
|
|
84
|
+
.cfi_rel_offset r9, 0x14
|
|
85
|
+
.cfi_rel_offset r10, 0x18
|
|
86
|
+
.cfi_rel_offset r11, 0x1c
|
|
87
|
+
.cfi_rel_offset lr, 0x24
|
|
88
|
+
vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
|
89
|
+
.cfi_adjust_cfa_offset 0x40
|
|
90
|
+
.cfi_rel_offset d8, 0x0
|
|
91
|
+
.cfi_rel_offset d9, 0x8
|
|
92
|
+
.cfi_rel_offset d10, 0x10
|
|
93
|
+
.cfi_rel_offset d11, 0x18
|
|
94
|
+
.cfi_rel_offset d12, 0x20
|
|
95
|
+
.cfi_rel_offset d13, 0x28
|
|
96
|
+
.cfi_rel_offset d14, 0x30
|
|
97
|
+
.cfi_rel_offset d15, 0x38
|
|
98
|
+
ldr r4, [sp, #0x68]
|
|
99
|
+
ldr.w r10, [sp, #0x6c]
|
|
100
|
+
ldr r6, [sp, #0x70]
|
|
101
|
+
cmp r6, #0x0
|
|
102
|
+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x2ea
|
|
103
|
+
and r5, r10, #0x7
|
|
104
|
+
bic r9, r10, #0x7
|
|
105
|
+
add.w r8, r0, r9, lsl #1
|
|
106
|
+
add.w r7, r8, #0x190
|
|
107
|
+
cmp r5, #0x0
|
|
108
|
+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_pre_main @ imm = #0x112
|
|
109
|
+
vldrw.u32 q0, [r8], #16
|
|
110
|
+
vldrw.u32 q1, [r7], #16
|
|
111
|
+
vrev32.16 q2, q0
|
|
112
|
+
vrev32.16 q3, q1
|
|
113
|
+
vsli.32 q0, q0, #0x8
|
|
114
|
+
vsli.16 q0, q0, #0x4
|
|
115
|
+
vsli.8 q0, q0, #0x1
|
|
116
|
+
vshr.u8 q4, q0, #0x3
|
|
117
|
+
vsli.8 q0, q4, #0x4
|
|
118
|
+
vshr.u8 q4, q0, #0x5
|
|
119
|
+
vsli.8 q0, q4, #0x6
|
|
120
|
+
vsli.32 q1, q1, #0x8
|
|
121
|
+
vsli.16 q1, q1, #0x4
|
|
122
|
+
vsli.8 q1, q1, #0x1
|
|
123
|
+
vshr.u8 q4, q1, #0x3
|
|
124
|
+
vsli.8 q1, q4, #0x4
|
|
125
|
+
vshr.u8 q4, q1, #0x5
|
|
126
|
+
vsli.8 q1, q4, #0x6
|
|
127
|
+
mov.w r0, #0x55
|
|
128
|
+
vdup.8 q4, r0
|
|
129
|
+
vand q0, q0, q4
|
|
130
|
+
vand q1, q1, q4
|
|
131
|
+
vshl.i32 q1, q1, #0x1
|
|
132
|
+
vorr q0, q0, q1
|
|
133
|
+
vsli.32 q2, q2, #0x8
|
|
134
|
+
vsli.16 q2, q2, #0x4
|
|
135
|
+
vsli.8 q2, q2, #0x1
|
|
136
|
+
vshr.u8 q1, q2, #0x3
|
|
137
|
+
vsli.8 q2, q1, #0x4
|
|
138
|
+
vshr.u8 q1, q2, #0x5
|
|
139
|
+
vsli.8 q2, q1, #0x6
|
|
140
|
+
vsli.32 q3, q3, #0x8
|
|
141
|
+
vsli.16 q3, q3, #0x4
|
|
142
|
+
vsli.8 q3, q3, #0x1
|
|
143
|
+
vshr.u8 q1, q3, #0x3
|
|
144
|
+
vsli.8 q3, q1, #0x4
|
|
145
|
+
vshr.u8 q1, q3, #0x5
|
|
146
|
+
vsli.8 q3, q1, #0x6
|
|
147
|
+
vand q1, q2, q4
|
|
148
|
+
vand q3, q3, q4
|
|
149
|
+
vshl.i32 q3, q3, #0x1
|
|
150
|
+
vorr q1, q1, q3
|
|
151
|
+
vrev64.32 q2, q0
|
|
152
|
+
vrev64.32 q3, q1
|
|
153
|
+
movw r0, #0xf0f
|
|
154
|
+
vmsr p0, r0
|
|
155
|
+
vpsel q0, q0, q3
|
|
156
|
+
vpsel q1, q2, q1
|
|
157
|
+
vmov.f64 d4, d1
|
|
158
|
+
vmov.f64 d6, d3
|
|
159
|
+
rsb.w lr, r5, #0x8
|
|
160
|
+
cmp r6, lr
|
|
161
|
+
it ls
|
|
162
|
+
movls lr, r6
|
|
163
|
+
vctp.8 lr
|
|
164
|
+
vmrs r11, p0
|
|
165
|
+
lsl.w r11, r11, r5
|
|
166
|
+
vmsr p0, r11
|
|
167
|
+
subs r1, r1, r5
|
|
168
|
+
subs r2, r2, r5
|
|
169
|
+
subs r3, r3, r5
|
|
170
|
+
subs r4, r4, r5
|
|
171
|
+
vpstttt
|
|
172
|
+
vstrbt.8 q0, [r1], #4
|
|
173
|
+
vstrbt.8 q1, [r2], #4
|
|
174
|
+
vstrbt.8 q2, [r3], #4
|
|
175
|
+
vstrbt.8 q3, [r4], #4
|
|
176
|
+
subs.w r6, r6, lr
|
|
177
|
+
cmp r6, #0x0
|
|
178
|
+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x1cc
|
|
179
|
+
vmov q7[2], q7[0], r1, r3
|
|
180
|
+
vmov q7[3], q7[1], r2, r4
|
|
181
|
+
b Lkeccak_f1600_x4_state_extract_bytes_asm_main_body @ imm = #0xe
|
|
182
|
+
|
|
183
|
+
Lkeccak_f1600_x4_state_extract_bytes_asm_pre_main:
|
|
184
|
+
vmov q7[2], q7[0], r1, r3
|
|
185
|
+
vmov q7[3], q7[1], r2, r4
|
|
186
|
+
mov.w r12, #0x4
|
|
187
|
+
vsub.i32 q7, q7, r12
|
|
188
|
+
|
|
189
|
+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_body:
|
|
190
|
+
lsr.w lr, r6, #0x3
|
|
191
|
+
wls lr, lr, Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_end @ imm = #0xb4
|
|
192
|
+
|
|
193
|
+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_start:
|
|
194
|
+
vldrw.u32 q0, [r8], #16
|
|
195
|
+
vldrw.u32 q1, [r7], #16
|
|
196
|
+
vrev32.16 q2, q0
|
|
197
|
+
vrev32.16 q3, q1
|
|
198
|
+
vsli.32 q0, q0, #0x8
|
|
199
|
+
vsli.16 q0, q0, #0x4
|
|
200
|
+
vsli.8 q0, q0, #0x1
|
|
201
|
+
vshr.u8 q4, q0, #0x3
|
|
202
|
+
vsli.8 q0, q4, #0x4
|
|
203
|
+
vshr.u8 q4, q0, #0x5
|
|
204
|
+
vsli.8 q0, q4, #0x6
|
|
205
|
+
vsli.32 q1, q1, #0x8
|
|
206
|
+
vsli.16 q1, q1, #0x4
|
|
207
|
+
vsli.8 q1, q1, #0x1
|
|
208
|
+
vshr.u8 q4, q1, #0x3
|
|
209
|
+
vsli.8 q1, q4, #0x4
|
|
210
|
+
vshr.u8 q4, q1, #0x5
|
|
211
|
+
vsli.8 q1, q4, #0x6
|
|
212
|
+
mov.w r0, #0x55
|
|
213
|
+
vdup.8 q4, r0
|
|
214
|
+
vand q0, q0, q4
|
|
215
|
+
vand q1, q1, q4
|
|
216
|
+
vshl.i32 q1, q1, #0x1
|
|
217
|
+
vorr q0, q0, q1
|
|
218
|
+
vsli.32 q2, q2, #0x8
|
|
219
|
+
vsli.16 q2, q2, #0x4
|
|
220
|
+
vsli.8 q2, q2, #0x1
|
|
221
|
+
vshr.u8 q1, q2, #0x3
|
|
222
|
+
vsli.8 q2, q1, #0x4
|
|
223
|
+
vshr.u8 q1, q2, #0x5
|
|
224
|
+
vsli.8 q2, q1, #0x6
|
|
225
|
+
vsli.32 q3, q3, #0x8
|
|
226
|
+
vsli.16 q3, q3, #0x4
|
|
227
|
+
vsli.8 q3, q3, #0x1
|
|
228
|
+
vshr.u8 q1, q3, #0x3
|
|
229
|
+
vsli.8 q3, q1, #0x4
|
|
230
|
+
vshr.u8 q1, q3, #0x5
|
|
231
|
+
vsli.8 q3, q1, #0x6
|
|
232
|
+
vand q1, q2, q4
|
|
233
|
+
vand q3, q3, q4
|
|
234
|
+
vshl.i32 q3, q3, #0x1
|
|
235
|
+
vorr q1, q1, q3
|
|
236
|
+
vstrw.32 q0, [q7, #4]!
|
|
237
|
+
vstrw.32 q1, [q7, #4]!
|
|
238
|
+
le lr, Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_start @ imm = #-0xb4
|
|
239
|
+
|
|
240
|
+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_end:
|
|
241
|
+
ands r6, r6, #0x7
|
|
242
|
+
beq Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0xee
|
|
243
|
+
mov.w r12, #0x4
|
|
244
|
+
vadd.i32 q7, q7, r12
|
|
245
|
+
vmov r1, r3, q7[2], q7[0]
|
|
246
|
+
vmov r2, r4, q7[3], q7[1]
|
|
247
|
+
vldrw.u32 q0, [r8], #16
|
|
248
|
+
vldrw.u32 q1, [r7], #16
|
|
249
|
+
vrev32.16 q2, q0
|
|
250
|
+
vrev32.16 q3, q1
|
|
251
|
+
vsli.32 q0, q0, #0x8
|
|
252
|
+
vsli.16 q0, q0, #0x4
|
|
253
|
+
vsli.8 q0, q0, #0x1
|
|
254
|
+
vshr.u8 q4, q0, #0x3
|
|
255
|
+
vsli.8 q0, q4, #0x4
|
|
256
|
+
vshr.u8 q4, q0, #0x5
|
|
257
|
+
vsli.8 q0, q4, #0x6
|
|
258
|
+
vsli.32 q1, q1, #0x8
|
|
259
|
+
vsli.16 q1, q1, #0x4
|
|
260
|
+
vsli.8 q1, q1, #0x1
|
|
261
|
+
vshr.u8 q4, q1, #0x3
|
|
262
|
+
vsli.8 q1, q4, #0x4
|
|
263
|
+
vshr.u8 q4, q1, #0x5
|
|
264
|
+
vsli.8 q1, q4, #0x6
|
|
265
|
+
mov.w r0, #0x55
|
|
266
|
+
vdup.8 q4, r0
|
|
267
|
+
vand q0, q0, q4
|
|
268
|
+
vand q1, q1, q4
|
|
269
|
+
vshl.i32 q1, q1, #0x1
|
|
270
|
+
vorr q0, q0, q1
|
|
271
|
+
vsli.32 q2, q2, #0x8
|
|
272
|
+
vsli.16 q2, q2, #0x4
|
|
273
|
+
vsli.8 q2, q2, #0x1
|
|
274
|
+
vshr.u8 q1, q2, #0x3
|
|
275
|
+
vsli.8 q2, q1, #0x4
|
|
276
|
+
vshr.u8 q1, q2, #0x5
|
|
277
|
+
vsli.8 q2, q1, #0x6
|
|
278
|
+
vsli.32 q3, q3, #0x8
|
|
279
|
+
vsli.16 q3, q3, #0x4
|
|
280
|
+
vsli.8 q3, q3, #0x1
|
|
281
|
+
vshr.u8 q1, q3, #0x3
|
|
282
|
+
vsli.8 q3, q1, #0x4
|
|
283
|
+
vshr.u8 q1, q3, #0x5
|
|
284
|
+
vsli.8 q3, q1, #0x6
|
|
285
|
+
vand q1, q2, q4
|
|
286
|
+
vand q3, q3, q4
|
|
287
|
+
vshl.i32 q3, q3, #0x1
|
|
288
|
+
vorr q1, q1, q3
|
|
289
|
+
vrev64.32 q2, q0
|
|
290
|
+
vrev64.32 q3, q1
|
|
291
|
+
movw r0, #0xf0f
|
|
292
|
+
vmsr p0, r0
|
|
293
|
+
vpsel q0, q0, q3
|
|
294
|
+
vpsel q1, q2, q1
|
|
295
|
+
vmov.f64 d4, d1
|
|
296
|
+
vmov.f64 d6, d3
|
|
297
|
+
vctp.8 r6
|
|
298
|
+
vpstttt
|
|
299
|
+
vstrbt.8 q0, [r1], #4
|
|
300
|
+
vstrbt.8 q1, [r2], #4
|
|
301
|
+
vstrbt.8 q2, [r3], #4
|
|
302
|
+
vstrbt.8 q3, [r4], #4
|
|
303
|
+
|
|
304
|
+
Lkeccak_f1600_x4_state_extract_bytes_asm_exit:
|
|
305
|
+
vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
|
306
|
+
.cfi_restore d8
|
|
307
|
+
.cfi_restore d9
|
|
308
|
+
.cfi_restore d10
|
|
309
|
+
.cfi_restore d11
|
|
310
|
+
.cfi_restore d12
|
|
311
|
+
.cfi_restore d13
|
|
312
|
+
.cfi_restore d14
|
|
313
|
+
.cfi_restore d15
|
|
314
|
+
.cfi_adjust_cfa_offset -0x40
|
|
315
|
+
pop.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
|
316
|
+
.cfi_restore r4
|
|
317
|
+
.cfi_restore r5
|
|
318
|
+
.cfi_restore r6
|
|
319
|
+
.cfi_restore r7
|
|
320
|
+
.cfi_restore r8
|
|
321
|
+
.cfi_restore r9
|
|
322
|
+
.cfi_restore r10
|
|
323
|
+
.cfi_restore r11
|
|
324
|
+
.cfi_restore lr
|
|
325
|
+
.cfi_adjust_cfa_offset -0x28
|
|
326
|
+
.cfi_endproc
|
|
327
|
+
|
|
328
|
+
MLD_ASM_FN_SIZE(keccak_f1600_x4_state_extract_bytes_asm)
|
|
329
|
+
|
|
330
|
+
#endif /* MLD_FIPS202_ARMV81M_NEED_X4 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
|
|
331
|
+
|
|
332
|
+
#if defined(__ELF__)
|
|
333
|
+
.section .note.GNU-stack,"",%progbits
|
|
334
|
+
#endif
|