pq_crypto 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  5. data/ext/pqcrypto/vendor/.vendored +4 -4
  6. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  7. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  8. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  9. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  10. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  107. data/lib/pq_crypto/version.rb +1 -1
  108. data/script/vendor_libs.rb +3 -3
  109. metadata +41 -35
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -0,0 +1,355 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * Copyright (c) 2026 Arm Limited
5
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
6
+ */
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // Overview
10
+ // ---------------------------------------------------------------------------
11
+ // MVE/Helium implementation of KeccakF1600x4_StateXORBytes.
12
+ //
13
+ // void KeccakF1600x4_StateXORBytes(state, d0, d1, d2, d3, offset, length)
14
+ //
15
+ // Reads 'length' plain bytes from each of four input buffers (d0..d3),
16
+ // splits every byte into its even and odd bits (bit-interleaving), and
17
+ // XORs the result into the Keccak state starting at byte 'offset'.
18
+ //
19
+ // ---------------------------------------------------------------------------
20
+ // Bit-interleaving background
21
+ // ---------------------------------------------------------------------------
22
+ // Each 64-bit Keccak lane is stored as two 32-bit words:
23
+ // even half -- bits 0, 2, 4, ..., 62 of the lane
24
+ // odd half -- bits 1, 3, 5, ..., 63 of the lane
25
+ // This representation allows 64-bit lane rotations (used in the Keccak
26
+ // round function) to be implemented as pairs of 32-bit rotations.
27
+ //
28
+ // Batched (x4) processing:
29
+ // Four Keccak instances are processed as a batch. Their states are
30
+ // stored interleaved in a single 800-byte buffer: first the even
31
+ // halves of all 25 lanes (400 bytes), then the odd halves (400 bytes).
32
+ // Within each 16-byte row, the four u32 words correspond to
33
+ // instances 0..3 of the same lane, enabling SIMD-parallel operations
34
+ // across all four instances.
35
+ //
36
+ // State memory layout (25 lanes x 4 instances x 2 halves):
37
+ // S[i][l]_even/odd = even/odd half of lane l, instance i (u32)
38
+ // Each row is 16 bytes (one Q-register).
39
+ // Offset Contents
40
+ // 0 S[0][ 0]_even, S[1][ 0]_even, S[2][ 0]_even, S[3][ 0]_even
41
+ // 16 S[0][ 1]_even, S[1][ 1]_even, S[2][ 1]_even, S[3][ 1]_even
42
+ // ...
43
+ // 384 S[0][24]_even, S[1][24]_even, S[2][24]_even, S[3][24]_even
44
+ // 400 S[0][ 0]_odd, S[1][ 0]_odd, S[2][ 0]_odd, S[3][ 0]_odd
45
+ // 416 S[0][ 1]_odd, S[1][ 1]_odd, S[2][ 1]_odd, S[3][ 1]_odd
46
+ // ...
47
+ // 784 S[0][24]_odd, S[1][24]_odd, S[2][24]_odd, S[3][24]_odd
48
+ //
49
+ // ---------------------------------------------------------------------------
50
+ // Three-phase structure
51
+ // ---------------------------------------------------------------------------
52
+ // Prologue -- if offset is not 8-byte aligned, absorb
53
+ // min(length, 8-(offset%8)) bytes via predicated byte loads.
54
+ // Main -- process full 8-byte groups via word-level gather loads,
55
+ // bit-interleave, then VEOR into even/odd state halves.
56
+ // Tail -- absorb remaining <8 bytes via predicated byte loads.
57
+
58
+ #include "../../../../common.h"
59
+ #if defined(MLD_FIPS202_ARMV81M_NEED_X4) && \
60
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
61
+
62
+ /*
63
+ * WARNING: This file is auto-derived from the mldsa-native source file
64
+ * dev/fips202/armv81m/src/state_xor_bytes_x4_mve.S using scripts/simpasm. Do not modify it directly.
65
+ */
66
+
67
+ .thumb
68
+ .syntax unified
69
+
70
+ .text
71
+ .balign 4
72
+ .global MLD_ASM_NAMESPACE(keccak_f1600_x4_state_xor_bytes_asm)
73
+ MLD_ASM_FN_SYMBOL(keccak_f1600_x4_state_xor_bytes_asm)
74
+
75
+ .cfi_startproc
76
+ push.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} // 10 core registers = 0x28 bytes
77
+ .cfi_adjust_cfa_offset 0x28
78
+ .cfi_rel_offset r4, 0x0
79
+ .cfi_rel_offset r5, 0x4
80
+ .cfi_rel_offset r6, 0x8
81
+ .cfi_rel_offset r7, 0xc
82
+ .cfi_rel_offset r8, 0x10
83
+ .cfi_rel_offset r9, 0x14
84
+ .cfi_rel_offset r10, 0x18
85
+ .cfi_rel_offset r11, 0x1c
86
+ .cfi_rel_offset lr, 0x24
87
+ vpush {d8, d9, d10, d11, d12, d13, d14, d15} // 8 D registers = 0x40 bytes
88
+ .cfi_adjust_cfa_offset 0x40
89
+ .cfi_rel_offset d8, 0x0
90
+ .cfi_rel_offset d9, 0x8
91
+ .cfi_rel_offset d10, 0x10
92
+ .cfi_rel_offset d11, 0x18
93
+ .cfi_rel_offset d12, 0x20
94
+ .cfi_rel_offset d13, 0x28
95
+ .cfi_rel_offset d14, 0x30
96
+ .cfi_rel_offset d15, 0x38
97
+ ldr r4, [sp, #0x68] // 1st stack argument (0x28 + 0x40 bytes were pushed); d3 in the overview's argument list
98
+ ldr.w r10, [sp, #0x6c] // 2nd stack argument: offset
99
+ ldr r6, [sp, #0x70] // 3rd stack argument: length
100
+ cmp r6, #0x0 // length == 0: branch straight to the epilogue
101
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x346
102
+ and r5, r10, #0x7 // r5 = offset % 8 (byte position inside the 8-byte lane)
103
+ bic r9, r10, #0x7 // r9 = offset rounded down to a multiple of 8
104
+ add.w r8, r0, r9, lsl #1 // r8 = state + 2*r9: one 16-byte row per 8-byte lane (even-half row)
105
+ add.w r7, r8, #0x190 // r7 = r8 + 400: the matching odd-half row
106
+ cmp r5, #0x0 // aligned offset: no prologue needed
107
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main @ imm = #0x12c
108
+ subs r1, r1, r5 // rewind all four input pointers by r5 ...
109
+ subs r2, r2, r5
110
+ subs r3, r3, r5
111
+ subs r4, r4, r5 // ... so the predicate shifted by r5 below selects the original first bytes
112
+ rsb.w lr, r5, #0x8 // lr = 8 - offset%8
113
+ cmp r6, lr
114
+ it ls
115
+ movls lr, r6 // lr = min(length, 8 - offset%8)
116
+ subs.w r6, r6, lr // r6 = bytes remaining after the prologue
117
+ vctp.8 lr // predicate covering the first lr byte lanes
118
+ vmrs r11, p0
119
+ lsl.w r11, r11, r5 // shift the predicate mask left by r5
120
+ vmsr p0, r11
121
+ vpstttt // the four byte loads below execute under that predicate
122
+ vldrbt.u8 q0, [r1], #4 // post-increment by 4: the main loop's first [q7, #4]! then lands on the next 8-byte group
123
+ vldrbt.u8 q1, [r2], #4
124
+ vldrbt.u8 q2, [r3], #4
125
+ vldrbt.u8 q3, [r4], #4
126
+ vmov.f64 d1, d4 // high half of q0 := low half of q2
127
+ vmov.f64 d3, d6 // high half of q1 := low half of q3
128
+ vrev64.32 q2, q0 // q2/q3 = q0/q1 with the two words of each 64-bit half swapped
129
+ vrev64.32 q3, q1
130
+ movw r0, #0xf0f // r0 (state) is free to clobber: r8/r7 already hold the row pointers
131
+ vmsr p0, r0
132
+ vpsel q0, q0, q3 // q0 = first 32-bit word of each of the four inputs
133
+ vpsel q1, q2, q1 // q1 = second 32-bit word of each of the four inputs
134
+ vmov q2, q0 // q2/q3 = working copies of the first/second words
135
+ vmov q3, q1
136
+ vshr.u8 q4, q0, #0x2 // shift-right/insert ladder on q0 (q4 is scratch)
137
+ vsli.8 q0, q4, #0x1
138
+ vshr.u8 q4, q0, #0x3
139
+ vsli.8 q0, q4, #0x2
140
+ vshr.u8 q4, q0, #0x4
141
+ vsli.8 q0, q4, #0x3
142
+ vshr.u16 q4, q0, #0x8
143
+ vsli.8 q0, q4, #0x4
144
+ vshr.u32 q4, q0, #0x10
145
+ vsli.16 q0, q4, #0x8
146
+ vshr.u8 q4, q3, #0x2 // same ladder on q3
147
+ vsli.8 q3, q4, #0x1
148
+ vshr.u8 q4, q3, #0x3
149
+ vsli.8 q3, q4, #0x2
150
+ vshr.u8 q4, q3, #0x4
151
+ vsli.8 q3, q4, #0x3
152
+ vshr.u16 q4, q3, #0x8
153
+ vsli.8 q3, q4, #0x4
154
+ vshr.u32 q4, q3, #0x10
155
+ vsli.16 q3, q4, #0x8
156
+ vsli.32 q0, q3, #0x10 // q0 = (q3 << 16) | low 16 bits of q0; XORed into the even half below
157
+ vshl.i8 q4, q2, #0x2 // shift-left/insert ladder on q2 (mirror image of the ladder above)
158
+ vsri.8 q2, q4, #0x1
159
+ vshl.i8 q4, q2, #0x3
160
+ vsri.8 q2, q4, #0x2
161
+ vshl.i8 q4, q2, #0x4
162
+ vsri.8 q2, q4, #0x3
163
+ vshl.i16 q4, q2, #0x8
164
+ vsri.8 q2, q4, #0x4
165
+ vshl.i32 q4, q2, #0x10
166
+ vsri.16 q2, q4, #0x8
167
+ vshl.i8 q4, q1, #0x2 // same ladder on q1
168
+ vsri.8 q1, q4, #0x1
169
+ vshl.i8 q4, q1, #0x3
170
+ vsri.8 q1, q4, #0x2
171
+ vshl.i8 q4, q1, #0x4
172
+ vsri.8 q1, q4, #0x3
173
+ vshl.i16 q4, q1, #0x8
174
+ vsri.8 q1, q4, #0x4
175
+ vshl.i32 q4, q1, #0x10
176
+ vsri.16 q1, q4, #0x8
177
+ vsri.32 q1, q2, #0x10 // q1 = high 16 bits of q1 | (q2 >> 16); XORed into the odd half below
178
+ vldrw.u32 q4, [r8] // current even-half row
179
+ vldrw.u32 q5, [r7] // current odd-half row
180
+ veor q4, q4, q0
181
+ veor q5, q5, q1
182
+ vstrw.32 q4, [r8], #16 // store back and advance to the next lane's row
183
+ vstrw.32 q5, [r7], #16
184
+ vmov q7[2], q7[0], r1, r3 // pack the four input pointers into q7 for the vector-base loads in the main loop
185
+ vmov q7[3], q7[1], r2, r4
186
+ b Lkeccak_f1600_x4_state_xor_bytes_asm_main_body @ imm = #0xe
187
+
188
+ Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main: // reached when offset % 8 == 0 (prologue skipped)
189
+ vmov q7[2], q7[0], r1, r3 // pack the four input pointers into q7
190
+ vmov q7[3], q7[1], r2, r4
191
+ mov.w r0, #0x4
192
+ vsub.i32 q7, q7, r0 // bias every pointer by -4: the loop's first [q7, #4]! adds 4 before loading
193
+
194
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_body:
195
+ lsr.w lr, r6, #0x3 // lr = remaining length / 8 = number of full 8-byte groups
196
+ wls lr, lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end @ imm = #0xd4
197
+
198
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start:
199
+ vldrw.u32 q0, [q7, #4]! // load one word from each address in q7 (+4), writing the addresses back: first words
200
+ vldrw.u32 q1, [q7, #4]! // second words of the same 8-byte groups
201
+ vmov q2, q0 // q2/q3 = working copies of the first/second words
202
+ vmov q3, q1
203
+ vshr.u8 q4, q0, #0x2 // shift-right/insert ladder on q0 (q4 is scratch)
204
+ vsli.8 q0, q4, #0x1
205
+ vshr.u8 q4, q0, #0x3
206
+ vsli.8 q0, q4, #0x2
207
+ vshr.u8 q4, q0, #0x4
208
+ vsli.8 q0, q4, #0x3
209
+ vshr.u16 q4, q0, #0x8
210
+ vsli.8 q0, q4, #0x4
211
+ vshr.u32 q4, q0, #0x10
212
+ vsli.16 q0, q4, #0x8
213
+ vshr.u8 q4, q3, #0x2 // same ladder on q3
214
+ vsli.8 q3, q4, #0x1
215
+ vshr.u8 q4, q3, #0x3
216
+ vsli.8 q3, q4, #0x2
217
+ vshr.u8 q4, q3, #0x4
218
+ vsli.8 q3, q4, #0x3
219
+ vshr.u16 q4, q3, #0x8
220
+ vsli.8 q3, q4, #0x4
221
+ vshr.u32 q4, q3, #0x10
222
+ vsli.16 q3, q4, #0x8
223
+ vsli.32 q0, q3, #0x10 // q0 = (q3 << 16) | low 16 bits of q0; XORed into the even half below
224
+ vshl.i8 q4, q2, #0x2 // shift-left/insert ladder on q2 (mirror image of the ladder above)
225
+ vsri.8 q2, q4, #0x1
226
+ vshl.i8 q4, q2, #0x3
227
+ vsri.8 q2, q4, #0x2
228
+ vshl.i8 q4, q2, #0x4
229
+ vsri.8 q2, q4, #0x3
230
+ vshl.i16 q4, q2, #0x8
231
+ vsri.8 q2, q4, #0x4
232
+ vshl.i32 q4, q2, #0x10
233
+ vsri.16 q2, q4, #0x8
234
+ vshl.i8 q4, q1, #0x2 // same ladder on q1
235
+ vsri.8 q1, q4, #0x1
236
+ vshl.i8 q4, q1, #0x3
237
+ vsri.8 q1, q4, #0x2
238
+ vshl.i8 q4, q1, #0x4
239
+ vsri.8 q1, q4, #0x3
240
+ vshl.i16 q4, q1, #0x8
241
+ vsri.8 q1, q4, #0x4
242
+ vshl.i32 q4, q1, #0x10
243
+ vsri.16 q1, q4, #0x8
244
+ vsri.32 q1, q2, #0x10 // q1 = high 16 bits of q1 | (q2 >> 16); XORed into the odd half below
245
+ vldrw.u32 q4, [r8] // current even-half row
246
+ vldrw.u32 q5, [r7] // current odd-half row
247
+ veor q4, q4, q0
248
+ veor q5, q5, q1
249
+ vstrw.32 q4, [r8], #16 // store back and advance to the next lane's row
250
+ vstrw.32 q5, [r7], #16
251
+ le lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start @ imm = #-0xd4
252
+
253
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end:
254
+ ands r6, r6, #0x7 // r6 = leftover byte count (< 8)
255
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x110
256
+ mov.w r0, #0x4
257
+ vadd.i32 q7, q7, r0 // q7 += 4: step past the last word position consumed so far
258
+ vmov r1, r3, q7[2], q7[0] // unpack the four input pointers back into core registers
259
+ vmov r2, r4, q7[3], q7[1]
260
+ vctp.8 r6 // predicate covering the first r6 byte lanes
261
+ vpstttt // the four byte loads below execute under that predicate
262
+ vldrbt.u8 q0, [r1]
263
+ vldrbt.u8 q1, [r2]
264
+ vldrbt.u8 q2, [r3]
265
+ vldrbt.u8 q3, [r4]
266
+ vmov.f64 d1, d4 // high half of q0 := low half of q2
267
+ vmov.f64 d3, d6 // high half of q1 := low half of q3
268
+ vrev64.32 q2, q0 // q2/q3 = q0/q1 with the two words of each 64-bit half swapped
269
+ vrev64.32 q3, q1
270
+ movw r0, #0xf0f
271
+ vmsr p0, r0
272
+ vpsel q0, q0, q3 // q0 = first 32-bit word of each of the four inputs
273
+ vpsel q1, q2, q1 // q1 = second 32-bit word of each of the four inputs
274
+ vmov q2, q0 // q2/q3 = working copies of the first/second words
275
+ vmov q3, q1
276
+ vshr.u8 q4, q0, #0x2 // shift-right/insert ladder on q0 (q4 is scratch)
277
+ vsli.8 q0, q4, #0x1
278
+ vshr.u8 q4, q0, #0x3
279
+ vsli.8 q0, q4, #0x2
280
+ vshr.u8 q4, q0, #0x4
281
+ vsli.8 q0, q4, #0x3
282
+ vshr.u16 q4, q0, #0x8
283
+ vsli.8 q0, q4, #0x4
284
+ vshr.u32 q4, q0, #0x10
285
+ vsli.16 q0, q4, #0x8
286
+ vshr.u8 q4, q3, #0x2 // same ladder on q3
287
+ vsli.8 q3, q4, #0x1
288
+ vshr.u8 q4, q3, #0x3
289
+ vsli.8 q3, q4, #0x2
290
+ vshr.u8 q4, q3, #0x4
291
+ vsli.8 q3, q4, #0x3
292
+ vshr.u16 q4, q3, #0x8
293
+ vsli.8 q3, q4, #0x4
294
+ vshr.u32 q4, q3, #0x10
295
+ vsli.16 q3, q4, #0x8
296
+ vsli.32 q0, q3, #0x10 // q0 = (q3 << 16) | low 16 bits of q0; XORed into the even half below
297
+ vshl.i8 q4, q2, #0x2 // shift-left/insert ladder on q2 (mirror image of the ladder above)
298
+ vsri.8 q2, q4, #0x1
299
+ vshl.i8 q4, q2, #0x3
300
+ vsri.8 q2, q4, #0x2
301
+ vshl.i8 q4, q2, #0x4
302
+ vsri.8 q2, q4, #0x3
303
+ vshl.i16 q4, q2, #0x8
304
+ vsri.8 q2, q4, #0x4
305
+ vshl.i32 q4, q2, #0x10
306
+ vsri.16 q2, q4, #0x8
307
+ vshl.i8 q4, q1, #0x2 // same ladder on q1
308
+ vsri.8 q1, q4, #0x1
309
+ vshl.i8 q4, q1, #0x3
310
+ vsri.8 q1, q4, #0x2
311
+ vshl.i8 q4, q1, #0x4
312
+ vsri.8 q1, q4, #0x3
313
+ vshl.i16 q4, q1, #0x8
314
+ vsri.8 q1, q4, #0x4
315
+ vshl.i32 q4, q1, #0x10
316
+ vsri.16 q1, q4, #0x8
317
+ vsri.32 q1, q2, #0x10 // q1 = high 16 bits of q1 | (q2 >> 16); XORed into the odd half below
318
+ vldrw.u32 q4, [r8] // current even-half row
319
+ vldrw.u32 q5, [r7] // current odd-half row
320
+ veor q4, q4, q0
321
+ veor q5, q5, q1
322
+ vstrw.32 q4, [r8], #16
323
+ vstrw.32 q5, [r7], #16
324
+
325
+ Lkeccak_f1600_x4_state_xor_bytes_asm_exit: // common epilogue; also the target of the length == 0 early-out
326
+ vpop {d8, d9, d10, d11, d12, d13, d14, d15} // undo the vpush from the entry sequence (0x40 bytes)
327
+ .cfi_restore d8
328
+ .cfi_restore d9
329
+ .cfi_restore d10
330
+ .cfi_restore d11
331
+ .cfi_restore d12
332
+ .cfi_restore d13
333
+ .cfi_restore d14
334
+ .cfi_restore d15
335
+ .cfi_adjust_cfa_offset -0x40
336
+ pop.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} // restore core registers; the slot saved from lr is popped into pc, which returns
337
+ .cfi_restore r4
338
+ .cfi_restore r5
339
+ .cfi_restore r6
340
+ .cfi_restore r7
341
+ .cfi_restore r8
342
+ .cfi_restore r9
343
+ .cfi_restore r10
344
+ .cfi_restore r11
345
+ .cfi_restore lr
346
+ .cfi_adjust_cfa_offset -0x28
347
+ .cfi_endproc
348
+
349
+ MLD_ASM_FN_SIZE(keccak_f1600_x4_state_xor_bytes_asm)
350
+
351
+ #endif /* MLD_FIPS202_ARMV81M_NEED_X4 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
352
+
353
+ #if defined(__ELF__)
354
+ .section .note.GNU-stack,"",%progbits
355
+ #endif
@@ -16,9 +16,14 @@
16
16
  #include "aarch64/auto.h"
17
17
  #endif
18
18
 
19
- #if defined(MLD_SYS_X86_64) && defined(MLD_SYS_X86_64_AVX2)
20
- #include "x86_64/xkcp.h"
21
- #endif
19
+ #if defined(MLD_SYS_X86_64) && defined(MLD_SYS_X86_64_AVX2) && \
20
+ (!defined(MLD_CONFIG_NO_KEYPAIR_API) || \
21
+ !defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_REDUCE_RAM)) && \
22
+ !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY)
23
+ #include "x86_64/keccak_f1600_x4_avx2.h"
24
+ #endif /* MLD_SYS_X86_64 && MLD_SYS_X86_64_AVX2 && (!MLD_CONFIG_NO_KEYPAIR_API \
25
+ || !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_REDUCE_RAM) && \
26
+ !MLD_CONFIG_SERIAL_FIPS202_ONLY */
22
27
 
23
28
  /* We do not yet include the FIPS202 backend for Armv8.1-M+MVE by default
24
29
  * as it is still experimental and undergoing review. */
@@ -4,18 +4,19 @@
4
4
  * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
5
  */
6
6
 
7
- #ifndef MLD_FIPS202_NATIVE_X86_64_XKCP_H
8
- #define MLD_FIPS202_NATIVE_X86_64_XKCP_H
7
+ #ifndef MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H
8
+ #define MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H
9
9
 
10
10
  #include "../../../common.h"
11
11
 
12
- #define MLD_FIPS202_X86_64_XKCP
12
+ #define MLD_FIPS202_X86_64_NEED_X4_AVX2
13
+
14
+ /* Part of backend API */
15
+ #define MLD_USE_FIPS202_X4_NATIVE
13
16
 
14
17
  #if !defined(__ASSEMBLER__)
15
18
  #include "../api.h"
16
- #include "src/KeccakP_1600_times4_SIMD256.h"
17
-
18
- #define MLD_USE_FIPS202_X4_NATIVE
19
+ #include "src/fips202_native_x86_64.h"
19
20
  MLD_MUST_CHECK_RETURN_VALUE
20
21
  static MLD_INLINE int mld_keccak_f1600_x4_native(uint64_t *state)
21
22
  {
@@ -23,9 +24,11 @@ static MLD_INLINE int mld_keccak_f1600_x4_native(uint64_t *state)
23
24
  {
24
25
  return MLD_NATIVE_FUNC_FALLBACK;
25
26
  }
26
- mld_keccakf1600x4_permute24(state);
27
+
28
+ mld_keccak_f1600_x4_avx2_asm(state, mld_keccakf1600_round_constants,
29
+ mld_keccak_rho8, mld_keccak_rho56);
27
30
  return MLD_NATIVE_FUNC_SUCCESS;
28
31
  }
29
32
  #endif /* !__ASSEMBLER__ */
30
33
 
31
- #endif /* !MLD_FIPS202_NATIVE_X86_64_XKCP_H */
34
+ #endif /* !MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H */
@@ -0,0 +1,44 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ */
6
+
7
+ #ifndef MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H
8
+ #define MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H
9
+
10
+ #include "../../../../cbmc.h"
11
+ #include "../../../../common.h"
12
+
13
+ /* TODO: Reconsider whether this check is needed -- x86_64 is always
14
+ * little-endian, so the backend selection already implies this. */
15
+ #ifndef MLD_SYS_LITTLE_ENDIAN
16
+ #error Expecting a little-endian platform
17
+ #endif
18
+
19
+ /* Constant tables passed to mld_keccak_f1600_x4_avx2_asm. Its contract
+  * requires the rc/rho8/rho56 arguments to be exactly these objects. */
+ #define mld_keccakf1600_round_constants \
20
+ MLD_NAMESPACE(keccakf1600_round_constants)
21
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t
22
+ mld_keccakf1600_round_constants[24];
23
+
24
+ /* NOTE(review): rho8/rho56 are only declared here (4 x uint64_t each);
+  * presumably constants for the rotations by 8 and 56 -- confirm against
+  * their definition. */
+ #define mld_keccak_rho8 MLD_NAMESPACE(keccak_rho8)
25
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t mld_keccak_rho8[4];
26
+
27
+ #define mld_keccak_rho56 MLD_NAMESPACE(keccak_rho56)
28
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t mld_keccak_rho56[4];
29
+
30
+ #define mld_keccak_f1600_x4_avx2_asm MLD_NAMESPACE(keccak_f1600_x4_avx2_asm)
+ /* Prototype of the AVX2 assembly routine.
+  *
+  * states: buffer of 100 64-bit words (25 * 4 per the contract); it is the
+  *         only memory the contract lists as assigned.
+  * rc:     must be mld_keccakf1600_round_constants.
+  * rho8:   must be mld_keccak_rho8.
+  * rho56:  must be mld_keccak_rho56.
+  */
31
+ void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24],
32
+ const uint64_t rho8[4],
33
+ const uint64_t rho56[4])
34
+ /* This must be kept in sync with the HOL-Light specification
35
+ * in proofs/hol_light/x86_64/proofs/keccak_f1600_x4_avx2_asm.ml */
36
+ __contract__(
37
+ requires(memory_no_alias(states, sizeof(uint64_t) * 25 * 4))
38
+ requires(rc == mld_keccakf1600_round_constants)
39
+ requires(rho8 == mld_keccak_rho8)
40
+ requires(rho56 == mld_keccak_rho56)
41
+ assigns(memory_slice(states, sizeof(uint64_t) * 25 * 4))
42
+ );
43
+
44
+ #endif /* !MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H */