pq_crypto 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/extconf.rb +2 -0
  5. data/ext/pqcrypto/pqcrypto_ruby_secure.c +139 -0
  6. data/ext/pqcrypto/pqcrypto_secure.c +532 -0
  7. data/ext/pqcrypto/pqcrypto_secure.h +20 -0
  8. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  9. data/ext/pqcrypto/vendor/.vendored +4 -4
  10. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  111. data/lib/pq_crypto/hybrid_kem.rb +1 -1
  112. data/lib/pq_crypto/internal.rb +23 -0
  113. data/lib/pq_crypto/kem.rb +27 -34
  114. data/lib/pq_crypto/pkcs8/der.rb +68 -0
  115. data/lib/pq_crypto/pkcs8/private_key_choice.rb +186 -0
  116. data/lib/pq_crypto/pkcs8.rb +51 -468
  117. data/lib/pq_crypto/serialization.rb +19 -29
  118. data/lib/pq_crypto/signature.rb +28 -35
  119. data/lib/pq_crypto/version.rb +1 -1
  120. data/lib/pq_crypto.rb +10 -0
  121. data/script/vendor_libs.rb +3 -3
  122. metadata +44 -35
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  142. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  143. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  144. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  145. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  146. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  147. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  148. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  149. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  150. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  151. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  152. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  153. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  154. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -0,0 +1,355 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * Copyright (c) 2026 Arm Limited
5
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
6
+ */
7
+
8
+ // ---------------------------------------------------------------------------
9
+ // Overview
10
+ // ---------------------------------------------------------------------------
11
+ // MVE/Helium implementation of KeccakF1600x4_StateXORBytes.
12
+ //
13
+ // void KeccakF1600x4_StateXORBytes(state, d0, d1, d2, d3, offset, length)
14
+ //
15
+ // Reads 'length' plain bytes from each of four input buffers (d0..d3),
16
+ // splits every byte into its even and odd bits (bit-interleaving), and
17
+ // XORs the result into the Keccak state starting at byte 'offset'.
18
+ //
19
+ // ---------------------------------------------------------------------------
20
+ // Bit-interleaving background
21
+ // ---------------------------------------------------------------------------
22
+ // Each 64-bit Keccak lane is stored as two 32-bit words:
23
+ // even half -- bits 0, 2, 4, ..., 62 of the lane
24
+ // odd half -- bits 1, 3, 5, ..., 63 of the lane
25
+ // This representation allows 64-bit lane rotations (used in the Keccak
26
+ // round function) to be implemented as pairs of 32-bit rotations.
27
+ //
28
+ // Batched (x4) processing:
29
+ // Four Keccak instances are processed as a batch. Their states are
30
+ // stored interleaved in a single 800-byte buffer: first the even
31
+ // halves of all 25 lanes (400 bytes), then the odd halves (400 bytes).
32
+ // Within each 16-byte row, the four u32 words correspond to
33
+ // instances 0..3 of the same lane, enabling SIMD-parallel operations
34
+ // across all four instances.
35
+ //
36
+ // State memory layout (25 lanes x 4 instances x 2 halves):
37
+ // S[i][l]_even/odd = even/odd half of lane l, instance i (u32)
38
+ // Each row is 16 bytes (one Q-register).
39
+ // Offset Contents
40
+ // 0 S[0][ 0]_even, S[1][ 0]_even, S[2][ 0]_even, S[3][ 0]_even
41
+ // 16 S[0][ 1]_even, S[1][ 1]_even, S[2][ 1]_even, S[3][ 1]_even
42
+ // ...
43
+ // 384 S[0][24]_even, S[1][24]_even, S[2][24]_even, S[3][24]_even
44
+ // 400 S[0][ 0]_odd, S[1][ 0]_odd, S[2][ 0]_odd, S[3][ 0]_odd
45
+ // 416 S[0][ 1]_odd, S[1][ 1]_odd, S[2][ 1]_odd, S[3][ 1]_odd
46
+ // ...
47
+ // 784 S[0][24]_odd, S[1][24]_odd, S[2][24]_odd, S[3][24]_odd
48
+ //
49
+ // ---------------------------------------------------------------------------
50
+ // Three-phase structure
51
+ // ---------------------------------------------------------------------------
52
+ // Prologue -- if offset is not 8-byte aligned, absorb
53
+ // min(length, 8-(offset%8)) bytes via predicated byte loads.
54
+ // Main -- process full 8-byte groups via word-level gather loads,
55
+ // bit-interleave, then VEOR into even/odd state halves.
56
+ // Tail -- absorb remaining <8 bytes via predicated byte loads.
57
+
58
+ #include "../../../../common.h"
59
+ #if defined(MLD_FIPS202_ARMV81M_NEED_X4) && \
60
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
61
+
62
+ /*
63
+ * WARNING: This file is auto-derived from the mldsa-native source file
64
+ * dev/fips202/armv81m/src/state_xor_bytes_x4_mve.S using scripts/simpasm. Do not modify it directly.
65
+ */
66
+
67
+ .thumb
68
+ .syntax unified
69
+
70
+ .text
71
+ .balign 4
72
+ .global MLD_ASM_NAMESPACE(keccak_f1600_x4_state_xor_bytes_asm)
73
+ MLD_ASM_FN_SYMBOL(keccak_f1600_x4_state_xor_bytes_asm)
74
+
75
+ .cfi_startproc
76
+ push.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
77
+ .cfi_adjust_cfa_offset 0x28
78
+ .cfi_rel_offset r4, 0x0
79
+ .cfi_rel_offset r5, 0x4
80
+ .cfi_rel_offset r6, 0x8
81
+ .cfi_rel_offset r7, 0xc
82
+ .cfi_rel_offset r8, 0x10
83
+ .cfi_rel_offset r9, 0x14
84
+ .cfi_rel_offset r10, 0x18
85
+ .cfi_rel_offset r11, 0x1c
86
+ .cfi_rel_offset lr, 0x24
87
+ vpush {d8, d9, d10, d11, d12, d13, d14, d15}
88
+ .cfi_adjust_cfa_offset 0x40
89
+ .cfi_rel_offset d8, 0x0
90
+ .cfi_rel_offset d9, 0x8
91
+ .cfi_rel_offset d10, 0x10
92
+ .cfi_rel_offset d11, 0x18
93
+ .cfi_rel_offset d12, 0x20
94
+ .cfi_rel_offset d13, 0x28
95
+ .cfi_rel_offset d14, 0x30
96
+ .cfi_rel_offset d15, 0x38
97
+ ldr r4, [sp, #0x68]
98
+ ldr.w r10, [sp, #0x6c]
99
+ ldr r6, [sp, #0x70]
100
+ cmp r6, #0x0
101
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x346
102
+ and r5, r10, #0x7
103
+ bic r9, r10, #0x7
104
+ add.w r8, r0, r9, lsl #1
105
+ add.w r7, r8, #0x190
106
+ cmp r5, #0x0
107
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main @ imm = #0x12c
108
+ subs r1, r1, r5
109
+ subs r2, r2, r5
110
+ subs r3, r3, r5
111
+ subs r4, r4, r5
112
+ rsb.w lr, r5, #0x8
113
+ cmp r6, lr
114
+ it ls
115
+ movls lr, r6
116
+ subs.w r6, r6, lr
117
+ vctp.8 lr
118
+ vmrs r11, p0
119
+ lsl.w r11, r11, r5
120
+ vmsr p0, r11
121
+ vpstttt
122
+ vldrbt.u8 q0, [r1], #4
123
+ vldrbt.u8 q1, [r2], #4
124
+ vldrbt.u8 q2, [r3], #4
125
+ vldrbt.u8 q3, [r4], #4
126
+ vmov.f64 d1, d4
127
+ vmov.f64 d3, d6
128
+ vrev64.32 q2, q0
129
+ vrev64.32 q3, q1
130
+ movw r0, #0xf0f
131
+ vmsr p0, r0
132
+ vpsel q0, q0, q3
133
+ vpsel q1, q2, q1
134
+ vmov q2, q0
135
+ vmov q3, q1
136
+ vshr.u8 q4, q0, #0x2
137
+ vsli.8 q0, q4, #0x1
138
+ vshr.u8 q4, q0, #0x3
139
+ vsli.8 q0, q4, #0x2
140
+ vshr.u8 q4, q0, #0x4
141
+ vsli.8 q0, q4, #0x3
142
+ vshr.u16 q4, q0, #0x8
143
+ vsli.8 q0, q4, #0x4
144
+ vshr.u32 q4, q0, #0x10
145
+ vsli.16 q0, q4, #0x8
146
+ vshr.u8 q4, q3, #0x2
147
+ vsli.8 q3, q4, #0x1
148
+ vshr.u8 q4, q3, #0x3
149
+ vsli.8 q3, q4, #0x2
150
+ vshr.u8 q4, q3, #0x4
151
+ vsli.8 q3, q4, #0x3
152
+ vshr.u16 q4, q3, #0x8
153
+ vsli.8 q3, q4, #0x4
154
+ vshr.u32 q4, q3, #0x10
155
+ vsli.16 q3, q4, #0x8
156
+ vsli.32 q0, q3, #0x10
157
+ vshl.i8 q4, q2, #0x2
158
+ vsri.8 q2, q4, #0x1
159
+ vshl.i8 q4, q2, #0x3
160
+ vsri.8 q2, q4, #0x2
161
+ vshl.i8 q4, q2, #0x4
162
+ vsri.8 q2, q4, #0x3
163
+ vshl.i16 q4, q2, #0x8
164
+ vsri.8 q2, q4, #0x4
165
+ vshl.i32 q4, q2, #0x10
166
+ vsri.16 q2, q4, #0x8
167
+ vshl.i8 q4, q1, #0x2
168
+ vsri.8 q1, q4, #0x1
169
+ vshl.i8 q4, q1, #0x3
170
+ vsri.8 q1, q4, #0x2
171
+ vshl.i8 q4, q1, #0x4
172
+ vsri.8 q1, q4, #0x3
173
+ vshl.i16 q4, q1, #0x8
174
+ vsri.8 q1, q4, #0x4
175
+ vshl.i32 q4, q1, #0x10
176
+ vsri.16 q1, q4, #0x8
177
+ vsri.32 q1, q2, #0x10
178
+ vldrw.u32 q4, [r8]
179
+ vldrw.u32 q5, [r7]
180
+ veor q4, q4, q0
181
+ veor q5, q5, q1
182
+ vstrw.32 q4, [r8], #16
183
+ vstrw.32 q5, [r7], #16
184
+ vmov q7[2], q7[0], r1, r3
185
+ vmov q7[3], q7[1], r2, r4
186
+ b Lkeccak_f1600_x4_state_xor_bytes_asm_main_body @ imm = #0xe
187
+
188
+ Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main:
189
+ vmov q7[2], q7[0], r1, r3
190
+ vmov q7[3], q7[1], r2, r4
191
+ mov.w r0, #0x4
192
+ vsub.i32 q7, q7, r0
193
+
194
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_body:
195
+ lsr.w lr, r6, #0x3
196
+ wls lr, lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end @ imm = #0xd4
197
+
198
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start:
199
+ vldrw.u32 q0, [q7, #4]!
200
+ vldrw.u32 q1, [q7, #4]!
201
+ vmov q2, q0
202
+ vmov q3, q1
203
+ vshr.u8 q4, q0, #0x2
204
+ vsli.8 q0, q4, #0x1
205
+ vshr.u8 q4, q0, #0x3
206
+ vsli.8 q0, q4, #0x2
207
+ vshr.u8 q4, q0, #0x4
208
+ vsli.8 q0, q4, #0x3
209
+ vshr.u16 q4, q0, #0x8
210
+ vsli.8 q0, q4, #0x4
211
+ vshr.u32 q4, q0, #0x10
212
+ vsli.16 q0, q4, #0x8
213
+ vshr.u8 q4, q3, #0x2
214
+ vsli.8 q3, q4, #0x1
215
+ vshr.u8 q4, q3, #0x3
216
+ vsli.8 q3, q4, #0x2
217
+ vshr.u8 q4, q3, #0x4
218
+ vsli.8 q3, q4, #0x3
219
+ vshr.u16 q4, q3, #0x8
220
+ vsli.8 q3, q4, #0x4
221
+ vshr.u32 q4, q3, #0x10
222
+ vsli.16 q3, q4, #0x8
223
+ vsli.32 q0, q3, #0x10
224
+ vshl.i8 q4, q2, #0x2
225
+ vsri.8 q2, q4, #0x1
226
+ vshl.i8 q4, q2, #0x3
227
+ vsri.8 q2, q4, #0x2
228
+ vshl.i8 q4, q2, #0x4
229
+ vsri.8 q2, q4, #0x3
230
+ vshl.i16 q4, q2, #0x8
231
+ vsri.8 q2, q4, #0x4
232
+ vshl.i32 q4, q2, #0x10
233
+ vsri.16 q2, q4, #0x8
234
+ vshl.i8 q4, q1, #0x2
235
+ vsri.8 q1, q4, #0x1
236
+ vshl.i8 q4, q1, #0x3
237
+ vsri.8 q1, q4, #0x2
238
+ vshl.i8 q4, q1, #0x4
239
+ vsri.8 q1, q4, #0x3
240
+ vshl.i16 q4, q1, #0x8
241
+ vsri.8 q1, q4, #0x4
242
+ vshl.i32 q4, q1, #0x10
243
+ vsri.16 q1, q4, #0x8
244
+ vsri.32 q1, q2, #0x10
245
+ vldrw.u32 q4, [r8]
246
+ vldrw.u32 q5, [r7]
247
+ veor q4, q4, q0
248
+ veor q5, q5, q1
249
+ vstrw.32 q4, [r8], #16
250
+ vstrw.32 q5, [r7], #16
251
+ le lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start @ imm = #-0xd4
252
+
253
+ Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end:
254
+ ands r6, r6, #0x7
255
+ beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x110
256
+ mov.w r0, #0x4
257
+ vadd.i32 q7, q7, r0
258
+ vmov r1, r3, q7[2], q7[0]
259
+ vmov r2, r4, q7[3], q7[1]
260
+ vctp.8 r6
261
+ vpstttt
262
+ vldrbt.u8 q0, [r1]
263
+ vldrbt.u8 q1, [r2]
264
+ vldrbt.u8 q2, [r3]
265
+ vldrbt.u8 q3, [r4]
266
+ vmov.f64 d1, d4
267
+ vmov.f64 d3, d6
268
+ vrev64.32 q2, q0
269
+ vrev64.32 q3, q1
270
+ movw r0, #0xf0f
271
+ vmsr p0, r0
272
+ vpsel q0, q0, q3
273
+ vpsel q1, q2, q1
274
+ vmov q2, q0
275
+ vmov q3, q1
276
+ vshr.u8 q4, q0, #0x2
277
+ vsli.8 q0, q4, #0x1
278
+ vshr.u8 q4, q0, #0x3
279
+ vsli.8 q0, q4, #0x2
280
+ vshr.u8 q4, q0, #0x4
281
+ vsli.8 q0, q4, #0x3
282
+ vshr.u16 q4, q0, #0x8
283
+ vsli.8 q0, q4, #0x4
284
+ vshr.u32 q4, q0, #0x10
285
+ vsli.16 q0, q4, #0x8
286
+ vshr.u8 q4, q3, #0x2
287
+ vsli.8 q3, q4, #0x1
288
+ vshr.u8 q4, q3, #0x3
289
+ vsli.8 q3, q4, #0x2
290
+ vshr.u8 q4, q3, #0x4
291
+ vsli.8 q3, q4, #0x3
292
+ vshr.u16 q4, q3, #0x8
293
+ vsli.8 q3, q4, #0x4
294
+ vshr.u32 q4, q3, #0x10
295
+ vsli.16 q3, q4, #0x8
296
+ vsli.32 q0, q3, #0x10
297
+ vshl.i8 q4, q2, #0x2
298
+ vsri.8 q2, q4, #0x1
299
+ vshl.i8 q4, q2, #0x3
300
+ vsri.8 q2, q4, #0x2
301
+ vshl.i8 q4, q2, #0x4
302
+ vsri.8 q2, q4, #0x3
303
+ vshl.i16 q4, q2, #0x8
304
+ vsri.8 q2, q4, #0x4
305
+ vshl.i32 q4, q2, #0x10
306
+ vsri.16 q2, q4, #0x8
307
+ vshl.i8 q4, q1, #0x2
308
+ vsri.8 q1, q4, #0x1
309
+ vshl.i8 q4, q1, #0x3
310
+ vsri.8 q1, q4, #0x2
311
+ vshl.i8 q4, q1, #0x4
312
+ vsri.8 q1, q4, #0x3
313
+ vshl.i16 q4, q1, #0x8
314
+ vsri.8 q1, q4, #0x4
315
+ vshl.i32 q4, q1, #0x10
316
+ vsri.16 q1, q4, #0x8
317
+ vsri.32 q1, q2, #0x10
318
+ vldrw.u32 q4, [r8]
319
+ vldrw.u32 q5, [r7]
320
+ veor q4, q4, q0
321
+ veor q5, q5, q1
322
+ vstrw.32 q4, [r8], #16
323
+ vstrw.32 q5, [r7], #16
324
+
325
+ Lkeccak_f1600_x4_state_xor_bytes_asm_exit:
326
+ vpop {d8, d9, d10, d11, d12, d13, d14, d15}
327
+ .cfi_restore d8
328
+ .cfi_restore d9
329
+ .cfi_restore d10
330
+ .cfi_restore d11
331
+ .cfi_restore d12
332
+ .cfi_restore d13
333
+ .cfi_restore d14
334
+ .cfi_restore d15
335
+ .cfi_adjust_cfa_offset -0x40
336
+ pop.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
337
+ .cfi_restore r4
338
+ .cfi_restore r5
339
+ .cfi_restore r6
340
+ .cfi_restore r7
341
+ .cfi_restore r8
342
+ .cfi_restore r9
343
+ .cfi_restore r10
344
+ .cfi_restore r11
345
+ .cfi_restore lr
346
+ .cfi_adjust_cfa_offset -0x28
347
+ .cfi_endproc
348
+
349
+ MLD_ASM_FN_SIZE(keccak_f1600_x4_state_xor_bytes_asm)
350
+
351
+ #endif /* MLD_FIPS202_ARMV81M_NEED_X4 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
352
+
353
+ #if defined(__ELF__)
354
+ .section .note.GNU-stack,"",%progbits
355
+ #endif
@@ -16,9 +16,14 @@
16
16
  #include "aarch64/auto.h"
17
17
  #endif
18
18
 
19
- #if defined(MLD_SYS_X86_64) && defined(MLD_SYS_X86_64_AVX2)
20
- #include "x86_64/xkcp.h"
21
- #endif
19
+ #if defined(MLD_SYS_X86_64) && defined(MLD_SYS_X86_64_AVX2) && \
20
+ (!defined(MLD_CONFIG_NO_KEYPAIR_API) || \
21
+ !defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_REDUCE_RAM)) && \
22
+ !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY)
23
+ #include "x86_64/keccak_f1600_x4_avx2.h"
24
+ #endif /* MLD_SYS_X86_64 && MLD_SYS_X86_64_AVX2 && (!MLD_CONFIG_NO_KEYPAIR_API \
25
+ || !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_REDUCE_RAM) && \
26
+ !MLD_CONFIG_SERIAL_FIPS202_ONLY */
22
27
 
23
28
  /* We do not yet include the FIPS202 backend for Armv8.1-M+MVE by default
24
29
  * as it is still experimental and undergoing review. */
@@ -4,18 +4,19 @@
4
4
  * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
5
  */
6
6
 
7
- #ifndef MLD_FIPS202_NATIVE_X86_64_XKCP_H
8
- #define MLD_FIPS202_NATIVE_X86_64_XKCP_H
7
+ #ifndef MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H
8
+ #define MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H
9
9
 
10
10
  #include "../../../common.h"
11
11
 
12
- #define MLD_FIPS202_X86_64_XKCP
12
+ #define MLD_FIPS202_X86_64_NEED_X4_AVX2
13
+
14
+ /* Part of backend API */
15
+ #define MLD_USE_FIPS202_X4_NATIVE
13
16
 
14
17
  #if !defined(__ASSEMBLER__)
15
18
  #include "../api.h"
16
- #include "src/KeccakP_1600_times4_SIMD256.h"
17
-
18
- #define MLD_USE_FIPS202_X4_NATIVE
19
+ #include "src/fips202_native_x86_64.h"
19
20
  MLD_MUST_CHECK_RETURN_VALUE
20
21
  static MLD_INLINE int mld_keccak_f1600_x4_native(uint64_t *state)
21
22
  {
@@ -23,9 +24,11 @@ static MLD_INLINE int mld_keccak_f1600_x4_native(uint64_t *state)
23
24
  {
24
25
  return MLD_NATIVE_FUNC_FALLBACK;
25
26
  }
26
- mld_keccakf1600x4_permute24(state);
27
+
28
+ mld_keccak_f1600_x4_avx2_asm(state, mld_keccakf1600_round_constants,
29
+ mld_keccak_rho8, mld_keccak_rho56);
27
30
  return MLD_NATIVE_FUNC_SUCCESS;
28
31
  }
29
32
  #endif /* !__ASSEMBLER__ */
30
33
 
31
- #endif /* !MLD_FIPS202_NATIVE_X86_64_XKCP_H */
34
+ #endif /* !MLD_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H */
@@ -0,0 +1,44 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ */
6
+
7
+ #ifndef MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H
8
+ #define MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H
9
+
10
+ #include "../../../../cbmc.h"
11
+ #include "../../../../common.h"
12
+
13
+ /* TODO: Reconsider whether this check is needed -- x86_64 is always
14
+ * little-endian, so the backend selection already implies this. */
15
+ #ifndef MLD_SYS_LITTLE_ENDIAN
16
+ #error Expecting a little-endian platform
17
+ #endif
18
+
19
+ #define mld_keccakf1600_round_constants \
20
+ MLD_NAMESPACE(keccakf1600_round_constants)
21
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t
22
+ mld_keccakf1600_round_constants[24];
23
+
24
+ #define mld_keccak_rho8 MLD_NAMESPACE(keccak_rho8)
25
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t mld_keccak_rho8[4];
26
+
27
+ #define mld_keccak_rho56 MLD_NAMESPACE(keccak_rho56)
28
+ MLD_INTERNAL_DATA_DECLARATION const uint64_t mld_keccak_rho56[4];
29
+
30
+ #define mld_keccak_f1600_x4_avx2_asm MLD_NAMESPACE(keccak_f1600_x4_avx2_asm)
31
+ void mld_keccak_f1600_x4_avx2_asm(uint64_t states[100], const uint64_t rc[24],
32
+ const uint64_t rho8[4],
33
+ const uint64_t rho56[4])
34
+ /* This must be kept in sync with the HOL-Light specification
35
+ * in proofs/hol_light/x86_64/proofs/keccak_f1600_x4_avx2_asm.ml */
36
+ __contract__(
37
+ requires(memory_no_alias(states, sizeof(uint64_t) * 25 * 4))
38
+ requires(rc == mld_keccakf1600_round_constants)
39
+ requires(rho8 == mld_keccak_rho8)
40
+ requires(rho56 == mld_keccak_rho56)
41
+ assigns(memory_slice(states, sizeof(uint64_t) * 25 * 4))
42
+ );
43
+
44
+ #endif /* !MLD_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H */