pq_crypto 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  5. data/ext/pqcrypto/vendor/.vendored +4 -4
  6. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  7. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  8. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  9. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  10. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  107. data/lib/pq_crypto/version.rb +1 -1
  108. data/script/vendor_libs.rb +3 -3
  109. metadata +41 -35
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -0,0 +1,129 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
18
+ // Stores to [x0] the Montgomery-reduced sum of lane-wise signed 32-bit products taken from 4 blocks spaced 0x400 bytes apart at [x1] and [x2]. Runs 16 iterations of 16 lanes. Uses only x0-x3, v0, v1 and v16-v31, so nothing is saved or restored. NOTE(review): x1/x2 are presumably length-4 polynomial vectors and x0 a single polynomial; confirm against the C prototype.
19
+ .cfi_startproc
20
+ mov w3, #0xe001 // =57345
21
+ movk w3, #0x7f, lsl #16
22
+ dup v0.4s, w3 // v0 = 0x007fe001 (8380417, the ML-DSA modulus q) in every lane
23
+ mov w3, #0x2001 // =8193
24
+ movk w3, #0x380, lsl #16
25
+ dup v1.4s, w3 // v1 = 0x03802001 (58728449, q^-1 mod 2^32) in every lane
26
+ mov x3, #0x40 // =64; loop counter, lowered by 4 per iteration (16 iterations)
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l4_loop_start:
29
+ ldr q17, [x1, #0x10] // block 0 of x1: bytes 0x10-0x3f are read before the base moves
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // bytes 0x00-0x0f, then x1 += 0x40
33
+ ldr q21, [x2, #0x10] // block 0 of x2, same pattern
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // bytes 0x00-0x0f, then x2 += 0x40
37
+ smull v24.2d, v16.2s, v20.2s // start the eight 64-bit accumulators v24-v31 with the block 0 products
38
+ smull2 v25.2d, v16.4s, v20.4s
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // block 1: x1 already moved by 0x40, so 0x3c0 is 0x400 past block 0
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // add the block 1 products to the accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // block 2 (0x800 past block 0)
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // add the block 2 products
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // block 3 (0xc00 past block 0), the last one
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // add the block 3 products
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ uzp1 v16.4s, v24.4s, v25.4s // Montgomery reduction: gather the low 32 bits of each 64-bit sum
94
+ mul v16.4s, v16.4s, v1.4s // t = low32 * q^-1 (mod 2^32)
95
+ smlsl v24.2d, v16.2s, v0.2s // sum -= t * q for lanes 0-1, which zeroes the low 32 bits
96
+ smlsl2 v25.2d, v16.4s, v0.4s // same for lanes 2-3
97
+ uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the reduced lanes 0-3
98
+ uzp1 v17.4s, v26.4s, v27.4s // same reduction for lanes 4-7
99
+ mul v17.4s, v17.4s, v1.4s
100
+ smlsl v26.2d, v17.2s, v0.2s
101
+ smlsl2 v27.2d, v17.4s, v0.4s
102
+ uzp2 v17.4s, v26.4s, v27.4s
103
+ uzp1 v18.4s, v28.4s, v29.4s // same reduction for lanes 8-11
104
+ mul v18.4s, v18.4s, v1.4s
105
+ smlsl v28.2d, v18.2s, v0.2s
106
+ smlsl2 v29.2d, v18.4s, v0.4s
107
+ uzp2 v18.4s, v28.4s, v29.4s
108
+ uzp1 v19.4s, v30.4s, v31.4s // same reduction for lanes 12-15
109
+ mul v19.4s, v19.4s, v1.4s
110
+ smlsl v30.2d, v19.2s, v0.2s
111
+ smlsl2 v31.2d, v19.4s, v0.4s
112
+ uzp2 v19.4s, v30.4s, v31.4s
113
+ str q17, [x0, #0x10] // write the 16 reduced lanes; bytes 0x10-0x3f go first
114
+ str q18, [x0, #0x20]
115
+ str q19, [x0, #0x30]
116
+ str q16, [x0], #0x40 // bytes 0x00-0x0f, then x0 += 0x40
117
+ subs x3, x3, #0x4 // counter -= 4; the flags set here are not read afterwards
118
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l4_loop_start // repeat until the counter reaches 0
119
+ ret
120
+ .cfi_endproc
121
+
122
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
123
+
124
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
125
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4) */
126
+
127
+ #if defined(__ELF__)
128
+ .section .note.GNU-stack,"",%progbits
129
+ #endif
@@ -0,0 +1,145 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
18
+ // Stores to [x0] the Montgomery-reduced sum of lane-wise signed 32-bit products taken from 5 blocks spaced 0x400 bytes apart at [x1] and [x2]. Runs 16 iterations of 16 lanes. Uses only x0-x3, v0, v1 and v16-v31, so nothing is saved or restored. NOTE(review): x1/x2 are presumably length-5 polynomial vectors and x0 a single polynomial; confirm against the C prototype.
19
+ .cfi_startproc
20
+ mov w3, #0xe001 // =57345
21
+ movk w3, #0x7f, lsl #16
22
+ dup v0.4s, w3 // v0 = 0x007fe001 (8380417, the ML-DSA modulus q) in every lane
23
+ mov w3, #0x2001 // =8193
24
+ movk w3, #0x380, lsl #16
25
+ dup v1.4s, w3 // v1 = 0x03802001 (58728449, q^-1 mod 2^32) in every lane
26
+ mov x3, #0x40 // =64; loop counter, lowered by 4 per iteration (16 iterations)
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l5_loop_start:
29
+ ldr q17, [x1, #0x10] // block 0 of x1: bytes 0x10-0x3f are read before the base moves
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // bytes 0x00-0x0f, then x1 += 0x40
33
+ ldr q21, [x2, #0x10] // block 0 of x2, same pattern
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // bytes 0x00-0x0f, then x2 += 0x40
37
+ smull v24.2d, v16.2s, v20.2s // start the eight 64-bit accumulators v24-v31 with the block 0 products
38
+ smull2 v25.2d, v16.4s, v20.4s
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // block 1: x1 already moved by 0x40, so 0x3c0 is 0x400 past block 0
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // add the block 1 products to the accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // block 2 (0x800 past block 0)
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // add the block 2 products
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // block 3 (0xc00 past block 0)
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // add the block 3 products
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ ldr q16, [x1, #0xfc0] // block 4 (0x1000 past block 0), the last one
94
+ ldr q17, [x1, #0xfd0]
95
+ ldr q18, [x1, #0xfe0]
96
+ ldr q19, [x1, #0xff0]
97
+ ldr q20, [x2, #0xfc0]
98
+ ldr q21, [x2, #0xfd0]
99
+ ldr q22, [x2, #0xfe0]
100
+ ldr q23, [x2, #0xff0]
101
+ smlal v24.2d, v16.2s, v20.2s // add the block 4 products
102
+ smlal2 v25.2d, v16.4s, v20.4s
103
+ smlal v26.2d, v17.2s, v21.2s
104
+ smlal2 v27.2d, v17.4s, v21.4s
105
+ smlal v28.2d, v18.2s, v22.2s
106
+ smlal2 v29.2d, v18.4s, v22.4s
107
+ smlal v30.2d, v19.2s, v23.2s
108
+ smlal2 v31.2d, v19.4s, v23.4s
109
+ uzp1 v16.4s, v24.4s, v25.4s // Montgomery reduction: gather the low 32 bits of each 64-bit sum
110
+ mul v16.4s, v16.4s, v1.4s // t = low32 * q^-1 (mod 2^32)
111
+ smlsl v24.2d, v16.2s, v0.2s // sum -= t * q for lanes 0-1, which zeroes the low 32 bits
112
+ smlsl2 v25.2d, v16.4s, v0.4s // same for lanes 2-3
113
+ uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the reduced lanes 0-3
114
+ uzp1 v17.4s, v26.4s, v27.4s // same reduction for lanes 4-7
115
+ mul v17.4s, v17.4s, v1.4s
116
+ smlsl v26.2d, v17.2s, v0.2s
117
+ smlsl2 v27.2d, v17.4s, v0.4s
118
+ uzp2 v17.4s, v26.4s, v27.4s
119
+ uzp1 v18.4s, v28.4s, v29.4s // same reduction for lanes 8-11
120
+ mul v18.4s, v18.4s, v1.4s
121
+ smlsl v28.2d, v18.2s, v0.2s
122
+ smlsl2 v29.2d, v18.4s, v0.4s
123
+ uzp2 v18.4s, v28.4s, v29.4s
124
+ uzp1 v19.4s, v30.4s, v31.4s // same reduction for lanes 12-15
125
+ mul v19.4s, v19.4s, v1.4s
126
+ smlsl v30.2d, v19.2s, v0.2s
127
+ smlsl2 v31.2d, v19.4s, v0.4s
128
+ uzp2 v19.4s, v30.4s, v31.4s
129
+ str q17, [x0, #0x10] // write the 16 reduced lanes; bytes 0x10-0x3f go first
130
+ str q18, [x0, #0x20]
131
+ str q19, [x0, #0x30]
132
+ str q16, [x0], #0x40 // bytes 0x00-0x0f, then x0 += 0x40
133
+ subs x3, x3, #0x4 // counter -= 4; the flags set here are not read afterwards
134
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l5_loop_start // repeat until the counter reaches 0
135
+ ret
136
+ .cfi_endproc
137
+
138
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
139
+
140
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
141
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5) */
142
+
143
+ #if defined(__ELF__)
144
+ .section .note.GNU-stack,"",%progbits
145
+ #endif
@@ -0,0 +1,177 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
18
+ // Stores to [x0] the Montgomery-reduced sum of lane-wise signed 32-bit products taken from 7 blocks spaced 0x400 bytes apart at [x1] and [x2]. Runs 16 iterations of 16 lanes. Uses only x0-x3, v0, v1 and v16-v31, so nothing is saved or restored. NOTE(review): x1/x2 are presumably length-7 polynomial vectors and x0 a single polynomial; confirm against the C prototype.
19
+ .cfi_startproc
20
+ mov w3, #0xe001 // =57345
21
+ movk w3, #0x7f, lsl #16
22
+ dup v0.4s, w3 // v0 = 0x007fe001 (8380417, the ML-DSA modulus q) in every lane
23
+ mov w3, #0x2001 // =8193
24
+ movk w3, #0x380, lsl #16
25
+ dup v1.4s, w3 // v1 = 0x03802001 (58728449, q^-1 mod 2^32) in every lane
26
+ mov x3, #0x40 // =64; loop counter, lowered by 4 per iteration (16 iterations)
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l7_loop_start:
29
+ ldr q17, [x1, #0x10] // block 0 of x1: bytes 0x10-0x3f are read before the base moves
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // bytes 0x00-0x0f, then x1 += 0x40
33
+ ldr q21, [x2, #0x10] // block 0 of x2, same pattern
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // bytes 0x00-0x0f, then x2 += 0x40
37
+ smull v24.2d, v16.2s, v20.2s // start the eight 64-bit accumulators v24-v31 with the block 0 products
38
+ smull2 v25.2d, v16.4s, v20.4s
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // block 1: x1 already moved by 0x40, so 0x3c0 is 0x400 past block 0
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // add the block 1 products to the accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // block 2 (0x800 past block 0)
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // add the block 2 products
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // block 3 (0xc00 past block 0)
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // add the block 3 products
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ ldr q16, [x1, #0xfc0] // block 4 (0x1000 past block 0)
94
+ ldr q17, [x1, #0xfd0]
95
+ ldr q18, [x1, #0xfe0]
96
+ ldr q19, [x1, #0xff0]
97
+ ldr q20, [x2, #0xfc0]
98
+ ldr q21, [x2, #0xfd0]
99
+ ldr q22, [x2, #0xfe0]
100
+ ldr q23, [x2, #0xff0]
101
+ smlal v24.2d, v16.2s, v20.2s // add the block 4 products
102
+ smlal2 v25.2d, v16.4s, v20.4s
103
+ smlal v26.2d, v17.2s, v21.2s
104
+ smlal2 v27.2d, v17.4s, v21.4s
105
+ smlal v28.2d, v18.2s, v22.2s
106
+ smlal2 v29.2d, v18.4s, v22.4s
107
+ smlal v30.2d, v19.2s, v23.2s
108
+ smlal2 v31.2d, v19.4s, v23.4s
109
+ ldr q16, [x1, #0x13c0] // block 5 (0x1400 past block 0)
110
+ ldr q17, [x1, #0x13d0]
111
+ ldr q18, [x1, #0x13e0]
112
+ ldr q19, [x1, #0x13f0]
113
+ ldr q20, [x2, #0x13c0]
114
+ ldr q21, [x2, #0x13d0]
115
+ ldr q22, [x2, #0x13e0]
116
+ ldr q23, [x2, #0x13f0]
117
+ smlal v24.2d, v16.2s, v20.2s // add the block 5 products
118
+ smlal2 v25.2d, v16.4s, v20.4s
119
+ smlal v26.2d, v17.2s, v21.2s
120
+ smlal2 v27.2d, v17.4s, v21.4s
121
+ smlal v28.2d, v18.2s, v22.2s
122
+ smlal2 v29.2d, v18.4s, v22.4s
123
+ smlal v30.2d, v19.2s, v23.2s
124
+ smlal2 v31.2d, v19.4s, v23.4s
125
+ ldr q16, [x1, #0x17c0] // block 6 (0x1800 past block 0), the last one
126
+ ldr q17, [x1, #0x17d0]
127
+ ldr q18, [x1, #0x17e0]
128
+ ldr q19, [x1, #0x17f0]
129
+ ldr q20, [x2, #0x17c0]
130
+ ldr q21, [x2, #0x17d0]
131
+ ldr q22, [x2, #0x17e0]
132
+ ldr q23, [x2, #0x17f0]
133
+ smlal v24.2d, v16.2s, v20.2s // add the block 6 products
134
+ smlal2 v25.2d, v16.4s, v20.4s
135
+ smlal v26.2d, v17.2s, v21.2s
136
+ smlal2 v27.2d, v17.4s, v21.4s
137
+ smlal v28.2d, v18.2s, v22.2s
138
+ smlal2 v29.2d, v18.4s, v22.4s
139
+ smlal v30.2d, v19.2s, v23.2s
140
+ smlal2 v31.2d, v19.4s, v23.4s
141
+ uzp1 v16.4s, v24.4s, v25.4s // Montgomery reduction: gather the low 32 bits of each 64-bit sum
142
+ mul v16.4s, v16.4s, v1.4s // t = low32 * q^-1 (mod 2^32)
143
+ smlsl v24.2d, v16.2s, v0.2s // sum -= t * q for lanes 0-1, which zeroes the low 32 bits
144
+ smlsl2 v25.2d, v16.4s, v0.4s // same for lanes 2-3
145
+ uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the reduced lanes 0-3
146
+ uzp1 v17.4s, v26.4s, v27.4s // same reduction for lanes 4-7
147
+ mul v17.4s, v17.4s, v1.4s
148
+ smlsl v26.2d, v17.2s, v0.2s
149
+ smlsl2 v27.2d, v17.4s, v0.4s
150
+ uzp2 v17.4s, v26.4s, v27.4s
151
+ uzp1 v18.4s, v28.4s, v29.4s // same reduction for lanes 8-11
152
+ mul v18.4s, v18.4s, v1.4s
153
+ smlsl v28.2d, v18.2s, v0.2s
154
+ smlsl2 v29.2d, v18.4s, v0.4s
155
+ uzp2 v18.4s, v28.4s, v29.4s
156
+ uzp1 v19.4s, v30.4s, v31.4s // same reduction for lanes 12-15
157
+ mul v19.4s, v19.4s, v1.4s
158
+ smlsl v30.2d, v19.2s, v0.2s
159
+ smlsl2 v31.2d, v19.4s, v0.4s
160
+ uzp2 v19.4s, v30.4s, v31.4s
161
+ str q17, [x0, #0x10] // write the 16 reduced lanes; bytes 0x10-0x3f go first
162
+ str q18, [x0, #0x20]
163
+ str q19, [x0, #0x30]
164
+ str q16, [x0], #0x40 // bytes 0x00-0x0f, then x0 += 0x40
165
+ subs x3, x3, #0x4 // counter -= 4; the flags set here are not read afterwards
166
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l7_loop_start // repeat until the counter reaches 0
167
+ ret
168
+ .cfi_endproc
169
+
170
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
171
+
172
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
173
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7) */
174
+
175
+ #if defined(__ELF__)
176
+ .section .note.GNU-stack,"",%progbits
177
+ #endif