pq_crypto 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/extconf.rb +2 -0
  5. data/ext/pqcrypto/pqcrypto_ruby_secure.c +139 -0
  6. data/ext/pqcrypto/pqcrypto_secure.c +532 -0
  7. data/ext/pqcrypto/pqcrypto_secure.h +20 -0
  8. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  9. data/ext/pqcrypto/vendor/.vendored +4 -4
  10. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  111. data/lib/pq_crypto/hybrid_kem.rb +1 -1
  112. data/lib/pq_crypto/internal.rb +23 -0
  113. data/lib/pq_crypto/kem.rb +27 -34
  114. data/lib/pq_crypto/pkcs8/der.rb +68 -0
  115. data/lib/pq_crypto/pkcs8/private_key_choice.rb +186 -0
  116. data/lib/pq_crypto/pkcs8.rb +51 -468
  117. data/lib/pq_crypto/serialization.rb +19 -29
  118. data/lib/pq_crypto/signature.rb +28 -35
  119. data/lib/pq_crypto/version.rb +1 -1
  120. data/lib/pq_crypto.rb +10 -0
  121. data/script/vendor_libs.rb +3 -3
  122. metadata +44 -35
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  142. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  143. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  144. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  145. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  146. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  147. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  148. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  149. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  150. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  151. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  152. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  153. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  154. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -0,0 +1,129 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
18
+ // Purpose: per 0x40-byte chunk, sum the signed 32x32->64-bit products of 4 input pairs spaced 0x400 bytes apart, Montgomery-reduce each sum, and store the 32-bit results.
19
+ .cfi_startproc // x0 is only stored to; x1 and x2 are only loaded from. Presumably dst and the two source vectors (confirm against the C prototype).
20
+ mov w3, #0xe001 // low 16 bits of 0x007fe001
21
+ movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (the ML-DSA modulus q in FIPS 204)
22
+ dup v0.4s, w3 // v0 = 8380417 in all four 32-bit lanes
23
+ mov w3, #0x2001 // low 16 bits of 0x03802001
24
+ movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; 8380417 * 58728449 == 1 (mod 2^32)
25
+ dup v1.4s, w3 // v1 = 58728449 in all four 32-bit lanes
26
+ mov x3, #0x40 // =64; loop counter, reduced by 4 per pass, so the loop body runs 16 times
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l4_loop_start: // each pass consumes 0x40 bytes (16 x 32-bit lanes) from x1 and x2 and writes 0x40 bytes to x0
29
+ ldr q17, [x1, #0x10]
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // post-index: x1 advances by 0x40 after this load
33
+ ldr q21, [x2, #0x10]
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // post-index: x2 advances by 0x40 after this load
37
+ smull v24.2d, v16.2s, v20.2s // pair 1 of 4: initialise the 64-bit accumulators v24-v31 (smull = lanes 0-1)
38
+ smull2 v25.2d, v16.4s, v20.4s // smull2 = lanes 2-3 of the same registers
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // x1 was already advanced by 0x40, so this reads chunk base + 0x400 (presumably the next 256 x int32 polynomial; confirm against caller)
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // pair 2 of 4: multiply-accumulate into the same 64-bit accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // chunk base + 0x800
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // pair 3 of 4
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // chunk base + 0xc00
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // pair 4 of 4
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ uzp1 v16.4s, v24.4s, v25.4s // reduction step 1: even 32-bit elements = low words of the four 64-bit sums
94
+ mul v16.4s, v16.4s, v1.4s // step 2: t = low word * 58728449 (mod 2^32)
95
+ smlsl v24.2d, v16.2s, v0.2s // step 3: sum -= t * 8380417, which zeroes the low word of each sum
96
+ smlsl2 v25.2d, v16.4s, v0.4s
97
+ uzp2 v16.4s, v24.4s, v25.4s // step 4: odd 32-bit elements = high words, i.e. (sum - t * 8380417) >> 32
98
+ uzp1 v17.4s, v26.4s, v27.4s // same four steps for accumulators v26/v27
99
+ mul v17.4s, v17.4s, v1.4s
100
+ smlsl v26.2d, v17.2s, v0.2s
101
+ smlsl2 v27.2d, v17.4s, v0.4s
102
+ uzp2 v17.4s, v26.4s, v27.4s
103
+ uzp1 v18.4s, v28.4s, v29.4s // same four steps for accumulators v28/v29
104
+ mul v18.4s, v18.4s, v1.4s
105
+ smlsl v28.2d, v18.2s, v0.2s
106
+ smlsl2 v29.2d, v18.4s, v0.4s
107
+ uzp2 v18.4s, v28.4s, v29.4s
108
+ uzp1 v19.4s, v30.4s, v31.4s // same four steps for accumulators v30/v31
109
+ mul v19.4s, v19.4s, v1.4s
110
+ smlsl v30.2d, v19.2s, v0.2s
111
+ smlsl2 v31.2d, v19.4s, v0.4s
112
+ uzp2 v19.4s, v30.4s, v31.4s
113
+ str q17, [x0, #0x10]
114
+ str q18, [x0, #0x20]
115
+ str q19, [x0, #0x30]
116
+ str q16, [x0], #0x40 // post-index: x0 advances by 0x40 after this store
117
+ subs x3, x3, #0x4 // counter -= 4 (also sets flags, which the cbnz below does not read)
118
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l4_loop_start // repeat until the counter reaches 0
119
+ ret // only x0-x3, v0-v1 and v16-v31 were written; sp and v8-v15 are untouched
120
+ .cfi_endproc
121
+
122
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l4_aarch64_asm)
123
+
124
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
125
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4) */
126
+
127
+ #if defined(__ELF__)
128
+ .section .note.GNU-stack,"",%progbits
129
+ #endif
@@ -0,0 +1,145 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
18
+ // Purpose: per 0x40-byte chunk, sum the signed 32x32->64-bit products of 5 input pairs spaced 0x400 bytes apart, Montgomery-reduce each sum, and store the 32-bit results.
19
+ .cfi_startproc // x0 is only stored to; x1 and x2 are only loaded from. Presumably dst and the two source vectors (confirm against the C prototype).
20
+ mov w3, #0xe001 // low 16 bits of 0x007fe001
21
+ movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (the ML-DSA modulus q in FIPS 204)
22
+ dup v0.4s, w3 // v0 = 8380417 in all four 32-bit lanes
23
+ mov w3, #0x2001 // low 16 bits of 0x03802001
24
+ movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; 8380417 * 58728449 == 1 (mod 2^32)
25
+ dup v1.4s, w3 // v1 = 58728449 in all four 32-bit lanes
26
+ mov x3, #0x40 // =64; loop counter, reduced by 4 per pass, so the loop body runs 16 times
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l5_loop_start: // each pass consumes 0x40 bytes (16 x 32-bit lanes) from x1 and x2 and writes 0x40 bytes to x0
29
+ ldr q17, [x1, #0x10]
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // post-index: x1 advances by 0x40 after this load
33
+ ldr q21, [x2, #0x10]
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // post-index: x2 advances by 0x40 after this load
37
+ smull v24.2d, v16.2s, v20.2s // pair 1 of 5: initialise the 64-bit accumulators v24-v31 (smull = lanes 0-1)
38
+ smull2 v25.2d, v16.4s, v20.4s // smull2 = lanes 2-3 of the same registers
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // x1 was already advanced by 0x40, so this reads chunk base + 0x400 (presumably the next 256 x int32 polynomial; confirm against caller)
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // pair 2 of 5: multiply-accumulate into the same 64-bit accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // chunk base + 0x800
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // pair 3 of 5
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // chunk base + 0xc00
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // pair 4 of 5
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ ldr q16, [x1, #0xfc0] // chunk base + 0x1000
94
+ ldr q17, [x1, #0xfd0]
95
+ ldr q18, [x1, #0xfe0]
96
+ ldr q19, [x1, #0xff0]
97
+ ldr q20, [x2, #0xfc0]
98
+ ldr q21, [x2, #0xfd0]
99
+ ldr q22, [x2, #0xfe0]
100
+ ldr q23, [x2, #0xff0]
101
+ smlal v24.2d, v16.2s, v20.2s // pair 5 of 5
102
+ smlal2 v25.2d, v16.4s, v20.4s
103
+ smlal v26.2d, v17.2s, v21.2s
104
+ smlal2 v27.2d, v17.4s, v21.4s
105
+ smlal v28.2d, v18.2s, v22.2s
106
+ smlal2 v29.2d, v18.4s, v22.4s
107
+ smlal v30.2d, v19.2s, v23.2s
108
+ smlal2 v31.2d, v19.4s, v23.4s
109
+ uzp1 v16.4s, v24.4s, v25.4s // reduction step 1: even 32-bit elements = low words of the four 64-bit sums
110
+ mul v16.4s, v16.4s, v1.4s // step 2: t = low word * 58728449 (mod 2^32)
111
+ smlsl v24.2d, v16.2s, v0.2s // step 3: sum -= t * 8380417, which zeroes the low word of each sum
112
+ smlsl2 v25.2d, v16.4s, v0.4s
113
+ uzp2 v16.4s, v24.4s, v25.4s // step 4: odd 32-bit elements = high words, i.e. (sum - t * 8380417) >> 32
114
+ uzp1 v17.4s, v26.4s, v27.4s // same four steps for accumulators v26/v27
115
+ mul v17.4s, v17.4s, v1.4s
116
+ smlsl v26.2d, v17.2s, v0.2s
117
+ smlsl2 v27.2d, v17.4s, v0.4s
118
+ uzp2 v17.4s, v26.4s, v27.4s
119
+ uzp1 v18.4s, v28.4s, v29.4s // same four steps for accumulators v28/v29
120
+ mul v18.4s, v18.4s, v1.4s
121
+ smlsl v28.2d, v18.2s, v0.2s
122
+ smlsl2 v29.2d, v18.4s, v0.4s
123
+ uzp2 v18.4s, v28.4s, v29.4s
124
+ uzp1 v19.4s, v30.4s, v31.4s // same four steps for accumulators v30/v31
125
+ mul v19.4s, v19.4s, v1.4s
126
+ smlsl v30.2d, v19.2s, v0.2s
127
+ smlsl2 v31.2d, v19.4s, v0.4s
128
+ uzp2 v19.4s, v30.4s, v31.4s
129
+ str q17, [x0, #0x10]
130
+ str q18, [x0, #0x20]
131
+ str q19, [x0, #0x30]
132
+ str q16, [x0], #0x40 // post-index: x0 advances by 0x40 after this store
133
+ subs x3, x3, #0x4 // counter -= 4 (also sets flags, which the cbnz below does not read)
134
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l5_loop_start // repeat until the counter reaches 0
135
+ ret // only x0-x3, v0-v1 and v16-v31 were written; sp and v8-v15 are untouched
136
+ .cfi_endproc
137
+
138
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l5_aarch64_asm)
139
+
140
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
141
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5) */
142
+
143
+ #if defined(__ELF__)
144
+ .section .note.GNU-stack,"",%progbits
145
+ #endif
@@ -0,0 +1,177 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
7
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
18
+ // Purpose: per 0x40-byte chunk, sum the signed 32x32->64-bit products of 7 input pairs spaced 0x400 bytes apart, Montgomery-reduce each sum, and store the 32-bit results.
19
+ .cfi_startproc // x0 is only stored to; x1 and x2 are only loaded from. Presumably dst and the two source vectors (confirm against the C prototype).
20
+ mov w3, #0xe001 // low 16 bits of 0x007fe001
21
+ movk w3, #0x7f, lsl #16 // w3 = 0x007fe001 = 8380417 (the ML-DSA modulus q in FIPS 204)
22
+ dup v0.4s, w3 // v0 = 8380417 in all four 32-bit lanes
23
+ mov w3, #0x2001 // low 16 bits of 0x03802001
24
+ movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449; 8380417 * 58728449 == 1 (mod 2^32)
25
+ dup v1.4s, w3 // v1 = 58728449 in all four 32-bit lanes
26
+ mov x3, #0x40 // =64; loop counter, reduced by 4 per pass, so the loop body runs 16 times
27
+
28
+ Lpolyvecl_pointwise_acc_montgomery_l7_loop_start: // each pass consumes 0x40 bytes (16 x 32-bit lanes) from x1 and x2 and writes 0x40 bytes to x0
29
+ ldr q17, [x1, #0x10]
30
+ ldr q18, [x1, #0x20]
31
+ ldr q19, [x1, #0x30]
32
+ ldr q16, [x1], #0x40 // post-index: x1 advances by 0x40 after this load
33
+ ldr q21, [x2, #0x10]
34
+ ldr q22, [x2, #0x20]
35
+ ldr q23, [x2, #0x30]
36
+ ldr q20, [x2], #0x40 // post-index: x2 advances by 0x40 after this load
37
+ smull v24.2d, v16.2s, v20.2s // pair 1 of 7: initialise the 64-bit accumulators v24-v31 (smull = lanes 0-1)
38
+ smull2 v25.2d, v16.4s, v20.4s // smull2 = lanes 2-3 of the same registers
39
+ smull v26.2d, v17.2s, v21.2s
40
+ smull2 v27.2d, v17.4s, v21.4s
41
+ smull v28.2d, v18.2s, v22.2s
42
+ smull2 v29.2d, v18.4s, v22.4s
43
+ smull v30.2d, v19.2s, v23.2s
44
+ smull2 v31.2d, v19.4s, v23.4s
45
+ ldr q16, [x1, #0x3c0] // x1 was already advanced by 0x40, so this reads chunk base + 0x400 (presumably the next 256 x int32 polynomial; confirm against caller)
46
+ ldr q17, [x1, #0x3d0]
47
+ ldr q18, [x1, #0x3e0]
48
+ ldr q19, [x1, #0x3f0]
49
+ ldr q20, [x2, #0x3c0]
50
+ ldr q21, [x2, #0x3d0]
51
+ ldr q22, [x2, #0x3e0]
52
+ ldr q23, [x2, #0x3f0]
53
+ smlal v24.2d, v16.2s, v20.2s // pair 2 of 7: multiply-accumulate into the same 64-bit accumulators
54
+ smlal2 v25.2d, v16.4s, v20.4s
55
+ smlal v26.2d, v17.2s, v21.2s
56
+ smlal2 v27.2d, v17.4s, v21.4s
57
+ smlal v28.2d, v18.2s, v22.2s
58
+ smlal2 v29.2d, v18.4s, v22.4s
59
+ smlal v30.2d, v19.2s, v23.2s
60
+ smlal2 v31.2d, v19.4s, v23.4s
61
+ ldr q16, [x1, #0x7c0] // chunk base + 0x800
62
+ ldr q17, [x1, #0x7d0]
63
+ ldr q18, [x1, #0x7e0]
64
+ ldr q19, [x1, #0x7f0]
65
+ ldr q20, [x2, #0x7c0]
66
+ ldr q21, [x2, #0x7d0]
67
+ ldr q22, [x2, #0x7e0]
68
+ ldr q23, [x2, #0x7f0]
69
+ smlal v24.2d, v16.2s, v20.2s // pair 3 of 7
70
+ smlal2 v25.2d, v16.4s, v20.4s
71
+ smlal v26.2d, v17.2s, v21.2s
72
+ smlal2 v27.2d, v17.4s, v21.4s
73
+ smlal v28.2d, v18.2s, v22.2s
74
+ smlal2 v29.2d, v18.4s, v22.4s
75
+ smlal v30.2d, v19.2s, v23.2s
76
+ smlal2 v31.2d, v19.4s, v23.4s
77
+ ldr q16, [x1, #0xbc0] // chunk base + 0xc00
78
+ ldr q17, [x1, #0xbd0]
79
+ ldr q18, [x1, #0xbe0]
80
+ ldr q19, [x1, #0xbf0]
81
+ ldr q20, [x2, #0xbc0]
82
+ ldr q21, [x2, #0xbd0]
83
+ ldr q22, [x2, #0xbe0]
84
+ ldr q23, [x2, #0xbf0]
85
+ smlal v24.2d, v16.2s, v20.2s // pair 4 of 7
86
+ smlal2 v25.2d, v16.4s, v20.4s
87
+ smlal v26.2d, v17.2s, v21.2s
88
+ smlal2 v27.2d, v17.4s, v21.4s
89
+ smlal v28.2d, v18.2s, v22.2s
90
+ smlal2 v29.2d, v18.4s, v22.4s
91
+ smlal v30.2d, v19.2s, v23.2s
92
+ smlal2 v31.2d, v19.4s, v23.4s
93
+ ldr q16, [x1, #0xfc0] // chunk base + 0x1000
94
+ ldr q17, [x1, #0xfd0]
95
+ ldr q18, [x1, #0xfe0]
96
+ ldr q19, [x1, #0xff0]
97
+ ldr q20, [x2, #0xfc0]
98
+ ldr q21, [x2, #0xfd0]
99
+ ldr q22, [x2, #0xfe0]
100
+ ldr q23, [x2, #0xff0]
101
+ smlal v24.2d, v16.2s, v20.2s // pair 5 of 7
102
+ smlal2 v25.2d, v16.4s, v20.4s
103
+ smlal v26.2d, v17.2s, v21.2s
104
+ smlal2 v27.2d, v17.4s, v21.4s
105
+ smlal v28.2d, v18.2s, v22.2s
106
+ smlal2 v29.2d, v18.4s, v22.4s
107
+ smlal v30.2d, v19.2s, v23.2s
108
+ smlal2 v31.2d, v19.4s, v23.4s
109
+ ldr q16, [x1, #0x13c0] // chunk base + 0x1400
110
+ ldr q17, [x1, #0x13d0]
111
+ ldr q18, [x1, #0x13e0]
112
+ ldr q19, [x1, #0x13f0]
113
+ ldr q20, [x2, #0x13c0]
114
+ ldr q21, [x2, #0x13d0]
115
+ ldr q22, [x2, #0x13e0]
116
+ ldr q23, [x2, #0x13f0]
117
+ smlal v24.2d, v16.2s, v20.2s // pair 6 of 7
118
+ smlal2 v25.2d, v16.4s, v20.4s
119
+ smlal v26.2d, v17.2s, v21.2s
120
+ smlal2 v27.2d, v17.4s, v21.4s
121
+ smlal v28.2d, v18.2s, v22.2s
122
+ smlal2 v29.2d, v18.4s, v22.4s
123
+ smlal v30.2d, v19.2s, v23.2s
124
+ smlal2 v31.2d, v19.4s, v23.4s
125
+ ldr q16, [x1, #0x17c0] // chunk base + 0x1800
126
+ ldr q17, [x1, #0x17d0]
127
+ ldr q18, [x1, #0x17e0]
128
+ ldr q19, [x1, #0x17f0]
129
+ ldr q20, [x2, #0x17c0]
130
+ ldr q21, [x2, #0x17d0]
131
+ ldr q22, [x2, #0x17e0]
132
+ ldr q23, [x2, #0x17f0]
133
+ smlal v24.2d, v16.2s, v20.2s // pair 7 of 7
134
+ smlal2 v25.2d, v16.4s, v20.4s
135
+ smlal v26.2d, v17.2s, v21.2s
136
+ smlal2 v27.2d, v17.4s, v21.4s
137
+ smlal v28.2d, v18.2s, v22.2s
138
+ smlal2 v29.2d, v18.4s, v22.4s
139
+ smlal v30.2d, v19.2s, v23.2s
140
+ smlal2 v31.2d, v19.4s, v23.4s
141
+ uzp1 v16.4s, v24.4s, v25.4s // reduction step 1: even 32-bit elements = low words of the four 64-bit sums
142
+ mul v16.4s, v16.4s, v1.4s // step 2: t = low word * 58728449 (mod 2^32)
143
+ smlsl v24.2d, v16.2s, v0.2s // step 3: sum -= t * 8380417, which zeroes the low word of each sum
144
+ smlsl2 v25.2d, v16.4s, v0.4s
145
+ uzp2 v16.4s, v24.4s, v25.4s // step 4: odd 32-bit elements = high words, i.e. (sum - t * 8380417) >> 32
146
+ uzp1 v17.4s, v26.4s, v27.4s // same four steps for accumulators v26/v27
147
+ mul v17.4s, v17.4s, v1.4s
148
+ smlsl v26.2d, v17.2s, v0.2s
149
+ smlsl2 v27.2d, v17.4s, v0.4s
150
+ uzp2 v17.4s, v26.4s, v27.4s
151
+ uzp1 v18.4s, v28.4s, v29.4s // same four steps for accumulators v28/v29
152
+ mul v18.4s, v18.4s, v1.4s
153
+ smlsl v28.2d, v18.2s, v0.2s
154
+ smlsl2 v29.2d, v18.4s, v0.4s
155
+ uzp2 v18.4s, v28.4s, v29.4s
156
+ uzp1 v19.4s, v30.4s, v31.4s // same four steps for accumulators v30/v31
157
+ mul v19.4s, v19.4s, v1.4s
158
+ smlsl v30.2d, v19.2s, v0.2s
159
+ smlsl2 v31.2d, v19.4s, v0.4s
160
+ uzp2 v19.4s, v30.4s, v31.4s
161
+ str q17, [x0, #0x10]
162
+ str q18, [x0, #0x20]
163
+ str q19, [x0, #0x30]
164
+ str q16, [x0], #0x40 // post-index: x0 advances by 0x40 after this store
165
+ subs x3, x3, #0x4 // counter -= 4 (also sets flags, which the cbnz below does not read)
166
+ cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l7_loop_start // repeat until the counter reaches 0
167
+ ret // only x0-x3, v0-v1 and v16-v31 were written; sp and v8-v15 are untouched
168
+ .cfi_endproc
169
+
170
+ MLD_ASM_FN_SIZE(polyvecl_pointwise_acc_montgomery_l7_aarch64_asm)
+
172
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
173
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7) */
174
+
175
+ #if defined(__ELF__)
176
+ .section .note.GNU-stack,"",%progbits
177
+ #endif