pq_crypto 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  5. data/ext/pqcrypto/vendor/.vendored +4 -4
  6. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  7. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  8. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  9. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  10. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  107. data/lib/pq_crypto/version.rb +1 -1
  108. data/script/vendor_libs.rb +3 -3
  109. metadata +41 -35
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -0,0 +1,84 @@
1
+ /* Copyright (c) The mldsa-native project authors
2
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
3
+ */
4
+
5
+ #include "../../../common.h"
6
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && \
7
+ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API) || \
8
+ defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)) && \
9
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
10
+
11
+ /*
12
+ * WARNING: This file is auto-derived from the mldsa-native source file
13
+ * dev/aarch64_opt/src/pointwise_montgomery_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
14
+ */
15
+
16
+ .text
17
+ .balign 4
18
+ .global MLD_ASM_NAMESPACE(poly_pointwise_montgomery_aarch64_asm)
19
+ MLD_ASM_FN_SYMBOL(poly_pointwise_montgomery_aarch64_asm)
20
+ /* In: x0, x1 = pointers to 256 signed 32-bit lanes each. Out: every lane at [x0] is replaced by the Montgomery-reduced (R = 2^32) product of the matching lanes of [x0] and [x1]. Leaf code: no stack use, no calls; touches w3/x3, v0-v1, v16-v31 and the flags. */
21
+ .cfi_startproc
22
+ mov w3, #0xe001 // =57345 (low half of 0x7fe001)
23
+ movk w3, #0x7f, lsl #16 // w3 = 0x7fe001 = 8380417
24
+ dup v0.4s, w3 // v0 = 8380417 in all four lanes (the ML-DSA modulus q)
25
+ mov w3, #0x2001 // =8193 (low half of 0x03802001)
26
+ movk w3, #0x380, lsl #16 // w3 = 0x03802001 = 58728449
27
+ dup v1.4s, w3 // v1 = 58728449 in all four lanes (q^-1 mod 2^32)
28
+ mov x3, #0x40 // =64; loop counter, reduced by 4 per pass => 16 passes
29
+ /* Each pass handles 16 lanes (64 bytes) of both operands. */
30
+ Lpoly_pointwise_montgomery_loop_start:
31
+ ldr q16, [x0] // first operand, lanes 0-3
32
+ ldr q17, [x0, #0x10]
33
+ ldr q18, [x0, #0x20]
34
+ ldr q19, [x0, #0x30]
35
+ ldr q21, [x1, #0x10] // second operand, lanes 4-7
36
+ ldr q22, [x1, #0x20]
37
+ ldr q23, [x1, #0x30]
38
+ ldr q20, [x1], #0x40 // lanes 0-3, then advance x1 by 64 bytes
39
+ smull v24.2d, v16.2s, v20.2s // 64-bit signed products of lanes 0-1
40
+ smull2 v25.2d, v16.4s, v20.4s // ... and of lanes 2-3
41
+ smull v26.2d, v17.2s, v21.2s
42
+ smull2 v27.2d, v17.4s, v21.4s
43
+ smull v28.2d, v18.2s, v22.2s
44
+ smull2 v29.2d, v18.4s, v22.4s
45
+ smull v30.2d, v19.2s, v23.2s
46
+ smull2 v31.2d, v19.4s, v23.4s
47
+ uzp1 v16.4s, v24.4s, v25.4s // gather the low 32 bits of the four products
48
+ mul v16.4s, v16.4s, v1.4s // t = low32 * q^-1 (mod 2^32)
49
+ smlsl v24.2d, v16.2s, v0.2s // product -= t * q, which clears the low 32 bits
50
+ smlsl2 v25.2d, v16.4s, v0.4s
51
+ uzp2 v16.4s, v24.4s, v25.4s // keep the high 32 bits: the reduced result
52
+ uzp1 v17.4s, v26.4s, v27.4s // same reduction for lanes 4-7
53
+ mul v17.4s, v17.4s, v1.4s
54
+ smlsl v26.2d, v17.2s, v0.2s
55
+ smlsl2 v27.2d, v17.4s, v0.4s
56
+ uzp2 v17.4s, v26.4s, v27.4s
57
+ uzp1 v18.4s, v28.4s, v29.4s // same reduction for lanes 8-11
58
+ mul v18.4s, v18.4s, v1.4s
59
+ smlsl v28.2d, v18.2s, v0.2s
60
+ smlsl2 v29.2d, v18.4s, v0.4s
61
+ uzp2 v18.4s, v28.4s, v29.4s
62
+ uzp1 v19.4s, v30.4s, v31.4s // same reduction for lanes 12-15
63
+ mul v19.4s, v19.4s, v1.4s
64
+ smlsl v30.2d, v19.2s, v0.2s
65
+ smlsl2 v31.2d, v19.4s, v0.4s
66
+ uzp2 v19.4s, v30.4s, v31.4s
67
+ str q17, [x0, #0x10] // results overwrite the first operand in place
68
+ str q18, [x0, #0x20]
69
+ str q19, [x0, #0x30]
70
+ str q16, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
71
+ subs x3, x3, #0x4
72
+ cbnz x3, Lpoly_pointwise_montgomery_loop_start // repeat until the counter reaches 0
73
+ ret
74
+ .cfi_endproc
75
+
76
+ MLD_ASM_FN_SIZE(poly_pointwise_montgomery_aarch64_asm)
77
+
78
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && (!MLD_CONFIG_NO_SIGN_API || \
79
+ !MLD_CONFIG_NO_VERIFY_API || MLD_CONFIG_REDUCE_RAM || MLD_UNIT_TEST) \
80
+ && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
81
+
82
+ #if defined(__ELF__)
83
+ .section .note.GNU-stack,"",%progbits
84
+ #endif
@@ -0,0 +1,53 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/poly_caddq_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(poly_caddq_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(poly_caddq_aarch64_asm)
18
+ /* In/out: x0 = pointer to 256 signed 32-bit lanes, updated in place: 8380417 is added to every negative lane, other lanes are unchanged. Leaf code: no stack use, no calls; touches w9, x1, v0-v5 and the flags. */
19
+ .cfi_startproc
20
+ mov w9, #0xe001 // =57345 (low half of 0x7fe001)
21
+ movk w9, #0x7f, lsl #16 // w9 = 0x7fe001 = 8380417
22
+ dup v4.4s, w9 // v4 = 8380417 in all four lanes (the ML-DSA modulus q)
23
+ mov x1, #0x10 // =16 passes of 16 lanes (64 bytes) each
24
+
25
+ Lpoly_caddq_loop:
26
+ ldr q0, [x0]
27
+ ldr q1, [x0, #0x10]
28
+ ldr q2, [x0, #0x20]
29
+ ldr q3, [x0, #0x30]
30
+ ushr v5.4s, v0.4s, #0x1f // v5 = sign bit of each lane: 1 if negative, else 0
31
+ mla v0.4s, v5.4s, v4.4s // lane += sign * q
32
+ ushr v5.4s, v1.4s, #0x1f // v5 is reused as scratch for each group of four lanes
33
+ mla v1.4s, v5.4s, v4.4s
34
+ ushr v5.4s, v2.4s, #0x1f
35
+ mla v2.4s, v5.4s, v4.4s
36
+ ushr v5.4s, v3.4s, #0x1f
37
+ mla v3.4s, v5.4s, v4.4s
38
+ str q1, [x0, #0x10]
39
+ str q2, [x0, #0x20]
40
+ str q3, [x0, #0x30]
41
+ str q0, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
42
+ subs x1, x1, #0x1
43
+ b.ne Lpoly_caddq_loop // repeat until the pass counter reaches 0
44
+ ret
45
+ .cfi_endproc
46
+
47
+ MLD_ASM_FN_SIZE(poly_caddq_aarch64_asm)
48
+
49
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
50
+
51
+ #if defined(__ELF__)
52
+ .section .note.GNU-stack,"",%progbits
53
+ #endif
@@ -0,0 +1,55 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
8
+
9
+ /*
10
+ * WARNING: This file is auto-derived from the mldsa-native source file
11
+ * dev/aarch64_opt/src/poly_chknorm_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
12
+ */
13
+
14
+ .text
15
+ .balign 4
16
+ .global MLD_ASM_NAMESPACE(poly_chknorm_aarch64_asm)
17
+ MLD_ASM_FN_SYMBOL(poly_chknorm_aarch64_asm)
18
+ /* In: x0 = pointer to 256 signed 32-bit lanes (read only), w1 = bound. Out: w0 = 1 if any lane has abs(lane) >= bound (signed compare), else 0. All 256 lanes are always scanned; there is no early exit. Leaf code: touches x2, v0-v3, v20, v21 and the flags. */
19
+ .cfi_startproc
20
+ dup v20.4s, w1 // v20 = bound in all four lanes
21
+ eor v21.16b, v21.16b, v21.16b // v21 = 0; accumulates the OR of all compare masks
22
+ mov x2, #0x10 // =16 passes of 16 lanes (64 bytes) each
23
+
24
+ Lpoly_chknorm_loop:
25
+ ldr q1, [x0, #0x10]
26
+ ldr q2, [x0, #0x20]
27
+ ldr q3, [x0, #0x30]
28
+ ldr q0, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
29
+ abs v1.4s, v1.4s
30
+ cmge v1.4s, v1.4s, v20.4s // lane = all-ones where abs(lane) >= bound, else 0
31
+ orr v21.16b, v21.16b, v1.16b // fold the mask into the accumulator
32
+ abs v2.4s, v2.4s
33
+ cmge v2.4s, v2.4s, v20.4s
34
+ orr v21.16b, v21.16b, v2.16b
35
+ abs v3.4s, v3.4s
36
+ cmge v3.4s, v3.4s, v20.4s
37
+ orr v21.16b, v21.16b, v3.16b
38
+ abs v0.4s, v0.4s
39
+ cmge v0.4s, v0.4s, v20.4s
40
+ orr v21.16b, v21.16b, v0.16b
41
+ subs x2, x2, #0x1
42
+ b.ne Lpoly_chknorm_loop // repeat until the pass counter reaches 0
43
+ umaxv s21, v21.4s // collapse the four accumulator lanes: 0xffffffff if any was set, else 0
44
+ fmov w0, s21 // move the flag to the integer return register
45
+ and w0, w0, #0x1 // normalise the return value to 0 or 1
46
+ ret
47
+ .cfi_endproc
48
+
49
+ MLD_ASM_FN_SIZE(poly_chknorm_aarch64_asm)
50
+
51
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
52
+
53
+ #if defined(__ELF__)
54
+ .section .note.GNU-stack,"",%progbits
55
+ #endif
@@ -0,0 +1,86 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
8
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || (MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))
9
+
10
+ /*
11
+ * WARNING: This file is auto-derived from the mldsa-native source file
12
+ * dev/aarch64_opt/src/poly_decompose_32_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
13
+ */
14
+
15
+ .text
16
+ .balign 4
17
+ .global MLD_ASM_NAMESPACE(poly_decompose_32_aarch64_asm)
18
+ MLD_ASM_FN_SYMBOL(poly_decompose_32_aarch64_asm)
19
+ /* In: x1 = pointer to 256 signed 32-bit lanes a. Out: [x0] receives the quotient part, [x1] is overwritten with the remainder part, so that a = quotient * 523776 + remainder except where a > 8118528 (there quotient = 0 and remainder = a - 8380417). Leaf code: touches w4, w5, w7, w11, x3, v0-v7, v20-v24 and the flags. */
20
+ .cfi_startproc
21
+ mov w4, #0xe001 // =57345 (low half of 0x7fe001)
22
+ movk w4, #0x7f, lsl #16 // w4 = 0x7fe001 = 8380417
23
+ dup v20.4s, w4 // v20 = 8380417 (q); not referenced again in this routine
24
+ mov w5, #0xe100 // =57600 (low half of 0x7be100)
25
+ movk w5, #0x7b, lsl #16 // w5 = 0x7be100 = 8118528
26
+ dup v21.4s, w5 // v21 = 8118528, the wrap-around threshold (= 31 * 261888)
27
+ mov w7, #0xfe00 // =65024 (low half of 0x7fe00)
28
+ movk w7, #0x7, lsl #16 // w7 = 0x7fe00 = 523776
29
+ dup v22.4s, w7 // v22 = 523776, the divisor (= (q - 1) / 16)
30
+ mov w11, #0x401 // =1025 (low half of 0x40100401)
31
+ movk w11, #0x4010, lsl #16 // w11 = 0x40100401 = 1074791425
32
+ dup v23.4s, w11 // v23 = fixed-point reciprocal: 1074791425 / 2^49 ~= 1 / 523776
33
+ mov x3, #0x10 // =16 passes of 16 lanes (64 bytes) each
34
+
35
+ Lpoly_decompose_32_loop:
36
+ ldr q0, [x1]
37
+ ldr q1, [x1, #0x10]
38
+ ldr q2, [x1, #0x20]
39
+ ldr q3, [x1, #0x30]
40
+ sqdmulh v5.4s, v1.4s, v23.4s // high half of 2 * a * reciprocal
41
+ srshr v5.4s, v5.4s, #0x12 // rounding shift by 18: quotient ~= a / 523776 -- NOTE(review): exactness over the input range not checked here
42
+ cmgt v24.4s, v1.4s, v21.4s // mask = all-ones (-1) where a > 8118528
43
+ mls v1.4s, v5.4s, v22.4s // remainder = a - quotient * 523776
44
+ bic v5.16b, v5.16b, v24.16b // force the quotient to 0 in masked lanes
45
+ add v1.4s, v1.4s, v24.4s // adding the -1 mask subtracts 1 from the remainder in masked lanes
46
+ sqdmulh v6.4s, v2.4s, v23.4s // same sequence for lanes 8-11
47
+ srshr v6.4s, v6.4s, #0x12
48
+ cmgt v24.4s, v2.4s, v21.4s
49
+ mls v2.4s, v6.4s, v22.4s
50
+ bic v6.16b, v6.16b, v24.16b
51
+ add v2.4s, v2.4s, v24.4s
52
+ sqdmulh v7.4s, v3.4s, v23.4s // same sequence for lanes 12-15
53
+ srshr v7.4s, v7.4s, #0x12
54
+ cmgt v24.4s, v3.4s, v21.4s
55
+ mls v3.4s, v7.4s, v22.4s
56
+ bic v7.16b, v7.16b, v24.16b
57
+ add v3.4s, v3.4s, v24.4s
58
+ sqdmulh v4.4s, v0.4s, v23.4s // same sequence for lanes 0-3
59
+ srshr v4.4s, v4.4s, #0x12
60
+ cmgt v24.4s, v0.4s, v21.4s
61
+ mls v0.4s, v4.4s, v22.4s
62
+ bic v4.16b, v4.16b, v24.16b
63
+ add v0.4s, v0.4s, v24.4s
64
+ str q5, [x0, #0x10] // quotients go to the x0 buffer
65
+ str q6, [x0, #0x20]
66
+ str q7, [x0, #0x30]
67
+ str q4, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
68
+ str q1, [x1, #0x10] // remainders overwrite the input at x1
69
+ str q2, [x1, #0x20]
70
+ str q3, [x1, #0x30]
71
+ str q0, [x1], #0x40 // lanes 0-3, then advance x1 by 64 bytes
72
+ subs x3, x3, #0x1
73
+ b.ne Lpoly_decompose_32_loop // repeat until the pass counter reaches 0
74
+ ret
75
+ .cfi_endproc
76
+
77
+ MLD_ASM_FN_SIZE(poly_decompose_32_aarch64_asm)
78
+
79
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_SIGN_API && \
80
+ !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
81
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
82
+ || MLD_CONFIG_PARAMETER_SET == 87) */
83
+
84
+ #if defined(__ELF__)
85
+ .section .note.GNU-stack,"",%progbits
86
+ #endif
@@ -0,0 +1,86 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_SIGN_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
8
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
9
+
10
+ /*
11
+ * WARNING: This file is auto-derived from the mldsa-native source file
12
+ * dev/aarch64_opt/src/poly_decompose_88_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
13
+ */
14
+
15
+ .text
16
+ .balign 4
17
+ .global MLD_ASM_NAMESPACE(poly_decompose_88_aarch64_asm)
18
+ MLD_ASM_FN_SYMBOL(poly_decompose_88_aarch64_asm)
19
+ /* In: x1 = pointer to 256 signed 32-bit lanes a. Out: [x0] receives the quotient part, [x1] is overwritten with the remainder part, so that a = quotient * 190464 + remainder except where a > 8285184 (there quotient = 0 and remainder = a - 8380417). Leaf code: touches w4, w5, w7, w11, x3, v0-v7, v20-v24 and the flags. */
20
+ .cfi_startproc
21
+ mov w4, #0xe001 // =57345 (low half of 0x7fe001)
22
+ movk w4, #0x7f, lsl #16 // w4 = 0x7fe001 = 8380417
23
+ dup v20.4s, w4 // v20 = 8380417 (q); not referenced again in this routine
24
+ mov w5, #0x6c00 // =27648 (low half of 0x7e6c00)
25
+ movk w5, #0x7e, lsl #16 // w5 = 0x7e6c00 = 8285184
26
+ dup v21.4s, w5 // v21 = 8285184, the wrap-around threshold (= 87 * 95232)
27
+ mov w7, #0xe800 // =59392 (low half of 0x2e800)
28
+ movk w7, #0x2, lsl #16 // w7 = 0x2e800 = 190464
29
+ dup v22.4s, w7 // v22 = 190464, the divisor (= (q - 1) / 44)
30
+ mov w11, #0x581 // =1409 (low half of 0x58160581)
31
+ movk w11, #0x5816, lsl #16 // w11 = 0x58160581 = 1477838209
32
+ dup v23.4s, w11 // v23 = fixed-point reciprocal: 1477838209 / 2^48 ~= 1 / 190464
33
+ mov x3, #0x10 // =16 passes of 16 lanes (64 bytes) each
34
+
35
+ Lpoly_decompose_88_loop:
36
+ ldr q0, [x1]
37
+ ldr q1, [x1, #0x10]
38
+ ldr q2, [x1, #0x20]
39
+ ldr q3, [x1, #0x30]
40
+ sqdmulh v5.4s, v1.4s, v23.4s // high half of 2 * a * reciprocal
41
+ srshr v5.4s, v5.4s, #0x11 // rounding shift by 17: quotient ~= a / 190464 -- NOTE(review): exactness over the input range not checked here
42
+ cmgt v24.4s, v1.4s, v21.4s // mask = all-ones (-1) where a > 8285184
43
+ mls v1.4s, v5.4s, v22.4s // remainder = a - quotient * 190464
44
+ bic v5.16b, v5.16b, v24.16b // force the quotient to 0 in masked lanes
45
+ add v1.4s, v1.4s, v24.4s // adding the -1 mask subtracts 1 from the remainder in masked lanes
46
+ sqdmulh v6.4s, v2.4s, v23.4s // same sequence for lanes 8-11
47
+ srshr v6.4s, v6.4s, #0x11
48
+ cmgt v24.4s, v2.4s, v21.4s
49
+ mls v2.4s, v6.4s, v22.4s
50
+ bic v6.16b, v6.16b, v24.16b
51
+ add v2.4s, v2.4s, v24.4s
52
+ sqdmulh v7.4s, v3.4s, v23.4s // same sequence for lanes 12-15
53
+ srshr v7.4s, v7.4s, #0x11
54
+ cmgt v24.4s, v3.4s, v21.4s
55
+ mls v3.4s, v7.4s, v22.4s
56
+ bic v7.16b, v7.16b, v24.16b
57
+ add v3.4s, v3.4s, v24.4s
58
+ sqdmulh v4.4s, v0.4s, v23.4s // same sequence for lanes 0-3
59
+ srshr v4.4s, v4.4s, #0x11
60
+ cmgt v24.4s, v0.4s, v21.4s
61
+ mls v0.4s, v4.4s, v22.4s
62
+ bic v4.16b, v4.16b, v24.16b
63
+ add v0.4s, v0.4s, v24.4s
64
+ str q5, [x0, #0x10] // quotients go to the x0 buffer
65
+ str q6, [x0, #0x20]
66
+ str q7, [x0, #0x30]
67
+ str q4, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
68
+ str q1, [x1, #0x10] // remainders overwrite the input at x1
69
+ str q2, [x1, #0x20]
70
+ str q3, [x1, #0x30]
71
+ str q0, [x1], #0x40 // lanes 0-3, then advance x1 by 64 bytes
72
+ subs x3, x3, #0x1
73
+ b.ne Lpoly_decompose_88_loop // repeat until the pass counter reaches 0
74
+ ret
75
+ .cfi_endproc
76
+
77
+ MLD_ASM_FN_SIZE(poly_decompose_88_aarch64_asm)
78
+
79
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_SIGN_API && \
80
+ !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
81
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
82
+ */
83
+
84
+ #if defined(__ELF__)
85
+ .section .note.GNU-stack,"",%progbits
86
+ #endif
@@ -0,0 +1,103 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
8
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || (MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87))
9
+
10
+ /*
11
+ * WARNING: This file is auto-derived from the mldsa-native source file
12
+ * dev/aarch64_opt/src/poly_use_hint_32_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
13
+ */
14
+
15
+ .text
16
+ .balign 4
17
+ .global MLD_ASM_NAMESPACE(poly_use_hint_32_aarch64_asm)
18
+ MLD_ASM_FN_SYMBOL(poly_use_hint_32_aarch64_asm)
19
+ /* In: x0 = pointer to 256 signed 32-bit lanes a (overwritten with the result), x1 = pointer to 256 32-bit hint lanes (read only; presumably 0 or 1 -- confirm against the caller). Each lane is split into quotient/remainder by 523776, then result = (quotient +/- hint) & 15, adding when remainder > 0 and subtracting otherwise. Leaf code: touches w4, w5, w7, w11, x3, v0-v7, v16-v25 and the flags. */
20
+ .cfi_startproc
21
+ mov w4, #0xe001 // =57345 (low half of 0x7fe001)
22
+ movk w4, #0x7f, lsl #16 // w4 = 0x7fe001 = 8380417
23
+ dup v20.4s, w4 // v20 = 8380417 (q); not referenced again in this routine
24
+ mov w5, #0xe100 // =57600 (low half of 0x7be100)
25
+ movk w5, #0x7b, lsl #16 // w5 = 0x7be100 = 8118528
26
+ dup v21.4s, w5 // v21 = 8118528, the wrap-around threshold
27
+ mov w7, #0xfe00 // =65024 (low half of 0x7fe00)
28
+ movk w7, #0x7, lsl #16 // w7 = 0x7fe00 = 523776
29
+ dup v22.4s, w7 // v22 = 523776, the divisor
30
+ mov w11, #0x401 // =1025 (low half of 0x40100401)
31
+ movk w11, #0x4010, lsl #16 // w11 = 0x40100401 = 1074791425
32
+ dup v23.4s, w11 // v23 = fixed-point reciprocal: 1074791425 / 2^49 ~= 1 / 523776
33
+ movi v24.4s, #0xf // v24 = 15, mask that wraps the result into 0..15
34
+ mov x3, #0x10 // =16 passes of 16 lanes (64 bytes) each
35
+
36
+ Lpoly_use_hint_32_loop:
37
+ ldr q1, [x0, #0x10] // v0-v3 = input lanes a
38
+ ldr q2, [x0, #0x20]
39
+ ldr q3, [x0, #0x30]
40
+ ldr q0, [x0] // x0 is advanced later, by the final store
41
+ ldr q5, [x1, #0x10] // v4-v7 = hint lanes
42
+ ldr q6, [x1, #0x20]
43
+ ldr q7, [x1, #0x30]
44
+ ldr q4, [x1], #0x40 // lanes 0-3, then advance x1 by 64 bytes
45
+ sqdmulh v17.4s, v1.4s, v23.4s // high half of 2 * a * reciprocal
46
+ srshr v17.4s, v17.4s, #0x12 // rounding shift by 18: quotient ~= a / 523776
47
+ cmgt v25.4s, v1.4s, v21.4s // mask = all-ones (-1) where a > 8118528
48
+ mls v1.4s, v17.4s, v22.4s // remainder = a - quotient * 523776
49
+ bic v17.16b, v17.16b, v25.16b // force the quotient to 0 in masked lanes
50
+ add v1.4s, v1.4s, v25.4s // subtract 1 from the remainder in masked lanes
51
+ cmle v1.4s, v1.4s, #0 // all-ones (-1) where remainder <= 0, else 0
52
+ orr v1.4s, #0x1 // step = -1 where remainder <= 0, +1 otherwise
53
+ mla v17.4s, v1.4s, v5.4s // quotient += step * hint
54
+ and v17.16b, v17.16b, v24.16b // wrap into 0..15
55
+ sqdmulh v18.4s, v2.4s, v23.4s // same sequence for lanes 8-11
56
+ srshr v18.4s, v18.4s, #0x12
57
+ cmgt v25.4s, v2.4s, v21.4s
58
+ mls v2.4s, v18.4s, v22.4s
59
+ bic v18.16b, v18.16b, v25.16b
60
+ add v2.4s, v2.4s, v25.4s
61
+ cmle v2.4s, v2.4s, #0
62
+ orr v2.4s, #0x1
63
+ mla v18.4s, v2.4s, v6.4s
64
+ and v18.16b, v18.16b, v24.16b
65
+ sqdmulh v19.4s, v3.4s, v23.4s // same sequence for lanes 12-15
66
+ srshr v19.4s, v19.4s, #0x12
67
+ cmgt v25.4s, v3.4s, v21.4s
68
+ mls v3.4s, v19.4s, v22.4s
69
+ bic v19.16b, v19.16b, v25.16b
70
+ add v3.4s, v3.4s, v25.4s
71
+ cmle v3.4s, v3.4s, #0
72
+ orr v3.4s, #0x1
73
+ mla v19.4s, v3.4s, v7.4s
74
+ and v19.16b, v19.16b, v24.16b
75
+ sqdmulh v16.4s, v0.4s, v23.4s // same sequence for lanes 0-3
76
+ srshr v16.4s, v16.4s, #0x12
77
+ cmgt v25.4s, v0.4s, v21.4s
78
+ mls v0.4s, v16.4s, v22.4s
79
+ bic v16.16b, v16.16b, v25.16b
80
+ add v0.4s, v0.4s, v25.4s
81
+ cmle v0.4s, v0.4s, #0
82
+ orr v0.4s, #0x1
83
+ mla v16.4s, v0.4s, v4.4s
84
+ and v16.16b, v16.16b, v24.16b
85
+ str q17, [x0, #0x10] // results overwrite the input at x0
86
+ str q18, [x0, #0x20]
87
+ str q19, [x0, #0x30]
88
+ str q16, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
89
+ subs x3, x3, #0x1
90
+ b.ne Lpoly_use_hint_32_loop // repeat until the pass counter reaches 0
91
+ ret
92
+ .cfi_endproc
93
+
94
+ MLD_ASM_FN_SIZE(poly_use_hint_32_aarch64_asm)
95
+
96
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_VERIFY_API && \
97
+ !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
98
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 65 \
99
+ || MLD_CONFIG_PARAMETER_SET == 87) */
100
+
101
+ #if defined(__ELF__)
102
+ .section .note.GNU-stack,"",%progbits
103
+ #endif
@@ -0,0 +1,111 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+ #include "../../../common.h"
6
+
7
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_NO_VERIFY_API) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
8
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
9
+
10
+ /*
11
+ * WARNING: This file is auto-derived from the mldsa-native source file
12
+ * dev/aarch64_opt/src/poly_use_hint_88_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
13
+ */
14
+
15
+ .text
16
+ .balign 4
17
+ .global MLD_ASM_NAMESPACE(poly_use_hint_88_aarch64_asm)
18
+ MLD_ASM_FN_SYMBOL(poly_use_hint_88_aarch64_asm)
19
+ /* In: x0 = pointer to 256 signed 32-bit lanes a (overwritten with the result), x1 = pointer to 256 32-bit hint lanes (read only; presumably 0 or 1 -- confirm against the caller). Each lane is split into quotient/remainder by 190464, then the quotient is moved by +/- hint (adding when remainder > 0, subtracting otherwise) and wrapped: values above 43 become 0, negative values become 43. Leaf code: touches w4, w5, w7, w11, x3, v0-v7, v16-v25 and the flags. */
20
+ .cfi_startproc
21
+ mov w4, #0xe001 // =57345 (low half of 0x7fe001)
22
+ movk w4, #0x7f, lsl #16 // w4 = 0x7fe001 = 8380417
23
+ dup v20.4s, w4 // v20 = 8380417 (q); not referenced again in this routine
24
+ mov w5, #0x6c00 // =27648 (low half of 0x7e6c00)
25
+ movk w5, #0x7e, lsl #16 // w5 = 0x7e6c00 = 8285184
26
+ dup v21.4s, w5 // v21 = 8285184, the wrap-around threshold
27
+ mov w7, #0xe800 // =59392 (low half of 0x2e800)
28
+ movk w7, #0x2, lsl #16 // w7 = 0x2e800 = 190464
29
+ dup v22.4s, w7 // v22 = 190464, the divisor
30
+ mov w11, #0x581 // =1409 (low half of 0x58160581)
31
+ movk w11, #0x5816, lsl #16 // w11 = 0x58160581 = 1477838209
32
+ dup v23.4s, w11 // v23 = fixed-point reciprocal: 1477838209 / 2^48 ~= 1 / 190464
33
+ movi v24.4s, #0x2b // v24 = 43, the largest valid result
34
+ mov x3, #0x10 // =16 passes of 16 lanes (64 bytes) each
35
+
36
+ Lpoly_use_hint_88_loop:
37
+ ldr q1, [x0, #0x10] // v0-v3 = input lanes a
38
+ ldr q2, [x0, #0x20]
39
+ ldr q3, [x0, #0x30]
40
+ ldr q0, [x0] // x0 is advanced later, by the final store
41
+ ldr q5, [x1, #0x10] // v4-v7 = hint lanes
42
+ ldr q6, [x1, #0x20]
43
+ ldr q7, [x1, #0x30]
44
+ ldr q4, [x1], #0x40 // lanes 0-3, then advance x1 by 64 bytes
45
+ sqdmulh v17.4s, v1.4s, v23.4s // high half of 2 * a * reciprocal
46
+ srshr v17.4s, v17.4s, #0x11 // rounding shift by 17: quotient ~= a / 190464
47
+ cmgt v25.4s, v1.4s, v21.4s // mask = all-ones (-1) where a > 8285184
48
+ mls v1.4s, v17.4s, v22.4s // remainder = a - quotient * 190464
49
+ bic v17.16b, v17.16b, v25.16b // force the quotient to 0 in masked lanes
50
+ add v1.4s, v1.4s, v25.4s // subtract 1 from the remainder in masked lanes
51
+ cmle v1.4s, v1.4s, #0 // all-ones (-1) where remainder <= 0, else 0
52
+ orr v1.4s, #0x1 // step = -1 where remainder <= 0, +1 otherwise
53
+ mla v17.4s, v1.4s, v5.4s // quotient += step * hint
54
+ cmgt v25.4s, v17.4s, v24.4s // mask lanes that went above 43 (signed compare)
55
+ bic v17.16b, v17.16b, v25.16b // ... and wrap them to 0
56
+ umin v17.4s, v17.4s, v24.4s // unsigned min: a lane of -1 (0xffffffff) wraps to 43
57
+ sqdmulh v18.4s, v2.4s, v23.4s // same sequence for lanes 8-11
58
+ srshr v18.4s, v18.4s, #0x11
59
+ cmgt v25.4s, v2.4s, v21.4s
60
+ mls v2.4s, v18.4s, v22.4s
61
+ bic v18.16b, v18.16b, v25.16b
62
+ add v2.4s, v2.4s, v25.4s
63
+ cmle v2.4s, v2.4s, #0
64
+ orr v2.4s, #0x1
65
+ mla v18.4s, v2.4s, v6.4s
66
+ cmgt v25.4s, v18.4s, v24.4s
67
+ bic v18.16b, v18.16b, v25.16b
68
+ umin v18.4s, v18.4s, v24.4s
69
+ sqdmulh v19.4s, v3.4s, v23.4s // same sequence for lanes 12-15
70
+ srshr v19.4s, v19.4s, #0x11
71
+ cmgt v25.4s, v3.4s, v21.4s
72
+ mls v3.4s, v19.4s, v22.4s
73
+ bic v19.16b, v19.16b, v25.16b
74
+ add v3.4s, v3.4s, v25.4s
75
+ cmle v3.4s, v3.4s, #0
76
+ orr v3.4s, #0x1
77
+ mla v19.4s, v3.4s, v7.4s
78
+ cmgt v25.4s, v19.4s, v24.4s
79
+ bic v19.16b, v19.16b, v25.16b
80
+ umin v19.4s, v19.4s, v24.4s
81
+ sqdmulh v16.4s, v0.4s, v23.4s // same sequence for lanes 0-3
82
+ srshr v16.4s, v16.4s, #0x11
83
+ cmgt v25.4s, v0.4s, v21.4s
84
+ mls v0.4s, v16.4s, v22.4s
85
+ bic v16.16b, v16.16b, v25.16b
86
+ add v0.4s, v0.4s, v25.4s
87
+ cmle v0.4s, v0.4s, #0
88
+ orr v0.4s, #0x1
89
+ mla v16.4s, v0.4s, v4.4s
90
+ cmgt v25.4s, v16.4s, v24.4s
91
+ bic v16.16b, v16.16b, v25.16b
92
+ umin v16.4s, v16.4s, v24.4s
93
+ str q17, [x0, #0x10] // results overwrite the input at x0
94
+ str q18, [x0, #0x20]
95
+ str q19, [x0, #0x30]
96
+ str q16, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
97
+ subs x3, x3, #0x1
98
+ b.ne Lpoly_use_hint_88_loop // repeat until the pass counter reaches 0
99
+ ret
100
+ .cfi_endproc
101
+
102
+ MLD_ASM_FN_SIZE(poly_use_hint_88_aarch64_asm)
103
+
104
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_NO_VERIFY_API && \
105
+ !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
106
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
107
+ */
108
+
109
+ #if defined(__ELF__)
110
+ .section .note.GNU-stack,"",%progbits
111
+ #endif
@@ -0,0 +1,75 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * Copyright (c) The mlkem-native project authors
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ */
6
+
7
+ #include "../../../common.h"
8
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && \
9
+ (!defined(MLD_CONFIG_NO_SIGN_API) || !defined(MLD_CONFIG_NO_VERIFY_API)) && \
10
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
11
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLD_CONFIG_PARAMETER_SET == 44)
12
+
13
+ /*
14
+ * WARNING: This file is auto-derived from the mldsa-native source file
15
+ * dev/aarch64_opt/src/polyz_unpack_17_aarch64_asm.S using scripts/simpasm. Do not modify it directly.
16
+ */
17
+
18
+ .text
19
+ .balign 4
20
+ .global MLD_ASM_NAMESPACE(polyz_unpack_17_aarch64_asm)
21
+ MLD_ASM_FN_SYMBOL(polyz_unpack_17_aarch64_asm)
22
+ /* In: x1 = packed input bytes (36 consumed per pass, 576 in total), x2 = pointer to 64 bytes of tbl byte-index vectors. Out: [x0] receives 256 32-bit lanes, each equal to 0x20000 minus an 18-bit field taken from the input. Leaf code: touches x3, x9, v0-v7, v24-v30 and the flags. */
23
+ .cfi_startproc
24
+ ldr q24, [x2] // v24-v27 = byte-gather indices, one vector per group of four output lanes
25
+ ldr q25, [x2, #0x10]
26
+ ldr q26, [x2, #0x20]
27
+ ldr q27, [x2, #0x30]
28
+ mov x3, #0xfe00000000 // =1090921693184; 32-bit halves {0, 0xfe}
29
+ mov v28.d[0], x3 // shift counts for lanes 0-1: 0 and -2
30
+ mov x3, #0xfc // =252
31
+ movk x3, #0xfa, lsl #32 // x3 = 0x000000fa000000fc; 32-bit halves {0xfc, 0xfa}
32
+ mov v28.d[1], x3 // shift counts for lanes 2-3: -4 and -6 (ushl reads the low byte as signed)
33
+ movi v29.4s, #0x3, msl #16 // v29 = 0x3ffff in every lane: 18-bit field mask
34
+ movi v30.4s, #0x2, lsl #16 // v30 = 0x20000 = 2^17 in every lane
35
+ mov x9, #0x10 // =16 passes of 16 output lanes each
36
+
37
+ Lpolyz_unpack_17_loop:
38
+ ld1 { v0.16b, v1.16b }, [x1] // input bytes 0-31 of this pass
39
+ add x1, x1, #0x14 // step 20 bytes forward
40
+ ld1 { v2.16b }, [x1], #16 // input bytes 20-35; x1 has now advanced 36 bytes in total
41
+ tbl v4.16b, { v0.16b }, v24.16b // gather the bytes that feed output lanes 0-3
42
+ tbl v5.16b, { v0.16b, v1.16b }, v25.16b // ... lanes 4-7
43
+ tbl v6.16b, { v1.16b }, v26.16b // ... lanes 8-11
44
+ tbl v7.16b, { v1.16b, v2.16b }, v27.16b // ... lanes 12-15
45
+ ushl v4.4s, v4.4s, v28.4s // negative counts shift right by 0/2/4/6 bits to align each field
46
+ and v4.16b, v4.16b, v29.16b // keep the low 18 bits
47
+ sub v4.4s, v30.4s, v4.4s // lane = 2^17 - field
48
+ ushl v5.4s, v5.4s, v28.4s // same sequence for lanes 4-7
49
+ and v5.16b, v5.16b, v29.16b
50
+ sub v5.4s, v30.4s, v5.4s
51
+ ushl v6.4s, v6.4s, v28.4s // same sequence for lanes 8-11
52
+ and v6.16b, v6.16b, v29.16b
53
+ sub v6.4s, v30.4s, v6.4s
54
+ ushl v7.4s, v7.4s, v28.4s // same sequence for lanes 12-15
55
+ and v7.16b, v7.16b, v29.16b
56
+ sub v7.4s, v30.4s, v7.4s
57
+ str q5, [x0, #0x10]
58
+ str q6, [x0, #0x20]
59
+ str q7, [x0, #0x30]
60
+ str q4, [x0], #0x40 // lanes 0-3, then advance x0 by 64 bytes
61
+ subs x9, x9, #0x1
62
+ b.ne Lpolyz_unpack_17_loop // repeat until the pass counter reaches 0
63
+ ret
64
+ .cfi_endproc
65
+
66
+ MLD_ASM_FN_SIZE(polyz_unpack_17_aarch64_asm)
67
+
68
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && (!MLD_CONFIG_NO_SIGN_API || \
69
+ !MLD_CONFIG_NO_VERIFY_API) && !MLD_CONFIG_MULTILEVEL_NO_SHARED && \
70
+ (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLD_CONFIG_PARAMETER_SET == 44) \
71
+ */
72
+
73
+ #if defined(__ELF__)
74
+ .section .note.GNU-stack,"",%progbits
75
+ #endif