pq_crypto 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -0
  3. data/CHANGELOG.md +62 -0
  4. data/GET_STARTED.md +366 -40
  5. data/README.md +76 -233
  6. data/SECURITY.md +107 -82
  7. data/ext/pqcrypto/extconf.rb +169 -87
  8. data/ext/pqcrypto/mldsa_api.h +1 -48
  9. data/ext/pqcrypto/mlkem_api.h +1 -18
  10. data/ext/pqcrypto/pq_externalmu.c +89 -204
  11. data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
  12. data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
  13. data/ext/pqcrypto/pqcrypto_secure.c +203 -78
  14. data/ext/pqcrypto/pqcrypto_secure.h +53 -14
  15. data/ext/pqcrypto/pqcrypto_version.h +7 -0
  16. data/ext/pqcrypto/randombytes.h +9 -0
  17. data/ext/pqcrypto/vendor/.vendored +10 -5
  18. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  248. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  249. data/lib/pq_crypto/algorithm_registry.rb +200 -0
  250. data/lib/pq_crypto/hybrid_kem.rb +1 -12
  251. data/lib/pq_crypto/kem.rb +104 -13
  252. data/lib/pq_crypto/pkcs8.rb +387 -0
  253. data/lib/pq_crypto/serialization.rb +1 -14
  254. data/lib/pq_crypto/signature.rb +123 -17
  255. data/lib/pq_crypto/spki.rb +131 -0
  256. data/lib/pq_crypto/version.rb +1 -1
  257. data/lib/pq_crypto.rb +79 -20
  258. data/script/vendor_libs.rb +88 -155
  259. metadata +241 -73
  260. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  261. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  262. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  263. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  264. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  265. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  269. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  270. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  271. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  272. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  273. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  274. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  275. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  276. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  277. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  278. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  279. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  280. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  281. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  282. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  283. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  284. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  308. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  309. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  310. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  311. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  312. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  313. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  314. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  315. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  316. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  317. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  318. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  319. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  320. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  321. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  322. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  323. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  324. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  325. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  326. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  327. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  328. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
@@ -0,0 +1,653 @@
1
+ /* Copyright (c) 2022 Arm Limited
2
+ * Copyright (c) 2022 Hanno Becker
3
+ * Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer
4
+ * Copyright (c) The mlkem-native project authors
5
+ * Copyright (c) The mldsa-native project authors
6
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
7
+ */
8
+
9
+ /* References
10
+ * ==========
11
+ *
12
+ * - [NeonNTT]
13
+ * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1
14
+ * Becker, Hwang, Kannwischer, Yang, Yang
15
+ * https://eprint.iacr.org/2021/986
16
+ *
17
+ * - [SLOTHY_Paper]
18
+ * Fast and Clean: Auditable high-performance assembly via constraint solving
19
+ * Abdulrahman, Becker, Kannwischer, Klein
20
+ * https://eprint.iacr.org/2022/1303
21
+ */
22
+
23
+ /* AArch64 ML-DSA forward NTT following @[NeonNTT] and @[SLOTHY_Paper] */
24
+
25
+ #include "../../../common.h"
26
+ #if defined(MLD_ARITH_BACKEND_AARCH64) && !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
27
+
28
+ /*
29
+ * WARNING: This file is auto-derived from the mldsa-native source file
30
+ * dev/aarch64_opt/src/ntt.S using scripts/simpasm. Do not modify it directly.
31
+ */
32
+
33
+ #if defined(__ELF__)
34
+ .section .note.GNU-stack,"",@progbits
35
+ #endif
36
+
37
+ .text
38
+ .balign 4
39
+ .global MLD_ASM_NAMESPACE(ntt_asm)
40
+ MLD_ASM_FN_SYMBOL(ntt_asm)
41
+
42
+ .cfi_startproc
43
+ sub sp, sp, #0x40
44
+ .cfi_adjust_cfa_offset 0x40
45
+ stp d8, d9, [sp]
46
+ .cfi_rel_offset d8, 0x0
47
+ .cfi_rel_offset d9, 0x8
48
+ stp d10, d11, [sp, #0x10]
49
+ .cfi_rel_offset d10, 0x10
50
+ .cfi_rel_offset d11, 0x18
51
+ stp d12, d13, [sp, #0x20]
52
+ .cfi_rel_offset d12, 0x20
53
+ .cfi_rel_offset d13, 0x28
54
+ stp d14, d15, [sp, #0x30]
55
+ .cfi_rel_offset d14, 0x30
56
+ .cfi_rel_offset d15, 0x38
57
+ mov w5, #0xe001 // =57345
58
+ movk w5, #0x7f, lsl #16
59
+ dup v7.4s, w5
60
+ mov x3, x0
61
+ mov x4, #0x8 // =8
62
+ ldr q0, [x1], #0x40
63
+ ldur q1, [x1, #-0x30]
64
+ ldur q2, [x1, #-0x20]
65
+ ldur q3, [x1, #-0x10]
66
+ ldr q23, [x0, #0x390]
67
+ ldr q13, [x0, #0x380]
68
+ ldr q22, [x0, #0x80]
69
+ ldr q26, [x0, #0x190]
70
+ ldr q8, [x0, #0x280]
71
+ ldr q6, [x0, #0x210]
72
+ mul v10.4s, v13.4s, v0.s[0]
73
+ sqrdmulh v13.4s, v13.4s, v0.s[1]
74
+ mul v12.4s, v8.4s, v0.s[0]
75
+ sqrdmulh v27.4s, v8.4s, v0.s[1]
76
+ mul v4.4s, v6.4s, v0.s[0]
77
+ mls v10.4s, v13.4s, v7.s[0]
78
+ ldr q13, [x0, #0x180]
79
+ sqrdmulh v14.4s, v23.4s, v0.s[1]
80
+ mls v12.4s, v27.4s, v7.s[0]
81
+ add v31.4s, v13.4s, v10.4s
82
+ sub v13.4s, v13.4s, v10.4s
83
+ mul v10.4s, v23.4s, v0.s[0]
84
+ sqrdmulh v8.4s, v13.4s, v1.s[1]
85
+ sub v18.4s, v22.4s, v12.4s
86
+ mls v10.4s, v14.4s, v7.s[0]
87
+ mul v13.4s, v13.4s, v1.s[0]
88
+ mls v13.4s, v8.4s, v7.s[0]
89
+ sub v29.4s, v26.4s, v10.4s
90
+ add v25.4s, v26.4s, v10.4s
91
+ mul v10.4s, v31.4s, v0.s[2]
92
+ mul v14.4s, v25.4s, v0.s[2]
93
+ add v17.4s, v18.4s, v13.4s
94
+ sub v15.4s, v18.4s, v13.4s
95
+ sqrdmulh v13.4s, v31.4s, v0.s[3]
96
+ sqrdmulh v20.4s, v15.4s, v3.s[1]
97
+ sqrdmulh v5.4s, v17.4s, v2.s[3]
98
+ mls v10.4s, v13.4s, v7.s[0]
99
+ ldr q13, [x0, #0x300]
100
+ mul v18.4s, v17.4s, v2.s[2]
101
+ add v31.4s, v22.4s, v12.4s
102
+ mul v23.4s, v15.4s, v3.s[0]
103
+ ldr q17, [x0, #0x90]
104
+ add v19.4s, v31.4s, v10.4s
105
+ sub v16.4s, v31.4s, v10.4s
106
+ mul v10.4s, v13.4s, v0.s[0]
107
+ sqrdmulh v13.4s, v13.4s, v0.s[1]
108
+ sqrdmulh v27.4s, v16.4s, v2.s[1]
109
+ mul v11.4s, v16.4s, v2.s[0]
110
+ mls v10.4s, v13.4s, v7.s[0]
111
+ ldr q13, [x0, #0x290]
112
+ ldr q22, [x0, #0x100]
113
+ mls v11.4s, v27.4s, v7.s[0]
114
+ sqrdmulh v15.4s, v13.4s, v0.s[1]
115
+ sub v12.4s, v22.4s, v10.4s
116
+ add v30.4s, v22.4s, v10.4s
117
+ mul v10.4s, v13.4s, v0.s[0]
118
+ ldr q28, [x0]
119
+ sqrdmulh v13.4s, v25.4s, v0.s[3]
120
+ sqrdmulh v27.4s, v30.4s, v0.s[3]
121
+ mls v10.4s, v15.4s, v7.s[0]
122
+ mls v14.4s, v13.4s, v7.s[0]
123
+ ldr q13, [x0, #0x200]
124
+ sqrdmulh v25.4s, v12.4s, v1.s[1]
125
+ add v24.4s, v17.4s, v10.4s
126
+ sub v21.4s, v17.4s, v10.4s
127
+ sqrdmulh v8.4s, v13.4s, v0.s[1]
128
+ sub v9.4s, v24.4s, v14.4s
129
+ mul v26.4s, v12.4s, v1.s[0]
130
+ mul v13.4s, v13.4s, v0.s[0]
131
+ mls v13.4s, v8.4s, v7.s[0]
132
+ mul v8.4s, v30.4s, v0.s[2]
133
+ mls v8.4s, v27.4s, v7.s[0]
134
+ add v16.4s, v28.4s, v13.4s
135
+ sub v10.4s, v28.4s, v13.4s
136
+ mls v26.4s, v25.4s, v7.s[0]
137
+ sqrdmulh v12.4s, v19.4s, v1.s[3]
138
+ sub v25.4s, v16.4s, v8.4s
139
+ mls v23.4s, v20.4s, v7.s[0]
140
+ sub v22.4s, v25.4s, v11.4s
141
+ sqrdmulh v20.4s, v9.4s, v2.s[1]
142
+ sub v15.4s, v10.4s, v26.4s
143
+ sub x4, x4, #0x2
144
+
145
+ Lntt_layer123_start:
146
+ add v31.4s, v10.4s, v26.4s
147
+ mul v17.4s, v19.4s, v1.s[2]
148
+ add v26.4s, v15.4s, v23.4s
149
+ ldr q30, [x0, #0x2a0]
150
+ sub v13.4s, v15.4s, v23.4s
151
+ mul v23.4s, v29.4s, v1.s[0]
152
+ add v25.4s, v25.4s, v11.4s
153
+ str q22, [x0, #0x180]
154
+ mul v11.4s, v9.4s, v2.s[0]
155
+ str q13, [x0, #0x380]
156
+ ldr q28, [x0, #0x10]
157
+ add v10.4s, v16.4s, v8.4s
158
+ mls v17.4s, v12.4s, v7.s[0]
159
+ ldr q13, [x0, #0x3a0]
160
+ str q26, [x0, #0x300]
161
+ sqrdmulh v27.4s, v30.4s, v0.s[1]
162
+ mls v18.4s, v5.4s, v7.s[0]
163
+ ldr q9, [x0, #0x1a0]
164
+ sub v16.4s, v10.4s, v17.4s
165
+ add v15.4s, v10.4s, v17.4s
166
+ sqrdmulh v10.4s, v6.4s, v0.s[1]
167
+ str q16, [x0, #0x80]
168
+ str q15, [x0], #0x10
169
+ sqrdmulh v19.4s, v13.4s, v0.s[1]
170
+ sub v15.4s, v31.4s, v18.4s
171
+ mul v8.4s, v13.4s, v0.s[0]
172
+ add v26.4s, v31.4s, v18.4s
173
+ str q15, [x0, #0x270]
174
+ sqrdmulh v13.4s, v29.4s, v1.s[1]
175
+ str q26, [x0, #0x1f0]
176
+ mls v8.4s, v19.4s, v7.s[0]
177
+ mls v11.4s, v20.4s, v7.s[0]
178
+ mls v23.4s, v13.4s, v7.s[0]
179
+ add v22.4s, v9.4s, v8.4s
180
+ ldr q6, [x0, #0x210]
181
+ sub v29.4s, v9.4s, v8.4s
182
+ mul v17.4s, v30.4s, v0.s[0]
183
+ ldr q9, [x0, #0x300]
184
+ sqrdmulh v13.4s, v22.4s, v0.s[3]
185
+ add v18.4s, v21.4s, v23.4s
186
+ mls v4.4s, v10.4s, v7.s[0]
187
+ sub v31.4s, v21.4s, v23.4s
188
+ sqrdmulh v16.4s, v31.4s, v3.s[1]
189
+ add v19.4s, v24.4s, v14.4s
190
+ mul v14.4s, v22.4s, v0.s[2]
191
+ sub v10.4s, v28.4s, v4.4s
192
+ mls v14.4s, v13.4s, v7.s[0]
193
+ ldr q13, [x0, #0x100]
194
+ sqrdmulh v22.4s, v9.4s, v0.s[1]
195
+ mul v8.4s, v9.4s, v0.s[0]
196
+ mul v23.4s, v31.4s, v3.s[0]
197
+ mls v8.4s, v22.4s, v7.s[0]
198
+ mls v23.4s, v16.4s, v7.s[0]
199
+ add v16.4s, v28.4s, v4.4s
200
+ ldr q22, [x0, #0x90]
201
+ mul v4.4s, v6.4s, v0.s[0]
202
+ mls v17.4s, v27.4s, v7.s[0]
203
+ add v21.4s, v13.4s, v8.4s
204
+ sub v27.4s, v13.4s, v8.4s
205
+ sqrdmulh v31.4s, v21.4s, v0.s[3]
206
+ str q25, [x0, #0xf0]
207
+ mul v8.4s, v21.4s, v0.s[2]
208
+ add v24.4s, v22.4s, v17.4s
209
+ sub v21.4s, v22.4s, v17.4s
210
+ sqrdmulh v5.4s, v18.4s, v2.s[3]
211
+ mls v8.4s, v31.4s, v7.s[0]
212
+ sub v9.4s, v24.4s, v14.4s
213
+ sqrdmulh v20.4s, v27.4s, v1.s[1]
214
+ mul v26.4s, v27.4s, v1.s[0]
215
+ sub v25.4s, v16.4s, v8.4s
216
+ mul v18.4s, v18.4s, v2.s[2]
217
+ sub v22.4s, v25.4s, v11.4s
218
+ mls v26.4s, v20.4s, v7.s[0]
219
+ sqrdmulh v20.4s, v9.4s, v2.s[1]
220
+ sqrdmulh v12.4s, v19.4s, v1.s[3]
221
+ sub v15.4s, v10.4s, v26.4s
222
+ subs x4, x4, #0x1
223
+ cbnz x4, Lntt_layer123_start
224
+ add v13.4s, v10.4s, v26.4s
225
+ mls v18.4s, v5.4s, v7.s[0]
226
+ str q22, [x0, #0x180]
227
+ add v27.4s, v16.4s, v8.4s
228
+ mul v22.4s, v19.4s, v1.s[2]
229
+ add v26.4s, v24.4s, v14.4s
230
+ ldr q31, [x0, #0x110]
231
+ sub v14.4s, v15.4s, v23.4s
232
+ add v17.4s, v15.4s, v23.4s
233
+ mls v22.4s, v12.4s, v7.s[0]
234
+ add v28.4s, v13.4s, v18.4s
235
+ str q14, [x0, #0x380]
236
+ sqrdmulh v24.4s, v6.4s, v0.s[1]
237
+ add v5.4s, v25.4s, v11.4s
238
+ sub v19.4s, v13.4s, v18.4s
239
+ str q17, [x0, #0x300]
240
+ str q5, [x0, #0x100]
241
+ mul v16.4s, v9.4s, v2.s[0]
242
+ ldr q18, [x0, #0x310]
243
+ str q19, [x0, #0x280]
244
+ mls v16.4s, v20.4s, v7.s[0]
245
+ str q28, [x0, #0x200]
246
+ add v13.4s, v27.4s, v22.4s
247
+ ldr q15, [x0, #0x10]
248
+ sub v10.4s, v27.4s, v22.4s
249
+ mls v4.4s, v24.4s, v7.s[0]
250
+ str q13, [x0], #0x10
251
+ str q10, [x0, #0x70]
252
+ sqrdmulh v12.4s, v29.4s, v1.s[1]
253
+ mul v23.4s, v29.4s, v1.s[0]
254
+ mul v8.4s, v26.4s, v1.s[2]
255
+ add v20.4s, v15.4s, v4.4s
256
+ sub v6.4s, v15.4s, v4.4s
257
+ mls v23.4s, v12.4s, v7.s[0]
258
+ sqrdmulh v22.4s, v18.4s, v0.s[1]
259
+ mul v5.4s, v18.4s, v0.s[0]
260
+ sub v28.4s, v21.4s, v23.4s
261
+ sqrdmulh v10.4s, v26.4s, v1.s[3]
262
+ mls v5.4s, v22.4s, v7.s[0]
263
+ sqrdmulh v30.4s, v28.4s, v3.s[1]
264
+ add v4.4s, v21.4s, v23.4s
265
+ mls v8.4s, v10.4s, v7.s[0]
266
+ add v12.4s, v31.4s, v5.4s
267
+ sub v9.4s, v31.4s, v5.4s
268
+ sqrdmulh v25.4s, v4.4s, v2.s[3]
269
+ sqrdmulh v15.4s, v9.4s, v1.s[1]
270
+ sqrdmulh v31.4s, v12.4s, v0.s[3]
271
+ mul v18.4s, v12.4s, v0.s[2]
272
+ mul v11.4s, v9.4s, v1.s[0]
273
+ mls v18.4s, v31.4s, v7.s[0]
274
+ mul v29.4s, v4.4s, v2.s[2]
275
+ mls v29.4s, v25.4s, v7.s[0]
276
+ add v23.4s, v20.4s, v18.4s
277
+ mls v11.4s, v15.4s, v7.s[0]
278
+ sub v31.4s, v20.4s, v18.4s
279
+ add v17.4s, v23.4s, v8.4s
280
+ add v5.4s, v31.4s, v16.4s
281
+ mul v24.4s, v28.4s, v3.s[0]
282
+ str q17, [x0], #0x10
283
+ sub v19.4s, v31.4s, v16.4s
284
+ mls v24.4s, v30.4s, v7.s[0]
285
+ str q5, [x0, #0xf0]
286
+ add v31.4s, v6.4s, v11.4s
287
+ sub v26.4s, v23.4s, v8.4s
288
+ str q19, [x0, #0x170]
289
+ add v4.4s, v31.4s, v29.4s
290
+ sub v13.4s, v6.4s, v11.4s
291
+ str q26, [x0, #0x70]
292
+ sub v11.4s, v31.4s, v29.4s
293
+ sub v22.4s, v13.4s, v24.4s
294
+ add v23.4s, v13.4s, v24.4s
295
+ str q4, [x0, #0x1f0]
296
+ str q11, [x0, #0x270]
297
+ str q23, [x0, #0x2f0]
298
+ str q22, [x0, #0x370]
299
+ mov x0, x3
300
+ mov x4, #0x8 // =8
301
+ ldr q9, [x0, #0x40]
302
+ ldr q23, [x1], #0x40
303
+ ldr q21, [x2, #0x60]
304
+ ldr q1, [x0, #0x20]
305
+ ldur q14, [x1, #-0x30]
306
+ ldr q13, [x0]
307
+ ldr q11, [x2, #0x50]
308
+ sqrdmulh v16.4s, v9.4s, v23.s[1]
309
+ ldr q17, [x0, #0x50]
310
+ mul v15.4s, v9.4s, v23.s[0]
311
+ ldr q30, [x0, #0x70]
312
+ ldr q27, [x0, #0x60]
313
+ ldr q8, [x2, #0x30]
314
+ sqrdmulh v12.4s, v17.4s, v23.s[1]
315
+ ldr q6, [x0, #0x30]
316
+ mls v15.4s, v16.4s, v7.s[0]
317
+ sqrdmulh v18.4s, v27.4s, v23.s[1]
318
+ sqrdmulh v19.4s, v30.4s, v23.s[1]
319
+ add v5.4s, v13.4s, v15.4s
320
+ mul v25.4s, v27.4s, v23.s[0]
321
+ sub v26.4s, v13.4s, v15.4s
322
+ mls v25.4s, v18.4s, v7.s[0]
323
+ mul v10.4s, v17.4s, v23.s[0]
324
+ mls v10.4s, v12.4s, v7.s[0]
325
+ mul v4.4s, v30.4s, v23.s[0]
326
+ sub v22.4s, v1.4s, v25.4s
327
+ mls v4.4s, v19.4s, v7.s[0]
328
+ add v28.4s, v1.4s, v25.4s
329
+ sqrdmulh v19.4s, v28.4s, v23.s[3]
330
+ sqrdmulh v9.4s, v22.4s, v14.s[1]
331
+ add v2.4s, v6.4s, v4.4s
332
+ mul v0.4s, v28.4s, v23.s[2]
333
+ sqrdmulh v27.4s, v2.4s, v23.s[3]
334
+ sub v17.4s, v6.4s, v4.4s
335
+ mul v3.4s, v2.4s, v23.s[2]
336
+ sqrdmulh v20.4s, v17.4s, v14.s[1]
337
+ ldr q1, [x0, #0x10]
338
+ mls v3.4s, v27.4s, v7.s[0]
339
+ mls v0.4s, v19.4s, v7.s[0]
340
+ ldur q16, [x1, #-0x20]
341
+ add v31.4s, v1.4s, v10.4s
342
+ mul v30.4s, v17.4s, v14.s[0]
343
+ mls v30.4s, v20.4s, v7.s[0]
344
+ add v27.4s, v31.4s, v3.4s
345
+ sub v23.4s, v1.4s, v10.4s
346
+ sub v24.4s, v31.4s, v3.4s
347
+ sqrdmulh v4.4s, v27.4s, v14.s[3]
348
+ sqrdmulh v10.4s, v24.4s, v16.s[1]
349
+ mul v18.4s, v24.4s, v16.s[0]
350
+ add v15.4s, v23.4s, v30.4s
351
+ sub v23.4s, v23.4s, v30.4s
352
+ mul v29.4s, v27.4s, v14.s[2]
353
+ sub v2.4s, v5.4s, v0.4s
354
+ add v12.4s, v5.4s, v0.4s
355
+ mls v18.4s, v10.4s, v7.s[0]
356
+ ldur q3, [x1, #-0x10]
357
+ mls v29.4s, v4.4s, v7.s[0]
358
+ mul v4.4s, v22.4s, v14.s[0]
359
+ add v1.4s, v2.4s, v18.4s
360
+ sub v24.4s, v2.4s, v18.4s
361
+ mls v4.4s, v9.4s, v7.s[0]
362
+ ldr q20, [x2, #0x10]
363
+ add v25.4s, v12.4s, v29.4s
364
+ mul v9.4s, v23.4s, v3.s[0]
365
+ sub v5.4s, v12.4s, v29.4s
366
+ sqrdmulh v31.4s, v23.4s, v3.s[1]
367
+ trn2 v6.4s, v1.4s, v24.4s
368
+ trn2 v10.4s, v25.4s, v5.4s
369
+ sqrdmulh v13.4s, v15.4s, v16.s[3]
370
+ trn2 v30.2d, v10.2d, v6.2d
371
+ ldr q3, [x2], #0xc0
372
+ mul v12.4s, v15.4s, v16.s[2]
373
+ trn1 v27.2d, v10.2d, v6.2d
374
+ mls v9.4s, v31.4s, v7.s[0]
375
+ trn1 v22.4s, v25.4s, v5.4s
376
+ sub v6.4s, v26.4s, v4.4s
377
+ mls v12.4s, v13.4s, v7.s[0]
378
+ trn1 v1.4s, v1.4s, v24.4s
379
+ add v13.4s, v26.4s, v4.4s
380
+ trn2 v10.2d, v22.2d, v1.2d
381
+ mul v28.4s, v30.4s, v3.4s
382
+ sub v31.4s, v6.4s, v9.4s
383
+ sub x4, x4, #0x1
384
+
385
+ Lntt_layer45678_start:
386
+ add v2.4s, v13.4s, v12.4s
387
+ sqrdmulh v5.4s, v30.4s, v20.4s
388
+ sub v25.4s, v13.4s, v12.4s
389
+ add v17.4s, v6.4s, v9.4s
390
+ mul v19.4s, v10.4s, v3.4s
391
+ trn2 v4.4s, v2.4s, v25.4s
392
+ ldur q24, [x2, #-0x50]
393
+ trn2 v29.4s, v17.4s, v31.4s
394
+ sqrdmulh v15.4s, v10.4s, v20.4s
395
+ mls v28.4s, v5.4s, v7.s[0]
396
+ trn2 v3.2d, v4.2d, v29.2d
397
+ sqrdmulh v12.4s, v3.4s, v24.4s
398
+ mul v16.4s, v3.4s, v21.4s
399
+ mls v19.4s, v15.4s, v7.s[0]
400
+ ldur q10, [x2, #-0xa0]
401
+ add v13.4s, v27.4s, v28.4s
402
+ mls v16.4s, v12.4s, v7.s[0]
403
+ sqrdmulh v9.4s, v13.4s, v8.4s
404
+ sub v30.4s, v27.4s, v28.4s
405
+ ldr q18, [x1], #0x40
406
+ mul v8.4s, v13.4s, v10.4s
407
+ ldr q10, [x0, #0xd0]
408
+ sqrdmulh v14.4s, v30.4s, v11.4s
409
+ ldr q23, [x0, #0xe0]
410
+ sqrdmulh v13.4s, v10.4s, v18.s[1]
411
+ sqrdmulh v12.4s, v23.4s, v18.s[1]
412
+ ldur q6, [x2, #-0x80]
413
+ mul v3.4s, v10.4s, v18.s[0]
414
+ mls v3.4s, v13.4s, v7.s[0]
415
+ ldr q13, [x0, #0xf0]
416
+ trn1 v27.4s, v2.4s, v25.4s
417
+ mul v2.4s, v30.4s, v6.4s
418
+ trn1 v20.4s, v17.4s, v31.4s
419
+ trn1 v25.2d, v4.2d, v29.2d
420
+ sqrdmulh v10.4s, v13.4s, v18.s[1]
421
+ trn2 v5.2d, v27.2d, v20.2d
422
+ ldur q6, [x2, #-0x10]
423
+ mls v8.4s, v9.4s, v7.s[0]
424
+ sub v15.4s, v25.4s, v16.4s
425
+ sqrdmulh v31.4s, v5.4s, v24.4s
426
+ sqrdmulh v30.4s, v15.4s, v6.4s
427
+ ldur q9, [x2, #-0x30]
428
+ mul v4.4s, v5.4s, v21.4s
429
+ ldur q21, [x2, #-0x40]
430
+ ldur q6, [x2, #-0x20]
431
+ add v5.4s, v25.4s, v16.4s
432
+ mls v4.4s, v31.4s, v7.s[0]
433
+ mul v0.4s, v5.4s, v21.4s
434
+ mul v17.4s, v13.4s, v18.s[0]
435
+ mls v17.4s, v10.4s, v7.s[0]
436
+ ldr q28, [x0, #0xb0]
437
+ sqrdmulh v26.4s, v5.4s, v9.4s
438
+ mul v9.4s, v15.4s, v6.4s
439
+ trn1 v6.2d, v22.2d, v1.2d
440
+ mls v9.4s, v30.4s, v7.s[0]
441
+ add v25.4s, v28.4s, v17.4s
442
+ mls v2.4s, v14.4s, v7.s[0]
443
+ trn1 v5.2d, v27.2d, v20.2d
444
+ ldr q20, [x2, #0x10]
445
+ mul v29.4s, v25.4s, v18.s[2]
446
+ add v15.4s, v6.4s, v19.4s
447
+ ldr q30, [x0, #0xc0]
448
+ sub v19.4s, v6.4s, v19.4s
449
+ add v31.4s, v15.4s, v8.4s
450
+ mls v0.4s, v26.4s, v7.s[0]
451
+ ldur q14, [x1, #-0x30]
452
+ add v21.4s, v19.4s, v2.4s
453
+ sub v24.4s, v19.4s, v2.4s
454
+ sqrdmulh v27.4s, v25.4s, v18.s[3]
455
+ sub v26.4s, v15.4s, v8.4s
456
+ ldr q2, [x0, #0x90]
457
+ mul v16.4s, v30.4s, v18.s[0]
458
+ sub v25.4s, v28.4s, v17.4s
459
+ trn1 v11.4s, v31.4s, v26.4s
460
+ ldr q1, [x0, #0xa0]
461
+ trn1 v6.4s, v21.4s, v24.4s
462
+ sqrdmulh v13.4s, v25.4s, v14.s[1]
463
+ add v8.4s, v2.4s, v3.4s
464
+ trn2 v28.2d, v11.2d, v6.2d
465
+ sqrdmulh v19.4s, v30.4s, v18.s[1]
466
+ sub v10.4s, v5.4s, v4.4s
467
+ ldur q22, [x1, #-0x20]
468
+ str q28, [x0, #0x20]
469
+ mls v29.4s, v27.4s, v7.s[0]
470
+ add v15.4s, v10.4s, v9.4s
471
+ mul v25.4s, v25.4s, v14.s[0]
472
+ ldur q27, [x1, #-0x10]
473
+ trn2 v17.4s, v31.4s, v26.4s
474
+ trn2 v21.4s, v21.4s, v24.4s
475
+ mls v16.4s, v19.4s, v7.s[0]
476
+ sub v24.4s, v8.4s, v29.4s
477
+ sub v10.4s, v10.4s, v9.4s
478
+ mls v25.4s, v13.4s, v7.s[0]
479
+ trn1 v13.2d, v11.2d, v6.2d
480
+ ldr q28, [x0, #0x80]
481
+ sqrdmulh v30.4s, v24.4s, v22.s[1]
482
+ trn2 v19.2d, v17.2d, v21.2d
483
+ trn1 v6.2d, v17.2d, v21.2d
484
+ mul v31.4s, v23.4s, v18.s[0]
485
+ str q13, [x0], #0x80
486
+ stur q6, [x0, #-0x70]
487
+ stur q19, [x0, #-0x50]
488
+ ldr q11, [x2, #0x50]
489
+ mls v31.4s, v12.4s, v7.s[0]
490
+ ldr q21, [x2, #0x60]
491
+ trn1 v9.4s, v15.4s, v10.4s
492
+ trn2 v6.4s, v15.4s, v10.4s
493
+ mul v24.4s, v24.4s, v22.s[0]
494
+ sub v10.4s, v2.4s, v3.4s
495
+ ldr q3, [x2], #0xc0
496
+ mls v24.4s, v30.4s, v7.s[0]
497
+ add v26.4s, v8.4s, v29.4s
498
+ ldur q8, [x2, #-0x90]
499
+ add v17.4s, v5.4s, v4.4s
500
+ sqrdmulh v2.4s, v26.4s, v14.s[3]
501
+ sub v13.4s, v1.4s, v31.4s
502
+ add v30.4s, v1.4s, v31.4s
503
+ add v15.4s, v10.4s, v25.4s
504
+ sqrdmulh v19.4s, v13.4s, v14.s[1]
505
+ sub v25.4s, v10.4s, v25.4s
506
+ mul v29.4s, v13.4s, v14.s[0]
507
+ sub v5.4s, v28.4s, v16.4s
508
+ sqrdmulh v4.4s, v30.4s, v18.s[3]
509
+ sub v23.4s, v17.4s, v0.4s
510
+ add v31.4s, v17.4s, v0.4s
511
+ mul v18.4s, v30.4s, v18.s[2]
512
+ add v1.4s, v28.4s, v16.4s
513
+ trn2 v12.4s, v31.4s, v23.4s
514
+ mls v29.4s, v19.4s, v7.s[0]
515
+ trn1 v13.4s, v31.4s, v23.4s
516
+ trn2 v30.2d, v12.2d, v6.2d
517
+ mls v18.4s, v4.4s, v7.s[0]
518
+ trn2 v10.2d, v13.2d, v9.2d
519
+ trn1 v31.2d, v13.2d, v9.2d
520
+ mul v19.4s, v26.4s, v14.s[2]
521
+ trn1 v12.2d, v12.2d, v6.2d
522
+ sub v6.4s, v5.4s, v29.4s
523
+ mls v19.4s, v2.4s, v7.s[0]
524
+ add v13.4s, v5.4s, v29.4s
525
+ stur q10, [x0, #-0x20]
526
+ sub v10.4s, v1.4s, v18.4s
527
+ add v28.4s, v1.4s, v18.4s
528
+ sqrdmulh v5.4s, v25.4s, v27.s[1]
529
+ stur q31, [x0, #-0x40]
530
+ add v26.4s, v10.4s, v24.4s
531
+ sub v31.4s, v10.4s, v24.4s
532
+ mul v9.4s, v25.4s, v27.s[0]
533
+ stur q12, [x0, #-0x30]
534
+ sub v24.4s, v28.4s, v19.4s
535
+ sqrdmulh v10.4s, v15.4s, v22.s[3]
536
+ trn1 v1.4s, v26.4s, v31.4s
537
+ stur q30, [x0, #-0x10]
538
+ add v30.4s, v28.4s, v19.4s
539
+ mls v9.4s, v5.4s, v7.s[0]
540
+ trn2 v25.4s, v26.4s, v31.4s
541
+ trn2 v14.4s, v30.4s, v24.4s
542
+ mul v12.4s, v15.4s, v22.s[2]
543
+ trn1 v22.4s, v30.4s, v24.4s
544
+ trn1 v27.2d, v14.2d, v25.2d
545
+ mls v12.4s, v10.4s, v7.s[0]
546
+ trn2 v30.2d, v14.2d, v25.2d
547
+ sub v31.4s, v6.4s, v9.4s
548
+ trn2 v10.2d, v22.2d, v1.2d
549
+ mul v28.4s, v30.4s, v3.4s
550
+ subs x4, x4, #0x1
551
+ cbnz x4, Lntt_layer45678_start
552
+ add v9.4s, v6.4s, v9.4s
553
+ sqrdmulh v6.4s, v30.4s, v20.4s
554
+ ldur q24, [x2, #-0xa0]
555
+ add v25.4s, v13.4s, v12.4s
556
+ sub v15.4s, v13.4s, v12.4s
557
+ mul v19.4s, v10.4s, v3.4s
558
+ trn2 v5.4s, v9.4s, v31.4s
559
+ sqrdmulh v3.4s, v10.4s, v20.4s
560
+ trn2 v10.4s, v25.4s, v15.4s
561
+ mls v28.4s, v6.4s, v7.s[0]
562
+ trn2 v13.2d, v10.2d, v5.2d
563
+ ldur q30, [x2, #-0x50]
564
+ mul v12.4s, v13.4s, v21.4s
565
+ mls v19.4s, v3.4s, v7.s[0]
566
+ add v20.4s, v27.4s, v28.4s
567
+ sqrdmulh v13.4s, v13.4s, v30.4s
568
+ sub v3.4s, v27.4s, v28.4s
569
+ mul v24.4s, v20.4s, v24.4s
570
+ sqrdmulh v6.4s, v3.4s, v11.4s
571
+ ldur q27, [x2, #-0x80]
572
+ mls v12.4s, v13.4s, v7.s[0]
573
+ trn1 v25.4s, v25.4s, v15.4s
574
+ mul v27.4s, v3.4s, v27.4s
575
+ trn1 v31.4s, v9.4s, v31.4s
576
+ trn1 v3.2d, v10.2d, v5.2d
577
+ ldur q13, [x2, #-0x30]
578
+ ldur q15, [x2, #-0x40]
579
+ sqrdmulh v9.4s, v20.4s, v8.4s
580
+ trn2 v20.2d, v25.2d, v31.2d
581
+ ldur q10, [x2, #-0x10]
582
+ mls v27.4s, v6.4s, v7.s[0]
583
+ add v5.4s, v3.4s, v12.4s
584
+ sub v6.4s, v3.4s, v12.4s
585
+ sqrdmulh v3.4s, v20.4s, v30.4s
586
+ trn1 v12.2d, v22.2d, v1.2d
587
+ sqrdmulh v10.4s, v6.4s, v10.4s
588
+ mls v24.4s, v9.4s, v7.s[0]
589
+ sub v9.4s, v12.4s, v19.4s
590
+ trn1 v25.2d, v25.2d, v31.2d
591
+ sqrdmulh v31.4s, v5.4s, v13.4s
592
+ add v30.4s, v9.4s, v27.4s
593
+ add v13.4s, v12.4s, v19.4s
594
+ mul v1.4s, v20.4s, v21.4s
595
+ ldur q12, [x2, #-0x20]
596
+ add v21.4s, v13.4s, v24.4s
597
+ sub v13.4s, v13.4s, v24.4s
598
+ mls v1.4s, v3.4s, v7.s[0]
599
+ sub v3.4s, v9.4s, v27.4s
600
+ mul v9.4s, v6.4s, v12.4s
601
+ trn2 v12.4s, v21.4s, v13.4s
602
+ trn1 v6.4s, v30.4s, v3.4s
603
+ trn2 v30.4s, v30.4s, v3.4s
604
+ mls v9.4s, v10.4s, v7.s[0]
605
+ trn1 v13.4s, v21.4s, v13.4s
606
+ mul v15.4s, v5.4s, v15.4s
607
+ sub v3.4s, v25.4s, v1.4s
608
+ add v5.4s, v25.4s, v1.4s
609
+ mls v15.4s, v31.4s, v7.s[0]
610
+ trn1 v21.2d, v13.2d, v6.2d
611
+ trn2 v6.2d, v13.2d, v6.2d
612
+ add v10.4s, v3.4s, v9.4s
613
+ sub v13.4s, v3.4s, v9.4s
614
+ str q21, [x0], #0x80
615
+ trn1 v3.2d, v12.2d, v30.2d
616
+ trn2 v31.2d, v12.2d, v30.2d
617
+ trn1 v21.4s, v10.4s, v13.4s
618
+ sub v30.4s, v5.4s, v15.4s
619
+ add v12.4s, v5.4s, v15.4s
620
+ stur q3, [x0, #-0x70]
621
+ trn2 v13.4s, v10.4s, v13.4s
622
+ trn1 v19.4s, v12.4s, v30.4s
623
+ trn2 v12.4s, v12.4s, v30.4s
624
+ stur q6, [x0, #-0x60]
625
+ stur q31, [x0, #-0x50]
626
+ trn1 v10.2d, v19.2d, v21.2d
627
+ trn2 v3.2d, v19.2d, v21.2d
628
+ trn1 v21.2d, v12.2d, v13.2d
629
+ trn2 v13.2d, v12.2d, v13.2d
630
+ stur q10, [x0, #-0x40]
631
+ stur q3, [x0, #-0x20]
632
+ stur q13, [x0, #-0x10]
633
+ stur q21, [x0, #-0x30]
634
+ ldp d8, d9, [sp]
635
+ .cfi_restore d8
636
+ .cfi_restore d9
637
+ ldp d10, d11, [sp, #0x10]
638
+ .cfi_restore d10
639
+ .cfi_restore d11
640
+ ldp d12, d13, [sp, #0x20]
641
+ .cfi_restore d12
642
+ .cfi_restore d13
643
+ ldp d14, d15, [sp, #0x30]
644
+ .cfi_restore d14
645
+ .cfi_restore d15
646
+ add sp, sp, #0x40
647
+ .cfi_adjust_cfa_offset -0x40
648
+ ret
649
+ .cfi_endproc
650
+
651
+ MLD_ASM_FN_SIZE(ntt_asm)
652
+
653
+ #endif /* MLD_ARITH_BACKEND_AARCH64 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */