pq_crypto 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (408):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/GET_STARTED.md +21 -16
  4. data/README.md +26 -0
  5. data/SECURITY.md +22 -16
  6. data/ext/pqcrypto/extconf.rb +148 -99
  7. data/ext/pqcrypto/mldsa_api.h +1 -118
  8. data/ext/pqcrypto/mlkem_api.h +1 -42
  9. data/ext/pqcrypto/pq_externalmu.c +88 -216
  10. data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
  11. data/ext/pqcrypto/pqcrypto_ruby_secure.c +0 -3
  12. data/ext/pqcrypto/pqcrypto_secure.c +135 -117
  13. data/ext/pqcrypto/pqcrypto_secure.h +1 -42
  14. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  15. data/ext/pqcrypto/randombytes.h +9 -0
  16. data/ext/pqcrypto/vendor/.vendored +10 -5
  17. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  18. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  127. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  248. data/lib/pq_crypto/version.rb +1 -1
  249. data/lib/pq_crypto.rb +1 -1
  250. data/script/vendor_libs.rb +88 -159
  251. metadata +236 -160
  252. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  253. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  254. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  255. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  256. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  257. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  258. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  259. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  260. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  261. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  262. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  263. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  264. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  265. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  269. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  270. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  271. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  272. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  273. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  274. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  275. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  276. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  277. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/LICENSE +0 -5
  278. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile +0 -19
  279. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile.Microsoft_nmake +0 -23
  280. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/api.h +0 -18
  281. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.c +0 -83
  282. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.h +0 -11
  283. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.c +0 -327
  284. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.h +0 -22
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.c +0 -164
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.h +0 -23
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.c +0 -146
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.h +0 -14
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/params.h +0 -36
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.c +0 -311
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.h +0 -37
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.c +0 -198
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.h +0 -26
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.c +0 -41
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.h +0 -13
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric-shake.c +0 -71
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric.h +0 -30
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.c +0 -67
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.h +0 -13
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/LICENSE +0 -5
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile +0 -19
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile.Microsoft_nmake +0 -23
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/api.h +0 -18
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.c +0 -108
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.h +0 -11
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.c +0 -327
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.h +0 -22
  308. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.c +0 -164
  309. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.h +0 -23
  310. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.c +0 -146
  311. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.h +0 -14
  312. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/params.h +0 -36
  313. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.c +0 -299
  314. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.h +0 -37
  315. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.c +0 -188
  316. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.h +0 -26
  317. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.c +0 -41
  318. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.h +0 -13
  319. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric-shake.c +0 -71
  320. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric.h +0 -30
  321. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.c +0 -67
  322. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.h +0 -13
  323. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  324. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  325. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  326. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  327. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  328. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  329. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  330. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  331. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  332. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  333. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  334. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  335. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  336. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  337. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  338. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  339. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  340. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  341. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  342. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  343. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  344. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  345. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  346. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/LICENSE +0 -5
  347. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile +0 -19
  348. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile.Microsoft_nmake +0 -23
  349. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/api.h +0 -50
  350. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.c +0 -98
  351. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.h +0 -10
  352. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.c +0 -261
  353. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.h +0 -31
  354. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/params.h +0 -44
  355. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.c +0 -848
  356. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.h +0 -52
  357. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.c +0 -415
  358. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.h +0 -65
  359. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.c +0 -69
  360. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.h +0 -17
  361. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.c +0 -98
  362. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.h +0 -14
  363. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.c +0 -407
  364. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.h +0 -47
  365. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric-shake.c +0 -26
  366. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric.h +0 -34
  367. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  368. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  369. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  370. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  371. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  372. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  373. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  374. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  375. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  376. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  377. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  378. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  379. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  380. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  381. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  382. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  383. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  384. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  385. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  386. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  387. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
  388. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/LICENSE +0 -5
  389. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile +0 -19
  390. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile.Microsoft_nmake +0 -23
  391. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/api.h +0 -50
  392. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.c +0 -98
  393. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.h +0 -10
  394. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.c +0 -261
  395. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.h +0 -31
  396. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/params.h +0 -44
  397. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.c +0 -823
  398. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.h +0 -52
  399. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.c +0 -415
  400. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.h +0 -65
  401. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.c +0 -69
  402. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.h +0 -17
  403. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.c +0 -92
  404. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.h +0 -14
  405. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.c +0 -407
  406. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.h +0 -47
  407. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric-shake.c +0 -26
  408. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric.h +0 -34
@@ -0,0 +1,805 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* === ML-KEM NTT using RISC-V Vector intrinstics */
7
+
8
+ #include "../../../common.h"
9
+
10
+ #if defined(MLK_ARITH_BACKEND_RISCV64) && \
11
+ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
12
+
13
+ #include <riscv_vector.h>
14
+
15
+ #include "arith_native_riscv64.h"
16
+ #include "rv64v_debug.h"
17
+
18
+ /* Montgomery reduction constants */
19
+ /* check-magic: -3327 == signed_mod(pow(MLKEM_Q,-1,2^16), 2^16) */
20
+ #define MLK_RVV_QI -3327
21
+
22
+ /* check-magic: 2285 == unsigned_mod(2^16, MLKEM_Q) */
23
+ #define MLK_RVV_MONT_R1 2285
24
+
25
+ /* check-magic: 1353 == pow(2, 32, MLKEM_Q) */
26
+ #define MLK_RVV_MONT_R2 1353
27
+
28
+ /* check-magic: 1441 == pow(2,32-7,MLKEM_Q) */
29
+ #define MLK_RVV_MONT_NR 1441
30
+
31
+ static inline vint16m1_t fq_redc(vint16m1_t rh, vint16m1_t rl, size_t vl)
32
+ {
33
+ vint16m1_t t;
34
+
35
+ t = __riscv_vmul_vx_i16m1(rl, MLK_RVV_QI, vl); /* t = l * Q^-1 */
36
+ t = __riscv_vmulh_vx_i16m1(t, MLKEM_Q, vl); /* t = (t*Q) / R */
37
+ t = __riscv_vsub_vv_i16m1(rh, t, vl); /* t = h - t */
38
+
39
+ return t;
40
+ }
41
+
42
+ /* Narrowing reduction */
43
+
44
+ static inline vint16m1_t fq_redc2(vint32m2_t z, size_t vl)
45
+ {
46
+ vint16m1_t t;
47
+
48
+ t = __riscv_vmul_vx_i16m1(__riscv_vncvt_x_x_w_i16m1(z, vl), MLK_RVV_QI,
49
+ vl); /* t = l * Q^-1 */
50
+ z = __riscv_vsub_vv_i32m2(z, __riscv_vwmul_vx_i32m2(t, MLKEM_Q, vl),
51
+ vl); /* x = (x - (t*Q)) */
52
+ t = __riscv_vnsra_wx_i16m1(z, 16, vl);
53
+
54
+ return t;
55
+ }
56
+
57
+ /* Narrowing Barrett */
58
+
59
+ static inline vint16m1_t fq_barrett(vint16m1_t a, size_t vl)
60
+ {
61
+ vint16m1_t t;
62
+ const int16_t v = ((1 << 26) + MLKEM_Q / 2) / MLKEM_Q;
63
+
64
+ t = __riscv_vmulh_vx_i16m1(a, v, vl);
65
+ t = __riscv_vadd_vx_i16m1(t, 1 << (25 - 16), vl);
66
+ t = __riscv_vsra_vx_i16m1(t, 26 - 16, vl);
67
+ t = __riscv_vmul_vx_i16m1(t, MLKEM_Q, vl);
68
+ t = __riscv_vsub_vv_i16m1(a, t, vl);
69
+
70
+ mlk_assert_abs_bound_int16m1(t, vl, MLKEM_Q_HALF);
71
+ return t;
72
+ }
73
+
74
+ /* Conditionally add Q (if negative) */
75
+
76
+ static inline vint16m1_t fq_cadd(vint16m1_t rx, size_t vl)
77
+ {
78
+ vbool16_t bn;
79
+
80
+ bn = __riscv_vmslt_vx_i16m1_b16(rx, 0, vl); /* if x < 0: */
81
+ rx = __riscv_vadd_vx_i16m1_mu(bn, rx, rx, MLKEM_Q, vl); /* x += Q */
82
+ return rx;
83
+ }
84
+
85
+ /* Conditionally subtract Q (if Q or above) */
86
+
87
+ static inline vint16m1_t fq_csub(vint16m1_t rx, size_t vl)
88
+ {
89
+ vbool16_t bn;
90
+
91
+ bn = __riscv_vmsge_vx_i16m1_b16(rx, MLKEM_Q, vl); /* if x >= Q: */
92
+ rx = __riscv_vsub_vx_i16m1_mu(bn, rx, rx, MLKEM_Q, vl); /* x -= Q */
93
+ return rx;
94
+ }
95
+
96
+ /* Montgomery multiply: vector-vector */
97
+
98
+ static inline vint16m1_t fq_mul_vv(vint16m1_t rx, vint16m1_t ry, size_t vl)
99
+ {
100
+ vint16m1_t rl, rh;
101
+
102
+ rh = __riscv_vmulh_vv_i16m1(rx, ry, vl); /* h = (x * y) / R */
103
+ rl = __riscv_vmul_vv_i16m1(rx, ry, vl); /* l = (x * y) % R */
104
+ return fq_redc(rh, rl, vl);
105
+ }
106
+
107
+ /* Montgomery multiply: vector-scalar */
108
+
109
+ static inline vint16m1_t fq_mul_vx(vint16m1_t rx, int16_t ry, size_t vl)
110
+ {
111
+ vint16m1_t rl, rh;
112
+
113
+ rh = __riscv_vmulh_vx_i16m1(rx, ry, vl); /* h = (x * y) / R */
114
+ rl = __riscv_vmul_vx_i16m1(rx, ry, vl); /* l = (x * y) % R */
115
+ return fq_redc(rh, rl, vl);
116
+ }
117
+
118
+ /* full normalization */
119
+
120
+ static inline vint16m1_t fq_mulq_vx(vint16m1_t rx, int16_t ry, size_t vl)
121
+ {
122
+ vint16m1_t result;
123
+
124
+ result = fq_mul_vx(rx, ry, vl);
125
+
126
+ mlk_assert_abs_bound_int16m1(result, vl, MLKEM_Q);
127
+ return result;
128
+ }
129
+
130
+ /* create a permutation for swapping index bits a and b, a < b */
131
+
132
+ static vuint16m2_t bitswap_perm(unsigned a, unsigned b, size_t vl)
133
+ {
134
+ const vuint16m2_t v2id = __riscv_vid_v_u16m2(vl);
135
+
136
+ vuint16m2_t xa, xb;
137
+ xa = __riscv_vsrl_vx_u16m2(v2id, b - a, vl);
138
+ xa = __riscv_vxor_vv_u16m2(xa, v2id, vl);
139
+ xa = __riscv_vand_vx_u16m2(xa, (1 << a), vl);
140
+ xb = __riscv_vsll_vx_u16m2(xa, b - a, vl);
141
+ xa = __riscv_vxor_vv_u16m2(xa, xb, vl);
142
+ xa = __riscv_vxor_vv_u16m2(v2id, xa, vl);
143
+ return xa;
144
+ }
145
+
146
+ /*************************************************
147
+ * Name: poly_ntt
148
+ *
149
+ * Description: Computes negacyclic number-theoretic transform (NTT) of
150
+ * a polynomial in place;
151
+ * inputs assumed to be in normal order, output in
152
+ * bitreversed order
153
+ *
154
+ * Arguments: - uint16_t *r: pointer to in/output polynomial
155
+ **************************************************/
156
+
157
+ /* Forward / Cooley-Tukey butterfly operation */
158
+
159
+ #define MLK_RVV_CT_BFLY_FX(u0, u1, ut, uc, vl, layer) \
160
+ { \
161
+ mlk_assert_abs_bound(&uc, 1, MLKEM_Q_HALF); \
162
+ \
163
+ ut = fq_mul_vx(u1, uc, vl); \
164
+ mlk_assert_abs_bound_int16m1(ut, vl, MLKEM_Q); \
165
+ \
166
+ u1 = __riscv_vsub_vv_i16m1(u0, ut, vl); \
167
+ u0 = __riscv_vadd_vv_i16m1(u0, ut, vl); \
168
+ mlk_assert_abs_bound_int16m1(u0, vl, (layer + 1) * MLKEM_Q); \
169
+ mlk_assert_abs_bound_int16m1(u1, vl, (layer + 1) * MLKEM_Q); \
170
+ }
171
+
172
+ #define MLK_RVV_CT_BFLY_FV(u0, u1, ut, uc, vl, layer) \
173
+ { \
174
+ mlk_assert_abs_bound_int16m1(uc, vl, MLKEM_Q_HALF); \
175
+ \
176
+ ut = fq_mul_vv(u1, uc, vl); \
177
+ mlk_assert_abs_bound_int16m1(ut, vl, MLKEM_Q); \
178
+ \
179
+ u1 = __riscv_vsub_vv_i16m1(u0, ut, vl); \
180
+ u0 = __riscv_vadd_vv_i16m1(u0, ut, vl); \
181
+ mlk_assert_abs_bound_int16m1(u0, vl, (layer + 1) * MLKEM_Q); \
182
+ mlk_assert_abs_bound_int16m1(u1, vl, (layer + 1) * MLKEM_Q); \
183
+ }
184
+
185
+ static vint16m2_t mlk_rv64v_ntt2(vint16m2_t vp, vint16m1_t cz)
186
+ {
187
+ size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
188
+ size_t vl2 = 2 * vl;
189
+
190
+ const vuint16m2_t v2p8 = bitswap_perm(3, 4, vl2);
191
+ const vuint16m2_t v2p4 = bitswap_perm(2, 4, vl2);
192
+ const vuint16m2_t v2p2 = bitswap_perm(1, 4, vl2);
193
+
194
+ /* p1 = p8(p4(p2)) */
195
+ const vuint16m2_t v2p1 = __riscv_vrgather_vv_u16m2(
196
+ __riscv_vrgather_vv_u16m2(v2p2, v2p4, vl2), v2p8, vl2);
197
+
198
+ const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
199
+ const vuint16m1_t cs8 =
200
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 3, vl), 2, vl);
201
+ const vuint16m1_t cs4 =
202
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 2, vl), 2 + 2, vl);
203
+ const vuint16m1_t cs2 =
204
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 1, vl), 2 + 2 + 4, vl);
205
+
206
+ vint16m1_t vt, c0, t0, t1;
207
+
208
+ /* swap 8 */
209
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p8, vl2);
210
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
211
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
212
+
213
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs8, vl);
214
+ MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 5);
215
+
216
+ /* swap 4 */
217
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
218
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p4, vl2);
219
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
220
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
221
+
222
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs4, vl);
223
+ MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 6);
224
+
225
+ /* swap 2 */
226
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
227
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p2, vl2);
228
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
229
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
230
+
231
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs2, vl);
232
+ MLK_RVV_CT_BFLY_FV(t0, t1, vt, c0, vl, 7);
233
+
234
+ /* reorganize */
235
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
236
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p1, vl2);
237
+
238
+ return vp;
239
+ }
240
+
241
+ /* Only for VLEN=256 for now */
242
+ void mlk_rv64v_poly_ntt(int16_t *r)
243
+ {
244
+ /* zetas can be compiled into vector constants; don't pass as a pointer */
245
+ #include "rv64v_zetas.inc"
246
+
247
+ size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
248
+ size_t vl2 = 2 * vl;
249
+
250
+ vint16m1_t vt;
251
+ vint16m1_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf;
252
+
253
+ const vint16m1_t z0 = __riscv_vle16_v_i16m1(&zeta[0x00], vl);
254
+ const vint16m1_t z2 = __riscv_vle16_v_i16m1(&zeta[0x10], vl);
255
+ const vint16m1_t z4 = __riscv_vle16_v_i16m1(&zeta[0x20], vl);
256
+ const vint16m1_t z6 = __riscv_vle16_v_i16m1(&zeta[0x30], vl);
257
+ const vint16m1_t z8 = __riscv_vle16_v_i16m1(&zeta[0x40], vl);
258
+ const vint16m1_t za = __riscv_vle16_v_i16m1(&zeta[0x50], vl);
259
+ const vint16m1_t zc = __riscv_vle16_v_i16m1(&zeta[0x60], vl);
260
+ const vint16m1_t ze = __riscv_vle16_v_i16m1(&zeta[0x70], vl);
261
+
262
+ v0 = __riscv_vle16_v_i16m1(&r[0x00], vl);
263
+ v1 = __riscv_vle16_v_i16m1(&r[0x10], vl);
264
+ v2 = __riscv_vle16_v_i16m1(&r[0x20], vl);
265
+ v3 = __riscv_vle16_v_i16m1(&r[0x30], vl);
266
+ v4 = __riscv_vle16_v_i16m1(&r[0x40], vl);
267
+ v5 = __riscv_vle16_v_i16m1(&r[0x50], vl);
268
+ v6 = __riscv_vle16_v_i16m1(&r[0x60], vl);
269
+ v7 = __riscv_vle16_v_i16m1(&r[0x70], vl);
270
+ v8 = __riscv_vle16_v_i16m1(&r[0x80], vl);
271
+ v9 = __riscv_vle16_v_i16m1(&r[0x90], vl);
272
+ va = __riscv_vle16_v_i16m1(&r[0xa0], vl);
273
+ vb = __riscv_vle16_v_i16m1(&r[0xb0], vl);
274
+ vc = __riscv_vle16_v_i16m1(&r[0xc0], vl);
275
+ vd = __riscv_vle16_v_i16m1(&r[0xd0], vl);
276
+ ve = __riscv_vle16_v_i16m1(&r[0xe0], vl);
277
+ vf = __riscv_vle16_v_i16m1(&r[0xf0], vl);
278
+
279
+ MLK_RVV_CT_BFLY_FX(v0, v8, vt, zeta[0x01], vl, 1);
280
+ MLK_RVV_CT_BFLY_FX(v1, v9, vt, zeta[0x01], vl, 1);
281
+ MLK_RVV_CT_BFLY_FX(v2, va, vt, zeta[0x01], vl, 1);
282
+ MLK_RVV_CT_BFLY_FX(v3, vb, vt, zeta[0x01], vl, 1);
283
+ MLK_RVV_CT_BFLY_FX(v4, vc, vt, zeta[0x01], vl, 1);
284
+ MLK_RVV_CT_BFLY_FX(v5, vd, vt, zeta[0x01], vl, 1);
285
+ MLK_RVV_CT_BFLY_FX(v6, ve, vt, zeta[0x01], vl, 1);
286
+ MLK_RVV_CT_BFLY_FX(v7, vf, vt, zeta[0x01], vl, 1);
287
+
288
+ MLK_RVV_CT_BFLY_FX(v0, v4, vt, zeta[0x10], vl, 2);
289
+ MLK_RVV_CT_BFLY_FX(v1, v5, vt, zeta[0x10], vl, 2);
290
+ MLK_RVV_CT_BFLY_FX(v2, v6, vt, zeta[0x10], vl, 2);
291
+ MLK_RVV_CT_BFLY_FX(v3, v7, vt, zeta[0x10], vl, 2);
292
+ MLK_RVV_CT_BFLY_FX(v8, vc, vt, zeta[0x11], vl, 2);
293
+ MLK_RVV_CT_BFLY_FX(v9, vd, vt, zeta[0x11], vl, 2);
294
+ MLK_RVV_CT_BFLY_FX(va, ve, vt, zeta[0x11], vl, 2);
295
+ MLK_RVV_CT_BFLY_FX(vb, vf, vt, zeta[0x11], vl, 2);
296
+
297
+ MLK_RVV_CT_BFLY_FX(v0, v2, vt, zeta[0x20], vl, 3);
298
+ MLK_RVV_CT_BFLY_FX(v1, v3, vt, zeta[0x20], vl, 3);
299
+ MLK_RVV_CT_BFLY_FX(v4, v6, vt, zeta[0x21], vl, 3);
300
+ MLK_RVV_CT_BFLY_FX(v5, v7, vt, zeta[0x21], vl, 3);
301
+ MLK_RVV_CT_BFLY_FX(v8, va, vt, zeta[0x30], vl, 3);
302
+ MLK_RVV_CT_BFLY_FX(v9, vb, vt, zeta[0x30], vl, 3);
303
+ MLK_RVV_CT_BFLY_FX(vc, ve, vt, zeta[0x31], vl, 3);
304
+ MLK_RVV_CT_BFLY_FX(vd, vf, vt, zeta[0x31], vl, 3);
305
+
306
+ MLK_RVV_CT_BFLY_FX(v0, v1, vt, zeta[0x40], vl, 4);
307
+ MLK_RVV_CT_BFLY_FX(v2, v3, vt, zeta[0x41], vl, 4);
308
+ MLK_RVV_CT_BFLY_FX(v4, v5, vt, zeta[0x50], vl, 4);
309
+ MLK_RVV_CT_BFLY_FX(v6, v7, vt, zeta[0x51], vl, 4);
310
+ MLK_RVV_CT_BFLY_FX(v8, v9, vt, zeta[0x60], vl, 4);
311
+ MLK_RVV_CT_BFLY_FX(va, vb, vt, zeta[0x61], vl, 4);
312
+ MLK_RVV_CT_BFLY_FX(vc, vd, vt, zeta[0x70], vl, 4);
313
+ MLK_RVV_CT_BFLY_FX(ve, vf, vt, zeta[0x71], vl, 4);
314
+
315
+ __riscv_vse16_v_i16m2(
316
+ &r[0x00], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v0, v1), z0), vl2);
317
+ __riscv_vse16_v_i16m2(
318
+ &r[0x20], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v2, v3), z2), vl2);
319
+ __riscv_vse16_v_i16m2(
320
+ &r[0x40], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v4, v5), z4), vl2);
321
+ __riscv_vse16_v_i16m2(
322
+ &r[0x60], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v6, v7), z6), vl2);
323
+ __riscv_vse16_v_i16m2(
324
+ &r[0x80], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(v8, v9), z8), vl2);
325
+ __riscv_vse16_v_i16m2(
326
+ &r[0xa0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(va, vb), za), vl2);
327
+ __riscv_vse16_v_i16m2(
328
+ &r[0xc0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(vc, vd), zc), vl2);
329
+ __riscv_vse16_v_i16m2(
330
+ &r[0xe0], mlk_rv64v_ntt2(__riscv_vcreate_v_i16m1_i16m2(ve, vf), ze), vl2);
331
+ }
332
+
333
+ /*************************************************
334
+ * Name: poly_invntt_tomont
335
+ *
336
+ * Description: Computes inverse of negacyclic number-theoretic transform (NTT)
337
+ * of a polynomial in place;
338
+ * inputs assumed to be in bitreversed order,
339
+ * output in normal order
340
+ *
341
+ * Arguments: - uint16_t *r: pointer to in/output polynomial
342
+ **************************************************/
343
+
344
+ /* Reverse / Gentleman-Sande butterfly operation */
345
+
346
+ #define MLK_RVV_GS_BFLY_RX(u0, u1, ut, uc, vl) \
347
+ { \
348
+ ut = __riscv_vsub_vv_i16m1(u0, u1, vl); \
349
+ u0 = __riscv_vadd_vv_i16m1(u0, u1, vl); \
350
+ u1 = fq_mul_vx(ut, uc, vl); \
351
+ }
352
+
353
+ #define MLK_RVV_GS_BFLY_RV(u0, u1, ut, uc, vl) \
354
+ { \
355
+ ut = __riscv_vsub_vv_i16m1(u0, u1, vl); \
356
+ u0 = __riscv_vadd_vv_i16m1(u0, u1, vl); \
357
+ u1 = fq_mul_vv(ut, uc, vl); \
358
+ }
359
+
360
+ static vint16m2_t mlk_rv64v_intt2(vint16m2_t vp, vint16m1_t cz)
361
+ {
362
+ size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
363
+ size_t vl2 = 2 * vl;
364
+
365
+ const vuint16m2_t v2p8 = bitswap_perm(3, 4, vl2);
366
+ const vuint16m2_t v2p4 = bitswap_perm(2, 4, vl2);
367
+ const vuint16m2_t v2p2 = bitswap_perm(1, 4, vl2);
368
+
369
+ /* p0 = p2(p4(p8)) */
370
+ const vuint16m2_t v2p0 = __riscv_vrgather_vv_u16m2(
371
+ __riscv_vrgather_vv_u16m2(v2p8, v2p4, vl2), v2p2, vl2);
372
+
373
+ const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
374
+ const vuint16m1_t cs8 =
375
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 3, vl), 2, vl);
376
+ const vuint16m1_t cs4 =
377
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 2, vl), 2 + 2, vl);
378
+ const vuint16m1_t cs2 =
379
+ __riscv_vadd_vx_u16m1(__riscv_vsrl_vx_u16m1(vid, 1, vl), 2 + 2 + 4, vl);
380
+
381
+ vint16m1_t t0, t1, c0, vt;
382
+
383
+ /* initial permute */
384
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p0, vl2);
385
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
386
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
387
+
388
+ /* pre-scale */
389
+ t0 = fq_mulq_vx(t0, MLK_RVV_MONT_NR, vl);
390
+ t1 = fq_mulq_vx(t1, MLK_RVV_MONT_NR, vl);
391
+
392
+ /* absolute bounds: < t0 < q, t1 < q */
393
+ mlk_assert_abs_bound_int16m1(t0, vl, MLKEM_Q);
394
+ mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);
395
+
396
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs2, vl);
397
+ MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);
398
+
399
+ /* absolute bounds: < t0 < 2*q, t1 < q */
400
+ mlk_assert_abs_bound_int16m1(t0, vl, 2 * MLKEM_Q);
401
+ mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);
402
+
403
+ /* swap 2 */
404
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
405
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p2, vl2);
406
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
407
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
408
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs4, vl);
409
+ MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);
410
+
411
+ /* absolute bounds: t0 < 4*q, t1 < q */
412
+ mlk_assert_abs_bound_int16m1(t0, vl, 4 * MLKEM_Q);
413
+ mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);
414
+
415
+ /* swap 4 */
416
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
417
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p4, vl2);
418
+ t0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
419
+ t1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
420
+ c0 = __riscv_vrgather_vv_i16m1(cz, cs8, vl);
421
+ MLK_RVV_GS_BFLY_RV(t0, t1, vt, c0, vl);
422
+
423
+ /* absolute bounds: < 8*q */
424
+ mlk_assert_abs_bound_int16m1(t0, vl, 8 * MLKEM_Q);
425
+ mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);
426
+
427
+ t0 = fq_mulq_vx(t0, MLK_RVV_MONT_R1, vl);
428
+
429
+ /* absolute bounds: < q */
430
+ mlk_assert_abs_bound_int16m1(t0, vl, MLKEM_Q);
431
+ mlk_assert_abs_bound_int16m1(t1, vl, MLKEM_Q);
432
+
433
+ /* swap 8 */
434
+ vp = __riscv_vcreate_v_i16m1_i16m2(t0, t1);
435
+ vp = __riscv_vrgatherei16_vv_i16m2(vp, v2p8, vl2);
436
+
437
+ return vp;
438
+ }
439
+
440
+ #define MLK_RV64V_ABS_BOUNDS16(vl, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, \
441
+ vb, vc, vd, ve, vf, b0, b1, b2, b3, b4, b5, b6, \
442
+ b7, b8, b9, ba, bb, bc, bd, be, bf) \
443
+ do \
444
+ { \
445
+ mlk_assert_abs_bound_int16m1(v0, vl, (b0) * MLKEM_Q); \
446
+ mlk_assert_abs_bound_int16m1(v1, vl, (b1) * MLKEM_Q); \
447
+ mlk_assert_abs_bound_int16m1(v2, vl, (b2) * MLKEM_Q); \
448
+ mlk_assert_abs_bound_int16m1(v3, vl, (b3) * MLKEM_Q); \
449
+ mlk_assert_abs_bound_int16m1(v4, vl, (b4) * MLKEM_Q); \
450
+ mlk_assert_abs_bound_int16m1(v5, vl, (b5) * MLKEM_Q); \
451
+ mlk_assert_abs_bound_int16m1(v6, vl, (b6) * MLKEM_Q); \
452
+ mlk_assert_abs_bound_int16m1(v7, vl, (b7) * MLKEM_Q); \
453
+ mlk_assert_abs_bound_int16m1(v8, vl, (b8) * MLKEM_Q); \
454
+ mlk_assert_abs_bound_int16m1(v9, vl, (b9) * MLKEM_Q); \
455
+ mlk_assert_abs_bound_int16m1(va, vl, (ba) * MLKEM_Q); \
456
+ mlk_assert_abs_bound_int16m1(vb, vl, (bb) * MLKEM_Q); \
457
+ mlk_assert_abs_bound_int16m1(vc, vl, (bc) * MLKEM_Q); \
458
+ mlk_assert_abs_bound_int16m1(vd, vl, (bd) * MLKEM_Q); \
459
+ mlk_assert_abs_bound_int16m1(ve, vl, (be) * MLKEM_Q); \
460
+ mlk_assert_abs_bound_int16m1(vf, vl, (bf) * MLKEM_Q); \
461
+ } while (0)
462
+
463
+
464
+ /* Only for VLEN=256 for now */
465
+ void mlk_rv64v_poly_invntt_tomont(int16_t *r)
466
+ {
467
+ /* zetas can be compiled into vector constants; don't pass as a pointer */
468
+ #include "rv64v_izetas.inc"
469
+
470
+ size_t vl = 16; /* We work with 256-bit vectors of 16x16-bit elements */
471
+ size_t vl2 = 2 * vl;
472
+
473
+ const vint16m1_t z0 = __riscv_vle16_v_i16m1(&izeta[0x00], vl);
474
+ const vint16m1_t z2 = __riscv_vle16_v_i16m1(&izeta[0x10], vl);
475
+ const vint16m1_t z4 = __riscv_vle16_v_i16m1(&izeta[0x20], vl);
476
+ const vint16m1_t z6 = __riscv_vle16_v_i16m1(&izeta[0x30], vl);
477
+ const vint16m1_t z8 = __riscv_vle16_v_i16m1(&izeta[0x40], vl);
478
+ const vint16m1_t za = __riscv_vle16_v_i16m1(&izeta[0x50], vl);
479
+ const vint16m1_t zc = __riscv_vle16_v_i16m1(&izeta[0x60], vl);
480
+ const vint16m1_t ze = __riscv_vle16_v_i16m1(&izeta[0x70], vl);
481
+
482
+ vint16m1_t vt;
483
+ vint16m1_t v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf;
484
+ vint16m2_t vp;
485
+
486
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x00], vl2), z0);
487
+ v0 = __riscv_vget_v_i16m2_i16m1(vp, 0);
488
+ v1 = __riscv_vget_v_i16m2_i16m1(vp, 1);
489
+
490
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x20], vl2), z2);
491
+ v2 = __riscv_vget_v_i16m2_i16m1(vp, 0);
492
+ v3 = __riscv_vget_v_i16m2_i16m1(vp, 1);
493
+
494
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x40], vl2), z4);
495
+ v4 = __riscv_vget_v_i16m2_i16m1(vp, 0);
496
+ v5 = __riscv_vget_v_i16m2_i16m1(vp, 1);
497
+
498
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x60], vl2), z6);
499
+ v6 = __riscv_vget_v_i16m2_i16m1(vp, 0);
500
+ v7 = __riscv_vget_v_i16m2_i16m1(vp, 1);
501
+
502
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0x80], vl2), z8);
503
+ v8 = __riscv_vget_v_i16m2_i16m1(vp, 0);
504
+ v9 = __riscv_vget_v_i16m2_i16m1(vp, 1);
505
+
506
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xa0], vl2), za);
507
+ va = __riscv_vget_v_i16m2_i16m1(vp, 0);
508
+ vb = __riscv_vget_v_i16m2_i16m1(vp, 1);
509
+
510
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xc0], vl2), zc);
511
+ vc = __riscv_vget_v_i16m2_i16m1(vp, 0);
512
+ vd = __riscv_vget_v_i16m2_i16m1(vp, 1);
513
+
514
+ vp = mlk_rv64v_intt2(__riscv_vle16_v_i16m2(&r[0xe0], vl2), ze);
515
+ ve = __riscv_vget_v_i16m2_i16m1(vp, 0);
516
+ vf = __riscv_vget_v_i16m2_i16m1(vp, 1);
517
+
518
+ /* absolute bounds < q (see mlk_rv64v_intt2) */
519
+ MLK_RV64V_ABS_BOUNDS16(vl,
520
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
521
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
522
+
523
+ MLK_RVV_GS_BFLY_RX(v0, v1, vt, izeta[0x40], vl);
524
+ MLK_RVV_GS_BFLY_RX(v2, v3, vt, izeta[0x41], vl);
525
+ MLK_RVV_GS_BFLY_RX(v4, v5, vt, izeta[0x50], vl);
526
+ MLK_RVV_GS_BFLY_RX(v6, v7, vt, izeta[0x51], vl);
527
+ MLK_RVV_GS_BFLY_RX(v8, v9, vt, izeta[0x60], vl);
528
+ MLK_RVV_GS_BFLY_RX(va, vb, vt, izeta[0x61], vl);
529
+ MLK_RVV_GS_BFLY_RX(vc, vd, vt, izeta[0x70], vl);
530
+ MLK_RVV_GS_BFLY_RX(ve, vf, vt, izeta[0x71], vl);
531
+
532
+ /* absolute bounds:
533
+ * - v{0,2,4,6,8,a,c,e}: < 2*q
534
+ * - v{1,3,5,7,9,b,d,f}: < 1*q
535
+ */
536
+ MLK_RV64V_ABS_BOUNDS16(vl,
537
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
538
+ 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1);
539
+
540
+ MLK_RVV_GS_BFLY_RX(v0, v2, vt, izeta[0x20], vl);
541
+ MLK_RVV_GS_BFLY_RX(v1, v3, vt, izeta[0x20], vl);
542
+ MLK_RVV_GS_BFLY_RX(v4, v6, vt, izeta[0x21], vl);
543
+ MLK_RVV_GS_BFLY_RX(v5, v7, vt, izeta[0x21], vl);
544
+ MLK_RVV_GS_BFLY_RX(v8, va, vt, izeta[0x30], vl);
545
+ MLK_RVV_GS_BFLY_RX(v9, vb, vt, izeta[0x30], vl);
546
+ MLK_RVV_GS_BFLY_RX(vc, ve, vt, izeta[0x31], vl);
547
+ MLK_RVV_GS_BFLY_RX(vd, vf, vt, izeta[0x31], vl);
548
+
549
+ /* absolute bounds:
550
+ * - v{0,4,8,c}: < 4*q
551
+ * - v{1,5,9,d}: < 2*q
552
+ * - v{2,3,6,7,a,b,e,f}: < 1*q
553
+ */
554
+ MLK_RV64V_ABS_BOUNDS16(vl,
555
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
556
+ 4, 2, 1, 1, 4, 2, 1, 1, 4, 2, 1, 1, 4, 2, 1, 1);
557
+
558
+ MLK_RVV_GS_BFLY_RX(v0, v4, vt, izeta[0x10], vl);
559
+ MLK_RVV_GS_BFLY_RX(v1, v5, vt, izeta[0x10], vl);
560
+ MLK_RVV_GS_BFLY_RX(v2, v6, vt, izeta[0x10], vl);
561
+ MLK_RVV_GS_BFLY_RX(v3, v7, vt, izeta[0x10], vl);
562
+ MLK_RVV_GS_BFLY_RX(v8, vc, vt, izeta[0x11], vl);
563
+ MLK_RVV_GS_BFLY_RX(v9, vd, vt, izeta[0x11], vl);
564
+ MLK_RVV_GS_BFLY_RX(va, ve, vt, izeta[0x11], vl);
565
+ MLK_RVV_GS_BFLY_RX(vb, vf, vt, izeta[0x11], vl);
566
+
567
+ /* absolute bounds:
568
+ * - v{0,8}: < 8*q
569
+ * - v{1,9}: < 4*q
570
+ * - v{2,3,a,b}: < 2*q
571
+ * - v{4,5,6,7,c,d,e,f}: < 1*q
572
+ */
573
+ MLK_RV64V_ABS_BOUNDS16(vl,
574
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
575
+ 8, 4, 2, 2, 1, 1, 1, 1, 8, 4, 2, 2, 1, 1, 1, 1);
576
+
577
+ /* Reduce v0, v8 to avoid overflow */
578
+ v0 = fq_mulq_vx(v0, MLK_RVV_MONT_R1, vl);
579
+ v8 = fq_mulq_vx(v8, MLK_RVV_MONT_R1, vl);
580
+
581
+ /* absolute bounds: < 4*q */
582
+ MLK_RV64V_ABS_BOUNDS16(vl,
583
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
584
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4);
585
+
586
+ MLK_RVV_GS_BFLY_RX(v0, v8, vt, izeta[0x01], vl);
587
+ MLK_RVV_GS_BFLY_RX(v1, v9, vt, izeta[0x01], vl);
588
+ MLK_RVV_GS_BFLY_RX(v2, va, vt, izeta[0x01], vl);
589
+ MLK_RVV_GS_BFLY_RX(v3, vb, vt, izeta[0x01], vl);
590
+ MLK_RVV_GS_BFLY_RX(v4, vc, vt, izeta[0x01], vl);
591
+ MLK_RVV_GS_BFLY_RX(v5, vd, vt, izeta[0x01], vl);
592
+ MLK_RVV_GS_BFLY_RX(v6, ve, vt, izeta[0x01], vl);
593
+ MLK_RVV_GS_BFLY_RX(v7, vf, vt, izeta[0x01], vl);
594
+
595
+ /* absolute bounds: < 8*q */
596
+ MLK_RV64V_ABS_BOUNDS16(vl,
597
+ v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, va, vb, vc, vd, ve, vf,
598
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8);
599
+
600
+ __riscv_vse16_v_i16m1(&r[0x00], v0, vl);
601
+ __riscv_vse16_v_i16m1(&r[0x10], v1, vl);
602
+ __riscv_vse16_v_i16m1(&r[0x20], v2, vl);
603
+ __riscv_vse16_v_i16m1(&r[0x30], v3, vl);
604
+ __riscv_vse16_v_i16m1(&r[0x40], v4, vl);
605
+ __riscv_vse16_v_i16m1(&r[0x50], v5, vl);
606
+ __riscv_vse16_v_i16m1(&r[0x60], v6, vl);
607
+ __riscv_vse16_v_i16m1(&r[0x70], v7, vl);
608
+ __riscv_vse16_v_i16m1(&r[0x80], v8, vl);
609
+ __riscv_vse16_v_i16m1(&r[0x90], v9, vl);
610
+ __riscv_vse16_v_i16m1(&r[0xa0], va, vl);
611
+ __riscv_vse16_v_i16m1(&r[0xb0], vb, vl);
612
+ __riscv_vse16_v_i16m1(&r[0xc0], vc, vl);
613
+ __riscv_vse16_v_i16m1(&r[0xd0], vd, vl);
614
+ __riscv_vse16_v_i16m1(&r[0xe0], ve, vl);
615
+ __riscv_vse16_v_i16m1(&r[0xf0], vf, vl);
616
+ }
617
+
618
+ /* ML-KEM's middle field GF(3329)[X]/(X^2) multiplication */
619
+
620
+ static inline void mlk_rv64v_poly_basemul_mont_add_k(int16_t *r,
621
+ const int16_t *a,
622
+ const int16_t *b,
623
+ unsigned kn)
624
+ {
625
+ #include "rv64v_zetas_basemul.inc"
626
+
627
+ size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
628
+ size_t i, j;
629
+
630
+ const vuint16m1_t sw0 = __riscv_vxor_vx_u16m1(__riscv_vid_v_u16m1(vl), 1, vl);
631
+ const vbool16_t sb0 = __riscv_vmseq_vx_u16m1_b16(
632
+ __riscv_vand_vx_u16m1(__riscv_vid_v_u16m1(vl), 1, vl), 0, vl);
633
+
634
+ vint16m1_t vt, vu;
635
+ vint32m2_t wa, wb, ws;
636
+
637
+ for (i = 0; i < MLKEM_N; i += vl)
638
+ {
639
+ const vint16m1_t vz = __riscv_vle16_v_i16m1(&roots[i], vl);
640
+
641
+ for (j = 0; j < kn; j += MLKEM_N)
642
+ {
643
+ vt = __riscv_vle16_v_i16m1(&a[i + j], vl);
644
+ vu = __riscv_vle16_v_i16m1(&b[i + j], vl);
645
+
646
+ wa = __riscv_vwmul_vv_i32m2(vz, fq_mul_vv(vt, vu, vl), vl);
647
+ wb = __riscv_vwmul_vv_i32m2(vt, __riscv_vrgather_vv_i16m1(vu, sw0, vl),
648
+ vl);
649
+
650
+ wa =
651
+ __riscv_vadd_vv_i32m2(wa, __riscv_vslidedown_vx_i32m2(wa, 1, vl), vl);
652
+ wb = __riscv_vadd_vv_i32m2(wb, __riscv_vslideup_vx_i32m2(wb, wb, 1, vl),
653
+ vl);
654
+
655
+ wa = __riscv_vmerge_vvm_i32m2(wb, wa, sb0, vl);
656
+
657
+ if (j == 0)
658
+ {
659
+ ws = wa;
660
+ }
661
+ else
662
+ {
663
+ ws = __riscv_vadd_vv_i32m2(ws, wa, vl);
664
+ }
665
+ }
666
+ /* the idea is to keep 32-bit intermediate result, reduce in the end */
667
+ __riscv_vse16_v_i16m1(&r[i], fq_redc2(ws, vl), vl);
668
+ }
669
+ }
670
+
671
+ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 2
672
+ void mlk_rv64v_poly_basemul_mont_add_k2(int16_t *r, const int16_t *a,
673
+ const int16_t *b)
674
+ {
675
+ mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 2 * MLKEM_N);
676
+ }
677
+ #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 */
678
+
679
+ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 3
680
+ void mlk_rv64v_poly_basemul_mont_add_k3(int16_t *r, const int16_t *a,
681
+ const int16_t *b)
682
+ {
683
+ mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 3 * MLKEM_N);
684
+ }
685
+ #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 3 */
686
+
687
+ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
688
+ void mlk_rv64v_poly_basemul_mont_add_k4(int16_t *r, const int16_t *a,
689
+ const int16_t *b)
690
+ {
691
+ mlk_rv64v_poly_basemul_mont_add_k(r, a, b, 4 * MLKEM_N);
692
+ }
693
+ #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
694
+
695
+ /*************************************************
696
+ * Name: poly_tomont
697
+ *
698
+ * Description: Inplace conversion of all coefficients of a polynomial
699
+ * from normal domain to Montgomery domain
700
+ *
701
+ * Arguments: - int16_t *r: pointer to input/output polynomial
702
+ **************************************************/
703
+ void mlk_rv64v_poly_tomont(int16_t *r)
704
+ {
705
+ size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
706
+
707
+ for (size_t i = 0; i < MLKEM_N; i += vl)
708
+ {
709
+ vint16m1_t vec = __riscv_vle16_v_i16m1(&r[i], vl);
710
+ vec = fq_mul_vx(vec, MLK_RVV_MONT_R2, vl);
711
+ __riscv_vse16_v_i16m1(&r[i], vec, vl);
712
+ }
713
+ }
714
+
715
+ /*************************************************
716
+ * Name: poly_reduce
717
+ *
718
+ * Description: Applies Barrett reduction to all coefficients of a polynomial
719
+ * for details of the Barrett reduction see
720
+ * comments in poly.c
721
+ *
722
+ * Arguments: - int16_t *r: pointer to input/output polynomial
723
+ **************************************************/
724
+ void mlk_rv64v_poly_reduce(int16_t *r)
725
+ {
726
+ size_t vl = __riscv_vsetvl_e16m1(MLKEM_N);
727
+ vint16m1_t vt;
728
+
729
+ for (size_t i = 0; i < MLKEM_N; i += vl)
730
+ {
731
+ vt = __riscv_vle16_v_i16m1(&r[i], vl);
732
+ vt = fq_barrett(vt, vl);
733
+ vt = fq_cadd(vt, vl);
734
+ __riscv_vse16_v_i16m1(&r[i], vt, vl);
735
+ }
736
+ }
737
+
738
+ /* Run rejection sampling to get uniform random integers mod q */
739
+
740
+ unsigned int mlk_rv64v_rej_uniform(int16_t *r, unsigned int len,
741
+ const uint8_t *buf, unsigned int buflen)
742
+ {
743
+ unsigned n, ctr, pos;
744
+ vuint16m1_t x, y;
745
+ vbool16_t lt;
746
+
747
+ pos = 0;
748
+ ctr = 0;
749
+
750
+ while (ctr < len && pos < buflen)
751
+ {
752
+ const unsigned vl = (unsigned)__riscv_vsetvl_e16m1((buflen - pos) * 8 / 12);
753
+ const unsigned vl23 = (vl * 24) / 32;
754
+
755
+ const vuint16m1_t vid = __riscv_vid_v_u16m1(vl);
756
+ const vuint16m1_t srl12v = __riscv_vmul_vx_u16m1(vid, 12, vl);
757
+ const vuint16m1_t sel12v = __riscv_vsrl_vx_u16m1(srl12v, 4, vl);
758
+ const vuint16m1_t sll12v = __riscv_vsll_vx_u16m1(vid, 2, vl);
759
+
760
+ /* Functionally, this loop is not necessary, but it avoids re-evaluating
761
+ * the VL too many times. In particular, in the first outer iteration,
762
+ * the inner loop will process the bulk of the data with fixed VL. */
763
+ while (ctr < len && vl23 * 2 <= buflen - pos)
764
+ {
765
+ x = __riscv_vle16_v_u16m1((uint16_t *)&buf[pos], vl23);
766
+ pos += vl23 * 2;
767
+ x = __riscv_vrgather_vv_u16m1(x, sel12v, vl);
768
+ x = __riscv_vor_vv_u16m1(
769
+ __riscv_vsrl_vv_u16m1(x, srl12v, vl),
770
+ __riscv_vsll_vv_u16m1(__riscv_vslidedown(x, 1, vl), sll12v, vl), vl);
771
+ x = __riscv_vand_vx_u16m1(x, 0xFFF, vl);
772
+
773
+ lt = __riscv_vmsltu_vx_u16m1_b16(x, MLKEM_Q, vl);
774
+ y = __riscv_vcompress_vm_u16m1(x, lt, vl);
775
+ n = (unsigned)__riscv_vcpop_m_b16(lt, vl);
776
+
777
+ if (ctr + n > len)
778
+ {
779
+ n = len - ctr;
780
+ }
781
+ __riscv_vse16_v_u16m1((uint16_t *)&r[ctr], y, n);
782
+ ctr += n;
783
+ }
784
+ }
785
+
786
+ return ctr;
787
+ }
788
+
789
+ #else /* MLK_ARITH_BACKEND_RISCV64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
790
+
791
+ MLK_EMPTY_CU(rv64v_poly)
792
+
793
+ #endif /* !(MLK_ARITH_BACKEND_RISCV64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED) */
794
+
795
+ /* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
796
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
797
+ #undef MLK_RVV_QI
798
+ #undef MLK_RVV_MONT_R1
799
+ #undef MLK_RVV_MONT_R2
800
+ #undef MLK_RVV_MONT_NR
801
+ #undef MLK_RVV_CT_BFLY_FX
802
+ #undef MLK_RVV_CT_BFLY_FV
803
+ #undef MLK_RVV_GS_BFLY_RX
804
+ #undef MLK_RVV_GS_BFLY_RV
805
+ #undef MLK_RV64V_ABS_BOUNDS16