pq_crypto 0.3.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -0
  3. data/CHANGELOG.md +62 -0
  4. data/GET_STARTED.md +366 -40
  5. data/README.md +76 -233
  6. data/SECURITY.md +107 -82
  7. data/ext/pqcrypto/extconf.rb +169 -87
  8. data/ext/pqcrypto/mldsa_api.h +1 -48
  9. data/ext/pqcrypto/mlkem_api.h +1 -18
  10. data/ext/pqcrypto/pq_externalmu.c +89 -204
  11. data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
  12. data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
  13. data/ext/pqcrypto/pqcrypto_secure.c +203 -78
  14. data/ext/pqcrypto/pqcrypto_secure.h +53 -14
  15. data/ext/pqcrypto/pqcrypto_version.h +7 -0
  16. data/ext/pqcrypto/randombytes.h +9 -0
  17. data/ext/pqcrypto/vendor/.vendored +10 -5
  18. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  248. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  249. data/lib/pq_crypto/algorithm_registry.rb +200 -0
  250. data/lib/pq_crypto/hybrid_kem.rb +1 -12
  251. data/lib/pq_crypto/kem.rb +104 -13
  252. data/lib/pq_crypto/pkcs8.rb +387 -0
  253. data/lib/pq_crypto/serialization.rb +1 -14
  254. data/lib/pq_crypto/signature.rb +123 -17
  255. data/lib/pq_crypto/spki.rb +131 -0
  256. data/lib/pq_crypto/version.rb +1 -1
  257. data/lib/pq_crypto.rb +79 -20
  258. data/script/vendor_libs.rb +88 -155
  259. metadata +241 -73
  260. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  261. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  262. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  263. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  264. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  265. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  269. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  270. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  271. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  272. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  273. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  274. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  275. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  276. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  277. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  278. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  279. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  280. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  281. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  282. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  283. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  284. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  308. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  309. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  310. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  311. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  312. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  313. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  314. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  315. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  316. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  317. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  318. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  319. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  320. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  321. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  322. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  323. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  324. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  325. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  326. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  327. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  328. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
@@ -0,0 +1,239 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ */
6
+
7
+ /* References
8
+ * ==========
9
+ *
10
+ * - [REF_AVX2]
11
+ * CRYSTALS-Dilithium optimized AVX2 implementation
12
+ * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
13
+ * https://github.com/pq-crystals/dilithium/tree/master/avx2
14
+ */
15
+
16
+ /*
17
+ * This file is derived from the public domain
18
+ * AVX2 Dilithium implementation @[REF_AVX2].
19
+ */
20
+
21
+ #include "../../../common.h"
22
+
23
+ #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
24
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
25
+
26
+ /*
27
+ * WARNING: This file is auto-derived from the mldsa-native source file
28
+ * dev/x86_64/src/nttunpack.S using scripts/simpasm. Do not modify it directly.
29
+ */
30
+
31
+ #if defined(__ELF__)
32
+ .section .note.GNU-stack,"",@progbits
33
+ #endif
34
+
35
+ .text
36
+ .balign 4
37
+ .global MLD_ASM_NAMESPACE(nttunpack_avx2)
38
+ MLD_ASM_FN_SYMBOL(nttunpack_avx2)
39
+
40
+ .cfi_startproc
41
+ vmovdqa (%rdi), %ymm4
42
+ vmovdqa 0x20(%rdi), %ymm5
43
+ vmovdqa 0x40(%rdi), %ymm6
44
+ vmovdqa 0x60(%rdi), %ymm7
45
+ vmovdqa 0x80(%rdi), %ymm8
46
+ vmovdqa 0xa0(%rdi), %ymm9
47
+ vmovdqa 0xc0(%rdi), %ymm10
48
+ vmovdqa 0xe0(%rdi), %ymm11
49
+ vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
50
+ vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
51
+ vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
52
+ vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
53
+ vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
54
+ vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
55
+ vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
56
+ vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
57
+ vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
58
+ vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
59
+ vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
60
+ vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
61
+ vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
62
+ vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
63
+ vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
64
+ vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
65
+ vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
66
+ vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
67
+ vpsrlq $0x20, %ymm7, %ymm7
68
+ vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
69
+ vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
70
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
71
+ vpsrlq $0x20, %ymm5, %ymm5
72
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
73
+ vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
74
+ vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
75
+ vpsrlq $0x20, %ymm3, %ymm3
76
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
77
+ vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
78
+ vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
79
+ vpsrlq $0x20, %ymm10, %ymm10
80
+ vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
81
+ vmovdqa %ymm9, (%rdi)
82
+ vmovdqa %ymm8, 0x20(%rdi)
83
+ vmovdqa %ymm7, 0x40(%rdi)
84
+ vmovdqa %ymm6, 0x60(%rdi)
85
+ vmovdqa %ymm5, 0x80(%rdi)
86
+ vmovdqa %ymm4, 0xa0(%rdi)
87
+ vmovdqa %ymm3, 0xc0(%rdi)
88
+ vmovdqa %ymm11, 0xe0(%rdi)
89
+ vmovdqa 0x100(%rdi), %ymm4
90
+ vmovdqa 0x120(%rdi), %ymm5
91
+ vmovdqa 0x140(%rdi), %ymm6
92
+ vmovdqa 0x160(%rdi), %ymm7
93
+ vmovdqa 0x180(%rdi), %ymm8
94
+ vmovdqa 0x1a0(%rdi), %ymm9
95
+ vmovdqa 0x1c0(%rdi), %ymm10
96
+ vmovdqa 0x1e0(%rdi), %ymm11
97
+ vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
98
+ vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
99
+ vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
100
+ vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
101
+ vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
102
+ vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
103
+ vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
104
+ vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
105
+ vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
106
+ vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
107
+ vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
108
+ vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
109
+ vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
110
+ vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
111
+ vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
112
+ vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
113
+ vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
114
+ vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
115
+ vpsrlq $0x20, %ymm7, %ymm7
116
+ vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
117
+ vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
118
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
119
+ vpsrlq $0x20, %ymm5, %ymm5
120
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
121
+ vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
122
+ vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
123
+ vpsrlq $0x20, %ymm3, %ymm3
124
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
125
+ vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
126
+ vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
127
+ vpsrlq $0x20, %ymm10, %ymm10
128
+ vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
129
+ vmovdqa %ymm9, 0x100(%rdi)
130
+ vmovdqa %ymm8, 0x120(%rdi)
131
+ vmovdqa %ymm7, 0x140(%rdi)
132
+ vmovdqa %ymm6, 0x160(%rdi)
133
+ vmovdqa %ymm5, 0x180(%rdi)
134
+ vmovdqa %ymm4, 0x1a0(%rdi)
135
+ vmovdqa %ymm3, 0x1c0(%rdi)
136
+ vmovdqa %ymm11, 0x1e0(%rdi)
137
+ vmovdqa 0x200(%rdi), %ymm4
138
+ vmovdqa 0x220(%rdi), %ymm5
139
+ vmovdqa 0x240(%rdi), %ymm6
140
+ vmovdqa 0x260(%rdi), %ymm7
141
+ vmovdqa 0x280(%rdi), %ymm8
142
+ vmovdqa 0x2a0(%rdi), %ymm9
143
+ vmovdqa 0x2c0(%rdi), %ymm10
144
+ vmovdqa 0x2e0(%rdi), %ymm11
145
+ vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
146
+ vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
147
+ vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
148
+ vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
149
+ vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
150
+ vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
151
+ vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
152
+ vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
153
+ vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
154
+ vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
155
+ vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
156
+ vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
157
+ vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
158
+ vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
159
+ vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
160
+ vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
161
+ vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
162
+ vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
163
+ vpsrlq $0x20, %ymm7, %ymm7
164
+ vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
165
+ vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
166
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
167
+ vpsrlq $0x20, %ymm5, %ymm5
168
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
169
+ vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
170
+ vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
171
+ vpsrlq $0x20, %ymm3, %ymm3
172
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
173
+ vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
174
+ vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
175
+ vpsrlq $0x20, %ymm10, %ymm10
176
+ vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
177
+ vmovdqa %ymm9, 0x200(%rdi)
178
+ vmovdqa %ymm8, 0x220(%rdi)
179
+ vmovdqa %ymm7, 0x240(%rdi)
180
+ vmovdqa %ymm6, 0x260(%rdi)
181
+ vmovdqa %ymm5, 0x280(%rdi)
182
+ vmovdqa %ymm4, 0x2a0(%rdi)
183
+ vmovdqa %ymm3, 0x2c0(%rdi)
184
+ vmovdqa %ymm11, 0x2e0(%rdi)
185
+ vmovdqa 0x300(%rdi), %ymm4
186
+ vmovdqa 0x320(%rdi), %ymm5
187
+ vmovdqa 0x340(%rdi), %ymm6
188
+ vmovdqa 0x360(%rdi), %ymm7
189
+ vmovdqa 0x380(%rdi), %ymm8
190
+ vmovdqa 0x3a0(%rdi), %ymm9
191
+ vmovdqa 0x3c0(%rdi), %ymm10
192
+ vmovdqa 0x3e0(%rdi), %ymm11
193
+ vperm2i128 $0x20, %ymm8, %ymm4, %ymm3 # ymm3 = ymm4[0,1],ymm8[0,1]
194
+ vperm2i128 $0x31, %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[2,3],ymm8[2,3]
195
+ vperm2i128 $0x20, %ymm9, %ymm5, %ymm4 # ymm4 = ymm5[0,1],ymm9[0,1]
196
+ vperm2i128 $0x31, %ymm9, %ymm5, %ymm9 # ymm9 = ymm5[2,3],ymm9[2,3]
197
+ vperm2i128 $0x20, %ymm10, %ymm6, %ymm5 # ymm5 = ymm6[0,1],ymm10[0,1]
198
+ vperm2i128 $0x31, %ymm10, %ymm6, %ymm10 # ymm10 = ymm6[2,3],ymm10[2,3]
199
+ vperm2i128 $0x20, %ymm11, %ymm7, %ymm6 # ymm6 = ymm7[0,1],ymm11[0,1]
200
+ vperm2i128 $0x31, %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[2,3],ymm11[2,3]
201
+ vpunpcklqdq %ymm5, %ymm3, %ymm7 # ymm7 = ymm3[0],ymm5[0],ymm3[2],ymm5[2]
202
+ vpunpckhqdq %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[1],ymm5[1],ymm3[3],ymm5[3]
203
+ vpunpcklqdq %ymm10, %ymm8, %ymm3 # ymm3 = ymm8[0],ymm10[0],ymm8[2],ymm10[2]
204
+ vpunpckhqdq %ymm10, %ymm8, %ymm10 # ymm10 = ymm8[1],ymm10[1],ymm8[3],ymm10[3]
205
+ vpunpcklqdq %ymm6, %ymm4, %ymm8 # ymm8 = ymm4[0],ymm6[0],ymm4[2],ymm6[2]
206
+ vpunpckhqdq %ymm6, %ymm4, %ymm6 # ymm6 = ymm4[1],ymm6[1],ymm4[3],ymm6[3]
207
+ vpunpcklqdq %ymm11, %ymm9, %ymm4 # ymm4 = ymm9[0],ymm11[0],ymm9[2],ymm11[2]
208
+ vpunpckhqdq %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[1],ymm11[1],ymm9[3],ymm11[3]
209
+ vmovsldup %ymm8, %ymm9 # ymm9 = ymm8[0,0,2,2,4,4,6,6]
210
+ vpblendd $0xaa, %ymm9, %ymm7, %ymm9 # ymm9 = ymm7[0],ymm9[1],ymm7[2],ymm9[3],ymm7[4],ymm9[5],ymm7[6],ymm9[7]
211
+ vpsrlq $0x20, %ymm7, %ymm7
212
+ vpblendd $0xaa, %ymm8, %ymm7, %ymm8 # ymm8 = ymm7[0],ymm8[1],ymm7[2],ymm8[3],ymm7[4],ymm8[5],ymm7[6],ymm8[7]
213
+ vmovsldup %ymm6, %ymm7 # ymm7 = ymm6[0,0,2,2,4,4,6,6]
214
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
215
+ vpsrlq $0x20, %ymm5, %ymm5
216
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
217
+ vmovsldup %ymm4, %ymm5 # ymm5 = ymm4[0,0,2,2,4,4,6,6]
218
+ vpblendd $0xaa, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[0],ymm5[1],ymm3[2],ymm5[3],ymm3[4],ymm5[5],ymm3[6],ymm5[7]
219
+ vpsrlq $0x20, %ymm3, %ymm3
220
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
221
+ vmovsldup %ymm11, %ymm3 # ymm3 = ymm11[0,0,2,2,4,4,6,6]
222
+ vpblendd $0xaa, %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[0],ymm3[1],ymm10[2],ymm3[3],ymm10[4],ymm3[5],ymm10[6],ymm3[7]
223
+ vpsrlq $0x20, %ymm10, %ymm10
224
+ vpblendd $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7]
225
+ vmovdqa %ymm9, 0x300(%rdi)
226
+ vmovdqa %ymm8, 0x320(%rdi)
227
+ vmovdqa %ymm7, 0x340(%rdi)
228
+ vmovdqa %ymm6, 0x360(%rdi)
229
+ vmovdqa %ymm5, 0x380(%rdi)
230
+ vmovdqa %ymm4, 0x3a0(%rdi)
231
+ vmovdqa %ymm3, 0x3c0(%rdi)
232
+ vmovdqa %ymm11, 0x3e0(%rdi)
233
+ retq
234
+ .cfi_endproc
235
+
236
+ MLD_ASM_FN_SIZE(nttunpack_avx2)
237
+
238
+ #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
239
+ */
@@ -0,0 +1,131 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [REF_AVX2]
10
+ * CRYSTALS-Dilithium optimized AVX2 implementation
11
+ * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
+ * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
+ */
14
+
15
+ /*
16
+ * This file is derived from the public domain
17
+ * AVX2 Dilithium implementation @[REF_AVX2].
18
+ */
19
+
20
+ #include "../../../common.h"
21
+ #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
22
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
23
+
24
+ /*
25
+ * WARNING: This file is auto-derived from the mldsa-native source file
26
+ * dev/x86_64/src/pointwise.S using scripts/simpasm. Do not modify it directly.
27
+ */
28
+
29
+ #if defined(__ELF__)
30
+ .section .note.GNU-stack,"",@progbits
31
+ #endif
32
+
33
+ .text
34
+ .balign 4
35
+ .global MLD_ASM_NAMESPACE(pointwise_avx2)
36
+ MLD_ASM_FN_SYMBOL(pointwise_avx2)
37
+
38
+ .cfi_startproc
39
+ vmovdqa 0x20(%rcx), %ymm0
40
+ vmovdqa (%rcx), %ymm1
41
+ xorl %eax, %eax
42
+
43
+ Lpointwise_avx2_looptop1:
44
+ vmovdqa (%rsi), %ymm2
45
+ vmovdqa 0x20(%rsi), %ymm4
46
+ vmovdqa 0x40(%rsi), %ymm6
47
+ vmovdqa (%rdx), %ymm10
48
+ vmovdqa 0x20(%rdx), %ymm12
49
+ vmovdqa 0x40(%rdx), %ymm14
50
+ vpsrlq $0x20, %ymm2, %ymm3
51
+ vpsrlq $0x20, %ymm4, %ymm5
52
+ vmovshdup %ymm6, %ymm7 # ymm7 = ymm6[1,1,3,3,5,5,7,7]
53
+ vpsrlq $0x20, %ymm10, %ymm11
54
+ vpsrlq $0x20, %ymm12, %ymm13
55
+ vmovshdup %ymm14, %ymm15 # ymm15 = ymm14[1,1,3,3,5,5,7,7]
56
+ vpmuldq %ymm10, %ymm2, %ymm2
57
+ vpmuldq %ymm11, %ymm3, %ymm3
58
+ vpmuldq %ymm12, %ymm4, %ymm4
59
+ vpmuldq %ymm13, %ymm5, %ymm5
60
+ vpmuldq %ymm14, %ymm6, %ymm6
61
+ vpmuldq %ymm15, %ymm7, %ymm7
62
+ vpmuldq %ymm2, %ymm0, %ymm10
63
+ vpmuldq %ymm3, %ymm0, %ymm11
64
+ vpmuldq %ymm4, %ymm0, %ymm12
65
+ vpmuldq %ymm5, %ymm0, %ymm13
66
+ vpmuldq %ymm6, %ymm0, %ymm14
67
+ vpmuldq %ymm7, %ymm0, %ymm15
68
+ vpmuldq %ymm10, %ymm1, %ymm10
69
+ vpmuldq %ymm11, %ymm1, %ymm11
70
+ vpmuldq %ymm12, %ymm1, %ymm12
71
+ vpmuldq %ymm13, %ymm1, %ymm13
72
+ vpmuldq %ymm14, %ymm1, %ymm14
73
+ vpmuldq %ymm15, %ymm1, %ymm15
74
+ vpsubq %ymm10, %ymm2, %ymm2
75
+ vpsubq %ymm11, %ymm3, %ymm3
76
+ vpsubq %ymm12, %ymm4, %ymm4
77
+ vpsubq %ymm13, %ymm5, %ymm5
78
+ vpsubq %ymm14, %ymm6, %ymm6
79
+ vpsubq %ymm15, %ymm7, %ymm7
80
+ vpsrlq $0x20, %ymm2, %ymm2
81
+ vpsrlq $0x20, %ymm4, %ymm4
82
+ vmovshdup %ymm6, %ymm6 # ymm6 = ymm6[1,1,3,3,5,5,7,7]
83
+ vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
84
+ vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
85
+ vpblendd $0xaa, %ymm7, %ymm6, %ymm6 # ymm6 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7]
86
+ vmovdqa %ymm2, (%rdi)
87
+ vmovdqa %ymm4, 0x20(%rdi)
88
+ vmovdqa %ymm6, 0x40(%rdi)
89
+ addq $0x60, %rdi
90
+ addq $0x60, %rsi
91
+ addq $0x60, %rdx
92
+ addl $0x1, %eax
93
+ cmpl $0xa, %eax
94
+ jb Lpointwise_avx2_looptop1
95
+ vmovdqa (%rsi), %ymm2
96
+ vmovdqa 0x20(%rsi), %ymm4
97
+ vmovdqa (%rdx), %ymm10
98
+ vmovdqa 0x20(%rdx), %ymm12
99
+ vpsrlq $0x20, %ymm2, %ymm3
100
+ vpsrlq $0x20, %ymm4, %ymm5
101
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
102
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
103
+ vpmuldq %ymm10, %ymm2, %ymm2
104
+ vpmuldq %ymm11, %ymm3, %ymm3
105
+ vpmuldq %ymm12, %ymm4, %ymm4
106
+ vpmuldq %ymm13, %ymm5, %ymm5
107
+ vpmuldq %ymm2, %ymm0, %ymm10
108
+ vpmuldq %ymm3, %ymm0, %ymm11
109
+ vpmuldq %ymm4, %ymm0, %ymm12
110
+ vpmuldq %ymm5, %ymm0, %ymm13
111
+ vpmuldq %ymm10, %ymm1, %ymm10
112
+ vpmuldq %ymm11, %ymm1, %ymm11
113
+ vpmuldq %ymm12, %ymm1, %ymm12
114
+ vpmuldq %ymm13, %ymm1, %ymm13
115
+ vpsubq %ymm10, %ymm2, %ymm2
116
+ vpsubq %ymm11, %ymm3, %ymm3
117
+ vpsubq %ymm12, %ymm4, %ymm4
118
+ vpsubq %ymm13, %ymm5, %ymm5
119
+ vpsrlq $0x20, %ymm2, %ymm2
120
+ vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7]
121
+ vpblendd $0x55, %ymm2, %ymm3, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
122
+ vpblendd $0x55, %ymm4, %ymm5, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
123
+ vmovdqa %ymm2, (%rdi)
124
+ vmovdqa %ymm4, 0x20(%rdi)
125
+ retq
126
+ .cfi_endproc
127
+
128
+ MLD_ASM_FN_SIZE(pointwise_avx2)
129
+
130
+ #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
131
+ */
@@ -0,0 +1,139 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [REF_AVX2]
10
+ * CRYSTALS-Dilithium optimized AVX2 implementation
11
+ * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
+ * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
+ */
14
+
15
+ /*
16
+ * This file is derived from the public domain
17
+ * AVX2 Dilithium implementation @[REF_AVX2].
18
+ */
19
+
20
+ #include "../../../common.h"
21
+ #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
22
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
23
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 4)
24
+
25
+ /*
26
+ * WARNING: This file is auto-derived from the mldsa-native source file
27
+ * dev/x86_64/src/pointwise_acc_l4.S using scripts/simpasm. Do not modify it directly.
28
+ */
29
+
30
+ #if defined(__ELF__)
31
+ .section .note.GNU-stack,"",@progbits
32
+ #endif
33
+
34
+ .text
35
+ .balign 4
36
+ .global MLD_ASM_NAMESPACE(pointwise_acc_l4_avx2)
37
+ MLD_ASM_FN_SYMBOL(pointwise_acc_l4_avx2)
38
+
39
+ .cfi_startproc
40
+ vmovdqa 0x20(%rcx), %ymm0
41
+ vmovdqa (%rcx), %ymm1
42
+ xorl %eax, %eax
43
+
44
+ Lpointwise_acc_l4_avx2_looptop2:
45
+ vmovdqa (%rsi), %ymm6
46
+ vmovdqa 0x20(%rsi), %ymm8
47
+ vmovdqa (%rdx), %ymm10
48
+ vmovdqa 0x20(%rdx), %ymm12
49
+ vpsrlq $0x20, %ymm6, %ymm7
50
+ vpsrlq $0x20, %ymm8, %ymm9
51
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
52
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
53
+ vpmuldq %ymm10, %ymm6, %ymm6
54
+ vpmuldq %ymm11, %ymm7, %ymm7
55
+ vpmuldq %ymm12, %ymm8, %ymm8
56
+ vpmuldq %ymm13, %ymm9, %ymm9
57
+ vmovdqa %ymm6, %ymm2
58
+ vmovdqa %ymm7, %ymm3
59
+ vmovdqa %ymm8, %ymm4
60
+ vmovdqa %ymm9, %ymm5
61
+ vmovdqa 0x400(%rsi), %ymm6
62
+ vmovdqa 0x420(%rsi), %ymm8
63
+ vmovdqa 0x400(%rdx), %ymm10
64
+ vmovdqa 0x420(%rdx), %ymm12
65
+ vpsrlq $0x20, %ymm6, %ymm7
66
+ vpsrlq $0x20, %ymm8, %ymm9
67
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
68
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
69
+ vpmuldq %ymm10, %ymm6, %ymm6
70
+ vpmuldq %ymm11, %ymm7, %ymm7
71
+ vpmuldq %ymm12, %ymm8, %ymm8
72
+ vpmuldq %ymm13, %ymm9, %ymm9
73
+ vpaddq %ymm2, %ymm6, %ymm2
74
+ vpaddq %ymm3, %ymm7, %ymm3
75
+ vpaddq %ymm4, %ymm8, %ymm4
76
+ vpaddq %ymm5, %ymm9, %ymm5
77
+ vmovdqa 0x800(%rsi), %ymm6
78
+ vmovdqa 0x820(%rsi), %ymm8
79
+ vmovdqa 0x800(%rdx), %ymm10
80
+ vmovdqa 0x820(%rdx), %ymm12
81
+ vpsrlq $0x20, %ymm6, %ymm7
82
+ vpsrlq $0x20, %ymm8, %ymm9
83
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
84
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
85
+ vpmuldq %ymm10, %ymm6, %ymm6
86
+ vpmuldq %ymm11, %ymm7, %ymm7
87
+ vpmuldq %ymm12, %ymm8, %ymm8
88
+ vpmuldq %ymm13, %ymm9, %ymm9
89
+ vpaddq %ymm2, %ymm6, %ymm2
90
+ vpaddq %ymm3, %ymm7, %ymm3
91
+ vpaddq %ymm4, %ymm8, %ymm4
92
+ vpaddq %ymm5, %ymm9, %ymm5
93
+ vmovdqa 0xc00(%rsi), %ymm6
94
+ vmovdqa 0xc20(%rsi), %ymm8
95
+ vmovdqa 0xc00(%rdx), %ymm10
96
+ vmovdqa 0xc20(%rdx), %ymm12
97
+ vpsrlq $0x20, %ymm6, %ymm7
98
+ vpsrlq $0x20, %ymm8, %ymm9
99
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
100
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
101
+ vpmuldq %ymm10, %ymm6, %ymm6
102
+ vpmuldq %ymm11, %ymm7, %ymm7
103
+ vpmuldq %ymm12, %ymm8, %ymm8
104
+ vpmuldq %ymm13, %ymm9, %ymm9
105
+ vpaddq %ymm2, %ymm6, %ymm2
106
+ vpaddq %ymm3, %ymm7, %ymm3
107
+ vpaddq %ymm4, %ymm8, %ymm4
108
+ vpaddq %ymm5, %ymm9, %ymm5
109
+ vpmuldq %ymm2, %ymm0, %ymm6
110
+ vpmuldq %ymm3, %ymm0, %ymm7
111
+ vpmuldq %ymm4, %ymm0, %ymm8
112
+ vpmuldq %ymm5, %ymm0, %ymm9
113
+ vpmuldq %ymm6, %ymm1, %ymm6
114
+ vpmuldq %ymm7, %ymm1, %ymm7
115
+ vpmuldq %ymm8, %ymm1, %ymm8
116
+ vpmuldq %ymm9, %ymm1, %ymm9
117
+ vpsubq %ymm6, %ymm2, %ymm2
118
+ vpsubq %ymm7, %ymm3, %ymm3
119
+ vpsubq %ymm8, %ymm4, %ymm4
120
+ vpsubq %ymm9, %ymm5, %ymm5
121
+ vpsrlq $0x20, %ymm2, %ymm2
122
+ vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7]
123
+ vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
124
+ vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
125
+ vmovdqa %ymm2, (%rdi)
126
+ vmovdqa %ymm4, 0x20(%rdi)
127
+ addq $0x40, %rsi
128
+ addq $0x40, %rdx
129
+ addq $0x40, %rdi
130
+ addl $0x1, %eax
131
+ cmpl $0x10, %eax
132
+ jb Lpointwise_acc_l4_avx2_looptop2
133
+ retq
134
+ .cfi_endproc
135
+
136
+ MLD_ASM_FN_SIZE(pointwise_acc_l4_avx2)
137
+
138
+ #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
139
+ && (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4) */
@@ -0,0 +1,155 @@
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [REF_AVX2]
10
+ * CRYSTALS-Dilithium optimized AVX2 implementation
11
+ * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
+ * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
+ */
14
+
15
+ /*
16
+ * This file is derived from the public domain
17
+ * AVX2 Dilithium implementation @[REF_AVX2].
18
+ */
19
+
20
+ #include "../../../common.h"
21
+ #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
22
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
23
+ (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 5)
24
+
25
+ /*
26
+ * WARNING: This file is auto-derived from the mldsa-native source file
27
+ * dev/x86_64/src/pointwise_acc_l5.S using scripts/simpasm. Do not modify it directly.
28
+ */
29
+
30
+ #if defined(__ELF__)
31
+ .section .note.GNU-stack,"",@progbits
32
+ #endif
33
+
34
+ .text
35
+ .balign 4
36
+ .global MLD_ASM_NAMESPACE(pointwise_acc_l5_avx2)
37
+ MLD_ASM_FN_SYMBOL(pointwise_acc_l5_avx2)
38
+
39
+ .cfi_startproc
40
+ vmovdqa 0x20(%rcx), %ymm0
41
+ vmovdqa (%rcx), %ymm1
42
+ xorl %eax, %eax
43
+
44
+ Lpointwise_acc_l5_avx2_looptop2:
45
+ vmovdqa (%rsi), %ymm6
46
+ vmovdqa 0x20(%rsi), %ymm8
47
+ vmovdqa (%rdx), %ymm10
48
+ vmovdqa 0x20(%rdx), %ymm12
49
+ vpsrlq $0x20, %ymm6, %ymm7
50
+ vpsrlq $0x20, %ymm8, %ymm9
51
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
52
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
53
+ vpmuldq %ymm10, %ymm6, %ymm6
54
+ vpmuldq %ymm11, %ymm7, %ymm7
55
+ vpmuldq %ymm12, %ymm8, %ymm8
56
+ vpmuldq %ymm13, %ymm9, %ymm9
57
+ vmovdqa %ymm6, %ymm2
58
+ vmovdqa %ymm7, %ymm3
59
+ vmovdqa %ymm8, %ymm4
60
+ vmovdqa %ymm9, %ymm5
61
+ vmovdqa 0x400(%rsi), %ymm6
62
+ vmovdqa 0x420(%rsi), %ymm8
63
+ vmovdqa 0x400(%rdx), %ymm10
64
+ vmovdqa 0x420(%rdx), %ymm12
65
+ vpsrlq $0x20, %ymm6, %ymm7
66
+ vpsrlq $0x20, %ymm8, %ymm9
67
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
68
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
69
+ vpmuldq %ymm10, %ymm6, %ymm6
70
+ vpmuldq %ymm11, %ymm7, %ymm7
71
+ vpmuldq %ymm12, %ymm8, %ymm8
72
+ vpmuldq %ymm13, %ymm9, %ymm9
73
+ vpaddq %ymm2, %ymm6, %ymm2
74
+ vpaddq %ymm3, %ymm7, %ymm3
75
+ vpaddq %ymm4, %ymm8, %ymm4
76
+ vpaddq %ymm5, %ymm9, %ymm5
77
+ vmovdqa 0x800(%rsi), %ymm6
78
+ vmovdqa 0x820(%rsi), %ymm8
79
+ vmovdqa 0x800(%rdx), %ymm10
80
+ vmovdqa 0x820(%rdx), %ymm12
81
+ vpsrlq $0x20, %ymm6, %ymm7
82
+ vpsrlq $0x20, %ymm8, %ymm9
83
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
84
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
85
+ vpmuldq %ymm10, %ymm6, %ymm6
86
+ vpmuldq %ymm11, %ymm7, %ymm7
87
+ vpmuldq %ymm12, %ymm8, %ymm8
88
+ vpmuldq %ymm13, %ymm9, %ymm9
89
+ vpaddq %ymm2, %ymm6, %ymm2
90
+ vpaddq %ymm3, %ymm7, %ymm3
91
+ vpaddq %ymm4, %ymm8, %ymm4
92
+ vpaddq %ymm5, %ymm9, %ymm5
93
+ vmovdqa 0xc00(%rsi), %ymm6
94
+ vmovdqa 0xc20(%rsi), %ymm8
95
+ vmovdqa 0xc00(%rdx), %ymm10
96
+ vmovdqa 0xc20(%rdx), %ymm12
97
+ vpsrlq $0x20, %ymm6, %ymm7
98
+ vpsrlq $0x20, %ymm8, %ymm9
99
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
100
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
101
+ vpmuldq %ymm10, %ymm6, %ymm6
102
+ vpmuldq %ymm11, %ymm7, %ymm7
103
+ vpmuldq %ymm12, %ymm8, %ymm8
104
+ vpmuldq %ymm13, %ymm9, %ymm9
105
+ vpaddq %ymm2, %ymm6, %ymm2
106
+ vpaddq %ymm3, %ymm7, %ymm3
107
+ vpaddq %ymm4, %ymm8, %ymm4
108
+ vpaddq %ymm5, %ymm9, %ymm5
109
+ vmovdqa 0x1000(%rsi), %ymm6
110
+ vmovdqa 0x1020(%rsi), %ymm8
111
+ vmovdqa 0x1000(%rdx), %ymm10
112
+ vmovdqa 0x1020(%rdx), %ymm12
113
+ vpsrlq $0x20, %ymm6, %ymm7
114
+ vpsrlq $0x20, %ymm8, %ymm9
115
+ vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
116
+ vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
117
+ vpmuldq %ymm10, %ymm6, %ymm6
118
+ vpmuldq %ymm11, %ymm7, %ymm7
119
+ vpmuldq %ymm12, %ymm8, %ymm8
120
+ vpmuldq %ymm13, %ymm9, %ymm9
121
+ vpaddq %ymm2, %ymm6, %ymm2
122
+ vpaddq %ymm3, %ymm7, %ymm3
123
+ vpaddq %ymm4, %ymm8, %ymm4
124
+ vpaddq %ymm5, %ymm9, %ymm5
125
+ vpmuldq %ymm2, %ymm0, %ymm6
126
+ vpmuldq %ymm3, %ymm0, %ymm7
127
+ vpmuldq %ymm4, %ymm0, %ymm8
128
+ vpmuldq %ymm5, %ymm0, %ymm9
129
+ vpmuldq %ymm6, %ymm1, %ymm6
130
+ vpmuldq %ymm7, %ymm1, %ymm7
131
+ vpmuldq %ymm8, %ymm1, %ymm8
132
+ vpmuldq %ymm9, %ymm1, %ymm9
133
+ vpsubq %ymm6, %ymm2, %ymm2
134
+ vpsubq %ymm7, %ymm3, %ymm3
135
+ vpsubq %ymm8, %ymm4, %ymm4
136
+ vpsubq %ymm9, %ymm5, %ymm5
137
+ vpsrlq $0x20, %ymm2, %ymm2
138
+ vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7]
139
+ vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
140
+ vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
141
+ vmovdqa %ymm2, (%rdi)
142
+ vmovdqa %ymm4, 0x20(%rdi)
143
+ addq $0x40, %rsi
144
+ addq $0x40, %rdx
145
+ addq $0x40, %rdi
146
+ addl $0x1, %eax
147
+ cmpl $0x10, %eax
148
+ jb Lpointwise_acc_l5_avx2_looptop2
149
+ retq
150
+ .cfi_endproc
151
+
152
+ MLD_ASM_FN_SIZE(pointwise_acc_l5_avx2)
153
+
154
+ #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
155
+ && (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5) */