pq_crypto 0.3.2 → 0.5.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +56 -0
  3. data/CHANGELOG.md +62 -0
  4. data/GET_STARTED.md +366 -40
  5. data/README.md +76 -233
  6. data/SECURITY.md +107 -82
  7. data/ext/pqcrypto/extconf.rb +169 -87
  8. data/ext/pqcrypto/mldsa_api.h +1 -48
  9. data/ext/pqcrypto/mlkem_api.h +1 -18
  10. data/ext/pqcrypto/pq_externalmu.c +89 -204
  11. data/ext/pqcrypto/pqcrypto_native_api.h +129 -0
  12. data/ext/pqcrypto/pqcrypto_ruby_secure.c +484 -84
  13. data/ext/pqcrypto/pqcrypto_secure.c +203 -78
  14. data/ext/pqcrypto/pqcrypto_secure.h +53 -14
  15. data/ext/pqcrypto/pqcrypto_version.h +7 -0
  16. data/ext/pqcrypto/randombytes.h +9 -0
  17. data/ext/pqcrypto/vendor/.vendored +10 -5
  18. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  248. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  249. data/lib/pq_crypto/algorithm_registry.rb +200 -0
  250. data/lib/pq_crypto/hybrid_kem.rb +1 -12
  251. data/lib/pq_crypto/kem.rb +104 -13
  252. data/lib/pq_crypto/pkcs8.rb +387 -0
  253. data/lib/pq_crypto/serialization.rb +1 -14
  254. data/lib/pq_crypto/signature.rb +123 -17
  255. data/lib/pq_crypto/spki.rb +131 -0
  256. data/lib/pq_crypto/version.rb +1 -1
  257. data/lib/pq_crypto.rb +79 -20
  258. data/script/vendor_libs.rb +88 -155
  259. metadata +241 -73
  260. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  261. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  262. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  263. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  264. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  265. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  269. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  270. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  271. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  272. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  273. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  274. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  275. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  276. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  277. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  278. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  279. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  280. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  281. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  282. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  283. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  284. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  308. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  309. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  310. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  311. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  312. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  313. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  314. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  315. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  316. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  317. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  318. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  319. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  320. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  321. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  322. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  323. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  324. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  325. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  326. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  327. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  328. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
@@ -0,0 +1,448 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [REF_AVX2]
10
+ * CRYSTALS-Kyber optimized AVX2 implementation
11
+ * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
12
+ * https://github.com/pq-crystals/kyber/tree/main/avx2
13
+ */
14
+
15
+ /*
16
+ * This file is derived from the public domain
17
+ * AVX2 Kyber implementation @[REF_AVX2].
18
+ */
19
+
20
+ /*************************************************
21
+ * Name: mlk_poly_compress_d11_avx2
22
+ *
23
+ * Description: Compression of a polynomial to 11 bits per coefficient.
24
+ *
25
+ * Arguments: - uint8_t *r: pointer to output byte array
26
+ * (of length MLKEM_POLYCOMPRESSEDBYTES_D11)
27
+ * - const int16_t *a: pointer to input polynomial
28
+ * - const uint8_t *data: pointer to constants
29
+ * (srlvqidx[0:32], shufbidx[32:64])
30
+ **************************************************/
31
+
32
+ #include "../../../common.h"
33
+
34
+ #if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
35
+ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && \
36
+ (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4)
37
+
38
+ /*
39
+ * WARNING: This file is auto-derived from the mlkem-native source file
40
+ * dev/x86_64/src/poly_compress_d11.S using scripts/simpasm. Do not modify it directly.
41
+ */
42
+
43
+ #if defined(__ELF__)
44
+ .section .note.GNU-stack,"",@progbits
45
+ #endif
46
+
47
+ .text
48
+ .balign 4
49
+ .global MLK_ASM_NAMESPACE(poly_compress_d11_avx2)
50
+ MLK_ASM_FN_SYMBOL(poly_compress_d11_avx2)
51
+
52
+ .cfi_startproc
53
+ movl $0x4ebf4ebf, %eax # imm = 0x4EBF4EBF
54
+ vmovd %eax, %xmm0
55
+ vpbroadcastd %xmm0, %ymm0
56
+ vpsllw $0x3, %ymm0, %ymm1
57
+ movl $0x240024, %eax # imm = 0x240024
58
+ vmovd %eax, %xmm2
59
+ vpbroadcastd %xmm2, %ymm2
60
+ movl $0x20002000, %eax # imm = 0x20002000
61
+ vmovd %eax, %xmm3
62
+ vpbroadcastd %xmm3, %ymm3
63
+ movl $0x7ff07ff, %eax # imm = 0x7FF07FF
64
+ vmovd %eax, %xmm4
65
+ vpbroadcastd %xmm4, %ymm4
66
+ movabsq $0x800000108000001, %rax # imm = 0x800000108000001
67
+ vmovq %rax, %xmm5
68
+ vpbroadcastq %xmm5, %ymm5
69
+ movl $0xa, %eax
70
+ vmovq %rax, %xmm6
71
+ vpbroadcastq %xmm6, %ymm6
72
+ vmovdqa (%rdx), %ymm7
73
+ vmovdqa 0x20(%rdx), %ymm8
74
+ vmovdqa (%rsi), %ymm9
75
+ vpmullw %ymm1, %ymm9, %ymm10
76
+ vpaddw %ymm2, %ymm9, %ymm11
77
+ vpsllw $0x3, %ymm9, %ymm9
78
+ vpmulhw %ymm0, %ymm9, %ymm9
79
+ vpsubw %ymm11, %ymm10, %ymm11
80
+ vpandn %ymm11, %ymm10, %ymm10
81
+ vpsrlw $0xf, %ymm10, %ymm10
82
+ vpsubw %ymm10, %ymm9, %ymm9
83
+ vpmulhrsw %ymm3, %ymm9, %ymm9
84
+ vpand %ymm4, %ymm9, %ymm9
85
+ vpmaddwd %ymm5, %ymm9, %ymm9
86
+ vpsllvd %ymm6, %ymm9, %ymm9
87
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
88
+ vpsrlvq %ymm7, %ymm9, %ymm9
89
+ vpsllq $0x22, %ymm10, %ymm10
90
+ vpaddq %ymm10, %ymm9, %ymm9
91
+ vpshufb %ymm8, %ymm9, %ymm9
92
+ vextracti128 $0x1, %ymm9, %xmm10
93
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
94
+ vmovdqu %xmm9, (%rdi)
95
+ vmovd %xmm10, 0x10(%rdi)
96
+ vpextrw $0x2, %xmm10, 0x14(%rdi)
97
+ vmovdqa 0x20(%rsi), %ymm9
98
+ vpmullw %ymm1, %ymm9, %ymm10
99
+ vpaddw %ymm2, %ymm9, %ymm11
100
+ vpsllw $0x3, %ymm9, %ymm9
101
+ vpmulhw %ymm0, %ymm9, %ymm9
102
+ vpsubw %ymm11, %ymm10, %ymm11
103
+ vpandn %ymm11, %ymm10, %ymm10
104
+ vpsrlw $0xf, %ymm10, %ymm10
105
+ vpsubw %ymm10, %ymm9, %ymm9
106
+ vpmulhrsw %ymm3, %ymm9, %ymm9
107
+ vpand %ymm4, %ymm9, %ymm9
108
+ vpmaddwd %ymm5, %ymm9, %ymm9
109
+ vpsllvd %ymm6, %ymm9, %ymm9
110
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
111
+ vpsrlvq %ymm7, %ymm9, %ymm9
112
+ vpsllq $0x22, %ymm10, %ymm10
113
+ vpaddq %ymm10, %ymm9, %ymm9
114
+ vpshufb %ymm8, %ymm9, %ymm9
115
+ vextracti128 $0x1, %ymm9, %xmm10
116
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
117
+ vmovdqu %xmm9, 0x16(%rdi)
118
+ vmovd %xmm10, 0x26(%rdi)
119
+ vpextrw $0x2, %xmm10, 0x2a(%rdi)
120
+ vmovdqa 0x40(%rsi), %ymm9
121
+ vpmullw %ymm1, %ymm9, %ymm10
122
+ vpaddw %ymm2, %ymm9, %ymm11
123
+ vpsllw $0x3, %ymm9, %ymm9
124
+ vpmulhw %ymm0, %ymm9, %ymm9
125
+ vpsubw %ymm11, %ymm10, %ymm11
126
+ vpandn %ymm11, %ymm10, %ymm10
127
+ vpsrlw $0xf, %ymm10, %ymm10
128
+ vpsubw %ymm10, %ymm9, %ymm9
129
+ vpmulhrsw %ymm3, %ymm9, %ymm9
130
+ vpand %ymm4, %ymm9, %ymm9
131
+ vpmaddwd %ymm5, %ymm9, %ymm9
132
+ vpsllvd %ymm6, %ymm9, %ymm9
133
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
134
+ vpsrlvq %ymm7, %ymm9, %ymm9
135
+ vpsllq $0x22, %ymm10, %ymm10
136
+ vpaddq %ymm10, %ymm9, %ymm9
137
+ vpshufb %ymm8, %ymm9, %ymm9
138
+ vextracti128 $0x1, %ymm9, %xmm10
139
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
140
+ vmovdqu %xmm9, 0x2c(%rdi)
141
+ vmovd %xmm10, 0x3c(%rdi)
142
+ vpextrw $0x2, %xmm10, 0x40(%rdi)
143
+ vmovdqa 0x60(%rsi), %ymm9
144
+ vpmullw %ymm1, %ymm9, %ymm10
145
+ vpaddw %ymm2, %ymm9, %ymm11
146
+ vpsllw $0x3, %ymm9, %ymm9
147
+ vpmulhw %ymm0, %ymm9, %ymm9
148
+ vpsubw %ymm11, %ymm10, %ymm11
149
+ vpandn %ymm11, %ymm10, %ymm10
150
+ vpsrlw $0xf, %ymm10, %ymm10
151
+ vpsubw %ymm10, %ymm9, %ymm9
152
+ vpmulhrsw %ymm3, %ymm9, %ymm9
153
+ vpand %ymm4, %ymm9, %ymm9
154
+ vpmaddwd %ymm5, %ymm9, %ymm9
155
+ vpsllvd %ymm6, %ymm9, %ymm9
156
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
157
+ vpsrlvq %ymm7, %ymm9, %ymm9
158
+ vpsllq $0x22, %ymm10, %ymm10
159
+ vpaddq %ymm10, %ymm9, %ymm9
160
+ vpshufb %ymm8, %ymm9, %ymm9
161
+ vextracti128 $0x1, %ymm9, %xmm10
162
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
163
+ vmovdqu %xmm9, 0x42(%rdi)
164
+ vmovd %xmm10, 0x52(%rdi)
165
+ vpextrw $0x2, %xmm10, 0x56(%rdi)
166
+ vmovdqa 0x80(%rsi), %ymm9
167
+ vpmullw %ymm1, %ymm9, %ymm10
168
+ vpaddw %ymm2, %ymm9, %ymm11
169
+ vpsllw $0x3, %ymm9, %ymm9
170
+ vpmulhw %ymm0, %ymm9, %ymm9
171
+ vpsubw %ymm11, %ymm10, %ymm11
172
+ vpandn %ymm11, %ymm10, %ymm10
173
+ vpsrlw $0xf, %ymm10, %ymm10
174
+ vpsubw %ymm10, %ymm9, %ymm9
175
+ vpmulhrsw %ymm3, %ymm9, %ymm9
176
+ vpand %ymm4, %ymm9, %ymm9
177
+ vpmaddwd %ymm5, %ymm9, %ymm9
178
+ vpsllvd %ymm6, %ymm9, %ymm9
179
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
180
+ vpsrlvq %ymm7, %ymm9, %ymm9
181
+ vpsllq $0x22, %ymm10, %ymm10
182
+ vpaddq %ymm10, %ymm9, %ymm9
183
+ vpshufb %ymm8, %ymm9, %ymm9
184
+ vextracti128 $0x1, %ymm9, %xmm10
185
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
186
+ vmovdqu %xmm9, 0x58(%rdi)
187
+ vmovd %xmm10, 0x68(%rdi)
188
+ vpextrw $0x2, %xmm10, 0x6c(%rdi)
189
+ vmovdqa 0xa0(%rsi), %ymm9
190
+ vpmullw %ymm1, %ymm9, %ymm10
191
+ vpaddw %ymm2, %ymm9, %ymm11
192
+ vpsllw $0x3, %ymm9, %ymm9
193
+ vpmulhw %ymm0, %ymm9, %ymm9
194
+ vpsubw %ymm11, %ymm10, %ymm11
195
+ vpandn %ymm11, %ymm10, %ymm10
196
+ vpsrlw $0xf, %ymm10, %ymm10
197
+ vpsubw %ymm10, %ymm9, %ymm9
198
+ vpmulhrsw %ymm3, %ymm9, %ymm9
199
+ vpand %ymm4, %ymm9, %ymm9
200
+ vpmaddwd %ymm5, %ymm9, %ymm9
201
+ vpsllvd %ymm6, %ymm9, %ymm9
202
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
203
+ vpsrlvq %ymm7, %ymm9, %ymm9
204
+ vpsllq $0x22, %ymm10, %ymm10
205
+ vpaddq %ymm10, %ymm9, %ymm9
206
+ vpshufb %ymm8, %ymm9, %ymm9
207
+ vextracti128 $0x1, %ymm9, %xmm10
208
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
209
+ vmovdqu %xmm9, 0x6e(%rdi)
210
+ vmovd %xmm10, 0x7e(%rdi)
211
+ vpextrw $0x2, %xmm10, 0x82(%rdi)
212
+ vmovdqa 0xc0(%rsi), %ymm9
213
+ vpmullw %ymm1, %ymm9, %ymm10
214
+ vpaddw %ymm2, %ymm9, %ymm11
215
+ vpsllw $0x3, %ymm9, %ymm9
216
+ vpmulhw %ymm0, %ymm9, %ymm9
217
+ vpsubw %ymm11, %ymm10, %ymm11
218
+ vpandn %ymm11, %ymm10, %ymm10
219
+ vpsrlw $0xf, %ymm10, %ymm10
220
+ vpsubw %ymm10, %ymm9, %ymm9
221
+ vpmulhrsw %ymm3, %ymm9, %ymm9
222
+ vpand %ymm4, %ymm9, %ymm9
223
+ vpmaddwd %ymm5, %ymm9, %ymm9
224
+ vpsllvd %ymm6, %ymm9, %ymm9
225
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
226
+ vpsrlvq %ymm7, %ymm9, %ymm9
227
+ vpsllq $0x22, %ymm10, %ymm10
228
+ vpaddq %ymm10, %ymm9, %ymm9
229
+ vpshufb %ymm8, %ymm9, %ymm9
230
+ vextracti128 $0x1, %ymm9, %xmm10
231
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
232
+ vmovdqu %xmm9, 0x84(%rdi)
233
+ vmovd %xmm10, 0x94(%rdi)
234
+ vpextrw $0x2, %xmm10, 0x98(%rdi)
235
+ vmovdqa 0xe0(%rsi), %ymm9
236
+ vpmullw %ymm1, %ymm9, %ymm10
237
+ vpaddw %ymm2, %ymm9, %ymm11
238
+ vpsllw $0x3, %ymm9, %ymm9
239
+ vpmulhw %ymm0, %ymm9, %ymm9
240
+ vpsubw %ymm11, %ymm10, %ymm11
241
+ vpandn %ymm11, %ymm10, %ymm10
242
+ vpsrlw $0xf, %ymm10, %ymm10
243
+ vpsubw %ymm10, %ymm9, %ymm9
244
+ vpmulhrsw %ymm3, %ymm9, %ymm9
245
+ vpand %ymm4, %ymm9, %ymm9
246
+ vpmaddwd %ymm5, %ymm9, %ymm9
247
+ vpsllvd %ymm6, %ymm9, %ymm9
248
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
249
+ vpsrlvq %ymm7, %ymm9, %ymm9
250
+ vpsllq $0x22, %ymm10, %ymm10
251
+ vpaddq %ymm10, %ymm9, %ymm9
252
+ vpshufb %ymm8, %ymm9, %ymm9
253
+ vextracti128 $0x1, %ymm9, %xmm10
254
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
255
+ vmovdqu %xmm9, 0x9a(%rdi)
256
+ vmovd %xmm10, 0xaa(%rdi)
257
+ vpextrw $0x2, %xmm10, 0xae(%rdi)
258
+ vmovdqa 0x100(%rsi), %ymm9
259
+ vpmullw %ymm1, %ymm9, %ymm10
260
+ vpaddw %ymm2, %ymm9, %ymm11
261
+ vpsllw $0x3, %ymm9, %ymm9
262
+ vpmulhw %ymm0, %ymm9, %ymm9
263
+ vpsubw %ymm11, %ymm10, %ymm11
264
+ vpandn %ymm11, %ymm10, %ymm10
265
+ vpsrlw $0xf, %ymm10, %ymm10
266
+ vpsubw %ymm10, %ymm9, %ymm9
267
+ vpmulhrsw %ymm3, %ymm9, %ymm9
268
+ vpand %ymm4, %ymm9, %ymm9
269
+ vpmaddwd %ymm5, %ymm9, %ymm9
270
+ vpsllvd %ymm6, %ymm9, %ymm9
271
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
272
+ vpsrlvq %ymm7, %ymm9, %ymm9
273
+ vpsllq $0x22, %ymm10, %ymm10
274
+ vpaddq %ymm10, %ymm9, %ymm9
275
+ vpshufb %ymm8, %ymm9, %ymm9
276
+ vextracti128 $0x1, %ymm9, %xmm10
277
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
278
+ vmovdqu %xmm9, 0xb0(%rdi)
279
+ vmovd %xmm10, 0xc0(%rdi)
280
+ vpextrw $0x2, %xmm10, 0xc4(%rdi)
281
+ vmovdqa 0x120(%rsi), %ymm9
282
+ vpmullw %ymm1, %ymm9, %ymm10
283
+ vpaddw %ymm2, %ymm9, %ymm11
284
+ vpsllw $0x3, %ymm9, %ymm9
285
+ vpmulhw %ymm0, %ymm9, %ymm9
286
+ vpsubw %ymm11, %ymm10, %ymm11
287
+ vpandn %ymm11, %ymm10, %ymm10
288
+ vpsrlw $0xf, %ymm10, %ymm10
289
+ vpsubw %ymm10, %ymm9, %ymm9
290
+ vpmulhrsw %ymm3, %ymm9, %ymm9
291
+ vpand %ymm4, %ymm9, %ymm9
292
+ vpmaddwd %ymm5, %ymm9, %ymm9
293
+ vpsllvd %ymm6, %ymm9, %ymm9
294
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
295
+ vpsrlvq %ymm7, %ymm9, %ymm9
296
+ vpsllq $0x22, %ymm10, %ymm10
297
+ vpaddq %ymm10, %ymm9, %ymm9
298
+ vpshufb %ymm8, %ymm9, %ymm9
299
+ vextracti128 $0x1, %ymm9, %xmm10
300
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
301
+ vmovdqu %xmm9, 0xc6(%rdi)
302
+ vmovd %xmm10, 0xd6(%rdi)
303
+ vpextrw $0x2, %xmm10, 0xda(%rdi)
304
+ vmovdqa 0x140(%rsi), %ymm9
305
+ vpmullw %ymm1, %ymm9, %ymm10
306
+ vpaddw %ymm2, %ymm9, %ymm11
307
+ vpsllw $0x3, %ymm9, %ymm9
308
+ vpmulhw %ymm0, %ymm9, %ymm9
309
+ vpsubw %ymm11, %ymm10, %ymm11
310
+ vpandn %ymm11, %ymm10, %ymm10
311
+ vpsrlw $0xf, %ymm10, %ymm10
312
+ vpsubw %ymm10, %ymm9, %ymm9
313
+ vpmulhrsw %ymm3, %ymm9, %ymm9
314
+ vpand %ymm4, %ymm9, %ymm9
315
+ vpmaddwd %ymm5, %ymm9, %ymm9
316
+ vpsllvd %ymm6, %ymm9, %ymm9
317
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
318
+ vpsrlvq %ymm7, %ymm9, %ymm9
319
+ vpsllq $0x22, %ymm10, %ymm10
320
+ vpaddq %ymm10, %ymm9, %ymm9
321
+ vpshufb %ymm8, %ymm9, %ymm9
322
+ vextracti128 $0x1, %ymm9, %xmm10
323
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
324
+ vmovdqu %xmm9, 0xdc(%rdi)
325
+ vmovd %xmm10, 0xec(%rdi)
326
+ vpextrw $0x2, %xmm10, 0xf0(%rdi)
327
+ vmovdqa 0x160(%rsi), %ymm9
328
+ vpmullw %ymm1, %ymm9, %ymm10
329
+ vpaddw %ymm2, %ymm9, %ymm11
330
+ vpsllw $0x3, %ymm9, %ymm9
331
+ vpmulhw %ymm0, %ymm9, %ymm9
332
+ vpsubw %ymm11, %ymm10, %ymm11
333
+ vpandn %ymm11, %ymm10, %ymm10
334
+ vpsrlw $0xf, %ymm10, %ymm10
335
+ vpsubw %ymm10, %ymm9, %ymm9
336
+ vpmulhrsw %ymm3, %ymm9, %ymm9
337
+ vpand %ymm4, %ymm9, %ymm9
338
+ vpmaddwd %ymm5, %ymm9, %ymm9
339
+ vpsllvd %ymm6, %ymm9, %ymm9
340
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
341
+ vpsrlvq %ymm7, %ymm9, %ymm9
342
+ vpsllq $0x22, %ymm10, %ymm10
343
+ vpaddq %ymm10, %ymm9, %ymm9
344
+ vpshufb %ymm8, %ymm9, %ymm9
345
+ vextracti128 $0x1, %ymm9, %xmm10
346
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
347
+ vmovdqu %xmm9, 0xf2(%rdi)
348
+ vmovd %xmm10, 0x102(%rdi)
349
+ vpextrw $0x2, %xmm10, 0x106(%rdi)
350
+ vmovdqa 0x180(%rsi), %ymm9
351
+ vpmullw %ymm1, %ymm9, %ymm10
352
+ vpaddw %ymm2, %ymm9, %ymm11
353
+ vpsllw $0x3, %ymm9, %ymm9
354
+ vpmulhw %ymm0, %ymm9, %ymm9
355
+ vpsubw %ymm11, %ymm10, %ymm11
356
+ vpandn %ymm11, %ymm10, %ymm10
357
+ vpsrlw $0xf, %ymm10, %ymm10
358
+ vpsubw %ymm10, %ymm9, %ymm9
359
+ vpmulhrsw %ymm3, %ymm9, %ymm9
360
+ vpand %ymm4, %ymm9, %ymm9
361
+ vpmaddwd %ymm5, %ymm9, %ymm9
362
+ vpsllvd %ymm6, %ymm9, %ymm9
363
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
364
+ vpsrlvq %ymm7, %ymm9, %ymm9
365
+ vpsllq $0x22, %ymm10, %ymm10
366
+ vpaddq %ymm10, %ymm9, %ymm9
367
+ vpshufb %ymm8, %ymm9, %ymm9
368
+ vextracti128 $0x1, %ymm9, %xmm10
369
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
370
+ vmovdqu %xmm9, 0x108(%rdi)
371
+ vmovd %xmm10, 0x118(%rdi)
372
+ vpextrw $0x2, %xmm10, 0x11c(%rdi)
373
+ vmovdqa 0x1a0(%rsi), %ymm9
374
+ vpmullw %ymm1, %ymm9, %ymm10
375
+ vpaddw %ymm2, %ymm9, %ymm11
376
+ vpsllw $0x3, %ymm9, %ymm9
377
+ vpmulhw %ymm0, %ymm9, %ymm9
378
+ vpsubw %ymm11, %ymm10, %ymm11
379
+ vpandn %ymm11, %ymm10, %ymm10
380
+ vpsrlw $0xf, %ymm10, %ymm10
381
+ vpsubw %ymm10, %ymm9, %ymm9
382
+ vpmulhrsw %ymm3, %ymm9, %ymm9
383
+ vpand %ymm4, %ymm9, %ymm9
384
+ vpmaddwd %ymm5, %ymm9, %ymm9
385
+ vpsllvd %ymm6, %ymm9, %ymm9
386
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
387
+ vpsrlvq %ymm7, %ymm9, %ymm9
388
+ vpsllq $0x22, %ymm10, %ymm10
389
+ vpaddq %ymm10, %ymm9, %ymm9
390
+ vpshufb %ymm8, %ymm9, %ymm9
391
+ vextracti128 $0x1, %ymm9, %xmm10
392
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
393
+ vmovdqu %xmm9, 0x11e(%rdi)
394
+ vmovd %xmm10, 0x12e(%rdi)
395
+ vpextrw $0x2, %xmm10, 0x132(%rdi)
396
+ vmovdqa 0x1c0(%rsi), %ymm9
397
+ vpmullw %ymm1, %ymm9, %ymm10
398
+ vpaddw %ymm2, %ymm9, %ymm11
399
+ vpsllw $0x3, %ymm9, %ymm9
400
+ vpmulhw %ymm0, %ymm9, %ymm9
401
+ vpsubw %ymm11, %ymm10, %ymm11
402
+ vpandn %ymm11, %ymm10, %ymm10
403
+ vpsrlw $0xf, %ymm10, %ymm10
404
+ vpsubw %ymm10, %ymm9, %ymm9
405
+ vpmulhrsw %ymm3, %ymm9, %ymm9
406
+ vpand %ymm4, %ymm9, %ymm9
407
+ vpmaddwd %ymm5, %ymm9, %ymm9
408
+ vpsllvd %ymm6, %ymm9, %ymm9
409
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
410
+ vpsrlvq %ymm7, %ymm9, %ymm9
411
+ vpsllq $0x22, %ymm10, %ymm10
412
+ vpaddq %ymm10, %ymm9, %ymm9
413
+ vpshufb %ymm8, %ymm9, %ymm9
414
+ vextracti128 $0x1, %ymm9, %xmm10
415
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
416
+ vmovdqu %xmm9, 0x134(%rdi)
417
+ vmovd %xmm10, 0x144(%rdi)
418
+ vpextrw $0x2, %xmm10, 0x148(%rdi)
419
+ vmovdqa 0x1e0(%rsi), %ymm9
420
+ vpmullw %ymm1, %ymm9, %ymm10
421
+ vpaddw %ymm2, %ymm9, %ymm11
422
+ vpsllw $0x3, %ymm9, %ymm9
423
+ vpmulhw %ymm0, %ymm9, %ymm9
424
+ vpsubw %ymm11, %ymm10, %ymm11
425
+ vpandn %ymm11, %ymm10, %ymm10
426
+ vpsrlw $0xf, %ymm10, %ymm10
427
+ vpsubw %ymm10, %ymm9, %ymm9
428
+ vpmulhrsw %ymm3, %ymm9, %ymm9
429
+ vpand %ymm4, %ymm9, %ymm9
430
+ vpmaddwd %ymm5, %ymm9, %ymm9
431
+ vpsllvd %ymm6, %ymm9, %ymm9
432
+ vpsrldq $0x8, %ymm9, %ymm10 # ymm10 = ymm9[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm9[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
433
+ vpsrlvq %ymm7, %ymm9, %ymm9
434
+ vpsllq $0x22, %ymm10, %ymm10
435
+ vpaddq %ymm10, %ymm9, %ymm9
436
+ vpshufb %ymm8, %ymm9, %ymm9
437
+ vextracti128 $0x1, %ymm9, %xmm10
438
+ vpblendvb %xmm8, %xmm10, %xmm9, %xmm9
439
+ vmovdqu %xmm9, 0x14a(%rdi)
440
+ vmovd %xmm10, 0x15a(%rdi)
441
+ vpextrw $0x2, %xmm10, 0x15e(%rdi)
442
+ retq
443
+ .cfi_endproc
444
+
445
+ MLK_ASM_FN_SIZE(poly_compress_d11_avx2)
446
+
447
+ #endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
448
+ && (MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4) */
@@ -0,0 +1,163 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [REF_AVX2]
10
+ * CRYSTALS-Kyber optimized AVX2 implementation
11
+ * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
12
+ * https://github.com/pq-crystals/kyber/tree/main/avx2
13
+ */
14
+
15
+ /*
16
+ * This file is derived from the public domain
17
+ * AVX2 Kyber implementation @[REF_AVX2].
18
+ */
19
+
20
+ /*************************************************
21
+ * Name: mlk_poly_compress_d4_avx2
22
+ *
23
+ * Description: Compression of a polynomial to 4 bits per coefficient.
24
+ *
25
+ * Arguments: - uint8_t *r: pointer to output byte array
26
+ * (of length MLKEM_POLYCOMPRESSEDBYTES_D4)
27
+ * - const int16_t *a: pointer to input polynomial
28
+ * - const uint8_t *data: pointer to permdidx constant
29
+ **************************************************/
30
+
31
+ #include "../../../common.h"
32
+ #if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
33
+ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && \
34
+ (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 2 || MLKEM_K == 3)
35
+
36
+ /*
37
+ * WARNING: This file is auto-derived from the mlkem-native source file
38
+ * dev/x86_64/src/poly_compress_d4.S using scripts/simpasm. Do not modify it directly.
39
+ */
40
+
41
+ #if defined(__ELF__)
42
+ .section .note.GNU-stack,"",@progbits
43
+ #endif
44
+
45
+ .text
46
+ .balign 4
47
+ .global MLK_ASM_NAMESPACE(poly_compress_d4_avx2)
48
+ MLK_ASM_FN_SYMBOL(poly_compress_d4_avx2) /* compresses 256 16-bit coefficients (512 input bytes) into 128 output bytes, 4 bits per coefficient */
49
+
50
+ .cfi_startproc /* registers used below: rdi = output bytes, rsi = input coefficients, rdx = 32-byte vpermd index table; scratch: eax, ymm0-ymm8 */
51
+ movl $0x4ebf4ebf, %eax # imm = 0x4EBF4EBF
52
+ vmovd %eax, %xmm0
53
+ vpbroadcastd %xmm0, %ymm0 /* ymm0: every 16-bit lane = 0x4EBF (20159), the multiplier for vpmulhw. NOTE(review): presumably round(2^26 / 3329) as in the reference AVX2 Kyber code - confirm */
54
+ movl $0x2000200, %eax # imm = 0x2000200
55
+ vmovd %eax, %xmm1
56
+ vpbroadcastd %xmm1, %ymm1 /* ymm1: every 16-bit lane = 0x0200 (2^9); vpmulhrsw by 2^9 is a rounding right shift by 6 */
57
+ movl $0xf000f, %eax # imm = 0xF000F
58
+ vmovd %eax, %xmm2
59
+ vpbroadcastd %xmm2, %ymm2 /* ymm2: every 16-bit lane = 0x000F, mask keeping the low 4 bits */
60
+ movl $0x10011001, %eax # imm = 0x10011001
61
+ vmovd %eax, %xmm3
62
+ vpbroadcastd %xmm3, %ymm3 /* ymm3: byte pattern 0x01,0x10 repeated, i.e. weights (1, 16) for vpmaddubsw */
63
+ vmovdqa (%rdx), %ymm4 /* ymm4: dword permutation indices for vpermd; aligned load, so rdx must be 32-byte aligned */
64
+ vmovdqa (%rsi), %ymm5 /* group 1: coefficients 0..63 (input bytes 0x00..0x7f); aligned loads, so rsi must be 32-byte aligned */
65
+ vmovdqa 0x20(%rsi), %ymm6
66
+ vmovdqa 0x40(%rsi), %ymm7
67
+ vmovdqa 0x60(%rsi), %ymm8
68
+ vpmulhw %ymm0, %ymm5, %ymm5 /* t = high 16 bits of the signed product x * 20159, i.e. floor(x * 20159 / 2^16) */
69
+ vpmulhw %ymm0, %ymm6, %ymm6
70
+ vpmulhw %ymm0, %ymm7, %ymm7
71
+ vpmulhw %ymm0, %ymm8, %ymm8
72
+ vpmulhrsw %ymm1, %ymm5, %ymm5 /* t = (t + 32) >> 6; with the multiply above this is about x * 20159 / 2^22, rounded. NOTE(review): presumably round(16 * x / q) with q = 3329 - confirm */
73
+ vpmulhrsw %ymm1, %ymm6, %ymm6
74
+ vpmulhrsw %ymm1, %ymm7, %ymm7
75
+ vpmulhrsw %ymm1, %ymm8, %ymm8
76
+ vpand %ymm2, %ymm5, %ymm5 /* keep only the low 4 bits of each 16-bit lane (reduction mod 16) */
77
+ vpand %ymm2, %ymm6, %ymm6
78
+ vpand %ymm2, %ymm7, %ymm7
79
+ vpand %ymm2, %ymm8, %ymm8
80
+ vpackuswb %ymm6, %ymm5, %ymm5 /* narrow words to bytes; values are at most 15 so the saturation never triggers; packing is done per 128-bit lane */
81
+ vpackuswb %ymm8, %ymm7, %ymm7
82
+ vpmaddubsw %ymm3, %ymm5, %ymm5 /* each 16-bit lane = even byte + 16 * odd byte: two 4-bit values merged into one value <= 255 */
83
+ vpmaddubsw %ymm3, %ymm7, %ymm7
84
+ vpackuswb %ymm7, %ymm5, %ymm5 /* narrow the merged words to bytes: ymm5 now holds 32 packed output bytes, still in per-lane pack order */
85
+ vpermd %ymm5, %ymm4, %ymm5 /* reorder the eight dwords using the indices in ymm4. NOTE(review): presumably undoes the per-lane interleaving of the packs - confirm against the table passed by the caller */
86
+ vmovdqu %ymm5, (%rdi) /* output bytes 0x00..0x1f; unaligned store, so rdi needs no particular alignment */
87
+ vmovdqa 0x80(%rsi), %ymm5 /* group 2: coefficients 64..127 (input bytes 0x80..0xff); same sequence as group 1 */
88
+ vmovdqa 0xa0(%rsi), %ymm6
89
+ vmovdqa 0xc0(%rsi), %ymm7
90
+ vmovdqa 0xe0(%rsi), %ymm8
91
+ vpmulhw %ymm0, %ymm5, %ymm5 /* multiply-high by 20159 */
92
+ vpmulhw %ymm0, %ymm6, %ymm6
93
+ vpmulhw %ymm0, %ymm7, %ymm7
94
+ vpmulhw %ymm0, %ymm8, %ymm8
95
+ vpmulhrsw %ymm1, %ymm5, %ymm5 /* rounding shift right by 6 */
96
+ vpmulhrsw %ymm1, %ymm6, %ymm6
97
+ vpmulhrsw %ymm1, %ymm7, %ymm7
98
+ vpmulhrsw %ymm1, %ymm8, %ymm8
99
+ vpand %ymm2, %ymm5, %ymm5 /* mask to 4 bits */
100
+ vpand %ymm2, %ymm6, %ymm6
101
+ vpand %ymm2, %ymm7, %ymm7
102
+ vpand %ymm2, %ymm8, %ymm8
103
+ vpackuswb %ymm6, %ymm5, %ymm5 /* words -> bytes */
104
+ vpackuswb %ymm8, %ymm7, %ymm7
105
+ vpmaddubsw %ymm3, %ymm5, %ymm5 /* merge nibble pairs: even + 16 * odd */
106
+ vpmaddubsw %ymm3, %ymm7, %ymm7
107
+ vpackuswb %ymm7, %ymm5, %ymm5 /* words -> 32 packed bytes */
108
+ vpermd %ymm5, %ymm4, %ymm5 /* dword reorder via table in ymm4 */
109
+ vmovdqu %ymm5, 0x20(%rdi) /* output bytes 0x20..0x3f */
110
+ vmovdqa 0x100(%rsi), %ymm5 /* group 3: coefficients 128..191 (input bytes 0x100..0x17f); same sequence as group 1 */
111
+ vmovdqa 0x120(%rsi), %ymm6
112
+ vmovdqa 0x140(%rsi), %ymm7
113
+ vmovdqa 0x160(%rsi), %ymm8
114
+ vpmulhw %ymm0, %ymm5, %ymm5 /* multiply-high by 20159 */
115
+ vpmulhw %ymm0, %ymm6, %ymm6
116
+ vpmulhw %ymm0, %ymm7, %ymm7
117
+ vpmulhw %ymm0, %ymm8, %ymm8
118
+ vpmulhrsw %ymm1, %ymm5, %ymm5 /* rounding shift right by 6 */
119
+ vpmulhrsw %ymm1, %ymm6, %ymm6
120
+ vpmulhrsw %ymm1, %ymm7, %ymm7
121
+ vpmulhrsw %ymm1, %ymm8, %ymm8
122
+ vpand %ymm2, %ymm5, %ymm5 /* mask to 4 bits */
123
+ vpand %ymm2, %ymm6, %ymm6
124
+ vpand %ymm2, %ymm7, %ymm7
125
+ vpand %ymm2, %ymm8, %ymm8
126
+ vpackuswb %ymm6, %ymm5, %ymm5 /* words -> bytes */
127
+ vpackuswb %ymm8, %ymm7, %ymm7
128
+ vpmaddubsw %ymm3, %ymm5, %ymm5 /* merge nibble pairs: even + 16 * odd */
129
+ vpmaddubsw %ymm3, %ymm7, %ymm7
130
+ vpackuswb %ymm7, %ymm5, %ymm5 /* words -> 32 packed bytes */
131
+ vpermd %ymm5, %ymm4, %ymm5 /* dword reorder via table in ymm4 */
132
+ vmovdqu %ymm5, 0x40(%rdi) /* output bytes 0x40..0x5f */
133
+ vmovdqa 0x180(%rsi), %ymm5 /* group 4: coefficients 192..255 (input bytes 0x180..0x1ff); same sequence as group 1 */
134
+ vmovdqa 0x1a0(%rsi), %ymm6
135
+ vmovdqa 0x1c0(%rsi), %ymm7
136
+ vmovdqa 0x1e0(%rsi), %ymm8
137
+ vpmulhw %ymm0, %ymm5, %ymm5 /* multiply-high by 20159 */
138
+ vpmulhw %ymm0, %ymm6, %ymm6
139
+ vpmulhw %ymm0, %ymm7, %ymm7
140
+ vpmulhw %ymm0, %ymm8, %ymm8
141
+ vpmulhrsw %ymm1, %ymm5, %ymm5 /* rounding shift right by 6 */
142
+ vpmulhrsw %ymm1, %ymm6, %ymm6
143
+ vpmulhrsw %ymm1, %ymm7, %ymm7
144
+ vpmulhrsw %ymm1, %ymm8, %ymm8
145
+ vpand %ymm2, %ymm5, %ymm5 /* mask to 4 bits */
146
+ vpand %ymm2, %ymm6, %ymm6
147
+ vpand %ymm2, %ymm7, %ymm7
148
+ vpand %ymm2, %ymm8, %ymm8
149
+ vpackuswb %ymm6, %ymm5, %ymm5 /* words -> bytes */
150
+ vpackuswb %ymm8, %ymm7, %ymm7
151
+ vpmaddubsw %ymm3, %ymm5, %ymm5 /* merge nibble pairs: even + 16 * odd */
152
+ vpmaddubsw %ymm3, %ymm7, %ymm7
153
+ vpackuswb %ymm7, %ymm5, %ymm5 /* words -> 32 packed bytes */
154
+ vpermd %ymm5, %ymm4, %ymm5 /* dword reorder via table in ymm4 */
155
+ vmovdqu %ymm5, 0x60(%rdi) /* output bytes 0x60..0x7f: the last of the 128 output bytes */
156
+ retq /* leaf routine: no stack use and no calls. NOTE(review): returns with ymm0-ymm8 modified and without vzeroupper - confirm callers expect this */
157
+ .cfi_endproc
158
+
159
+ MLK_ASM_FN_SIZE(poly_compress_d4_avx2)
160
+
161
+ #endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
162
+ && (MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == \
163
+ 3) */