pq_crypto 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (410)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +27 -2
  3. data/CHANGELOG.md +59 -0
  4. data/GET_STARTED.md +21 -16
  5. data/README.md +26 -0
  6. data/SECURITY.md +22 -16
  7. data/ext/pqcrypto/extconf.rb +183 -99
  8. data/ext/pqcrypto/mldsa_api.h +1 -118
  9. data/ext/pqcrypto/mlkem_api.h +1 -42
  10. data/ext/pqcrypto/pq_externalmu.c +88 -216
  11. data/ext/pqcrypto/pqcrypto_native_api.h +132 -0
  12. data/ext/pqcrypto/pqcrypto_ruby_secure.c +234 -12
  13. data/ext/pqcrypto/pqcrypto_secure.c +429 -334
  14. data/ext/pqcrypto/pqcrypto_secure.h +13 -45
  15. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  16. data/ext/pqcrypto/randombytes.h +9 -0
  17. data/ext/pqcrypto/vendor/.vendored +12 -5
  18. data/ext/pqcrypto/vendor/mldsa-native/BUILDING.md +105 -0
  19. data/ext/pqcrypto/vendor/mldsa-native/LICENSE +286 -0
  20. data/ext/pqcrypto/vendor/mldsa-native/META.yml +24 -0
  21. data/ext/pqcrypto/vendor/mldsa-native/README.md +221 -0
  22. data/ext/pqcrypto/vendor/mldsa-native/SECURITY.md +8 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +721 -0
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +975 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +724 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +723 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +166 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +321 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.c +21 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +385 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.c +73 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +130 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +277 -0
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +244 -0
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +182 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +117 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +438 -0
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +105 -0
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +71 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +62 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +376 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +204 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +259 -0
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1077 -0
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +987 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +41 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +26 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +35 -0
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +37 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +27 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +36 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +69 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/README.md +10 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +32 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +20 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +638 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +136 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +52 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +29 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +488 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +16 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/xkcp.h +31 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +247 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +231 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +150 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +753 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +129 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +145 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +177 -0
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +653 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +79 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +53 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +55 -0
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +85 -0
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +85 -0
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +102 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +110 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +72 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +69 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +40 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +189 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +135 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +128 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +543 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +62 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +649 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/meta.h +23 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +315 -0
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +124 -0
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +157 -0
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +27 -0
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +2311 -0
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +2383 -0
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +239 -0
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +131 -0
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +139 -0
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +155 -0
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +187 -0
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +61 -0
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_chknorm_avx2.c +52 -0
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +155 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +155 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +102 -0
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +104 -0
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +91 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +93 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_avx2.c +126 -0
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +155 -0
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +139 -0
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +160 -0
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +293 -0
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +224 -0
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/params.h +77 -0
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +991 -0
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +393 -0
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +946 -0
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +360 -0
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +877 -0
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +725 -0
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/randombytes.h +26 -0
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +139 -0
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +249 -0
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +1511 -0
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +806 -0
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/symmetric.h +68 -0
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sys.h +268 -0
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/zetas.inc +55 -0
  128. data/ext/pqcrypto/vendor/mlkem-native/BUILDING.md +104 -0
  129. data/ext/pqcrypto/vendor/mlkem-native/LICENSE +294 -0
  130. data/ext/pqcrypto/vendor/mlkem-native/META.yml +30 -0
  131. data/ext/pqcrypto/vendor/mlkem-native/README.md +223 -0
  132. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +86 -0
  133. data/ext/pqcrypto/vendor/mlkem-native/SECURITY.md +8 -0
  134. data/ext/pqcrypto/vendor/mlkem-native/mlkem/README.md +23 -0
  135. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +660 -0
  136. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +538 -0
  137. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +681 -0
  138. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +709 -0
  139. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +174 -0
  140. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +274 -0
  141. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +717 -0
  142. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +688 -0
  143. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.c +64 -0
  144. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +128 -0
  145. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +251 -0
  146. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +158 -0
  147. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +208 -0
  148. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +80 -0
  149. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +463 -0
  150. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.h +98 -0
  151. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/auto.h +70 -0
  152. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +69 -0
  153. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +375 -0
  154. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +203 -0
  155. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +258 -0
  156. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +1076 -0
  157. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +986 -0
  158. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +46 -0
  159. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +25 -0
  160. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +34 -0
  161. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +35 -0
  162. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +26 -0
  163. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +35 -0
  164. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/api.h +117 -0
  165. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/README.md +10 -0
  166. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/mve.h +79 -0
  167. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +35 -0
  168. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +667 -0
  169. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +40 -0
  170. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +51 -0
  171. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +290 -0
  172. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +314 -0
  173. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +28 -0
  174. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +33 -0
  175. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +41 -0
  176. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +451 -0
  177. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +51 -0
  178. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +622 -0
  179. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +156 -0
  180. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.c +446 -0
  181. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +326 -0
  182. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/README.md +16 -0
  183. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +122 -0
  184. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +174 -0
  185. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +177 -0
  186. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/intt.S +628 -0
  187. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/ntt.S +562 -0
  188. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_mulcache_compute_asm.S +127 -0
  189. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_reduce_asm.S +150 -0
  190. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tobytes_asm.S +117 -0
  191. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/poly_tomont_asm.S +98 -0
  192. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +261 -0
  193. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +314 -0
  194. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +368 -0
  195. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_asm.S +226 -0
  196. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +542 -0
  197. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +637 -0
  198. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +25 -0
  199. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/README.md +11 -0
  200. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +128 -0
  201. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/arith_native_riscv64.h +45 -0
  202. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +81 -0
  203. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +145 -0
  204. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_izetas.inc +27 -0
  205. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +805 -0
  206. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas.inc +27 -0
  207. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_zetas_basemul.inc +39 -0
  208. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/README.md +4 -0
  209. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +304 -0
  210. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +309 -0
  211. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +94 -0
  212. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +45 -0
  213. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +102 -0
  214. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +25 -0
  215. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/intt.S +719 -0
  216. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/mulcache_compute.S +90 -0
  217. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntt.S +639 -0
  218. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttfrombytes.S +193 -0
  219. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/ntttobytes.S +181 -0
  220. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/nttunpack.S +174 -0
  221. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d10.S +382 -0
  222. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d11.S +448 -0
  223. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d4.S +163 -0
  224. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_compress_d5.S +220 -0
  225. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d10.S +228 -0
  226. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d11.S +277 -0
  227. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d4.S +180 -0
  228. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/poly_decompress_d5.S +192 -0
  229. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S +502 -0
  230. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S +750 -0
  231. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S +998 -0
  232. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/reduce.S +218 -0
  233. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_asm.S +103 -0
  234. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +544 -0
  235. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/tomont.S +155 -0
  236. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/params.h +76 -0
  237. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +572 -0
  238. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +317 -0
  239. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +502 -0
  240. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +668 -0
  241. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +60 -0
  242. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +362 -0
  243. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +118 -0
  244. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/symmetric.h +70 -0
  245. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +260 -0
  246. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.c +20 -0
  247. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +464 -0
  248. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/zetas.inc +30 -0
  249. data/lib/pq_crypto/hybrid_kem.rb +10 -1
  250. data/lib/pq_crypto/version.rb +1 -1
  251. data/lib/pq_crypto.rb +5 -1
  252. data/script/vendor_libs.rb +228 -154
  253. metadata +236 -160
  254. data/ext/pqcrypto/vendor/pqclean/common/aes.c +0 -639
  255. data/ext/pqcrypto/vendor/pqclean/common/aes.h +0 -64
  256. data/ext/pqcrypto/vendor/pqclean/common/compat.h +0 -73
  257. data/ext/pqcrypto/vendor/pqclean/common/crypto_declassify.h +0 -7
  258. data/ext/pqcrypto/vendor/pqclean/common/fips202.c +0 -928
  259. data/ext/pqcrypto/vendor/pqclean/common/fips202.h +0 -166
  260. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/feat.S +0 -168
  261. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.c +0 -684
  262. data/ext/pqcrypto/vendor/pqclean/common/keccak2x/fips202x2.h +0 -60
  263. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SIMD256.c +0 -1028
  264. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-times4-SnP.h +0 -50
  265. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/KeccakP-1600-unrolling.macros +0 -198
  266. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile +0 -8
  267. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/Makefile.Microsoft_nmake +0 -8
  268. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/SIMD256-config.h +0 -3
  269. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/align.h +0 -34
  270. data/ext/pqcrypto/vendor/pqclean/common/keccak4x/brg_endian.h +0 -142
  271. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.c +0 -101
  272. data/ext/pqcrypto/vendor/pqclean/common/nistseedexpander.h +0 -39
  273. data/ext/pqcrypto/vendor/pqclean/common/randombytes.c +0 -355
  274. data/ext/pqcrypto/vendor/pqclean/common/randombytes.h +0 -27
  275. data/ext/pqcrypto/vendor/pqclean/common/sha2.c +0 -769
  276. data/ext/pqcrypto/vendor/pqclean/common/sha2.h +0 -173
  277. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.c +0 -156
  278. data/ext/pqcrypto/vendor/pqclean/common/sp800-185.h +0 -27
  279. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/LICENSE +0 -5
  280. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile +0 -19
  281. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/Makefile.Microsoft_nmake +0 -23
  282. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/api.h +0 -18
  283. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.c +0 -83
  284. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/cbd.h +0 -11
  285. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.c +0 -327
  286. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/indcpa.h +0 -22
  287. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.c +0 -164
  288. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/kem.h +0 -23
  289. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.c +0 -146
  290. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/ntt.h +0 -14
  291. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/params.h +0 -36
  292. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.c +0 -311
  293. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/poly.h +0 -37
  294. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.c +0 -198
  295. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/polyvec.h +0 -26
  296. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.c +0 -41
  297. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/reduce.h +0 -13
  298. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric-shake.c +0 -71
  299. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/symmetric.h +0 -30
  300. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.c +0 -67
  301. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-1024/clean/verify.h +0 -13
  302. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/LICENSE +0 -5
  303. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile +0 -19
  304. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/Makefile.Microsoft_nmake +0 -23
  305. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/api.h +0 -18
  306. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.c +0 -108
  307. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/cbd.h +0 -11
  308. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.c +0 -327
  309. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/indcpa.h +0 -22
  310. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.c +0 -164
  311. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/kem.h +0 -23
  312. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.c +0 -146
  313. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/ntt.h +0 -14
  314. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/params.h +0 -36
  315. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.c +0 -299
  316. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/poly.h +0 -37
  317. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.c +0 -188
  318. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/polyvec.h +0 -26
  319. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.c +0 -41
  320. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/reduce.h +0 -13
  321. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric-shake.c +0 -71
  322. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/symmetric.h +0 -30
  323. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.c +0 -67
  324. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-512/clean/verify.h +0 -13
  325. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/LICENSE +0 -5
  326. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile +0 -19
  327. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/Makefile.Microsoft_nmake +0 -23
  328. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/api.h +0 -18
  329. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.c +0 -83
  330. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/cbd.h +0 -11
  331. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.c +0 -327
  332. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/indcpa.h +0 -22
  333. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.c +0 -164
  334. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/kem.h +0 -23
  335. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.c +0 -146
  336. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/ntt.h +0 -14
  337. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/params.h +0 -36
  338. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.c +0 -299
  339. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/poly.h +0 -37
  340. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.c +0 -188
  341. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/polyvec.h +0 -26
  342. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.c +0 -41
  343. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/reduce.h +0 -13
  344. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric-shake.c +0 -71
  345. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/symmetric.h +0 -30
  346. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.c +0 -67
  347. data/ext/pqcrypto/vendor/pqclean/crypto_kem/ml-kem-768/clean/verify.h +0 -13
  348. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/LICENSE +0 -5
  349. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile +0 -19
  350. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/Makefile.Microsoft_nmake +0 -23
  351. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/api.h +0 -50
  352. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.c +0 -98
  353. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/ntt.h +0 -10
  354. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.c +0 -261
  355. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/packing.h +0 -31
  356. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/params.h +0 -44
  357. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.c +0 -848
  358. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/poly.h +0 -52
  359. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.c +0 -415
  360. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/polyvec.h +0 -65
  361. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.c +0 -69
  362. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/reduce.h +0 -17
  363. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.c +0 -98
  364. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/rounding.h +0 -14
  365. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.c +0 -407
  366. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/sign.h +0 -47
  367. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric-shake.c +0 -26
  368. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-44/clean/symmetric.h +0 -34
  369. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/LICENSE +0 -5
  370. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile +0 -19
  371. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/Makefile.Microsoft_nmake +0 -23
  372. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/api.h +0 -50
  373. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.c +0 -98
  374. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/ntt.h +0 -10
  375. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.c +0 -261
  376. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/packing.h +0 -31
  377. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/params.h +0 -44
  378. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.c +0 -799
  379. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/poly.h +0 -52
  380. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.c +0 -415
  381. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/polyvec.h +0 -65
  382. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.c +0 -69
  383. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/reduce.h +0 -17
  384. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.c +0 -92
  385. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/rounding.h +0 -14
  386. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.c +0 -407
  387. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/sign.h +0 -47
  388. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric-shake.c +0 -26
  389. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-65/clean/symmetric.h +0 -34
  390. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/LICENSE +0 -5
  391. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile +0 -19
  392. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/Makefile.Microsoft_nmake +0 -23
  393. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/api.h +0 -50
  394. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.c +0 -98
  395. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/ntt.h +0 -10
  396. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.c +0 -261
  397. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/packing.h +0 -31
  398. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/params.h +0 -44
  399. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.c +0 -823
  400. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/poly.h +0 -52
  401. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.c +0 -415
  402. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/polyvec.h +0 -65
  403. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.c +0 -69
  404. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/reduce.h +0 -17
  405. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.c +0 -92
  406. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/rounding.h +0 -14
  407. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.c +0 -407
  408. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/sign.h +0 -47
  409. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric-shake.c +0 -26
  410. data/ext/pqcrypto/vendor/pqclean/crypto_sign/ml-dsa-87/clean/symmetric.h +0 -34
@@ -0,0 +1,719 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /* References
7
+ * ==========
8
+ *
9
+ * - [AVX2_NTT]
10
+ * Faster AVX2 optimized NTT multiplication for Ring-LWE lattice cryptography.
11
+ * Gregor Seiler
12
+ * https://eprint.iacr.org/2018/039
13
+ *
14
+ * - [REF_AVX2]
15
+ * CRYSTALS-Kyber optimized AVX2 implementation
16
+ * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
17
+ * https://github.com/pq-crystals/kyber/tree/main/avx2
18
+ */
19
+
20
+ /*
21
+ * This file is derived from the public domain
22
+ * AVX2 Kyber implementation @[REF_AVX2].
23
+ *
24
+ * The core ideas behind the implementation are described in @[AVX2_NTT].
25
+ *
26
+ * Changes:
27
+ * - Different placement of modular reductions to simplify
28
+ * reasoning about non-overflow
29
+ */
30
+
31
+ #include "../../../common.h"
32
+ #if defined(MLK_ARITH_BACKEND_X86_64_DEFAULT) && \
33
+ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
34
+
35
+ /*
36
+ * WARNING: This file is auto-derived from the mlkem-native source file
37
+ * dev/x86_64/src/intt.S using scripts/simpasm. Do not modify it directly.
38
+ */
39
+
40
+ #if defined(__ELF__)
41
+ .section .note.GNU-stack,"",@progbits
42
+ #endif
43
+
44
+ .text
45
+ .balign 4
46
+ .global MLK_ASM_NAMESPACE(invntt_avx2) # exported entry point: straight-line AVX2 routine (no branches, no loops) that rewrites sixteen 32-byte vectors at (%rdi) in place
47
+ MLK_ASM_FN_SYMBOL(invntt_avx2)
48
+
49
+ .cfi_startproc # rdi = data buffer, bytes 0x000..0x1ff are read and written; rsi = constant table, only read (offsets 0x00..0x3df); vmovdqa is used on both, so both must be 32-byte aligned; writes eax and ymm0-ymm15; no stack access (presumably rdi/rsi are the first two SysV integer arguments -- TODO confirm against the C prototype)
50
+ movl $0xd010d01, %eax # imm = 0xD010D01; both 16-bit halves are 0x0D01 = 3329
51
+ vmovd %eax, %xmm0
52
+ vpbroadcastd %xmm0, %ymm0 # ymm0 = 3329 in every 16-bit lane; never written again in this routine (presumably the Kyber modulus q -- TODO confirm)
53
+ movl $0xd8a1d8a1, %eax # imm = 0xD8A1D8A1
54
+ vmovd %eax, %xmm2
55
+ vpbroadcastd %xmm2, %ymm2 # ymm2 = 0xD8A1 in every 16-bit lane: multiplier for the low-half products (vpmullw) of the scaling step below
56
+ movl $0x5a105a1, %eax # imm = 0x5A105A1
57
+ vmovd %eax, %xmm3
58
+ vpbroadcastd %xmm3, %ymm3 # ymm3 = 0x05A1 in every 16-bit lane: multiplier for the high-half products (vpmulhw) of the scaling step below
59
+ vmovdqa (%rdi), %ymm4 # first half: load the vectors at rdi+0x00, 0x40, 0x20, 0x60
60
+ vmovdqa 0x40(%rdi), %ymm6
61
+ vmovdqa 0x20(%rdi), %ymm5
62
+ vmovdqa 0x60(%rdi), %ymm7
63
+ vpmullw %ymm2, %ymm4, %ymm12 # scaling step, repeated per vector x: x = mulhi(x, ymm3) - mulhi(mullo(x, ymm2), ymm0); ymm12 is the scratch register (looks like a Montgomery-style multiply by one fixed constant -- TODO confirm)
64
+ vpmulhw %ymm3, %ymm4, %ymm4
65
+ vpmulhw %ymm0, %ymm12, %ymm12
66
+ vpsubw %ymm12, %ymm4, %ymm4
67
+ vpmullw %ymm2, %ymm6, %ymm12
68
+ vpmulhw %ymm3, %ymm6, %ymm6
69
+ vpmulhw %ymm0, %ymm12, %ymm12
70
+ vpsubw %ymm12, %ymm6, %ymm6
71
+ vpmullw %ymm2, %ymm5, %ymm12
72
+ vpmulhw %ymm3, %ymm5, %ymm5
73
+ vpmulhw %ymm0, %ymm12, %ymm12
74
+ vpsubw %ymm12, %ymm5, %ymm5
75
+ vpmullw %ymm2, %ymm7, %ymm12
76
+ vpmulhw %ymm3, %ymm7, %ymm7
77
+ vpmulhw %ymm0, %ymm12, %ymm12
78
+ vpsubw %ymm12, %ymm7, %ymm7
79
+ vmovdqa 0x80(%rdi), %ymm8 # load the vectors at rdi+0x80, 0xc0, 0xa0, 0xe0 and apply the same scaling step to each
80
+ vmovdqa 0xc0(%rdi), %ymm10
81
+ vmovdqa 0xa0(%rdi), %ymm9
82
+ vmovdqa 0xe0(%rdi), %ymm11
83
+ vpmullw %ymm2, %ymm8, %ymm12
84
+ vpmulhw %ymm3, %ymm8, %ymm8
85
+ vpmulhw %ymm0, %ymm12, %ymm12
86
+ vpsubw %ymm12, %ymm8, %ymm8
87
+ vpmullw %ymm2, %ymm10, %ymm12
88
+ vpmulhw %ymm3, %ymm10, %ymm10
89
+ vpmulhw %ymm0, %ymm12, %ymm12
90
+ vpsubw %ymm12, %ymm10, %ymm10
91
+ vpmullw %ymm2, %ymm9, %ymm12
92
+ vpmulhw %ymm3, %ymm9, %ymm9
93
+ vpmulhw %ymm0, %ymm12, %ymm12
94
+ vpsubw %ymm12, %ymm9, %ymm9
95
+ vpmullw %ymm2, %ymm11, %ymm12
96
+ vpmulhw %ymm3, %ymm11, %ymm11
97
+ vpmulhw %ymm0, %ymm12, %ymm12
98
+ vpsubw %ymm12, %ymm11, %ymm11
99
+ vpermq $0x4e, 0x3a0(%rsi), %ymm15 # ymm15 = mem[2,3,0,1]
100
+ vpermq $0x4e, 0x360(%rsi), %ymm1 # ymm1 = mem[2,3,0,1]
101
+ vpermq $0x4e, 0x3c0(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]
102
+ vpermq $0x4e, 0x380(%rsi), %ymm3 # ymm3 = mem[2,3,0,1]
103
+ vmovdqa (%rsi), %ymm12 # byte-shuffle mask stored at table offset 0; applied to the four multiplier vectors just loaded (this overwrites the scaling constants in ymm2/ymm3)
104
+ vpshufb %ymm12, %ymm15, %ymm15
105
+ vpshufb %ymm12, %ymm1, %ymm1
106
+ vpshufb %ymm12, %ymm2, %ymm2
107
+ vpshufb %ymm12, %ymm3, %ymm3
108
+ vpsubw %ymm4, %ymm6, %ymm12 # butterfly on pairs (ymm4,ymm6) (ymm5,ymm7) (ymm8,ymm10) (ymm9,ymm11): d = b - a; a = a + b; b = mulhi(d, hi) - mulhi(mullo(d, lo), ymm0); lo/hi are ymm15/ymm2 for the first two pairs and ymm1/ymm3 for the last two
109
+ vpaddw %ymm6, %ymm4, %ymm4
110
+ vpsubw %ymm5, %ymm7, %ymm13
111
+ vpmullw %ymm15, %ymm12, %ymm6
112
+ vpaddw %ymm7, %ymm5, %ymm5
113
+ vpsubw %ymm8, %ymm10, %ymm14
114
+ vpmullw %ymm15, %ymm13, %ymm7
115
+ vpaddw %ymm10, %ymm8, %ymm8
116
+ vpsubw %ymm9, %ymm11, %ymm15 # ymm15 was last read as a multiplier by the vpmullw two instructions up, so it is reused here for the fourth difference
117
+ vpmullw %ymm1, %ymm14, %ymm10
118
+ vpaddw %ymm11, %ymm9, %ymm9
119
+ vpmullw %ymm1, %ymm15, %ymm11
120
+ vpmulhw %ymm2, %ymm12, %ymm12
121
+ vpmulhw %ymm2, %ymm13, %ymm13
122
+ vpmulhw %ymm3, %ymm14, %ymm14
123
+ vpmulhw %ymm3, %ymm15, %ymm15
124
+ vpmulhw %ymm0, %ymm6, %ymm6
125
+ vpmulhw %ymm0, %ymm7, %ymm7
126
+ vpmulhw %ymm0, %ymm10, %ymm10
127
+ vpmulhw %ymm0, %ymm11, %ymm11
128
+ vpsubw %ymm6, %ymm12, %ymm6
129
+ vpsubw %ymm7, %ymm13, %ymm7
130
+ vpsubw %ymm10, %ymm14, %ymm10
131
+ vpsubw %ymm11, %ymm15, %ymm11
132
+ vpermq $0x4e, 0x320(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]
133
+ vpermq $0x4e, 0x340(%rsi), %ymm3 # ymm3 = mem[2,3,0,1]
134
+ vmovdqa (%rsi), %ymm1 # reload the shuffle mask from table offset 0; multipliers for the next butterflies are rsi+0x320 (lo, ymm2) and rsi+0x340 (hi, ymm3)
135
+ vpshufb %ymm1, %ymm2, %ymm2
136
+ vpshufb %ymm1, %ymm3, %ymm3
137
+ vpsubw %ymm4, %ymm8, %ymm12 # butterflies (same formula as above) on pairs (ymm4,ymm8) (ymm5,ymm9) (ymm6,ymm10) (ymm7,ymm11), one lo/hi multiplier pair for all four
138
+ vpaddw %ymm8, %ymm4, %ymm4
139
+ vpsubw %ymm5, %ymm9, %ymm13
140
+ vpmullw %ymm2, %ymm12, %ymm8
141
+ vpaddw %ymm9, %ymm5, %ymm5
142
+ vpsubw %ymm6, %ymm10, %ymm14
143
+ vpmullw %ymm2, %ymm13, %ymm9
144
+ vpaddw %ymm10, %ymm6, %ymm6
145
+ vpsubw %ymm7, %ymm11, %ymm15
146
+ vpmullw %ymm2, %ymm14, %ymm10
147
+ vpaddw %ymm11, %ymm7, %ymm7
148
+ vpmullw %ymm2, %ymm15, %ymm11
149
+ vpmulhw %ymm3, %ymm12, %ymm12
150
+ vpmulhw %ymm3, %ymm13, %ymm13
151
+ vpmulhw %ymm3, %ymm14, %ymm14
152
+ vpmulhw %ymm3, %ymm15, %ymm15
153
+ vpmulhw %ymm0, %ymm8, %ymm8
154
+ vpmulhw %ymm0, %ymm9, %ymm9
155
+ vpmulhw %ymm0, %ymm10, %ymm10
156
+ vpmulhw %ymm0, %ymm11, %ymm11
157
+ vpsubw %ymm8, %ymm12, %ymm8
158
+ vpsubw %ymm9, %ymm13, %ymm9
159
+ vpsubw %ymm10, %ymm14, %ymm10
160
+ vpsubw %ymm11, %ymm15, %ymm11
161
+ vpslld $0x10, %ymm5, %ymm3 # regroup 16-bit words across the pairs (ymm4,ymm5) (ymm6,ymm7) (ymm8,ymm9) (ymm10,ymm11) with shift + blend; results land in (ymm3,ymm5) (ymm4,ymm7) (ymm6,ymm9) (ymm8,ymm11)
162
+ vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
163
+ vpsrld $0x10, %ymm4, %ymm4
164
+ vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
165
+ vpslld $0x10, %ymm7, %ymm4
166
+ vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
167
+ vpsrld $0x10, %ymm6, %ymm6
168
+ vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
169
+ vpslld $0x10, %ymm9, %ymm6
170
+ vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
171
+ vpsrld $0x10, %ymm8, %ymm8
172
+ vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
173
+ vpslld $0x10, %ymm11, %ymm8
174
+ vpblendw $0xaa, %ymm8, %ymm10, %ymm8 # ymm8 = ymm10[0],ymm8[1],ymm10[2],ymm8[3],ymm10[4],ymm8[5],ymm10[6],ymm8[7],ymm10[8],ymm8[9],ymm10[10],ymm8[11],ymm10[12],ymm8[13],ymm10[14],ymm8[15]
175
+ vpsrld $0x10, %ymm10, %ymm10
176
+ vpblendw $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7],ymm10[8],ymm11[9],ymm10[10],ymm11[11],ymm10[12],ymm11[13],ymm10[14],ymm11[15]
177
+ vmovdqa 0x20(%rsi), %ymm12 # dword permutation indices (table offset 0x20) for the two vpermd loads below: multipliers rsi+0x2e0 (lo, ymm2) and rsi+0x300 (hi, ymm10)
178
+ vpermd 0x2e0(%rsi), %ymm12, %ymm2
179
+ vpermd 0x300(%rsi), %ymm12, %ymm10
180
+ vpsubw %ymm3, %ymm5, %ymm12 # butterflies on the regrouped pairs (ymm3,ymm5) (ymm4,ymm7) (ymm6,ymm9) (ymm8,ymm11)
181
+ vpaddw %ymm5, %ymm3, %ymm3
182
+ vpsubw %ymm4, %ymm7, %ymm13
183
+ vpmullw %ymm2, %ymm12, %ymm5
184
+ vpaddw %ymm7, %ymm4, %ymm4
185
+ vpsubw %ymm6, %ymm9, %ymm14
186
+ vpmullw %ymm2, %ymm13, %ymm7
187
+ vpaddw %ymm9, %ymm6, %ymm6
188
+ vpsubw %ymm8, %ymm11, %ymm15
189
+ vpmullw %ymm2, %ymm14, %ymm9
190
+ vpaddw %ymm11, %ymm8, %ymm8
191
+ vpmullw %ymm2, %ymm15, %ymm11
192
+ vpmulhw %ymm10, %ymm12, %ymm12
193
+ vpmulhw %ymm10, %ymm13, %ymm13
194
+ vpmulhw %ymm10, %ymm14, %ymm14
195
+ vpmulhw %ymm10, %ymm15, %ymm15
196
+ vpmulhw %ymm0, %ymm5, %ymm5
197
+ vpmulhw %ymm0, %ymm7, %ymm7
198
+ vpmulhw %ymm0, %ymm9, %ymm9
199
+ vpmulhw %ymm0, %ymm11, %ymm11
200
+ vpsubw %ymm5, %ymm12, %ymm5
201
+ vpsubw %ymm7, %ymm13, %ymm7
202
+ vpsubw %ymm9, %ymm14, %ymm9
203
+ vpsubw %ymm11, %ymm15, %ymm11
204
+ movl $0x4ebf4ebf, %eax # imm = 0x4EBF4EBF; both 16-bit halves are 0x4EBF = 20159
205
+ vmovd %eax, %xmm1
206
+ vpbroadcastd %xmm1, %ymm1 # ymm1 = 20159 in every 16-bit lane: multiplier for the reduction sequences in the rest of this half
207
+ vpmulhw %ymm1, %ymm3, %ymm12 # reduction sequence on ymm3: x -= (mulhi(x, 20159) >> 10, arithmetic shift) * 3329 (looks like a Barrett-style reduction -- TODO confirm); only one sum vector gets this per step
208
+ vpsraw $0xa, %ymm12, %ymm12
209
+ vpmullw %ymm0, %ymm12, %ymm12
210
+ vpsubw %ymm12, %ymm3, %ymm3
211
+ vmovsldup %ymm4, %ymm10 # ymm10 = ymm4[0,0,2,2,4,4,6,6]; start of the 32-bit regroup across pairs, results land in (ymm10,ymm4) (ymm3,ymm8) (ymm6,ymm7) (ymm5,ymm11)
212
+ vpblendd $0xaa, %ymm10, %ymm3, %ymm10 # ymm10 = ymm3[0],ymm10[1],ymm3[2],ymm10[3],ymm3[4],ymm10[5],ymm3[6],ymm10[7]
213
+ vpsrlq $0x20, %ymm3, %ymm3
214
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
215
+ vmovsldup %ymm8, %ymm3 # ymm3 = ymm8[0,0,2,2,4,4,6,6]
216
+ vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
217
+ vpsrlq $0x20, %ymm6, %ymm6
218
+ vpblendd $0xaa, %ymm8, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm8[1],ymm6[2],ymm8[3],ymm6[4],ymm8[5],ymm6[6],ymm8[7]
219
+ vmovsldup %ymm7, %ymm6 # ymm6 = ymm7[0,0,2,2,4,4,6,6]
220
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
221
+ vpsrlq $0x20, %ymm5, %ymm5
222
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
223
+ vmovsldup %ymm11, %ymm5 # ymm5 = ymm11[0,0,2,2,4,4,6,6]
224
+ vpblendd $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7]
225
+ vpsrlq $0x20, %ymm9, %ymm9
226
+ vpblendd $0xaa, %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[0],ymm11[1],ymm9[2],ymm11[3],ymm9[4],ymm11[5],ymm9[6],ymm11[7]
227
+ vpermq $0x1b, 0x2a0(%rsi), %ymm2 # ymm2 = mem[3,2,1,0]; lo multiplier for the next butterflies
228
+ vpermq $0x1b, 0x2c0(%rsi), %ymm9 # ymm9 = mem[3,2,1,0]; hi multiplier for the next butterflies
229
+ vpsubw %ymm10, %ymm4, %ymm12 # butterflies on (ymm10,ymm4) (ymm3,ymm8) (ymm6,ymm7) (ymm5,ymm11)
230
+ vpaddw %ymm4, %ymm10, %ymm10
231
+ vpsubw %ymm3, %ymm8, %ymm13
232
+ vpmullw %ymm2, %ymm12, %ymm4
233
+ vpaddw %ymm8, %ymm3, %ymm3
234
+ vpsubw %ymm6, %ymm7, %ymm14
235
+ vpmullw %ymm2, %ymm13, %ymm8
236
+ vpaddw %ymm7, %ymm6, %ymm6
237
+ vpsubw %ymm5, %ymm11, %ymm15
238
+ vpmullw %ymm2, %ymm14, %ymm7
239
+ vpaddw %ymm11, %ymm5, %ymm5
240
+ vpmullw %ymm2, %ymm15, %ymm11
241
+ vpmulhw %ymm9, %ymm12, %ymm12
242
+ vpmulhw %ymm9, %ymm13, %ymm13
243
+ vpmulhw %ymm9, %ymm14, %ymm14
244
+ vpmulhw %ymm9, %ymm15, %ymm15
245
+ vpmulhw %ymm0, %ymm4, %ymm4
246
+ vpmulhw %ymm0, %ymm8, %ymm8
247
+ vpmulhw %ymm0, %ymm7, %ymm7
248
+ vpmulhw %ymm0, %ymm11, %ymm11
249
+ vpsubw %ymm4, %ymm12, %ymm4
250
+ vpsubw %ymm8, %ymm13, %ymm8
251
+ vpsubw %ymm7, %ymm14, %ymm7
252
+ vpsubw %ymm11, %ymm15, %ymm11
253
+ vpmulhw %ymm1, %ymm10, %ymm12 # reduction sequence (same four instructions as before) applied to ymm10
254
+ vpsraw $0xa, %ymm12, %ymm12
255
+ vpmullw %ymm0, %ymm12, %ymm12
256
+ vpsubw %ymm12, %ymm10, %ymm10
257
+ vpunpcklqdq %ymm3, %ymm10, %ymm9 # ymm9 = ymm10[0],ymm3[0],ymm10[2],ymm3[2]; start of the 64-bit regroup, results land in (ymm9,ymm3) (ymm10,ymm5) (ymm6,ymm8) (ymm4,ymm11)
258
+ vpunpckhqdq %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[1],ymm3[1],ymm10[3],ymm3[3]
259
+ vpunpcklqdq %ymm5, %ymm6, %ymm10 # ymm10 = ymm6[0],ymm5[0],ymm6[2],ymm5[2]
260
+ vpunpckhqdq %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[1],ymm5[1],ymm6[3],ymm5[3]
261
+ vpunpcklqdq %ymm8, %ymm4, %ymm6 # ymm6 = ymm4[0],ymm8[0],ymm4[2],ymm8[2]
262
+ vpunpckhqdq %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[1],ymm8[1],ymm4[3],ymm8[3]
263
+ vpunpcklqdq %ymm11, %ymm7, %ymm4 # ymm4 = ymm7[0],ymm11[0],ymm7[2],ymm11[2]
264
+ vpunpckhqdq %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[1],ymm11[1],ymm7[3],ymm11[3]
265
+ vpermq $0x4e, 0x260(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]; lo multiplier for the next butterflies
266
+ vpermq $0x4e, 0x280(%rsi), %ymm7 # ymm7 = mem[2,3,0,1]; hi multiplier for the next butterflies
267
+ vpsubw %ymm9, %ymm3, %ymm12 # butterflies on (ymm9,ymm3) (ymm10,ymm5) (ymm6,ymm8) (ymm4,ymm11)
268
+ vpaddw %ymm3, %ymm9, %ymm9
269
+ vpsubw %ymm10, %ymm5, %ymm13
270
+ vpmullw %ymm2, %ymm12, %ymm3
271
+ vpaddw %ymm5, %ymm10, %ymm10
272
+ vpsubw %ymm6, %ymm8, %ymm14
273
+ vpmullw %ymm2, %ymm13, %ymm5
274
+ vpaddw %ymm8, %ymm6, %ymm6
275
+ vpsubw %ymm4, %ymm11, %ymm15
276
+ vpmullw %ymm2, %ymm14, %ymm8
277
+ vpaddw %ymm11, %ymm4, %ymm4
278
+ vpmullw %ymm2, %ymm15, %ymm11
279
+ vpmulhw %ymm7, %ymm12, %ymm12
280
+ vpmulhw %ymm7, %ymm13, %ymm13
281
+ vpmulhw %ymm7, %ymm14, %ymm14
282
+ vpmulhw %ymm7, %ymm15, %ymm15
283
+ vpmulhw %ymm0, %ymm3, %ymm3
284
+ vpmulhw %ymm0, %ymm5, %ymm5
285
+ vpmulhw %ymm0, %ymm8, %ymm8
286
+ vpmulhw %ymm0, %ymm11, %ymm11
287
+ vpsubw %ymm3, %ymm12, %ymm3
288
+ vpsubw %ymm5, %ymm13, %ymm5
289
+ vpsubw %ymm8, %ymm14, %ymm8
290
+ vpsubw %ymm11, %ymm15, %ymm11
291
+ vpmulhw %ymm1, %ymm9, %ymm12 # reduction sequence applied to ymm9
292
+ vpsraw $0xa, %ymm12, %ymm12
293
+ vpmullw %ymm0, %ymm12, %ymm12
294
+ vpsubw %ymm12, %ymm9, %ymm9
295
+ vperm2i128 $0x20, %ymm10, %ymm9, %ymm7 # ymm7 = ymm9[0,1],ymm10[0,1]; start of the 128-bit regroup, results land in (ymm7,ymm10) (ymm9,ymm4) (ymm6,ymm5) (ymm3,ymm11)
296
+ vperm2i128 $0x31, %ymm10, %ymm9, %ymm10 # ymm10 = ymm9[2,3],ymm10[2,3]
297
+ vperm2i128 $0x20, %ymm4, %ymm6, %ymm9 # ymm9 = ymm6[0,1],ymm4[0,1]
298
+ vperm2i128 $0x31, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[2,3],ymm4[2,3]
299
+ vperm2i128 $0x20, %ymm5, %ymm3, %ymm6 # ymm6 = ymm3[0,1],ymm5[0,1]
300
+ vperm2i128 $0x31, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[2,3],ymm5[2,3]
301
+ vperm2i128 $0x20, %ymm11, %ymm8, %ymm3 # ymm3 = ymm8[0,1],ymm11[0,1]
302
+ vperm2i128 $0x31, %ymm11, %ymm8, %ymm11 # ymm11 = ymm8[2,3],ymm11[2,3]
303
+ vmovdqa 0x220(%rsi), %ymm2 # lo multiplier rsi+0x220 and (next line) hi multiplier rsi+0x240, used as stored without any permutation
304
+ vmovdqa 0x240(%rsi), %ymm8
305
+ vpsubw %ymm7, %ymm10, %ymm12 # butterflies on (ymm7,ymm10) (ymm9,ymm4) (ymm6,ymm5) (ymm3,ymm11)
306
+ vpaddw %ymm10, %ymm7, %ymm7
307
+ vpsubw %ymm9, %ymm4, %ymm13
308
+ vpmullw %ymm2, %ymm12, %ymm10
309
+ vpaddw %ymm4, %ymm9, %ymm9
310
+ vpsubw %ymm6, %ymm5, %ymm14
311
+ vpmullw %ymm2, %ymm13, %ymm4
312
+ vpaddw %ymm5, %ymm6, %ymm6
313
+ vpsubw %ymm3, %ymm11, %ymm15
314
+ vpmullw %ymm2, %ymm14, %ymm5
315
+ vpaddw %ymm11, %ymm3, %ymm3
316
+ vpmullw %ymm2, %ymm15, %ymm11
317
+ vpmulhw %ymm8, %ymm12, %ymm12
318
+ vpmulhw %ymm8, %ymm13, %ymm13
319
+ vpmulhw %ymm8, %ymm14, %ymm14
320
+ vpmulhw %ymm8, %ymm15, %ymm15
321
+ vpmulhw %ymm0, %ymm10, %ymm10
322
+ vpmulhw %ymm0, %ymm4, %ymm4
323
+ vpmulhw %ymm0, %ymm5, %ymm5
324
+ vpmulhw %ymm0, %ymm11, %ymm11
325
+ vpsubw %ymm10, %ymm12, %ymm10
326
+ vpsubw %ymm4, %ymm13, %ymm4
327
+ vpsubw %ymm5, %ymm14, %ymm5
328
+ vpsubw %ymm11, %ymm15, %ymm11
329
+ vpmulhw %ymm1, %ymm7, %ymm12 # reduction sequence applied to ymm7
330
+ vpsraw $0xa, %ymm12, %ymm12
331
+ vpmullw %ymm0, %ymm12, %ymm12
332
+ vpsubw %ymm12, %ymm7, %ymm7
333
+ vmovdqa %ymm7, (%rdi) # store the first half back to rdi+0x00..0xff (sums at 0x00..0x7f, products at 0x80..0xff)
334
+ vmovdqa %ymm9, 0x20(%rdi)
335
+ vmovdqa %ymm6, 0x40(%rdi)
336
+ vmovdqa %ymm3, 0x60(%rdi)
337
+ vmovdqa %ymm10, 0x80(%rdi)
338
+ vmovdqa %ymm4, 0xa0(%rdi)
339
+ vmovdqa %ymm5, 0xc0(%rdi)
340
+ vmovdqa %ymm11, 0xe0(%rdi)
341
+ movl $0xd8a1d8a1, %eax # imm = 0xD8A1D8A1; ymm2 and ymm3 were reused as multipliers above, so the two scaling constants are rebuilt here
342
+ vmovd %eax, %xmm2
343
+ vpbroadcastd %xmm2, %ymm2
344
+ movl $0x5a105a1, %eax # imm = 0x5A105A1
345
+ vmovd %eax, %xmm3
346
+ vpbroadcastd %xmm3, %ymm3
347
+ vmovdqa 0x100(%rdi), %ymm4 # second half: the same instruction sequence as the first half, on rdi+0x100..0x1ff, with multipliers taken from table offsets rsi+0x60..0x21f instead of rsi+0x220..0x3df
348
+ vmovdqa 0x140(%rdi), %ymm6
349
+ vmovdqa 0x120(%rdi), %ymm5
350
+ vmovdqa 0x160(%rdi), %ymm7
351
+ vpmullw %ymm2, %ymm4, %ymm12 # scaling step on the vectors from rdi+0x100, 0x140, 0x120, 0x160
352
+ vpmulhw %ymm3, %ymm4, %ymm4
353
+ vpmulhw %ymm0, %ymm12, %ymm12
354
+ vpsubw %ymm12, %ymm4, %ymm4
355
+ vpmullw %ymm2, %ymm6, %ymm12
356
+ vpmulhw %ymm3, %ymm6, %ymm6
357
+ vpmulhw %ymm0, %ymm12, %ymm12
358
+ vpsubw %ymm12, %ymm6, %ymm6
359
+ vpmullw %ymm2, %ymm5, %ymm12
360
+ vpmulhw %ymm3, %ymm5, %ymm5
361
+ vpmulhw %ymm0, %ymm12, %ymm12
362
+ vpsubw %ymm12, %ymm5, %ymm5
363
+ vpmullw %ymm2, %ymm7, %ymm12
364
+ vpmulhw %ymm3, %ymm7, %ymm7
365
+ vpmulhw %ymm0, %ymm12, %ymm12
366
+ vpsubw %ymm12, %ymm7, %ymm7
367
+ vmovdqa 0x180(%rdi), %ymm8 # load and scale the vectors from rdi+0x180, 0x1c0, 0x1a0, 0x1e0
368
+ vmovdqa 0x1c0(%rdi), %ymm10
369
+ vmovdqa 0x1a0(%rdi), %ymm9
370
+ vmovdqa 0x1e0(%rdi), %ymm11
371
+ vpmullw %ymm2, %ymm8, %ymm12
372
+ vpmulhw %ymm3, %ymm8, %ymm8
373
+ vpmulhw %ymm0, %ymm12, %ymm12
374
+ vpsubw %ymm12, %ymm8, %ymm8
375
+ vpmullw %ymm2, %ymm10, %ymm12
376
+ vpmulhw %ymm3, %ymm10, %ymm10
377
+ vpmulhw %ymm0, %ymm12, %ymm12
378
+ vpsubw %ymm12, %ymm10, %ymm10
379
+ vpmullw %ymm2, %ymm9, %ymm12
380
+ vpmulhw %ymm3, %ymm9, %ymm9
381
+ vpmulhw %ymm0, %ymm12, %ymm12
382
+ vpsubw %ymm12, %ymm9, %ymm9
383
+ vpmullw %ymm2, %ymm11, %ymm12
384
+ vpmulhw %ymm3, %ymm11, %ymm11
385
+ vpmulhw %ymm0, %ymm12, %ymm12
386
+ vpsubw %ymm12, %ymm11, %ymm11
387
+ vpermq $0x4e, 0x1e0(%rsi), %ymm15 # ymm15 = mem[2,3,0,1]
388
+ vpermq $0x4e, 0x1a0(%rsi), %ymm1 # ymm1 = mem[2,3,0,1]
389
+ vpermq $0x4e, 0x200(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]
390
+ vpermq $0x4e, 0x1c0(%rsi), %ymm3 # ymm3 = mem[2,3,0,1]
391
+ vmovdqa (%rsi), %ymm12 # shuffle mask from table offset 0, applied to the four multiplier vectors just loaded
392
+ vpshufb %ymm12, %ymm15, %ymm15
393
+ vpshufb %ymm12, %ymm1, %ymm1
394
+ vpshufb %ymm12, %ymm2, %ymm2
395
+ vpshufb %ymm12, %ymm3, %ymm3
396
+ vpsubw %ymm4, %ymm6, %ymm12 # butterflies on (ymm4,ymm6) (ymm5,ymm7) with lo/hi = ymm15/ymm2 and on (ymm8,ymm10) (ymm9,ymm11) with lo/hi = ymm1/ymm3
397
+ vpaddw %ymm6, %ymm4, %ymm4
398
+ vpsubw %ymm5, %ymm7, %ymm13
399
+ vpmullw %ymm15, %ymm12, %ymm6
400
+ vpaddw %ymm7, %ymm5, %ymm5
401
+ vpsubw %ymm8, %ymm10, %ymm14
402
+ vpmullw %ymm15, %ymm13, %ymm7
403
+ vpaddw %ymm10, %ymm8, %ymm8
404
+ vpsubw %ymm9, %ymm11, %ymm15
405
+ vpmullw %ymm1, %ymm14, %ymm10
406
+ vpaddw %ymm11, %ymm9, %ymm9
407
+ vpmullw %ymm1, %ymm15, %ymm11
408
+ vpmulhw %ymm2, %ymm12, %ymm12
409
+ vpmulhw %ymm2, %ymm13, %ymm13
410
+ vpmulhw %ymm3, %ymm14, %ymm14
411
+ vpmulhw %ymm3, %ymm15, %ymm15
412
+ vpmulhw %ymm0, %ymm6, %ymm6
413
+ vpmulhw %ymm0, %ymm7, %ymm7
414
+ vpmulhw %ymm0, %ymm10, %ymm10
415
+ vpmulhw %ymm0, %ymm11, %ymm11
416
+ vpsubw %ymm6, %ymm12, %ymm6
417
+ vpsubw %ymm7, %ymm13, %ymm7
418
+ vpsubw %ymm10, %ymm14, %ymm10
419
+ vpsubw %ymm11, %ymm15, %ymm11
420
+ vpermq $0x4e, 0x160(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]
421
+ vpermq $0x4e, 0x180(%rsi), %ymm3 # ymm3 = mem[2,3,0,1]
422
+ vmovdqa (%rsi), %ymm1 # reload the shuffle mask; multipliers for the next butterflies are rsi+0x160 (lo, ymm2) and rsi+0x180 (hi, ymm3)
423
+ vpshufb %ymm1, %ymm2, %ymm2
424
+ vpshufb %ymm1, %ymm3, %ymm3
425
+ vpsubw %ymm4, %ymm8, %ymm12 # butterflies on (ymm4,ymm8) (ymm5,ymm9) (ymm6,ymm10) (ymm7,ymm11)
426
+ vpaddw %ymm8, %ymm4, %ymm4
427
+ vpsubw %ymm5, %ymm9, %ymm13
428
+ vpmullw %ymm2, %ymm12, %ymm8
429
+ vpaddw %ymm9, %ymm5, %ymm5
430
+ vpsubw %ymm6, %ymm10, %ymm14
431
+ vpmullw %ymm2, %ymm13, %ymm9
432
+ vpaddw %ymm10, %ymm6, %ymm6
433
+ vpsubw %ymm7, %ymm11, %ymm15
434
+ vpmullw %ymm2, %ymm14, %ymm10
435
+ vpaddw %ymm11, %ymm7, %ymm7
436
+ vpmullw %ymm2, %ymm15, %ymm11
437
+ vpmulhw %ymm3, %ymm12, %ymm12
438
+ vpmulhw %ymm3, %ymm13, %ymm13
439
+ vpmulhw %ymm3, %ymm14, %ymm14
440
+ vpmulhw %ymm3, %ymm15, %ymm15
441
+ vpmulhw %ymm0, %ymm8, %ymm8
442
+ vpmulhw %ymm0, %ymm9, %ymm9
443
+ vpmulhw %ymm0, %ymm10, %ymm10
444
+ vpmulhw %ymm0, %ymm11, %ymm11
445
+ vpsubw %ymm8, %ymm12, %ymm8
446
+ vpsubw %ymm9, %ymm13, %ymm9
447
+ vpsubw %ymm10, %ymm14, %ymm10
448
+ vpsubw %ymm11, %ymm15, %ymm11
449
+ vpslld $0x10, %ymm5, %ymm3 # 16-bit regroup across pairs with shift + blend; results land in (ymm3,ymm5) (ymm4,ymm7) (ymm6,ymm9) (ymm8,ymm11)
450
+ vpblendw $0xaa, %ymm3, %ymm4, %ymm3 # ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3],ymm4[4],ymm3[5],ymm4[6],ymm3[7],ymm4[8],ymm3[9],ymm4[10],ymm3[11],ymm4[12],ymm3[13],ymm4[14],ymm3[15]
451
+ vpsrld $0x10, %ymm4, %ymm4
452
+ vpblendw $0xaa, %ymm5, %ymm4, %ymm5 # ymm5 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7],ymm4[8],ymm5[9],ymm4[10],ymm5[11],ymm4[12],ymm5[13],ymm4[14],ymm5[15]
453
+ vpslld $0x10, %ymm7, %ymm4
454
+ vpblendw $0xaa, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[0],ymm4[1],ymm6[2],ymm4[3],ymm6[4],ymm4[5],ymm6[6],ymm4[7],ymm6[8],ymm4[9],ymm6[10],ymm4[11],ymm6[12],ymm4[13],ymm6[14],ymm4[15]
455
+ vpsrld $0x10, %ymm6, %ymm6
456
+ vpblendw $0xaa, %ymm7, %ymm6, %ymm7 # ymm7 = ymm6[0],ymm7[1],ymm6[2],ymm7[3],ymm6[4],ymm7[5],ymm6[6],ymm7[7],ymm6[8],ymm7[9],ymm6[10],ymm7[11],ymm6[12],ymm7[13],ymm6[14],ymm7[15]
457
+ vpslld $0x10, %ymm9, %ymm6
458
+ vpblendw $0xaa, %ymm6, %ymm8, %ymm6 # ymm6 = ymm8[0],ymm6[1],ymm8[2],ymm6[3],ymm8[4],ymm6[5],ymm8[6],ymm6[7],ymm8[8],ymm6[9],ymm8[10],ymm6[11],ymm8[12],ymm6[13],ymm8[14],ymm6[15]
459
+ vpsrld $0x10, %ymm8, %ymm8
460
+ vpblendw $0xaa, %ymm9, %ymm8, %ymm9 # ymm9 = ymm8[0],ymm9[1],ymm8[2],ymm9[3],ymm8[4],ymm9[5],ymm8[6],ymm9[7],ymm8[8],ymm9[9],ymm8[10],ymm9[11],ymm8[12],ymm9[13],ymm8[14],ymm9[15]
461
+ vpslld $0x10, %ymm11, %ymm8
462
+ vpblendw $0xaa, %ymm8, %ymm10, %ymm8 # ymm8 = ymm10[0],ymm8[1],ymm10[2],ymm8[3],ymm10[4],ymm8[5],ymm10[6],ymm8[7],ymm10[8],ymm8[9],ymm10[10],ymm8[11],ymm10[12],ymm8[13],ymm10[14],ymm8[15]
463
+ vpsrld $0x10, %ymm10, %ymm10
464
+ vpblendw $0xaa, %ymm11, %ymm10, %ymm11 # ymm11 = ymm10[0],ymm11[1],ymm10[2],ymm11[3],ymm10[4],ymm11[5],ymm10[6],ymm11[7],ymm10[8],ymm11[9],ymm10[10],ymm11[11],ymm10[12],ymm11[13],ymm10[14],ymm11[15]
465
+ vmovdqa 0x20(%rsi), %ymm12 # dword permutation indices for the two vpermd loads below: multipliers rsi+0x120 (lo, ymm2) and rsi+0x140 (hi, ymm10)
466
+ vpermd 0x120(%rsi), %ymm12, %ymm2
467
+ vpermd 0x140(%rsi), %ymm12, %ymm10
468
+ vpsubw %ymm3, %ymm5, %ymm12 # butterflies on (ymm3,ymm5) (ymm4,ymm7) (ymm6,ymm9) (ymm8,ymm11)
469
+ vpaddw %ymm5, %ymm3, %ymm3
470
+ vpsubw %ymm4, %ymm7, %ymm13
471
+ vpmullw %ymm2, %ymm12, %ymm5
472
+ vpaddw %ymm7, %ymm4, %ymm4
473
+ vpsubw %ymm6, %ymm9, %ymm14
474
+ vpmullw %ymm2, %ymm13, %ymm7
475
+ vpaddw %ymm9, %ymm6, %ymm6
476
+ vpsubw %ymm8, %ymm11, %ymm15
477
+ vpmullw %ymm2, %ymm14, %ymm9
478
+ vpaddw %ymm11, %ymm8, %ymm8
479
+ vpmullw %ymm2, %ymm15, %ymm11
480
+ vpmulhw %ymm10, %ymm12, %ymm12
481
+ vpmulhw %ymm10, %ymm13, %ymm13
482
+ vpmulhw %ymm10, %ymm14, %ymm14
483
+ vpmulhw %ymm10, %ymm15, %ymm15
484
+ vpmulhw %ymm0, %ymm5, %ymm5
485
+ vpmulhw %ymm0, %ymm7, %ymm7
486
+ vpmulhw %ymm0, %ymm9, %ymm9
487
+ vpmulhw %ymm0, %ymm11, %ymm11
488
+ vpsubw %ymm5, %ymm12, %ymm5
489
+ vpsubw %ymm7, %ymm13, %ymm7
490
+ vpsubw %ymm9, %ymm14, %ymm9
491
+ vpsubw %ymm11, %ymm15, %ymm11
492
+ movl $0x4ebf4ebf, %eax # imm = 0x4EBF4EBF; ymm1 was overwritten by multiplier and mask loads earlier in this half, so the reduction constant 20159 is rebuilt
493
+ vmovd %eax, %xmm1
494
+ vpbroadcastd %xmm1, %ymm1
495
+ vpmulhw %ymm1, %ymm3, %ymm12 # reduction sequence applied to ymm3
496
+ vpsraw $0xa, %ymm12, %ymm12
497
+ vpmullw %ymm0, %ymm12, %ymm12
498
+ vpsubw %ymm12, %ymm3, %ymm3
499
+ vmovsldup %ymm4, %ymm10 # ymm10 = ymm4[0,0,2,2,4,4,6,6]; start of the 32-bit regroup, results land in (ymm10,ymm4) (ymm3,ymm8) (ymm6,ymm7) (ymm5,ymm11)
500
+ vpblendd $0xaa, %ymm10, %ymm3, %ymm10 # ymm10 = ymm3[0],ymm10[1],ymm3[2],ymm10[3],ymm3[4],ymm10[5],ymm3[6],ymm10[7]
501
+ vpsrlq $0x20, %ymm3, %ymm3
502
+ vpblendd $0xaa, %ymm4, %ymm3, %ymm4 # ymm4 = ymm3[0],ymm4[1],ymm3[2],ymm4[3],ymm3[4],ymm4[5],ymm3[6],ymm4[7]
503
+ vmovsldup %ymm8, %ymm3 # ymm3 = ymm8[0,0,2,2,4,4,6,6]
504
+ vpblendd $0xaa, %ymm3, %ymm6, %ymm3 # ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3],ymm6[4],ymm3[5],ymm6[6],ymm3[7]
505
+ vpsrlq $0x20, %ymm6, %ymm6
506
+ vpblendd $0xaa, %ymm8, %ymm6, %ymm8 # ymm8 = ymm6[0],ymm8[1],ymm6[2],ymm8[3],ymm6[4],ymm8[5],ymm6[6],ymm8[7]
507
+ vmovsldup %ymm7, %ymm6 # ymm6 = ymm7[0,0,2,2,4,4,6,6]
508
+ vpblendd $0xaa, %ymm6, %ymm5, %ymm6 # ymm6 = ymm5[0],ymm6[1],ymm5[2],ymm6[3],ymm5[4],ymm6[5],ymm5[6],ymm6[7]
509
+ vpsrlq $0x20, %ymm5, %ymm5
510
+ vpblendd $0xaa, %ymm7, %ymm5, %ymm7 # ymm7 = ymm5[0],ymm7[1],ymm5[2],ymm7[3],ymm5[4],ymm7[5],ymm5[6],ymm7[7]
511
+ vmovsldup %ymm11, %ymm5 # ymm5 = ymm11[0,0,2,2,4,4,6,6]
512
+ vpblendd $0xaa, %ymm5, %ymm9, %ymm5 # ymm5 = ymm9[0],ymm5[1],ymm9[2],ymm5[3],ymm9[4],ymm5[5],ymm9[6],ymm5[7]
513
+ vpsrlq $0x20, %ymm9, %ymm9
514
+ vpblendd $0xaa, %ymm11, %ymm9, %ymm11 # ymm11 = ymm9[0],ymm11[1],ymm9[2],ymm11[3],ymm9[4],ymm11[5],ymm9[6],ymm11[7]
515
+ vpermq $0x1b, 0xe0(%rsi), %ymm2 # ymm2 = mem[3,2,1,0]; lo multiplier for the next butterflies
516
+ vpermq $0x1b, 0x100(%rsi), %ymm9 # ymm9 = mem[3,2,1,0]; hi multiplier for the next butterflies
517
+ vpsubw %ymm10, %ymm4, %ymm12 # butterflies on (ymm10,ymm4) (ymm3,ymm8) (ymm6,ymm7) (ymm5,ymm11)
518
+ vpaddw %ymm4, %ymm10, %ymm10
519
+ vpsubw %ymm3, %ymm8, %ymm13
520
+ vpmullw %ymm2, %ymm12, %ymm4
521
+ vpaddw %ymm8, %ymm3, %ymm3
522
+ vpsubw %ymm6, %ymm7, %ymm14
523
+ vpmullw %ymm2, %ymm13, %ymm8
524
+ vpaddw %ymm7, %ymm6, %ymm6
525
+ vpsubw %ymm5, %ymm11, %ymm15
526
+ vpmullw %ymm2, %ymm14, %ymm7
527
+ vpaddw %ymm11, %ymm5, %ymm5
528
+ vpmullw %ymm2, %ymm15, %ymm11
529
+ vpmulhw %ymm9, %ymm12, %ymm12
530
+ vpmulhw %ymm9, %ymm13, %ymm13
531
+ vpmulhw %ymm9, %ymm14, %ymm14
532
+ vpmulhw %ymm9, %ymm15, %ymm15
533
+ vpmulhw %ymm0, %ymm4, %ymm4
534
+ vpmulhw %ymm0, %ymm8, %ymm8
535
+ vpmulhw %ymm0, %ymm7, %ymm7
536
+ vpmulhw %ymm0, %ymm11, %ymm11
537
+ vpsubw %ymm4, %ymm12, %ymm4
538
+ vpsubw %ymm8, %ymm13, %ymm8
539
+ vpsubw %ymm7, %ymm14, %ymm7
540
+ vpsubw %ymm11, %ymm15, %ymm11
541
+ vpmulhw %ymm1, %ymm10, %ymm12 # reduction sequence applied to ymm10
542
+ vpsraw $0xa, %ymm12, %ymm12
543
+ vpmullw %ymm0, %ymm12, %ymm12
544
+ vpsubw %ymm12, %ymm10, %ymm10
545
+ vpunpcklqdq %ymm3, %ymm10, %ymm9 # ymm9 = ymm10[0],ymm3[0],ymm10[2],ymm3[2]; start of the 64-bit regroup, results land in (ymm9,ymm3) (ymm10,ymm5) (ymm6,ymm8) (ymm4,ymm11)
546
+ vpunpckhqdq %ymm3, %ymm10, %ymm3 # ymm3 = ymm10[1],ymm3[1],ymm10[3],ymm3[3]
547
+ vpunpcklqdq %ymm5, %ymm6, %ymm10 # ymm10 = ymm6[0],ymm5[0],ymm6[2],ymm5[2]
548
+ vpunpckhqdq %ymm5, %ymm6, %ymm5 # ymm5 = ymm6[1],ymm5[1],ymm6[3],ymm5[3]
549
+ vpunpcklqdq %ymm8, %ymm4, %ymm6 # ymm6 = ymm4[0],ymm8[0],ymm4[2],ymm8[2]
550
+ vpunpckhqdq %ymm8, %ymm4, %ymm8 # ymm8 = ymm4[1],ymm8[1],ymm4[3],ymm8[3]
551
+ vpunpcklqdq %ymm11, %ymm7, %ymm4 # ymm4 = ymm7[0],ymm11[0],ymm7[2],ymm11[2]
552
+ vpunpckhqdq %ymm11, %ymm7, %ymm11 # ymm11 = ymm7[1],ymm11[1],ymm7[3],ymm11[3]
553
+ vpermq $0x4e, 0xa0(%rsi), %ymm2 # ymm2 = mem[2,3,0,1]; lo multiplier for the next butterflies
554
+ vpermq $0x4e, 0xc0(%rsi), %ymm7 # ymm7 = mem[2,3,0,1]; hi multiplier for the next butterflies
555
+ vpsubw %ymm9, %ymm3, %ymm12 # butterflies on (ymm9,ymm3) (ymm10,ymm5) (ymm6,ymm8) (ymm4,ymm11)
556
+ vpaddw %ymm3, %ymm9, %ymm9
557
+ vpsubw %ymm10, %ymm5, %ymm13
558
+ vpmullw %ymm2, %ymm12, %ymm3
559
+ vpaddw %ymm5, %ymm10, %ymm10
560
+ vpsubw %ymm6, %ymm8, %ymm14
561
+ vpmullw %ymm2, %ymm13, %ymm5
562
+ vpaddw %ymm8, %ymm6, %ymm6
563
+ vpsubw %ymm4, %ymm11, %ymm15
564
+ vpmullw %ymm2, %ymm14, %ymm8
565
+ vpaddw %ymm11, %ymm4, %ymm4
566
+ vpmullw %ymm2, %ymm15, %ymm11
567
+ vpmulhw %ymm7, %ymm12, %ymm12
568
+ vpmulhw %ymm7, %ymm13, %ymm13
569
+ vpmulhw %ymm7, %ymm14, %ymm14
570
+ vpmulhw %ymm7, %ymm15, %ymm15
571
+ vpmulhw %ymm0, %ymm3, %ymm3
572
+ vpmulhw %ymm0, %ymm5, %ymm5
573
+ vpmulhw %ymm0, %ymm8, %ymm8
574
+ vpmulhw %ymm0, %ymm11, %ymm11
575
+ vpsubw %ymm3, %ymm12, %ymm3
576
+ vpsubw %ymm5, %ymm13, %ymm5
577
+ vpsubw %ymm8, %ymm14, %ymm8
578
+ vpsubw %ymm11, %ymm15, %ymm11
579
+ vpmulhw %ymm1, %ymm9, %ymm12 # reduction sequence applied to ymm9
580
+ vpsraw $0xa, %ymm12, %ymm12
581
+ vpmullw %ymm0, %ymm12, %ymm12
582
+ vpsubw %ymm12, %ymm9, %ymm9
583
+ vperm2i128 $0x20, %ymm10, %ymm9, %ymm7 # ymm7 = ymm9[0,1],ymm10[0,1]; start of the 128-bit regroup, results land in (ymm7,ymm10) (ymm9,ymm4) (ymm6,ymm5) (ymm3,ymm11)
584
+ vperm2i128 $0x31, %ymm10, %ymm9, %ymm10 # ymm10 = ymm9[2,3],ymm10[2,3]
585
+ vperm2i128 $0x20, %ymm4, %ymm6, %ymm9 # ymm9 = ymm6[0,1],ymm4[0,1]
586
+ vperm2i128 $0x31, %ymm4, %ymm6, %ymm4 # ymm4 = ymm6[2,3],ymm4[2,3]
587
+ vperm2i128 $0x20, %ymm5, %ymm3, %ymm6 # ymm6 = ymm3[0,1],ymm5[0,1]
588
+ vperm2i128 $0x31, %ymm5, %ymm3, %ymm5 # ymm5 = ymm3[2,3],ymm5[2,3]
589
+ vperm2i128 $0x20, %ymm11, %ymm8, %ymm3 # ymm3 = ymm8[0,1],ymm11[0,1]
590
+ vperm2i128 $0x31, %ymm11, %ymm8, %ymm11 # ymm11 = ymm8[2,3],ymm11[2,3]
591
+ vmovdqa 0x60(%rsi), %ymm2 # lo multiplier rsi+0x60 and (next line) hi multiplier rsi+0x80, used as stored without any permutation
592
+ vmovdqa 0x80(%rsi), %ymm8
593
+ vpsubw %ymm7, %ymm10, %ymm12 # butterflies on (ymm7,ymm10) (ymm9,ymm4) (ymm6,ymm5) (ymm3,ymm11)
594
+ vpaddw %ymm10, %ymm7, %ymm7
595
+ vpsubw %ymm9, %ymm4, %ymm13
596
+ vpmullw %ymm2, %ymm12, %ymm10
597
+ vpaddw %ymm4, %ymm9, %ymm9
598
+ vpsubw %ymm6, %ymm5, %ymm14
599
+ vpmullw %ymm2, %ymm13, %ymm4
600
+ vpaddw %ymm5, %ymm6, %ymm6
601
+ vpsubw %ymm3, %ymm11, %ymm15
602
+ vpmullw %ymm2, %ymm14, %ymm5
603
+ vpaddw %ymm11, %ymm3, %ymm3
604
+ vpmullw %ymm2, %ymm15, %ymm11
605
+ vpmulhw %ymm8, %ymm12, %ymm12
606
+ vpmulhw %ymm8, %ymm13, %ymm13
607
+ vpmulhw %ymm8, %ymm14, %ymm14
608
+ vpmulhw %ymm8, %ymm15, %ymm15
609
+ vpmulhw %ymm0, %ymm10, %ymm10
610
+ vpmulhw %ymm0, %ymm4, %ymm4
611
+ vpmulhw %ymm0, %ymm5, %ymm5
612
+ vpmulhw %ymm0, %ymm11, %ymm11
613
+ vpsubw %ymm10, %ymm12, %ymm10
614
+ vpsubw %ymm4, %ymm13, %ymm4
615
+ vpsubw %ymm5, %ymm14, %ymm5
616
+ vpsubw %ymm11, %ymm15, %ymm11
617
+ vpmulhw %ymm1, %ymm7, %ymm12 # reduction sequence applied to ymm7
618
+ vpsraw $0xa, %ymm12, %ymm12
619
+ vpmullw %ymm0, %ymm12, %ymm12
620
+ vpsubw %ymm12, %ymm7, %ymm7
621
+ vmovdqa %ymm7, 0x100(%rdi) # store the second half back to rdi+0x100..0x1ff (sums at 0x100..0x17f, products at 0x180..0x1ff)
622
+ vmovdqa %ymm9, 0x120(%rdi)
623
+ vmovdqa %ymm6, 0x140(%rdi)
624
+ vmovdqa %ymm3, 0x160(%rdi)
625
+ vmovdqa %ymm10, 0x180(%rdi)
626
+ vmovdqa %ymm4, 0x1a0(%rdi)
627
+ vmovdqa %ymm5, 0x1c0(%rdi)
628
+ vmovdqa %ymm11, 0x1e0(%rdi)
629
+ vmovdqa (%rdi), %ymm4 # final step, part 1: pair the vectors at rdi+0x00..0x7f with those at rdi+0x100..0x17f
630
+ vmovdqa 0x100(%rdi), %ymm8
631
+ vmovdqa 0x20(%rdi), %ymm5
632
+ vmovdqa 0x120(%rdi), %ymm9
633
+ vpbroadcastq 0x40(%rsi), %ymm2 # lo multiplier: the 8 bytes at rsi+0x40 replicated into all four qwords
634
+ vmovdqa 0x40(%rdi), %ymm6
635
+ vmovdqa 0x140(%rdi), %ymm10
636
+ vmovdqa 0x60(%rdi), %ymm7
637
+ vmovdqa 0x160(%rdi), %ymm11
638
+ vpbroadcastq 0x48(%rsi), %ymm3 # hi multiplier: the 8 bytes at rsi+0x48 replicated into all four qwords
639
+ vpsubw %ymm4, %ymm8, %ymm12 # butterflies on (ymm4,ymm8) (ymm5,ymm9) (ymm6,ymm10) (ymm7,ymm11); unlike the earlier steps, no reduction sequence is applied to any sum here
640
+ vpaddw %ymm8, %ymm4, %ymm4
641
+ vpsubw %ymm5, %ymm9, %ymm13
642
+ vpmullw %ymm2, %ymm12, %ymm8
643
+ vpaddw %ymm9, %ymm5, %ymm5
644
+ vpsubw %ymm6, %ymm10, %ymm14
645
+ vpmullw %ymm2, %ymm13, %ymm9
646
+ vpaddw %ymm10, %ymm6, %ymm6
647
+ vpsubw %ymm7, %ymm11, %ymm15
648
+ vpmullw %ymm2, %ymm14, %ymm10
649
+ vpaddw %ymm11, %ymm7, %ymm7
650
+ vpmullw %ymm2, %ymm15, %ymm11
651
+ vpmulhw %ymm3, %ymm12, %ymm12
652
+ vpmulhw %ymm3, %ymm13, %ymm13
653
+ vpmulhw %ymm3, %ymm14, %ymm14
654
+ vpmulhw %ymm3, %ymm15, %ymm15
655
+ vpmulhw %ymm0, %ymm8, %ymm8
656
+ vpmulhw %ymm0, %ymm9, %ymm9
657
+ vpmulhw %ymm0, %ymm10, %ymm10
658
+ vpmulhw %ymm0, %ymm11, %ymm11
659
+ vpsubw %ymm8, %ymm12, %ymm8
660
+ vpsubw %ymm9, %ymm13, %ymm9
661
+ vpsubw %ymm10, %ymm14, %ymm10
662
+ vpsubw %ymm11, %ymm15, %ymm11
663
+ vmovdqa %ymm4, (%rdi) # write the sums to rdi+0x00..0x7f and the products to rdi+0x100..0x17f
664
+ vmovdqa %ymm5, 0x20(%rdi)
665
+ vmovdqa %ymm6, 0x40(%rdi)
666
+ vmovdqa %ymm7, 0x60(%rdi)
667
+ vmovdqa %ymm8, 0x100(%rdi)
668
+ vmovdqa %ymm9, 0x120(%rdi)
669
+ vmovdqa %ymm10, 0x140(%rdi)
670
+ vmovdqa %ymm11, 0x160(%rdi)
671
+ vmovdqa 0x80(%rdi), %ymm4 # final step, part 2: same as part 1 for rdi+0x80..0xff paired with rdi+0x180..0x1ff, using the same two broadcast multipliers
672
+ vmovdqa 0x180(%rdi), %ymm8
673
+ vmovdqa 0xa0(%rdi), %ymm5
674
+ vmovdqa 0x1a0(%rdi), %ymm9
675
+ vpbroadcastq 0x40(%rsi), %ymm2
676
+ vmovdqa 0xc0(%rdi), %ymm6
677
+ vmovdqa 0x1c0(%rdi), %ymm10
678
+ vmovdqa 0xe0(%rdi), %ymm7
679
+ vmovdqa 0x1e0(%rdi), %ymm11
680
+ vpbroadcastq 0x48(%rsi), %ymm3
681
+ vpsubw %ymm4, %ymm8, %ymm12
682
+ vpaddw %ymm8, %ymm4, %ymm4
683
+ vpsubw %ymm5, %ymm9, %ymm13
684
+ vpmullw %ymm2, %ymm12, %ymm8
685
+ vpaddw %ymm9, %ymm5, %ymm5
686
+ vpsubw %ymm6, %ymm10, %ymm14
687
+ vpmullw %ymm2, %ymm13, %ymm9
688
+ vpaddw %ymm10, %ymm6, %ymm6
689
+ vpsubw %ymm7, %ymm11, %ymm15
690
+ vpmullw %ymm2, %ymm14, %ymm10
691
+ vpaddw %ymm11, %ymm7, %ymm7
692
+ vpmullw %ymm2, %ymm15, %ymm11
693
+ vpmulhw %ymm3, %ymm12, %ymm12
694
+ vpmulhw %ymm3, %ymm13, %ymm13
695
+ vpmulhw %ymm3, %ymm14, %ymm14
696
+ vpmulhw %ymm3, %ymm15, %ymm15
697
+ vpmulhw %ymm0, %ymm8, %ymm8
698
+ vpmulhw %ymm0, %ymm9, %ymm9
699
+ vpmulhw %ymm0, %ymm10, %ymm10
700
+ vpmulhw %ymm0, %ymm11, %ymm11
701
+ vpsubw %ymm8, %ymm12, %ymm8
702
+ vpsubw %ymm9, %ymm13, %ymm9
703
+ vpsubw %ymm10, %ymm14, %ymm10
704
+ vpsubw %ymm11, %ymm15, %ymm11
705
+ vmovdqa %ymm4, 0x80(%rdi) # write the sums to rdi+0x80..0xff and the products to rdi+0x180..0x1ff
706
+ vmovdqa %ymm5, 0xa0(%rdi)
707
+ vmovdqa %ymm6, 0xc0(%rdi)
708
+ vmovdqa %ymm7, 0xe0(%rdi)
709
+ vmovdqa %ymm8, 0x180(%rdi)
710
+ vmovdqa %ymm9, 0x1a0(%rdi)
711
+ vmovdqa %ymm10, 0x1c0(%rdi)
712
+ vmovdqa %ymm11, 0x1e0(%rdi)
713
+ retq # NOTE(review): there is no vzeroupper anywhere in this routine, so it returns with the upper ymm halves in use -- confirm that callers are built as AVX code
714
+ .cfi_endproc
715
+
716
+ MLK_ASM_FN_SIZE(invntt_avx2)
717
+
718
+ #endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
719
+ */