pq_crypto 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/extconf.rb +2 -0
  5. data/ext/pqcrypto/pqcrypto_ruby_secure.c +139 -0
  6. data/ext/pqcrypto/pqcrypto_secure.c +532 -0
  7. data/ext/pqcrypto/pqcrypto_secure.h +20 -0
  8. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  9. data/ext/pqcrypto/vendor/.vendored +4 -4
  10. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  111. data/lib/pq_crypto/hybrid_kem.rb +1 -1
  112. data/lib/pq_crypto/internal.rb +23 -0
  113. data/lib/pq_crypto/kem.rb +27 -34
  114. data/lib/pq_crypto/pkcs8/der.rb +68 -0
  115. data/lib/pq_crypto/pkcs8/private_key_choice.rb +186 -0
  116. data/lib/pq_crypto/pkcs8.rb +51 -468
  117. data/lib/pq_crypto/serialization.rb +19 -29
  118. data/lib/pq_crypto/signature.rb +28 -35
  119. data/lib/pq_crypto/version.rb +1 -1
  120. data/lib/pq_crypto.rb +10 -0
  121. data/script/vendor_libs.rb +3 -3
  122. metadata +44 -35
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  142. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  143. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  144. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  145. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  146. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  147. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  148. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  149. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  150. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  151. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  152. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  153. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  154. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
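data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S (new file)
@@ -0,0 +1,454 @@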
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) The mldsa-native project authors
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ */
6
+ #include "../../../../common.h"
7
+
8
+ #if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) && \
9
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
10
+
11
+ /*
12
+ * WARNING: This file is auto-derived from the mldsa-native source file
13
+ * dev/fips202/x86_64/src/keccak_f1600_x4_avx2_asm.S using scripts/simpasm. Do not modify it directly.
14
+ */
15
+
16
+ .text
17
+ .balign 4
18
+ .global MLD_ASM_NAMESPACE(keccak_f1600_x4_avx2_asm)
19
+ MLD_ASM_FN_SYMBOL(keccak_f1600_x4_avx2_asm)
20
+
21
+ .cfi_startproc
22
+ movq %rsp, %r11
23
+ .cfi_def_cfa_register %r11
24
+ andq $-0x20, %rsp
25
+ subq $0x300, %rsp # imm = 0x300
26
+ vmovdqu (%rdi), %ymm0
27
+ vmovdqu 0xc8(%rdi), %ymm3
28
+ vmovdqu 0x190(%rdi), %ymm1
29
+ vmovdqu 0x258(%rdi), %ymm4
30
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
31
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
32
+ vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
33
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
34
+ vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
35
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
36
+ vmovdqu 0x278(%rdi), %ymm4
37
+ vmovdqu %ymm3, 0x40(%rsp)
38
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[2,3],ymm1[2,3]
39
+ vmovdqu %ymm7, (%rsp)
40
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
41
+ vmovdqu 0x20(%rdi), %ymm0
42
+ vmovdqu 0x1b0(%rdi), %ymm1
43
+ vmovdqu %ymm3, 0x60(%rsp)
44
+ vmovdqu 0xe8(%rdi), %ymm3
45
+ vmovdqu %ymm7, 0x20(%rsp)
46
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
47
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
48
+ vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
49
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
50
+ vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
51
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
52
+ vmovdqu 0x298(%rdi), %ymm4
53
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm14 # ymm14 = ymm0[2,3],ymm1[2,3]
54
+ vmovdqu %ymm7, 0x80(%rsp)
55
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
56
+ vmovdqu 0x40(%rdi), %ymm0
57
+ vmovdqu 0x1d0(%rdi), %ymm1
58
+ vmovdqu %ymm3, 0xc0(%rsp)
59
+ vmovdqu 0x108(%rdi), %ymm3
60
+ vmovdqu %ymm14, %ymm10
61
+ vmovdqu %ymm7, 0xa0(%rsp)
62
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
63
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
64
+ vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
65
+ vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
66
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm11 # ymm11 = ymm2[0,1],ymm3[0,1]
67
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
68
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
69
+ vmovdqu %ymm3, 0x100(%rsp)
70
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm8 # ymm8 = ymm0[2,3],ymm1[2,3]
71
+ vmovdqu 0x128(%rdi), %ymm3
72
+ vmovdqu 0x60(%rdi), %ymm0
73
+ vmovdqu 0x1f0(%rdi), %ymm1
74
+ vmovdqu %ymm7, 0xe0(%rsp)
75
+ vmovdqu %ymm11, %ymm14
76
+ vmovdqu 0x2b8(%rdi), %ymm4
77
+ vmovdqu 0x2f8(%rdi), %ymm5
78
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
79
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
80
+ vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
81
+ vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
82
+ vmovdqu 0x2d8(%rdi), %ymm4
83
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm15 # ymm15 = ymm2[0,1],ymm3[0,1]
84
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm3 # ymm3 = ymm2[2,3],ymm3[2,3]
85
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
86
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm9 # ymm9 = ymm0[2,3],ymm1[2,3]
87
+ vmovdqu %ymm3, 0x140(%rsp)
88
+ vmovdqu 0x80(%rdi), %ymm0
89
+ vmovdqu 0x148(%rdi), %ymm3
90
+ vmovdqu 0x210(%rdi), %ymm1
91
+ vmovdqu %ymm7, 0x120(%rsp)
92
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
93
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
94
+ vpunpcklqdq %ymm4, %ymm1, %ymm3 # ymm3 = ymm1[0],ymm4[0],ymm1[2],ymm4[2]
95
+ vpunpckhqdq %ymm4, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3]
96
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm7 # ymm7 = ymm2[0,1],ymm3[0,1]
97
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 # ymm13 = ymm2[2,3],ymm3[2,3]
98
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[2,3],ymm1[2,3]
99
+ vmovdqu %ymm7, 0x160(%rsp)
100
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm7 # ymm7 = ymm0[0,1],ymm1[0,1]
101
+ vmovdqu 0xa0(%rdi), %ymm0
102
+ vmovdqu 0x230(%rdi), %ymm1
103
+ vmovdqu %ymm3, 0x1a0(%rsp)
104
+ vmovdqu 0x168(%rdi), %ymm3
105
+ vpunpcklqdq %ymm5, %ymm1, %ymm4 # ymm4 = ymm1[0],ymm5[0],ymm1[2],ymm5[2]
106
+ vpunpckhqdq %ymm5, %ymm1, %ymm1 # ymm1 = ymm1[1],ymm5[1],ymm1[3],ymm5[3]
107
+ vmovdqu %ymm7, 0x180(%rsp)
108
+ vpunpcklqdq %ymm3, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
109
+ vpunpckhqdq %ymm3, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm3[1],ymm0[3],ymm3[3]
110
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm12 # ymm12 = ymm2[0,1],ymm4[0,1]
111
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm3 # ymm3 = ymm0[0,1],ymm1[0,1]
112
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm7 # ymm7 = ymm2[2,3],ymm4[2,3]
113
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm4 # ymm4 = ymm0[2,3],ymm1[2,3]
114
+ vmovq 0x250(%rdi), %xmm0
115
+ vmovq 0xc0(%rdi), %xmm1
116
+ vmovdqu %ymm12, 0x1c0(%rsp)
117
+ vmovdqu %ymm4, 0x1e0(%rsp)
118
+ vpinsrq $0x1, 0x318(%rdi), %xmm0, %xmm0
119
+ vpinsrq $0x1, 0x188(%rdi), %xmm1, %xmm1
120
+ vinserti128 $0x1, %xmm0, %ymm1, %ymm2
121
+ movq $0x0, %r10
122
+
123
+ LLkeccak_f1600_x4_avx2_asm:
124
+ vmovdqu 0xa0(%rsp), %ymm4
125
+ vpxor 0x1c0(%rsp), %ymm9, %ymm0
126
+ vmovdqu %ymm9, 0x200(%rsp)
127
+ vmovdqu %ymm10, %ymm9
128
+ vmovdqu 0xc0(%rsp), %ymm11
129
+ vmovdqu 0x160(%rsp), %ymm12
130
+ vmovdqu %ymm3, 0x240(%rsp)
131
+ vpxor 0x100(%rsp), %ymm4, %ymm1
132
+ vmovdqu 0x40(%rsp), %ymm10
133
+ vmovdqu %ymm4, 0x220(%rsp)
134
+ vpxor %ymm3, %ymm12, %ymm12
135
+ vmovdqu 0x20(%rsp), %ymm6
136
+ vmovdqu 0x140(%rsp), %ymm4
137
+ vmovdqu %ymm14, 0x2a0(%rsp)
138
+ vpxor %ymm1, %ymm0, %ymm0
139
+ vpxor %ymm8, %ymm11, %ymm1
140
+ vpxor 0x180(%rsp), %ymm7, %ymm11
141
+ vmovdqu %ymm10, 0x280(%rsp)
142
+ vpxor %ymm1, %ymm12, %ymm12
143
+ vpxor %ymm15, %ymm9, %ymm1
144
+ vmovdqu 0xe0(%rsp), %ymm3
145
+ vmovdqu %ymm8, 0x260(%rsp)
146
+ vpxor %ymm1, %ymm11, %ymm11
147
+ vpxor 0x120(%rsp), %ymm14, %ymm1
148
+ vpxor %ymm6, %ymm12, %ymm12
149
+ vmovdqu 0x60(%rsp), %ymm8
150
+ vpxor %ymm10, %ymm11, %ymm11
151
+ vpxor 0x1e0(%rsp), %ymm13, %ymm10
152
+ vpxor %ymm4, %ymm3, %ymm3
153
+ vmovdqu %ymm4, 0x2c0(%rsp)
154
+ vpsrlq $0x3f, %ymm12, %ymm4
155
+ vpsrlq $0x3f, %ymm11, %ymm5
156
+ vpxor (%rsp), %ymm0, %ymm0
157
+ vpxor %ymm1, %ymm10, %ymm10
158
+ vmovdqu 0x80(%rsp), %ymm1
159
+ vpxor %ymm8, %ymm10, %ymm10
160
+ vmovdqu %ymm1, %ymm14
161
+ vpxor 0x1a0(%rsp), %ymm2, %ymm1
162
+ vmovdqu %ymm14, 0x2e0(%rsp)
163
+ vpxor %ymm3, %ymm1, %ymm1
164
+ vpsllq $0x1, %ymm12, %ymm3
165
+ vpor %ymm4, %ymm3, %ymm3
166
+ vpsllq $0x1, %ymm11, %ymm4
167
+ vpxor %ymm14, %ymm1, %ymm1
168
+ vpor %ymm5, %ymm4, %ymm4
169
+ vpsrlq $0x3f, %ymm10, %ymm14
170
+ vpxor %ymm1, %ymm3, %ymm3
171
+ vpsllq $0x1, %ymm10, %ymm5
172
+ vpxor %ymm0, %ymm4, %ymm4
173
+ vpor %ymm14, %ymm5, %ymm5
174
+ vpxor %ymm6, %ymm4, %ymm6
175
+ vpxor %ymm12, %ymm5, %ymm5
176
+ vpsrlq $0x3f, %ymm1, %ymm12
177
+ vpsllq $0x1, %ymm1, %ymm1
178
+ vpxor %ymm7, %ymm5, %ymm7
179
+ vpxor %ymm9, %ymm5, %ymm9
180
+ vpor %ymm12, %ymm1, %ymm1
181
+ vpxor (%rsp), %ymm3, %ymm12
182
+ vpxor %ymm11, %ymm1, %ymm1
183
+ vpsrlq $0x3f, %ymm0, %ymm11
184
+ vpsllq $0x1, %ymm0, %ymm0
185
+ vpxor %ymm13, %ymm1, %ymm13
186
+ vpxor %ymm8, %ymm1, %ymm8
187
+ vpor %ymm11, %ymm0, %ymm0
188
+ vpxor %ymm10, %ymm0, %ymm0
189
+ vpxor 0xc0(%rsp), %ymm4, %ymm10
190
+ vpxor %ymm2, %ymm0, %ymm2
191
+ vpsrlq $0x14, %ymm10, %ymm11
192
+ vpsllq $0x2c, %ymm10, %ymm10
193
+ vpor %ymm11, %ymm10, %ymm10
194
+ vpxor %ymm15, %ymm5, %ymm11
195
+ vpbroadcastq (%rsi), %ymm15
196
+ vpsrlq $0x15, %ymm11, %ymm14
197
+ vpsllq $0x2b, %ymm11, %ymm11
198
+ vpor %ymm14, %ymm11, %ymm11
199
+ vpandn %ymm11, %ymm10, %ymm14
200
+ vpxor %ymm15, %ymm14, %ymm14
201
+ vpxor %ymm12, %ymm14, %ymm15
202
+ vpsrlq $0x2b, %ymm13, %ymm14
203
+ vpsllq $0x15, %ymm13, %ymm13
204
+ vmovdqu %ymm15, (%rsp)
205
+ vpor %ymm14, %ymm13, %ymm13
206
+ vpandn %ymm13, %ymm11, %ymm14
207
+ vpxor %ymm10, %ymm14, %ymm15
208
+ vpsrlq $0x32, %ymm2, %ymm14
209
+ vpsllq $0xe, %ymm2, %ymm2
210
+ vmovdqu %ymm15, 0x20(%rsp)
211
+ vpor %ymm14, %ymm2, %ymm2
212
+ vpandn %ymm2, %ymm13, %ymm14
213
+ vpxor %ymm11, %ymm14, %ymm11
214
+ vmovdqu %ymm11, 0x40(%rsp)
215
+ vpandn %ymm12, %ymm2, %ymm11
216
+ vpandn %ymm10, %ymm12, %ymm12
217
+ vpxor %ymm13, %ymm11, %ymm11
218
+ vmovdqu %ymm11, 0x60(%rsp)
219
+ vpxor %ymm2, %ymm12, %ymm11
220
+ vpsrlq $0x24, %ymm8, %ymm2
221
+ vpsllq $0x1c, %ymm8, %ymm8
222
+ vmovdqu %ymm11, 0x80(%rsp)
223
+ vpor %ymm2, %ymm8, %ymm8
224
+ vpxor 0xe0(%rsp), %ymm0, %ymm2
225
+ vpsrlq $0x2c, %ymm2, %ymm10
226
+ vpsllq $0x14, %ymm2, %ymm2
227
+ vpor %ymm10, %ymm2, %ymm2
228
+ vpxor 0x100(%rsp), %ymm3, %ymm10
229
+ vpsrlq $0x3d, %ymm10, %ymm11
230
+ vpsllq $0x3, %ymm10, %ymm10
231
+ vpor %ymm11, %ymm10, %ymm10
232
+ vpandn %ymm10, %ymm2, %ymm11
233
+ vpxor %ymm8, %ymm11, %ymm11
234
+ vmovdqu %ymm11, 0xa0(%rsp)
235
+ vpxor 0x160(%rsp), %ymm4, %ymm11
236
+ vpsrlq $0x13, %ymm11, %ymm12
237
+ vpsllq $0x2d, %ymm11, %ymm11
238
+ vpor %ymm12, %ymm11, %ymm11
239
+ vpandn %ymm11, %ymm10, %ymm12
240
+ vpxor %ymm2, %ymm12, %ymm12
241
+ vmovdqu %ymm12, 0xc0(%rsp)
242
+ vpsrlq $0x3, %ymm7, %ymm12
243
+ vpsllq $0x3d, %ymm7, %ymm7
244
+ vpor %ymm12, %ymm7, %ymm7
245
+ vpandn %ymm7, %ymm11, %ymm12
246
+ vpxor %ymm10, %ymm12, %ymm10
247
+ vpandn %ymm8, %ymm7, %ymm12
248
+ vpandn %ymm2, %ymm8, %ymm8
249
+ vpsrlq $0x3f, %ymm6, %ymm2
250
+ vpsllq $0x1, %ymm6, %ymm6
251
+ vpxor %ymm11, %ymm12, %ymm14
252
+ vpor %ymm2, %ymm6, %ymm6
253
+ vpsrlq $0x3a, %ymm9, %ymm2
254
+ vpxor %ymm7, %ymm8, %ymm12
255
+ vpsllq $0x6, %ymm9, %ymm9
256
+ vmovdqu %ymm12, 0xe0(%rsp)
257
+ vpxor 0x1a0(%rsp), %ymm0, %ymm7
258
+ vpor %ymm2, %ymm9, %ymm9
259
+ vpxor 0x120(%rsp), %ymm1, %ymm2
260
+ vpshufb (%rdx), %ymm7, %ymm7
261
+ vpsrlq $0x27, %ymm2, %ymm11
262
+ vpsllq $0x19, %ymm2, %ymm2
263
+ vpor %ymm2, %ymm11, %ymm11
264
+ vpandn %ymm11, %ymm9, %ymm2
265
+ vpandn %ymm7, %ymm11, %ymm8
266
+ vpxor %ymm6, %ymm2, %ymm12
267
+ vpxor 0x1c0(%rsp), %ymm3, %ymm2
268
+ vpxor %ymm9, %ymm8, %ymm8
269
+ vmovdqu %ymm12, 0x100(%rsp)
270
+ vpsrlq $0x2e, %ymm2, %ymm12
271
+ vpsllq $0x12, %ymm2, %ymm2
272
+ vpor %ymm2, %ymm12, %ymm2
273
+ vpandn %ymm2, %ymm7, %ymm12
274
+ vpxor %ymm11, %ymm12, %ymm15
275
+ vpandn %ymm6, %ymm2, %ymm11
276
+ vpandn %ymm9, %ymm6, %ymm6
277
+ vpxor %ymm7, %ymm11, %ymm12
278
+ vmovdqu %ymm12, 0x120(%rsp)
279
+ vpxor %ymm2, %ymm6, %ymm12
280
+ vpxor 0x2e0(%rsp), %ymm0, %ymm6
281
+ vpxor 0x2c0(%rsp), %ymm0, %ymm0
282
+ vmovdqu %ymm12, 0x140(%rsp)
283
+ vpsrlq $0x25, %ymm6, %ymm2
284
+ vpsllq $0x1b, %ymm6, %ymm6
285
+ vpor %ymm6, %ymm2, %ymm2
286
+ vpxor 0x220(%rsp), %ymm3, %ymm6
287
+ vpxor 0x200(%rsp), %ymm3, %ymm3
288
+ vpsrlq $0x1c, %ymm6, %ymm7
289
+ vpsllq $0x24, %ymm6, %ymm6
290
+ vpor %ymm6, %ymm7, %ymm7
291
+ vpxor 0x260(%rsp), %ymm4, %ymm6
292
+ vpxor 0x240(%rsp), %ymm4, %ymm4
293
+ vpsrlq $0x36, %ymm6, %ymm12
294
+ vpsllq $0xa, %ymm6, %ymm6
295
+ vpor %ymm6, %ymm12, %ymm12
296
+ vpxor 0x180(%rsp), %ymm5, %ymm6
297
+ vpxor 0x280(%rsp), %ymm5, %ymm5
298
+ vpandn %ymm12, %ymm7, %ymm9
299
+ vpsrlq $0x31, %ymm6, %ymm11
300
+ vpsllq $0xf, %ymm6, %ymm6
301
+ vpxor %ymm2, %ymm9, %ymm9
302
+ vpor %ymm6, %ymm11, %ymm11
303
+ vpandn %ymm11, %ymm12, %ymm6
304
+ vpxor %ymm7, %ymm6, %ymm6
305
+ vmovdqu %ymm6, 0x160(%rsp)
306
+ vpxor 0x1e0(%rsp), %ymm1, %ymm6
307
+ vpxor 0x2a0(%rsp), %ymm1, %ymm1
308
+ vpshufb (%rcx), %ymm6, %ymm6
309
+ vpandn %ymm6, %ymm11, %ymm13
310
+ vpxor %ymm12, %ymm13, %ymm13
311
+ vmovdqu %ymm13, 0x180(%rsp)
312
+ vpandn %ymm2, %ymm6, %ymm13
313
+ vpandn %ymm7, %ymm2, %ymm2
314
+ vpxor %ymm6, %ymm2, %ymm2
315
+ vpsrlq $0x3e, %ymm4, %ymm6
316
+ vpxor %ymm11, %ymm13, %ymm13
317
+ vmovdqu %ymm2, 0x1a0(%rsp)
318
+ vpsrlq $0x2, %ymm5, %ymm2
319
+ vpsllq $0x3e, %ymm5, %ymm5
320
+ vpor %ymm5, %ymm2, %ymm2
321
+ vpsrlq $0x9, %ymm1, %ymm5
322
+ vpsllq $0x37, %ymm1, %ymm1
323
+ vpsllq $0x2, %ymm4, %ymm4
324
+ vpor %ymm1, %ymm5, %ymm1
325
+ vpsrlq $0x19, %ymm0, %ymm5
326
+ vpor %ymm4, %ymm6, %ymm4
327
+ vpsllq $0x27, %ymm0, %ymm0
328
+ vpor %ymm0, %ymm5, %ymm5
329
+ vpandn %ymm5, %ymm1, %ymm0
330
+ vpxor %ymm2, %ymm0, %ymm0
331
+ vmovdqu %ymm0, 0x1c0(%rsp)
332
+ vpsrlq $0x17, %ymm3, %ymm0
333
+ vpsllq $0x29, %ymm3, %ymm3
334
+ vpor %ymm3, %ymm0, %ymm0
335
+ vpandn %ymm4, %ymm0, %ymm7
336
+ vpandn %ymm0, %ymm5, %ymm3
337
+ vpxor %ymm5, %ymm7, %ymm7
338
+ vpandn %ymm2, %ymm4, %ymm5
339
+ vpandn %ymm1, %ymm2, %ymm2
340
+ vpxor %ymm0, %ymm5, %ymm5
341
+ vpxor %ymm1, %ymm3, %ymm3
342
+ vpxor %ymm4, %ymm2, %ymm2
343
+ vmovdqu %ymm5, 0x1e0(%rsp)
344
+ addq $0x8, %rsi
345
+ addq $0x1, %r10
346
+ cmpq $0x18, %r10
347
+ jne LLkeccak_f1600_x4_avx2_asm
348
+ vmovdqu (%rsp), %ymm4
349
+ vmovdqu 0x40(%rsp), %ymm5
350
+ vmovdqu 0x20(%rsp), %ymm0
351
+ vmovdqu 0x60(%rsp), %ymm1
352
+ vmovdqu 0x1c0(%rsp), %ymm12
353
+ vmovdqu %ymm2, 0x1c0(%rsp)
354
+ vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
355
+ vpunpckhqdq %ymm0, %ymm4, %ymm0 # ymm0 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
356
+ vpunpcklqdq %ymm1, %ymm5, %ymm4 # ymm4 = ymm5[0],ymm1[0],ymm5[2],ymm1[2]
357
+ vpunpckhqdq %ymm1, %ymm5, %ymm1 # ymm1 = ymm5[1],ymm1[1],ymm5[3],ymm1[3]
358
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
359
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
360
+ vmovdqu 0x80(%rsp), %ymm4
361
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm5 # ymm5 = ymm0[0,1],ymm1[0,1]
362
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
363
+ vmovdqu %ymm6, (%rdi)
364
+ vmovdqu %ymm5, 0xc8(%rdi)
365
+ vmovdqu %ymm2, 0x190(%rdi)
366
+ vmovdqu %ymm0, 0x258(%rdi)
367
+ vmovdqu 0xa0(%rsp), %ymm0
368
+ vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
369
+ vpunpckhqdq %ymm0, %ymm4, %ymm1 # ymm1 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
370
+ vmovdqu 0xc0(%rsp), %ymm0
371
+ vpunpcklqdq %ymm10, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm10[0],ymm0[2],ymm10[2]
372
+ vpunpckhqdq %ymm10, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm10[1],ymm0[3],ymm10[3]
373
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
374
+ vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
375
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
376
+ vmovdqu 0xe0(%rsp), %ymm4
377
+ vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
378
+ vmovdqu 0x100(%rsp), %ymm0
379
+ vmovdqu %ymm2, 0x1b0(%rdi)
380
+ vmovdqu %ymm1, 0x278(%rdi)
381
+ vpunpcklqdq %ymm4, %ymm14, %ymm2 # ymm2 = ymm14[0],ymm4[0],ymm14[2],ymm4[2]
382
+ vpunpckhqdq %ymm4, %ymm14, %ymm1 # ymm1 = ymm14[1],ymm4[1],ymm14[3],ymm4[3]
383
+ vpunpcklqdq %ymm8, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm8[0],ymm0[2],ymm8[2]
384
+ vpunpckhqdq %ymm8, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm8[1],ymm0[3],ymm8[3]
385
+ vmovdqu %ymm6, 0x20(%rdi)
386
+ vmovdqu %ymm5, 0xe8(%rdi)
387
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
388
+ vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
389
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
390
+ vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
391
+ vmovdqu 0x120(%rsp), %ymm4
392
+ vmovdqu 0x140(%rsp), %ymm0
393
+ vmovdqu %ymm2, 0x1d0(%rdi)
394
+ vmovdqu %ymm1, 0x298(%rdi)
395
+ vpunpcklqdq %ymm4, %ymm15, %ymm2 # ymm2 = ymm15[0],ymm4[0],ymm15[2],ymm4[2]
396
+ vpunpckhqdq %ymm4, %ymm15, %ymm1 # ymm1 = ymm15[1],ymm4[1],ymm15[3],ymm4[3]
397
+ vpunpcklqdq %ymm9, %ymm0, %ymm4 # ymm4 = ymm0[0],ymm9[0],ymm0[2],ymm9[2]
398
+ vmovdqu %ymm5, 0x108(%rdi)
399
+ vpunpckhqdq %ymm9, %ymm0, %ymm0 # ymm0 = ymm0[1],ymm9[1],ymm0[3],ymm9[3]
400
+ vmovdqu %ymm6, 0x40(%rdi)
401
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
402
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
403
+ vperm2i128 $0x20, %ymm0, %ymm1, %ymm5 # ymm5 = ymm1[0,1],ymm0[0,1]
404
+ vmovdqu 0x160(%rsp), %ymm4
405
+ vperm2i128 $0x31, %ymm0, %ymm1, %ymm1 # ymm1 = ymm1[2,3],ymm0[2,3]
406
+ vmovdqu 0x180(%rsp), %ymm0
407
+ vmovdqu %ymm5, 0x128(%rdi)
408
+ vmovdqu 0x1a0(%rsp), %ymm5
409
+ vmovdqu %ymm2, 0x1f0(%rdi)
410
+ vpunpcklqdq %ymm0, %ymm4, %ymm2 # ymm2 = ymm4[0],ymm0[0],ymm4[2],ymm0[2]
411
+ vpunpckhqdq %ymm0, %ymm4, %ymm0 # ymm0 = ymm4[1],ymm0[1],ymm4[3],ymm0[3]
412
+ vpunpcklqdq %ymm5, %ymm13, %ymm4 # ymm4 = ymm13[0],ymm5[0],ymm13[2],ymm5[2]
413
+ vmovdqu %ymm6, 0x60(%rdi)
414
+ vperm2i128 $0x20, %ymm4, %ymm2, %ymm6 # ymm6 = ymm2[0,1],ymm4[0,1]
415
+ vmovdqu %ymm1, 0x2b8(%rdi)
416
+ vperm2i128 $0x31, %ymm4, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm4[2,3]
417
+ vpunpckhqdq %ymm5, %ymm13, %ymm1 # ymm1 = ymm13[1],ymm5[1],ymm13[3],ymm5[3]
418
+ vmovdqu %ymm6, 0x80(%rdi)
419
+ vmovdqu 0x1e0(%rsp), %ymm4
420
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm5 # ymm5 = ymm0[0,1],ymm1[0,1]
421
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
422
+ vmovdqu %ymm2, 0x210(%rdi)
423
+ vpunpcklqdq %ymm3, %ymm12, %ymm2 # ymm2 = ymm12[0],ymm3[0],ymm12[2],ymm3[2]
424
+ vmovdqu %ymm0, 0x2d8(%rdi)
425
+ vpunpckhqdq %ymm3, %ymm12, %ymm0 # ymm0 = ymm12[1],ymm3[1],ymm12[3],ymm3[3]
426
+ vpunpcklqdq %ymm4, %ymm7, %ymm3 # ymm3 = ymm7[0],ymm4[0],ymm7[2],ymm4[2]
427
+ vpunpckhqdq %ymm4, %ymm7, %ymm1 # ymm1 = ymm7[1],ymm4[1],ymm7[3],ymm4[3]
428
+ vmovdqu %ymm5, 0x148(%rdi)
429
+ vperm2i128 $0x20, %ymm3, %ymm2, %ymm5 # ymm5 = ymm2[0,1],ymm3[0,1]
430
+ vperm2i128 $0x31, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[2,3],ymm3[2,3]
431
+ vmovdqu 0x1c0(%rsp), %ymm3
432
+ vperm2i128 $0x20, %ymm1, %ymm0, %ymm4 # ymm4 = ymm0[0,1],ymm1[0,1]
433
+ vperm2i128 $0x31, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]
434
+ vmovdqu %ymm5, 0xa0(%rdi)
435
+ vextracti128 $0x1, %ymm3, %xmm15
436
+ vmovdqu %ymm4, 0x168(%rdi)
437
+ vmovdqu %ymm2, 0x230(%rdi)
438
+ vmovdqu %ymm0, 0x2f8(%rdi)
439
+ vmovq %xmm3, 0xc0(%rdi)
440
+ vmovhpd %xmm3, 0x188(%rdi)
441
+ vmovq %xmm15, 0x250(%rdi)
442
+ vmovhpd %xmm15, 0x318(%rdi)
443
+ movq %r11, %rsp
444
+ .cfi_def_cfa_register %rsp
445
+ retq
446
+ .cfi_endproc
447
+
448
+ MLD_ASM_FN_SIZE(keccak_f1600_x4_avx2_asm)
449
+
450
+ #endif /* MLD_FIPS202_X86_64_NEED_X4_AVX2 && !MLD_CONFIG_MULTILEVEL_NO_SHARED */
451
+
452
+ #if defined(__ELF__)
453
+ .section .note.GNU-stack,"",%progbits
454
+ #endif
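data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c (new file)
@@ -0,0 +1,52 @@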
1
+ /*
2
+ * Copyright (c) The mldsa-native project authors
3
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
+ */
5
+
6
+ /*
7
+ * WARNING: This file is auto-generated from scripts/autogen
8
+ * in the mldsa-native repository.
9
+ * Do not modify it directly.
10
+ */
11
+
12
+ #include "../../../../common.h"
13
+ #if defined(MLD_FIPS202_X86_64_NEED_X4_AVX2) && \
14
+ !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
15
+
16
+ #include <stdint.h>
17
+
18
+ #include "fips202_native_x86_64.h"
19
+
20
+ MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t
21
+ mld_keccakf1600_round_constants[24] = {
22
+ 0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
23
+ 0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
24
+ 0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
25
+ 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
26
+ 0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
27
+ 0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
28
+ 0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
29
+ 0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
30
+ };
31
+
32
+ MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t mld_keccak_rho8[4] = {
33
+ 0x0605040302010007,
34
+ 0x0e0d0c0b0a09080f,
35
+ 0x1615141312111017,
36
+ 0x1e1d1c1b1a19181f,
37
+ };
38
+
39
+ MLD_ALIGN MLD_INTERNAL_DATA_DEFINITION const uint64_t mld_keccak_rho56[4] = {
40
+ 0x0007060504030201,
41
+ 0x080f0e0d0c0b0a09,
42
+ 0x1017161514131211,
43
+ 0x181f1e1d1c1b1a19,
44
+ };
45
+
46
+ #else /* MLD_FIPS202_X86_64_NEED_X4_AVX2 && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
47
+ */
48
+
49
+ MLD_EMPTY_CU(fips202_x86_64_constants)
50
+
51
+ #endif /* !(MLD_FIPS202_X86_64_NEED_X4_AVX2 && \
52
+ !MLD_CONFIG_MULTILEVEL_NO_SHARED) */