pq_crypto 0.6.1 → 0.6.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  5. data/ext/pqcrypto/vendor/.vendored +4 -4
  6. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  7. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  8. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  9. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  10. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  107. data/lib/pq_crypto/version.rb +1 -1
  108. data/script/vendor_libs.rb +3 -3
  109. metadata +41 -35
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  111. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  112. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  113. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  114. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  115. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  116. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  117. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  118. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  119. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  120. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  121. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  122. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -1,187 +0,0 @@
1
- /*
2
- * Copyright (c) The mldsa-native project authors
3
- * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
- */
5
-
6
- /* References
7
- * ==========
8
- *
9
- * - [REF_AVX2]
10
- * CRYSTALS-Dilithium optimized AVX2 implementation
11
- * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
- * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
- */
14
-
15
- /*
16
- * This file is derived from the public domain
17
- * AVX2 Dilithium implementation @[REF_AVX2].
18
- */
19
-
20
- #include "../../../common.h"
21
- #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
22
- !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
23
- (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7)
24
-
25
- /*
26
- * WARNING: This file is auto-derived from the mldsa-native source file
27
- * dev/x86_64/src/pointwise_acc_l7.S using scripts/simpasm. Do not modify it directly.
28
- */
29
-
30
- #if defined(__ELF__)
31
- .section .note.GNU-stack,"",@progbits
32
- #endif
33
-
34
- .text
35
- .balign 4
36
- .global MLD_ASM_NAMESPACE(pointwise_acc_l7_avx2)
37
- MLD_ASM_FN_SYMBOL(pointwise_acc_l7_avx2)
38
-
39
- .cfi_startproc
40
- vmovdqa 0x20(%rcx), %ymm0
41
- vmovdqa (%rcx), %ymm1
42
- xorl %eax, %eax
43
-
44
- Lpointwise_acc_l7_avx2_looptop2:
45
- vmovdqa (%rsi), %ymm6
46
- vmovdqa 0x20(%rsi), %ymm8
47
- vmovdqa (%rdx), %ymm10
48
- vmovdqa 0x20(%rdx), %ymm12
49
- vpsrlq $0x20, %ymm6, %ymm7
50
- vpsrlq $0x20, %ymm8, %ymm9
51
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
52
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
53
- vpmuldq %ymm10, %ymm6, %ymm6
54
- vpmuldq %ymm11, %ymm7, %ymm7
55
- vpmuldq %ymm12, %ymm8, %ymm8
56
- vpmuldq %ymm13, %ymm9, %ymm9
57
- vmovdqa %ymm6, %ymm2
58
- vmovdqa %ymm7, %ymm3
59
- vmovdqa %ymm8, %ymm4
60
- vmovdqa %ymm9, %ymm5
61
- vmovdqa 0x400(%rsi), %ymm6
62
- vmovdqa 0x420(%rsi), %ymm8
63
- vmovdqa 0x400(%rdx), %ymm10
64
- vmovdqa 0x420(%rdx), %ymm12
65
- vpsrlq $0x20, %ymm6, %ymm7
66
- vpsrlq $0x20, %ymm8, %ymm9
67
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
68
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
69
- vpmuldq %ymm10, %ymm6, %ymm6
70
- vpmuldq %ymm11, %ymm7, %ymm7
71
- vpmuldq %ymm12, %ymm8, %ymm8
72
- vpmuldq %ymm13, %ymm9, %ymm9
73
- vpaddq %ymm2, %ymm6, %ymm2
74
- vpaddq %ymm3, %ymm7, %ymm3
75
- vpaddq %ymm4, %ymm8, %ymm4
76
- vpaddq %ymm5, %ymm9, %ymm5
77
- vmovdqa 0x800(%rsi), %ymm6
78
- vmovdqa 0x820(%rsi), %ymm8
79
- vmovdqa 0x800(%rdx), %ymm10
80
- vmovdqa 0x820(%rdx), %ymm12
81
- vpsrlq $0x20, %ymm6, %ymm7
82
- vpsrlq $0x20, %ymm8, %ymm9
83
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
84
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
85
- vpmuldq %ymm10, %ymm6, %ymm6
86
- vpmuldq %ymm11, %ymm7, %ymm7
87
- vpmuldq %ymm12, %ymm8, %ymm8
88
- vpmuldq %ymm13, %ymm9, %ymm9
89
- vpaddq %ymm2, %ymm6, %ymm2
90
- vpaddq %ymm3, %ymm7, %ymm3
91
- vpaddq %ymm4, %ymm8, %ymm4
92
- vpaddq %ymm5, %ymm9, %ymm5
93
- vmovdqa 0xc00(%rsi), %ymm6
94
- vmovdqa 0xc20(%rsi), %ymm8
95
- vmovdqa 0xc00(%rdx), %ymm10
96
- vmovdqa 0xc20(%rdx), %ymm12
97
- vpsrlq $0x20, %ymm6, %ymm7
98
- vpsrlq $0x20, %ymm8, %ymm9
99
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
100
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
101
- vpmuldq %ymm10, %ymm6, %ymm6
102
- vpmuldq %ymm11, %ymm7, %ymm7
103
- vpmuldq %ymm12, %ymm8, %ymm8
104
- vpmuldq %ymm13, %ymm9, %ymm9
105
- vpaddq %ymm2, %ymm6, %ymm2
106
- vpaddq %ymm3, %ymm7, %ymm3
107
- vpaddq %ymm4, %ymm8, %ymm4
108
- vpaddq %ymm5, %ymm9, %ymm5
109
- vmovdqa 0x1000(%rsi), %ymm6
110
- vmovdqa 0x1020(%rsi), %ymm8
111
- vmovdqa 0x1000(%rdx), %ymm10
112
- vmovdqa 0x1020(%rdx), %ymm12
113
- vpsrlq $0x20, %ymm6, %ymm7
114
- vpsrlq $0x20, %ymm8, %ymm9
115
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
116
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
117
- vpmuldq %ymm10, %ymm6, %ymm6
118
- vpmuldq %ymm11, %ymm7, %ymm7
119
- vpmuldq %ymm12, %ymm8, %ymm8
120
- vpmuldq %ymm13, %ymm9, %ymm9
121
- vpaddq %ymm2, %ymm6, %ymm2
122
- vpaddq %ymm3, %ymm7, %ymm3
123
- vpaddq %ymm4, %ymm8, %ymm4
124
- vpaddq %ymm5, %ymm9, %ymm5
125
- vmovdqa 0x1400(%rsi), %ymm6
126
- vmovdqa 0x1420(%rsi), %ymm8
127
- vmovdqa 0x1400(%rdx), %ymm10
128
- vmovdqa 0x1420(%rdx), %ymm12
129
- vpsrlq $0x20, %ymm6, %ymm7
130
- vpsrlq $0x20, %ymm8, %ymm9
131
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
132
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
133
- vpmuldq %ymm10, %ymm6, %ymm6
134
- vpmuldq %ymm11, %ymm7, %ymm7
135
- vpmuldq %ymm12, %ymm8, %ymm8
136
- vpmuldq %ymm13, %ymm9, %ymm9
137
- vpaddq %ymm2, %ymm6, %ymm2
138
- vpaddq %ymm3, %ymm7, %ymm3
139
- vpaddq %ymm4, %ymm8, %ymm4
140
- vpaddq %ymm5, %ymm9, %ymm5
141
- vmovdqa 0x1800(%rsi), %ymm6
142
- vmovdqa 0x1820(%rsi), %ymm8
143
- vmovdqa 0x1800(%rdx), %ymm10
144
- vmovdqa 0x1820(%rdx), %ymm12
145
- vpsrlq $0x20, %ymm6, %ymm7
146
- vpsrlq $0x20, %ymm8, %ymm9
147
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
148
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
149
- vpmuldq %ymm10, %ymm6, %ymm6
150
- vpmuldq %ymm11, %ymm7, %ymm7
151
- vpmuldq %ymm12, %ymm8, %ymm8
152
- vpmuldq %ymm13, %ymm9, %ymm9
153
- vpaddq %ymm2, %ymm6, %ymm2
154
- vpaddq %ymm3, %ymm7, %ymm3
155
- vpaddq %ymm4, %ymm8, %ymm4
156
- vpaddq %ymm5, %ymm9, %ymm5
157
- vpmuldq %ymm2, %ymm0, %ymm6
158
- vpmuldq %ymm3, %ymm0, %ymm7
159
- vpmuldq %ymm4, %ymm0, %ymm8
160
- vpmuldq %ymm5, %ymm0, %ymm9
161
- vpmuldq %ymm6, %ymm1, %ymm6
162
- vpmuldq %ymm7, %ymm1, %ymm7
163
- vpmuldq %ymm8, %ymm1, %ymm8
164
- vpmuldq %ymm9, %ymm1, %ymm9
165
- vpsubq %ymm6, %ymm2, %ymm2
166
- vpsubq %ymm7, %ymm3, %ymm3
167
- vpsubq %ymm8, %ymm4, %ymm4
168
- vpsubq %ymm9, %ymm5, %ymm5
169
- vpsrlq $0x20, %ymm2, %ymm2
170
- vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7]
171
- vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
172
- vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
173
- vmovdqa %ymm2, (%rdi)
174
- vmovdqa %ymm4, 0x20(%rdi)
175
- addq $0x40, %rsi
176
- addq $0x40, %rdx
177
- addq $0x40, %rdi
178
- addl $0x1, %eax
179
- cmpl $0x10, %eax
180
- jb Lpointwise_acc_l7_avx2_looptop2
181
- retq
182
- .cfi_endproc
183
-
184
- MLD_ASM_FN_SIZE(pointwise_acc_l7_avx2)
185
-
186
- #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
187
- && (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7) */
@@ -1,61 +0,0 @@
1
- /*
2
- * Copyright (c) The mldsa-native project authors
3
- * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
- */
5
-
6
- /* References
7
- * ==========
8
- *
9
- * - [REF_AVX2]
10
- * CRYSTALS-Dilithium optimized AVX2 implementation
11
- * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
- * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
- */
14
-
15
- /*
16
- * This file is derived from the public domain
17
- * AVX2 Dilithium implementation @[REF_AVX2].
18
- */
19
-
20
- #include "../../../common.h"
21
-
22
- #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
23
- !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
24
-
25
- #include <immintrin.h>
26
- #include "arith_native_x86_64.h"
27
- #include "consts.h"
28
-
29
- /*************************************************
30
- * Name: mld_poly_caddq_avx2
31
- *
32
- * Description: For all coefficients of in/out polynomial add Q if
33
- * coefficient is negative.
34
- *
35
- * Arguments: - int32_t *r: pointer to input/output polynomial
36
- **************************************************/
37
- void mld_poly_caddq_avx2(int32_t *r)
38
- {
39
- unsigned int i;
40
- __m256i f, g;
41
- const __m256i q = _mm256_set1_epi32(MLDSA_Q);
42
- const __m256i zero = _mm256_setzero_si256();
43
- __m256i *rr = (__m256i *)r;
44
-
45
- for (i = 0; i < MLDSA_N / 8; i++)
46
- {
47
- f = _mm256_load_si256(&rr[i]);
48
- g = _mm256_cmpgt_epi32(zero, f);
49
- g = _mm256_and_si256(g, q);
50
- f = _mm256_add_epi32(f, g);
51
- _mm256_store_si256(&rr[i], f);
52
- }
53
- }
54
-
55
- #else /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
56
- */
57
-
58
- MLD_EMPTY_CU(avx2_reduce)
59
-
60
- #endif /* !(MLD_ARITH_BACKEND_X86_64_DEFAULT && \
61
- !MLD_CONFIG_MULTILEVEL_NO_SHARED) */