pq_crypto 0.6.0 → 0.6.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/SECURITY.md +7 -0
  4. data/ext/pqcrypto/extconf.rb +2 -0
  5. data/ext/pqcrypto/pqcrypto_ruby_secure.c +139 -0
  6. data/ext/pqcrypto/pqcrypto_secure.c +532 -0
  7. data/ext/pqcrypto/pqcrypto_secure.h +20 -0
  8. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  9. data/ext/pqcrypto/vendor/.vendored +4 -4
  10. data/ext/pqcrypto/vendor/mldsa-native/README.md +23 -10
  11. data/ext/pqcrypto/vendor/mldsa-native/mldsa/README.md +23 -0
  12. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.c +114 -58
  13. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native.h +498 -461
  14. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_asm.S +145 -85
  15. data/ext/pqcrypto/vendor/mldsa-native/mldsa/mldsa_native_config.h +456 -422
  16. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/cbmc.h +47 -25
  17. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/common.h +26 -14
  18. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/ct.h +56 -81
  19. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/debug.h +17 -24
  20. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.c +33 -40
  21. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202.h +67 -87
  22. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.c +19 -14
  23. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/fips202x4.h +13 -5
  24. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.c +84 -10
  25. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/keccakf1600.h +10 -5
  26. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/auto.h +6 -0
  27. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/fips202_native_aarch64.h +22 -15
  28. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_aarch64_asm.S +376 -0
  29. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_aarch64_asm.S +204 -0
  30. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_aarch64_asm.S +259 -0
  31. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S +1077 -0
  32. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S +987 -0
  33. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +16 -10
  34. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_scalar.h +2 -1
  35. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x1_v84a.h +1 -1
  36. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x2_v84a.h +4 -2
  37. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  38. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  39. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/api.h +60 -0
  40. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/mve.h +48 -0
  41. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/fips202_native_armv81m.h +18 -1
  42. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +658 -582
  43. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c +5 -100
  44. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  45. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +334 -0
  46. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +355 -0
  47. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/auto.h +8 -3
  48. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/{xkcp.h → keccak_f1600_x4_avx2.h} +11 -8
  49. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/fips202_native_x86_64.h +44 -0
  50. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2_asm.S +454 -0
  51. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/keccakf1600_constants.c +52 -0
  52. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/meta.h +37 -28
  53. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/aarch64_zetas.c +213 -196
  54. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/arith_native_aarch64.h +248 -64
  55. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt_aarch64_asm.S +753 -0
  56. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4_aarch64_asm.S +129 -0
  57. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5_aarch64_asm.S +145 -0
  58. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7_aarch64_asm.S +177 -0
  59. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt_aarch64_asm.S +653 -0
  60. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery_aarch64_asm.S +84 -0
  61. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_aarch64_asm.S +53 -0
  62. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_aarch64_asm.S +55 -0
  63. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_aarch64_asm.S +86 -0
  64. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_aarch64_asm.S +86 -0
  65. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_aarch64_asm.S +103 -0
  66. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_aarch64_asm.S +111 -0
  67. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_aarch64_asm.S +75 -0
  68. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_aarch64_asm.S +72 -0
  69. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_table.c +23 -11
  70. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_aarch64_asm.S +189 -0
  71. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_aarch64_asm.S +137 -0
  72. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_aarch64_asm.S +130 -0
  73. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta_table.c +520 -516
  74. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_table.c +34 -33
  75. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/api.h +202 -242
  76. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/meta.h +25 -17
  77. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/arith_native_x86_64.h +112 -28
  78. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.c +1 -1
  79. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/consts.h +1 -1
  80. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt_avx2_asm.S +2311 -0
  81. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt_avx2_asm.S +2383 -0
  82. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack_avx2_asm.S +238 -0
  83. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4_avx2_asm.S +139 -0
  84. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5_avx2_asm.S +155 -0
  85. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7_avx2_asm.S +187 -0
  86. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_avx2_asm.S +130 -0
  87. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2_asm.S +190 -0
  88. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_32_avx2.c +6 -4
  89. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_decompose_88_avx2.c +6 -4
  90. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_32_avx2.c +9 -8
  91. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_use_hint_88_avx2.c +10 -9
  92. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_17_avx2.c +8 -5
  93. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/polyz_unpack_19_avx2.c +8 -5
  94. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta2_avx2.c +6 -4
  95. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_eta4_avx2.c +6 -4
  96. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/rej_uniform_table.c +130 -129
  97. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.c +109 -180
  98. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/packing.h +169 -150
  99. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.c +56 -40
  100. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly.h +149 -164
  101. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.c +52 -57
  102. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/poly_kl.h +132 -167
  103. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.c +57 -424
  104. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec.h +167 -474
  105. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.c +308 -0
  106. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/polyvec_lazy.h +653 -0
  107. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/reduce.h +22 -29
  108. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/rounding.h +37 -43
  109. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.c +511 -367
  110. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/sign.h +456 -417
  111. data/lib/pq_crypto/hybrid_kem.rb +1 -1
  112. data/lib/pq_crypto/internal.rb +23 -0
  113. data/lib/pq_crypto/kem.rb +27 -34
  114. data/lib/pq_crypto/pkcs8/der.rb +68 -0
  115. data/lib/pq_crypto/pkcs8/private_key_choice.rb +186 -0
  116. data/lib/pq_crypto/pkcs8.rb +51 -468
  117. data/lib/pq_crypto/serialization.rb +19 -29
  118. data/lib/pq_crypto/signature.rb +28 -35
  119. data/lib/pq_crypto/version.rb +1 -1
  120. data/lib/pq_crypto.rb +10 -0
  121. data/script/vendor_libs.rb +3 -3
  122. metadata +44 -35
  123. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_scalar_asm.S +0 -376
  124. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x1_v84a_asm.S +0 -204
  125. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x2_v84a_asm.S +0 -259
  126. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +0 -1077
  127. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +0 -987
  128. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.c +0 -488
  129. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/fips202/native/x86_64/src/KeccakP_1600_times4_SIMD256.h +0 -16
  130. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/intt.S +0 -753
  131. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l4.S +0 -129
  132. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l5.S +0 -145
  133. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/mld_polyvecl_pointwise_acc_montgomery_l7.S +0 -177
  134. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/ntt.S +0 -653
  135. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/pointwise_montgomery.S +0 -79
  136. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_caddq_asm.S +0 -53
  137. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_chknorm_asm.S +0 -55
  138. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_32_asm.S +0 -85
  139. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_decompose_88_asm.S +0 -85
  140. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_32_asm.S +0 -102
  141. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/poly_use_hint_88_asm.S +0 -110
  142. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_17_asm.S +0 -72
  143. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/polyz_unpack_19_asm.S +0 -69
  144. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_asm.S +0 -189
  145. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta2_asm.S +0 -135
  146. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/aarch64/src/rej_uniform_eta4_asm.S +0 -128
  147. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/intt.S +0 -2311
  148. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/ntt.S +0 -2383
  149. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/nttunpack.S +0 -239
  150. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise.S +0 -131
  151. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l4.S +0 -139
  152. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l5.S +0 -155
  153. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/pointwise_acc_l7.S +0 -187
  154. data/ext/pqcrypto/vendor/mldsa-native/mldsa/src/native/x86_64/src/poly_caddq_avx2.c +0 -61
@@ -1,187 +0,0 @@
1
- /*
2
- * Copyright (c) The mldsa-native project authors
3
- * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
- */
5
-
6
- /* References
7
- * ==========
8
- *
9
- * - [REF_AVX2]
10
- * CRYSTALS-Dilithium optimized AVX2 implementation
11
- * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
- * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
- */
14
-
15
- /*
16
- * This file is derived from the public domain
17
- * AVX2 Dilithium implementation @[REF_AVX2].
18
- */
19
-
20
- #include "../../../common.h"
21
- #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
22
- !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED) && \
23
- (defined(MLD_CONFIG_MULTILEVEL_WITH_SHARED) || MLDSA_L == 7)
24
-
25
- /*
26
- * WARNING: This file is auto-derived from the mldsa-native source file
27
- * dev/x86_64/src/pointwise_acc_l7.S using scripts/simpasm. Do not modify it directly.
28
- */
29
-
30
- #if defined(__ELF__)
31
- .section .note.GNU-stack,"",@progbits
32
- #endif
33
-
34
- .text
35
- .balign 4
36
- .global MLD_ASM_NAMESPACE(pointwise_acc_l7_avx2)
37
- MLD_ASM_FN_SYMBOL(pointwise_acc_l7_avx2)
38
-
39
- .cfi_startproc
40
- vmovdqa 0x20(%rcx), %ymm0
41
- vmovdqa (%rcx), %ymm1
42
- xorl %eax, %eax
43
-
44
- Lpointwise_acc_l7_avx2_looptop2:
45
- vmovdqa (%rsi), %ymm6
46
- vmovdqa 0x20(%rsi), %ymm8
47
- vmovdqa (%rdx), %ymm10
48
- vmovdqa 0x20(%rdx), %ymm12
49
- vpsrlq $0x20, %ymm6, %ymm7
50
- vpsrlq $0x20, %ymm8, %ymm9
51
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
52
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
53
- vpmuldq %ymm10, %ymm6, %ymm6
54
- vpmuldq %ymm11, %ymm7, %ymm7
55
- vpmuldq %ymm12, %ymm8, %ymm8
56
- vpmuldq %ymm13, %ymm9, %ymm9
57
- vmovdqa %ymm6, %ymm2
58
- vmovdqa %ymm7, %ymm3
59
- vmovdqa %ymm8, %ymm4
60
- vmovdqa %ymm9, %ymm5
61
- vmovdqa 0x400(%rsi), %ymm6
62
- vmovdqa 0x420(%rsi), %ymm8
63
- vmovdqa 0x400(%rdx), %ymm10
64
- vmovdqa 0x420(%rdx), %ymm12
65
- vpsrlq $0x20, %ymm6, %ymm7
66
- vpsrlq $0x20, %ymm8, %ymm9
67
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
68
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
69
- vpmuldq %ymm10, %ymm6, %ymm6
70
- vpmuldq %ymm11, %ymm7, %ymm7
71
- vpmuldq %ymm12, %ymm8, %ymm8
72
- vpmuldq %ymm13, %ymm9, %ymm9
73
- vpaddq %ymm2, %ymm6, %ymm2
74
- vpaddq %ymm3, %ymm7, %ymm3
75
- vpaddq %ymm4, %ymm8, %ymm4
76
- vpaddq %ymm5, %ymm9, %ymm5
77
- vmovdqa 0x800(%rsi), %ymm6
78
- vmovdqa 0x820(%rsi), %ymm8
79
- vmovdqa 0x800(%rdx), %ymm10
80
- vmovdqa 0x820(%rdx), %ymm12
81
- vpsrlq $0x20, %ymm6, %ymm7
82
- vpsrlq $0x20, %ymm8, %ymm9
83
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
84
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
85
- vpmuldq %ymm10, %ymm6, %ymm6
86
- vpmuldq %ymm11, %ymm7, %ymm7
87
- vpmuldq %ymm12, %ymm8, %ymm8
88
- vpmuldq %ymm13, %ymm9, %ymm9
89
- vpaddq %ymm2, %ymm6, %ymm2
90
- vpaddq %ymm3, %ymm7, %ymm3
91
- vpaddq %ymm4, %ymm8, %ymm4
92
- vpaddq %ymm5, %ymm9, %ymm5
93
- vmovdqa 0xc00(%rsi), %ymm6
94
- vmovdqa 0xc20(%rsi), %ymm8
95
- vmovdqa 0xc00(%rdx), %ymm10
96
- vmovdqa 0xc20(%rdx), %ymm12
97
- vpsrlq $0x20, %ymm6, %ymm7
98
- vpsrlq $0x20, %ymm8, %ymm9
99
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
100
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
101
- vpmuldq %ymm10, %ymm6, %ymm6
102
- vpmuldq %ymm11, %ymm7, %ymm7
103
- vpmuldq %ymm12, %ymm8, %ymm8
104
- vpmuldq %ymm13, %ymm9, %ymm9
105
- vpaddq %ymm2, %ymm6, %ymm2
106
- vpaddq %ymm3, %ymm7, %ymm3
107
- vpaddq %ymm4, %ymm8, %ymm4
108
- vpaddq %ymm5, %ymm9, %ymm5
109
- vmovdqa 0x1000(%rsi), %ymm6
110
- vmovdqa 0x1020(%rsi), %ymm8
111
- vmovdqa 0x1000(%rdx), %ymm10
112
- vmovdqa 0x1020(%rdx), %ymm12
113
- vpsrlq $0x20, %ymm6, %ymm7
114
- vpsrlq $0x20, %ymm8, %ymm9
115
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
116
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
117
- vpmuldq %ymm10, %ymm6, %ymm6
118
- vpmuldq %ymm11, %ymm7, %ymm7
119
- vpmuldq %ymm12, %ymm8, %ymm8
120
- vpmuldq %ymm13, %ymm9, %ymm9
121
- vpaddq %ymm2, %ymm6, %ymm2
122
- vpaddq %ymm3, %ymm7, %ymm3
123
- vpaddq %ymm4, %ymm8, %ymm4
124
- vpaddq %ymm5, %ymm9, %ymm5
125
- vmovdqa 0x1400(%rsi), %ymm6
126
- vmovdqa 0x1420(%rsi), %ymm8
127
- vmovdqa 0x1400(%rdx), %ymm10
128
- vmovdqa 0x1420(%rdx), %ymm12
129
- vpsrlq $0x20, %ymm6, %ymm7
130
- vpsrlq $0x20, %ymm8, %ymm9
131
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
132
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
133
- vpmuldq %ymm10, %ymm6, %ymm6
134
- vpmuldq %ymm11, %ymm7, %ymm7
135
- vpmuldq %ymm12, %ymm8, %ymm8
136
- vpmuldq %ymm13, %ymm9, %ymm9
137
- vpaddq %ymm2, %ymm6, %ymm2
138
- vpaddq %ymm3, %ymm7, %ymm3
139
- vpaddq %ymm4, %ymm8, %ymm4
140
- vpaddq %ymm5, %ymm9, %ymm5
141
- vmovdqa 0x1800(%rsi), %ymm6
142
- vmovdqa 0x1820(%rsi), %ymm8
143
- vmovdqa 0x1800(%rdx), %ymm10
144
- vmovdqa 0x1820(%rdx), %ymm12
145
- vpsrlq $0x20, %ymm6, %ymm7
146
- vpsrlq $0x20, %ymm8, %ymm9
147
- vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7]
148
- vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7]
149
- vpmuldq %ymm10, %ymm6, %ymm6
150
- vpmuldq %ymm11, %ymm7, %ymm7
151
- vpmuldq %ymm12, %ymm8, %ymm8
152
- vpmuldq %ymm13, %ymm9, %ymm9
153
- vpaddq %ymm2, %ymm6, %ymm2
154
- vpaddq %ymm3, %ymm7, %ymm3
155
- vpaddq %ymm4, %ymm8, %ymm4
156
- vpaddq %ymm5, %ymm9, %ymm5
157
- vpmuldq %ymm2, %ymm0, %ymm6
158
- vpmuldq %ymm3, %ymm0, %ymm7
159
- vpmuldq %ymm4, %ymm0, %ymm8
160
- vpmuldq %ymm5, %ymm0, %ymm9
161
- vpmuldq %ymm6, %ymm1, %ymm6
162
- vpmuldq %ymm7, %ymm1, %ymm7
163
- vpmuldq %ymm8, %ymm1, %ymm8
164
- vpmuldq %ymm9, %ymm1, %ymm9
165
- vpsubq %ymm6, %ymm2, %ymm2
166
- vpsubq %ymm7, %ymm3, %ymm3
167
- vpsubq %ymm8, %ymm4, %ymm4
168
- vpsubq %ymm9, %ymm5, %ymm5
169
- vpsrlq $0x20, %ymm2, %ymm2
170
- vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7]
171
- vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
172
- vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7]
173
- vmovdqa %ymm2, (%rdi)
174
- vmovdqa %ymm4, 0x20(%rdi)
175
- addq $0x40, %rsi
176
- addq $0x40, %rdx
177
- addq $0x40, %rdi
178
- addl $0x1, %eax
179
- cmpl $0x10, %eax
180
- jb Lpointwise_acc_l7_avx2_looptop2
181
- retq
182
- .cfi_endproc
183
-
184
- MLD_ASM_FN_SIZE(pointwise_acc_l7_avx2)
185
-
186
- #endif /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
187
- && (MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7) */
@@ -1,61 +0,0 @@
1
- /*
2
- * Copyright (c) The mldsa-native project authors
3
- * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4
- */
5
-
6
- /* References
7
- * ==========
8
- *
9
- * - [REF_AVX2]
10
- * CRYSTALS-Dilithium optimized AVX2 implementation
11
- * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé
12
- * https://github.com/pq-crystals/dilithium/tree/master/avx2
13
- */
14
-
15
- /*
16
- * This file is derived from the public domain
17
- * AVX2 Dilithium implementation @[REF_AVX2].
18
- */
19
-
20
- #include "../../../common.h"
21
-
22
- #if defined(MLD_ARITH_BACKEND_X86_64_DEFAULT) && \
23
- !defined(MLD_CONFIG_MULTILEVEL_NO_SHARED)
24
-
25
- #include <immintrin.h>
26
- #include "arith_native_x86_64.h"
27
- #include "consts.h"
28
-
29
- /*************************************************
30
- * Name: mld_poly_caddq_avx2
31
- *
32
- * Description: For all coefficients of in/out polynomial add Q if
33
- * coefficient is negative.
34
- *
35
- * Arguments: - int32_t *r: pointer to input/output polynomial
36
- **************************************************/
37
- void mld_poly_caddq_avx2(int32_t *r)
38
- {
39
- unsigned int i;
40
- __m256i f, g;
41
- const __m256i q = _mm256_set1_epi32(MLDSA_Q);
42
- const __m256i zero = _mm256_setzero_si256();
43
- __m256i *rr = (__m256i *)r;
44
-
45
- for (i = 0; i < MLDSA_N / 8; i++)
46
- {
47
- f = _mm256_load_si256(&rr[i]);
48
- g = _mm256_cmpgt_epi32(zero, f);
49
- g = _mm256_and_si256(g, q);
50
- f = _mm256_add_epi32(f, g);
51
- _mm256_store_si256(&rr[i], f);
52
- }
53
- }
54
-
55
- #else /* MLD_ARITH_BACKEND_X86_64_DEFAULT && !MLD_CONFIG_MULTILEVEL_NO_SHARED \
56
- */
57
-
58
- MLD_EMPTY_CU(avx2_reduce)
59
-
60
- #endif /* !(MLD_ARITH_BACKEND_X86_64_DEFAULT && \
61
- !MLD_CONFIG_MULTILEVEL_NO_SHARED) */