pq_crypto 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/pqcrypto/pqcrypto_version.h +1 -1
  4. data/ext/pqcrypto/vendor/.vendored +4 -4
  5. data/ext/pqcrypto/vendor/mlkem-native/README.md +6 -3
  6. data/ext/pqcrypto/vendor/mlkem-native/RELEASE.md +22 -0
  7. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.c +77 -36
  8. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native.h +135 -146
  9. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_asm.S +116 -72
  10. data/ext/pqcrypto/vendor/mlkem-native/mlkem/mlkem_native_config.h +351 -415
  11. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/cbmc.h +43 -20
  12. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/common.h +16 -8
  13. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.c +57 -31
  14. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/compress.h +260 -349
  15. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/debug.h +17 -24
  16. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.c +35 -37
  17. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202.h +43 -57
  18. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.c +14 -15
  19. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/fips202x4.h +5 -4
  20. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/keccakf1600.c +42 -6
  21. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h +31 -20
  22. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x1_scalar_asm.S → keccak_f1600_x1_scalar_aarch64_asm.S} +10 -10
  23. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x1_v84a_asm.S → keccak_f1600_x1_v84a_aarch64_asm.S} +10 -10
  24. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x2_v84a_asm.S → keccak_f1600_x2_v84a_aarch64_asm.S} +10 -10
  25. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x4_v8a_scalar_hybrid_asm.S → keccak_f1600_x4_v8a_scalar_hybrid_aarch64_asm.S} +10 -10
  26. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/{keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S → keccak_f1600_x4_v8a_v84a_scalar_hybrid_aarch64_asm.S} +10 -10
  27. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/src/keccakf1600_round_constants.c +10 -9
  28. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_scalar.h +2 -1
  29. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x1_v84a.h +1 -1
  30. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x2_v84a.h +4 -2
  31. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h +2 -2
  32. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h +1 -1
  33. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h +2 -1
  34. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S +55 -9
  35. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/keccakf1600_round_constants.c +26 -25
  36. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S +58 -14
  37. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S +57 -16
  38. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/auto.h +2 -1
  39. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h +2 -2
  40. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h +10 -7
  41. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/{keccak_f1600_x4_avx2.S → keccak_f1600_x4_avx2_asm.S} +13 -11
  42. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/fips202/native/x86_64/src/keccakf1600_constants.c +12 -11
  43. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.c +167 -136
  44. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/indcpa.h +75 -68
  45. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/kem.h +135 -157
  46. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/meta.h +15 -13
  47. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/aarch64_zetas.c +143 -135
  48. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/arith_native_aarch64.h +52 -46
  49. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{intt.S → intt_aarch64_asm.S} +10 -10
  50. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{ntt.S → ntt_aarch64_asm.S} +10 -10
  51. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_mulcache_compute_asm.S → poly_mulcache_compute_aarch64_asm.S} +10 -10
  52. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_reduce_asm.S → poly_reduce_aarch64_asm.S} +10 -10
  53. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_tobytes_asm.S → poly_tobytes_aarch64_asm.S} +10 -10
  54. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{poly_tomont_asm.S → poly_tomont_aarch64_asm.S} +10 -12
  55. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k2.S → polyvec_basemul_acc_montgomery_cached_k2_aarch64_asm.S} +10 -10
  56. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k3.S → polyvec_basemul_acc_montgomery_cached_k3_aarch64_asm.S} +10 -10
  57. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{polyvec_basemul_acc_montgomery_cached_asm_k4.S → polyvec_basemul_acc_montgomery_cached_k4_aarch64_asm.S} +10 -10
  58. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/{rej_uniform_asm.S → rej_uniform_aarch64_asm.S} +12 -12
  59. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/aarch64/src/rej_uniform_table.c +514 -513
  60. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/api.h +254 -253
  61. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/meta.h +6 -1
  62. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/README.md +6 -0
  63. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/meta.h +77 -0
  64. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/arith_native_ppc64le.h +24 -0
  65. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/consts.c +299 -0
  66. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/consts.h +34 -0
  67. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/intt_ppc_asm.S +3222 -0
  68. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/ntt_ppc_asm.S +1651 -0
  69. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/poly_tomont_ppc_asm.S +294 -0
  70. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/ppc64le/src/reduce_ppc_asm.S +710 -0
  71. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/meta.h +5 -0
  72. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.c +18 -16
  73. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_debug.h +19 -24
  74. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/riscv64/src/rv64v_poly.c +53 -65
  75. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/meta.h +20 -20
  76. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/arith_native_x86_64.h +106 -88
  77. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.c +45 -35
  78. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/compress_consts.h +8 -8
  79. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.c +1 -1
  80. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/consts.h +1 -1
  81. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{intt.S → intt_avx2_asm.S} +8 -8
  82. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{ntt.S → ntt_avx2_asm.S} +8 -8
  83. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{nttfrombytes.S → nttfrombytes_avx2_asm.S} +8 -8
  84. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{ntttobytes.S → ntttobytes_avx2_asm.S} +8 -8
  85. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{nttunpack.S → nttunpack_avx2_asm.S} +8 -8
  86. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d10.S → poly_compress_d10_avx2_asm.S} +9 -9
  87. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d11.S → poly_compress_d11_avx2_asm.S} +9 -9
  88. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d4.S → poly_compress_d4_avx2_asm.S} +9 -9
  89. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_compress_d5.S → poly_compress_d5_avx2_asm.S} +9 -9
  90. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d10.S → poly_decompress_d10_avx2_asm.S} +9 -9
  91. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d11.S → poly_decompress_d11_avx2_asm.S} +9 -9
  92. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d4.S → poly_decompress_d4_avx2_asm.S} +9 -9
  93. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{poly_decompress_d5.S → poly_decompress_d5_avx2_asm.S} +9 -9
  94. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{mulcache_compute.S → poly_mulcache_compute_avx2_asm.S} +8 -8
  95. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k2.S → polyvec_basemul_acc_montgomery_cached_k2_avx2_asm.S} +8 -8
  96. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k3.S → polyvec_basemul_acc_montgomery_cached_k3_avx2_asm.S} +8 -8
  97. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{polyvec_basemul_acc_montgomery_cached_asm_k4.S → polyvec_basemul_acc_montgomery_cached_k4_avx2_asm.S} +8 -8
  98. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{reduce.S → reduce_avx2_asm.S} +8 -8
  99. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{rej_uniform_asm.S → rej_uniform_avx2_asm.S} +9 -9
  100. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/rej_uniform_table.c +514 -513
  101. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/native/x86_64/src/{tomont.S → tomont_avx2_asm.S} +8 -8
  102. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.c +61 -57
  103. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly.h +89 -116
  104. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.c +31 -32
  105. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/poly_k.h +226 -301
  106. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/randombytes.h +21 -29
  107. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.c +68 -63
  108. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sampling.h +37 -48
  109. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/sys.h +44 -2
  110. data/ext/pqcrypto/vendor/mlkem-native/mlkem/src/verify.h +141 -159
  111. data/lib/pq_crypto/version.rb +1 -1
  112. data/script/vendor_libs.rb +3 -3
  113. metadata +47 -38
@@ -0,0 +1,710 @@
1
+ /*
2
+ * Copyright (c) The mlkem-native project authors
3
+ * Copyright (c) IBM Corp. 2025, 2026
4
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
5
+ *
6
+ * Written by Danny Tsen <dtsen@us.ibm.com>
7
+ */
8
+ #include "../../../common.h"
9
+ #if defined(MLK_ARITH_BACKEND_PPC64LE_DEFAULT) && \
10
+ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && defined(__POWER8_VECTOR__)
11
+
12
+ /*
13
+ * WARNING: This file is auto-derived from the mlkem-native source file
14
+ * dev/ppc64le/src/reduce_ppc_asm.S using scripts/simpasm. Do not modify it directly.
15
+ */
16
+
17
+ .text
18
+ .balign 4
19
+ .global MLK_ASM_NAMESPACE(reduce_ppc_asm)
20
+ MLK_ASM_FN_SYMBOL(reduce_ppc_asm)
21
+
22
+ stdu 1, -224(1)
23
+ mflr 0
24
+ std 14, 96(1)
25
+ std 15, 104(1)
26
+ std 16, 112(1)
27
+ li 6, 128
28
+ li 7, 144
29
+ li 8, 160
30
+ li 9, 176
31
+ li 10, 192
32
+ stxvd2x 52, 6, 1
33
+ stxvd2x 53, 7, 1
34
+ stxvd2x 54, 8, 1
35
+ stxvd2x 55, 9, 1
36
+ stxvd2x 56, 10, 1
37
+ vxor 7, 7, 7
38
+ li 6, 16
39
+ li 7, 32
40
+ lxvd2x 35, 6, 4
41
+ lxvd2x 32, 7, 4
42
+ vspltisw 2, 13
43
+ vadduwm 2, 2, 2
44
+ vspltisw 4, 1
45
+ vsubuwm 5, 2, 4
46
+ vslw 1, 4, 5
47
+ li 4, -128
48
+ li 5, -112
49
+ li 6, -96
50
+ li 7, -80
51
+ li 8, -64
52
+ li 9, -48
53
+ li 10, -32
54
+ li 11, -16
55
+ li 14, 16
56
+ li 15, 32
57
+ li 16, 48
58
+ lxvd2x 40, 0, 3
59
+ lxvd2x 44, 14, 3
60
+ lxvd2x 48, 15, 3
61
+ lxvd2x 52, 16, 3
62
+ addi 3, 3, 64
63
+ vmulosh 6, 8, 0
64
+ vmulesh 5, 8, 0
65
+ vmulosh 11, 12, 0
66
+ vmulesh 10, 12, 0
67
+ vmulosh 15, 16, 0
68
+ vmulesh 14, 16, 0
69
+ vmulosh 19, 20, 0
70
+ vmulesh 18, 20, 0
71
+ xxmrglw 36, 37, 38
72
+ xxmrghw 37, 37, 38
73
+ xxmrglw 41, 42, 43
74
+ xxmrghw 42, 42, 43
75
+ xxmrglw 45, 46, 47
76
+ xxmrghw 46, 46, 47
77
+ xxmrglw 49, 50, 51
78
+ xxmrghw 50, 50, 51
79
+ vadduwm 4, 4, 1
80
+ vadduwm 5, 5, 1
81
+ vadduwm 9, 9, 1
82
+ vadduwm 10, 10, 1
83
+ vadduwm 13, 13, 1
84
+ vadduwm 14, 14, 1
85
+ vadduwm 17, 17, 1
86
+ vadduwm 18, 18, 1
87
+ vsraw 4, 4, 2
88
+ vsraw 5, 5, 2
89
+ vsraw 9, 9, 2
90
+ vsraw 10, 10, 2
91
+ vsraw 13, 13, 2
92
+ vsraw 14, 14, 2
93
+ vsraw 17, 17, 2
94
+ vsraw 18, 18, 2
95
+ vpkuwum 4, 5, 4
96
+ vsubuhm 4, 7, 4
97
+ vpkuwum 9, 10, 9
98
+ vsubuhm 9, 7, 9
99
+ vpkuwum 13, 14, 13
100
+ vsubuhm 13, 7, 13
101
+ vpkuwum 17, 18, 17
102
+ vsubuhm 17, 7, 17
103
+ vmladduhm 21, 4, 3, 8
104
+ vmladduhm 22, 9, 3, 12
105
+ vmladduhm 23, 13, 3, 16
106
+ vmladduhm 24, 17, 3, 20
107
+ lxvd2x 40, 0, 3
108
+ lxvd2x 44, 14, 3
109
+ lxvd2x 48, 15, 3
110
+ lxvd2x 52, 16, 3
111
+ addi 3, 3, 64
112
+ vmulosh 6, 8, 0
113
+ vmulesh 5, 8, 0
114
+ vmulosh 11, 12, 0
115
+ vmulesh 10, 12, 0
116
+ vmulosh 15, 16, 0
117
+ vmulesh 14, 16, 0
118
+ vmulosh 19, 20, 0
119
+ vmulesh 18, 20, 0
120
+ xxmrglw 36, 37, 38
121
+ xxmrghw 37, 37, 38
122
+ xxmrglw 41, 42, 43
123
+ xxmrghw 42, 42, 43
124
+ xxmrglw 45, 46, 47
125
+ xxmrghw 46, 46, 47
126
+ xxmrglw 49, 50, 51
127
+ xxmrghw 50, 50, 51
128
+ vadduwm 4, 4, 1
129
+ vadduwm 5, 5, 1
130
+ vadduwm 9, 9, 1
131
+ vadduwm 10, 10, 1
132
+ vadduwm 13, 13, 1
133
+ vadduwm 14, 14, 1
134
+ vadduwm 17, 17, 1
135
+ vadduwm 18, 18, 1
136
+ vsraw 4, 4, 2
137
+ vsraw 5, 5, 2
138
+ vsraw 9, 9, 2
139
+ vsraw 10, 10, 2
140
+ vsraw 13, 13, 2
141
+ vsraw 14, 14, 2
142
+ vsraw 17, 17, 2
143
+ vsraw 18, 18, 2
144
+ vpkuwum 4, 5, 4
145
+ vsubuhm 4, 7, 4
146
+ vpkuwum 9, 10, 9
147
+ vsubuhm 9, 7, 9
148
+ vpkuwum 13, 14, 13
149
+ vsubuhm 13, 7, 13
150
+ vpkuwum 17, 18, 17
151
+ vsubuhm 17, 7, 17
152
+ vmladduhm 4, 4, 3, 8
153
+ vmladduhm 9, 9, 3, 12
154
+ vmladduhm 13, 13, 3, 16
155
+ vmladduhm 17, 17, 3, 20
156
+ stxvd2x 53, 4, 3
157
+ stxvd2x 54, 5, 3
158
+ stxvd2x 55, 6, 3
159
+ stxvd2x 56, 7, 3
160
+ stxvd2x 36, 8, 3
161
+ stxvd2x 41, 9, 3
162
+ stxvd2x 45, 10, 3
163
+ stxvd2x 49, 11, 3
164
+ lxvd2x 40, 0, 3
165
+ lxvd2x 44, 14, 3
166
+ lxvd2x 48, 15, 3
167
+ lxvd2x 52, 16, 3
168
+ addi 3, 3, 64
169
+ vmulosh 6, 8, 0
170
+ vmulesh 5, 8, 0
171
+ vmulosh 11, 12, 0
172
+ vmulesh 10, 12, 0
173
+ vmulosh 15, 16, 0
174
+ vmulesh 14, 16, 0
175
+ vmulosh 19, 20, 0
176
+ vmulesh 18, 20, 0
177
+ xxmrglw 36, 37, 38
178
+ xxmrghw 37, 37, 38
179
+ xxmrglw 41, 42, 43
180
+ xxmrghw 42, 42, 43
181
+ xxmrglw 45, 46, 47
182
+ xxmrghw 46, 46, 47
183
+ xxmrglw 49, 50, 51
184
+ xxmrghw 50, 50, 51
185
+ vadduwm 4, 4, 1
186
+ vadduwm 5, 5, 1
187
+ vadduwm 9, 9, 1
188
+ vadduwm 10, 10, 1
189
+ vadduwm 13, 13, 1
190
+ vadduwm 14, 14, 1
191
+ vadduwm 17, 17, 1
192
+ vadduwm 18, 18, 1
193
+ vsraw 4, 4, 2
194
+ vsraw 5, 5, 2
195
+ vsraw 9, 9, 2
196
+ vsraw 10, 10, 2
197
+ vsraw 13, 13, 2
198
+ vsraw 14, 14, 2
199
+ vsraw 17, 17, 2
200
+ vsraw 18, 18, 2
201
+ vpkuwum 4, 5, 4
202
+ vsubuhm 4, 7, 4
203
+ vpkuwum 9, 10, 9
204
+ vsubuhm 9, 7, 9
205
+ vpkuwum 13, 14, 13
206
+ vsubuhm 13, 7, 13
207
+ vpkuwum 17, 18, 17
208
+ vsubuhm 17, 7, 17
209
+ vmladduhm 21, 4, 3, 8
210
+ vmladduhm 22, 9, 3, 12
211
+ vmladduhm 23, 13, 3, 16
212
+ vmladduhm 24, 17, 3, 20
213
+ lxvd2x 40, 0, 3
214
+ lxvd2x 44, 14, 3
215
+ lxvd2x 48, 15, 3
216
+ lxvd2x 52, 16, 3
217
+ addi 3, 3, 64
218
+ vmulosh 6, 8, 0
219
+ vmulesh 5, 8, 0
220
+ vmulosh 11, 12, 0
221
+ vmulesh 10, 12, 0
222
+ vmulosh 15, 16, 0
223
+ vmulesh 14, 16, 0
224
+ vmulosh 19, 20, 0
225
+ vmulesh 18, 20, 0
226
+ xxmrglw 36, 37, 38
227
+ xxmrghw 37, 37, 38
228
+ xxmrglw 41, 42, 43
229
+ xxmrghw 42, 42, 43
230
+ xxmrglw 45, 46, 47
231
+ xxmrghw 46, 46, 47
232
+ xxmrglw 49, 50, 51
233
+ xxmrghw 50, 50, 51
234
+ vadduwm 4, 4, 1
235
+ vadduwm 5, 5, 1
236
+ vadduwm 9, 9, 1
237
+ vadduwm 10, 10, 1
238
+ vadduwm 13, 13, 1
239
+ vadduwm 14, 14, 1
240
+ vadduwm 17, 17, 1
241
+ vadduwm 18, 18, 1
242
+ vsraw 4, 4, 2
243
+ vsraw 5, 5, 2
244
+ vsraw 9, 9, 2
245
+ vsraw 10, 10, 2
246
+ vsraw 13, 13, 2
247
+ vsraw 14, 14, 2
248
+ vsraw 17, 17, 2
249
+ vsraw 18, 18, 2
250
+ vpkuwum 4, 5, 4
251
+ vsubuhm 4, 7, 4
252
+ vpkuwum 9, 10, 9
253
+ vsubuhm 9, 7, 9
254
+ vpkuwum 13, 14, 13
255
+ vsubuhm 13, 7, 13
256
+ vpkuwum 17, 18, 17
257
+ vsubuhm 17, 7, 17
258
+ vmladduhm 4, 4, 3, 8
259
+ vmladduhm 9, 9, 3, 12
260
+ vmladduhm 13, 13, 3, 16
261
+ vmladduhm 17, 17, 3, 20
262
+ stxvd2x 53, 4, 3
263
+ stxvd2x 54, 5, 3
264
+ stxvd2x 55, 6, 3
265
+ stxvd2x 56, 7, 3
266
+ stxvd2x 36, 8, 3
267
+ stxvd2x 41, 9, 3
268
+ stxvd2x 45, 10, 3
269
+ stxvd2x 49, 11, 3
270
+ lxvd2x 40, 0, 3
271
+ lxvd2x 44, 14, 3
272
+ lxvd2x 48, 15, 3
273
+ lxvd2x 52, 16, 3
274
+ addi 3, 3, 64
275
+ vmulosh 6, 8, 0
276
+ vmulesh 5, 8, 0
277
+ vmulosh 11, 12, 0
278
+ vmulesh 10, 12, 0
279
+ vmulosh 15, 16, 0
280
+ vmulesh 14, 16, 0
281
+ vmulosh 19, 20, 0
282
+ vmulesh 18, 20, 0
283
+ xxmrglw 36, 37, 38
284
+ xxmrghw 37, 37, 38
285
+ xxmrglw 41, 42, 43
286
+ xxmrghw 42, 42, 43
287
+ xxmrglw 45, 46, 47
288
+ xxmrghw 46, 46, 47
289
+ xxmrglw 49, 50, 51
290
+ xxmrghw 50, 50, 51
291
+ vadduwm 4, 4, 1
292
+ vadduwm 5, 5, 1
293
+ vadduwm 9, 9, 1
294
+ vadduwm 10, 10, 1
295
+ vadduwm 13, 13, 1
296
+ vadduwm 14, 14, 1
297
+ vadduwm 17, 17, 1
298
+ vadduwm 18, 18, 1
299
+ vsraw 4, 4, 2
300
+ vsraw 5, 5, 2
301
+ vsraw 9, 9, 2
302
+ vsraw 10, 10, 2
303
+ vsraw 13, 13, 2
304
+ vsraw 14, 14, 2
305
+ vsraw 17, 17, 2
306
+ vsraw 18, 18, 2
307
+ vpkuwum 4, 5, 4
308
+ vsubuhm 4, 7, 4
309
+ vpkuwum 9, 10, 9
310
+ vsubuhm 9, 7, 9
311
+ vpkuwum 13, 14, 13
312
+ vsubuhm 13, 7, 13
313
+ vpkuwum 17, 18, 17
314
+ vsubuhm 17, 7, 17
315
+ vmladduhm 21, 4, 3, 8
316
+ vmladduhm 22, 9, 3, 12
317
+ vmladduhm 23, 13, 3, 16
318
+ vmladduhm 24, 17, 3, 20
319
+ lxvd2x 40, 0, 3
320
+ lxvd2x 44, 14, 3
321
+ lxvd2x 48, 15, 3
322
+ lxvd2x 52, 16, 3
323
+ addi 3, 3, 64
324
+ vmulosh 6, 8, 0
325
+ vmulesh 5, 8, 0
326
+ vmulosh 11, 12, 0
327
+ vmulesh 10, 12, 0
328
+ vmulosh 15, 16, 0
329
+ vmulesh 14, 16, 0
330
+ vmulosh 19, 20, 0
331
+ vmulesh 18, 20, 0
332
+ xxmrglw 36, 37, 38
333
+ xxmrghw 37, 37, 38
334
+ xxmrglw 41, 42, 43
335
+ xxmrghw 42, 42, 43
336
+ xxmrglw 45, 46, 47
337
+ xxmrghw 46, 46, 47
338
+ xxmrglw 49, 50, 51
339
+ xxmrghw 50, 50, 51
340
+ vadduwm 4, 4, 1
341
+ vadduwm 5, 5, 1
342
+ vadduwm 9, 9, 1
343
+ vadduwm 10, 10, 1
344
+ vadduwm 13, 13, 1
345
+ vadduwm 14, 14, 1
346
+ vadduwm 17, 17, 1
347
+ vadduwm 18, 18, 1
348
+ vsraw 4, 4, 2
349
+ vsraw 5, 5, 2
350
+ vsraw 9, 9, 2
351
+ vsraw 10, 10, 2
352
+ vsraw 13, 13, 2
353
+ vsraw 14, 14, 2
354
+ vsraw 17, 17, 2
355
+ vsraw 18, 18, 2
356
+ vpkuwum 4, 5, 4
357
+ vsubuhm 4, 7, 4
358
+ vpkuwum 9, 10, 9
359
+ vsubuhm 9, 7, 9
360
+ vpkuwum 13, 14, 13
361
+ vsubuhm 13, 7, 13
362
+ vpkuwum 17, 18, 17
363
+ vsubuhm 17, 7, 17
364
+ vmladduhm 4, 4, 3, 8
365
+ vmladduhm 9, 9, 3, 12
366
+ vmladduhm 13, 13, 3, 16
367
+ vmladduhm 17, 17, 3, 20
368
+ stxvd2x 53, 4, 3
369
+ stxvd2x 54, 5, 3
370
+ stxvd2x 55, 6, 3
371
+ stxvd2x 56, 7, 3
372
+ stxvd2x 36, 8, 3
373
+ stxvd2x 41, 9, 3
374
+ stxvd2x 45, 10, 3
375
+ stxvd2x 49, 11, 3
376
+ lxvd2x 40, 0, 3
377
+ lxvd2x 44, 14, 3
378
+ lxvd2x 48, 15, 3
379
+ lxvd2x 52, 16, 3
380
+ addi 3, 3, 64
381
+ vmulosh 6, 8, 0
382
+ vmulesh 5, 8, 0
383
+ vmulosh 11, 12, 0
384
+ vmulesh 10, 12, 0
385
+ vmulosh 15, 16, 0
386
+ vmulesh 14, 16, 0
387
+ vmulosh 19, 20, 0
388
+ vmulesh 18, 20, 0
389
+ xxmrglw 36, 37, 38
390
+ xxmrghw 37, 37, 38
391
+ xxmrglw 41, 42, 43
392
+ xxmrghw 42, 42, 43
393
+ xxmrglw 45, 46, 47
394
+ xxmrghw 46, 46, 47
395
+ xxmrglw 49, 50, 51
396
+ xxmrghw 50, 50, 51
397
+ vadduwm 4, 4, 1
398
+ vadduwm 5, 5, 1
399
+ vadduwm 9, 9, 1
400
+ vadduwm 10, 10, 1
401
+ vadduwm 13, 13, 1
402
+ vadduwm 14, 14, 1
403
+ vadduwm 17, 17, 1
404
+ vadduwm 18, 18, 1
405
+ vsraw 4, 4, 2
406
+ vsraw 5, 5, 2
407
+ vsraw 9, 9, 2
408
+ vsraw 10, 10, 2
409
+ vsraw 13, 13, 2
410
+ vsraw 14, 14, 2
411
+ vsraw 17, 17, 2
412
+ vsraw 18, 18, 2
413
+ vpkuwum 4, 5, 4
414
+ vsubuhm 4, 7, 4
415
+ vpkuwum 9, 10, 9
416
+ vsubuhm 9, 7, 9
417
+ vpkuwum 13, 14, 13
418
+ vsubuhm 13, 7, 13
419
+ vpkuwum 17, 18, 17
420
+ vsubuhm 17, 7, 17
421
+ vmladduhm 21, 4, 3, 8
422
+ vmladduhm 22, 9, 3, 12
423
+ vmladduhm 23, 13, 3, 16
424
+ vmladduhm 24, 17, 3, 20
425
+ lxvd2x 40, 0, 3
426
+ lxvd2x 44, 14, 3
427
+ lxvd2x 48, 15, 3
428
+ lxvd2x 52, 16, 3
429
+ addi 3, 3, 64
430
+ vmulosh 6, 8, 0
431
+ vmulesh 5, 8, 0
432
+ vmulosh 11, 12, 0
433
+ vmulesh 10, 12, 0
434
+ vmulosh 15, 16, 0
435
+ vmulesh 14, 16, 0
436
+ vmulosh 19, 20, 0
437
+ vmulesh 18, 20, 0
438
+ xxmrglw 36, 37, 38
439
+ xxmrghw 37, 37, 38
440
+ xxmrglw 41, 42, 43
441
+ xxmrghw 42, 42, 43
442
+ xxmrglw 45, 46, 47
443
+ xxmrghw 46, 46, 47
444
+ xxmrglw 49, 50, 51
445
+ xxmrghw 50, 50, 51
446
+ vadduwm 4, 4, 1
447
+ vadduwm 5, 5, 1
448
+ vadduwm 9, 9, 1
449
+ vadduwm 10, 10, 1
450
+ vadduwm 13, 13, 1
451
+ vadduwm 14, 14, 1
452
+ vadduwm 17, 17, 1
453
+ vadduwm 18, 18, 1
454
+ vsraw 4, 4, 2
455
+ vsraw 5, 5, 2
456
+ vsraw 9, 9, 2
457
+ vsraw 10, 10, 2
458
+ vsraw 13, 13, 2
459
+ vsraw 14, 14, 2
460
+ vsraw 17, 17, 2
461
+ vsraw 18, 18, 2
462
+ vpkuwum 4, 5, 4
463
+ vsubuhm 4, 7, 4
464
+ vpkuwum 9, 10, 9
465
+ vsubuhm 9, 7, 9
466
+ vpkuwum 13, 14, 13
467
+ vsubuhm 13, 7, 13
468
+ vpkuwum 17, 18, 17
469
+ vsubuhm 17, 7, 17
470
+ vmladduhm 4, 4, 3, 8
471
+ vmladduhm 9, 9, 3, 12
472
+ vmladduhm 13, 13, 3, 16
473
+ vmladduhm 17, 17, 3, 20
474
+ stxvd2x 53, 4, 3
475
+ stxvd2x 54, 5, 3
476
+ stxvd2x 55, 6, 3
477
+ stxvd2x 56, 7, 3
478
+ stxvd2x 36, 8, 3
479
+ stxvd2x 41, 9, 3
480
+ stxvd2x 45, 10, 3
481
+ stxvd2x 49, 11, 3
482
+ addi 3, 3, -512
483
+ vxor 9, 9, 9
484
+ vspltish 10, 15
485
+ vmr 11, 3
486
+ lxvd2x 44, 0, 3
487
+ lxvd2x 45, 14, 3
488
+ lxvd2x 46, 15, 3
489
+ lxvd2x 47, 16, 3
490
+ addi 3, 3, 64
491
+ vsrh 1, 12, 10
492
+ vsrh 0, 13, 10
493
+ vsrh 3, 14, 10
494
+ vsrh 2, 15, 10
495
+ vadduhm 7, 12, 11
496
+ vadduhm 8, 13, 11
497
+ vadduhm 5, 14, 11
498
+ vadduhm 6, 15, 11
499
+ vcmpequh 1, 1, 9
500
+ vcmpequh 0, 0, 9
501
+ vcmpequh 3, 3, 9
502
+ vcmpequh 2, 2, 9
503
+ xxsel 33, 39, 44, 33
504
+ xxsel 32, 40, 45, 32
505
+ xxsel 35, 37, 46, 35
506
+ xxsel 34, 38, 47, 34
507
+ stxvd2x 35, 10, 3
508
+ stxvd2x 34, 11, 3
509
+ stxvd2x 33, 8, 3
510
+ stxvd2x 32, 9, 3
511
+ lxvd2x 44, 0, 3
512
+ lxvd2x 45, 14, 3
513
+ lxvd2x 46, 15, 3
514
+ lxvd2x 47, 16, 3
515
+ addi 3, 3, 64
516
+ vsrh 1, 12, 10
517
+ vsrh 0, 13, 10
518
+ vsrh 3, 14, 10
519
+ vsrh 2, 15, 10
520
+ vadduhm 7, 12, 11
521
+ vadduhm 8, 13, 11
522
+ vadduhm 5, 14, 11
523
+ vadduhm 6, 15, 11
524
+ vcmpequh 1, 1, 9
525
+ vcmpequh 0, 0, 9
526
+ vcmpequh 3, 3, 9
527
+ vcmpequh 2, 2, 9
528
+ xxsel 33, 39, 44, 33
529
+ xxsel 32, 40, 45, 32
530
+ xxsel 35, 37, 46, 35
531
+ xxsel 34, 38, 47, 34
532
+ stxvd2x 35, 10, 3
533
+ stxvd2x 34, 11, 3
534
+ stxvd2x 33, 8, 3
535
+ stxvd2x 32, 9, 3
536
+ lxvd2x 44, 0, 3
537
+ lxvd2x 45, 14, 3
538
+ lxvd2x 46, 15, 3
539
+ lxvd2x 47, 16, 3
540
+ addi 3, 3, 64
541
+ vsrh 1, 12, 10
542
+ vsrh 0, 13, 10
543
+ vsrh 3, 14, 10
544
+ vsrh 2, 15, 10
545
+ vadduhm 7, 12, 11
546
+ vadduhm 8, 13, 11
547
+ vadduhm 5, 14, 11
548
+ vadduhm 6, 15, 11
549
+ vcmpequh 1, 1, 9
550
+ vcmpequh 0, 0, 9
551
+ vcmpequh 3, 3, 9
552
+ vcmpequh 2, 2, 9
553
+ xxsel 33, 39, 44, 33
554
+ xxsel 32, 40, 45, 32
555
+ xxsel 35, 37, 46, 35
556
+ xxsel 34, 38, 47, 34
557
+ stxvd2x 35, 10, 3
558
+ stxvd2x 34, 11, 3
559
+ stxvd2x 33, 8, 3
560
+ stxvd2x 32, 9, 3
561
+ lxvd2x 44, 0, 3
562
+ lxvd2x 45, 14, 3
563
+ lxvd2x 46, 15, 3
564
+ lxvd2x 47, 16, 3
565
+ addi 3, 3, 64
566
+ vsrh 1, 12, 10
567
+ vsrh 0, 13, 10
568
+ vsrh 3, 14, 10
569
+ vsrh 2, 15, 10
570
+ vadduhm 7, 12, 11
571
+ vadduhm 8, 13, 11
572
+ vadduhm 5, 14, 11
573
+ vadduhm 6, 15, 11
574
+ vcmpequh 1, 1, 9
575
+ vcmpequh 0, 0, 9
576
+ vcmpequh 3, 3, 9
577
+ vcmpequh 2, 2, 9
578
+ xxsel 33, 39, 44, 33
579
+ xxsel 32, 40, 45, 32
580
+ xxsel 35, 37, 46, 35
581
+ xxsel 34, 38, 47, 34
582
+ stxvd2x 35, 10, 3
583
+ stxvd2x 34, 11, 3
584
+ stxvd2x 33, 8, 3
585
+ stxvd2x 32, 9, 3
586
+ lxvd2x 44, 0, 3
587
+ lxvd2x 45, 14, 3
588
+ lxvd2x 46, 15, 3
589
+ lxvd2x 47, 16, 3
590
+ addi 3, 3, 64
591
+ vsrh 1, 12, 10
592
+ vsrh 0, 13, 10
593
+ vsrh 3, 14, 10
594
+ vsrh 2, 15, 10
595
+ vadduhm 7, 12, 11
596
+ vadduhm 8, 13, 11
597
+ vadduhm 5, 14, 11
598
+ vadduhm 6, 15, 11
599
+ vcmpequh 1, 1, 9
600
+ vcmpequh 0, 0, 9
601
+ vcmpequh 3, 3, 9
602
+ vcmpequh 2, 2, 9
603
+ xxsel 33, 39, 44, 33
604
+ xxsel 32, 40, 45, 32
605
+ xxsel 35, 37, 46, 35
606
+ xxsel 34, 38, 47, 34
607
+ stxvd2x 35, 10, 3
608
+ stxvd2x 34, 11, 3
609
+ stxvd2x 33, 8, 3
610
+ stxvd2x 32, 9, 3
611
+ lxvd2x 44, 0, 3
612
+ lxvd2x 45, 14, 3
613
+ lxvd2x 46, 15, 3
614
+ lxvd2x 47, 16, 3
615
+ addi 3, 3, 64
616
+ vsrh 1, 12, 10
617
+ vsrh 0, 13, 10
618
+ vsrh 3, 14, 10
619
+ vsrh 2, 15, 10
620
+ vadduhm 7, 12, 11
621
+ vadduhm 8, 13, 11
622
+ vadduhm 5, 14, 11
623
+ vadduhm 6, 15, 11
624
+ vcmpequh 1, 1, 9
625
+ vcmpequh 0, 0, 9
626
+ vcmpequh 3, 3, 9
627
+ vcmpequh 2, 2, 9
628
+ xxsel 33, 39, 44, 33
629
+ xxsel 32, 40, 45, 32
630
+ xxsel 35, 37, 46, 35
631
+ xxsel 34, 38, 47, 34
632
+ stxvd2x 35, 10, 3
633
+ stxvd2x 34, 11, 3
634
+ stxvd2x 33, 8, 3
635
+ stxvd2x 32, 9, 3
636
+ lxvd2x 44, 0, 3
637
+ lxvd2x 45, 14, 3
638
+ lxvd2x 46, 15, 3
639
+ lxvd2x 47, 16, 3
640
+ addi 3, 3, 64
641
+ vsrh 1, 12, 10
642
+ vsrh 0, 13, 10
643
+ vsrh 3, 14, 10
644
+ vsrh 2, 15, 10
645
+ vadduhm 7, 12, 11
646
+ vadduhm 8, 13, 11
647
+ vadduhm 5, 14, 11
648
+ vadduhm 6, 15, 11
649
+ vcmpequh 1, 1, 9
650
+ vcmpequh 0, 0, 9
651
+ vcmpequh 3, 3, 9
652
+ vcmpequh 2, 2, 9
653
+ xxsel 33, 39, 44, 33
654
+ xxsel 32, 40, 45, 32
655
+ xxsel 35, 37, 46, 35
656
+ xxsel 34, 38, 47, 34
657
+ stxvd2x 35, 10, 3
658
+ stxvd2x 34, 11, 3
659
+ stxvd2x 33, 8, 3
660
+ stxvd2x 32, 9, 3
661
+ lxvd2x 44, 0, 3
662
+ lxvd2x 45, 14, 3
663
+ lxvd2x 46, 15, 3
664
+ lxvd2x 47, 16, 3
665
+ addi 3, 3, 64
666
+ vsrh 1, 12, 10
667
+ vsrh 0, 13, 10
668
+ vsrh 3, 14, 10
669
+ vsrh 2, 15, 10
670
+ vadduhm 7, 12, 11
671
+ vadduhm 8, 13, 11
672
+ vadduhm 5, 14, 11
673
+ vadduhm 6, 15, 11
674
+ vcmpequh 1, 1, 9
675
+ vcmpequh 0, 0, 9
676
+ vcmpequh 3, 3, 9
677
+ vcmpequh 2, 2, 9
678
+ xxsel 33, 39, 44, 33
679
+ xxsel 32, 40, 45, 32
680
+ xxsel 35, 37, 46, 35
681
+ xxsel 34, 38, 47, 34
682
+ stxvd2x 35, 10, 3
683
+ stxvd2x 34, 11, 3
684
+ stxvd2x 33, 8, 3
685
+ stxvd2x 32, 9, 3
686
+ ld 14, 96(1)
687
+ ld 15, 104(1)
688
+ ld 16, 112(1)
689
+ li 6, 128
690
+ li 7, 144
691
+ li 8, 160
692
+ li 9, 176
693
+ li 10, 192
694
+ lxvd2x 52, 6, 1
695
+ lxvd2x 53, 7, 1
696
+ lxvd2x 54, 8, 1
697
+ lxvd2x 55, 9, 1
698
+ lxvd2x 56, 10, 1
699
+ mtlr 0
700
+ addi 1, 1, 224
701
+ blr
702
+
703
+ MLK_ASM_FN_SIZE(reduce_ppc_asm)
704
+
705
+ #endif /* MLK_ARITH_BACKEND_PPC64LE_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \
706
+ && __POWER8_VECTOR__ */
707
+
708
+ #if defined(__ELF__)
709
+ .section .note.GNU-stack,"",%progbits
710
+ #endif
@@ -65,6 +65,11 @@ static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
65
65
  const uint8_t *buf,
66
66
  unsigned buflen)
67
67
  {
68
+ /* mlk_rv64v_rej_uniform requires buflen to be a multiple of 12 */
69
+ if (buflen % 12 != 0)
70
+ {
71
+ return MLK_NATIVE_FUNC_FALLBACK;
72
+ }
68
73
  /* The cast from unsigned to signed integer is safe
69
74
  * because the return value is <= len, which we assume
70
75
  * to be bound by 4096 and hence <= INT_MAX. */