whispercpp 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +60 -11
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -16
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/{whisper.h → include/whisper.h} +23 -22
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1492 -9
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -21755
@@ -0,0 +1,10 @@
1
+ #pragma once
2
+ #include "common.h"
3
+
4
+ size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst); // declaration only; presumably the scratch ("work") size wanted for computing dst - TODO confirm against the definition
5
+
6
+ size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor); // presumably the number of bytes to allocate for tensor - TODO confirm
7
+
8
+ void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); // NOTE(review): looks like it converts `size` bytes from `data` into tensor at `offset` - verify against the definition
9
+
10
+ void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst); // presumably stores a matrix product in dst; the operands are not visible in this signature - TODO confirm
@@ -0,0 +1,323 @@
1
+ #include "ggml-backend-impl.h"
2
+
3
+ #if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
4
+
5
+ #ifdef _MSC_VER
6
+ #include <intrin.h>
7
+ #endif
8
+
9
+ #include <cstring>
10
+ #include <vector>
11
+ #include <bitset>
12
+ #include <array>
13
+ #include <string>
14
+
15
+ // ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
16
+ struct cpuid_x86 { // Snapshot of x86 CPUID data: the constructor runs all queries once, the accessors below just test cached bits.
17
+ bool SSE3(void) { return f_1_ecx[0]; } // this group: bits of leaf 1 ECX
18
+ bool PCLMULQDQ(void) { return f_1_ecx[1]; }
19
+ bool MONITOR(void) { return f_1_ecx[3]; }
20
+ bool SSSE3(void) { return f_1_ecx[9]; }
21
+ bool FMA(void) { return f_1_ecx[12]; }
22
+ bool CMPXCHG16B(void) { return f_1_ecx[13]; }
23
+ bool SSE41(void) { return f_1_ecx[19]; }
24
+ bool SSE42(void) { return f_1_ecx[20]; }
25
+ bool MOVBE(void) { return f_1_ecx[22]; }
26
+ bool POPCNT(void) { return f_1_ecx[23]; }
27
+ bool AES(void) { return f_1_ecx[25]; }
28
+ bool XSAVE(void) { return f_1_ecx[26]; }
29
+ bool OSXSAVE(void) { return f_1_ecx[27]; }
30
+ bool AVX(void) { return f_1_ecx[28]; }
31
+ bool F16C(void) { return f_1_ecx[29]; }
32
+ bool RDRAND(void) { return f_1_ecx[30]; }
33
+
34
+ bool MSR(void) { return f_1_edx[5]; } // this group: bits of leaf 1 EDX
35
+ bool CX8(void) { return f_1_edx[8]; }
36
+ bool SEP(void) { return f_1_edx[11]; }
37
+ bool CMOV(void) { return f_1_edx[15]; }
38
+ bool CLFSH(void) { return f_1_edx[19]; }
39
+ bool MMX(void) { return f_1_edx[23]; }
40
+ bool FXSR(void) { return f_1_edx[24]; }
41
+ bool SSE(void) { return f_1_edx[25]; }
42
+ bool SSE2(void) { return f_1_edx[26]; }
43
+
44
+ bool FSGSBASE(void) { return f_7_ebx[0]; } // this group: bits of leaf 7 (subleaf 0) EBX
45
+ bool BMI1(void) { return f_7_ebx[3]; }
46
+ bool HLE(void) { return is_intel && f_7_ebx[4]; } // only reported when the vendor string is "GenuineIntel"
47
+ bool AVX2(void) { return f_7_ebx[5]; }
48
+ bool BMI2(void) { return f_7_ebx[8]; }
49
+ bool ERMS(void) { return f_7_ebx[9]; }
50
+ bool INVPCID(void) { return f_7_ebx[10]; }
51
+ bool RTM(void) { return is_intel && f_7_ebx[11]; } // Intel-only, like HLE
52
+ bool AVX512F(void) { return f_7_ebx[16]; }
53
+ bool AVX512DQ(void) { return f_7_ebx[17]; }
54
+ bool RDSEED(void) { return f_7_ebx[18]; }
55
+ bool ADX(void) { return f_7_ebx[19]; }
56
+ bool AVX512PF(void) { return f_7_ebx[26]; }
57
+ bool AVX512ER(void) { return f_7_ebx[27]; }
58
+ bool AVX512CD(void) { return f_7_ebx[28]; }
59
+ bool AVX512BW(void) { return f_7_ebx[30]; }
60
+ bool AVX512VL(void) { return f_7_ebx[31]; }
61
+
62
+ bool SHA(void) { return f_7_ebx[29]; }
63
+
64
+ bool PREFETCHWT1(void) { return f_7_ecx[0]; } // leaf 7 (subleaf 0) ECX
65
+
66
+ bool LAHF(void) { return f_81_ecx[0]; } // this group: bits of extended leaf 0x80000001 ECX
67
+ bool LZCNT(void) { return is_intel && f_81_ecx[5]; } // same bit as ABM below, exposed under the Intel name
68
+ bool ABM(void) { return is_amd && f_81_ecx[5]; } // same bit as LZCNT above, exposed under the AMD name
69
+ bool SSE4a(void) { return is_amd && f_81_ecx[6]; }
70
+ bool XOP(void) { return is_amd && f_81_ecx[11]; }
71
+ bool TBM(void) { return is_amd && f_81_ecx[21]; }
72
+
73
+ bool SYSCALL(void) { return is_intel && f_81_edx[11]; } // this group: bits of extended leaf 0x80000001 EDX, each gated on vendor
74
+ bool MMXEXT(void) { return is_amd && f_81_edx[22]; }
75
+ bool RDTSCP(void) { return is_intel && f_81_edx[27]; }
76
+ bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; }
77
+ bool _3DNOW(void) { return is_amd && f_81_edx[31]; }
78
+
79
+ bool AVX512_VBMI(void) { return f_7_ecx[1]; } // leaf 7 (subleaf 0) ECX
80
+ bool AVX512_VNNI(void) { return f_7_ecx[11]; }
81
+ bool AVX512_FP16(void) { return f_7_edx[23]; } // leaf 7 (subleaf 0) EDX
82
+ bool AVX512_BF16(void) { return f_7_1_eax[5]; } // leaf 7 subleaf 1 EAX
83
+ bool AVX_VNNI(void) { return f_7_1_eax[4]; } // leaf 7 subleaf 1 EAX
84
+
85
+ bool AMX_TILE(void) { return f_7_edx[24]; } // AMX bits live in leaf 7 EDX, except AMX_FP16 (subleaf 1 EAX)
86
+ bool AMX_INT8(void) { return f_7_edx[25]; }
87
+ bool AMX_FP16(void) { return f_7_1_eax[21]; }
88
+ bool AMX_BF16(void) { return f_7_edx[22]; }
89
+
90
+ #ifdef _MSC_VER // MSVC: forward to the compiler intrinsics
91
+ static void cpuid(int cpu_info[4], int eax) {
92
+ __cpuid(cpu_info, eax);
93
+ }
94
+ static void cpuidex(int cpu_info[4], int eax, int ecx) {
95
+ __cpuidex(cpu_info, eax, ecx);
96
+ }
97
+ #else // other compilers: GNU-style extended inline asm
98
+ static void cpuid(int cpu_info[4], int eax) { // outputs land in cpu_info as {EAX, EBX, ECX, EDX}; the subleaf (ECX input) is fixed to 0
99
+ __asm__ __volatile__(
100
+ "cpuid"
101
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
102
+ : "a"(eax), "c"(0));
103
+ }
104
+ static void cpuidex(int cpu_info[4], int eax, int ecx) { // same as cpuid() but with a caller-chosen subleaf in ECX
105
+ __asm__ __volatile__(
106
+ "cpuid"
107
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
108
+ : "a"(eax), "c"(ecx));
109
+ }
110
+ #endif
111
+
112
+ cpuid_x86() { // runs every CPUID query and fills the data members declared below
113
+ std::array<int, 4> cpui;
114
+ std::vector<std::array<int, 4>> data;
115
+
116
+ // calling __cpuid with 0x0 as the function_id argument
117
+ // gets the number of the highest valid function ID.
118
+ cpuid(cpui.data(), 0);
119
+ int n_ids = cpui[0];
120
+
121
+ for (int i = 0; i <= n_ids; ++i) { // cache subleaf 0 of every basic leaf, so data[k] is leaf k
122
+ cpuidex(cpui.data(), i, 0);
123
+ data.push_back(cpui);
124
+ }
125
+
126
+ // capture vendor string
127
+ char vendor[0x20] = {}; // zero-filled, so the 12 bytes stored below stay NUL-terminated
128
+ *reinterpret_cast<int *>(vendor) = data[0][1]; // register order for the vendor string is EBX, EDX, ECX
129
+ *reinterpret_cast<int *>(vendor + 4) = data[0][3]; // NOTE(review): these three stores write int through a char buffer; std::memcpy (as used for brand below) would avoid aliasing/alignment questions
130
+ *reinterpret_cast<int *>(vendor + 8) = data[0][2];
131
+ this->vendor = vendor;
132
+ if (this->vendor == "GenuineIntel") {
133
+ is_intel = true;
134
+ } else if (this->vendor == "AuthenticAMD") {
135
+ is_amd = true;
136
+ } // any other vendor string leaves both flags false
137
+
138
+ // load bitset with flags for function 0x00000001
139
+ if (n_ids >= 1) { // guard: data[1] only exists when leaf 1 was enumerated above
140
+ f_1_ecx = data[1][2];
141
+ f_1_edx = data[1][3];
142
+ }
143
+
144
+ // load bitset with flags for function 0x00000007
145
+ if (n_ids >= 7) {
146
+ f_7_ebx = data[7][1];
147
+ f_7_ecx = data[7][2];
148
+ f_7_edx = data[7][3];
149
+ cpuidex(cpui.data(), 7, 1); // subleaf 1 is queried directly; NOTE(review): leaf 7 EAX (data[7][0]) is not consulted first - confirm that is intended
150
+ f_7_1_eax = cpui[0];
151
+ }
152
+
153
+ // calling __cpuid with 0x80000000 as the function_id argument
154
+ // gets the number of the highest valid extended ID.
155
+ cpuid(cpui.data(), 0x80000000);
156
+ unsigned int n_ex_ids = cpui[0]; // converted to unsigned so the 0x8000xxxx comparisons below are unsigned
157
+
158
+ std::vector<std::array<int, 4>> ext_data;
159
+ for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) { // ext_data[k] holds extended leaf 0x80000000 + k
160
+ cpuidex(cpui.data(), i, 0);
161
+ ext_data.push_back(cpui);
162
+ }
163
+
164
+ // load bitset with flags for function 0x80000001
165
+ if (n_ex_ids >= 0x80000001) {
166
+ f_81_ecx = ext_data[1][2];
167
+ f_81_edx = ext_data[1][3];
168
+ }
169
+
170
+ // interpret CPU brand string if reported
171
+ char brand[0x40] = {}; // zero-filled; the three copies below write at offsets 0, 16 and 32
172
+ if (n_ex_ids >= 0x80000004) { // leaves 0x80000002..0x80000004 each contribute one register quad
173
+ std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
174
+ std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
175
+ std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
176
+ this->brand = brand;
177
+ } // otherwise this->brand stays an empty string
178
+ }
179
+
180
+ bool is_intel = false; // set when vendor == "GenuineIntel"
181
+ bool is_amd = false; // set when vendor == "AuthenticAMD"
182
+ std::string vendor; // vendor string from leaf 0
183
+ std::string brand; // brand string from extended leaves, empty if not reported
184
+ std::bitset<32> f_1_ecx; // leaf 1 ECX
185
+ std::bitset<32> f_1_edx; // leaf 1 EDX
186
+ std::bitset<32> f_7_ebx; // leaf 7 subleaf 0 EBX
187
+ std::bitset<32> f_7_ecx; // leaf 7 subleaf 0 ECX
188
+ std::bitset<32> f_7_edx; // leaf 7 subleaf 0 EDX
189
+ std::bitset<32> f_7_1_eax; // leaf 7 subleaf 1 EAX
190
+ std::bitset<32> f_81_ecx; // extended leaf 0x80000001 ECX
191
+ std::bitset<32> f_81_edx; // extended leaf 0x80000001 EDX
192
+ };
193
+
194
+ #if 0
195
+ void test_x86_is() { // Debug helper: prints the vendor/brand strings and most cpuid_x86 feature flags; excluded from the build by the enclosing #if 0.
196
+ cpuid_x86 is;
197
+ printf("CPU Vendor: %s\n", is.vendor.c_str());
198
+ printf("Brand: %s\n", is.brand.c_str());
199
+ printf("is_intel: %d\n", is.is_intel);
200
+ printf("is_amd: %d\n", is.is_amd);
201
+ printf("sse3: %d\n", is.SSE3());
202
+ printf("pclmulqdq: %d\n", is.PCLMULQDQ());
203
+ printf("ssse3: %d\n", is.SSSE3());
204
+ printf("fma: %d\n", is.FMA());
205
+ printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
206
+ printf("sse41: %d\n", is.SSE41());
207
+ printf("sse42: %d\n", is.SSE42());
208
+ printf("movbe: %d\n", is.MOVBE());
209
+ printf("popcnt: %d\n", is.POPCNT());
210
+ printf("aes: %d\n", is.AES());
211
+ printf("xsave: %d\n", is.XSAVE());
212
+ printf("osxsave: %d\n", is.OSXSAVE());
213
+ printf("avx: %d\n", is.AVX());
214
+ printf("f16c: %d\n", is.F16C());
215
+ printf("rdrand: %d\n", is.RDRAND());
216
+ printf("msr: %d\n", is.MSR());
217
+ printf("cx8: %d\n", is.CX8());
218
+ printf("sep: %d\n", is.SEP());
219
+ printf("cmov: %d\n", is.CMOV());
220
+ printf("clflush: %d\n", is.CLFSH());
221
+ printf("mmx: %d\n", is.MMX());
222
+ printf("fxsr: %d\n", is.FXSR());
223
+ printf("sse: %d\n", is.SSE());
224
+ printf("sse2: %d\n", is.SSE2());
225
+ printf("fsgsbase: %d\n", is.FSGSBASE());
226
+ printf("bmi1: %d\n", is.BMI1());
227
+ printf("hle: %d\n", is.HLE());
228
+ printf("avx2: %d\n", is.AVX2());
229
+ printf("bmi2: %d\n", is.BMI2());
230
+ printf("erms: %d\n", is.ERMS());
231
+ printf("invpcid: %d\n", is.INVPCID());
232
+ printf("rtm: %d\n", is.RTM());
233
+ printf("avx512f: %d\n", is.AVX512F());
234
+ printf("rdseed: %d\n", is.RDSEED());
235
+ printf("adx: %d\n", is.ADX());
236
+ printf("avx512pf: %d\n", is.AVX512PF());
237
+ printf("avx512er: %d\n", is.AVX512ER());
238
+ printf("avx512cd: %d\n", is.AVX512CD());
239
+ printf("sha: %d\n", is.SHA());
240
+ printf("prefetchwt1: %d\n", is.PREFETCHWT1());
241
+ printf("lahf: %d\n", is.LAHF());
242
+ printf("lzcnt: %d\n", is.LZCNT());
243
+ printf("abm: %d\n", is.ABM());
244
+ printf("sse4a: %d\n", is.SSE4a());
245
+ printf("xop: %d\n", is.XOP());
246
+ printf("tbm: %d\n", is.TBM());
247
+ printf("syscall: %d\n", is.SYSCALL());
248
+ printf("mmxext: %d\n", is.MMXEXT());
249
+ printf("rdtscp: %d\n", is.RDTSCP());
250
+ printf("3dnowext: %d\n", is._3DNOWEXT());
251
+ printf("3dnow: %d\n", is._3DNOW());
252
+ printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
253
+ printf("avx512_vnni: %d\n", is.AVX512_VNNI());
254
+ printf("avx512_fp16: %d\n", is.AVX512_FP16());
255
+ printf("avx512_bf16: %d\n", is.AVX512_BF16());
256
+ printf("amx_tile: %d\n", is.AMX_TILE());
257
+ printf("amx_int8: %d\n", is.AMX_INT8());
258
+ printf("amx_fp16: %d\n", is.AMX_FP16());
259
+ printf("amx_bf16: %d\n", is.AMX_BF16());
260
+ } // NOTE(review): MONITOR, AVX512DQ, AVX512BW, AVX512VL and AVX_VNNI have accessors but are not printed here
261
+ #endif
262
+
263
+ static int ggml_backend_cpu_x86_score() { // Returns 0 as soon as a feature enabled at compile time (GGML_* macro) is not reported by CPUID; otherwise the sum of the per-feature weights.
264
+ // FIXME: this does not check for OS support
265
+
266
+ int score = 0;
267
+ cpuid_x86 is; // constructed locally, so the CPUID queries are re-run on every call
268
+
269
+ #ifdef GGML_FMA
270
+ if (!is.FMA()) { return 0; }
271
+ score += 1;
272
+ #endif
273
+ #ifdef GGML_F16C
274
+ if (!is.F16C()) { return 0; }
275
+ score += 1<<1;
276
+ #endif
277
+ #ifdef GGML_SSE42
278
+ if (!is.SSE42()) { return 0; }
279
+ score += 1<<2;
280
+ #endif
281
+ #ifdef GGML_AVX
282
+ if (!is.AVX()) { return 0; }
283
+ score += 1<<4; // note: weight 1<<3 is not assigned to any feature in this function
284
+ #endif
285
+ #ifdef GGML_AVX2
286
+ if (!is.AVX2()) { return 0; }
287
+ score += 1<<5;
288
+ #endif
289
+ #ifdef GGML_AVX_VNNI
290
+ if (!is.AVX_VNNI()) { return 0; }
291
+ score += 1<<6;
292
+ #endif
293
+ #ifdef GGML_AVX512 // requires all five of F, CD, VL, DQ and BW before the weight is added
294
+ if (!is.AVX512F()) { return 0; }
295
+ if (!is.AVX512CD()) { return 0; }
296
+ if (!is.AVX512VL()) { return 0; }
297
+ if (!is.AVX512DQ()) { return 0; }
298
+ if (!is.AVX512BW()) { return 0; }
299
+ score += 1<<7;
300
+ #endif
301
+ #ifdef GGML_AVX512_VBMI
302
+ if (!is.AVX512_VBMI()) { return 0; }
303
+ score += 1<<8;
304
+ #endif
305
+ #ifdef GGML_AVX512_BF16
306
+ if (!is.AVX512_BF16()) { return 0; }
307
+ score += 1<<9;
308
+ #endif
309
+ #ifdef GGML_AVX512_VNNI
310
+ if (!is.AVX512_VNNI()) { return 0; }
311
+ score += 1<<10;
312
+ #endif
313
+ #ifdef GGML_AMX_INT8
314
+ if (!is.AMX_INT8()) { return 0; }
315
+ score += 1<<11;
316
+ #endif
317
+
318
+ return score; // also 0 when none of the GGML_* macros above is defined
319
+ }
320
+
321
+ GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score) // macro is defined outside this file; presumably exposes the score function to the backend loader - TODO confirm in ggml-backend-impl.h
322
+
323
+ #endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))