whispercpp 1.2.0.2 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (135) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -0
  3. data/LICENSE +1 -1
  4. data/README.md +165 -434
  5. data/Rakefile +46 -86
  6. data/ext/.gitignore +13 -0
  7. data/ext/cpu.mk +9 -0
  8. data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
  9. data/ext/extconf.rb +185 -7
  10. data/ext/ggml/include/ggml-alloc.h +76 -0
  11. data/ext/ggml/include/ggml-backend.h +352 -0
  12. data/ext/ggml/include/ggml-blas.h +25 -0
  13. data/ext/ggml/include/ggml-cann.h +123 -0
  14. data/ext/ggml/include/ggml-cpp.h +38 -0
  15. data/ext/ggml/include/ggml-cpu.h +135 -0
  16. data/ext/ggml/include/ggml-cuda.h +47 -0
  17. data/ext/ggml/include/ggml-kompute.h +50 -0
  18. data/ext/ggml/include/ggml-metal.h +66 -0
  19. data/ext/ggml/include/ggml-opencl.h +26 -0
  20. data/ext/ggml/include/ggml-opt.h +216 -0
  21. data/ext/ggml/include/ggml-rpc.h +28 -0
  22. data/ext/ggml/include/ggml-sycl.h +49 -0
  23. data/ext/ggml/include/ggml-vulkan.h +31 -0
  24. data/ext/ggml/include/ggml.h +2285 -0
  25. data/ext/ggml/src/ggml-alloc.c +1037 -0
  26. data/ext/ggml/src/ggml-amx/common.h +94 -0
  27. data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  28. data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
  29. data/ext/ggml/src/ggml-amx/mmq.h +17 -0
  30. data/ext/ggml/src/ggml-backend-impl.h +256 -0
  31. data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
  32. data/ext/ggml/src/ggml-backend.cpp +1999 -0
  33. data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  34. data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
  35. data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
  36. data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
  37. data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  38. data/ext/ggml/src/ggml-cann/common.h +286 -0
  39. data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
  40. data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
  41. data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
  42. data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
  43. data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
  44. data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
  45. data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  46. data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
  47. data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
  48. data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
  49. data/ext/ggml/src/ggml-common.h +1853 -0
  50. data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  51. data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
  52. data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
  53. data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  54. data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  55. data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  56. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
  57. data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  58. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  59. data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  60. data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
  61. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  62. data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  63. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  64. data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  65. data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  66. data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
  67. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
  68. data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  69. data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  70. data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
  71. data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  72. data/ext/ggml/src/ggml-impl.h +556 -0
  73. data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  74. data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  75. data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
  76. data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
  77. data/ext/ggml/src/ggml-opt.cpp +854 -0
  78. data/ext/ggml/src/ggml-quants.c +5238 -0
  79. data/ext/ggml/src/ggml-quants.h +100 -0
  80. data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
  81. data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
  82. data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
  83. data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
  84. data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
  85. data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  86. data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  87. data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
  88. data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
  89. data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  90. data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
  91. data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
  92. data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
  93. data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
  94. data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
  95. data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
  96. data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  97. data/ext/ggml/src/ggml-threading.cpp +12 -0
  98. data/ext/ggml/src/ggml-threading.h +14 -0
  99. data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
  100. data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  101. data/ext/ggml/src/ggml.c +7694 -0
  102. data/ext/include/whisper.h +672 -0
  103. data/ext/metal-embed.mk +17 -0
  104. data/ext/metal.mk +6 -0
  105. data/ext/ruby_whisper.cpp +1608 -159
  106. data/ext/ruby_whisper.h +10 -0
  107. data/ext/scripts/get-flags.mk +38 -0
  108. data/ext/src/coreml/whisper-decoder-impl.h +146 -0
  109. data/ext/src/coreml/whisper-decoder-impl.m +201 -0
  110. data/ext/src/coreml/whisper-encoder-impl.h +142 -0
  111. data/ext/src/coreml/whisper-encoder-impl.m +197 -0
  112. data/ext/src/coreml/whisper-encoder.h +26 -0
  113. data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
  114. data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
  115. data/ext/src/whisper.cpp +7393 -0
  116. data/extsources.rb +6 -0
  117. data/lib/whisper/model/uri.rb +157 -0
  118. data/lib/whisper.rb +2 -0
  119. data/tests/helper.rb +7 -0
  120. data/tests/jfk_reader/.gitignore +5 -0
  121. data/tests/jfk_reader/extconf.rb +3 -0
  122. data/tests/jfk_reader/jfk_reader.c +68 -0
  123. data/tests/test_callback.rb +160 -0
  124. data/tests/test_error.rb +20 -0
  125. data/tests/test_model.rb +71 -0
  126. data/tests/test_package.rb +31 -0
  127. data/tests/test_params.rb +160 -0
  128. data/tests/test_segment.rb +83 -0
  129. data/tests/test_whisper.rb +211 -123
  130. data/whispercpp.gemspec +36 -0
  131. metadata +137 -11
  132. data/ext/ggml.c +0 -8616
  133. data/ext/ggml.h +0 -748
  134. data/ext/whisper.cpp +0 -4829
  135. data/ext/whisper.h +0 -402
@@ -0,0 +1,10 @@
1
+ #pragma once
2
+ #include "common.h"
3
+
4
+ size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst); // NOTE(review): presumably the work-buffer size wanted for computing dst — definition not visible here, confirm
5
+
6
+ size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor); // NOTE(review): presumably the allocation size for tensor in the AMX layout — confirm against the definition
7
+
8
+ void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); // tensor is the only non-const pointer; NOTE(review): presumably repacks data into tensor — confirm
9
+
10
+ void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst); // params is read-only, dst is writable; NOTE(review): operand lookup is not visible in this header
@@ -0,0 +1,323 @@
1
+ #include "ggml-backend-impl.h"
2
+
3
+ #if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
4
+
5
+ #ifdef _MSC_VER
6
+ #include <intrin.h>
7
+ #endif
8
+
9
+ #include <cstring>
10
+ #include <vector>
11
+ #include <bitset>
12
+ #include <array>
13
+ #include <string>
14
+
15
+ // ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
16
+ struct cpuid_x86 { // Caches CPUID results at construction; each accessor below tests one cached feature bit.
17
+ bool SSE3(void) { return f_1_ecx[0]; } // f_1_ecx = ECX of CPUID leaf 1 (filled in the constructor)
18
+ bool PCLMULQDQ(void) { return f_1_ecx[1]; }
19
+ bool MONITOR(void) { return f_1_ecx[3]; }
20
+ bool SSSE3(void) { return f_1_ecx[9]; }
21
+ bool FMA(void) { return f_1_ecx[12]; }
22
+ bool CMPXCHG16B(void) { return f_1_ecx[13]; }
23
+ bool SSE41(void) { return f_1_ecx[19]; }
24
+ bool SSE42(void) { return f_1_ecx[20]; }
25
+ bool MOVBE(void) { return f_1_ecx[22]; }
26
+ bool POPCNT(void) { return f_1_ecx[23]; }
27
+ bool AES(void) { return f_1_ecx[25]; }
28
+ bool XSAVE(void) { return f_1_ecx[26]; }
29
+ bool OSXSAVE(void) { return f_1_ecx[27]; }
30
+ bool AVX(void) { return f_1_ecx[28]; }
31
+ bool F16C(void) { return f_1_ecx[29]; }
32
+ bool RDRAND(void) { return f_1_ecx[30]; }
33
+
34
+ bool MSR(void) { return f_1_edx[5]; } // f_1_edx = EDX of CPUID leaf 1
35
+ bool CX8(void) { return f_1_edx[8]; }
36
+ bool SEP(void) { return f_1_edx[11]; }
37
+ bool CMOV(void) { return f_1_edx[15]; }
38
+ bool CLFSH(void) { return f_1_edx[19]; }
39
+ bool MMX(void) { return f_1_edx[23]; }
40
+ bool FXSR(void) { return f_1_edx[24]; }
41
+ bool SSE(void) { return f_1_edx[25]; }
42
+ bool SSE2(void) { return f_1_edx[26]; }
43
+
44
+ bool FSGSBASE(void) { return f_7_ebx[0]; } // f_7_ebx = EBX of CPUID leaf 7, sub-leaf 0
45
+ bool BMI1(void) { return f_7_ebx[3]; }
46
+ bool HLE(void) { return is_intel && f_7_ebx[4]; } // additionally requires the "GenuineIntel" vendor string
47
+ bool AVX2(void) { return f_7_ebx[5]; }
48
+ bool BMI2(void) { return f_7_ebx[8]; }
49
+ bool ERMS(void) { return f_7_ebx[9]; }
50
+ bool INVPCID(void) { return f_7_ebx[10]; }
51
+ bool RTM(void) { return is_intel && f_7_ebx[11]; } // gated on is_intel, like HLE
52
+ bool AVX512F(void) { return f_7_ebx[16]; }
53
+ bool AVX512DQ(void) { return f_7_ebx[17]; }
54
+ bool RDSEED(void) { return f_7_ebx[18]; }
55
+ bool ADX(void) { return f_7_ebx[19]; }
56
+ bool AVX512PF(void) { return f_7_ebx[26]; }
57
+ bool AVX512ER(void) { return f_7_ebx[27]; }
58
+ bool AVX512CD(void) { return f_7_ebx[28]; }
59
+ bool AVX512BW(void) { return f_7_ebx[30]; }
60
+ bool AVX512VL(void) { return f_7_ebx[31]; }
61
+
62
+ bool SHA(void) { return f_7_ebx[29]; }
63
+
64
+ bool PREFETCHWT1(void) { return f_7_ecx[0]; } // f_7_ecx = ECX of CPUID leaf 7, sub-leaf 0
65
+
66
+ bool LAHF(void) { return f_81_ecx[0]; } // f_81_ecx = ECX of extended leaf 0x80000001
67
+ bool LZCNT(void) { return is_intel && f_81_ecx[5]; } // same bit as ABM; reported under this name only when is_intel
68
+ bool ABM(void) { return is_amd && f_81_ecx[5]; } // same bit as LZCNT; reported under this name only when is_amd
69
+ bool SSE4a(void) { return is_amd && f_81_ecx[6]; }
70
+ bool XOP(void) { return is_amd && f_81_ecx[11]; }
71
+ bool TBM(void) { return is_amd && f_81_ecx[21]; }
72
+
73
+ bool SYSCALL(void) { return is_intel && f_81_edx[11]; } // f_81_edx = EDX of extended leaf 0x80000001
74
+ bool MMXEXT(void) { return is_amd && f_81_edx[22]; }
75
+ bool RDTSCP(void) { return is_intel && f_81_edx[27]; }
76
+ bool _3DNOWEXT(void) { return is_amd && f_81_edx[30]; }
77
+ bool _3DNOW(void) { return is_amd && f_81_edx[31]; }
78
+
79
+ bool AVX512_VBMI(void) { return f_7_ecx[1]; }
80
+ bool AVX512_VNNI(void) { return f_7_ecx[11]; }
81
+ bool AVX512_FP16(void) { return f_7_edx[23]; } // f_7_edx = EDX of CPUID leaf 7, sub-leaf 0
82
+ bool AVX512_BF16(void) { return f_7_1_eax[5]; } // f_7_1_eax = EAX of CPUID leaf 7, sub-leaf 1
83
+ bool AVX_VNNI(void) { return f_7_1_eax[4]; }
84
+
85
+ bool AMX_TILE(void) { return f_7_edx[24]; }
86
+ bool AMX_INT8(void) { return f_7_edx[25]; }
87
+ bool AMX_FP16(void) { return f_7_1_eax[21]; }
88
+ bool AMX_BF16(void) { return f_7_edx[22]; }
89
+
90
+ #ifdef _MSC_VER
91
+ static void cpuid(int cpu_info[4], int eax) { // MSVC build: delegate to the __cpuid intrinsic
92
+ __cpuid(cpu_info, eax);
93
+ }
94
+ static void cpuidex(int cpu_info[4], int eax, int ecx) { // MSVC build: delegate to __cpuidex, which also takes the sub-leaf (ecx)
95
+ __cpuidex(cpu_info, eax, ecx);
96
+ }
97
+ #else
98
+ static void cpuid(int cpu_info[4], int eax) { // non-MSVC build: run the CPUID instruction through inline asm
99
+ __asm__ __volatile__(
100
+ "cpuid"
101
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) // cpu_info[0..3] receive EAX, EBX, ECX, EDX
102
+ : "a"(eax), "c"(0)); // leaf in EAX, sub-leaf (ECX) fixed to 0
103
+ }
104
+ static void cpuidex(int cpu_info[4], int eax, int ecx) { // same as cpuid() but with a caller-chosen sub-leaf
105
+ __asm__ __volatile__(
106
+ "cpuid"
107
+ : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
108
+ : "a"(eax), "c"(ecx)); // leaf in EAX, sub-leaf in ECX
109
+ }
110
+ #endif
111
+
112
+ cpuid_x86() { // queries CPUID and fills the members declared at the end of the struct
113
+ std::array<int, 4> cpui; // scratch for one CPUID result: {EAX, EBX, ECX, EDX}
114
+ std::vector<std::array<int, 4>> data; // data[i] = result of basic leaf i with sub-leaf 0
115
+
116
+ // calling __cpuid with 0x0 as the function_id argument
117
+ // gets the number of the highest valid function ID.
118
+ cpuid(cpui.data(), 0);
119
+ int n_ids = cpui[0];
120
+
121
+ for (int i = 0; i <= n_ids; ++i) {
122
+ cpuidex(cpui.data(), i, 0);
123
+ data.push_back(cpui);
124
+ }
125
+
126
+ // capture vendor string
127
+ char vendor[0x20] = {}; // zero-filled, so the 12 bytes written below stay NUL-terminated
128
+ *reinterpret_cast<int *>(vendor) = data[0][1]; // bytes 0..3 come from EBX
129
+ *reinterpret_cast<int *>(vendor + 4) = data[0][3]; // bytes 4..7 come from EDX
130
+ *reinterpret_cast<int *>(vendor + 8) = data[0][2]; // bytes 8..11 come from ECX. NOTE(review): these three stores write int through a char buffer; std::memcpy (used for brand below) would avoid the type-punned write
131
+ this->vendor = vendor;
132
+ if (this->vendor == "GenuineIntel") {
133
+ is_intel = true;
134
+ } else if (this->vendor == "AuthenticAMD") {
135
+ is_amd = true;
136
+ }
137
+
138
+ // load bitset with flags for function 0x00000001
139
+ if (n_ids >= 1) { // data[1] exists only when leaf 1 was enumerated above
140
+ f_1_ecx = data[1][2];
141
+ f_1_edx = data[1][3];
142
+ }
143
+
144
+ // load bitset with flags for function 0x00000007
145
+ if (n_ids >= 7) { // data[7] exists only when leaf 7 was enumerated above
146
+ f_7_ebx = data[7][1];
147
+ f_7_ecx = data[7][2];
148
+ f_7_edx = data[7][3];
149
+ cpuidex(cpui.data(), 7, 1); // sub-leaf 1 is not covered by the loop above, which always passes sub-leaf 0
150
+ f_7_1_eax = cpui[0];
151
+ }
152
+
153
+ // calling __cpuid with 0x80000000 as the function_id argument
154
+ // gets the number of the highest valid extended ID.
155
+ cpuid(cpui.data(), 0x80000000);
156
+ unsigned int n_ex_ids = cpui[0]; // unsigned: extended leaf numbers start at 0x80000000, above INT_MAX
157
+
158
+ std::vector<std::array<int, 4>> ext_data; // ext_data[k] = result of leaf 0x80000000 + k
159
+ for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
160
+ cpuidex(cpui.data(), i, 0);
161
+ ext_data.push_back(cpui);
162
+ }
163
+
164
+ // load bitset with flags for function 0x80000001
165
+ if (n_ex_ids >= 0x80000001) {
166
+ f_81_ecx = ext_data[1][2];
167
+ f_81_edx = ext_data[1][3];
168
+ }
169
+
170
+ // interpret CPU brand string if reported
171
+ char brand[0x40] = {}; // zero-filled; the three copies below fill the first 48 bytes when sizeof(cpui) is 16
172
+ if (n_ex_ids >= 0x80000004) { // brand text = register dumps of leaves 0x80000002..0x80000004, concatenated
173
+ std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
174
+ std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
175
+ std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
176
+ this->brand = brand;
177
+ }
178
+ }
179
+
180
+ bool is_intel = false; // set when vendor == "GenuineIntel"
181
+ bool is_amd = false; // set when vendor == "AuthenticAMD"
182
+ std::string vendor; // vendor string assembled from leaf 0
183
+ std::string brand; // stays empty unless extended leaves up to 0x80000004 are reported
184
+ std::bitset<32> f_1_ecx; // the bitsets keep their default (all-zero) value when the matching leaf is not reported
185
+ std::bitset<32> f_1_edx;
186
+ std::bitset<32> f_7_ebx;
187
+ std::bitset<32> f_7_ecx;
188
+ std::bitset<32> f_7_edx;
189
+ std::bitset<32> f_7_1_eax;
190
+ std::bitset<32> f_81_ecx;
191
+ std::bitset<32> f_81_edx;
192
+ };
193
+
194
+ #if 0 // compiled out: manual debug dump of the detected CPU features
195
+ void test_x86_is() { // prints vendor, brand and most (not all) feature flags of a freshly probed cpuid_x86
196
+ cpuid_x86 is;
197
+ printf("CPU Vendor: %s\n", is.vendor.c_str()); // NOTE(review): printf needs <cstdio>, which is not among the includes visible in this file — confirm before re-enabling
198
+ printf("Brand: %s\n", is.brand.c_str());
199
+ printf("is_intel: %d\n", is.is_intel);
200
+ printf("is_amd: %d\n", is.is_amd);
201
+ printf("sse3: %d\n", is.SSE3());
202
+ printf("pclmulqdq: %d\n", is.PCLMULQDQ());
203
+ printf("ssse3: %d\n", is.SSSE3());
204
+ printf("fma: %d\n", is.FMA());
205
+ printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
206
+ printf("sse41: %d\n", is.SSE41());
207
+ printf("sse42: %d\n", is.SSE42());
208
+ printf("movbe: %d\n", is.MOVBE());
209
+ printf("popcnt: %d\n", is.POPCNT());
210
+ printf("aes: %d\n", is.AES());
211
+ printf("xsave: %d\n", is.XSAVE());
212
+ printf("osxsave: %d\n", is.OSXSAVE());
213
+ printf("avx: %d\n", is.AVX());
214
+ printf("f16c: %d\n", is.F16C());
215
+ printf("rdrand: %d\n", is.RDRAND());
216
+ printf("msr: %d\n", is.MSR());
217
+ printf("cx8: %d\n", is.CX8());
218
+ printf("sep: %d\n", is.SEP());
219
+ printf("cmov: %d\n", is.CMOV());
220
+ printf("clflush: %d\n", is.CLFSH());
221
+ printf("mmx: %d\n", is.MMX());
222
+ printf("fxsr: %d\n", is.FXSR());
223
+ printf("sse: %d\n", is.SSE());
224
+ printf("sse2: %d\n", is.SSE2());
225
+ printf("fsgsbase: %d\n", is.FSGSBASE());
226
+ printf("bmi1: %d\n", is.BMI1());
227
+ printf("hle: %d\n", is.HLE());
228
+ printf("avx2: %d\n", is.AVX2());
229
+ printf("bmi2: %d\n", is.BMI2());
230
+ printf("erms: %d\n", is.ERMS());
231
+ printf("invpcid: %d\n", is.INVPCID());
232
+ printf("rtm: %d\n", is.RTM());
233
+ printf("avx512f: %d\n", is.AVX512F());
234
+ printf("rdseed: %d\n", is.RDSEED());
235
+ printf("adx: %d\n", is.ADX());
236
+ printf("avx512pf: %d\n", is.AVX512PF());
237
+ printf("avx512er: %d\n", is.AVX512ER());
238
+ printf("avx512cd: %d\n", is.AVX512CD());
239
+ printf("sha: %d\n", is.SHA());
240
+ printf("prefetchwt1: %d\n", is.PREFETCHWT1());
241
+ printf("lahf: %d\n", is.LAHF());
242
+ printf("lzcnt: %d\n", is.LZCNT());
243
+ printf("abm: %d\n", is.ABM());
244
+ printf("sse4a: %d\n", is.SSE4a());
245
+ printf("xop: %d\n", is.XOP());
246
+ printf("tbm: %d\n", is.TBM());
247
+ printf("syscall: %d\n", is.SYSCALL());
248
+ printf("mmxext: %d\n", is.MMXEXT());
249
+ printf("rdtscp: %d\n", is.RDTSCP());
250
+ printf("3dnowext: %d\n", is._3DNOWEXT());
251
+ printf("3dnow: %d\n", is._3DNOW());
252
+ printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
253
+ printf("avx512_vnni: %d\n", is.AVX512_VNNI());
254
+ printf("avx512_fp16: %d\n", is.AVX512_FP16());
255
+ printf("avx512_bf16: %d\n", is.AVX512_BF16());
256
+ printf("amx_tile: %d\n", is.AMX_TILE());
257
+ printf("amx_int8: %d\n", is.AMX_INT8());
258
+ printf("amx_fp16: %d\n", is.AMX_FP16());
259
+ printf("amx_bf16: %d\n", is.AMX_BF16()); // NOTE(review): the MONITOR, AVX512DQ, AVX512BW, AVX512VL and AVX_VNNI accessors are never printed
260
+ }
261
+ #endif
262
+
263
+ static int ggml_backend_cpu_x86_score() { // returns 0 as soon as a feature selected by a GGML_* macro is missing on this CPU (also the result when no GGML_* macro is defined); otherwise the sum of the weights of the selected features
264
+ // FIXME: this does not check for OS support
265
+
266
+ int score = 0;
267
+ cpuid_x86 is; // probes the CPU on construction
268
+
269
+ #ifdef GGML_FMA
270
+ if (!is.FMA()) { return 0; } // selected at build time but not reported by the CPU: score 0
271
+ score += 1;
272
+ #endif
273
+ #ifdef GGML_F16C
274
+ if (!is.F16C()) { return 0; }
275
+ score += 1<<1;
276
+ #endif
277
+ #ifdef GGML_SSE42
278
+ if (!is.SSE42()) { return 0; }
279
+ score += 1<<2;
280
+ #endif
281
+ #ifdef GGML_AVX
282
+ if (!is.AVX()) { return 0; }
283
+ score += 1<<4; // note: the weight 1<<3 is not assigned to any feature in this function
284
+ #endif
285
+ #ifdef GGML_AVX2
286
+ if (!is.AVX2()) { return 0; }
287
+ score += 1<<5;
288
+ #endif
289
+ #ifdef GGML_AVX_VNNI
290
+ if (!is.AVX_VNNI()) { return 0; }
291
+ score += 1<<6;
292
+ #endif
293
+ #ifdef GGML_AVX512
294
+ if (!is.AVX512F()) { return 0; } // GGML_AVX512 requires all five subsets checked here: F, CD, VL, DQ, BW
295
+ if (!is.AVX512CD()) { return 0; }
296
+ if (!is.AVX512VL()) { return 0; }
297
+ if (!is.AVX512DQ()) { return 0; }
298
+ if (!is.AVX512BW()) { return 0; }
299
+ score += 1<<7;
300
+ #endif
301
+ #ifdef GGML_AVX512_VBMI
302
+ if (!is.AVX512_VBMI()) { return 0; }
303
+ score += 1<<8;
304
+ #endif
305
+ #ifdef GGML_AVX512_BF16
306
+ if (!is.AVX512_BF16()) { return 0; }
307
+ score += 1<<9;
308
+ #endif
309
+ #ifdef GGML_AVX512_VNNI
310
+ if (!is.AVX512_VNNI()) { return 0; }
311
+ score += 1<<10;
312
+ #endif
313
+ #ifdef GGML_AMX_INT8
314
+ if (!is.AMX_INT8()) { return 0; }
315
+ score += 1<<11; // largest weight in this function
316
+ #endif
317
+
318
+ return score;
319
+ }
320
+
321
+ GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score) // NOTE(review): macro is defined outside this view; presumably it exposes the score function to the backend loader — confirm
322
+
323
+ #endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))