whispercpp 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -0,0 +1,10 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include "common.h"
|
3
|
+
|
4
|
+
size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst);
|
5
|
+
|
6
|
+
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
|
7
|
+
|
8
|
+
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
9
|
+
|
10
|
+
void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
@@ -0,0 +1,323 @@
|
|
1
|
+
#include "ggml-backend-impl.h"
|
2
|
+
|
3
|
+
#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|
4
|
+
|
5
|
+
#ifdef _MSC_VER
|
6
|
+
#include <intrin.h>
|
7
|
+
#endif
|
8
|
+
|
9
|
+
#include <cstring>
|
10
|
+
#include <vector>
|
11
|
+
#include <bitset>
|
12
|
+
#include <array>
|
13
|
+
#include <string>
|
14
|
+
|
15
|
+
// ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
|
16
|
+
// Snapshot of the x86 CPUID feature flags of the CPU this code runs on.
//
// The constructor executes CPUID for the basic leaves (0 .. max basic leaf),
// leaf 7 sub-leaf 1, and the extended leaves (0x80000000 .. max extended leaf),
// then caches the registers needed by the accessors below. Each accessor simply
// tests one cached bit; accessors gated on is_intel / is_amd report false on
// any other vendor regardless of the raw bit.
struct cpuid_x86 {
    // CPUID leaf 1, ECX
    bool SSE3(void) const { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) const { return f_1_ecx[1]; }
    bool MONITOR(void) const { return f_1_ecx[3]; }
    bool SSSE3(void) const { return f_1_ecx[9]; }
    bool FMA(void) const { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) const { return f_1_ecx[13]; }
    bool SSE41(void) const { return f_1_ecx[19]; }
    bool SSE42(void) const { return f_1_ecx[20]; }
    bool MOVBE(void) const { return f_1_ecx[22]; }
    bool POPCNT(void) const { return f_1_ecx[23]; }
    bool AES(void) const { return f_1_ecx[25]; }
    bool XSAVE(void) const { return f_1_ecx[26]; }
    bool OSXSAVE(void) const { return f_1_ecx[27]; }
    bool AVX(void) const { return f_1_ecx[28]; }
    bool F16C(void) const { return f_1_ecx[29]; }
    bool RDRAND(void) const { return f_1_ecx[30]; }

    // CPUID leaf 1, EDX
    bool MSR(void) const { return f_1_edx[5]; }
    bool CX8(void) const { return f_1_edx[8]; }
    bool SEP(void) const { return f_1_edx[11]; }
    bool CMOV(void) const { return f_1_edx[15]; }
    bool CLFSH(void) const { return f_1_edx[19]; }
    bool MMX(void) const { return f_1_edx[23]; }
    bool FXSR(void) const { return f_1_edx[24]; }
    bool SSE(void) const { return f_1_edx[25]; }
    bool SSE2(void) const { return f_1_edx[26]; }

    // CPUID leaf 7 (sub-leaf 0), EBX
    bool FSGSBASE(void) const { return f_7_ebx[0]; }
    bool BMI1(void) const { return f_7_ebx[3]; }
    bool HLE(void) const { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) const { return f_7_ebx[5]; }
    bool BMI2(void) const { return f_7_ebx[8]; }
    bool ERMS(void) const { return f_7_ebx[9]; }
    bool INVPCID(void) const { return f_7_ebx[10]; }
    bool RTM(void) const { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) const { return f_7_ebx[16]; }
    bool AVX512DQ(void) const { return f_7_ebx[17]; }
    bool RDSEED(void) const { return f_7_ebx[18]; }
    bool ADX(void) const { return f_7_ebx[19]; }
    bool AVX512PF(void) const { return f_7_ebx[26]; }
    bool AVX512ER(void) const { return f_7_ebx[27]; }
    bool AVX512CD(void) const { return f_7_ebx[28]; }
    bool AVX512BW(void) const { return f_7_ebx[30]; }
    bool AVX512VL(void) const { return f_7_ebx[31]; }

    bool SHA(void) const { return f_7_ebx[29]; }

    // CPUID leaf 7 (sub-leaf 0), ECX
    bool PREFETCHWT1(void) const { return f_7_ecx[0]; }

    // CPUID leaf 0x80000001, ECX
    bool LAHF(void) const { return f_81_ecx[0]; }
    bool LZCNT(void) const { return is_intel && f_81_ecx[5]; }
    bool ABM(void) const { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) const { return is_amd && f_81_ecx[6]; }
    bool XOP(void) const { return is_amd && f_81_ecx[11]; }
    bool TBM(void) const { return is_amd && f_81_ecx[21]; }

    // CPUID leaf 0x80000001, EDX
    bool SYSCALL(void) const { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) const { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) const { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) const { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) const { return is_amd && f_81_edx[31]; }

    // CPUID leaf 7: sub-leaf 0 ECX/EDX and sub-leaf 1 EAX
    bool AVX512_VBMI(void) const { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) const { return f_7_ecx[11]; }
    bool AVX512_FP16(void) const { return f_7_edx[23]; }
    bool AVX512_BF16(void) const { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) const { return f_7_1_eax[4]; }

    bool AMX_TILE(void) const { return f_7_edx[24]; }
    bool AMX_INT8(void) const { return f_7_edx[25]; }
    bool AMX_FP16(void) const { return f_7_1_eax[21]; }
    bool AMX_BF16(void) const { return f_7_edx[22]; }

    // Thin wrappers around the CPUID instruction. On return cpu_info holds
    // { EAX, EBX, ECX, EDX } in that order.
#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    static void cpuid(int cpu_info[4], int eax) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    // Queries the CPU once and fills in every cached field below.
    cpuid_x86() {
        std::array<int, 4> cpui;
        std::vector<std::array<int, 4>> data;

        // calling __cpuid with 0x0 as the function_id argument
        // gets the number of the highest valid function ID.
        cpuid(cpui.data(), 0);
        int n_ids = cpui[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            data.push_back(cpui);
        }

        // capture vendor string: the 12 characters are spread over
        // EBX, EDX, ECX of leaf 0 (in that order). Copy the register bytes
        // with memcpy instead of storing ints through a casted char pointer,
        // which avoids aliasing/alignment problems and matches how the brand
        // string is assembled below.
        char vendor[0x20] = {};
        std::memcpy(vendor,     &data[0][1], sizeof(int));
        std::memcpy(vendor + 4, &data[0][3], sizeof(int));
        std::memcpy(vendor + 8, &data[0][2], sizeof(int));
        this->vendor = vendor;
        if (this->vendor == "GenuineIntel") {
            is_intel = true;
        } else if (this->vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = data[1][2];
            f_1_edx = data[1][3];
        }

        // load bitset with flags for function 0x00000007
        if (n_ids >= 7) {
            f_7_ebx = data[7][1];
            f_7_ecx = data[7][2];
            f_7_edx = data[7][3];
            // sub-leaf 1 is not part of `data` (which only holds sub-leaf 0)
            cpuidex(cpui.data(), 7, 1);
            f_7_1_eax = cpui[0];
        }

        // calling __cpuid with 0x80000000 as the function_id argument
        // gets the number of the highest valid extended ID.
        cpuid(cpui.data(), 0x80000000);
        unsigned int n_ex_ids = cpui[0];

        // ext_data[k] holds extended leaf 0x80000000 + k
        std::vector<std::array<int, 4>> ext_data;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            ext_data.push_back(cpui);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = ext_data[1][2];
            f_81_edx = ext_data[1][3];
        }

        // interpret CPU brand string if reported:
        // leaves 0x80000002..0x80000004 each contribute 16 bytes
        char brand[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand, ext_data[2].data(), sizeof(cpui));
            std::memcpy(brand + 16, ext_data[3].data(), sizeof(cpui));
            std::memcpy(brand + 32, ext_data[4].data(), sizeof(cpui));
            this->brand = brand;
        }
    }

    bool is_intel = false;       // vendor == "GenuineIntel"
    bool is_amd = false;         // vendor == "AuthenticAMD"
    std::string vendor;          // vendor string from leaf 0
    std::string brand;           // brand string; empty if not reported
    std::bitset<32> f_1_ecx;     // leaf 1, ECX
    std::bitset<32> f_1_edx;     // leaf 1, EDX
    std::bitset<32> f_7_ebx;     // leaf 7 sub-leaf 0, EBX
    std::bitset<32> f_7_ecx;     // leaf 7 sub-leaf 0, ECX
    std::bitset<32> f_7_edx;     // leaf 7 sub-leaf 0, EDX
    std::bitset<32> f_7_1_eax;   // leaf 7 sub-leaf 1, EAX
    std::bitset<32> f_81_ecx;    // leaf 0x80000001, ECX
    std::bitset<32> f_81_edx;    // leaf 0x80000001, EDX
};
|
193
|
+
|
194
|
+
#if 0
|
195
|
+
// Debug helper (compiled out by the surrounding #if 0): prints the vendor,
// the brand string and every detected feature flag to stdout, one per line.
// Includes the flags that ggml_backend_cpu_x86_score() gates on
// (AVX512DQ/BW/VL, AVX_VNNI), so a zero score can be diagnosed from the dump.
void test_x86_is() {
    cpuid_x86 is;
    printf("CPU Vendor: %s\n", is.vendor.c_str());
    printf("Brand: %s\n", is.brand.c_str());
    printf("is_intel: %d\n", is.is_intel);
    printf("is_amd: %d\n", is.is_amd);
    printf("sse3: %d\n", is.SSE3());
    printf("pclmulqdq: %d\n", is.PCLMULQDQ());
    printf("monitor: %d\n", is.MONITOR());
    printf("ssse3: %d\n", is.SSSE3());
    printf("fma: %d\n", is.FMA());
    printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
    printf("sse41: %d\n", is.SSE41());
    printf("sse42: %d\n", is.SSE42());
    printf("movbe: %d\n", is.MOVBE());
    printf("popcnt: %d\n", is.POPCNT());
    printf("aes: %d\n", is.AES());
    printf("xsave: %d\n", is.XSAVE());
    printf("osxsave: %d\n", is.OSXSAVE());
    printf("avx: %d\n", is.AVX());
    printf("f16c: %d\n", is.F16C());
    printf("rdrand: %d\n", is.RDRAND());
    printf("msr: %d\n", is.MSR());
    printf("cx8: %d\n", is.CX8());
    printf("sep: %d\n", is.SEP());
    printf("cmov: %d\n", is.CMOV());
    printf("clflush: %d\n", is.CLFSH());
    printf("mmx: %d\n", is.MMX());
    printf("fxsr: %d\n", is.FXSR());
    printf("sse: %d\n", is.SSE());
    printf("sse2: %d\n", is.SSE2());
    printf("fsgsbase: %d\n", is.FSGSBASE());
    printf("bmi1: %d\n", is.BMI1());
    printf("hle: %d\n", is.HLE());
    printf("avx2: %d\n", is.AVX2());
    printf("bmi2: %d\n", is.BMI2());
    printf("erms: %d\n", is.ERMS());
    printf("invpcid: %d\n", is.INVPCID());
    printf("rtm: %d\n", is.RTM());
    printf("avx512f: %d\n", is.AVX512F());
    printf("avx512dq: %d\n", is.AVX512DQ());
    printf("rdseed: %d\n", is.RDSEED());
    printf("adx: %d\n", is.ADX());
    printf("avx512pf: %d\n", is.AVX512PF());
    printf("avx512er: %d\n", is.AVX512ER());
    printf("avx512cd: %d\n", is.AVX512CD());
    printf("avx512bw: %d\n", is.AVX512BW());
    printf("avx512vl: %d\n", is.AVX512VL());
    printf("sha: %d\n", is.SHA());
    printf("prefetchwt1: %d\n", is.PREFETCHWT1());
    printf("lahf: %d\n", is.LAHF());
    printf("lzcnt: %d\n", is.LZCNT());
    printf("abm: %d\n", is.ABM());
    printf("sse4a: %d\n", is.SSE4a());
    printf("xop: %d\n", is.XOP());
    printf("tbm: %d\n", is.TBM());
    printf("syscall: %d\n", is.SYSCALL());
    printf("mmxext: %d\n", is.MMXEXT());
    printf("rdtscp: %d\n", is.RDTSCP());
    printf("3dnowext: %d\n", is._3DNOWEXT());
    printf("3dnow: %d\n", is._3DNOW());
    printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
    printf("avx512_vnni: %d\n", is.AVX512_VNNI());
    printf("avx512_fp16: %d\n", is.AVX512_FP16());
    printf("avx512_bf16: %d\n", is.AVX512_BF16());
    printf("avx_vnni: %d\n", is.AVX_VNNI());
    printf("amx_tile: %d\n", is.AMX_TILE());
    printf("amx_int8: %d\n", is.AMX_INT8());
    printf("amx_fp16: %d\n", is.AMX_FP16());
    printf("amx_bf16: %d\n", is.AMX_BF16());
}
|
261
|
+
#endif
|
262
|
+
|
263
|
+
// Computes a score for this build of the CPU backend on the current CPU.
//
// Every GGML_<FEATURE> macro defined at compile time marks an instruction set
// the build relies on. If the CPU lacks any of them the function returns 0;
// otherwise each required feature contributes its own distinct bit to the
// result, so a build relying on more (and higher-numbered) features gets a
// larger score. With no GGML_* feature macro defined the result is 0.
// NOTE(review): presumably consumed via GGML_BACKEND_DL_SCORE_IMPL to pick
// among backend variants; confirm against the macro's definition.
static int ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    cpuid_x86 cpu;
    int result = 0;

#ifdef GGML_FMA
    if (!cpu.FMA()) {
        return 0;
    }
    result |= 0x001;
#endif
#ifdef GGML_F16C
    if (!cpu.F16C()) {
        return 0;
    }
    result |= 0x002;
#endif
#ifdef GGML_SSE42
    if (!cpu.SSE42()) {
        return 0;
    }
    result |= 0x004;
#endif
    // bit 3 (0x008) is intentionally left unused, as in the original weights
#ifdef GGML_AVX
    if (!cpu.AVX()) {
        return 0;
    }
    result |= 0x010;
#endif
#ifdef GGML_AVX2
    if (!cpu.AVX2()) {
        return 0;
    }
    result |= 0x020;
#endif
#ifdef GGML_AVX_VNNI
    if (!cpu.AVX_VNNI()) {
        return 0;
    }
    result |= 0x040;
#endif
#ifdef GGML_AVX512
    // the AVX512 build needs the F, CD, VL, DQ and BW subsets all at once
    const bool has_avx512 = cpu.AVX512F()  &&
                            cpu.AVX512CD() &&
                            cpu.AVX512VL() &&
                            cpu.AVX512DQ() &&
                            cpu.AVX512BW();
    if (!has_avx512) {
        return 0;
    }
    result |= 0x080;
#endif
#ifdef GGML_AVX512_VBMI
    if (!cpu.AVX512_VBMI()) {
        return 0;
    }
    result |= 0x100;
#endif
#ifdef GGML_AVX512_BF16
    if (!cpu.AVX512_BF16()) {
        return 0;
    }
    result |= 0x200;
#endif
#ifdef GGML_AVX512_VNNI
    if (!cpu.AVX512_VNNI()) {
        return 0;
    }
    result |= 0x400;
#endif
#ifdef GGML_AMX_INT8
    if (!cpu.AMX_INT8()) {
        return 0;
    }
    result |= 0x800;
#endif

    return result;
}
|
320
|
+
|
321
|
+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score)
|
322
|
+
|
323
|
+
#endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|