whispercpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -0,0 +1,10 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include "common.h"
|
3
|
+
|
4
|
+
// AMX helper entry points; only the prototypes are visible in this header.
// Takes the destination tensor and returns a size_t.
// NOTE(review): presumably the work-buffer size wanted for computing `dst` - confirm against the definition.
size_t ggml_backend_amx_desired_wsize(const struct ggml_tensor * dst);
|
5
|
+
|
6
|
+
// Takes a tensor (read-only) and returns a size_t.
// NOTE(review): presumably the allocation size `tensor` needs in the AMX layout - confirm against the definition.
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
|
7
|
+
|
8
|
+
// Takes a mutable tensor plus a read-only source span described by (data, offset, size).
// NOTE(review): presumably repacks the given weight bytes into `tensor` - confirm against the definition.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
9
|
+
|
10
|
+
// Takes read-only compute parameters and a mutable destination tensor.
// NOTE(review): presumably the matrix-multiply entry point, with operands reached through `dst` - confirm.
void ggml_backend_amx_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
@@ -0,0 +1,323 @@
|
|
1
|
+
#include "ggml-backend-impl.h"
|
2
|
+
|
3
|
+
#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|
4
|
+
|
5
|
+
#ifdef _MSC_VER
|
6
|
+
#include <intrin.h>
|
7
|
+
#endif
|
8
|
+
|
9
|
+
#include <cstring>
|
10
|
+
#include <vector>
|
11
|
+
#include <bitset>
|
12
|
+
#include <array>
|
13
|
+
#include <string>
|
14
|
+
|
15
|
+
// ref: https://cdrdv2-public.intel.com/782156/325383-sdm-vol-2abcd.pdf
|
16
|
+
// Snapshot of the x86 CPUID feature flags of the CPU this code runs on.
//
// The constructor executes CPUID once and caches the register values of
// leaf 1 (ECX/EDX), leaf 7 sub-leaf 0 (EBX/ECX/EDX), leaf 7 sub-leaf 1 (EAX)
// and extended leaf 0x80000001 (ECX/EDX) in the bitsets at the bottom of the
// struct. Each accessor simply tests one bit of one cached register.
// Accessors guarded by is_intel / is_amd report false on other vendors.
//
// Only what the CPU advertises is reported; nothing here checks whether the
// operating system has enabled the corresponding register state.
struct cpuid_x86 {
    // leaf 1, ECX
    bool SSE3(void) const { return f_1_ecx[0]; }
    bool PCLMULQDQ(void) const { return f_1_ecx[1]; }
    bool MONITOR(void) const { return f_1_ecx[3]; }
    bool SSSE3(void) const { return f_1_ecx[9]; }
    bool FMA(void) const { return f_1_ecx[12]; }
    bool CMPXCHG16B(void) const { return f_1_ecx[13]; }
    bool SSE41(void) const { return f_1_ecx[19]; }
    bool SSE42(void) const { return f_1_ecx[20]; }
    bool MOVBE(void) const { return f_1_ecx[22]; }
    bool POPCNT(void) const { return f_1_ecx[23]; }
    bool AES(void) const { return f_1_ecx[25]; }
    bool XSAVE(void) const { return f_1_ecx[26]; }
    bool OSXSAVE(void) const { return f_1_ecx[27]; }
    bool AVX(void) const { return f_1_ecx[28]; }
    bool F16C(void) const { return f_1_ecx[29]; }
    bool RDRAND(void) const { return f_1_ecx[30]; }

    // leaf 1, EDX
    bool MSR(void) const { return f_1_edx[5]; }
    bool CX8(void) const { return f_1_edx[8]; }
    bool SEP(void) const { return f_1_edx[11]; }
    bool CMOV(void) const { return f_1_edx[15]; }
    bool CLFSH(void) const { return f_1_edx[19]; }
    bool MMX(void) const { return f_1_edx[23]; }
    bool FXSR(void) const { return f_1_edx[24]; }
    bool SSE(void) const { return f_1_edx[25]; }
    bool SSE2(void) const { return f_1_edx[26]; }

    // leaf 7 sub-leaf 0, EBX
    bool FSGSBASE(void) const { return f_7_ebx[0]; }
    bool BMI1(void) const { return f_7_ebx[3]; }
    bool HLE(void) const { return is_intel && f_7_ebx[4]; }
    bool AVX2(void) const { return f_7_ebx[5]; }
    bool BMI2(void) const { return f_7_ebx[8]; }
    bool ERMS(void) const { return f_7_ebx[9]; }
    bool INVPCID(void) const { return f_7_ebx[10]; }
    bool RTM(void) const { return is_intel && f_7_ebx[11]; }
    bool AVX512F(void) const { return f_7_ebx[16]; }
    bool AVX512DQ(void) const { return f_7_ebx[17]; }
    bool RDSEED(void) const { return f_7_ebx[18]; }
    bool ADX(void) const { return f_7_ebx[19]; }
    bool AVX512PF(void) const { return f_7_ebx[26]; }
    bool AVX512ER(void) const { return f_7_ebx[27]; }
    bool AVX512CD(void) const { return f_7_ebx[28]; }
    bool AVX512BW(void) const { return f_7_ebx[30]; }
    bool AVX512VL(void) const { return f_7_ebx[31]; }

    bool SHA(void) const { return f_7_ebx[29]; }

    // leaf 7 sub-leaf 0, ECX
    bool PREFETCHWT1(void) const { return f_7_ecx[0]; }

    // extended leaf 0x80000001, ECX
    bool LAHF(void) const { return f_81_ecx[0]; }
    bool LZCNT(void) const { return is_intel && f_81_ecx[5]; }
    bool ABM(void) const { return is_amd && f_81_ecx[5]; }
    bool SSE4a(void) const { return is_amd && f_81_ecx[6]; }
    bool XOP(void) const { return is_amd && f_81_ecx[11]; }
    bool TBM(void) const { return is_amd && f_81_ecx[21]; }

    // extended leaf 0x80000001, EDX
    bool SYSCALL(void) const { return is_intel && f_81_edx[11]; }
    bool MMXEXT(void) const { return is_amd && f_81_edx[22]; }
    bool RDTSCP(void) const { return is_intel && f_81_edx[27]; }
    bool _3DNOWEXT(void) const { return is_amd && f_81_edx[30]; }
    bool _3DNOW(void) const { return is_amd && f_81_edx[31]; }

    // leaf 7: AVX-512 extensions and AVX-VNNI (sub-leaf 1 lives in f_7_1_eax)
    bool AVX512_VBMI(void) const { return f_7_ecx[1]; }
    bool AVX512_VNNI(void) const { return f_7_ecx[11]; }
    bool AVX512_FP16(void) const { return f_7_edx[23]; }
    bool AVX512_BF16(void) const { return f_7_1_eax[5]; }
    bool AVX_VNNI(void) const { return f_7_1_eax[4]; }

    // leaf 7: AMX
    bool AMX_TILE(void) const { return f_7_edx[24]; }
    bool AMX_INT8(void) const { return f_7_edx[25]; }
    bool AMX_FP16(void) const { return f_7_1_eax[21]; }
    bool AMX_BF16(void) const { return f_7_edx[22]; }

    // Thin wrappers around the CPUID instruction.
    // cpu_info receives EAX, EBX, ECX, EDX in slots 0..3.
#ifdef _MSC_VER
    static void cpuid(int cpu_info[4], int eax) {
        __cpuid(cpu_info, eax);
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __cpuidex(cpu_info, eax, ecx);
    }
#else
    static void cpuid(int cpu_info[4], int eax) {
        // sub-leaf (ECX) is forced to 0 for the single-argument form
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(0));
    }
    static void cpuidex(int cpu_info[4], int eax, int ecx) {
        __asm__ __volatile__(
            "cpuid"
            : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
            : "a"(eax), "c"(ecx));
    }
#endif

    // Queries the CPU and fills in vendor, brand and all cached flag registers.
    cpuid_x86() {
        std::array<int, 4> cpui = {};
        std::vector<std::array<int, 4>> data;

        // calling __cpuid with 0x0 as the function_id argument
        // gets the number of the highest valid function ID.
        cpuid(cpui.data(), 0);
        const int n_ids = cpui[0];

        for (int i = 0; i <= n_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            data.push_back(cpui);
        }

        // capture vendor string: the 12 characters are spread over EBX, EDX, ECX
        // of leaf 0 (in that order). Copy bytes with memcpy instead of writing
        // ints through a casted char pointer, which would be type punning.
        char vendor_buf[0x20] = {};
        std::memcpy(vendor_buf + 0, &data[0][1], sizeof(int));
        std::memcpy(vendor_buf + 4, &data[0][3], sizeof(int));
        std::memcpy(vendor_buf + 8, &data[0][2], sizeof(int));
        this->vendor = vendor_buf;
        if (this->vendor == "GenuineIntel") {
            is_intel = true;
        } else if (this->vendor == "AuthenticAMD") {
            is_amd = true;
        }

        // load bitset with flags for function 0x00000001
        if (n_ids >= 1) {
            f_1_ecx = data[1][2];
            f_1_edx = data[1][3];
        }

        // load bitset with flags for function 0x00000007
        if (n_ids >= 7) {
            f_7_ebx = data[7][1];
            f_7_ecx = data[7][2];
            f_7_edx = data[7][3];
            // sub-leaf 1 is not part of the table above, query it separately
            cpuidex(cpui.data(), 7, 1);
            f_7_1_eax = cpui[0];
        }

        // calling __cpuid with 0x80000000 as the function_id argument
        // gets the number of the highest valid extended ID.
        cpuid(cpui.data(), 0x80000000);
        const unsigned int n_ex_ids = static_cast<unsigned int>(cpui[0]);

        // ext_data[k] holds extended leaf 0x80000000 + k
        std::vector<std::array<int, 4>> ext_data;
        for (unsigned int i = 0x80000000; i <= n_ex_ids; ++i) {
            cpuidex(cpui.data(), i, 0);
            ext_data.push_back(cpui);
        }

        // load bitset with flags for function 0x80000001
        if (n_ex_ids >= 0x80000001) {
            f_81_ecx = ext_data[1][2];
            f_81_edx = ext_data[1][3];
        }

        // interpret CPU brand string if reported: 3 leaves x 16 bytes,
        // the zero-initialised tail of the buffer terminates the string
        char brand_buf[0x40] = {};
        if (n_ex_ids >= 0x80000004) {
            std::memcpy(brand_buf, ext_data[2].data(), sizeof(cpui));
            std::memcpy(brand_buf + 16, ext_data[3].data(), sizeof(cpui));
            std::memcpy(brand_buf + 32, ext_data[4].data(), sizeof(cpui));
            this->brand = brand_buf;
        }
    }

    bool is_intel = false;      // vendor string is "GenuineIntel"
    bool is_amd = false;        // vendor string is "AuthenticAMD"
    std::string vendor;         // vendor string from leaf 0
    std::string brand;          // brand string from leaves 0x80000002..4, empty if not reported
    std::bitset<32> f_1_ecx;    // leaf 1, ECX
    std::bitset<32> f_1_edx;    // leaf 1, EDX
    std::bitset<32> f_7_ebx;    // leaf 7 sub-leaf 0, EBX
    std::bitset<32> f_7_ecx;    // leaf 7 sub-leaf 0, ECX
    std::bitset<32> f_7_edx;    // leaf 7 sub-leaf 0, EDX
    std::bitset<32> f_7_1_eax;  // leaf 7 sub-leaf 1, EAX
    std::bitset<32> f_81_ecx;   // leaf 0x80000001, ECX
    std::bitset<32> f_81_edx;   // leaf 0x80000001, EDX
};
|
193
|
+
|
194
|
+
#if 0
|
195
|
+
// Debug helper (compiled out by the surrounding #if 0): prints the vendor,
// the brand string and one "name: 0/1" line per feature flag of cpuid_x86.
// The dump also lists MONITOR, AVX512DQ/BW/VL and AVX_VNNI so that every flag
// the scoring function gates on can be inspected here.
void test_x86_is() {
    cpuid_x86 is;
    printf("CPU Vendor: %s\n", is.vendor.c_str());
    printf("Brand: %s\n", is.brand.c_str());
    printf("is_intel: %d\n", is.is_intel);
    printf("is_amd: %d\n", is.is_amd);
    printf("sse3: %d\n", is.SSE3());
    printf("pclmulqdq: %d\n", is.PCLMULQDQ());
    printf("monitor: %d\n", is.MONITOR());
    printf("ssse3: %d\n", is.SSSE3());
    printf("fma: %d\n", is.FMA());
    printf("cmpxchg16b: %d\n", is.CMPXCHG16B());
    printf("sse41: %d\n", is.SSE41());
    printf("sse42: %d\n", is.SSE42());
    printf("movbe: %d\n", is.MOVBE());
    printf("popcnt: %d\n", is.POPCNT());
    printf("aes: %d\n", is.AES());
    printf("xsave: %d\n", is.XSAVE());
    printf("osxsave: %d\n", is.OSXSAVE());
    printf("avx: %d\n", is.AVX());
    printf("f16c: %d\n", is.F16C());
    printf("rdrand: %d\n", is.RDRAND());
    printf("msr: %d\n", is.MSR());
    printf("cx8: %d\n", is.CX8());
    printf("sep: %d\n", is.SEP());
    printf("cmov: %d\n", is.CMOV());
    printf("clflush: %d\n", is.CLFSH());
    printf("mmx: %d\n", is.MMX());
    printf("fxsr: %d\n", is.FXSR());
    printf("sse: %d\n", is.SSE());
    printf("sse2: %d\n", is.SSE2());
    printf("fsgsbase: %d\n", is.FSGSBASE());
    printf("bmi1: %d\n", is.BMI1());
    printf("hle: %d\n", is.HLE());
    printf("avx2: %d\n", is.AVX2());
    printf("bmi2: %d\n", is.BMI2());
    printf("erms: %d\n", is.ERMS());
    printf("invpcid: %d\n", is.INVPCID());
    printf("rtm: %d\n", is.RTM());
    printf("avx512f: %d\n", is.AVX512F());
    printf("avx512dq: %d\n", is.AVX512DQ());
    printf("rdseed: %d\n", is.RDSEED());
    printf("adx: %d\n", is.ADX());
    printf("avx512pf: %d\n", is.AVX512PF());
    printf("avx512er: %d\n", is.AVX512ER());
    printf("avx512cd: %d\n", is.AVX512CD());
    printf("avx512bw: %d\n", is.AVX512BW());
    printf("avx512vl: %d\n", is.AVX512VL());
    printf("sha: %d\n", is.SHA());
    printf("prefetchwt1: %d\n", is.PREFETCHWT1());
    printf("lahf: %d\n", is.LAHF());
    printf("lzcnt: %d\n", is.LZCNT());
    printf("abm: %d\n", is.ABM());
    printf("sse4a: %d\n", is.SSE4a());
    printf("xop: %d\n", is.XOP());
    printf("tbm: %d\n", is.TBM());
    printf("syscall: %d\n", is.SYSCALL());
    printf("mmxext: %d\n", is.MMXEXT());
    printf("rdtscp: %d\n", is.RDTSCP());
    printf("3dnowext: %d\n", is._3DNOWEXT());
    printf("3dnow: %d\n", is._3DNOW());
    printf("avx512_vbmi: %d\n", is.AVX512_VBMI());
    printf("avx512_vnni: %d\n", is.AVX512_VNNI());
    printf("avx512_fp16: %d\n", is.AVX512_FP16());
    printf("avx512_bf16: %d\n", is.AVX512_BF16());
    printf("avx_vnni: %d\n", is.AVX_VNNI());
    printf("amx_tile: %d\n", is.AMX_TILE());
    printf("amx_int8: %d\n", is.AMX_INT8());
    printf("amx_fp16: %d\n", is.AMX_FP16());
    printf("amx_bf16: %d\n", is.AMX_BF16());
}
|
261
|
+
#endif
|
262
|
+
|
263
|
+
// Scores how well this build of the CPU backend matches the host CPU.
//
// Every GGML_* macro tested below marks an instruction-set extension this
// build was compiled to use. If the host CPU does not advertise one of them
// the function returns 0; otherwise each enabled extension adds its own
// power-of-two weight, so a build using more extensions scores higher.
//
// The accumulator starts at 1, not 0: a build compiled with none of the
// GGML_* macros must still return a non-zero score, because 0 is the value
// used for "host lacks a required feature".
//
// Returns 0 when a required extension is missing, a positive score otherwise.
static int ggml_backend_cpu_x86_score() {
    // FIXME: this does not check for OS support

    int score = 1;
    cpuid_x86 is;

#ifdef GGML_FMA
    if (!is.FMA()) { return 0; }
    score += 1;
#endif
#ifdef GGML_F16C
    if (!is.F16C()) { return 0; }
    score += 1<<1;
#endif
#ifdef GGML_SSE42
    if (!is.SSE42()) { return 0; }
    score += 1<<2;
#endif
    // weight 1<<3 is not assigned to any feature
#ifdef GGML_AVX
    if (!is.AVX()) { return 0; }
    score += 1<<4;
#endif
#ifdef GGML_AVX2
    if (!is.AVX2()) { return 0; }
    score += 1<<5;
#endif
#ifdef GGML_AVX_VNNI
    if (!is.AVX_VNNI()) { return 0; }
    score += 1<<6;
#endif
#ifdef GGML_AVX512
    // the AVX512 build needs the F, CD, VL, DQ and BW subsets together
    if (!is.AVX512F()) { return 0; }
    if (!is.AVX512CD()) { return 0; }
    if (!is.AVX512VL()) { return 0; }
    if (!is.AVX512DQ()) { return 0; }
    if (!is.AVX512BW()) { return 0; }
    score += 1<<7;
#endif
#ifdef GGML_AVX512_VBMI
    if (!is.AVX512_VBMI()) { return 0; }
    score += 1<<8;
#endif
#ifdef GGML_AVX512_BF16
    if (!is.AVX512_BF16()) { return 0; }
    score += 1<<9;
#endif
#ifdef GGML_AVX512_VNNI
    if (!is.AVX512_VNNI()) { return 0; }
    score += 1<<10;
#endif
#ifdef GGML_AMX_INT8
    if (!is.AMX_INT8()) { return 0; }
    score += 1<<11;
#endif

    return score;
}
|
320
|
+
|
321
|
+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_x86_score)
|
322
|
+
|
323
|
+
#endif // defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64))
|