whispercpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
@@ -0,0 +1,552 @@
|
|
1
|
+
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstring>
#include <filesystem>
#include <locale>
#include <memory>
#include <string>
#include <system_error>
#include <type_traits>
#include <vector>
|
13
|
+
|
14
|
+
#ifdef _WIN32
|
15
|
+
# define WIN32_LEAN_AND_MEAN
|
16
|
+
# ifndef NOMINMAX
|
17
|
+
# define NOMINMAX
|
18
|
+
# endif
|
19
|
+
# include <windows.h>
|
20
|
+
#elif defined(__APPLE__)
|
21
|
+
# include <mach-o/dyld.h>
|
22
|
+
# include <dlfcn.h>
|
23
|
+
#else
|
24
|
+
# include <dlfcn.h>
|
25
|
+
# include <unistd.h>
|
26
|
+
#endif
|
27
|
+
|
28
|
+
// Backend registry
|
29
|
+
#ifdef GGML_USE_CPU
|
30
|
+
#include "ggml-cpu.h"
|
31
|
+
#endif
|
32
|
+
|
33
|
+
#ifdef GGML_USE_CUDA
|
34
|
+
#include "ggml-cuda.h"
|
35
|
+
#endif
|
36
|
+
|
37
|
+
#ifdef GGML_USE_METAL
|
38
|
+
#include "ggml-metal.h"
|
39
|
+
#endif
|
40
|
+
|
41
|
+
#ifdef GGML_USE_SYCL
|
42
|
+
#include "ggml-sycl.h"
|
43
|
+
#endif
|
44
|
+
|
45
|
+
#ifdef GGML_USE_VULKAN
|
46
|
+
#include "ggml-vulkan.h"
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#ifdef GGML_USE_OPENCL
|
50
|
+
#include "ggml-opencl.h"
|
51
|
+
#endif
|
52
|
+
|
53
|
+
#ifdef GGML_USE_BLAS
|
54
|
+
#include "ggml-blas.h"
|
55
|
+
#endif
|
56
|
+
|
57
|
+
#ifdef GGML_USE_RPC
|
58
|
+
#include "ggml-rpc.h"
|
59
|
+
#endif
|
60
|
+
|
61
|
+
#ifdef GGML_USE_CANN
|
62
|
+
#include "ggml-cann.h"
|
63
|
+
#endif
|
64
|
+
|
65
|
+
#ifdef GGML_USE_KOMPUTE
|
66
|
+
#include "ggml-kompute.h"
|
67
|
+
#endif
|
68
|
+
|
69
|
+
#ifdef _WIN32
|
70
|
+
|
71
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
72
|
+
|
73
|
+
// Windows deleter for dynamically loaded libraries: passes the module handle to FreeLibrary.
struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};
|
78
|
+
|
79
|
+
// Loads the library at the wide-character `path` with LoadLibraryW and returns
// whatever handle LoadLibraryW produced.
static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    // (the first call is only used to read the previous mode; the second ORs the flag into it)
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    HMODULE module = LoadLibraryW(path.c_str());

    // put the error mode back the way it was before the load
    SetErrorMode(saved_mode);

    return module;
}
|
90
|
+
|
91
|
+
// Narrow-string overload: converts `path` from UTF-8 to UTF-16 and forwards to the
// std::wstring overload.
static dl_handle * dl_load_library(const std::string & path) {
    using utf8_to_utf16 = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;

    utf8_to_utf16 to_wide;
    const std::wstring wide_path = to_wide.from_bytes(path);

    return dl_load_library(wide_path);
}
|
95
|
+
|
96
|
+
// Looks up the exported symbol `name` in the loaded module `handle` via GetProcAddress
// and returns the result as an untyped pointer.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    // same error-mode dance as in dl_load_library: read the old mode, add the flag, restore afterwards
    const DWORD saved_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(saved_mode | SEM_FAILCRITICALERRORS);

    void * sym = (void *) GetProcAddress(handle, name);

    SetErrorMode(saved_mode);

    return sym;
}
|
106
|
+
|
107
|
+
#else
|
108
|
+
|
109
|
+
using dl_handle = void;
|
110
|
+
|
111
|
+
// Non-Windows deleter for dynamically loaded libraries: passes the handle to dlclose.
struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};
|
116
|
+
|
117
|
+
static void * dl_load_library(const std::string & path) {
|
118
|
+
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
119
|
+
|
120
|
+
return handle;
|
121
|
+
}
|
122
|
+
|
123
|
+
// Looks up the symbol `name` in the library `handle` via dlsym.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    void * sym = dlsym(handle, name);
    return sym;
}
|
126
|
+
|
127
|
+
#endif
|
128
|
+
|
129
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
130
|
+
|
131
|
+
struct ggml_backend_reg_entry {
|
132
|
+
ggml_backend_reg_t reg;
|
133
|
+
dl_handle_ptr handle;
|
134
|
+
};
|
135
|
+
|
136
|
+
struct ggml_backend_registry {
|
137
|
+
std::vector<ggml_backend_reg_entry> backends;
|
138
|
+
std::vector<ggml_backend_dev_t> devices;
|
139
|
+
|
140
|
+
ggml_backend_registry() {
|
141
|
+
#ifdef GGML_USE_CUDA
|
142
|
+
register_backend(ggml_backend_cuda_reg());
|
143
|
+
#endif
|
144
|
+
#ifdef GGML_USE_METAL
|
145
|
+
register_backend(ggml_backend_metal_reg());
|
146
|
+
#endif
|
147
|
+
#ifdef GGML_USE_SYCL
|
148
|
+
register_backend(ggml_backend_sycl_reg());
|
149
|
+
#endif
|
150
|
+
#ifdef GGML_USE_VULKAN
|
151
|
+
register_backend(ggml_backend_vk_reg());
|
152
|
+
#endif
|
153
|
+
#ifdef GGML_USE_OPENCL
|
154
|
+
register_backend(ggml_backend_opencl_reg());
|
155
|
+
#endif
|
156
|
+
#ifdef GGML_USE_CANN
|
157
|
+
register_backend(ggml_backend_cann_reg());
|
158
|
+
#endif
|
159
|
+
#ifdef GGML_USE_BLAS
|
160
|
+
register_backend(ggml_backend_blas_reg());
|
161
|
+
#endif
|
162
|
+
#ifdef GGML_USE_RPC
|
163
|
+
register_backend(ggml_backend_rpc_reg());
|
164
|
+
#endif
|
165
|
+
#ifdef GGML_USE_KOMPUTE
|
166
|
+
register_backend(ggml_backend_kompute_reg());
|
167
|
+
#endif
|
168
|
+
#ifdef GGML_USE_CPU
|
169
|
+
register_backend(ggml_backend_cpu_reg());
|
170
|
+
#endif
|
171
|
+
}
|
172
|
+
|
173
|
+
~ggml_backend_registry() {
|
174
|
+
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
175
|
+
// since backend threads may still be running and accessing resources from the dynamic library
|
176
|
+
for (auto & entry : backends) {
|
177
|
+
if (entry.handle) {
|
178
|
+
entry.handle.release(); // NOLINT
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
184
|
+
if (!reg) {
|
185
|
+
return;
|
186
|
+
}
|
187
|
+
|
188
|
+
#ifndef NDEBUG
|
189
|
+
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
190
|
+
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
191
|
+
#endif
|
192
|
+
backends.push_back({ reg, std::move(handle) });
|
193
|
+
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
194
|
+
register_device(ggml_backend_reg_dev_get(reg, i));
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
void register_device(ggml_backend_dev_t device) {
|
199
|
+
#ifndef NDEBUG
|
200
|
+
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
|
201
|
+
#endif
|
202
|
+
devices.push_back(device);
|
203
|
+
}
|
204
|
+
|
205
|
+
ggml_backend_reg_t load_backend(const char * path, bool silent) {
|
206
|
+
dl_handle_ptr handle { dl_load_library(path) };
|
207
|
+
if (!handle) {
|
208
|
+
if (!silent) {
|
209
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
210
|
+
}
|
211
|
+
return nullptr;
|
212
|
+
}
|
213
|
+
|
214
|
+
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
215
|
+
if (score_fn && score_fn() == 0) {
|
216
|
+
if (!silent) {
|
217
|
+
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
218
|
+
}
|
219
|
+
return nullptr;
|
220
|
+
}
|
221
|
+
|
222
|
+
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
223
|
+
if (!backend_init_fn) {
|
224
|
+
if (!silent) {
|
225
|
+
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
|
226
|
+
}
|
227
|
+
return nullptr;
|
228
|
+
}
|
229
|
+
|
230
|
+
ggml_backend_reg_t reg = backend_init_fn();
|
231
|
+
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
232
|
+
if (!silent) {
|
233
|
+
if (!reg) {
|
234
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
|
235
|
+
} else {
|
236
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
237
|
+
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
238
|
+
}
|
239
|
+
}
|
240
|
+
return nullptr;
|
241
|
+
}
|
242
|
+
|
243
|
+
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
|
244
|
+
|
245
|
+
register_backend(reg, std::move(handle));
|
246
|
+
|
247
|
+
return reg;
|
248
|
+
}
|
249
|
+
|
250
|
+
void unload_backend(ggml_backend_reg_t reg, bool silent) {
|
251
|
+
auto it = std::find_if(backends.begin(), backends.end(),
|
252
|
+
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
253
|
+
|
254
|
+
if (it == backends.end()) {
|
255
|
+
if (!silent) {
|
256
|
+
GGML_LOG_ERROR("%s: backend not found\n", __func__);
|
257
|
+
}
|
258
|
+
return;
|
259
|
+
}
|
260
|
+
|
261
|
+
if (!silent) {
|
262
|
+
GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
|
263
|
+
}
|
264
|
+
|
265
|
+
// remove devices
|
266
|
+
devices.erase(
|
267
|
+
std::remove_if(devices.begin(), devices.end(),
|
268
|
+
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
269
|
+
devices.end());
|
270
|
+
|
271
|
+
// remove backend
|
272
|
+
backends.erase(it);
|
273
|
+
}
|
274
|
+
};
|
275
|
+
|
276
|
+
// Returns the registry shared by every function in this file. It is a function-local
// static, so it is constructed the first time this function is called.
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry registry;
    return registry;
}
|
280
|
+
|
281
|
+
// Internal API
|
282
|
+
// Adds `reg` to the shared registry (no library handle is attached).
void ggml_backend_register(ggml_backend_reg_t reg) {
    ggml_backend_registry & registry = get_reg();
    registry.register_backend(reg);
}
|
285
|
+
|
286
|
+
// Adds `device` to the shared registry's device list.
void ggml_backend_device_register(ggml_backend_dev_t device) {
    ggml_backend_registry & registry = get_reg();
    registry.register_device(device);
}
|
289
|
+
|
290
|
+
// Backend (reg) enumeration
|
291
|
+
// Case-insensitive comparison of two NUL-terminated strings.
// Returns true when both strings have the same length and every pair of characters
// compares equal after std::tolower. Neither pointer may be null.
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (any non-ASCII byte where char is signed) is undefined
        const auto ca = static_cast<unsigned char>(*a);
        const auto cb = static_cast<unsigned char>(*b);
        if (std::tolower(ca) != std::tolower(cb)) {
            return false;
        }
    }
    // at least one string ended: equal only if both did
    return *a == *b;
}
|
299
|
+
|
300
|
+
size_t ggml_backend_reg_count() {
|
301
|
+
return get_reg().backends.size();
|
302
|
+
}
|
303
|
+
|
304
|
+
// Returns the backend at position `index` in registration order.
// `index` must be below ggml_backend_reg_count(); this is enforced with GGML_ASSERT.
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_reg_count());
    const ggml_backend_reg_entry & entry = get_reg().backends[index];
    return entry.reg;
}
|
308
|
+
|
309
|
+
// Returns the first registered backend whose name matches `name` (compared with
// striequals, i.e. ignoring case), or nullptr when none matches.
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    size_t pos = 0;
    while (pos < ggml_backend_reg_count()) {
        ggml_backend_reg_t candidate = ggml_backend_reg_get(pos);
        if (striequals(ggml_backend_reg_name(candidate), name)) {
            return candidate;
        }
        pos++;
    }
    return nullptr;
}
|
318
|
+
|
319
|
+
// Device enumeration
|
320
|
+
size_t ggml_backend_dev_count() {
|
321
|
+
return get_reg().devices.size();
|
322
|
+
}
|
323
|
+
|
324
|
+
// Returns the device at position `index` in registration order.
// `index` must be below ggml_backend_dev_count(); this is enforced with GGML_ASSERT.
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
    const std::vector<ggml_backend_dev_t> & all_devices = get_reg().devices;
    return all_devices[index];
}
|
328
|
+
|
329
|
+
// Returns the first registered device whose name matches `name` (compared with
// striequals, i.e. ignoring case), or nullptr when none matches.
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    size_t pos = 0;
    while (pos < ggml_backend_dev_count()) {
        ggml_backend_dev_t candidate = ggml_backend_dev_get(pos);
        if (striequals(ggml_backend_dev_name(candidate), name)) {
            return candidate;
        }
        pos++;
    }
    return nullptr;
}
|
338
|
+
|
339
|
+
// Returns the first registered device of the requested `type`, or nullptr when the
// registry holds no device of that type.
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    size_t pos = 0;
    while (pos < ggml_backend_dev_count()) {
        ggml_backend_dev_t candidate = ggml_backend_dev_get(pos);
        if (ggml_backend_dev_type(candidate) == type) {
            return candidate;
        }
        pos++;
    }
    return nullptr;
}
|
348
|
+
|
349
|
+
// Convenience functions
|
350
|
+
// Looks up a device by name and initializes a backend on it with `params`.
// Returns nullptr when no device has that name.
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
    if (dev != nullptr) {
        return ggml_backend_dev_init(dev, params);
    }
    return nullptr;
}
|
357
|
+
|
358
|
+
// Looks up the first device of the given type and initializes a backend on it with `params`.
// Returns nullptr when no device of that type is registered.
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
    if (dev != nullptr) {
        return ggml_backend_dev_init(dev, params);
    }
    return nullptr;
}
|
365
|
+
|
366
|
+
// Initializes a backend on the first GPU device, falling back to the first CPU device
// when no GPU device is registered. Returns nullptr when neither kind is available.
ggml_backend_t ggml_backend_init_best(void) {
    ggml_backend_dev_t chosen = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
    if (chosen == nullptr) {
        chosen = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    }
    if (chosen == nullptr) {
        return nullptr;
    }
    // no parameter string is passed to the device
    return ggml_backend_dev_init(chosen, nullptr);
}
|
376
|
+
|
377
|
+
// Dynamic loading
|
378
|
+
// Loads and registers the backend library at `path`, with failure logging enabled
// (silent = false). Returns the registration, or nullptr on failure.
ggml_backend_reg_t ggml_backend_load(const char * path) {
    const bool silent = false;
    return get_reg().load_backend(path, silent);
}
|
381
|
+
|
382
|
+
// Removes `reg` and its devices from the shared registry, without logging (silent = true).
void ggml_backend_unload(ggml_backend_reg_t reg) {
    const bool silent = true;
    get_reg().unload_backend(reg, silent);
}
|
385
|
+
|
386
|
+
// Returns the directory containing the running executable, terminated with the platform's
// path separator ('/' on Apple and Linux, '\\' on Windows).
//  - Linux:   falls back to "./" when /proc/self/exe cannot be read
//  - Windows: returns "" when GetModuleFileNameA fails
//  - other platforms: returns "" (no lookup is implemented)
static std::string get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    // start with an empty buffer: a failed call leaves the size it wants in `size`
    // (NOTE(review): per the dyld(3) man page), so resize and retry until it succeeds
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // `size` is the buffer capacity, which includes the NUL terminator: cut the string at the first NUL
    base_path.resize(std::strlen(base_path.c_str()));
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
        if (len == -1) {
            break;
        }
        // a result that fills the whole buffer may have been truncated: grow and retry
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<char> path(MAX_PATH);
    DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
    if (len == 0) {
        return "";
    }
    std::string base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "\\";
#else
    // no implementation for this platform: return an empty path instead of
    // flowing off the end of a non-void function (undefined behavior)
    return "";
#endif
}
|
442
|
+
|
443
|
+
// File-name prefix of backend libraries: "ggml-" on Windows, "libggml-" everywhere else.
static std::string backend_filename_prefix() {
#ifdef _WIN32
    const char * prefix = "ggml-";
#else
    const char * prefix = "libggml-";
#endif
    return std::string(prefix);
}
|
450
|
+
|
451
|
+
// File-name extension of backend libraries: ".dll" on Windows, ".so" everywhere else.
static std::string backend_filename_suffix() {
#ifdef _WIN32
    const char * suffix = ".dll";
#else
    const char * suffix = ".so";
#endif
    return std::string(suffix);
}
|
458
|
+
|
459
|
+
// Finds and loads the best available variant of the backend called `name`.
//
// Every regular file named [lib]ggml-<name>-*.[so|dll] in the search paths is opened and asked
// for its score through the "ggml_backend_score" entry point; the file with the highest positive
// score is loaded. When no variant scores above 0, the base library [lib]ggml-<name>.[so|dll]
// is tried in each search path instead.
//
//   name             - backend name used to build the file names
//   silent           - suppresses the error/info messages for files that fail to load or score
//   user_search_path - directory to search; when null, "./" and the executable's directory are used
//
// Returns the registration of the loaded backend, or nullptr when nothing could be loaded.
// All filesystem queries use the std::error_code overloads so that an unreadable or vanished
// directory is skipped instead of throwing out of this function.
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    namespace fs = std::filesystem;

    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
    // TODO: search system paths
    const std::string file_prefix = backend_filename_prefix() + name + "-";
    const std::string file_suffix = backend_filename_suffix();

    std::vector<std::string> search_paths;
    if (user_search_path == nullptr) {
        search_paths.push_back("./");
        search_paths.push_back(get_executable_path());
    } else {
#if defined(_WIN32)
        search_paths.push_back(std::string(user_search_path) + "\\");
#else
        search_paths.push_back(std::string(user_search_path) + "/");
#endif
    }

    int best_score = 0;
    std::string best_path;

    for (const auto & search_path : search_paths) {
        std::error_code ec;
        // the error_code overload returns false on error, so a failing lookup is treated as "missing"
        if (!fs::exists(search_path, ec)) {
            continue;
        }
        fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied, ec);
        if (ec) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to open %s: %s\n", __func__, search_path.c_str(), ec.message().c_str());
            }
            continue;
        }

        const fs::directory_iterator dir_end;
        while (dir_it != dir_end) {
            const fs::directory_entry & entry = *dir_it;
            if (entry.is_regular_file(ec)) {
                std::string filename = entry.path().filename().string();
                std::string ext = entry.path().extension().string();
                if (filename.find(file_prefix) == 0 && ext == file_suffix) {
                    // the library is only opened to query its score; the handle is closed
                    // again when it goes out of scope at the end of this block
                    dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
                    if (!handle && !silent) {
                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                    }
                    if (handle) {
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
#ifndef NDEBUG
                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
#endif
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path().string();
                            }
                        } else {
                            if (!silent) {
                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
                            }
                        }
                    }
                }
            }

            // non-throwing advance; stop scanning this directory if it fails
            dir_it.increment(ec);
            if (ec) {
                break;
            }
        }
    }

    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
            std::string path = search_path + backend_filename_prefix() + name + file_suffix;
            std::error_code ec;
            if (fs::exists(path, ec)) {
                return get_reg().load_backend(path.c_str(), silent);
            }
        }
        return nullptr;
    }

    return get_reg().load_backend(best_path.c_str(), silent);
}
|
528
|
+
|
529
|
+
// Loads all known backends using the default search paths (a null directory is passed on).
void ggml_backend_load_all() {
    const char * default_dir = nullptr;
    ggml_backend_load_all_from_path(default_dir);
}
|
532
|
+
|
533
|
+
// Tries to load the best variant of every known backend from `dir_path`
// (a null `dir_path` is passed through to ggml_backend_load_best unchanged).
// Messages are suppressed in builds that define NDEBUG.
void ggml_backend_load_all_from_path(const char * dir_path) {
#ifdef NDEBUG
    const bool silent = true;
#else
    const bool silent = false;
#endif

    // backends are attempted in exactly this order; "cpu" comes last
    const char * const backend_names[] = {
        "blas",
        "cann",
        "cuda",
        "hip",
        "kompute",
        "metal",
        "rpc",
        "sycl",
        "vulkan",
        "opencl",
        "musa",
        "cpu",
    };

    for (const char * backend_name : backend_names) {
        ggml_backend_load_best(backend_name, silent, dir_path);
    }
}
|