whispercpp 1.2.0.2 → 1.3.1
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +46 -86
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -7
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/ggml/include/ggml.h +2285 -0
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/include/whisper.h +672 -0
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1608 -159
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/src/whisper.cpp +7393 -0
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -8616
- data/ext/ggml.h +0 -748
- data/ext/whisper.cpp +0 -4829
- data/ext/whisper.h +0 -402
data/ext/extconf.rb
CHANGED
@@ -1,13 +1,10 @@
 require 'mkmf'
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
-system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
-
 
 # need to use c++ compiler flags
-$CXXFLAGS << ' -std=c++
+$CXXFLAGS << ' -std=c++17'
+
+$LDFLAGS << ' -lstdc++'
+
 # Set to true when building binary gems
 if enable_config('static-stdlib', false)
   $LDFLAGS << ' -static-libgcc -static-libstdc++'
@@ -18,4 +15,185 @@ if enable_config('march-tune-native', false)
   $CXXFLAGS << ' -march=native -mtune=native'
 end
 
+if ENV['WHISPER_METAL']
+  $GGML_METAL ||= true
+  $DEPRECATE_WARNING ||= true
+end
+
+$UNAME_S = `uname -s`.chomp
+$UNAME_P = `uname -p`.chomp
+$UNAME_M = `uname -m`.chomp
+
+if $UNAME_S == 'Darwin'
+  unless ENV['GGML_NO_METAL']
+    $GGML_METAL ||= true
+  end
+  $GGML_NO_OPENMP ||= true
+end
+
+if $GGML_METAL
+  $GGML_METAL_EMBED_LIBRARY = true
+end
+
+$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
+$MK_CFLAGS = '-std=c11 -fPIC'
+$MK_CXXFLAGS = '-std=c++17 -fPIC'
+$MK_NVCCFLAGS = '-std=c++17'
+$MK_LDFLAGS = ''
+
+$OBJ_GGML = []
+$OBJ_WHISPER = []
+$OBJ_COMMON = []
+$OBJ_SDL = []
+
+$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
+
+if $UNAME_S == 'Linux'
+  $MK_CPPFLAGS << ' -D_GNU_SOURCE'
+end
+
+if $UNAME_S == 'Darwin'
+  $MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
+end
+
+if ENV['WHISPER_DEBUG']
+  $MK_CFLAGS << ' -O0 -g'
+  $MK_CXXFLAGS << ' -O0 -g'
+  $MK_LDFLAGS << ' -g'
+  $MK_NVCCFLAGS << ' -O0 -g'
+else
+  $MK_CPPFLAGS << ' -DNDEBUG'
+  $MK_CFLAGS << ' -O3'
+  $MK_CXXFLAGS << ' -O3'
+  $MK_NVCCFLAGS << ' -O3'
+end
+
+$WARN_FLAGS =
+  ' -Wall' <<
+  ' -Wextra' <<
+  ' -Wpedantic' <<
+  ' -Wcast-qual' <<
+  ' -Wno-unused-function'
+
+$MK_CFLAGS <<
+  $WARN_FLAGS <<
+  ' -Wshadow' <<
+  ' -Wstrict-prototypes' <<
+  ' -Wpointer-arith' <<
+  ' -Wmissing-prototypes' <<
+  ' -Werror=implicit-int' <<
+  ' -Werror=implicit-function-declaration'
+
+$MK_CXXFLAGS <<
+  $WARN_FLAGS <<
+  ' -Wmissing-declarations' <<
+  ' -Wmissing-noreturn'
+
+unless `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
+  $MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
+end
+
+if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
+  $MK_CFLAGS << ' -pthread'
+  $MK_CXXFLAGS << ' -pthread'
+end
+
+unless $_WIN32
+  $DSO_EXT = '.so'
+else
+  $DSO_EXT = '.dll'
+end
+
+unless ENV['RISCV']
+  if %w[x86_64 i686 amd64].include? $UNAME_M
+    $HOST_CXXFLAGS ||= ''
+
+    $MK_CFLAGS << ' -march=native -mtune=native'
+    $HOST_CXXFLAGS << ' -march=native -mtune=native'
+  end
+else
+  $MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
+  $MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
+end
+
+unless ENV['GGML_NO_ACCELERATE']
+  if $UNAME_S == 'Darwin'
+    $MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE'
+    $MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
+    $MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
+    $MK_LDFLAGS << ' -framework Accelerate'
+    $OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
+  end
+end
+
+if ENV['GGML_OPENBLAS']
+  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
+  $MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas)`.chomp}"
+  $MK_LDFLAGS << " #{`pkg-config --libs openblas`}"
+  $OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
+end
+
+if ENV['GGML_OPENBLAS64']
+  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
+  $MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64)`.chomp}"
+  $MK_LDFLAGS << " #{`pkg-config --libs openblas64`}"
+  $OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
+end
+
+if $GGML_METAL
+  $MK_CPPFLAGS << ' -DGGML_USE_METAL'
+  $MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
+  $OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal.o'
+
+  if ENV['GGML_METAL_NDEBUG']
+    $MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
+  end
+
+  if $GGML_METAL_EMBED_LIBRARY
+    $MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
+    $OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal-embed.o'
+  end
+end
+
+$OBJ_GGML <<
+  'ggml/src/ggml.o' <<
+  'ggml/src/ggml-alloc.o' <<
+  'ggml/src/ggml-backend.o' <<
+  'ggml/src/ggml-backend-reg.o' <<
+  'ggml/src/ggml-opt.o' <<
+  'ggml/src/ggml-quants.o' <<
+  'ggml/src/ggml-threading.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
+  'ggml/src/ggml-cpu/ggml-cpu-traits.o'
+
+$OBJ_WHISPER <<
+  'src/whisper.o'
+
+$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
+$objs << "ruby_whisper.o"
+
+$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
+$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
+$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
+$CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
+$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
+$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
+
 create_makefile('whisper')
+
+File.open 'Makefile', 'a' do |file|
+  file.puts 'include scripts/get-flags.mk'
+  file.puts 'include cpu.mk'
+
+  if $GGML_METAL
+    file.puts 'include metal.mk'
+
+    if $GGML_METAL_EMBED_LIBRARY
+      file.puts 'include metal-embed.mk'
+    end
+  end
+end
data/ext/ggml/include/ggml-alloc.h
ADDED
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
+typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+typedef struct ggml_backend * ggml_backend_t;
+
+// Tensor allocator
+struct ggml_tallocr {
+    ggml_backend_buffer_t buffer;
+    void * base;
+    size_t alignment;
+    size_t offset;
+};
+
+GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
+GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
+
+// Graph allocator
+/*
+  Example usage:
+    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
+
+    // optional: create a worst-case graph and reserve the buffers to avoid reallocations
+    ggml_gallocr_reserve(galloc, build_graph(max_batch));
+
+    // allocate the graph
+    struct ggml_cgraph * graph = build_graph(batch);
+    ggml_gallocr_alloc_graph(galloc, graph);
+
+    printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
+
+    // evaluate the graph
+    ggml_backend_graph_compute(backend, graph);
+*/
+
+// special tensor flags for use with the graph allocator:
+//   ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
+//   ggml_set_output(): output tensors are never freed and never overwritten
+
+typedef struct ggml_gallocr * ggml_gallocr_t;
+
+GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
+GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
+GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
+
+// pre-allocate buffers from a measure graph - does not allocate or modify the graph
+// call with a worst-case graph to avoid buffer reallocations
+// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
+// returns false if the buffer allocation failed
+GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+GGML_API bool ggml_gallocr_reserve_n(
+    ggml_gallocr_t galloc,
+    struct ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids);
+
+// automatic reallocation if the topology changes when using a single buffer
+// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
+GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
+
+GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
+
+// Utils
+// Create a buffer and allocate all the tensors in a ggml_context
+GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
+GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
+
+#ifdef __cplusplus
+}
+#endif
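As a rough illustration of the graph-allocator API added above (not part of the diff), the sketch below builds a one-op graph, allocates it with ggml_gallocr against the CPU buffer type, and runs it. It assumes the bundled ggml.h exposes the usual upstream helpers (ggml_init, ggml_new_tensor_1d, ggml_add, ggml_new_graph, ggml_build_forward_expand, ggml_tensor_overhead, ggml_graph_overhead), which are not shown in this changeset.

// Hedged sketch, assuming upstream-style ggml.h helpers alongside the headers added in this release.
#include <stdio.h>
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main(void) {
    // the context holds only tensor/graph metadata; tensor data comes from the graph allocator
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * graph = ggml_new_graph(ctx);
    ggml_build_forward_expand(graph, c);

    // allocate all graph tensors in a CPU buffer (see ggml-alloc.h above)
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    ggml_gallocr_alloc_graph(galloc, graph);
    printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));

    // fill the inputs and evaluate on the CPU backend
    float av[4] = {1, 2, 3, 4}, bv[4] = {10, 20, 30, 40}, cv[4];
    ggml_backend_tensor_set(a, av, 0, sizeof(av));
    ggml_backend_tensor_set(b, bv, 0, sizeof(bv));

    ggml_backend_t backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
    ggml_backend_graph_compute(backend, graph);
    ggml_backend_tensor_get(c, cv, 0, sizeof(cv));
    printf("c[0] = %.1f\n", cv[0]);

    ggml_backend_free(backend);
    ggml_gallocr_free(galloc);
    ggml_free(ctx);
    return 0;
}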
data/ext/ggml/include/ggml-backend.h
ADDED
@@ -0,0 +1,352 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-alloc.h"
+
+#ifdef GGML_BACKEND_SHARED
+# if defined(_WIN32) && !defined(__MINGW32__)
+# ifdef GGML_BACKEND_BUILD
+# define GGML_BACKEND_API __declspec(dllexport) extern
+# else
+# define GGML_BACKEND_API __declspec(dllimport) extern
+# endif
+# else
+# define GGML_BACKEND_API __attribute__ ((visibility ("default"))) extern
+# endif
+#else
+# define GGML_BACKEND_API extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
+typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+typedef struct ggml_backend_event * ggml_backend_event_t;
+typedef struct ggml_backend * ggml_backend_t;
+typedef void * ggml_backend_graph_plan_t;
+typedef struct ggml_backend_reg * ggml_backend_reg_t;
+typedef struct ggml_backend_device * ggml_backend_dev_t;
+
+
+//
+// Backend buffer type
+//
+
+GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
+GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
+GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
+GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
+GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
+GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
+GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
+
+//
+// Backend buffer
+//
+
+enum ggml_backend_buffer_usage {
+    GGML_BACKEND_BUFFER_USAGE_ANY = 0,
+    GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
+    GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
+};
+
+GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
+GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
+GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
+GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
+GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
+GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
+GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
+GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
+GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
+GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
+
+// tensor copy between different backends
+GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
+
+//
+// Backend (stream)
+//
+
+GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
+GGML_API const char * ggml_backend_name(ggml_backend_t backend);
+GGML_API void ggml_backend_free(ggml_backend_t backend);
+
+GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
+GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
+GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
+GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
+
+GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+
+// "offset" refers to the offset in tensor->data for setting/getting data
+GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
+
+GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
+
+GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+// NOTE: will be removed, use device version instead
+GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
+GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
+GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
+
+// asynchronous copy
+// the copy is performed after all the currently queued operations in backend_src
+// backend_dst will wait for the copy to complete before performing other operations
+// automatic fallback to sync copy if async is not supported
+GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
+
+//
+// Events
+//
+
+GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
+GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
+GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
+GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
+GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
+
+//
+// Backend device
+//
+
+enum ggml_backend_dev_type {
+    // CPU device using system memory
+    GGML_BACKEND_DEVICE_TYPE_CPU,
+    // GPU device using dedicated memory
+    GGML_BACKEND_DEVICE_TYPE_GPU,
+    // accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
+    GGML_BACKEND_DEVICE_TYPE_ACCEL
+};
+
+// functionality supported by the device
+struct ggml_backend_dev_caps {
+    // asynchronous operations
+    bool async;
+    // pinned host buffer
+    bool host_buffer;
+    // creating buffers from host ptr
+    bool buffer_from_host_ptr;
+    // event synchronization
+    bool events;
+};
+
+// all the device properties
+struct ggml_backend_dev_props {
+    const char * name;
+    const char * description;
+    size_t memory_free;
+    size_t memory_total;
+    enum ggml_backend_dev_type type;
+    struct ggml_backend_dev_caps caps;
+};
+
+GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
+GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
+GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
+GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
+GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
+GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
+GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
+GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
+GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
+GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
+
+GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
+GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
+GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
+
+//
+// Backend (reg)
+//
+
+GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
+GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
+GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
+GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
+
+// Common functions that may be obtained using ggml_backend_reg_get_proc_address
+
+// Split buffer type for tensor parallelism
+typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(int main_device, const float * tensor_split);
+// Set the number of threads for the backend
+typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
+// Get additional buffer types provided by the device (returns a NULL-terminated array)
+typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
+// Set the abort callback for the backend
+typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
+// Get a list of feature flags supported by the backend (returns a NULL-terminated array)
+struct ggml_backend_feature {
+    const char * name;
+    const char * value;
+};
+typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
+
+//
+// Backend registry
+//
+
+// Backend (reg) enumeration
+GGML_API size_t ggml_backend_reg_count(void);
+GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
+GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
+
+// Device enumeration
+GGML_API size_t ggml_backend_dev_count(void);
+GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
+GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
+GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
+
+// Direct backend (stream) initialization
+// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
+GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
+// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
+GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
+// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
+GGML_API ggml_backend_t ggml_backend_init_best(void);
+
+// Load a backend from a dynamic library and register it
+GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
+// Unload a backend if loaded dynamically and unregister it
+GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
+// Load all known backends from dynamic libraries
+GGML_API void ggml_backend_load_all(void);
+GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
+
+//
+// Backend scheduler
+//
+
+// The backend scheduler allows for multiple backend devices to be used together
+// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
+// The backends are selected based on:
+// - the backend that supports the operation
+// - the location of the pre-allocated tensors (e.g. the weights)
+/*
+  Example usage:
+
+    // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
+    // preferrably to run on the same backend as the buffer
+    ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
+
+    sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
+
+    // initialize buffers from a max size graph (optional)
+    reserve_graph = build_graph(sched, max_batch_size);
+
+    // manually assign nodes to a backend (optional, should not be needed in most cases)
+    struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
+    ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
+
+    ggml_backend_sched_reserve(sched, reserve_graph);
+
+    // compute
+    graph = build_graph(sched); // the graph and its tensors are single-use in terms of allocation, multi-use in terms of computation
+    for (int i = 0; i < 10; ++i) {
+        ggml_backend_sched_graph_compute(sched, graph); // on the first iteration the graph is allocated automatically
+    }
+
+    // if there are graph inputs:
+    graph = build_graph(sched); // get a new graph that is not allocated (the metadata for the old graph is freed once ggml_free is called)
+    ggml_backend_sched_reset(sched); // clear the allocation of the previous graph
+    ggml_backend_sched_alloc_graph(sched, graph); // explicitly allocate the new graph but do not execute it
+    ggml_backend_tensor_set(input_tensor, ...); // copy data to the newly allocated graph tensors
+    ggml_backend_sched_graph_compute(sched, graph); // execute the graph
+
+    // as an alternative to the above it is also possible to assign the inputs to a dedicated context and
+    // allocate them statically via ggml_backend_alloc_ctx_tensors
+    }
+*/
+
+typedef struct ggml_backend_sched * ggml_backend_sched_t;
+
+// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
+// when ask == true, the scheduler wants to know if the user wants to observe this node
+// this allows the scheduler to batch nodes together in order to evaluate them in a single call
+//
+// when ask == false, the scheduler is passing the node tensor to the user for observation
+// if the user returns false, the scheduler will cancel the graph compute
+//
+typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
+
+// Initialize a backend scheduler, backends with low index are given priority over backends with high index
+GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
+GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
+
+// Initialize backend buffers from a measure graph
+GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
+
+GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
+GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
+
+// Get the number of splits of the last graph
+GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
+GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
+
+GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
+
+GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
+
+// Allocate and compute graph on the backend scheduler
+GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
+GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
+
+// Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph.
+// This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers.
+// The correct way to use this API is to discard the deallocated tensors and create new ones.
+GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
+
+// Set a callback to be called for each resulting node during graph compute
+GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
+
+//
+// Utils
+//
+
+struct ggml_backend_graph_copy {
+    ggml_backend_buffer_t buffer;
+    struct ggml_context * ctx_allocated;
+    struct ggml_context * ctx_unallocated;
+    struct ggml_cgraph * graph;
+};
+
+// Copy a graph to a different backend
+GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
+GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
+
+typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+
+// Compare the output of two backends
+GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
+
+// Tensor initialization
+GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
+GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
+
+// CPU buffer types are always available
+GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+
+#ifdef __cplusplus
+}
+#endif
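The registry and device entry points above are what the extension relies on to pick a backend at runtime. As a non-authoritative sketch (not part of the diff), enumerating the registered devices and starting the preferred backend could look like this; every call used here is declared in the header above.

// Hedged sketch: enumerate registered devices, then start the best available backend.
#include <stdio.h>
#include "ggml-backend.h"

int main(void) {
    // pick up any dynamically loadable backends as well
    ggml_backend_load_all();

    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);

        struct ggml_backend_dev_props props;
        ggml_backend_dev_get_props(dev, &props);

        printf("device %zu: %s (%s), free %zu / total %zu bytes\n",
               i, props.name, props.description, props.memory_free, props.memory_total);
    }

    // GPU if one is registered, otherwise CPU
    ggml_backend_t backend = ggml_backend_init_best();
    if (backend == NULL) {
        fprintf(stderr, "no usable backend\n");
        return 1;
    }

    printf("using backend: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}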
data/ext/ggml/include/ggml-blas.h
ADDED
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "ggml.h"
+#include "ggml-backend.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
+
+GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
+
+// number of threads used for conversion to float
+// for openblas and blis, this will also set the number of threads used for blas operations
+GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
+
+
+#ifdef __cplusplus
+}
+#endif