llama_cpp 0.14.0 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/extconf.rb +3 -1
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d720138462a39f3fd9853befa19e55543a794eb4d1c379f7d9db516a4569db68
+  data.tar.gz: 9779852b62cf57ab208275b746ca2492921cf1f92d4ebf6be26a668f1a7bbb66
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7fa80468abc917099b58009a7a821c704989b8086026e92f8e71a1310ea7ec0449276aeb4653bdb4ddf499183c785b0513ab75b1fa6a94659fe15be2cf05190c
+  data.tar.gz: '091155784ead62d3ef4ec68f3b4c9f6b1fc97ef87db45327266712912501746b08df983e0d0b81b518d229d4d31f1a0d77ad36f2d7156c26141b8116049d3206'
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
+## [[0.14.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.1...v0.14.2)] - 2024-03-16
+
+- Fix to use metal embed library on macOS.
+
+## [[0.14.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.14.0...v0.14.1)] - 2024-03-16
+
+- Bump llama.cpp from b2361 to b2435.
+- Add constants for vocaburary type: `LLAMA_VOCAB_TYPE_NONE`.
+- Add `n_ubatch` and `n_seq_max` accessors to `ContextParams`.
+- Add `n_ubatch`, `n_seq_max`, `set_causal_attn`, and `synchronize` methods to `Context`.
+
 ## [[0.14.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.13.0...v0.14.0)] - 2024-03-09
 
 **Breaking Changes**
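The new `ContextParams` accessors map straight onto the matching fields of llama.cpp's `llama_context_params`. A minimal usage sketch (the values are arbitrary placeholders, not recommendations):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.n_ubatch  = 256   # forwarded to llama_context_params.n_ubatch (physical micro-batch size)
params.n_seq_max = 4     # forwarded to llama_context_params.n_seq_max (max parallel sequences)
p params.n_ubatch        # => 256
p params.n_seq_max       # => 4
```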
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -21,6 +21,8 @@ make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')
 make_envs << ' LLAMA_MPI=1' if with_config('mpi')
 make_envs << ' LLAMA_VULKAN=1' if with_config('vulkan')
 
+make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)
+
 Dir.chdir(LLAMA_CPP_DIR) do
   _mkstdout, _mkstderr, mkstatus = Open3.capture3("make lib #{make_envs}".strip)
   abort('Failed to build llama.cpp.') unless mkstatus.success?
@@ -33,8 +35,8 @@ if RUBY_PLATFORM.match?(/darwin/)
   Dir.chdir(VENDOR_LIB_DIR) do
     _mkstdout, _mkstderr, mkstatus = Open3.capture3("install_name_tool -id #{VENDOR_LIB_DIR}/libllama.dylib libllama.dylib")
     abort('Failed to set installation path for libllama.dylib.') unless mkstatus.success?
-    FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal.metal", VENDOR_LIB_DIR)
   end
+  FileUtils.cp("#{LLAMA_CPP_DIR}/ggml-metal-embed.metal", VENDOR_LIB_DIR)
 end
 
 abort('libstdc++ is not found.') unless have_library('stdc++')
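For readers unfamiliar with mkmf, the pattern above works as follows: each `with_config` check looks for a `--with-<name>` flag passed at install time (e.g. `gem install llama_cpp -- --with-vulkan`) and appends the matching llama.cpp make variable, while the new line unconditionally enables the embedded Metal library on macOS. A condensed sketch of that mapping (only the flags visible in this hunk; the `make_envs` initialization is assumed, and this is not the complete extconf.rb):

```ruby
require 'mkmf'

make_envs = +''  # assumed initialization; the real file builds this string the same way
make_envs << ' LLAMA_HIPBLAS=1' if with_config('hipblas')  # gem install llama_cpp -- --with-hipblas
make_envs << ' LLAMA_MPI=1'     if with_config('mpi')      # gem install llama_cpp -- --with-mpi
make_envs << ' LLAMA_VULKAN=1'  if with_config('vulkan')   # gem install llama_cpp -- --with-vulkan
make_envs << ' LLAMA_METAL_EMBED_LIBRARY=1' if RUBY_PLATFORM.match?(/darwin/)  # always on for macOS

# The collected variables are then handed to llama.cpp's Makefile, e.g.
#   make lib LLAMA_VULKAN=1 LLAMA_METAL_EMBED_LIBRARY=1
```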
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -946,6 +946,10 @@ public:
     rb_define_method(rb_cLLaMAContextParams, "n_ctx", RUBY_METHOD_FUNC(_llama_context_params_get_n_ctx), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_batch), 1);
     rb_define_method(rb_cLLaMAContextParams, "n_batch", RUBY_METHOD_FUNC(_llama_context_params_get_n_batch), 0);
+    rb_define_method(rb_cLLaMAContextParams, "n_ubatch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_ubatch), 1);
+    rb_define_method(rb_cLLaMAContextParams, "n_ubatch", RUBY_METHOD_FUNC(_llama_context_params_get_n_ubatch), 0);
+    rb_define_method(rb_cLLaMAContextParams, "n_seq_max=", RUBY_METHOD_FUNC(_llama_context_params_set_n_seq_max), 1);
+    rb_define_method(rb_cLLaMAContextParams, "n_seq_max", RUBY_METHOD_FUNC(_llama_context_params_get_n_seq_max), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_threads=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads), 1);
     rb_define_method(rb_cLLaMAContextParams, "n_threads", RUBY_METHOD_FUNC(_llama_context_params_get_n_threads), 0);
     rb_define_method(rb_cLLaMAContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(_llama_context_params_set_n_threads_batch), 1);
@@ -1031,6 +1035,30 @@ private:
     return INT2NUM(ptr->params.n_batch);
   }
 
+  // n_ubatch
+  static VALUE _llama_context_params_set_n_ubatch(VALUE self, VALUE n_ubatch) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.n_ubatch = NUM2INT(n_ubatch);
+    return INT2NUM(ptr->params.n_ubatch);
+  }
+
+  static VALUE _llama_context_params_get_n_ubatch(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.n_ubatch);
+  }
+
+  // n_seq_max
+  static VALUE _llama_context_params_set_n_seq_max(VALUE self, VALUE n_seq_max) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    ptr->params.n_seq_max = NUM2INT(n_seq_max);
+    return INT2NUM(ptr->params.n_seq_max);
+  }
+
+  static VALUE _llama_context_params_get_n_seq_max(VALUE self) {
+    LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    return INT2NUM(ptr->params.n_seq_max);
+  }
+
   // n_threads
   static VALUE _llama_context_params_set_n_threads(VALUE self, VALUE n_threads) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -2019,6 +2047,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "embeddings_seq", RUBY_METHOD_FUNC(_llama_context_embeddings_seq), 1);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_batch", RUBY_METHOD_FUNC(_llama_context_n_batch), 0);
+    rb_define_method(rb_cLLaMAContext, "n_ubatch", RUBY_METHOD_FUNC(_llama_context_n_ubatch), 0);
+    rb_define_method(rb_cLLaMAContext, "n_seq_max", RUBY_METHOD_FUNC(_llama_context_n_seq_max), 0);
     rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
@@ -2033,6 +2063,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_defrag", RUBY_METHOD_FUNC(_llama_context_kv_cache_defrag), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_kv_cache_update", RUBY_METHOD_FUNC(_llama_context_kv_cache_update), 0);
     rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
+    rb_define_method(rb_cLLaMAContext, "set_causal_attn", RUBY_METHOD_FUNC(_llama_context_set_causal_attn), 1);
+    rb_define_method(rb_cLLaMAContext, "synchronize", RUBY_METHOD_FUNC(_llama_context_synchronize), 0);
     rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
@@ -2250,6 +2282,24 @@ private:
     return UINT2NUM(llama_n_batch(ptr->ctx));
   }
 
+  static VALUE _llama_context_n_ubatch(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return UINT2NUM(llama_n_ubatch(ptr->ctx));
+  }
+
+  static VALUE _llama_context_n_seq_max(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    return UINT2NUM(llama_n_seq_max(ptr->ctx));
+  }
+
   static VALUE _llama_context_get_timings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
@@ -2395,6 +2445,26 @@ private:
     return Qnil;
   }
 
+  static VALUE _llama_context_set_causal_attn(VALUE self, VALUE causal_attn) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_set_causal_attn(ptr->ctx, RTEST(causal_attn) ? true : false);
+    return Qnil;
+  }
+
+  static VALUE _llama_context_synchronize(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    llama_synchronize(ptr->ctx);
+    return Qnil;
+  }
+
   static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
     ID kw_table[1] = { rb_intern("session_path") };
@@ -3204,6 +3274,7 @@ extern "C" void Init_llama_cpp(void) {
   rb_define_module_function(rb_mLLaMACpp, "supports_mlock?", rb_llama_supports_mlock, 0);
   rb_define_module_function(rb_mLLaMACpp, "supports_gpu_offload?", rb_llama_supports_gpu_offload, 0);
 
+  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_NONE", INT2NUM(LLAMA_VOCAB_TYPE_NONE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_SPM", INT2NUM(LLAMA_VOCAB_TYPE_SPM));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_BPE", INT2NUM(LLAMA_VOCAB_TYPE_BPE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_TYPE_WPM", INT2NUM(LLAMA_VOCAB_TYPE_WPM));
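On the Ruby side, the bindings registered above behave roughly as sketched below. This is a hedged fragment, not a complete program: `context` is assumed to be a `LLaMACpp::Context` built the usual way from a model and the `ContextParams` shown earlier.

```ruby
# `context` is an assumed, already-constructed LLaMACpp::Context instance.
context.set_causal_attn(false)  # RTEST(...) maps any truthy/falsy Ruby value to C true/false; returns nil
context.synchronize             # calls llama_synchronize to wait for pending backend work; returns nil
p context.n_ubatch              # => Integer, from llama_n_ubatch
p context.n_seq_max             # => Integer, from llama_n_seq_max

# Like the existing Context methods, each of these raises
# RuntimeError ("LLaMA context is not initialized") if the wrapped
# llama_context pointer is NULL.
```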
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.14.0'
+  VERSION = '0.14.2'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b2361'
+  LLAMA_CPP_VERSION = 'b2435'
 end
data/sig/llama_cpp.rbs
CHANGED
@@ -3,6 +3,7 @@ module LLaMACpp
   LLAMA_CPP_VERSION: String
   LLAMA_DEFALUT_SEED: String
 
+  LLAMA_VOCAB_TYPE_NONE: Integer
   LLAMA_VOCAB_TYPE_SPM: Integer
   LLAMA_VOCAB_TYPE_BPE: Integer
   LLAMA_VOCAB_TYPE_WPM: Integer
@@ -207,6 +208,8 @@ module LLaMACpp
     def logits: () -> Array[Float]
     def n_ctx: () -> Integer
     def n_batch: () -> Integer
+    def n_ubatch: () -> Integer
+    def n_seq_max: () -> Integer
     def timings: () -> ::LLaMACpp::Timings
     def print_timings: () -> void
     def reset_timings: () -> void
@@ -221,6 +224,8 @@ module LLaMACpp
     def kv_cache_defrag: () -> void
     def kv_cache_update: () -> void
     def set_rng_seed: (Integer) -> void
+    def set_causal_attn: (bool) -> void
+    def synchronize: () -> void
     def load_session_file: (session_path: String) -> void
     def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
     def sample_repetition_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], penalty_repeat: Float, penalty_freq: Float, penalty_present: Float) -> void
@@ -250,6 +255,10 @@ module LLaMACpp
     def n_ctx=: (Integer) -> Integer
     def n_batch: () -> Integer
     def n_batch=: (Integer) -> Integer
+    def n_ubatch: () -> Integer
+    def n_ubatch=: (Integer) -> Integer
+    def n_seq_max: () -> Integer
+    def n_seq_max=: (Integer) -> Integer
     def n_threads: () -> Integer
     def n_threads=: (Integer) -> Integer
     def n_threads_batch: () -> Integer
data/vendor/tmp/llama.cpp/Makefile
CHANGED
@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
-	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -167,6 +167,10 @@ ifeq ($(UNAME_S),OpenBSD)
 	MK_CPPFLAGS += -D_BSD_SOURCE
 endif
 
+ifdef LLAMA_SCHED_MAX_COPIES
+	MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
+endif
+
 ifdef LLAMA_DEBUG
 	MK_CFLAGS += -O0 -g
 	MK_CXXFLAGS += -O0 -g
@@ -201,6 +205,10 @@ ifdef LLAMA_SERVER_VERBOSE
 	MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
 endif
 
+ifdef LLAMA_SERVER_SSL
+	MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
+	MK_LDFLAGS += -lssl -lcrypto
+endif
 
 ifdef LLAMA_CODE_COVERAGE
 	MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
@@ -451,7 +459,7 @@ endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
 ifdef LLAMA_CUDA_CCBIN
 	MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
+ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-common.h
 ifdef JETSON_EOL_MODULE_DETECT
 	$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
 else
@@ -551,15 +559,16 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
 ifdef LLAMA_METAL_EMBED_LIBRARY
-ggml-metal-embed.o: ggml-metal.metal
+ggml-metal-embed.o: ggml-metal.metal ggml-common.h
 	@echo "Embedding Metal library"
+	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
 	$(eval TEMP_ASSEMBLY=$(shell mktemp))
-	@echo ".section __DATA, __ggml_metallib"
-	@echo ".globl _ggml_metallib_start"
-	@echo "_ggml_metallib_start:"
-	@echo ".incbin \"
-	@echo ".globl _ggml_metallib_end"
-	@echo "_ggml_metallib_end:"
+	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
+	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
+	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
+	@echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
+	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
+	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
 	@$(AS) $(TEMP_ASSEMBLY) -o $@
 	@rm -f ${TEMP_ASSEMBLY}
 endif
@@ -628,12 +637,15 @@ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
 ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
+ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
-
+unicode.o: unicode.cpp unicode.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-
+OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o
+
+llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
@@ -725,6 +737,10 @@ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(C
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
data/vendor/tmp/llama.cpp/ggml-alloc.c
CHANGED
@@ -61,7 +61,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
     }
 }
 
-// TODO: GGML_PAD ?
 static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
     assert(alignment && !(alignment & (alignment - 1))); // power of 2
     size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment;
@@ -69,25 +68,14 @@ static size_t aligned_offset(const void * buffer, size_t offset, size_t alignmen
 }
 
 // tallocr
-struct ggml_tallocr {
-    ggml_backend_buffer_t buffer;
-    void * base;
-    size_t alignment;
-    size_t offset;
-};
-
-ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
-    ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr));
-    if (talloc == NULL) {
-        return NULL;
-    }
 
+struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     void * base = ggml_backend_buffer_get_base(buffer);
     size_t align = ggml_backend_buffer_get_alignment(buffer);
 
     assert(align && !(align & (align - 1))); // power of 2
 
-
+    struct ggml_tallocr talloc = (struct ggml_tallocr) {
         /*.buffer = */ buffer,
         /*.base = */ base,
         /*.alignment = */ align,
@@ -96,11 +84,7 @@ ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
     return talloc;
 }
 
-void
-    free(talloc);
-}
-
-void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) {
+void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
     size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
     size = GGML_PAD(size, talloc->alignment);
 
@@ -354,12 +338,16 @@ struct hash_node {
     bool allocated;
 };
 
-//
 struct tensor_alloc {
     size_t offset;
     size_t size_max; // 0 = pre-allocated, unused, or view
 };
 
+struct leaf_alloc {
+    int buffer_id;
+    struct tensor_alloc leaf;
+};
+
 struct node_alloc {
     int buffer_id;
     struct tensor_alloc dst;
@@ -378,7 +366,7 @@ struct ggml_gallocr {
     struct node_alloc * node_allocs; // [n_nodes]
     int n_nodes;
 
-    struct
+    struct leaf_alloc * leaf_allocs; // [n_leafs]
     int n_leafs;
 };
 
@@ -543,13 +531,20 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
     return node_buffer_ids ? node_buffer_ids[i] : 0;
 }
 
-static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
+static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     // clear hash tables
     memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
    memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
 
+    // allocate leafs
+    // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
+    for (int i = 0; i < graph->n_leafs; i++) {
+        struct ggml_tensor * leaf = graph->leafs[i];
+        ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i));
+    }
+
     // count number of children and views
-    // allocate
+    // allocate other graph inputs and leafs first to avoid overwriting them
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
 
@@ -577,19 +572,6 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
         }
     }
 
-    // allocate the remaining leafs that are unused on the graph
-    // these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes
-    for (int i = 0; i < graph->n_leafs; i++) {
-        struct ggml_tensor * leaf = graph->leafs[i];
-        struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-
-        if (hn->n_children == 0) {
-            assert(!hn->allocated);
-            // since buffer ids are only given for nodes, these leafs are always allocated in the first buffer
-            ggml_gallocr_allocate_node(galloc, leaf, 0);
-        }
-    }
-
     // allocate tensors
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
@@ -652,7 +634,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
     }
 }
 
-bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
+bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
     size_t hash_size = graph->visited_hash_table.size;
 
     // initialize hash table
@@ -676,7 +658,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     }
 
     // allocate in hash table
-    ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids);
+    ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids);
 
     // set the node_allocs from the hash table
     if (galloc->n_nodes < graph->n_nodes) {
@@ -711,15 +693,16 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     }
     if (galloc->n_leafs < graph->n_leafs) {
         free(galloc->leaf_allocs);
-        galloc->leaf_allocs = calloc(sizeof(
+        galloc->leaf_allocs = calloc(sizeof(galloc->leaf_allocs[0]), graph->n_leafs);
         GGML_ASSERT(galloc->leaf_allocs != NULL);
     }
     galloc->n_leafs = graph->n_leafs;
     for (int i = 0; i < graph->n_leafs; i++) {
         struct ggml_tensor * leaf = graph->leafs[i];
         struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-        galloc->leaf_allocs[i].
-        galloc->leaf_allocs[i].
+        galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
+        galloc->leaf_allocs[i].leaf.offset = hn->offset;
+        galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
     }
 
     // reallocate buffers if needed
@@ -727,7 +710,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0;
         size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
 
-        if
+        // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
+        if (new_size > cur_size || galloc->buffers[i] == NULL) {
 #ifndef NDEBUG
             fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
@@ -744,30 +728,30 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 }
 
 bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
-    return ggml_gallocr_reserve_n(galloc, graph, NULL);
+    return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
 }
 
-static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
-    assert(
+static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) {
+    assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
 
-    if (
-        if (
+    if (tensor->view_src != NULL) {
+        if (tensor->buffer == NULL) {
            assert(tensor_alloc->offset == SIZE_MAX);
-            if (
+            if (tensor->view_src->buffer == NULL) {
                // this tensor was allocated without ggml-backend
                return;
            }
-            ggml_backend_view_init(galloc->buffers[buffer_id],
+            ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
        }
    } else {
-        if (
+        if (tensor->data == NULL) {
            assert(tensor_alloc->offset != SIZE_MAX);
-            assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id],
+            assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
            void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
            void * addr = (char *)base + tensor_alloc->offset;
-            ggml_backend_tensor_alloc(galloc->buffers[buffer_id],
+            ggml_backend_tensor_alloc(galloc->buffers[buffer_id], tensor, addr);
        } else {
-            if (
+            if (tensor->buffer == NULL) {
                // this tensor was allocated without ggml-backend
                return;
            }
@@ -843,13 +827,18 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
 
     // reset buffers
     for (int i = 0; i < galloc->n_buffers; i++) {
-        // zero size buffers are not allocated
        if (galloc->buffers[i] != NULL) {
            ggml_backend_buffer_reset(galloc->buffers[i]);
        }
    }
 
     // allocate the graph tensors from the previous assignments
+    // leafs
+    for (int i = 0; i < graph->n_leafs; i++) {
+        struct ggml_tensor * leaf = graph->leafs[i];
+        struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i];
+        ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf);
+    }
     // nodes
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
@@ -863,12 +852,6 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
         }
         ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
     }
-    // leafs
-    for (int i = 0; i < graph->n_leafs; i++) {
-        struct ggml_tensor * leaf = graph->leafs[i];
-        struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i];
-        ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc);
-    }
 
     return true;
 }
@@ -900,12 +883,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
         return false;
     }
 
-    struct ggml_tallocr
+    struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
 
     for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
         if (t->data == NULL) {
             if (t->view_src == NULL) {
-                ggml_tallocr_alloc(tallocr, t);
+                ggml_tallocr_alloc(&tallocr, t);
             } else if (t->buffer == NULL) {
                 ggml_backend_view_init(buffer, t);
             }
@@ -917,8 +900,6 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
         }
     }
 
-    ggml_tallocr_free(tallocr);
-
     *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
     (*buffers)[(*n_buffers)++] = buffer;
 
data/vendor/tmp/llama.cpp/ggml-alloc.h
CHANGED
@@ -11,11 +11,15 @@ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
 typedef struct ggml_backend * ggml_backend_t;
 
 // Tensor allocator
-
+struct ggml_tallocr {
+    ggml_backend_buffer_t buffer;
+    void * base;
+    size_t alignment;
+    size_t offset;
+};
 
-GGML_API
-GGML_API void
-GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor);
+GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
+GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
 
 // Graph allocator
 /*
@@ -50,7 +54,11 @@ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
 // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
 // returns false if the buffer allocation failed
 GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
-GGML_API bool ggml_gallocr_reserve_n(
+GGML_API bool ggml_gallocr_reserve_n(
+    ggml_gallocr_t galloc,
+    struct ggml_cgraph * graph,
+    const int * node_buffer_ids,
+    const int * leaf_buffer_ids);
 
 // automatic reallocation if the topology changes when using a single buffer
 // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
data/vendor/tmp/llama.cpp/ggml-backend-impl.h
CHANGED
@@ -86,12 +86,12 @@ extern "C" {
         // (optional) asynchronous tensor data access
         void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-        bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t
+        bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
 
         // (optional) complete all pending operations
         void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
-        //
+        // compute graph with a plan (not used currently)
         ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
         void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
@@ -102,16 +102,27 @@ extern "C" {
 
         // check if the backend supports an operation
         bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+        // (optional) event synchronization
+        ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
+        void (*GGML_CALL event_free) (ggml_backend_event_t event);
+        void (*GGML_CALL event_record) (ggml_backend_event_t event);
+        void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
+        void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
     };
 
     struct ggml_backend {
         ggml_guid_t guid;
 
         struct ggml_backend_i iface;
-
         ggml_backend_context_t context;
     };
 
+    struct ggml_backend_event {
+        ggml_backend_t backend;
+        void * context;
+    };
+
     //
     // Backend registry
     //