@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include "common.h"
|
|
3
|
+
#include <stdint.h>
|
|
4
|
+
|
|
5
|
+
#ifdef __cplusplus
|
|
6
|
+
extern "C" {
|
|
7
|
+
#endif
|
|
8
|
+
|
|
9
|
+
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);
|
|
10
|
+
|
|
11
|
+
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
12
|
+
|
|
13
|
+
void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);
|
|
14
|
+
|
|
15
|
+
#ifdef __cplusplus
|
|
16
|
+
}
|
|
17
|
+
#endif
|
|
@@ -9,144 +9,207 @@ extern "C" {
|
|
|
9
9
|
#endif
|
|
10
10
|
|
|
11
11
|
//
|
|
12
|
-
// Backend buffer
|
|
12
|
+
// Backend buffer type
|
|
13
13
|
//
|
|
14
14
|
|
|
15
|
-
// buffer type
|
|
16
|
-
typedef void * ggml_backend_buffer_type_context_t;
|
|
17
|
-
|
|
18
15
|
struct ggml_backend_buffer_type_i {
|
|
19
|
-
const char * (*
|
|
16
|
+
const char * (*get_name) (ggml_backend_buffer_type_t buft);
|
|
20
17
|
// allocate a buffer of this type
|
|
21
|
-
ggml_backend_buffer_t (*
|
|
18
|
+
ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
|
22
19
|
// tensor alignment
|
|
23
|
-
size_t (*
|
|
24
|
-
// max buffer size that can be allocated
|
|
25
|
-
size_t (*
|
|
26
|
-
// data size needed to allocate the tensor, including padding
|
|
27
|
-
size_t (*
|
|
28
|
-
// check if tensor data is in host memory
|
|
29
|
-
bool (*
|
|
20
|
+
size_t (*get_alignment) (ggml_backend_buffer_type_t buft);
|
|
21
|
+
// (optional) max buffer size that can be allocated (defaults to SIZE_MAX)
|
|
22
|
+
size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
|
|
23
|
+
// (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
|
|
24
|
+
size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
|
|
25
|
+
// (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
|
|
26
|
+
bool (*is_host) (ggml_backend_buffer_type_t buft);
|
|
30
27
|
};
|
|
31
28
|
|
|
32
29
|
struct ggml_backend_buffer_type {
|
|
33
30
|
struct ggml_backend_buffer_type_i iface;
|
|
34
|
-
|
|
31
|
+
ggml_backend_dev_t device;
|
|
32
|
+
void * context;
|
|
35
33
|
};
|
|
36
34
|
|
|
37
|
-
//
|
|
38
|
-
|
|
35
|
+
//
|
|
36
|
+
// Backend buffer
|
|
37
|
+
//
|
|
39
38
|
|
|
40
39
|
struct ggml_backend_buffer_i {
|
|
41
|
-
|
|
42
|
-
void (*
|
|
43
|
-
|
|
44
|
-
void
|
|
45
|
-
|
|
46
|
-
void (*
|
|
47
|
-
|
|
48
|
-
void (*
|
|
49
|
-
void (*
|
|
40
|
+
// (optional) free the buffer
|
|
41
|
+
void (*free_buffer) (ggml_backend_buffer_t buffer);
|
|
42
|
+
// base address of the buffer
|
|
43
|
+
void * (*get_base) (ggml_backend_buffer_t buffer);
|
|
44
|
+
// (optional) initialize a tensor in the buffer (eg. add tensor extras)
|
|
45
|
+
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
46
|
+
// tensor data access
|
|
47
|
+
void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
|
48
|
+
void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
49
|
+
void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
50
|
+
// (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
|
|
51
|
+
bool (*cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
52
|
+
// clear the entire buffer
|
|
53
|
+
void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
|
|
54
|
+
// (optional) reset any internal state due to tensor initialization, such as tensor extras
|
|
55
|
+
void (*reset) (ggml_backend_buffer_t buffer);
|
|
50
56
|
};
|
|
51
57
|
|
|
52
58
|
struct ggml_backend_buffer {
|
|
53
59
|
struct ggml_backend_buffer_i iface;
|
|
54
60
|
ggml_backend_buffer_type_t buft;
|
|
55
|
-
|
|
61
|
+
void * context;
|
|
56
62
|
size_t size;
|
|
57
63
|
enum ggml_backend_buffer_usage usage;
|
|
58
64
|
};
|
|
59
65
|
|
|
60
|
-
|
|
61
|
-
ggml_backend_buffer_type_t
|
|
62
|
-
struct ggml_backend_buffer_i
|
|
63
|
-
|
|
64
|
-
size_t
|
|
66
|
+
ggml_backend_buffer_t ggml_backend_buffer_init(
|
|
67
|
+
ggml_backend_buffer_type_t buft,
|
|
68
|
+
struct ggml_backend_buffer_i iface,
|
|
69
|
+
void * context,
|
|
70
|
+
size_t size);
|
|
65
71
|
|
|
66
72
|
// do not use directly, use ggml_backend_tensor_copy instead
|
|
67
73
|
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
68
74
|
|
|
75
|
+
// multi-buffer
|
|
69
76
|
// buffer that contains a collection of buffers
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
77
|
+
ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
|
78
|
+
bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
|
79
|
+
void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
|
73
80
|
|
|
74
81
|
//
|
|
75
|
-
// Backend
|
|
82
|
+
// Backend (stream)
|
|
76
83
|
//
|
|
77
84
|
|
|
78
|
-
typedef void * ggml_backend_context_t;
|
|
79
|
-
|
|
80
85
|
struct ggml_backend_i {
|
|
81
|
-
const char * (*
|
|
82
|
-
|
|
83
|
-
void (*GGML_CALL free)(ggml_backend_t backend);
|
|
86
|
+
const char * (*get_name)(ggml_backend_t backend);
|
|
84
87
|
|
|
85
|
-
|
|
86
|
-
ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
|
|
88
|
+
void (*free)(ggml_backend_t backend);
|
|
87
89
|
|
|
88
90
|
// (optional) asynchronous tensor data access
|
|
89
|
-
void (*
|
|
90
|
-
void (*
|
|
91
|
-
bool (*
|
|
91
|
+
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
92
|
+
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
93
|
+
bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
92
94
|
|
|
93
|
-
// (optional) complete all pending operations
|
|
94
|
-
void (*
|
|
95
|
+
// (optional) complete all pending operations (required if the backend supports async operations)
|
|
96
|
+
void (*synchronize)(ggml_backend_t backend);
|
|
95
97
|
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
ggml_backend_graph_plan_t (*
|
|
99
|
-
void (*
|
|
98
|
+
// (optional) graph plans (not used currently)
|
|
99
|
+
// compute graph with a plan
|
|
100
|
+
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
|
101
|
+
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
|
100
102
|
// update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
|
|
101
|
-
void (*
|
|
103
|
+
void (*graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
|
|
102
104
|
// compute the graph with the plan
|
|
103
|
-
enum ggml_status (*
|
|
105
|
+
enum ggml_status (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
|
104
106
|
|
|
105
|
-
// compute graph
|
|
106
|
-
enum ggml_status
|
|
107
|
-
|
|
108
|
-
// check if the backend can compute an operation
|
|
109
|
-
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
110
|
-
|
|
111
|
-
// check if the backend can use tensors allocated in a buffer type
|
|
112
|
-
bool (*GGML_CALL supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
|
113
|
-
|
|
114
|
-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
|
|
115
|
-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
|
|
116
|
-
// even if the weight has to be copied from the CPU temporarily
|
|
117
|
-
bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
107
|
+
// compute graph (always async if supported by the backend)
|
|
108
|
+
enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
118
109
|
|
|
119
110
|
// (optional) event synchronization
|
|
120
|
-
//
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
void (*GGML_CALL event_record) (ggml_backend_event_t event);
|
|
125
|
-
// wait for an event on on a different backend instance
|
|
126
|
-
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
|
127
|
-
// block until an event is recorded
|
|
128
|
-
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
|
|
111
|
+
// record an event on this stream
|
|
112
|
+
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
|
|
113
|
+
// wait for an event on on a different stream
|
|
114
|
+
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
|
129
115
|
};
|
|
130
116
|
|
|
131
117
|
struct ggml_backend {
|
|
132
118
|
ggml_guid_t guid;
|
|
133
|
-
|
|
134
119
|
struct ggml_backend_i iface;
|
|
135
|
-
|
|
120
|
+
ggml_backend_dev_t device;
|
|
121
|
+
void * context;
|
|
136
122
|
};
|
|
137
123
|
|
|
138
124
|
struct ggml_backend_event {
|
|
139
|
-
|
|
125
|
+
struct ggml_backend_device * device;
|
|
126
|
+
void * context;
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
//
|
|
130
|
+
// Backend device
|
|
131
|
+
//
|
|
132
|
+
|
|
133
|
+
// Note: if additional properties are needed, we should add a struct with all of them
|
|
134
|
+
// the current functions to obtain the properties can remain, since they are more convenient for often used properties
|
|
135
|
+
struct ggml_backend_device_i {
|
|
136
|
+
// device name: short identifier for this device, such as "CPU" or "CUDA0"
|
|
137
|
+
const char * (*get_name)(ggml_backend_dev_t dev);
|
|
138
|
+
|
|
139
|
+
// device description: short informative description of the device, could be the model name
|
|
140
|
+
const char * (*get_description)(ggml_backend_dev_t dev);
|
|
141
|
+
|
|
142
|
+
// device memory in bytes
|
|
143
|
+
void (*get_memory)(ggml_backend_dev_t dev, size_t * free, size_t * total);
|
|
144
|
+
|
|
145
|
+
// device type
|
|
146
|
+
enum ggml_backend_dev_type (*get_type)(ggml_backend_dev_t dev);
|
|
147
|
+
|
|
148
|
+
// device properties
|
|
149
|
+
void (*get_props)(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props);
|
|
150
|
+
|
|
151
|
+
// backend (stream) initialization
|
|
152
|
+
ggml_backend_t (*init_backend)(ggml_backend_dev_t dev, const char * params);
|
|
153
|
+
|
|
154
|
+
// preferred buffer type
|
|
155
|
+
ggml_backend_buffer_type_t (*get_buffer_type)(ggml_backend_dev_t dev);
|
|
156
|
+
|
|
157
|
+
// (optional) host buffer type (in system memory, typically this is a pinned memory buffer for faster transfers between host and device)
|
|
158
|
+
ggml_backend_buffer_type_t (*get_host_buffer_type)(ggml_backend_dev_t dev);
|
|
159
|
+
|
|
160
|
+
// (optional) buffer from pointer: create a buffer from a host pointer (useful for memory mapped models and importing data from other libraries)
|
|
161
|
+
ggml_backend_buffer_t (*buffer_from_host_ptr)(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size);
|
|
162
|
+
|
|
163
|
+
// check if the backend can compute an operation
|
|
164
|
+
bool (*supports_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
|
|
165
|
+
|
|
166
|
+
// check if the backend can use tensors allocated in a buffer type
|
|
167
|
+
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
|
|
168
|
+
|
|
169
|
+
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
|
|
170
|
+
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
|
|
171
|
+
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
|
|
172
|
+
|
|
173
|
+
// (optional) event synchronization
|
|
174
|
+
ggml_backend_event_t (*event_new) (ggml_backend_dev_t dev);
|
|
175
|
+
void (*event_free) (ggml_backend_dev_t dev, ggml_backend_event_t event);
|
|
176
|
+
void (*event_synchronize) (ggml_backend_dev_t dev, ggml_backend_event_t event);
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
struct ggml_backend_device {
|
|
180
|
+
struct ggml_backend_device_i iface;
|
|
181
|
+
ggml_backend_reg_t reg;
|
|
140
182
|
void * context;
|
|
141
183
|
};
|
|
142
184
|
|
|
143
185
|
//
|
|
144
|
-
// Backend
|
|
186
|
+
// Backend (reg)
|
|
145
187
|
//
|
|
146
188
|
|
|
147
|
-
|
|
189
|
+
struct ggml_backend_reg_i {
|
|
190
|
+
const char * (*get_name)(ggml_backend_reg_t reg);
|
|
191
|
+
|
|
192
|
+
// enumerate available devices
|
|
193
|
+
size_t (*get_device_count)(ggml_backend_reg_t reg);
|
|
194
|
+
ggml_backend_dev_t (*get_device)(ggml_backend_reg_t reg, size_t index);
|
|
195
|
+
|
|
196
|
+
// (optional) get a pointer to a function in the backend
|
|
197
|
+
// backends can add custom functions that are not part of the standard ggml-backend interface
|
|
198
|
+
void * (*get_proc_address)(ggml_backend_reg_t reg, const char * name);
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
struct ggml_backend_reg {
|
|
202
|
+
// int api_version; // TODO: for dynamic loading
|
|
203
|
+
struct ggml_backend_reg_i iface;
|
|
204
|
+
void * context;
|
|
205
|
+
};
|
|
206
|
+
|
|
148
207
|
|
|
149
|
-
|
|
208
|
+
// Internal backend registry API
|
|
209
|
+
void ggml_backend_register(ggml_backend_reg_t reg);
|
|
210
|
+
void ggml_backend_device_register(ggml_backend_dev_t device);
|
|
211
|
+
// TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
|
|
212
|
+
// typedef ggml_backend_register_t * (*ggml_backend_init)(void);
|
|
150
213
|
|
|
151
214
|
#ifdef __cplusplus
|
|
152
215
|
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
#include "ggml-backend-impl.h"
|
|
2
|
+
#include "ggml-backend.h"
|
|
3
|
+
#include "ggml-cpu.h"
|
|
4
|
+
#include "ggml-impl.h"
|
|
5
|
+
#include <cstring>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
// Backend registry
|
|
9
|
+
|
|
10
|
+
#ifdef GGML_USE_CUDA
|
|
11
|
+
#include "ggml-cuda.h"
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#ifdef GGML_USE_METAL
|
|
15
|
+
#include "ggml-metal.h"
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#ifdef GGML_USE_SYCL
|
|
19
|
+
#include "ggml-sycl.h"
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
#ifdef GGML_USE_VULKAN
|
|
23
|
+
#include "ggml-vulkan.h"
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
#ifdef GGML_USE_BLAS
|
|
27
|
+
#include "ggml-blas.h"
|
|
28
|
+
#endif
|
|
29
|
+
|
|
30
|
+
#ifdef GGML_USE_RPC
|
|
31
|
+
#include "ggml-rpc.h"
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
#ifdef GGML_USE_AMX
|
|
35
|
+
# include "ggml-amx.h"
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
#ifdef GGML_USE_CANN
|
|
39
|
+
#include "ggml-cann.h"
|
|
40
|
+
#endif
|
|
41
|
+
|
|
42
|
+
#ifdef GGML_USE_KOMPUTE
|
|
43
|
+
#include "ggml-kompute.h"
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
// Process-wide registry of all backends compiled into this build, plus the
// flattened list of every device those backends expose.
struct ggml_backend_registry {
    // Backends in registration order (compile-time accelerators first, CPU last).
    std::vector<ggml_backend_reg_t> backends;
    // Devices in registration order; enumeration order determines lookup
    // priority in ggml_backend_dev_by_type() and friends.
    std::vector<ggml_backend_dev_t> devices;

    // Registers every backend enabled at compile time. The CPU backend is
    // registered last so that accelerator devices come first in the device
    // list and are therefore preferred by type-based lookups.
    ggml_backend_registry() {
#ifdef GGML_USE_CUDA
        register_backend(ggml_backend_cuda_reg());
#endif
#ifdef GGML_USE_METAL
        register_backend(ggml_backend_metal_reg());
#endif
#ifdef GGML_USE_SYCL
        register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
        register_backend(ggml_backend_vk_reg());
#endif
#ifdef GGML_USE_CANN
        register_backend(ggml_backend_cann_reg());
#endif
#ifdef GGML_USE_BLAS
        register_backend(ggml_backend_blas_reg());
#endif
#ifdef GGML_USE_RPC
        register_backend(ggml_backend_rpc_reg());
#endif
#ifdef GGML_USE_AMX
        register_backend(ggml_backend_amx_reg());
#endif
#ifdef GGML_USE_KOMPUTE
        register_backend(ggml_backend_kompute_reg());
#endif

        // CPU backend last: see ordering note above.
        register_backend(ggml_backend_cpu_reg());
    }

    // Adds a backend and all of its devices to the registry.
    // A NULL reg is ignored (a backend may fail to produce a registration,
    // e.g. when its runtime is unavailable).
    void register_backend(ggml_backend_reg_t reg) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
            __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back(reg);
        // Flatten this backend's devices into the global device list.
        for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
            register_device(ggml_backend_reg_dev_get(reg, i));
        }
    }

    // Appends a single device to the global device list.
    void register_device(ggml_backend_dev_t device) {
#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }
};
|
|
104
|
+
|
|
105
|
+
// Accessor for the lazily-constructed global registry. The function-local
// static is initialized on first use (thread-safe since C++11).
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry instance;
    return instance;
}
|
|
109
|
+
|
|
110
|
+
// Internal API
|
|
111
|
+
// Adds a backend registration (and all of its devices) to the global registry.
void ggml_backend_register(ggml_backend_reg_t reg) {
    ggml_backend_registry & registry = get_reg();
    registry.register_backend(reg);
}
|
|
114
|
+
|
|
115
|
+
// Adds a single device to the global registry's device list.
void ggml_backend_device_register(ggml_backend_dev_t device) {
    ggml_backend_registry & registry = get_reg();
    registry.register_device(device);
}
|
|
118
|
+
|
|
119
|
+
// Backend (reg) enumeration
|
|
120
|
+
size_t ggml_backend_reg_count() {
|
|
121
|
+
return get_reg().backends.size();
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Returns the backend registration at `index`; asserts on out-of-range access.
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    const ggml_backend_registry & registry = get_reg();
    GGML_ASSERT(index < registry.backends.size());
    return registry.backends[index];
}
|
|
128
|
+
|
|
129
|
+
// Finds a registered backend by exact name match; returns NULL if none matches.
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    for (ggml_backend_reg_t reg : get_reg().backends) {
        if (std::strcmp(ggml_backend_reg_name(reg), name) == 0) {
            return reg;
        }
    }
    return NULL;
}
|
|
138
|
+
|
|
139
|
+
// Device enumeration
|
|
140
|
+
size_t ggml_backend_dev_count() {
|
|
141
|
+
return get_reg().devices.size();
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Returns the device at `index`; asserts on out-of-range access.
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    const ggml_backend_registry & registry = get_reg();
    GGML_ASSERT(index < registry.devices.size());
    return registry.devices[index];
}
|
|
148
|
+
|
|
149
|
+
// Finds a registered device by exact name match.
//
// Scans devices in registration order and returns the first whose name equals
// `name`, or NULL when no device matches.
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        // std::strcmp (not unqualified strcmp) for consistency with
        // ggml_backend_reg_by_name: <cstring> only guarantees the
        // std-qualified name.
        if (std::strcmp(ggml_backend_dev_name(dev), name) == 0) {
            return dev;
        }
    }
    return NULL;
}
|
|
158
|
+
|
|
159
|
+
// Returns the first registered device of the given type, or NULL when none
// exists. Registration order (accelerators before CPU) sets the priority.
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    for (ggml_backend_dev_t dev : get_reg().devices) {
        if (ggml_backend_dev_type(dev) == type) {
            return dev;
        }
    }
    return NULL;
}
|
|
168
|
+
|
|
169
|
+
// Convenience functions
|
|
170
|
+
// Convenience: looks up the device called `name` and initializes a backend on
// it with `params`; returns NULL when no such device is registered.
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
    return dev ? ggml_backend_dev_init(dev, params) : NULL;
}
|
|
177
|
+
|
|
178
|
+
// Convenience: initializes a backend on the first device of the given type
// with `params`; returns NULL when no device of that type is registered.
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
    return dev ? ggml_backend_dev_init(dev, params) : NULL;
}
|
|
185
|
+
|
|
186
|
+
// Convenience: initializes a backend on the "best" available device —
// the first GPU device if any, otherwise the first CPU device.
// Returns NULL when no device at all is registered.
ggml_backend_t ggml_backend_init_best(void) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
    if (dev == NULL) {
        dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    }
    return dev ? ggml_backend_dev_init(dev, NULL) : NULL;
}
|