@fugood/llama.node 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +2 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +8 -9
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +43 -9
- package/src/llama.cpp/.github/workflows/docker.yml +3 -0
- package/src/llama.cpp/CMakeLists.txt +7 -4
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +0 -2
- package/src/llama.cpp/common/arg.cpp +642 -607
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +79 -281
- package/src/llama.cpp/common/common.h +130 -100
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +116 -108
- package/src/llama.cpp/common/sampling.h +20 -20
- package/src/llama.cpp/docs/build.md +37 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +14 -14
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +20 -11
- package/src/llama.cpp/examples/infill/infill.cpp +40 -86
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +42 -151
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/clip.cpp +1 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +37 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +14 -14
- package/src/llama.cpp/examples/lookup/lookup.cpp +29 -29
- package/src/llama.cpp/examples/main/main.cpp +64 -109
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +13 -13
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +34 -17
- package/src/llama.cpp/examples/server/CMakeLists.txt +4 -13
- package/src/llama.cpp/examples/server/server.cpp +553 -691
- package/src/llama.cpp/examples/server/utils.hpp +312 -25
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +128 -96
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +54 -51
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +2 -2
- package/src/llama.cpp/ggml/CMakeLists.txt +15 -9
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +46 -33
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +53 -393
- package/src/llama.cpp/ggml/src/CMakeLists.txt +66 -1149
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +46 -3126
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +6 -25
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +303 -864
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +213 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +255 -149
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -243
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +667 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +366 -16
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +238 -72
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +187 -10692
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +475 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +40 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +258 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3584 -4142
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +69 -67
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +555 -623
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +125 -206
- package/src/llama.cpp/ggml/src/ggml.c +4032 -19890
- package/src/llama.cpp/include/llama.h +67 -33
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +745 -105
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +49 -9
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +2636 -2406
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/tests/CMakeLists.txt +1 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +14 -14
- package/src/llama.cpp/tests/test-backend-ops.cpp +185 -60
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +22 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +1 -0
- package/src/llama.cpp/tests/test-sampling.cpp +162 -137
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
- /package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#pragma once
#include "common.h"
#include <stddef.h> // size_t is used in the prototypes below; do not rely on a transitive include
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): presumably the number of bytes that must be allocated to hold
// `tensor` in the AMX backend's layout - confirm against the definition.
size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor);

// NOTE(review): presumably converts `size` bytes of weight data from `data`
// into `tensor` starting at byte `offset` - confirm against the definition.
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);

// NOTE(review): presumably evaluates the matrix multiplication whose result
// tensor is `dst`, using the AMX backend context `ctx` - confirm against the definition.
void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor * dst);

#ifdef __cplusplus
}
#endif
|
|
@@ -22,7 +22,7 @@ extern "C" {
|
|
|
22
22
|
size_t (*get_max_size) (ggml_backend_buffer_type_t buft);
|
|
23
23
|
// (optional) data size needed to allocate the tensor, including padding (defaults to ggml_nbytes)
|
|
24
24
|
size_t (*get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
|
|
25
|
-
// (optional) check if tensor data is in host memory (defaults to false)
|
|
25
|
+
// (optional) check if tensor data is in host memory and uses standard ggml tensor layout (defaults to false)
|
|
26
26
|
bool (*is_host) (ggml_backend_buffer_type_t buft);
|
|
27
27
|
};
|
|
28
28
|
|
|
@@ -37,7 +37,6 @@ extern "C" {
|
|
|
37
37
|
//
|
|
38
38
|
|
|
39
39
|
struct ggml_backend_buffer_i {
|
|
40
|
-
const char * (*get_name) (ggml_backend_buffer_t buffer);
|
|
41
40
|
// (optional) free the buffer
|
|
42
41
|
void (*free_buffer) (ggml_backend_buffer_t buffer);
|
|
43
42
|
// base address of the buffer
|
|
@@ -88,18 +87,16 @@ extern "C" {
|
|
|
88
87
|
|
|
89
88
|
void (*free)(ggml_backend_t backend);
|
|
90
89
|
|
|
91
|
-
// buffer allocation
|
|
92
|
-
ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
|
|
93
|
-
|
|
94
90
|
// (optional) asynchronous tensor data access
|
|
95
91
|
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
96
92
|
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
97
93
|
bool (*cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
98
94
|
|
|
99
|
-
// (optional) complete all pending operations
|
|
95
|
+
// (optional) complete all pending operations (required if the backend supports async operations)
|
|
100
96
|
void (*synchronize)(ggml_backend_t backend);
|
|
101
97
|
|
|
102
|
-
// (optional)
|
|
98
|
+
// (optional) graph plans (not used currently)
|
|
99
|
+
// compute graph with a plan
|
|
103
100
|
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
|
104
101
|
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
|
105
102
|
// update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
|
|
@@ -110,21 +107,6 @@ extern "C" {
|
|
|
110
107
|
// compute graph (always async if supported by the backend)
|
|
111
108
|
enum ggml_status (*graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
112
109
|
|
|
113
|
-
// IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
|
|
114
|
-
// new backends should implement the device interface instead
|
|
115
|
-
|
|
116
|
-
// These functions are being moved to the device interface
|
|
117
|
-
// check if the backend can compute an operation
|
|
118
|
-
bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
|
|
119
|
-
|
|
120
|
-
// check if the backend can use tensors allocated in a buffer type
|
|
121
|
-
bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
|
122
|
-
|
|
123
|
-
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
|
|
124
|
-
// these should be expensive operations with large batch sizes that may benefit from running on this backend
|
|
125
|
-
// even if the weight has to be copied from the CPU temporarily
|
|
126
|
-
bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
|
|
127
|
-
|
|
128
110
|
// (optional) event synchronization
|
|
129
111
|
// record an event on this stream
|
|
130
112
|
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
|
|
@@ -184,9 +166,8 @@ extern "C" {
|
|
|
184
166
|
// check if the backend can use tensors allocated in a buffer type
|
|
185
167
|
bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
|
|
186
168
|
|
|
187
|
-
// check if the backend wants to run an operation, even if the weights are allocated in
|
|
188
|
-
// these should be expensive operations
|
|
189
|
-
// even if the weight has to be copied from the CPU temporarily
|
|
169
|
+
// (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
|
|
170
|
+
// these should be expensive operations that may benefit from running on this backend instead of the CPU backend
|
|
190
171
|
bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
|
|
191
172
|
|
|
192
173
|
// (optional) event synchronization
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
#include "ggml-backend-impl.h"
|
|
2
|
+
#include "ggml-backend.h"
|
|
3
|
+
#include "ggml-cpu.h"
|
|
4
|
+
#include "ggml-impl.h"
|
|
5
|
+
#include <cstring>
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
// Backend registry
|
|
9
|
+
|
|
10
|
+
#ifdef GGML_USE_CUDA
|
|
11
|
+
#include "ggml-cuda.h"
|
|
12
|
+
#endif
|
|
13
|
+
|
|
14
|
+
#ifdef GGML_USE_METAL
|
|
15
|
+
#include "ggml-metal.h"
|
|
16
|
+
#endif
|
|
17
|
+
|
|
18
|
+
#ifdef GGML_USE_SYCL
|
|
19
|
+
#include "ggml-sycl.h"
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
#ifdef GGML_USE_VULKAN
|
|
23
|
+
#include "ggml-vulkan.h"
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
#ifdef GGML_USE_BLAS
|
|
27
|
+
#include "ggml-blas.h"
|
|
28
|
+
#endif
|
|
29
|
+
|
|
30
|
+
#ifdef GGML_USE_RPC
|
|
31
|
+
#include "ggml-rpc.h"
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
#ifdef GGML_USE_AMX
|
|
35
|
+
# include "ggml-amx.h"
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
#ifdef GGML_USE_CANN
|
|
39
|
+
#include "ggml-cann.h"
|
|
40
|
+
#endif
|
|
41
|
+
|
|
42
|
+
#ifdef GGML_USE_KOMPUTE
|
|
43
|
+
#include "ggml-kompute.h"
|
|
44
|
+
#endif
|
|
45
|
+
|
|
46
|
+
struct ggml_backend_registry {
|
|
47
|
+
std::vector<ggml_backend_reg_t> backends;
|
|
48
|
+
std::vector<ggml_backend_dev_t> devices;
|
|
49
|
+
|
|
50
|
+
ggml_backend_registry() {
|
|
51
|
+
#ifdef GGML_USE_CUDA
|
|
52
|
+
register_backend(ggml_backend_cuda_reg());
|
|
53
|
+
#endif
|
|
54
|
+
#ifdef GGML_USE_METAL
|
|
55
|
+
register_backend(ggml_backend_metal_reg());
|
|
56
|
+
#endif
|
|
57
|
+
#ifdef GGML_USE_SYCL
|
|
58
|
+
register_backend(ggml_backend_sycl_reg());
|
|
59
|
+
#endif
|
|
60
|
+
#ifdef GGML_USE_VULKAN
|
|
61
|
+
register_backend(ggml_backend_vk_reg());
|
|
62
|
+
#endif
|
|
63
|
+
#ifdef GGML_USE_CANN
|
|
64
|
+
register_backend(ggml_backend_cann_reg());
|
|
65
|
+
#endif
|
|
66
|
+
#ifdef GGML_USE_BLAS
|
|
67
|
+
register_backend(ggml_backend_blas_reg());
|
|
68
|
+
#endif
|
|
69
|
+
#ifdef GGML_USE_RPC
|
|
70
|
+
register_backend(ggml_backend_rpc_reg());
|
|
71
|
+
#endif
|
|
72
|
+
#ifdef GGML_USE_AMX
|
|
73
|
+
register_backend(ggml_backend_amx_reg());
|
|
74
|
+
#endif
|
|
75
|
+
#ifdef GGML_USE_KOMPUTE
|
|
76
|
+
register_backend(ggml_backend_kompute_reg());
|
|
77
|
+
#endif
|
|
78
|
+
|
|
79
|
+
register_backend(ggml_backend_cpu_reg());
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
void register_backend(ggml_backend_reg_t reg) {
|
|
83
|
+
if (!reg) {
|
|
84
|
+
return;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
#ifndef NDEBUG
|
|
88
|
+
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
|
89
|
+
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
|
90
|
+
#endif
|
|
91
|
+
backends.push_back(reg);
|
|
92
|
+
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
|
93
|
+
register_device(ggml_backend_reg_dev_get(reg, i));
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
void register_device(ggml_backend_dev_t device) {
|
|
98
|
+
#ifndef NDEBUG
|
|
99
|
+
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
|
|
100
|
+
#endif
|
|
101
|
+
devices.push_back(device);
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
// Returns the single registry instance. It is a function-local static, so it
// is constructed (and the built-in backends registered) on the first call.
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry instance;
    return instance;
}
|
|
109
|
+
|
|
110
|
+
// Internal API
|
|
111
|
+
// Adds `reg` (and the devices it reports) to the global registry.
void ggml_backend_register(ggml_backend_reg_t reg) {
    ggml_backend_registry & registry = get_reg();
    registry.register_backend(reg);
}
|
|
114
|
+
|
|
115
|
+
// Adds a single `device` to the global registry's device list.
void ggml_backend_device_register(ggml_backend_dev_t device) {
    ggml_backend_registry & registry = get_reg();
    registry.register_device(device);
}
|
|
118
|
+
|
|
119
|
+
// Backend (reg) enumeration
|
|
120
|
+
size_t ggml_backend_reg_count() {
|
|
121
|
+
return get_reg().backends.size();
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Returns the backend at position `index` in registration order.
// `index` must be less than ggml_backend_reg_count(); this is asserted.
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_reg_count());
    const std::vector<ggml_backend_reg_t> & registered = get_reg().backends;
    return registered[index];
}
|
|
128
|
+
|
|
129
|
+
// Finds a registered backend by name (exact, case-sensitive comparison).
// Returns the first backend, in registration order, whose
// ggml_backend_reg_name() equals `name`; returns NULL when nothing matches
// or when `name` is NULL.
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    if (name == NULL) {
        // std::strcmp has undefined behavior when given a null pointer
        return NULL;
    }

    // nothing in the loop registers backends, so the count is loop-invariant
    const size_t n_backends = ggml_backend_reg_count();
    for (size_t i = 0; i < n_backends; i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        if (std::strcmp(ggml_backend_reg_name(reg), name) == 0) {
            return reg;
        }
    }
    return NULL;
}
|
|
138
|
+
|
|
139
|
+
// Device enumeration
|
|
140
|
+
size_t ggml_backend_dev_count() {
|
|
141
|
+
return get_reg().devices.size();
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Returns the device at position `index` in registration order.
// `index` must be less than ggml_backend_dev_count(); this is asserted.
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
    const std::vector<ggml_backend_dev_t> & registered = get_reg().devices;
    return registered[index];
}
|
|
148
|
+
|
|
149
|
+
// Finds a registered device by name (exact, case-sensitive comparison).
// Returns the first device, in registration order, whose
// ggml_backend_dev_name() equals `name`; returns NULL when nothing matches
// or when `name` is NULL.
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    if (name == NULL) {
        // std::strcmp has undefined behavior when given a null pointer
        return NULL;
    }

    // nothing in the loop registers devices, so the count is loop-invariant
    const size_t n_devices = ggml_backend_dev_count();
    for (size_t i = 0; i < n_devices; i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        // qualified call: <cstring> only guarantees the std:: declaration,
        // and this matches ggml_backend_reg_by_name
        if (std::strcmp(ggml_backend_dev_name(dev), name) == 0) {
            return dev;
        }
    }
    return NULL;
}
|
|
158
|
+
|
|
159
|
+
// Returns the first registered device, in registration order, whose
// ggml_backend_dev_type() equals `type`, or NULL if there is none.
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    size_t pos = 0;
    while (pos < ggml_backend_dev_count()) {
        ggml_backend_dev_t candidate = ggml_backend_dev_get(pos);
        if (ggml_backend_dev_type(candidate) == type) {
            return candidate;
        }
        ++pos;
    }
    return NULL;
}
|
|
168
|
+
|
|
169
|
+
// Convenience functions
|
|
170
|
+
// Looks up the device called `name` and initializes a backend on it, passing
// `params` through to ggml_backend_dev_init(). Returns NULL when no device
// has that name.
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
    ggml_backend_dev_t found = ggml_backend_dev_by_name(name);
    if (found) {
        return ggml_backend_dev_init(found, params);
    }
    return NULL;
}
|
|
177
|
+
|
|
178
|
+
// Picks the first registered device of the given `type` and initializes a
// backend on it, passing `params` through to ggml_backend_dev_init().
// Returns NULL when no device of that type is registered.
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
    ggml_backend_dev_t found = ggml_backend_dev_by_type(type);
    if (found) {
        return ggml_backend_dev_init(found, params);
    }
    return NULL;
}
|
|
185
|
+
|
|
186
|
+
// Initializes a backend on the preferred available device: the first GPU
// device if one is registered, otherwise the first CPU device. The backend is
// initialized with NULL params. Returns NULL when neither kind is registered.
ggml_backend_t ggml_backend_init_best(void) {
    // device types in order of preference
    const enum ggml_backend_dev_type preference[] = {
        GGML_BACKEND_DEVICE_TYPE_GPU,
        GGML_BACKEND_DEVICE_TYPE_CPU,
    };
    const size_t n_preference = sizeof(preference) / sizeof(preference[0]);

    for (size_t k = 0; k < n_preference; k++) {
        ggml_backend_dev_t candidate = ggml_backend_dev_by_type(preference[k]);
        if (candidate) {
            return ggml_backend_dev_init(candidate, NULL);
        }
    }
    return NULL;
}
|