@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
extern "C" {
|
|
9
9
|
#endif
|
|
10
10
|
|
|
11
|
+
#define GGML_BACKEND_API_VERSION 1
|
|
12
|
+
|
|
11
13
|
//
|
|
12
14
|
// Backend buffer type
|
|
13
15
|
//
|
|
@@ -63,20 +65,20 @@ extern "C" {
|
|
|
63
65
|
enum ggml_backend_buffer_usage usage;
|
|
64
66
|
};
|
|
65
67
|
|
|
66
|
-
ggml_backend_buffer_t ggml_backend_buffer_init(
|
|
68
|
+
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
|
|
67
69
|
ggml_backend_buffer_type_t buft,
|
|
68
70
|
struct ggml_backend_buffer_i iface,
|
|
69
71
|
void * context,
|
|
70
72
|
size_t size);
|
|
71
73
|
|
|
72
74
|
// do not use directly, use ggml_backend_tensor_copy instead
|
|
73
|
-
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
75
|
+
GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
74
76
|
|
|
75
77
|
// multi-buffer
|
|
76
78
|
// buffer that contains a collection of buffers
|
|
77
|
-
ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
|
78
|
-
bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
|
79
|
-
void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
|
79
|
+
GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
|
|
80
|
+
GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
|
|
81
|
+
GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
|
80
82
|
|
|
81
83
|
//
|
|
82
84
|
// Backend (stream)
|
|
@@ -199,17 +201,55 @@ extern "C" {
|
|
|
199
201
|
};
|
|
200
202
|
|
|
201
203
|
struct ggml_backend_reg {
|
|
202
|
-
|
|
204
|
+
int api_version; // initialize to GGML_BACKEND_API_VERSION
|
|
203
205
|
struct ggml_backend_reg_i iface;
|
|
204
206
|
void * context;
|
|
205
207
|
};
|
|
206
208
|
|
|
207
|
-
|
|
208
209
|
// Internal backend registry API
|
|
209
|
-
void ggml_backend_register(ggml_backend_reg_t reg);
|
|
210
|
-
void ggml_backend_device_register(ggml_backend_dev_t device);
|
|
211
|
-
|
|
212
|
-
//
|
|
210
|
+
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
|
|
211
|
+
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
|
|
212
|
+
|
|
213
|
+
// Add backend dynamic loading support to the backend
|
|
214
|
+
|
|
215
|
+
// Initialize the backend
|
|
216
|
+
typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
|
|
217
|
+
// Optional: obtain a score for the backend based on the system configuration
|
|
218
|
+
// Higher scores are preferred, 0 means the backend is not supported in the current system
|
|
219
|
+
typedef int (*ggml_backend_score_t)(void);
|
|
220
|
+
|
|
221
|
+
#ifdef GGML_BACKEND_DL
|
|
222
|
+
# ifdef __cplusplus
|
|
223
|
+
# define GGML_BACKEND_DL_IMPL(reg_fn) \
|
|
224
|
+
extern "C" { \
|
|
225
|
+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
|
|
226
|
+
} \
|
|
227
|
+
ggml_backend_reg_t ggml_backend_init(void) { \
|
|
228
|
+
return reg_fn(); \
|
|
229
|
+
}
|
|
230
|
+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
|
231
|
+
extern "C" { \
|
|
232
|
+
GGML_BACKEND_API int ggml_backend_score(void); \
|
|
233
|
+
} \
|
|
234
|
+
int ggml_backend_score(void) { \
|
|
235
|
+
return score_fn(); \
|
|
236
|
+
}
|
|
237
|
+
# else
|
|
238
|
+
# define GGML_BACKEND_DL_IMPL(reg_fn) \
|
|
239
|
+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
|
|
240
|
+
ggml_backend_reg_t ggml_backend_init(void) { \
|
|
241
|
+
return reg_fn(); \
|
|
242
|
+
}
|
|
243
|
+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn) \
|
|
244
|
+
GGML_BACKEND_API int ggml_backend_score(void); \
|
|
245
|
+
int ggml_backend_score(void) { \
|
|
246
|
+
return score_fn(); \
|
|
247
|
+
}
|
|
248
|
+
# endif
|
|
249
|
+
#else
|
|
250
|
+
# define GGML_BACKEND_DL_IMPL(reg_fn)
|
|
251
|
+
# define GGML_BACKEND_DL_SCORE_IMPL(score_fn)
|
|
252
|
+
#endif
|
|
213
253
|
|
|
214
254
|
#ifdef __cplusplus
|
|
215
255
|
}
|
|
@@ -1,11 +1,34 @@
|
|
|
1
1
|
#include "ggml-backend-impl.h"
|
|
2
2
|
#include "ggml-backend.h"
|
|
3
|
-
#include "ggml-cpu.h"
|
|
4
3
|
#include "ggml-impl.h"
|
|
4
|
+
#include <algorithm>
|
|
5
|
+
#include <codecvt>
|
|
5
6
|
#include <cstring>
|
|
7
|
+
#include <filesystem>
|
|
8
|
+
#include <locale>
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <type_traits>
|
|
6
12
|
#include <vector>
|
|
7
13
|
|
|
14
|
+
#ifdef _WIN32
|
|
15
|
+
# define WIN32_LEAN_AND_MEAN
|
|
16
|
+
# ifndef NOMINMAX
|
|
17
|
+
# define NOMINMAX
|
|
18
|
+
# endif
|
|
19
|
+
# include <windows.h>
|
|
20
|
+
#elif defined(__APPLE__)
|
|
21
|
+
# include <mach-o/dyld.h>
|
|
22
|
+
# include <dlfcn.h>
|
|
23
|
+
#else
|
|
24
|
+
# include <dlfcn.h>
|
|
25
|
+
# include <unistd.h>
|
|
26
|
+
#endif
|
|
27
|
+
|
|
8
28
|
// Backend registry
|
|
29
|
+
#ifdef GGML_USE_CPU
|
|
30
|
+
#include "ggml-cpu.h"
|
|
31
|
+
#endif
|
|
9
32
|
|
|
10
33
|
#ifdef GGML_USE_CUDA
|
|
11
34
|
#include "ggml-cuda.h"
|
|
@@ -23,6 +46,10 @@
|
|
|
23
46
|
#include "ggml-vulkan.h"
|
|
24
47
|
#endif
|
|
25
48
|
|
|
49
|
+
#ifdef GGML_USE_OPENCL
|
|
50
|
+
#include "ggml-opencl.h"
|
|
51
|
+
#endif
|
|
52
|
+
|
|
26
53
|
#ifdef GGML_USE_BLAS
|
|
27
54
|
#include "ggml-blas.h"
|
|
28
55
|
#endif
|
|
@@ -31,10 +58,6 @@
|
|
|
31
58
|
#include "ggml-rpc.h"
|
|
32
59
|
#endif
|
|
33
60
|
|
|
34
|
-
#ifdef GGML_USE_AMX
|
|
35
|
-
# include "ggml-amx.h"
|
|
36
|
-
#endif
|
|
37
|
-
|
|
38
61
|
#ifdef GGML_USE_CANN
|
|
39
62
|
#include "ggml-cann.h"
|
|
40
63
|
#endif
|
|
@@ -43,8 +66,75 @@
|
|
|
43
66
|
#include "ggml-kompute.h"
|
|
44
67
|
#endif
|
|
45
68
|
|
|
69
|
+
#ifdef _WIN32
|
|
70
|
+
|
|
71
|
+
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
72
|
+
|
|
73
|
+
struct dl_handle_deleter {
|
|
74
|
+
void operator()(HMODULE handle) {
|
|
75
|
+
FreeLibrary(handle);
|
|
76
|
+
}
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
static dl_handle * dl_load_library(const std::wstring & path) {
|
|
80
|
+
// suppress error dialogs for missing DLLs
|
|
81
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
82
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
83
|
+
|
|
84
|
+
HMODULE handle = LoadLibraryW(path.c_str());
|
|
85
|
+
|
|
86
|
+
SetErrorMode(old_mode);
|
|
87
|
+
|
|
88
|
+
return handle;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
static dl_handle * dl_load_library(const std::string & path) {
|
|
92
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
93
|
+
return dl_load_library(converter.from_bytes(path));
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
97
|
+
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
98
|
+
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
99
|
+
|
|
100
|
+
void * p = (void *) GetProcAddress(handle, name);
|
|
101
|
+
|
|
102
|
+
SetErrorMode(old_mode);
|
|
103
|
+
|
|
104
|
+
return p;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
#else
|
|
108
|
+
|
|
109
|
+
using dl_handle = void;
|
|
110
|
+
|
|
111
|
+
struct dl_handle_deleter {
|
|
112
|
+
void operator()(void * handle) {
|
|
113
|
+
dlclose(handle);
|
|
114
|
+
}
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
static void * dl_load_library(const std::string & path) {
|
|
118
|
+
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
119
|
+
|
|
120
|
+
return handle;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
124
|
+
return dlsym(handle, name);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
#endif
|
|
128
|
+
|
|
129
|
+
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
130
|
+
|
|
131
|
+
struct ggml_backend_reg_entry {
|
|
132
|
+
ggml_backend_reg_t reg;
|
|
133
|
+
dl_handle_ptr handle;
|
|
134
|
+
};
|
|
135
|
+
|
|
46
136
|
struct ggml_backend_registry {
|
|
47
|
-
std::vector<ggml_backend_reg_t> backends;
|
|
137
|
+
std::vector<ggml_backend_reg_entry> backends;
|
|
48
138
|
std::vector<ggml_backend_dev_t> devices;
|
|
49
139
|
|
|
50
140
|
ggml_backend_registry() {
|
|
@@ -60,6 +150,9 @@ struct ggml_backend_registry {
|
|
|
60
150
|
#ifdef GGML_USE_VULKAN
|
|
61
151
|
register_backend(ggml_backend_vk_reg());
|
|
62
152
|
#endif
|
|
153
|
+
#ifdef GGML_USE_OPENCL
|
|
154
|
+
register_backend(ggml_backend_opencl_reg());
|
|
155
|
+
#endif
|
|
63
156
|
#ifdef GGML_USE_CANN
|
|
64
157
|
register_backend(ggml_backend_cann_reg());
|
|
65
158
|
#endif
|
|
@@ -69,17 +162,25 @@ struct ggml_backend_registry {
|
|
|
69
162
|
#ifdef GGML_USE_RPC
|
|
70
163
|
register_backend(ggml_backend_rpc_reg());
|
|
71
164
|
#endif
|
|
72
|
-
#ifdef GGML_USE_AMX
|
|
73
|
-
register_backend(ggml_backend_amx_reg());
|
|
74
|
-
#endif
|
|
75
165
|
#ifdef GGML_USE_KOMPUTE
|
|
76
166
|
register_backend(ggml_backend_kompute_reg());
|
|
77
167
|
#endif
|
|
78
|
-
|
|
168
|
+
#ifdef GGML_USE_CPU
|
|
79
169
|
register_backend(ggml_backend_cpu_reg());
|
|
170
|
+
#endif
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
~ggml_backend_registry() {
|
|
174
|
+
// FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
|
|
175
|
+
// since backend threads may still be running and accessing resources from the dynamic library
|
|
176
|
+
for (auto & entry : backends) {
|
|
177
|
+
if (entry.handle) {
|
|
178
|
+
entry.handle.release(); // NOLINT
|
|
179
|
+
}
|
|
180
|
+
}
|
|
80
181
|
}
|
|
81
182
|
|
|
82
|
-
void register_backend(ggml_backend_reg_t reg) {
|
|
183
|
+
void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
|
83
184
|
if (!reg) {
|
|
84
185
|
return;
|
|
85
186
|
}
|
|
@@ -88,7 +189,7 @@ struct ggml_backend_registry {
|
|
|
88
189
|
GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
|
|
89
190
|
__func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
|
|
90
191
|
#endif
|
|
91
|
-
backends.push_back(reg);
|
|
192
|
+
backends.push_back({ reg, std::move(handle) });
|
|
92
193
|
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
|
93
194
|
register_device(ggml_backend_reg_dev_get(reg, i));
|
|
94
195
|
}
|
|
@@ -100,6 +201,76 @@ struct ggml_backend_registry {
|
|
|
100
201
|
#endif
|
|
101
202
|
devices.push_back(device);
|
|
102
203
|
}
|
|
204
|
+
|
|
205
|
+
ggml_backend_reg_t load_backend(const char * path, bool silent) {
|
|
206
|
+
dl_handle_ptr handle { dl_load_library(path) };
|
|
207
|
+
if (!handle) {
|
|
208
|
+
if (!silent) {
|
|
209
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
|
210
|
+
}
|
|
211
|
+
return nullptr;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
215
|
+
if (score_fn && score_fn() == 0) {
|
|
216
|
+
if (!silent) {
|
|
217
|
+
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
|
218
|
+
}
|
|
219
|
+
return nullptr;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
|
223
|
+
if (!backend_init_fn) {
|
|
224
|
+
if (!silent) {
|
|
225
|
+
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
|
|
226
|
+
}
|
|
227
|
+
return nullptr;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
ggml_backend_reg_t reg = backend_init_fn();
|
|
231
|
+
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
|
232
|
+
if (!silent) {
|
|
233
|
+
if (!reg) {
|
|
234
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
|
|
235
|
+
} else {
|
|
236
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
|
237
|
+
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return nullptr;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
|
|
244
|
+
|
|
245
|
+
register_backend(reg, std::move(handle));
|
|
246
|
+
|
|
247
|
+
return reg;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
void unload_backend(ggml_backend_reg_t reg, bool silent) {
|
|
251
|
+
auto it = std::find_if(backends.begin(), backends.end(),
|
|
252
|
+
[reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
|
|
253
|
+
|
|
254
|
+
if (it == backends.end()) {
|
|
255
|
+
if (!silent) {
|
|
256
|
+
GGML_LOG_ERROR("%s: backend not found\n", __func__);
|
|
257
|
+
}
|
|
258
|
+
return;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
if (!silent) {
|
|
262
|
+
GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// remove devices
|
|
266
|
+
devices.erase(
|
|
267
|
+
std::remove_if(devices.begin(), devices.end(),
|
|
268
|
+
[reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
|
269
|
+
devices.end());
|
|
270
|
+
|
|
271
|
+
// remove backend
|
|
272
|
+
backends.erase(it);
|
|
273
|
+
}
|
|
103
274
|
};
|
|
104
275
|
|
|
105
276
|
static ggml_backend_registry & get_reg() {
|
|
@@ -117,23 +288,32 @@ void ggml_backend_device_register(ggml_backend_dev_t device) {
|
|
|
117
288
|
}
|
|
118
289
|
|
|
119
290
|
// Backend (reg) enumeration
|
|
291
|
+
static bool striequals(const char * a, const char * b) {
|
|
292
|
+
for (; *a && *b; a++, b++) {
|
|
293
|
+
if (std::tolower(*a) != std::tolower(*b)) {
|
|
294
|
+
return false;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
return *a == *b;
|
|
298
|
+
}
|
|
299
|
+
|
|
120
300
|
size_t ggml_backend_reg_count() {
|
|
121
301
|
return get_reg().backends.size();
|
|
122
302
|
}
|
|
123
303
|
|
|
124
304
|
ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
|
|
125
305
|
GGML_ASSERT(index < ggml_backend_reg_count());
|
|
126
|
-
return get_reg().backends[index];
|
|
306
|
+
return get_reg().backends[index].reg;
|
|
127
307
|
}
|
|
128
308
|
|
|
129
309
|
ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
|
|
130
310
|
for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
|
|
131
311
|
ggml_backend_reg_t reg = ggml_backend_reg_get(i);
|
|
132
|
-
if (
|
|
312
|
+
if (striequals(ggml_backend_reg_name(reg), name)) {
|
|
133
313
|
return reg;
|
|
134
314
|
}
|
|
135
315
|
}
|
|
136
|
-
return NULL;
|
|
316
|
+
return nullptr;
|
|
137
317
|
}
|
|
138
318
|
|
|
139
319
|
// Device enumeration
|
|
@@ -149,11 +329,11 @@ ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
|
|
|
149
329
|
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
|
|
150
330
|
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
|
151
331
|
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
|
152
|
-
if (
|
|
332
|
+
if (striequals(ggml_backend_dev_name(dev), name)) {
|
|
153
333
|
return dev;
|
|
154
334
|
}
|
|
155
335
|
}
|
|
156
|
-
return NULL;
|
|
336
|
+
return nullptr;
|
|
157
337
|
}
|
|
158
338
|
|
|
159
339
|
ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
|
|
@@ -163,14 +343,14 @@ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
|
|
|
163
343
|
return dev;
|
|
164
344
|
}
|
|
165
345
|
}
|
|
166
|
-
return NULL;
|
|
346
|
+
return nullptr;
|
|
167
347
|
}
|
|
168
348
|
|
|
169
349
|
// Convenience functions
|
|
170
350
|
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
|
|
171
351
|
ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
|
|
172
352
|
if (!dev) {
|
|
173
|
-
return NULL;
|
|
353
|
+
return nullptr;
|
|
174
354
|
}
|
|
175
355
|
return ggml_backend_dev_init(dev, params);
|
|
176
356
|
}
|
|
@@ -178,7 +358,7 @@ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params)
|
|
|
178
358
|
ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
|
|
179
359
|
ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
|
|
180
360
|
if (!dev) {
|
|
181
|
-
return NULL;
|
|
361
|
+
return nullptr;
|
|
182
362
|
}
|
|
183
363
|
return ggml_backend_dev_init(dev, params);
|
|
184
364
|
}
|
|
@@ -189,7 +369,184 @@ ggml_backend_t ggml_backend_init_best(void) {
|
|
|
189
369
|
dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
|
190
370
|
}
|
|
191
371
|
if (!dev) {
|
|
192
|
-
return NULL;
|
|
372
|
+
return nullptr;
|
|
373
|
+
}
|
|
374
|
+
return ggml_backend_dev_init(dev, nullptr);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// Dynamic loading
|
|
378
|
+
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
|
379
|
+
return get_reg().load_backend(path, false);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|
383
|
+
get_reg().unload_backend(reg, true);
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
static std::string get_executable_path() {
|
|
387
|
+
#if defined(__APPLE__)
|
|
388
|
+
// get executable path
|
|
389
|
+
std::vector<char> path;
|
|
390
|
+
uint32_t size;
|
|
391
|
+
while (true) {
|
|
392
|
+
size = path.size();
|
|
393
|
+
if (_NSGetExecutablePath(path.data(), &size) == 0) {
|
|
394
|
+
break;
|
|
395
|
+
}
|
|
396
|
+
path.resize(size);
|
|
397
|
+
}
|
|
398
|
+
std::string base_path(path.data(), size);
|
|
399
|
+
// remove executable name
|
|
400
|
+
auto last_slash = base_path.find_last_of('/');
|
|
401
|
+
if (last_slash != std::string::npos) {
|
|
402
|
+
base_path = base_path.substr(0, last_slash);
|
|
403
|
+
}
|
|
404
|
+
return base_path + "/";
|
|
405
|
+
#elif defined(__linux__)
|
|
406
|
+
std::string base_path = ".";
|
|
407
|
+
std::vector<char> path(1024);
|
|
408
|
+
while (true) {
|
|
409
|
+
// get executable path
|
|
410
|
+
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
|
411
|
+
if (len == -1) {
|
|
412
|
+
break;
|
|
413
|
+
}
|
|
414
|
+
if (len < (ssize_t) path.size()) {
|
|
415
|
+
base_path = std::string(path.data(), len);
|
|
416
|
+
// remove executable name
|
|
417
|
+
auto last_slash = base_path.find_last_of('/');
|
|
418
|
+
if (last_slash != std::string::npos) {
|
|
419
|
+
base_path = base_path.substr(0, last_slash);
|
|
420
|
+
}
|
|
421
|
+
break;
|
|
422
|
+
}
|
|
423
|
+
path.resize(path.size() * 2);
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
return base_path + "/";
|
|
427
|
+
#elif defined(_WIN32)
|
|
428
|
+
std::vector<char> path(MAX_PATH);
|
|
429
|
+
DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
|
|
430
|
+
if (len == 0) {
|
|
431
|
+
return "";
|
|
193
432
|
}
|
|
194
|
-
|
|
433
|
+
std::string base_path(path.data(), len);
|
|
434
|
+
// remove executable name
|
|
435
|
+
auto last_slash = base_path.find_last_of('\\');
|
|
436
|
+
if (last_slash != std::string::npos) {
|
|
437
|
+
base_path = base_path.substr(0, last_slash);
|
|
438
|
+
}
|
|
439
|
+
return base_path + "\\";
|
|
440
|
+
#endif
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
static std::string backend_filename_prefix() {
|
|
444
|
+
#ifdef _WIN32
|
|
445
|
+
return "ggml-";
|
|
446
|
+
#else
|
|
447
|
+
return "libggml-";
|
|
448
|
+
#endif
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
static std::string backend_filename_suffix() {
|
|
452
|
+
#ifdef _WIN32
|
|
453
|
+
return ".dll";
|
|
454
|
+
#else
|
|
455
|
+
return ".so";
|
|
456
|
+
#endif
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
|
460
|
+
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
|
461
|
+
// TODO: search system paths
|
|
462
|
+
std::string file_prefix = backend_filename_prefix() + name + "-";
|
|
463
|
+
std::vector<std::string> search_paths;
|
|
464
|
+
if (user_search_path == nullptr) {
|
|
465
|
+
search_paths.push_back("./");
|
|
466
|
+
search_paths.push_back(get_executable_path());
|
|
467
|
+
} else {
|
|
468
|
+
#if defined(_WIN32)
|
|
469
|
+
search_paths.push_back(std::string(user_search_path) + "\\");
|
|
470
|
+
#else
|
|
471
|
+
search_paths.push_back(std::string(user_search_path) + "/");
|
|
472
|
+
#endif
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
int best_score = 0;
|
|
476
|
+
std::string best_path;
|
|
477
|
+
|
|
478
|
+
namespace fs = std::filesystem;
|
|
479
|
+
for (const auto & search_path : search_paths) {
|
|
480
|
+
if (!fs::exists(search_path)) {
|
|
481
|
+
continue;
|
|
482
|
+
}
|
|
483
|
+
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
|
484
|
+
for (const auto & entry : dir_it) {
|
|
485
|
+
if (entry.is_regular_file()) {
|
|
486
|
+
std::string filename = entry.path().filename().string();
|
|
487
|
+
std::string ext = entry.path().extension().string();
|
|
488
|
+
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
|
489
|
+
dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
|
|
490
|
+
if (!handle && !silent) {
|
|
491
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
|
|
492
|
+
}
|
|
493
|
+
if (handle) {
|
|
494
|
+
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
495
|
+
if (score_fn) {
|
|
496
|
+
int s = score_fn();
|
|
497
|
+
#ifndef NDEBUG
|
|
498
|
+
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
|
|
499
|
+
#endif
|
|
500
|
+
if (s > best_score) {
|
|
501
|
+
best_score = s;
|
|
502
|
+
best_path = entry.path().string();
|
|
503
|
+
}
|
|
504
|
+
} else {
|
|
505
|
+
if (!silent) {
|
|
506
|
+
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
if (best_score == 0) {
|
|
516
|
+
// try to load the base backend
|
|
517
|
+
for (const auto & search_path : search_paths) {
|
|
518
|
+
std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
|
|
519
|
+
if (fs::exists(path)) {
|
|
520
|
+
return get_reg().load_backend(path.c_str(), silent);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
return nullptr;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
return get_reg().load_backend(best_path.c_str(), silent);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
void ggml_backend_load_all() {
|
|
530
|
+
ggml_backend_load_all_from_path(nullptr);
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
534
|
+
#ifdef NDEBUG
|
|
535
|
+
bool silent = true;
|
|
536
|
+
#else
|
|
537
|
+
bool silent = false;
|
|
538
|
+
#endif
|
|
539
|
+
|
|
540
|
+
ggml_backend_load_best("blas", silent, dir_path);
|
|
541
|
+
ggml_backend_load_best("cann", silent, dir_path);
|
|
542
|
+
ggml_backend_load_best("cuda", silent, dir_path);
|
|
543
|
+
ggml_backend_load_best("hip", silent, dir_path);
|
|
544
|
+
ggml_backend_load_best("kompute", silent, dir_path);
|
|
545
|
+
ggml_backend_load_best("metal", silent, dir_path);
|
|
546
|
+
ggml_backend_load_best("rpc", silent, dir_path);
|
|
547
|
+
ggml_backend_load_best("sycl", silent, dir_path);
|
|
548
|
+
ggml_backend_load_best("vulkan", silent, dir_path);
|
|
549
|
+
ggml_backend_load_best("opencl", silent, dir_path);
|
|
550
|
+
ggml_backend_load_best("musa", silent, dir_path);
|
|
551
|
+
ggml_backend_load_best("cpu", silent, dir_path);
|
|
195
552
|
}
|
|
@@ -252,6 +252,7 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
|
|
|
252
252
|
}
|
|
253
253
|
|
|
254
254
|
void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
|
255
|
+
GGML_ASSERT(tensor);
|
|
255
256
|
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
256
257
|
|
|
257
258
|
if (size == 0) {
|
|
@@ -266,6 +267,7 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
|
|
|
266
267
|
}
|
|
267
268
|
|
|
268
269
|
void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
|
270
|
+
GGML_ASSERT(tensor);
|
|
269
271
|
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
270
272
|
|
|
271
273
|
if (size == 0) {
|
|
@@ -740,7 +742,8 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
|
|
740
742
|
|
|
741
743
|
if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
|
|
742
744
|
// since the tensor is pre-allocated, it cannot be moved to another backend
|
|
743
|
-
|
|
745
|
+
ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
746
|
+
GGML_ABORT("pre-allocated tensor (%s) in a buffer (%s) that cannot run the operation (%s)", tensor->name, ggml_backend_buffer_name(buffer), ggml_op_name(tensor->op));
|
|
744
747
|
}
|
|
745
748
|
|
|
746
749
|
// graph input
|
|
@@ -884,9 +887,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
|
884
887
|
for (int i = 0; i < graph->n_nodes; i++) {
|
|
885
888
|
struct ggml_tensor * node = graph->nodes[i];
|
|
886
889
|
int * node_backend_id = &tensor_backend_id(node);
|
|
887
|
-
if (ggml_is_view_op(node->op)) {
|
|
888
|
-
continue;
|
|
889
|
-
}
|
|
890
890
|
// do not overwrite user assignments
|
|
891
891
|
if (*node_backend_id == -1) {
|
|
892
892
|
*node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
|
|
@@ -11,12 +11,9 @@ find_package(BLAS)
|
|
|
11
11
|
if (BLAS_FOUND)
|
|
12
12
|
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
target_link_libraries(ggml-blas PRIVATE ggml-base)
|
|
19
|
-
target_include_directories(ggml-blas PRIVATE . ..)
|
|
14
|
+
ggml_add_backend_library(ggml-blas
|
|
15
|
+
ggml-blas.cpp
|
|
16
|
+
)
|
|
20
17
|
|
|
21
18
|
if (${GGML_BLAS_VENDOR} MATCHES "Apple")
|
|
22
19
|
add_compile_definitions(ACCELERATE_NEW_LAPACK)
|
|
@@ -75,7 +72,6 @@ if (BLAS_FOUND)
|
|
|
75
72
|
|
|
76
73
|
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
|
77
74
|
|
|
78
|
-
#add_compile_options(${BLAS_LINKER_FLAGS})
|
|
79
75
|
target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
|
|
80
76
|
|
|
81
77
|
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
|
|
@@ -506,9 +506,12 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
|
|
|
506
506
|
|
|
507
507
|
ggml_backend_reg_t ggml_backend_blas_reg(void) {
|
|
508
508
|
static struct ggml_backend_reg ggml_backend_blas_reg = {
|
|
509
|
-
/* .
|
|
510
|
-
/* .
|
|
509
|
+
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
|
510
|
+
/* .iface = */ ggml_backend_blas_reg_i,
|
|
511
|
+
/* .context = */ NULL,
|
|
511
512
|
};
|
|
512
513
|
|
|
513
514
|
return &ggml_backend_blas_reg;
|
|
514
515
|
}
|
|
516
|
+
|
|
517
|
+
GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
|