@fugood/llama.node 0.3.6 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +3 -1
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
|
|
2
|
+
// GGUF files have the following structure:
|
|
3
|
+
//
|
|
4
|
+
// 1. File magic "GGUF" (4 bytes).
|
|
5
|
+
// 2. File version (uint32_t).
|
|
6
|
+
// 3. Number of ggml tensors in file (int64_t).
|
|
7
|
+
// 4. Number of key-value-pairs in file (int64_t).
|
|
8
|
+
// 5. For each KV pair:
|
|
9
|
+
// 1. The key (string).
|
|
10
|
+
// 2. The value type (gguf_type).
|
|
11
|
+
// 3a. If the value type is GGUF_TYPE_ARRAY:
|
|
12
|
+
// 1. The type of the array (gguf_type).
|
|
13
|
+
// 2. The number of elements in the array (uint64_t).
|
|
14
|
+
// 3. The binary representation of each element in the array.
|
|
15
|
+
// 3b. Otherwise:
|
|
16
|
+
// 1. The binary representation of the value.
|
|
17
|
+
// 6. For each ggml tensor:
|
|
18
|
+
// 1. The tensor name (string).
|
|
19
|
+
// 2. The number of dimensions of the tensor (uint32_t).
|
|
20
|
+
// 3. For each dimension:
|
|
21
|
+
// 1. The size of the tensor in the dimension (int64_t).
|
|
22
|
+
// 4. The tensor data type (ggml_type).
|
|
23
|
+
// 5. The tensor data offset in the tensor data binary blob (uint64_t).
|
|
24
|
+
// 7. The tensor data binary blob (optional, aligned).
|
|
25
|
+
//
|
|
26
|
+
// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
|
|
27
|
+
// All enums are stored as int32_t.
|
|
28
|
+
// All bool values are stored as int8_t.
|
|
29
|
+
// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
|
|
30
|
+
// otherwise GGUF_DEFAULT_ALIGNMENT is used.
|
|
31
|
+
//
|
|
32
|
+
// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
|
|
33
|
+
|
|
34
|
+
#pragma once
|
|
35
|
+
|
|
36
|
+
#include "ggml.h"
|
|
37
|
+
|
|
38
|
+
#include <stdbool.h>
|
|
39
|
+
#include <stdint.h>
|
|
40
|
+
|
|
41
|
+
#define GGUF_MAGIC "GGUF"
|
|
42
|
+
#define GGUF_VERSION 3
|
|
43
|
+
|
|
44
|
+
#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment"
|
|
45
|
+
|
|
46
|
+
#define GGUF_DEFAULT_ALIGNMENT 32
|
|
47
|
+
|
|
48
|
+
#ifdef __cplusplus
|
|
49
|
+
extern "C" {
|
|
50
|
+
#endif
|
|
51
|
+
|
|
52
|
+
// types that can be stored as GGUF KV data
|
|
53
|
+
enum gguf_type {
|
|
54
|
+
GGUF_TYPE_UINT8 = 0,
|
|
55
|
+
GGUF_TYPE_INT8 = 1,
|
|
56
|
+
GGUF_TYPE_UINT16 = 2,
|
|
57
|
+
GGUF_TYPE_INT16 = 3,
|
|
58
|
+
GGUF_TYPE_UINT32 = 4,
|
|
59
|
+
GGUF_TYPE_INT32 = 5,
|
|
60
|
+
GGUF_TYPE_FLOAT32 = 6,
|
|
61
|
+
GGUF_TYPE_BOOL = 7,
|
|
62
|
+
GGUF_TYPE_STRING = 8,
|
|
63
|
+
GGUF_TYPE_ARRAY = 9,
|
|
64
|
+
GGUF_TYPE_UINT64 = 10,
|
|
65
|
+
GGUF_TYPE_INT64 = 11,
|
|
66
|
+
GGUF_TYPE_FLOAT64 = 12,
|
|
67
|
+
GGUF_TYPE_COUNT, // marks the end of the enum
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
struct gguf_context;
|
|
71
|
+
|
|
72
|
+
struct gguf_init_params {
|
|
73
|
+
bool no_alloc;
|
|
74
|
+
|
|
75
|
+
// if not NULL, create a ggml_context and allocate the tensor data in it
|
|
76
|
+
struct ggml_context ** ctx;
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
GGML_API struct gguf_context * gguf_init_empty(void);
|
|
80
|
+
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
|
81
|
+
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
|
82
|
+
|
|
83
|
+
GGML_API void gguf_free(struct gguf_context * ctx);
|
|
84
|
+
|
|
85
|
+
GGML_API const char * gguf_type_name(enum gguf_type type);
|
|
86
|
+
|
|
87
|
+
GGML_API uint32_t gguf_get_version (const struct gguf_context * ctx);
|
|
88
|
+
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
|
89
|
+
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
|
90
|
+
|
|
91
|
+
GGML_API int64_t gguf_get_n_kv(const struct gguf_context * ctx);
|
|
92
|
+
GGML_API int64_t gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found
|
|
93
|
+
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id);
|
|
94
|
+
|
|
95
|
+
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id);
|
|
96
|
+
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id);
|
|
97
|
+
|
|
98
|
+
// will abort if the wrong type is used for the key
|
|
99
|
+
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int64_t key_id);
|
|
100
|
+
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int64_t key_id);
|
|
101
|
+
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
|
|
102
|
+
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
|
|
103
|
+
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
|
|
104
|
+
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
|
|
105
|
+
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
|
|
106
|
+
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
|
|
107
|
+
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
|
|
108
|
+
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
|
|
109
|
+
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
|
|
110
|
+
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id);
|
|
111
|
+
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id);
|
|
112
|
+
GGML_API size_t gguf_get_arr_n (const struct gguf_context * ctx, int64_t key_id);
|
|
113
|
+
|
|
114
|
+
// get raw pointer to the first element of the array with the given key_id
|
|
115
|
+
// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
|
|
116
|
+
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id);
|
|
117
|
+
|
|
118
|
+
// get ith C string from array with given key_id
|
|
119
|
+
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
|
|
120
|
+
|
|
121
|
+
GGML_API int64_t gguf_get_n_tensors (const struct gguf_context * ctx);
|
|
122
|
+
GGML_API int64_t gguf_find_tensor (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
|
|
123
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id);
|
|
124
|
+
GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int64_t tensor_id);
|
|
125
|
+
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int64_t tensor_id);
|
|
126
|
+
GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int64_t tensor_id);
|
|
127
|
+
|
|
128
|
+
// removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
|
|
129
|
+
GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key);
|
|
130
|
+
|
|
131
|
+
// overrides an existing KV pair or adds a new one, the new KV pair is always at the back
|
|
132
|
+
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
|
133
|
+
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
|
134
|
+
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
|
135
|
+
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
|
136
|
+
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
|
137
|
+
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
|
138
|
+
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
|
139
|
+
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
|
140
|
+
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
|
141
|
+
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
|
142
|
+
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
|
143
|
+
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
|
144
|
+
|
|
145
|
+
// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
|
|
146
|
+
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n);
|
|
147
|
+
|
|
148
|
+
// creates a new array with n strings and copies the corresponding strings from data
|
|
149
|
+
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n);
|
|
150
|
+
|
|
151
|
+
// set or add KV pairs from another context
|
|
152
|
+
GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
|
|
153
|
+
|
|
154
|
+
// add tensor to GGUF context, tensor name must be unique
|
|
155
|
+
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
|
156
|
+
|
|
157
|
+
// after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
|
|
158
|
+
// in such a way that the tensor data remains as one contiguous block (except for padding)
|
|
159
|
+
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
|
160
|
+
|
|
161
|
+
// assumes that at least gguf_get_tensor_size bytes can be read from data
|
|
162
|
+
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
|
|
163
|
+
|
|
164
|
+
// writing gguf files can be done in 3 ways:
|
|
165
|
+
//
|
|
166
|
+
// - write the entire gguf_context to a binary file in a single pass:
|
|
167
|
+
//
|
|
168
|
+
// gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
|
|
169
|
+
//
|
|
170
|
+
// - write only the meta data to a file, then re-open the file and append the tensor data:
|
|
171
|
+
//
|
|
172
|
+
// gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
|
|
173
|
+
// FILE * f = fopen(fname, "ab");
|
|
174
|
+
// fwrite(..., f); // write tensor data
|
|
175
|
+
// fclose(f);
|
|
176
|
+
//
|
|
177
|
+
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
|
178
|
+
//
|
|
179
|
+
// FILE * f = fopen(fname, "wb");
|
|
180
|
+
// const size_t size_meta = gguf_get_meta_size(ctx);
|
|
181
|
+
// fseek(f, size_meta, SEEK_SET);
|
|
182
|
+
// fwrite(..., f); // write tensor data
|
|
183
|
+
// void * data = malloc(size_meta);
|
|
184
|
+
// gguf_get_meta_data(ctx, data);
|
|
185
|
+
// rewind(f);
|
|
186
|
+
// fwrite(data, 1, size_meta, f);
|
|
187
|
+
// free(data);
|
|
188
|
+
// fclose(f);
|
|
189
|
+
//
|
|
190
|
+
|
|
191
|
+
// write the entire context to a binary file
|
|
192
|
+
GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
|
193
|
+
|
|
194
|
+
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
|
195
|
+
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
|
196
|
+
|
|
197
|
+
// writes the meta data to pointer "data"
|
|
198
|
+
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
|
199
|
+
|
|
200
|
+
#ifdef __cplusplus
|
|
201
|
+
}
|
|
202
|
+
#endif
|
|
@@ -208,6 +208,7 @@ add_library(ggml-base
|
|
|
208
208
|
../include/ggml-backend.h
|
|
209
209
|
../include/ggml-cpp.h
|
|
210
210
|
../include/ggml-opt.h
|
|
211
|
+
../include/gguf.h
|
|
211
212
|
ggml.c
|
|
212
213
|
ggml-alloc.c
|
|
213
214
|
ggml-backend.cpp
|
|
@@ -215,7 +216,8 @@ add_library(ggml-base
|
|
|
215
216
|
ggml-threading.cpp
|
|
216
217
|
ggml-threading.h
|
|
217
218
|
ggml-quants.c
|
|
218
|
-
ggml-quants.h
|
|
219
|
+
ggml-quants.h
|
|
220
|
+
gguf.cpp)
|
|
219
221
|
|
|
220
222
|
target_include_directories(ggml-base PRIVATE .)
|
|
221
223
|
|
|
@@ -234,6 +236,7 @@ function(ggml_add_backend_library backend)
|
|
|
234
236
|
# write the shared library to the output directory
|
|
235
237
|
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
|
236
238
|
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
|
|
239
|
+
add_dependencies(ggml ${backend})
|
|
237
240
|
else()
|
|
238
241
|
add_library(${backend} ${ARGN})
|
|
239
242
|
target_link_libraries(ggml PUBLIC ${backend})
|
|
@@ -289,9 +292,9 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
289
292
|
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
|
|
290
293
|
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
|
291
294
|
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
295
|
+
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
|
292
296
|
if (NOT MSVC)
|
|
293
|
-
# MSVC doesn't support
|
|
294
|
-
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
|
|
297
|
+
# MSVC doesn't support AMX
|
|
295
298
|
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
296
299
|
endif()
|
|
297
300
|
else ()
|
|
@@ -37,6 +37,7 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
|
|
|
37
37
|
return true;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
// ops that return true for this function must not use restrict pointers for their backend implementations
|
|
40
41
|
static bool ggml_op_can_inplace(enum ggml_op op) {
|
|
41
42
|
switch (op) {
|
|
42
43
|
case GGML_OP_SCALE:
|
|
@@ -52,8 +53,12 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
|
|
|
52
53
|
case GGML_OP_LOG:
|
|
53
54
|
case GGML_OP_UNARY:
|
|
54
55
|
case GGML_OP_ROPE:
|
|
56
|
+
case GGML_OP_ROPE_BACK:
|
|
57
|
+
case GGML_OP_SILU_BACK:
|
|
55
58
|
case GGML_OP_RMS_NORM:
|
|
59
|
+
case GGML_OP_RMS_NORM_BACK:
|
|
56
60
|
case GGML_OP_SOFT_MAX:
|
|
61
|
+
case GGML_OP_SOFT_MAX_BACK:
|
|
57
62
|
return true;
|
|
58
63
|
|
|
59
64
|
default:
|
|
@@ -208,7 +208,6 @@ extern "C" {
|
|
|
208
208
|
|
|
209
209
|
// Internal backend registry API
|
|
210
210
|
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
|
|
211
|
-
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
|
|
212
211
|
|
|
213
212
|
// Add backend dynamic loading support to the backend
|
|
214
213
|
|
|
@@ -66,6 +66,26 @@
|
|
|
66
66
|
#include "ggml-kompute.h"
|
|
67
67
|
#endif
|
|
68
68
|
|
|
69
|
+
// disable C++17 deprecation warning for std::wstring_convert / std::codecvt_utf8_utf16
|
|
70
|
+
#if defined(__clang__)
|
|
71
|
+
# pragma clang diagnostic push
|
|
72
|
+
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
|
73
|
+
#endif
|
|
74
|
+
|
|
75
|
+
static std::wstring utf8_to_utf16(const std::string & str) {
|
|
76
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
77
|
+
return converter.from_bytes(str);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
static std::string utf16_to_utf8(const std::wstring & str) {
|
|
81
|
+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
82
|
+
return converter.to_bytes(str);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
#if defined(__clang__)
|
|
86
|
+
# pragma clang diagnostic pop
|
|
87
|
+
#endif
|
|
88
|
+
|
|
69
89
|
#ifdef _WIN32
|
|
70
90
|
|
|
71
91
|
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
@@ -88,11 +108,6 @@ static dl_handle * dl_load_library(const std::wstring & path) {
|
|
|
88
108
|
return handle;
|
|
89
109
|
}
|
|
90
110
|
|
|
91
|
-
static dl_handle * dl_load_library(const std::string & path) {
|
|
92
|
-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
|
|
93
|
-
return dl_load_library(converter.from_bytes(path));
|
|
94
|
-
}
|
|
95
|
-
|
|
96
111
|
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
97
112
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
98
113
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
@@ -114,8 +129,8 @@ struct dl_handle_deleter {
|
|
|
114
129
|
}
|
|
115
130
|
};
|
|
116
131
|
|
|
117
|
-
static void * dl_load_library(const std::
|
|
118
|
-
dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
132
|
+
static void * dl_load_library(const std::wstring & path) {
|
|
133
|
+
dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
119
134
|
|
|
120
135
|
return handle;
|
|
121
136
|
}
|
|
@@ -202,11 +217,11 @@ struct ggml_backend_registry {
|
|
|
202
217
|
devices.push_back(device);
|
|
203
218
|
}
|
|
204
219
|
|
|
205
|
-
ggml_backend_reg_t load_backend(const
|
|
220
|
+
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
|
206
221
|
dl_handle_ptr handle { dl_load_library(path) };
|
|
207
222
|
if (!handle) {
|
|
208
223
|
if (!silent) {
|
|
209
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
|
|
224
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str());
|
|
210
225
|
}
|
|
211
226
|
return nullptr;
|
|
212
227
|
}
|
|
@@ -214,7 +229,7 @@ struct ggml_backend_registry {
|
|
|
214
229
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
215
230
|
if (score_fn && score_fn() == 0) {
|
|
216
231
|
if (!silent) {
|
|
217
|
-
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
|
|
232
|
+
GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str());
|
|
218
233
|
}
|
|
219
234
|
return nullptr;
|
|
220
235
|
}
|
|
@@ -222,7 +237,7 @@ struct ggml_backend_registry {
|
|
|
222
237
|
auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
|
|
223
238
|
if (!backend_init_fn) {
|
|
224
239
|
if (!silent) {
|
|
225
|
-
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
|
|
240
|
+
GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str());
|
|
226
241
|
}
|
|
227
242
|
return nullptr;
|
|
228
243
|
}
|
|
@@ -231,16 +246,16 @@ struct ggml_backend_registry {
|
|
|
231
246
|
if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
|
|
232
247
|
if (!silent) {
|
|
233
248
|
if (!reg) {
|
|
234
|
-
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
|
|
249
|
+
GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str());
|
|
235
250
|
} else {
|
|
236
251
|
GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
|
|
237
|
-
__func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
|
|
252
|
+
__func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION);
|
|
238
253
|
}
|
|
239
254
|
}
|
|
240
255
|
return nullptr;
|
|
241
256
|
}
|
|
242
257
|
|
|
243
|
-
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
|
|
258
|
+
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
|
244
259
|
|
|
245
260
|
register_backend(reg, std::move(handle));
|
|
246
261
|
|
|
@@ -376,14 +391,14 @@ ggml_backend_t ggml_backend_init_best(void) {
|
|
|
376
391
|
|
|
377
392
|
// Dynamic loading
|
|
378
393
|
ggml_backend_reg_t ggml_backend_load(const char * path) {
|
|
379
|
-
return get_reg().load_backend(path, false);
|
|
394
|
+
return get_reg().load_backend(utf8_to_utf16(path), false);
|
|
380
395
|
}
|
|
381
396
|
|
|
382
397
|
void ggml_backend_unload(ggml_backend_reg_t reg) {
|
|
383
398
|
get_reg().unload_backend(reg, true);
|
|
384
399
|
}
|
|
385
400
|
|
|
386
|
-
static std::
|
|
401
|
+
static std::wstring get_executable_path() {
|
|
387
402
|
#if defined(__APPLE__)
|
|
388
403
|
// get executable path
|
|
389
404
|
std::vector<char> path;
|
|
@@ -401,13 +416,17 @@ static std::string get_executable_path() {
|
|
|
401
416
|
if (last_slash != std::string::npos) {
|
|
402
417
|
base_path = base_path.substr(0, last_slash);
|
|
403
418
|
}
|
|
404
|
-
return base_path + "/";
|
|
405
|
-
#elif defined(__linux__)
|
|
419
|
+
return utf8_to_utf16(base_path + "/");
|
|
420
|
+
#elif defined(__linux__) || defined(__FreeBSD__)
|
|
406
421
|
std::string base_path = ".";
|
|
407
422
|
std::vector<char> path(1024);
|
|
408
423
|
while (true) {
|
|
409
424
|
// get executable path
|
|
425
|
+
# if defined(__linux__)
|
|
410
426
|
ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
|
|
427
|
+
# elif defined(__FreeBSD__)
|
|
428
|
+
ssize_t len = readlink("/proc/curproc/file", path.data(), path.size());
|
|
429
|
+
# endif
|
|
411
430
|
if (len == -1) {
|
|
412
431
|
break;
|
|
413
432
|
}
|
|
@@ -423,57 +442,63 @@ static std::string get_executable_path() {
|
|
|
423
442
|
path.resize(path.size() * 2);
|
|
424
443
|
}
|
|
425
444
|
|
|
426
|
-
return base_path + "/";
|
|
445
|
+
return utf8_to_utf16(base_path + "/");
|
|
427
446
|
#elif defined(_WIN32)
|
|
428
|
-
std::vector<
|
|
429
|
-
DWORD len =
|
|
447
|
+
std::vector<wchar_t> path(MAX_PATH);
|
|
448
|
+
DWORD len = GetModuleFileNameW(NULL, path.data(), path.size());
|
|
430
449
|
if (len == 0) {
|
|
431
|
-
return
|
|
450
|
+
return {};
|
|
432
451
|
}
|
|
433
|
-
std::
|
|
452
|
+
std::wstring base_path(path.data(), len);
|
|
434
453
|
// remove executable name
|
|
435
454
|
auto last_slash = base_path.find_last_of('\\');
|
|
436
455
|
if (last_slash != std::string::npos) {
|
|
437
456
|
base_path = base_path.substr(0, last_slash);
|
|
438
457
|
}
|
|
439
|
-
return base_path + "\\";
|
|
458
|
+
return base_path + L"\\";
|
|
459
|
+
#else
|
|
460
|
+
return {};
|
|
440
461
|
#endif
|
|
441
462
|
}
|
|
442
463
|
|
|
443
|
-
static std::string backend_filename_prefix() {
|
|
464
|
+
static std::wstring backend_filename_prefix() {
|
|
444
465
|
#ifdef _WIN32
|
|
445
|
-
return "ggml-";
|
|
466
|
+
return L"ggml-";
|
|
446
467
|
#else
|
|
447
|
-
return "libggml-";
|
|
468
|
+
return L"libggml-";
|
|
448
469
|
#endif
|
|
449
470
|
}
|
|
450
471
|
|
|
451
|
-
static std::string backend_filename_suffix() {
|
|
472
|
+
static std::wstring backend_filename_suffix() {
|
|
452
473
|
#ifdef _WIN32
|
|
453
|
-
return ".dll";
|
|
474
|
+
return L".dll";
|
|
454
475
|
#else
|
|
455
|
-
return ".so";
|
|
476
|
+
return L".so";
|
|
477
|
+
#endif
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
static std::wstring path_separator() {
|
|
481
|
+
#ifdef _WIN32
|
|
482
|
+
return L"\\";
|
|
483
|
+
#else
|
|
484
|
+
return L"/";
|
|
456
485
|
#endif
|
|
457
486
|
}
|
|
458
487
|
|
|
459
488
|
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
|
|
460
489
|
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
|
|
461
490
|
// TODO: search system paths
|
|
462
|
-
std::
|
|
463
|
-
std::vector<std::string> search_paths;
|
|
491
|
+
std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-";
|
|
492
|
+
std::vector<std::wstring> search_paths;
|
|
464
493
|
if (user_search_path == nullptr) {
|
|
465
|
-
search_paths.push_back("
|
|
494
|
+
search_paths.push_back(L"." + path_separator());
|
|
466
495
|
search_paths.push_back(get_executable_path());
|
|
467
496
|
} else {
|
|
468
|
-
|
|
469
|
-
search_paths.push_back(std::string(user_search_path) + "\\");
|
|
470
|
-
#else
|
|
471
|
-
search_paths.push_back(std::string(user_search_path) + "/");
|
|
472
|
-
#endif
|
|
497
|
+
search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator());
|
|
473
498
|
}
|
|
474
499
|
|
|
475
500
|
int best_score = 0;
|
|
476
|
-
std::string best_path;
|
|
501
|
+
std::wstring best_path;
|
|
477
502
|
|
|
478
503
|
namespace fs = std::filesystem;
|
|
479
504
|
for (const auto & search_path : search_paths) {
|
|
@@ -483,27 +508,27 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
483
508
|
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
|
|
484
509
|
for (const auto & entry : dir_it) {
|
|
485
510
|
if (entry.is_regular_file()) {
|
|
486
|
-
std::
|
|
487
|
-
std::
|
|
511
|
+
std::wstring filename = entry.path().filename().wstring();
|
|
512
|
+
std::wstring ext = entry.path().extension().wstring();
|
|
488
513
|
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
|
|
489
|
-
dl_handle_ptr handle { dl_load_library(entry.path().
|
|
514
|
+
dl_handle_ptr handle { dl_load_library(entry.path().wstring()) };
|
|
490
515
|
if (!handle && !silent) {
|
|
491
|
-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().
|
|
516
|
+
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
|
492
517
|
}
|
|
493
518
|
if (handle) {
|
|
494
519
|
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
|
495
520
|
if (score_fn) {
|
|
496
521
|
int s = score_fn();
|
|
497
522
|
#ifndef NDEBUG
|
|
498
|
-
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().
|
|
523
|
+
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s);
|
|
499
524
|
#endif
|
|
500
525
|
if (s > best_score) {
|
|
501
526
|
best_score = s;
|
|
502
|
-
best_path = entry.path().
|
|
527
|
+
best_path = entry.path().wstring();
|
|
503
528
|
}
|
|
504
529
|
} else {
|
|
505
530
|
if (!silent) {
|
|
506
|
-
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().
|
|
531
|
+
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str());
|
|
507
532
|
}
|
|
508
533
|
}
|
|
509
534
|
}
|
|
@@ -515,15 +540,15 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
|
|
515
540
|
if (best_score == 0) {
|
|
516
541
|
// try to load the base backend
|
|
517
542
|
for (const auto & search_path : search_paths) {
|
|
518
|
-
std::
|
|
543
|
+
std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix();
|
|
519
544
|
if (fs::exists(path)) {
|
|
520
|
-
return get_reg().load_backend(path
|
|
545
|
+
return get_reg().load_backend(path, silent);
|
|
521
546
|
}
|
|
522
547
|
}
|
|
523
548
|
return nullptr;
|
|
524
549
|
}
|
|
525
550
|
|
|
526
|
-
return get_reg().load_backend(best_path
|
|
551
|
+
return get_reg().load_backend(best_path, silent);
|
|
527
552
|
}
|
|
528
553
|
|
|
529
554
|
void ggml_backend_load_all() {
|
|
@@ -549,4 +574,9 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|
|
549
574
|
ggml_backend_load_best("opencl", silent, dir_path);
|
|
550
575
|
ggml_backend_load_best("musa", silent, dir_path);
|
|
551
576
|
ggml_backend_load_best("cpu", silent, dir_path);
|
|
577
|
+
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
|
|
578
|
+
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
|
|
579
|
+
if (backend_path) {
|
|
580
|
+
ggml_backend_load(backend_path);
|
|
581
|
+
}
|
|
552
582
|
}
|
|
@@ -764,7 +764,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
|
|
764
764
|
if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
|
765
765
|
int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
|
|
766
766
|
// check if a backend with higher prio wants to offload the op
|
|
767
|
-
if (src_backend_id == sched->n_backends - 1) {
|
|
767
|
+
if (src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
|
|
768
768
|
for (int b = 0; b < src_backend_id; b++) {
|
|
769
769
|
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
|
|
770
770
|
SET_CAUSE(tensor, "1.off");
|
|
@@ -795,9 +795,12 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
|
|
795
795
|
for (int i = 0; i < graph->n_nodes; i++) {
|
|
796
796
|
if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
|
|
797
797
|
ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id];
|
|
798
|
-
GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs
|
|
798
|
+
GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs", cur_split, ggml_backend_name(split_backend),
|
|
799
799
|
sched->splits[cur_split].n_inputs);
|
|
800
800
|
for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
|
|
801
|
+
if (j == 0) {
|
|
802
|
+
GGML_LOG_DEBUG(": ");
|
|
803
|
+
}
|
|
801
804
|
GGML_LOG_DEBUG("[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
|
|
802
805
|
fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
|
|
803
806
|
}
|