@fugood/llama.node 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +5 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +15 -5
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +1 -1
- package/src/LlamaContext.cpp +81 -18
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/.github/workflows/build.yml +197 -159
- package/src/llama.cpp/.github/workflows/docker.yml +5 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +11 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -2
- package/src/llama.cpp/common/arg.cpp +426 -245
- package/src/llama.cpp/common/common.cpp +143 -80
- package/src/llama.cpp/common/common.h +81 -24
- package/src/llama.cpp/common/sampling.cpp +53 -19
- package/src/llama.cpp/common/sampling.h +22 -1
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +101 -148
- package/src/llama.cpp/examples/CMakeLists.txt +32 -13
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +5 -4
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +1 -1
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +262 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +46 -19
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +9 -5
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
- package/src/llama.cpp/examples/server/server.cpp +1758 -886
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +94 -304
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +4 -0
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
- package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +106 -24
- package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
- package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
- package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
- package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
- package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
- package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
- package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
- package/src/llama.cpp/ggml/src/ggml.c +367 -207
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +26 -19
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/src/CMakeLists.txt +2 -7
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +35 -90
- package/src/llama.cpp/src/llama-vocab.cpp +6 -1
- package/src/llama.cpp/src/llama.cpp +1748 -640
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -37
- package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
- package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
- package/src/llama.cpp/tests/test-rope.cpp +61 -20
- package/src/llama.cpp/tests/test-sampling.cpp +2 -2
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
|
@@ -2,11 +2,18 @@
|
|
|
2
2
|
#include "ggml-backend-impl.h"
|
|
3
3
|
#include "ggml-cpu.h"
|
|
4
4
|
#include "ggml-cpu-aarch64.h"
|
|
5
|
+
#include "ggml-cpu-traits.h"
|
|
5
6
|
#include "ggml-impl.h"
|
|
7
|
+
#include "amx/amx.h"
|
|
8
|
+
|
|
6
9
|
#include <cctype>
|
|
7
10
|
#include <string>
|
|
8
11
|
#include <vector>
|
|
9
12
|
|
|
13
|
+
#ifdef GGML_USE_CPU_HBM
|
|
14
|
+
#include "ggml-cpu-hbm.h"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
10
17
|
#if defined(__APPLE__)
|
|
11
18
|
#include <sys/types.h>
|
|
12
19
|
#include <sys/sysctl.h>
|
|
@@ -22,124 +29,20 @@
|
|
|
22
29
|
|
|
23
30
|
// ggml-backend interface
|
|
24
31
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
// buffer type HBM
|
|
28
|
-
|
|
29
|
-
#include <hbwmalloc.h>
|
|
30
|
-
|
|
31
|
-
static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
|
32
|
-
return "CPU_HBM";
|
|
33
|
-
|
|
34
|
-
GGML_UNUSED(buft);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
|
38
|
-
hbw_free(buffer->context);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
42
|
-
void * ptr;
|
|
43
|
-
int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
|
|
44
|
-
if (result != 0) {
|
|
45
|
-
GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
|
|
46
|
-
return NULL;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
|
50
|
-
buffer->buft = buft;
|
|
51
|
-
buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
|
|
52
|
-
|
|
53
|
-
return buffer;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
|
|
57
|
-
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
|
|
58
|
-
/* .iface = */ {
|
|
59
|
-
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
|
|
60
|
-
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
|
61
|
-
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
|
62
|
-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
63
|
-
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
|
64
|
-
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
|
65
|
-
},
|
|
66
|
-
/* .context = */ NULL,
|
|
67
|
-
};
|
|
68
|
-
|
|
69
|
-
return &ggml_backend_cpu_buffer_type_hbm;
|
|
70
|
-
}
|
|
71
|
-
#endif
|
|
72
|
-
|
|
73
|
-
// buffer type AARCH64
|
|
74
|
-
|
|
75
|
-
static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
|
76
|
-
tensor->extra = (void *)ggml_aarch64_get_optimal_repack_type(tensor); // NOLINT
|
|
77
|
-
|
|
78
|
-
GGML_UNUSED(buffer);
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
|
82
|
-
GGML_ASSERT(offset == 0);
|
|
83
|
-
GGML_ASSERT(size == ggml_nbytes(tensor));
|
|
84
|
-
|
|
85
|
-
enum ggml_type repack_type = (enum ggml_type)(intptr_t)tensor->extra;
|
|
86
|
-
|
|
87
|
-
ggml_aarch64_repack_tensor(tensor, repack_type, data, size);
|
|
88
|
-
|
|
89
|
-
GGML_UNUSED(buffer);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
static const char * ggml_backend_cpu_aarch64_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
|
93
|
-
return "CPU_AARCH64";
|
|
94
|
-
|
|
95
|
-
GGML_UNUSED(buft);
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
static ggml_backend_buffer_t ggml_backend_cpu_aarch64_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
99
|
-
auto * buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
|
100
|
-
|
|
101
|
-
if (buffer == NULL) {
|
|
102
|
-
return NULL;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
buffer->buft = buft;
|
|
106
|
-
buffer->iface.init_tensor = ggml_backend_cpu_aarch64_buffer_init_tensor;
|
|
107
|
-
buffer->iface.set_tensor = ggml_backend_cpu_aarch64_buffer_set_tensor;
|
|
108
|
-
|
|
109
|
-
return buffer;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
|
|
113
|
-
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_aarch64 = {
|
|
114
|
-
/* .iface = */ {
|
|
115
|
-
/* .get_name = */ ggml_backend_cpu_aarch64_buffer_type_get_name,
|
|
116
|
-
/* .alloc_buffer = */ ggml_backend_cpu_aarch64_buffer_type_alloc_buffer,
|
|
117
|
-
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
|
118
|
-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
119
|
-
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
|
120
|
-
/* .is_host = */ NULL,
|
|
121
|
-
},
|
|
122
|
-
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
|
123
|
-
/* .context = */ NULL,
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
return &ggml_backend_cpu_buffer_type_aarch64;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft) {
|
|
130
|
-
return buft == ggml_backend_cpu_aarch64_buffer_type();
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backend_dev_t device) {
|
|
32
|
+
std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
|
|
134
33
|
static std::vector<ggml_backend_buffer_type_t> bufts = []() {
|
|
135
34
|
std::vector<ggml_backend_buffer_type_t> bufts;
|
|
136
35
|
|
|
137
|
-
#
|
|
138
|
-
|
|
36
|
+
#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
|
|
37
|
+
if (ggml_backend_amx_buffer_type()) {
|
|
38
|
+
bufts.push_back(ggml_backend_amx_buffer_type());
|
|
39
|
+
}
|
|
139
40
|
#endif
|
|
140
41
|
|
|
141
42
|
#ifdef GGML_USE_CPU_AARCH64
|
|
142
|
-
|
|
43
|
+
if (ggml_backend_cpu_aarch64_buffer_type()) {
|
|
44
|
+
bufts.push_back(ggml_backend_cpu_aarch64_buffer_type());
|
|
45
|
+
}
|
|
143
46
|
#endif
|
|
144
47
|
|
|
145
48
|
bufts.push_back(NULL);
|
|
@@ -147,11 +50,22 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backen
|
|
|
147
50
|
return bufts;
|
|
148
51
|
}();
|
|
149
52
|
|
|
150
|
-
return bufts
|
|
53
|
+
return bufts;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
|
|
57
|
+
return ggml_backend_cpu_get_extra_buffers_type().data();
|
|
151
58
|
|
|
152
59
|
GGML_UNUSED(device);
|
|
153
60
|
}
|
|
154
61
|
|
|
62
|
+
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
|
|
63
|
+
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
|
|
64
|
+
if (extra && extra == buft) return true;
|
|
65
|
+
}
|
|
66
|
+
return false;
|
|
67
|
+
}
|
|
68
|
+
|
|
155
69
|
// CPU backend - backend (stream)
|
|
156
70
|
|
|
157
71
|
struct ggml_backend_cpu_context {
|
|
@@ -456,14 +370,23 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|
|
456
370
|
const struct ggml_tensor * src0 = op->src[0];
|
|
457
371
|
const struct ggml_tensor * src1 = op->src[1];
|
|
458
372
|
|
|
459
|
-
if (
|
|
460
|
-
|
|
461
|
-
|
|
373
|
+
if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
|
|
374
|
+
return true;
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// extra_buffer_op?
|
|
378
|
+
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
|
|
379
|
+
if (extra) {
|
|
380
|
+
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
|
|
381
|
+
if (buf_extra && buf_extra->supports_op(dev, op)) {
|
|
382
|
+
return true;
|
|
383
|
+
}
|
|
462
384
|
}
|
|
463
385
|
}
|
|
464
386
|
|
|
465
|
-
|
|
466
|
-
|
|
387
|
+
// the other case need host buffer.
|
|
388
|
+
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
389
|
+
if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
|
|
467
390
|
return false;
|
|
468
391
|
}
|
|
469
392
|
}
|
|
@@ -471,8 +394,11 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|
|
471
394
|
switch (op->op) {
|
|
472
395
|
case GGML_OP_CPY:
|
|
473
396
|
return
|
|
397
|
+
op->type != GGML_TYPE_IQ3_XXS &&
|
|
398
|
+
op->type != GGML_TYPE_IQ3_S &&
|
|
474
399
|
op->type != GGML_TYPE_IQ2_XXS &&
|
|
475
400
|
op->type != GGML_TYPE_IQ2_XS &&
|
|
401
|
+
op->type != GGML_TYPE_IQ2_S &&
|
|
476
402
|
op->type != GGML_TYPE_IQ1_S &&
|
|
477
403
|
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
|
478
404
|
case GGML_OP_MUL_MAT:
|
|
@@ -486,13 +412,10 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|
|
486
412
|
default:
|
|
487
413
|
return true;
|
|
488
414
|
}
|
|
489
|
-
|
|
490
|
-
GGML_UNUSED(dev);
|
|
491
415
|
}
|
|
492
416
|
|
|
493
417
|
static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
|
494
|
-
return ggml_backend_buft_is_host(buft) ||
|
|
495
|
-
|
|
418
|
+
return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
|
|
496
419
|
GGML_UNUSED(dev);
|
|
497
420
|
}
|
|
498
421
|
|
|
@@ -541,16 +464,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
|
|
|
541
464
|
return &ggml_backend_cpu_device;
|
|
542
465
|
}
|
|
543
466
|
|
|
544
|
-
struct ggml_backend_feature {
|
|
545
|
-
const char * name;
|
|
546
|
-
const char * value;
|
|
547
|
-
};
|
|
548
|
-
|
|
549
|
-
// Not used yet
|
|
550
467
|
// This is intended to replace the ggml_cpu_has_* functions when loading the CPU backend dynamically,
|
|
551
|
-
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
|
|
468
|
+
// and additionally to allow other backends to expose their own list of features that applications can query using the same API
|
|
552
469
|
static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
|
|
553
470
|
static std::vector<ggml_backend_feature> features = []() {
|
|
471
|
+
ggml_cpu_init();
|
|
472
|
+
|
|
554
473
|
std::vector<ggml_backend_feature> features;
|
|
555
474
|
if (ggml_cpu_has_sse3()) {
|
|
556
475
|
features.push_back({ "SSE3", "1" });
|
|
@@ -561,6 +480,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
561
480
|
if (ggml_cpu_has_avx()) {
|
|
562
481
|
features.push_back({ "AVX", "1" });
|
|
563
482
|
}
|
|
483
|
+
if (ggml_cpu_has_avx_vnni()) {
|
|
484
|
+
features.push_back({ "AVX_VNNI", "1" });
|
|
485
|
+
}
|
|
564
486
|
if (ggml_cpu_has_avx2()) {
|
|
565
487
|
features.push_back({ "AVX2", "1" });
|
|
566
488
|
}
|
|
@@ -570,9 +492,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
570
492
|
if (ggml_cpu_has_fma()) {
|
|
571
493
|
features.push_back({ "FMA", "1" });
|
|
572
494
|
}
|
|
573
|
-
if (ggml_cpu_has_avx_vnni()) {
|
|
574
|
-
features.push_back({ "AVX_VNNI", "1" });
|
|
575
|
-
}
|
|
576
495
|
if (ggml_cpu_has_avx512()) {
|
|
577
496
|
features.push_back({ "AVX512", "1" });
|
|
578
497
|
}
|
|
@@ -603,6 +522,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
603
522
|
if (ggml_cpu_has_sve()) {
|
|
604
523
|
features.push_back({ "SVE", "1" });
|
|
605
524
|
}
|
|
525
|
+
if (ggml_cpu_has_dotprod()) {
|
|
526
|
+
features.push_back({ "DOTPROD", "1" });
|
|
527
|
+
}
|
|
528
|
+
if (ggml_cpu_has_matmul_int8()) {
|
|
529
|
+
features.push_back({ "MATMUL_INT8", "1" });
|
|
530
|
+
}
|
|
606
531
|
if (ggml_cpu_get_sve_cnt() > 0) {
|
|
607
532
|
static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
|
|
608
533
|
features.push_back({ "SVE_CNT", sve_cnt.c_str() });
|
|
@@ -619,6 +544,18 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
619
544
|
if (ggml_cpu_has_llamafile()) {
|
|
620
545
|
features.push_back({ "LLAMAFILE", "1" });
|
|
621
546
|
}
|
|
547
|
+
#ifdef GGML_USE_ACCELERATE
|
|
548
|
+
features.push_back({ "ACCELERATE", "1" });
|
|
549
|
+
#endif
|
|
550
|
+
#ifdef GGML_USE_CPU_HBM
|
|
551
|
+
features.push_back({ "CPU_HBM", "1" });
|
|
552
|
+
#endif
|
|
553
|
+
#ifdef GGML_USE_OPENMP
|
|
554
|
+
features.push_back({ "OPENMP", "1" });
|
|
555
|
+
#endif
|
|
556
|
+
#ifdef GGML_USE_CPU_AARCH64
|
|
557
|
+
features.push_back({ "AARCH64_REPACK", "1" });
|
|
558
|
+
#endif
|
|
622
559
|
|
|
623
560
|
features.push_back({ nullptr, nullptr });
|
|
624
561
|
|
|
@@ -632,10 +569,35 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
|
|
|
632
569
|
|
|
633
570
|
static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
|
634
571
|
if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
|
|
635
|
-
|
|
572
|
+
ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
|
|
573
|
+
return (void *)fct;
|
|
636
574
|
}
|
|
637
575
|
if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
|
|
638
|
-
|
|
576
|
+
ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
|
|
577
|
+
return (void *)fct;
|
|
578
|
+
}
|
|
579
|
+
if (strcmp(name, "ggml_backend_get_features") == 0) {
|
|
580
|
+
return (void *)ggml_backend_cpu_get_features;
|
|
581
|
+
}
|
|
582
|
+
if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
|
|
583
|
+
return (void *)ggml_backend_cpu_set_abort_callback;
|
|
584
|
+
}
|
|
585
|
+
if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
|
|
586
|
+
return (void *)ggml_numa_init;
|
|
587
|
+
}
|
|
588
|
+
if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
|
|
589
|
+
return (void *)ggml_is_numa;
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// threadpool - TODO: move to ggml-base
|
|
593
|
+
if (strcmp(name, "ggml_threadpool_new") == 0) {
|
|
594
|
+
return (void *)ggml_threadpool_new;
|
|
595
|
+
}
|
|
596
|
+
if (strcmp(name, "ggml_threadpool_free") == 0) {
|
|
597
|
+
return (void *)ggml_threadpool_free;
|
|
598
|
+
}
|
|
599
|
+
if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
|
|
600
|
+
return (void *)ggml_backend_cpu_set_threadpool;
|
|
639
601
|
}
|
|
640
602
|
|
|
641
603
|
return NULL;
|
|
@@ -655,9 +617,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
|
|
|
655
617
|
ggml_cpu_init();
|
|
656
618
|
|
|
657
619
|
static struct ggml_backend_reg ggml_backend_cpu_reg = {
|
|
658
|
-
/* .
|
|
659
|
-
/* .
|
|
620
|
+
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
|
621
|
+
/* .iface = */ ggml_backend_cpu_reg_i,
|
|
622
|
+
/* .context = */ NULL,
|
|
660
623
|
};
|
|
661
624
|
|
|
662
625
|
return &ggml_backend_cpu_reg;
|
|
663
626
|
}
|
|
627
|
+
|
|
628
|
+
GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
|
|
@@ -50,8 +50,7 @@
|
|
|
50
50
|
|
|
51
51
|
#include "sgemm.h"
|
|
52
52
|
#include "ggml-impl.h"
|
|
53
|
-
|
|
54
|
-
#include "../ggml-cpu-impl.h"
|
|
53
|
+
#include "ggml-cpu-impl.h"
|
|
55
54
|
#include "ggml-quants.h"
|
|
56
55
|
|
|
57
56
|
#ifdef _MSC_VER
|
|
@@ -205,6 +204,7 @@ template <> inline float32x4_t load(const float *p) {
|
|
|
205
204
|
return vld1q_f32(p);
|
|
206
205
|
}
|
|
207
206
|
#if !defined(_MSC_VER)
|
|
207
|
+
// FIXME: this should check for __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
|
|
208
208
|
template <> inline float16x8_t load(const ggml_fp16_t *p) {
|
|
209
209
|
return vld1q_f16((const float16_t *)p);
|
|
210
210
|
}
|
|
@@ -12,7 +12,7 @@ if (CUDAToolkit_FOUND)
|
|
|
12
12
|
# 61 == Pascal, __dp4a instruction (per-byte integer dot product)
|
|
13
13
|
# 70 == V100, FP16 tensor cores
|
|
14
14
|
# 75 == Turing, int8 tensor cores
|
|
15
|
-
if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6")
|
|
15
|
+
if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
|
|
16
16
|
set(CMAKE_CUDA_ARCHITECTURES "native")
|
|
17
17
|
elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
|
|
18
18
|
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
|
|
@@ -46,13 +46,10 @@ if (CUDAToolkit_FOUND)
|
|
|
46
46
|
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
|
47
47
|
endif()
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
target_link_libraries(ggml-cuda PRIVATE ggml-base)
|
|
55
|
-
target_include_directories(ggml-cuda PRIVATE . ..)
|
|
49
|
+
ggml_add_backend_library(ggml-cuda
|
|
50
|
+
${GGML_HEADERS_CUDA}
|
|
51
|
+
${GGML_SOURCES_CUDA}
|
|
52
|
+
)
|
|
56
53
|
|
|
57
54
|
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
|
|
58
55
|
|
|
@@ -135,7 +132,7 @@ if (CUDAToolkit_FOUND)
|
|
|
135
132
|
|
|
136
133
|
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
|
|
137
134
|
|
|
138
|
-
|
|
135
|
+
ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
|
|
139
136
|
list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
|
|
140
137
|
endif()
|
|
141
138
|
|
|
@@ -149,7 +146,7 @@ if (CUDAToolkit_FOUND)
|
|
|
149
146
|
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
|
|
150
147
|
endif()
|
|
151
148
|
|
|
152
|
-
|
|
149
|
+
target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
|
|
153
150
|
else()
|
|
154
151
|
message(FATAL_ERROR "CUDA Toolkit not found")
|
|
155
152
|
endif()
|
|
@@ -95,6 +95,14 @@
|
|
|
95
95
|
|
|
96
96
|
#define __CUDA_ARCH__ 1300
|
|
97
97
|
|
|
98
|
+
#if defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__)
|
|
99
|
+
#define GCN
|
|
100
|
+
#endif
|
|
101
|
+
|
|
102
|
+
#if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx942__)
|
|
103
|
+
#define CDNA
|
|
104
|
+
#endif
|
|
105
|
+
|
|
98
106
|
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \
|
|
99
107
|
defined(__gfx1150__) || defined(__gfx1151__)
|
|
100
108
|
#define RDNA3
|
|
@@ -64,12 +64,10 @@ else()
|
|
|
64
64
|
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
|
65
65
|
endif()
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
target_link_libraries(ggml-hip PRIVATE ggml-base)
|
|
72
|
-
target_include_directories(ggml-hip PRIVATE . ..)
|
|
67
|
+
ggml_add_backend_library(ggml-hip
|
|
68
|
+
${GGML_HEADERS_ROCM}
|
|
69
|
+
${GGML_SOURCES_ROCM}
|
|
70
|
+
)
|
|
73
71
|
|
|
74
72
|
# TODO: do not use CUDA definitions for HIP
|
|
75
73
|
target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#include <arm_sve.h>
|
|
15
15
|
#endif // __ARM_FEATURE_SVE
|
|
16
16
|
|
|
17
|
-
#if defined(__ARM_NEON)
|
|
17
|
+
#if defined(__ARM_NEON) && !defined(__CUDACC__)
|
|
18
18
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
|
19
19
|
//
|
|
20
20
|
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
|
@@ -30,11 +30,13 @@
|
|
|
30
30
|
extern "C" {
|
|
31
31
|
#endif
|
|
32
32
|
|
|
33
|
-
#
|
|
34
|
-
#
|
|
33
|
+
#ifndef MIN
|
|
34
|
+
# define MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
35
|
+
#endif
|
|
35
36
|
|
|
36
|
-
#
|
|
37
|
-
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
37
|
+
#ifndef MAX
|
|
38
|
+
# define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
39
|
+
#endif
|
|
38
40
|
|
|
39
41
|
// required for mmap as gguf only guarantees 32-byte alignment
|
|
40
42
|
#define TENSOR_ALIGNMENT 32
|
|
@@ -72,8 +74,8 @@ static inline int ggml_up(int n, int m) {
|
|
|
72
74
|
//
|
|
73
75
|
|
|
74
76
|
GGML_ATTRIBUTE_FORMAT(2, 3)
|
|
75
|
-
void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
|
|
76
|
-
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
|
|
77
|
+
GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
|
|
78
|
+
GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
|
|
77
79
|
|
|
78
80
|
#define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
|
|
79
81
|
#define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
|
|
@@ -295,24 +297,27 @@ struct ggml_cgraph {
|
|
|
295
297
|
enum ggml_cgraph_eval_order order;
|
|
296
298
|
};
|
|
297
299
|
|
|
300
|
+
// returns a slice of cgraph with nodes [i0, i1)
|
|
301
|
+
// the slice does not have leafs or gradients
|
|
302
|
+
// if you need the gradients, get them from the original graph
|
|
298
303
|
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
|
|
299
304
|
|
|
300
305
|
// Memory allocation
|
|
301
306
|
|
|
302
|
-
void * ggml_aligned_malloc(size_t size);
|
|
303
|
-
void ggml_aligned_free(void * ptr, size_t size);
|
|
307
|
+
GGML_API void * ggml_aligned_malloc(size_t size);
|
|
308
|
+
GGML_API void ggml_aligned_free(void * ptr, size_t size);
|
|
304
309
|
|
|
305
310
|
// FP16 to FP32 conversion
|
|
306
311
|
|
|
307
312
|
#if defined(__ARM_NEON)
|
|
308
|
-
#
|
|
313
|
+
#if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
|
309
314
|
typedef uint16_t ggml_fp16_internal_t;
|
|
310
315
|
#else
|
|
311
316
|
typedef __fp16 ggml_fp16_internal_t;
|
|
312
317
|
#endif
|
|
313
318
|
#endif
|
|
314
319
|
|
|
315
|
-
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
|
320
|
+
#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
|
|
316
321
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
|
317
322
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
|
318
323
|
|
|
@@ -546,6 +551,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
|
|
|
546
551
|
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
|
|
547
552
|
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
|
|
548
553
|
|
|
554
|
+
// expose GGUF internals for test code
|
|
555
|
+
|
|
556
|
+
GGML_API size_t gguf_type_size(enum gguf_type type);
|
|
557
|
+
|
|
558
|
+
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
|
|
559
|
+
|
|
560
|
+
struct gguf_buf {
|
|
561
|
+
void * data;
|
|
562
|
+
size_t size;
|
|
563
|
+
size_t offset;
|
|
564
|
+
};
|
|
565
|
+
GGML_API struct gguf_buf gguf_buf_init(size_t size);
|
|
566
|
+
GGML_API void gguf_buf_free(struct gguf_buf buf);
|
|
567
|
+
|
|
568
|
+
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
|
|
569
|
+
|
|
549
570
|
#ifdef __cplusplus
|
|
550
571
|
}
|
|
551
572
|
#endif
|
|
@@ -6,13 +6,13 @@ if (NOT glslc_executable)
|
|
|
6
6
|
message(FATAL_ERROR "glslc not found")
|
|
7
7
|
endif()
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
ggml_add_backend_library(ggml-kompute
|
|
10
|
+
ggml-kompute.cpp
|
|
11
|
+
../../include/ggml-kompute.h
|
|
12
|
+
)
|
|
13
13
|
|
|
14
14
|
target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
|
|
15
|
-
target_include_directories(ggml-kompute PRIVATE
|
|
15
|
+
target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
|
16
16
|
|
|
17
17
|
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
|
|
18
18
|
|
|
@@ -105,8 +105,10 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
|
|
|
105
105
|
kompute-shaders/op_getrows_q4_0.comp
|
|
106
106
|
kompute-shaders/op_getrows_q4_1.comp
|
|
107
107
|
kompute-shaders/op_getrows_q6_k.comp
|
|
108
|
-
kompute-shaders/
|
|
109
|
-
kompute-shaders/
|
|
108
|
+
kompute-shaders/op_rope_norm_f16.comp
|
|
109
|
+
kompute-shaders/op_rope_norm_f32.comp
|
|
110
|
+
kompute-shaders/op_rope_neox_f16.comp
|
|
111
|
+
kompute-shaders/op_rope_neox_f32.comp
|
|
110
112
|
kompute-shaders/op_cpy_f16_f16.comp
|
|
111
113
|
kompute-shaders/op_cpy_f16_f32.comp
|
|
112
114
|
kompute-shaders/op_cpy_f32_f16.comp
|
|
@@ -139,8 +141,10 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
|
|
|
139
141
|
shaderop_getrows_q4_0.h
|
|
140
142
|
shaderop_getrows_q4_1.h
|
|
141
143
|
shaderop_getrows_q6_k.h
|
|
142
|
-
|
|
143
|
-
|
|
144
|
+
shaderop_rope_norm_f16.h
|
|
145
|
+
shaderop_rope_norm_f32.h
|
|
146
|
+
shaderop_rope_neox_f16.h
|
|
147
|
+
shaderop_rope_neox_f32.h
|
|
144
148
|
shaderop_cpy_f16_f16.h
|
|
145
149
|
shaderop_cpy_f16_f32.h
|
|
146
150
|
shaderop_cpy_f32_f16.h
|