@fugood/llama.node 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -10
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +6 -4
- package/src/LlamaCompletionWorker.cpp +6 -6
- package/src/LlamaContext.cpp +7 -9
- package/src/common.hpp +2 -1
- package/src/llama.cpp/.github/workflows/build.yml +98 -24
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +43 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +20 -8
- package/src/llama.cpp/common/CMakeLists.txt +12 -10
- package/src/llama.cpp/common/arg.cpp +2006 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +496 -1632
- package/src/llama.cpp/common/common.h +161 -63
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +3 -0
- package/src/llama.cpp/common/sampling.cpp +348 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/common/train.cpp +2 -0
- package/src/llama.cpp/docs/build.md +36 -1
- package/src/llama.cpp/examples/CMakeLists.txt +0 -1
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +39 -55
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +34 -44
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +15 -15
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +143 -87
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +33 -33
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +36 -35
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +34 -27
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +59 -62
- package/src/llama.cpp/examples/infill/infill.cpp +117 -132
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +265 -58
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +29 -22
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +685 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +47 -58
- package/src/llama.cpp/examples/llava/llava.cpp +110 -24
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +42 -43
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +10 -8
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +23 -22
- package/src/llama.cpp/examples/lookup/lookup.cpp +40 -43
- package/src/llama.cpp/examples/main/main.cpp +210 -262
- package/src/llama.cpp/examples/parallel/parallel.cpp +49 -49
- package/src/llama.cpp/examples/passkey/passkey.cpp +42 -50
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +187 -200
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -3
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +49 -44
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +24 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +32 -35
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -5
- package/src/llama.cpp/examples/server/server.cpp +1027 -1073
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +107 -105
- package/src/llama.cpp/examples/simple/simple.cpp +35 -41
- package/src/llama.cpp/examples/speculative/speculative.cpp +129 -103
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +25 -27
- package/src/llama.cpp/ggml/CMakeLists.txt +14 -3
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-backend.h +145 -60
- package/src/llama.cpp/ggml/include/ggml-blas.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +15 -19
- package/src/llama.cpp/ggml/include/ggml-cuda.h +16 -16
- package/src/llama.cpp/ggml/include/ggml-metal.h +5 -8
- package/src/llama.cpp/ggml/include/ggml-rpc.h +5 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +8 -8
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +7 -7
- package/src/llama.cpp/ggml/include/ggml.h +293 -186
- package/src/llama.cpp/ggml/src/CMakeLists.txt +86 -44
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +2135 -1119
- package/src/llama.cpp/ggml/src/ggml-alloc.c +6 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +152 -70
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +606 -286
- package/src/llama.cpp/ggml/src/ggml-blas.cpp +9 -10
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-cann.cpp +215 -216
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu-impl.h +614 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +49 -603
- package/src/llama.cpp/ggml/src/ggml-kompute.cpp +4 -24
- package/src/llama.cpp/ggml/src/ggml-quants.c +972 -92
- package/src/llama.cpp/ggml/src/ggml-quants.h +15 -0
- package/src/llama.cpp/ggml/src/ggml-rpc.cpp +116 -66
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +52 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +16 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +6 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl.cpp +97 -169
- package/src/llama.cpp/ggml/src/ggml-vulkan.cpp +1508 -1124
- package/src/llama.cpp/ggml/src/ggml.c +3001 -1647
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +192 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +88 -40
- package/src/llama.cpp/include/llama.h +241 -264
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1375 -303
- package/src/llama.cpp/src/llama-sampling.h +20 -47
- package/src/llama.cpp/src/llama-vocab.cpp +343 -120
- package/src/llama.cpp/src/llama-vocab.h +33 -17
- package/src/llama.cpp/src/llama.cpp +4247 -1525
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +3 -0
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1592 -289
- package/src/llama.cpp/tests/test-barrier.cpp +93 -0
- package/src/llama.cpp/tests/test-grad0.cpp +187 -70
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +6 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +6 -0
- package/src/llama.cpp/tests/test-rope.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +157 -98
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +55 -35
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
|
@@ -56,6 +56,15 @@ else()
|
|
|
56
56
|
set(GGML_NATIVE_DEFAULT ON)
|
|
57
57
|
endif()
|
|
58
58
|
|
|
59
|
+
# defaults
|
|
60
|
+
if (NOT GGML_LLAMAFILE_DEFAULT)
|
|
61
|
+
set(GGML_LLAMAFILE_DEFAULT OFF)
|
|
62
|
+
endif()
|
|
63
|
+
|
|
64
|
+
if (NOT GGML_CUDA_GRAPHS_DEFAULT)
|
|
65
|
+
set(GGML_CUDA_GRAPHS_DEFAULT OFF)
|
|
66
|
+
endif()
|
|
67
|
+
|
|
59
68
|
# general
|
|
60
69
|
option(GGML_STATIC "ggml: static link libraries" OFF)
|
|
61
70
|
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
|
|
@@ -110,7 +119,7 @@ option(GGML_ACCELERATE "ggml: enable Accelerate framework"
|
|
|
110
119
|
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
|
|
111
120
|
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
|
|
112
121
|
"ggml: BLAS library vendor")
|
|
113
|
-
option(GGML_LLAMAFILE "ggml: use LLAMAFILE"
|
|
122
|
+
option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
|
|
114
123
|
|
|
115
124
|
option(GGML_CUDA "ggml: use CUDA" OFF)
|
|
116
125
|
option(GGML_MUSA "ggml: use MUSA" OFF)
|
|
@@ -127,15 +136,16 @@ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
|
|
127
136
|
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
|
|
128
137
|
option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
|
|
129
138
|
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
|
|
130
|
-
option(
|
|
139
|
+
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
|
|
131
140
|
|
|
132
|
-
option(GGML_CURL "ggml: use libcurl to download model from an URL" OFF)
|
|
133
141
|
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
|
|
134
142
|
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
|
|
135
143
|
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
|
136
144
|
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
|
137
145
|
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
|
138
146
|
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
|
|
147
|
+
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
148
|
+
option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
|
|
139
149
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
|
140
150
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
|
141
151
|
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
|
@@ -207,6 +217,7 @@ set(GGML_PUBLIC_HEADERS
|
|
|
207
217
|
include/ggml-alloc.h
|
|
208
218
|
include/ggml-backend.h
|
|
209
219
|
include/ggml-blas.h
|
|
220
|
+
include/ggml-cann.h
|
|
210
221
|
include/ggml-cuda.h
|
|
211
222
|
include/ggml.h
|
|
212
223
|
include/ggml-kompute.h
|
|
@@ -7,8 +7,8 @@ extern "C" {
|
|
|
7
7
|
#endif
|
|
8
8
|
|
|
9
9
|
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
|
10
|
-
typedef struct
|
|
11
|
-
typedef struct
|
|
10
|
+
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
|
11
|
+
typedef struct ggml_backend * ggml_backend_t;
|
|
12
12
|
|
|
13
13
|
// Tensor allocator
|
|
14
14
|
struct ggml_tallocr {
|
|
@@ -24,7 +24,7 @@ GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, st
|
|
|
24
24
|
// Graph allocator
|
|
25
25
|
/*
|
|
26
26
|
Example usage:
|
|
27
|
-
ggml_gallocr_t galloc = ggml_gallocr_new(
|
|
27
|
+
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
|
|
28
28
|
|
|
29
29
|
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
|
|
30
30
|
ggml_gallocr_reserve(galloc, build_graph(max_batch));
|
|
@@ -12,43 +12,52 @@ extern "C" {
|
|
|
12
12
|
typedef struct ggml_backend_event * ggml_backend_event_t;
|
|
13
13
|
typedef struct ggml_backend * ggml_backend_t;
|
|
14
14
|
typedef void * ggml_backend_graph_plan_t;
|
|
15
|
+
typedef struct ggml_backend_reg * ggml_backend_reg_t;
|
|
16
|
+
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
//
|
|
17
|
-
// Backend buffer
|
|
20
|
+
// Backend buffer type
|
|
18
21
|
//
|
|
19
22
|
|
|
20
|
-
|
|
21
|
-
GGML_API
|
|
22
|
-
GGML_API
|
|
23
|
-
GGML_API
|
|
24
|
-
GGML_API
|
|
25
|
-
GGML_API
|
|
26
|
-
GGML_API
|
|
23
|
+
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
|
24
|
+
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
|
25
|
+
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
|
26
|
+
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
|
27
|
+
GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
|
28
|
+
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
|
29
|
+
GGML_API ggml_backend_dev_t ggml_backend_buft_get_device (ggml_backend_buffer_type_t buft);
|
|
30
|
+
|
|
31
|
+
//
|
|
32
|
+
// Backend buffer
|
|
33
|
+
//
|
|
27
34
|
|
|
28
|
-
// buffer
|
|
29
35
|
enum ggml_backend_buffer_usage {
|
|
30
36
|
GGML_BACKEND_BUFFER_USAGE_ANY = 0,
|
|
31
37
|
GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
|
|
32
38
|
GGML_BACKEND_BUFFER_USAGE_COMPUTE = 2,
|
|
33
39
|
};
|
|
34
40
|
|
|
35
|
-
GGML_API
|
|
36
|
-
GGML_API
|
|
37
|
-
GGML_API
|
|
38
|
-
GGML_API
|
|
39
|
-
GGML_API
|
|
40
|
-
GGML_API
|
|
41
|
-
GGML_API
|
|
42
|
-
GGML_API
|
|
43
|
-
GGML_API
|
|
44
|
-
GGML_API
|
|
45
|
-
GGML_API
|
|
46
|
-
GGML_API
|
|
47
|
-
GGML_API
|
|
48
|
-
GGML_API
|
|
41
|
+
GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
|
|
42
|
+
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
|
|
43
|
+
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
|
|
44
|
+
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
|
45
|
+
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
46
|
+
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
|
47
|
+
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
|
48
|
+
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
|
49
|
+
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
|
50
|
+
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
|
51
|
+
GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
|
|
52
|
+
GGML_API enum ggml_backend_buffer_usage ggml_backend_buffer_get_usage (ggml_backend_buffer_t buffer);
|
|
53
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
|
|
54
|
+
GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
|
|
55
|
+
|
|
56
|
+
// tensor copy between different backends
|
|
57
|
+
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
49
58
|
|
|
50
59
|
//
|
|
51
|
-
// Backend
|
|
60
|
+
// Backend (stream)
|
|
52
61
|
//
|
|
53
62
|
|
|
54
63
|
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
|
@@ -63,8 +72,10 @@ extern "C" {
|
|
|
63
72
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
64
73
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
65
74
|
|
|
66
|
-
|
|
67
|
-
GGML_API
|
|
75
|
+
// "offset" refers to the offset of the tensor data for setting/getting data
|
|
76
|
+
GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
|
77
|
+
GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
|
78
|
+
GGML_API void ggml_backend_tensor_memset( struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
|
|
68
79
|
|
|
69
80
|
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
|
70
81
|
|
|
@@ -74,64 +85,118 @@ extern "C" {
|
|
|
74
85
|
GGML_API enum ggml_status ggml_backend_graph_plan_compute (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
|
75
86
|
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
76
87
|
GGML_API enum ggml_status ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
|
88
|
+
|
|
89
|
+
// NOTE: will be removed, use device version instead
|
|
77
90
|
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
78
91
|
GGML_API bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
|
|
79
92
|
GGML_API bool ggml_backend_offload_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
|
80
93
|
|
|
81
|
-
// tensor copy between different backends
|
|
82
|
-
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
83
|
-
|
|
84
94
|
// asynchronous copy
|
|
85
95
|
// the copy is performed after all the currently queued operations in backend_src
|
|
86
96
|
// backend_dst will wait for the copy to complete before performing other operations
|
|
87
97
|
// automatic fallback to sync copy if async is not supported
|
|
88
98
|
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
|
89
99
|
|
|
90
|
-
|
|
91
|
-
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
|
|
92
|
-
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
|
93
|
-
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
|
94
|
-
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
|
95
|
-
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event);
|
|
100
|
+
GGML_API ggml_backend_dev_t ggml_backend_get_device(ggml_backend_t backend);
|
|
96
101
|
|
|
97
102
|
//
|
|
98
|
-
//
|
|
103
|
+
// Events
|
|
99
104
|
//
|
|
100
105
|
|
|
101
|
-
GGML_API
|
|
106
|
+
GGML_API ggml_backend_event_t ggml_backend_event_new(ggml_backend_dev_t device);
|
|
107
|
+
GGML_API void ggml_backend_event_free(ggml_backend_event_t event);
|
|
108
|
+
GGML_API void ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend);
|
|
109
|
+
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
|
110
|
+
GGML_API void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event);
|
|
102
111
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
112
|
+
//
|
|
113
|
+
// Backend device
|
|
114
|
+
//
|
|
106
115
|
|
|
107
|
-
|
|
108
|
-
|
|
116
|
+
enum ggml_backend_dev_type {
|
|
117
|
+
GGML_BACKEND_DEVICE_TYPE_CPU,
|
|
118
|
+
GGML_BACKEND_DEVICE_TYPE_GPU,
|
|
119
|
+
// devices with full capabilities (excludes backends such as BLAS that only support matrix multiplication)
|
|
120
|
+
GGML_BACKEND_DEVICE_TYPE_CPU_FULL,
|
|
121
|
+
GGML_BACKEND_DEVICE_TYPE_GPU_FULL
|
|
122
|
+
};
|
|
109
123
|
|
|
110
|
-
|
|
124
|
+
// functionality supported by the device
|
|
125
|
+
struct ggml_backend_dev_caps {
|
|
126
|
+
// asynchronous operations
|
|
127
|
+
bool async;
|
|
128
|
+
// pinned host buffer
|
|
129
|
+
bool host_buffer;
|
|
130
|
+
// event synchronization
|
|
131
|
+
bool events;
|
|
132
|
+
};
|
|
111
133
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
134
|
+
// all the device properties
|
|
135
|
+
struct ggml_backend_dev_props {
|
|
136
|
+
const char * name;
|
|
137
|
+
const char * description;
|
|
138
|
+
size_t memory_free;
|
|
139
|
+
size_t memory_total;
|
|
140
|
+
enum ggml_backend_dev_type type;
|
|
141
|
+
struct ggml_backend_dev_caps caps;
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
|
145
|
+
GGML_API const char * ggml_backend_dev_description(ggml_backend_dev_t device);
|
|
146
|
+
GGML_API void ggml_backend_dev_memory(ggml_backend_dev_t device, size_t * free, size_t * total);
|
|
147
|
+
GGML_API enum ggml_backend_dev_type ggml_backend_dev_type(ggml_backend_dev_t device);
|
|
148
|
+
GGML_API void ggml_backend_dev_get_props(ggml_backend_dev_t device, struct ggml_backend_dev_props * props);
|
|
149
|
+
GGML_API ggml_backend_reg_t ggml_backend_dev_backend_reg(ggml_backend_dev_t device);
|
|
150
|
+
GGML_API ggml_backend_t ggml_backend_dev_init(ggml_backend_dev_t device, const char * params);
|
|
151
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_buffer_type(ggml_backend_dev_t device);
|
|
152
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device);
|
|
153
|
+
GGML_API ggml_backend_buffer_t ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, void * ptr, size_t size, size_t max_tensor_size);
|
|
154
|
+
|
|
155
|
+
GGML_API bool ggml_backend_dev_supports_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
|
156
|
+
GGML_API bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft);
|
|
157
|
+
GGML_API bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op);
|
|
115
158
|
|
|
116
159
|
//
|
|
117
|
-
// Backend
|
|
160
|
+
// Backend (reg)
|
|
118
161
|
//
|
|
119
162
|
|
|
120
|
-
|
|
163
|
+
GGML_API const char * ggml_backend_reg_name(ggml_backend_reg_t reg);
|
|
164
|
+
GGML_API size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg);
|
|
165
|
+
GGML_API ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index);
|
|
166
|
+
GGML_API void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * name);
|
|
121
167
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
168
|
+
|
|
169
|
+
// Functions that may be obtained using ggml_backend_reg_get_proc_address
|
|
170
|
+
typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
|
|
171
|
+
|
|
172
|
+
//
|
|
173
|
+
// Backend registry
|
|
174
|
+
//
|
|
175
|
+
|
|
176
|
+
// Backend (reg) enumeration
|
|
177
|
+
GGML_API size_t ggml_backend_reg_count(void);
|
|
178
|
+
GGML_API ggml_backend_reg_t ggml_backend_reg_get(size_t index);
|
|
179
|
+
GGML_API ggml_backend_reg_t ggml_backend_reg_by_name(const char * name);
|
|
180
|
+
|
|
181
|
+
// Device enumeration
|
|
182
|
+
GGML_API size_t ggml_backend_dev_count(void);
|
|
183
|
+
GGML_API ggml_backend_dev_t ggml_backend_dev_get(size_t index);
|
|
184
|
+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_name(const char * name);
|
|
185
|
+
GGML_API ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type);
|
|
186
|
+
|
|
187
|
+
// Direct backend (stream) initialization
|
|
188
|
+
// = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
|
|
189
|
+
GGML_API ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params);
|
|
190
|
+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
|
|
191
|
+
GGML_API ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params);
|
|
192
|
+
// = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU_FULL) OR ggml_backend_dev_by_type(CPU_FULL), NULL)
|
|
193
|
+
GGML_API ggml_backend_t ggml_backend_init_best(void);
|
|
129
194
|
|
|
130
195
|
//
|
|
131
196
|
// Backend scheduler
|
|
132
197
|
//
|
|
133
198
|
|
|
134
|
-
// The backend scheduler allows for multiple
|
|
199
|
+
// The backend scheduler allows for multiple backend devices to be used together
|
|
135
200
|
// Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
|
|
136
201
|
// The backends are selected based on:
|
|
137
202
|
// - the backend that supports the operation
|
|
@@ -166,9 +231,9 @@ extern "C" {
|
|
|
166
231
|
}
|
|
167
232
|
*/
|
|
168
233
|
|
|
169
|
-
struct ggml_backend_sched;
|
|
170
234
|
typedef struct ggml_backend_sched * ggml_backend_sched_t;
|
|
171
235
|
|
|
236
|
+
// Evaluation callback for each node in the graph (set with ggml_backend_sched_set_eval_callback)
|
|
172
237
|
// when ask == true, the scheduler wants to know if the user wants to observe this node
|
|
173
238
|
// this allows the scheduler to batch nodes together in order to evaluate them in a single call
|
|
174
239
|
//
|
|
@@ -182,7 +247,7 @@ extern "C" {
|
|
|
182
247
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
|
183
248
|
|
|
184
249
|
// Initialize backend buffers from a measure graph
|
|
185
|
-
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
|
250
|
+
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); // returns success
|
|
186
251
|
|
|
187
252
|
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
|
|
188
253
|
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);
|
|
@@ -197,7 +262,7 @@ extern "C" {
|
|
|
197
262
|
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
|
198
263
|
|
|
199
264
|
// Allocate and compute graph on the backend scheduler
|
|
200
|
-
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
|
265
|
+
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
|
|
201
266
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
|
202
267
|
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
|
203
268
|
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
|
@@ -223,7 +288,7 @@ extern "C" {
|
|
|
223
288
|
GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
|
|
224
289
|
GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
|
|
225
290
|
|
|
226
|
-
typedef bool (*
|
|
291
|
+
typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
|
|
227
292
|
|
|
228
293
|
// Compare the output of two backends
|
|
229
294
|
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
|
|
@@ -232,6 +297,26 @@ extern "C" {
|
|
|
232
297
|
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
|
|
233
298
|
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
|
|
234
299
|
|
|
300
|
+
//
|
|
301
|
+
// CPU backend
|
|
302
|
+
//
|
|
303
|
+
|
|
304
|
+
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
|
305
|
+
|
|
306
|
+
GGML_API bool ggml_backend_is_cpu (ggml_backend_t backend);
|
|
307
|
+
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
|
308
|
+
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
|
|
309
|
+
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
|
310
|
+
|
|
311
|
+
// Create a backend buffer from an existing pointer
|
|
312
|
+
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
|
313
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
|
|
314
|
+
|
|
315
|
+
GGML_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
|
316
|
+
|
|
317
|
+
#ifdef GGML_USE_CPU_HBM
|
|
318
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
|
|
319
|
+
#endif
|
|
235
320
|
|
|
236
321
|
#ifdef __cplusplus
|
|
237
322
|
}
|
|
@@ -9,13 +9,13 @@ extern "C" {
|
|
|
9
9
|
#endif
|
|
10
10
|
|
|
11
11
|
// backend API
|
|
12
|
-
GGML_API
|
|
12
|
+
GGML_API ggml_backend_t ggml_backend_blas_init(void);
|
|
13
13
|
|
|
14
|
-
GGML_API
|
|
14
|
+
GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
|
|
15
15
|
|
|
16
16
|
// number of threads used for conversion to float
|
|
17
17
|
// for openblas and blis, this will also set the number of threads used for blas operations
|
|
18
|
-
GGML_API GGML_CALL void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
|
18
|
+
GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
#ifdef __cplusplus
|
|
@@ -44,7 +44,7 @@ extern "C" {
|
|
|
44
44
|
* @param device The index of the device to initialize.
|
|
45
45
|
* @return A pointer to the initialized backend instance, or nullptr on failure.
|
|
46
46
|
*/
|
|
47
|
-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
|
|
47
|
+
GGML_API ggml_backend_t ggml_backend_cann_init(int32_t device);
|
|
48
48
|
|
|
49
49
|
/**
|
|
50
50
|
* @brief Checks if a given backend is a CANN backend.
|
|
@@ -55,7 +55,7 @@ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);
|
|
|
55
55
|
* @param backend The backend instance to check.
|
|
56
56
|
* @return True if the backend is a CANN backend, false otherwise.
|
|
57
57
|
*/
|
|
58
|
-
GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
|
|
58
|
+
GGML_API bool ggml_backend_is_cann(ggml_backend_t backend);
|
|
59
59
|
|
|
60
60
|
/**
|
|
61
61
|
* @brief Retrieves the CANN buffer type for a specified device.
|
|
@@ -67,7 +67,7 @@ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend);
|
|
|
67
67
|
* @return A pointer to the buffer type interface for the specified device, or
|
|
68
68
|
* nullptr if the device index is out of range.
|
|
69
69
|
*/
|
|
70
|
-
GGML_API GGML_CALL ggml_backend_buffer_type_t
|
|
70
|
+
GGML_API ggml_backend_buffer_type_t
|
|
71
71
|
ggml_backend_cann_buffer_type(int32_t device);
|
|
72
72
|
|
|
73
73
|
/**
|
|
@@ -78,7 +78,14 @@ ggml_backend_cann_buffer_type(int32_t device);
|
|
|
78
78
|
*
|
|
79
79
|
* @return The number of CANN devices available.
|
|
80
80
|
*/
|
|
81
|
-
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
|
81
|
+
GGML_API int32_t ggml_backend_cann_get_device_count(void);
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
|
|
85
|
+
*
|
|
86
|
+
* @return A pointer to the host buffer type interface.
|
|
87
|
+
*/
|
|
88
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
|
|
82
89
|
|
|
83
90
|
/**
|
|
84
91
|
* @brief Retrieves the description of a specific CANN device.
|
|
@@ -90,7 +97,7 @@ GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
|
|
|
90
97
|
* @param description Pointer to a buffer where the description will be written.
|
|
91
98
|
* @param description_size Size of the description buffer.
|
|
92
99
|
*/
|
|
93
|
-
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
|
|
100
|
+
GGML_API void ggml_backend_cann_get_device_description(
|
|
94
101
|
int32_t device, char* description, size_t description_size);
|
|
95
102
|
|
|
96
103
|
/**
|
|
@@ -105,20 +112,9 @@ GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
|
|
|
105
112
|
* @param total Pointer to a variable where the total memory size will be
|
|
106
113
|
* stored.
|
|
107
114
|
*/
|
|
108
|
-
GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device,
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* @brief Set the logging callback for GGML.
|
|
114
|
-
*
|
|
115
|
-
* This function sets the logging callback and user data for logging.
|
|
116
|
-
*
|
|
117
|
-
* @param log_callback The logging callback to set.
|
|
118
|
-
* @param user_data User data to pass to the logging callback.
|
|
119
|
-
*/
|
|
120
|
-
GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
|
|
121
|
-
void* user_data);
|
|
115
|
+
GGML_API void ggml_backend_cann_get_device_memory(int32_t device,
|
|
116
|
+
size_t* free,
|
|
117
|
+
size_t* total);
|
|
122
118
|
|
|
123
119
|
#ifdef __cplusplus
|
|
124
120
|
}
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
#include "ggml.h"
|
|
4
4
|
#include "ggml-backend.h"
|
|
5
5
|
|
|
6
|
+
#ifdef __cplusplus
|
|
7
|
+
extern "C" {
|
|
8
|
+
#endif
|
|
9
|
+
|
|
6
10
|
#ifdef GGML_USE_HIPBLAS
|
|
7
11
|
#define GGML_CUDA_NAME "ROCm"
|
|
8
12
|
#define GGML_CUBLAS_NAME "hipBLAS"
|
|
@@ -13,35 +17,31 @@
|
|
|
13
17
|
#define GGML_CUDA_NAME "CUDA"
|
|
14
18
|
#define GGML_CUBLAS_NAME "cuBLAS"
|
|
15
19
|
#endif
|
|
16
|
-
|
|
17
|
-
#ifdef __cplusplus
|
|
18
|
-
extern "C" {
|
|
19
|
-
#endif
|
|
20
|
-
|
|
21
20
|
#define GGML_CUDA_MAX_DEVICES 16
|
|
22
21
|
|
|
23
22
|
// backend API
|
|
24
|
-
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
|
|
23
|
+
GGML_API ggml_backend_t ggml_backend_cuda_init(int device);
|
|
25
24
|
|
|
26
|
-
GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend);
|
|
25
|
+
GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
|
|
27
26
|
|
|
28
27
|
// device buffer
|
|
29
|
-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
|
28
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
|
|
30
29
|
|
|
31
30
|
// split tensor buffer that splits matrices by rows across multiple devices
|
|
32
|
-
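GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);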
|
|
31
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
|
|
33
32
|
|
|
34
33
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
|
35
|
-
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
|
34
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
|
|
35
|
+
|
|
36
|
+
GGML_API int ggml_backend_cuda_get_device_count(void);
|
|
37
|
+
GGML_API void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);
|
|
38
|
+
GGML_API void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
|
36
39
|
|
|
37
|
-
GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void);
|
|
38
|
-
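GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size);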
|
|
39
|
-
GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total);
|
|
40
|
+
GGML_API bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
|
41
|
+
GGML_API void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
|
40
42
|
|
|
41
|
-
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size);
|
|
42
|
-
GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer);
|
|
43
|
+
GGML_API ggml_backend_reg_t ggml_backend_cuda_reg(void);
|
|
43
44
|
|
|
44
|
-
GGML_API void ggml_backend_cuda_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
|
45
45
|
#ifdef __cplusplus
|
|
46
46
|
}
|
|
47
47
|
#endif
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// Note: this description is outdated
|
|
2
|
+
//
|
|
1
3
|
// An interface allowing to compute ggml_cgraph with Metal
|
|
2
4
|
//
|
|
3
5
|
// This is a fully functional interface that extends ggml with GPU support for Apple devices.
|
|
@@ -25,9 +27,6 @@
|
|
|
25
27
|
#include <stddef.h>
|
|
26
28
|
#include <stdbool.h>
|
|
27
29
|
|
|
28
|
-
// max memory buffers that can be mapped to the device
|
|
29
|
-
#define GGML_METAL_MAX_BUFFERS 64
|
|
30
|
-
|
|
31
30
|
struct ggml_tensor;
|
|
32
31
|
struct ggml_cgraph;
|
|
33
32
|
|
|
@@ -40,17 +39,15 @@ extern "C" {
|
|
|
40
39
|
// user-code should use only these functions
|
|
41
40
|
//
|
|
42
41
|
|
|
43
|
-
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
|
44
|
-
|
|
45
42
|
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
|
46
43
|
|
|
47
44
|
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
|
48
45
|
|
|
49
|
-
GGML_API
|
|
46
|
+
GGML_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size);
|
|
50
47
|
|
|
51
|
-
GGML_API void
|
|
48
|
+
GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
|
|
52
49
|
|
|
53
|
-
GGML_API
|
|
50
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
|
54
51
|
|
|
55
52
|
// helper to check if the device supports a specific family
|
|
56
53
|
// ideally, the user code should be doing these checks
|
|
@@ -10,14 +10,14 @@ extern "C" {
|
|
|
10
10
|
#define GGML_RPC_MAX_SERVERS 16
|
|
11
11
|
|
|
12
12
|
// backend API
|
|
13
|
-
GGML_API
|
|
14
|
-
GGML_API
|
|
13
|
+
GGML_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
|
|
14
|
+
GGML_API bool ggml_backend_is_rpc(ggml_backend_t backend);
|
|
15
15
|
|
|
16
|
-
GGML_API
|
|
16
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
|
|
17
17
|
|
|
18
|
-
GGML_API
|
|
18
|
+
GGML_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
|
|
19
19
|
|
|
20
|
-
GGML_API
|
|
20
|
+
GGML_API void start_rpc_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem);
|
|
21
21
|
|
|
22
22
|
#ifdef __cplusplus
|
|
23
23
|
}
|
|
@@ -23,20 +23,20 @@ GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
|
|
|
23
23
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
|
|
24
24
|
|
|
25
25
|
// split tensor buffer that splits matrices by rows across multiple devices
|
|
26
|
-
GGML_API
|
|
26
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
|
27
27
|
|
|
28
28
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
|
29
29
|
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
|
30
30
|
|
|
31
|
-
GGML_API void
|
|
32
|
-
GGML_API
|
|
33
|
-
GGML_API
|
|
34
|
-
GGML_API
|
|
35
|
-
GGML_API
|
|
31
|
+
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
|
32
|
+
GGML_API void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
|
33
|
+
GGML_API void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
|
34
|
+
GGML_API int ggml_backend_sycl_get_device_count();
|
|
35
|
+
GGML_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
|
36
36
|
|
|
37
37
|
// SYCL doesn't support registering host memory, keep here for reference
|
|
38
|
-
// GGML_API
|
|
39
|
-
// GGML_API
|
|
38
|
+
// GGML_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
|
|
39
|
+
// GGML_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);
|
|
40
40
|
#ifdef __cplusplus
|
|
41
41
|
}
|
|
42
42
|
#endif
|
|
@@ -13,16 +13,16 @@ extern "C" {
|
|
|
13
13
|
GGML_API void ggml_vk_instance_init(void);
|
|
14
14
|
|
|
15
15
|
// backend API
|
|
16
|
-
GGML_API
|
|
16
|
+
GGML_API ggml_backend_t ggml_backend_vk_init(size_t dev_num);
|
|
17
17
|
|
|
18
|
-
GGML_API
|
|
19
|
-
GGML_API
|
|
20
|
-
GGML_API
|
|
21
|
-
GGML_API
|
|
18
|
+
GGML_API bool ggml_backend_is_vk(ggml_backend_t backend);
|
|
19
|
+
GGML_API int ggml_backend_vk_get_device_count(void);
|
|
20
|
+
GGML_API void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size);
|
|
21
|
+
GGML_API void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total);
|
|
22
22
|
|
|
23
|
-
GGML_API
|
|
23
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num);
|
|
24
24
|
// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
|
|
25
|
-
GGML_API
|
|
25
|
+
GGML_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void);
|
|
26
26
|
|
|
27
27
|
#ifdef __cplusplus
|
|
28
28
|
}
|