@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
#include <cstring>
|
|
31
31
|
#include <mutex>
|
|
32
32
|
|
|
33
|
+
#include "ggml-impl.h"
|
|
33
34
|
#include "ggml-backend-impl.h"
|
|
34
35
|
#include "ggml-cann/aclnn_ops.h"
|
|
35
36
|
#include "ggml-cann/common.h"
|
|
@@ -38,68 +39,7 @@
|
|
|
38
39
|
|
|
39
40
|
#include "ggml-common.h"
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
* @brief Default logging callback for GGML.
|
|
43
|
-
*
|
|
44
|
-
* This function is the default logging callback that logs messages to stderr.
|
|
45
|
-
*
|
|
46
|
-
* @param level The log level.
|
|
47
|
-
* @param msg The log message.
|
|
48
|
-
* @param user_data User data passed to the callback.
|
|
49
|
-
*/
|
|
50
|
-
static void ggml_cann_default_log_callback(enum ggml_log_level level,
|
|
51
|
-
const char* msg, void* user_data) {
|
|
52
|
-
GGML_UNUSED(level);
|
|
53
|
-
GGML_UNUSED(user_data);
|
|
54
|
-
fprintf(stderr, "%s", msg);
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
ggml_log_callback ggml_cann_log_callback = ggml_cann_default_log_callback;
|
|
58
|
-
void* ggml_cann_log_user_data = NULL;
|
|
59
|
-
|
|
60
|
-
GGML_API void ggml_backend_cann_log_set_callback(ggml_log_callback log_callback,
|
|
61
|
-
void* user_data) {
|
|
62
|
-
ggml_cann_log_callback = log_callback;
|
|
63
|
-
ggml_cann_log_user_data = user_data;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
#define GGML_CANN_LOG_INFO(...) ggml_cann_log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
|
|
67
|
-
#define GGML_CANN_LOG_WARN(...) ggml_cann_log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
|
|
68
|
-
#define GGML_CANN_LOG_ERROR(...) \
|
|
69
|
-
ggml_cann_log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
|
|
70
|
-
|
|
71
|
-
GGML_ATTRIBUTE_FORMAT(2, 3)
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* @brief Log a message using the current logging callback.
|
|
75
|
-
*
|
|
76
|
-
* This function formats a log message and passes it to the current logging
|
|
77
|
-
* callback.
|
|
78
|
-
*
|
|
79
|
-
* @param level The log level.
|
|
80
|
-
* @param format The format string for the log message.
|
|
81
|
-
* @param ... The arguments for the format string.
|
|
82
|
-
*/
|
|
83
|
-
static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
|
|
84
|
-
if (ggml_cann_log_callback != NULL) {
|
|
85
|
-
va_list args;
|
|
86
|
-
va_start(args, format);
|
|
87
|
-
char buffer[128];
|
|
88
|
-
int len = vsnprintf(buffer, 128, format, args);
|
|
89
|
-
if (len < 128) {
|
|
90
|
-
ggml_cann_log_callback(level, buffer, ggml_cann_log_user_data);
|
|
91
|
-
} else {
|
|
92
|
-
// vsnprintf adds a null terminator
|
|
93
|
-
std::vector<char> buffer2(len + 1);
|
|
94
|
-
va_end(args);
|
|
95
|
-
va_start(args, format);
|
|
96
|
-
vsnprintf(&buffer2[0], buffer2.size(), format, args);
|
|
97
|
-
ggml_cann_log_callback(level, buffer2.data(),
|
|
98
|
-
ggml_cann_log_user_data);
|
|
99
|
-
}
|
|
100
|
-
va_end(args);
|
|
101
|
-
}
|
|
102
|
-
}
|
|
42
|
+
#define GGML_CANN_NAME "CANN"
|
|
103
43
|
|
|
104
44
|
/**
|
|
105
45
|
* @brief Handles CANN errors by printing an error message and aborting.
|
|
@@ -115,10 +55,10 @@ static void ggml_cann_log(enum ggml_log_level level, const char* format, ...) {
|
|
|
115
55
|
int32_t id = -1;
|
|
116
56
|
aclrtGetDevice(&id);
|
|
117
57
|
|
|
118
|
-
|
|
119
|
-
|
|
58
|
+
GGML_LOG_ERROR("CANN error: %s\n", msg);
|
|
59
|
+
GGML_LOG_ERROR(" current device: %d, in function %s at %s:%d\n", id, func,
|
|
120
60
|
file, line);
|
|
121
|
-
|
|
61
|
+
GGML_LOG_ERROR(" %s\n", stmt);
|
|
122
62
|
// abort with GGML_ASSERT to get a stack trace
|
|
123
63
|
GGML_ABORT("CANN error");
|
|
124
64
|
}
|
|
@@ -164,7 +104,7 @@ static ggml_cann_device_info ggml_cann_init() {
|
|
|
164
104
|
aclError err = aclrtGetDeviceCount((uint32_t*)&info.device_count);
|
|
165
105
|
|
|
166
106
|
if (err != ACL_SUCCESS) {
|
|
167
|
-
|
|
107
|
+
GGML_LOG_ERROR("%s: failed to initialize CANN: %s\n",
|
|
168
108
|
__func__, aclGetRecentErrMsg());
|
|
169
109
|
return info;
|
|
170
110
|
}
|
|
@@ -314,7 +254,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
|
314
254
|
*actual_size = look_ahead_size;
|
|
315
255
|
pool_size += look_ahead_size;
|
|
316
256
|
#ifdef DEBUG_CANN_MALLOC
|
|
317
|
-
|
|
257
|
+
GGML_LOG_INFO(
|
|
318
258
|
"%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, "
|
|
319
259
|
"requested %u MB\n",
|
|
320
260
|
__func__, device, nnz, (uint32_t)(max_size / 1024 / 1024),
|
|
@@ -469,7 +409,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
469
409
|
// add to the pool
|
|
470
410
|
pool_size += reserve_size;
|
|
471
411
|
|
|
472
|
-
//
|
|
412
|
+
// GGML_LOG_INFO("cann pool[%d]: size increased to %llu MB (
|
|
473
413
|
// reserved %llu MB)\n",
|
|
474
414
|
// device, (unsigned long long) (pool_size/1024/1024),
|
|
475
415
|
// (unsigned long long) (reserve_size/1024/1024));
|
|
@@ -482,7 +422,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
482
422
|
pool_used += size;
|
|
483
423
|
|
|
484
424
|
#ifdef DEBUG_CANN_MALLOC
|
|
485
|
-
|
|
425
|
+
GGML_LOG_INFO("cann pool[%d]: allocated %llu bytes at %llx\n", device,
|
|
486
426
|
(unsigned long long)size, (unsigned long long)ptr);
|
|
487
427
|
#endif
|
|
488
428
|
return ptr;
|
|
@@ -496,7 +436,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
496
436
|
*/
|
|
497
437
|
void free(void* ptr, size_t size) override {
|
|
498
438
|
#ifdef DEBUG_CANN_MALLOC
|
|
499
|
-
|
|
439
|
+
GGML_LOG_INFO("cann pool[%d]: freed %llu bytes at %llx\n", device,
|
|
500
440
|
(unsigned long long)size, (unsigned long long)ptr);
|
|
501
441
|
#endif
|
|
502
442
|
|
|
@@ -549,23 +489,6 @@ struct ggml_backend_cann_buffer_context {
|
|
|
549
489
|
~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
|
|
550
490
|
};
|
|
551
491
|
|
|
552
|
-
/**
|
|
553
|
-
* @brief Retrieve the name associated with a CANN buffer.
|
|
554
|
-
*
|
|
555
|
-
* This function returns the name of a CANN buffer, which is stored in the
|
|
556
|
-
* context of the buffer.
|
|
557
|
-
*
|
|
558
|
-
* @param buffer The CANN buffer whose name is to be retrieved.
|
|
559
|
-
* @return A pointer to a C-string containing the name of the buffer.
|
|
560
|
-
*/
|
|
561
|
-
|
|
562
|
-
GGML_CALL static const char* ggml_backend_cann_buffer_get_name(
|
|
563
|
-
ggml_backend_buffer_t buffer) {
|
|
564
|
-
return "CANN";
|
|
565
|
-
|
|
566
|
-
GGML_UNUSED(buffer);
|
|
567
|
-
}
|
|
568
|
-
|
|
569
492
|
/**
|
|
570
493
|
* @brief Check if a buffer is a CANN buffer.
|
|
571
494
|
*
|
|
@@ -575,9 +498,10 @@ GGML_CALL static const char* ggml_backend_cann_buffer_get_name(
|
|
|
575
498
|
* @param buffer The buffer to check.
|
|
576
499
|
* @return true if the buffer is a CANN buffer, false otherwise.
|
|
577
500
|
*/
|
|
578
|
-
|
|
501
|
+
static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft);
|
|
502
|
+
static bool ggml_backend_buffer_is_cann(
|
|
579
503
|
ggml_backend_buffer_t buffer) {
|
|
580
|
-
return buffer->
|
|
504
|
+
return ggml_backend_buft_is_cann(buffer->buft);
|
|
581
505
|
}
|
|
582
506
|
|
|
583
507
|
/**
|
|
@@ -588,7 +512,7 @@ GGML_CALL static bool ggml_backend_buffer_is_cann(
|
|
|
588
512
|
*
|
|
589
513
|
* @param buffer The CANN buffer to free.
|
|
590
514
|
*/
|
|
591
|
-
|
|
515
|
+
static void ggml_backend_cann_buffer_free_buffer(
|
|
592
516
|
ggml_backend_buffer_t buffer) {
|
|
593
517
|
ggml_backend_cann_buffer_context* ctx =
|
|
594
518
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
@@ -604,7 +528,7 @@ GGML_CALL static void ggml_backend_cann_buffer_free_buffer(
|
|
|
604
528
|
* @param buffer The CANN buffer whose base pointer is to be retrieved.
|
|
605
529
|
* @return A pointer to the base of the device memory allocated for the buffer.
|
|
606
530
|
*/
|
|
607
|
-
|
|
531
|
+
static void* ggml_backend_cann_buffer_get_base(
|
|
608
532
|
ggml_backend_buffer_t buffer) {
|
|
609
533
|
ggml_backend_cann_buffer_context* ctx =
|
|
610
534
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
@@ -624,10 +548,9 @@ GGML_CALL static void* ggml_backend_cann_buffer_get_base(
|
|
|
624
548
|
* @param dst Pointer to the destination buffer where transformed data will be
|
|
625
549
|
* stored.
|
|
626
550
|
*/
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
GGML_ASSERT(tensor->op == GGML_OP_NONE);
|
|
551
|
+
static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor,
|
|
552
|
+
const void* src,
|
|
553
|
+
void* dst) {
|
|
631
554
|
|
|
632
555
|
int64_t n_elems = ggml_nelements(tensor);
|
|
633
556
|
int64_t groups = n_elems / QK4_0;
|
|
@@ -677,9 +600,8 @@ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor,
|
|
|
677
600
|
* @param dst Pointer to the destination buffer where the Q4.0 formatted data
|
|
678
601
|
* will be stored.
|
|
679
602
|
*/
|
|
680
|
-
|
|
603
|
+
static void ggml_backend_cann_transform_back_q4_0(
|
|
681
604
|
const ggml_tensor* tensor, void* src, void* dst) {
|
|
682
|
-
GGML_ASSERT(tensor->op == GGML_OP_NONE);
|
|
683
605
|
|
|
684
606
|
int64_t n_elems = ggml_nelements(tensor);
|
|
685
607
|
int64_t groups = n_elems / QK4_0;
|
|
@@ -727,9 +649,9 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0(
|
|
|
727
649
|
* @param dst Pointer to the destination buffer where transformed data will be
|
|
728
650
|
* stored.
|
|
729
651
|
*/
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
652
|
+
static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor,
|
|
653
|
+
const void* src,
|
|
654
|
+
void* dst) {
|
|
733
655
|
int64_t n_elems = ggml_nelements(tensor);
|
|
734
656
|
int64_t groups = n_elems / QK8_0;
|
|
735
657
|
size_t quant_bytes = n_elems * sizeof(uint8_t);
|
|
@@ -761,7 +683,7 @@ GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor,
|
|
|
761
683
|
* @param dst Pointer to the destination buffer where the Q8.0 formatted data
|
|
762
684
|
* will be stored.
|
|
763
685
|
*/
|
|
764
|
-
|
|
686
|
+
static void ggml_backend_cann_transform_back_q8_0(
|
|
765
687
|
const ggml_tensor* tensor, const void* src, void* dst) {
|
|
766
688
|
int64_t n_elems = ggml_nelements(tensor);
|
|
767
689
|
int64_t groups = n_elems / QK8_0;
|
|
@@ -793,8 +715,8 @@ GGML_CALL static void ggml_backend_cann_transform_back_q8_0(
|
|
|
793
715
|
* @param dst Pointer to the destination buffer where transformed data will be
|
|
794
716
|
* stored.
|
|
795
717
|
*/
|
|
796
|
-
|
|
797
|
-
|
|
718
|
+
static void ggml_backend_cann_transform(ggml_tensor* tensor,
|
|
719
|
+
const void* src, void* dst) {
|
|
798
720
|
switch (tensor->type) {
|
|
799
721
|
case GGML_TYPE_Q4_0:
|
|
800
722
|
ggml_backend_cann_transform_q4_0(tensor, src, dst);
|
|
@@ -819,7 +741,7 @@ GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor,
|
|
|
819
741
|
* @param dst Pointer to the destination buffer where transformed tensor data
|
|
820
742
|
* will be stored.
|
|
821
743
|
*/
|
|
822
|
-
|
|
744
|
+
static void ggml_backend_cann_transform_back(
|
|
823
745
|
const ggml_tensor* tensor, void* src, void* dst) {
|
|
824
746
|
switch (tensor->type) {
|
|
825
747
|
case GGML_TYPE_Q4_0:
|
|
@@ -842,7 +764,7 @@ GGML_CALL static void ggml_backend_cann_transform_back(
|
|
|
842
764
|
* @param type The tensor type to check.
|
|
843
765
|
* @return true if transformation is needed, false otherwise.
|
|
844
766
|
*/
|
|
845
|
-
|
|
767
|
+
static bool need_transform(ggml_type type) {
|
|
846
768
|
switch (type) {
|
|
847
769
|
case GGML_TYPE_Q4_0:
|
|
848
770
|
case GGML_TYPE_Q8_0:
|
|
@@ -861,7 +783,7 @@ GGML_CALL static bool need_transform(ggml_type type) {
|
|
|
861
783
|
* @param buffer The CANN buffer from which to initialize the tensor.
|
|
862
784
|
* @param tensor Pointer to the tensor to be initialized.
|
|
863
785
|
*/
|
|
864
|
-
|
|
786
|
+
static void ggml_backend_cann_buffer_init_tensor(
|
|
865
787
|
ggml_backend_buffer_t buffer, ggml_tensor* tensor) {
|
|
866
788
|
if (tensor->view_src != NULL && tensor->view_offs == 0) {
|
|
867
789
|
GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
|
|
@@ -897,12 +819,11 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
|
|
|
897
819
|
* @param offset Offset in the source data from where to start copying.
|
|
898
820
|
* @param size Size of the data to be copied, in bytes.
|
|
899
821
|
*/
|
|
900
|
-
|
|
901
|
-
ggml_backend_buffer_t buffer, ggml_tensor*
|
|
822
|
+
static void ggml_backend_cann_buffer_set_tensor(
|
|
823
|
+
ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data,
|
|
902
824
|
size_t offset, size_t size) {
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
825
|
+
ggml_backend_cann_buffer_context *ctx =
|
|
826
|
+
(ggml_backend_cann_buffer_context *)buffer->context;
|
|
906
827
|
|
|
907
828
|
ggml_cann_set_device(ctx->device);
|
|
908
829
|
// TODO: refer to cann(#6017), it use thread's default stream.
|
|
@@ -910,22 +831,14 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
|
|
|
910
831
|
// Why aclrtSynchronizeDevice?
|
|
911
832
|
|
|
912
833
|
if (!need_transform(tensor->type)) {
|
|
913
|
-
ACL_CHECK(aclrtMemcpy(tensor->data, size,
|
|
914
|
-
|
|
834
|
+
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size, data, size,
|
|
835
|
+
ACL_MEMCPY_HOST_TO_DEVICE));
|
|
915
836
|
} else {
|
|
916
|
-
void*
|
|
917
|
-
ggml_backend_cann_transform(tensor,
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
void* check_buffer = malloc(size);
|
|
922
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
923
|
-
check_buffer);
|
|
924
|
-
GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size) ==
|
|
925
|
-
0);
|
|
926
|
-
free(check_buffer);
|
|
927
|
-
#endif
|
|
928
|
-
ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size,
|
|
837
|
+
void *transform_buffer = malloc(size);
|
|
838
|
+
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
|
839
|
+
|
|
840
|
+
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
|
|
841
|
+
transform_buffer, size,
|
|
929
842
|
ACL_MEMCPY_HOST_TO_DEVICE));
|
|
930
843
|
free(transform_buffer);
|
|
931
844
|
}
|
|
@@ -944,24 +857,23 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
|
|
|
944
857
|
* @param offset Offset in the destination buffer where to start copying.
|
|
945
858
|
* @param size Size of the data to be copied, in bytes.
|
|
946
859
|
*/
|
|
947
|
-
|
|
860
|
+
static void ggml_backend_cann_buffer_get_tensor(
|
|
948
861
|
ggml_backend_buffer_t buffer, const ggml_tensor* tensor, void* data,
|
|
949
862
|
size_t offset, size_t size) {
|
|
950
|
-
GGML_ASSERT(size == ggml_nbytes(tensor));
|
|
951
863
|
ggml_backend_cann_buffer_context* ctx =
|
|
952
864
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
953
865
|
|
|
954
866
|
ggml_cann_set_device(ctx->device);
|
|
955
867
|
|
|
956
868
|
if (!need_transform(tensor->type)) {
|
|
957
|
-
ACL_CHECK(aclrtMemcpy((char*)data + offset, size,
|
|
869
|
+
ACL_CHECK(aclrtMemcpy(data, size, (char*)tensor->data + offset, size,
|
|
958
870
|
ACL_MEMCPY_DEVICE_TO_HOST));
|
|
959
871
|
} else {
|
|
960
872
|
void* transform_buffer = malloc(size);
|
|
961
|
-
ACL_CHECK(aclrtMemcpy(transform_buffer, size,
|
|
873
|
+
ACL_CHECK(aclrtMemcpy(transform_buffer, size,
|
|
874
|
+
(char*)tensor->data + offset, size,
|
|
962
875
|
ACL_MEMCPY_DEVICE_TO_HOST));
|
|
963
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
964
|
-
(char*)data + offset);
|
|
876
|
+
ggml_backend_cann_transform_back(tensor, transform_buffer, data);
|
|
965
877
|
free(transform_buffer);
|
|
966
878
|
}
|
|
967
879
|
}
|
|
@@ -979,7 +891,7 @@ GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
|
|
|
979
891
|
* @param dst Pointer to the destination tensor where the data will be copied.
|
|
980
892
|
* @return true if the copy operation succeeded, false otherwise.
|
|
981
893
|
*/
|
|
982
|
-
|
|
894
|
+
static bool ggml_backend_cann_buffer_cpy_tensor(
|
|
983
895
|
ggml_backend_buffer_t buffer, const ggml_tensor* src, ggml_tensor* dst) {
|
|
984
896
|
if (ggml_backend_buffer_is_cann(src->buffer)) {
|
|
985
897
|
ggml_backend_cann_buffer_context* src_ctx =
|
|
@@ -1021,7 +933,7 @@ GGML_CALL static bool ggml_backend_cann_buffer_cpy_tensor(
|
|
|
1021
933
|
* @param buffer The CANN buffer to be cleared.
|
|
1022
934
|
* @param value The value to which each byte in the buffer will be set.
|
|
1023
935
|
*/
|
|
1024
|
-
|
|
936
|
+
static void ggml_backend_cann_buffer_clear(
|
|
1025
937
|
ggml_backend_buffer_t buffer, uint8_t value) {
|
|
1026
938
|
ggml_backend_cann_buffer_context* ctx =
|
|
1027
939
|
(ggml_backend_cann_buffer_context*)buffer->context;
|
|
@@ -1036,11 +948,11 @@ GGML_CALL static void ggml_backend_cann_buffer_clear(
|
|
|
1036
948
|
* This structure defines function pointers to operations that can be performed
|
|
1037
949
|
* on a CANN buffer within the backend.
|
|
1038
950
|
*/
|
|
1039
|
-
static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
|
1040
|
-
/* .get_name = */ ggml_backend_cann_buffer_get_name,
|
|
951
|
+
static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
|
1041
952
|
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
|
|
1042
953
|
/* .get_base = */ ggml_backend_cann_buffer_get_base,
|
|
1043
954
|
/* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
|
|
955
|
+
/* .memset_tensor = */ NULL,
|
|
1044
956
|
/* .set_tensor = */ ggml_backend_cann_buffer_set_tensor,
|
|
1045
957
|
/* .get_tensor = */ ggml_backend_cann_buffer_get_tensor,
|
|
1046
958
|
/* .cpy_tensor = */ ggml_backend_cann_buffer_cpy_tensor,
|
|
@@ -1068,11 +980,12 @@ struct ggml_backend_cann_buffer_type_context {
|
|
|
1068
980
|
* @param buft Pointer to the buffer type context.
|
|
1069
981
|
* @return Const pointer to the C-style string containing the name.
|
|
1070
982
|
*/
|
|
1071
|
-
|
|
983
|
+
static const char* ggml_backend_cann_buffer_type_name(
|
|
1072
984
|
ggml_backend_buffer_type_t buft) {
|
|
1073
|
-
|
|
985
|
+
ggml_backend_cann_buffer_type_context* buft_ctx =
|
|
986
|
+
(ggml_backend_cann_buffer_type_context*)buft->context;
|
|
1074
987
|
|
|
1075
|
-
|
|
988
|
+
return buft_ctx->name.c_str();
|
|
1076
989
|
}
|
|
1077
990
|
|
|
1078
991
|
/**
|
|
@@ -1085,7 +998,7 @@ GGML_CALL static const char* ggml_backend_cann_buffer_type_name(
|
|
|
1085
998
|
* @param size Size in bytes of the buffer to allocate.
|
|
1086
999
|
* @return Pointer to the allocated buffer, or nullptr if allocation fails.
|
|
1087
1000
|
*/
|
|
1088
|
-
|
|
1001
|
+
static ggml_backend_buffer_t
|
|
1089
1002
|
ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
1090
1003
|
size_t size) {
|
|
1091
1004
|
ggml_backend_cann_buffer_type_context* buft_ctx =
|
|
@@ -1098,7 +1011,7 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
|
1098
1011
|
void* dev_ptr;
|
|
1099
1012
|
aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST);
|
|
1100
1013
|
if (err != ACL_SUCCESS) {
|
|
1101
|
-
|
|
1014
|
+
GGML_LOG_ERROR(
|
|
1102
1015
|
"%s: allocating %.2f MiB on device %d: aclrtMalloc failed: %s\n",
|
|
1103
1016
|
__func__, size / 1024.0 / 1024.0, buft_ctx->device,
|
|
1104
1017
|
aclGetRecentErrMsg());
|
|
@@ -1124,7 +1037,7 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
|
|
|
1124
1037
|
* @return The alignment requirement in bytes (fixed at 128 bytes for CANN
|
|
1125
1038
|
* buffers).
|
|
1126
1039
|
*/
|
|
1127
|
-
|
|
1040
|
+
static size_t ggml_backend_cann_buffer_type_get_alignment(
|
|
1128
1041
|
ggml_backend_buffer_type_t buft) {
|
|
1129
1042
|
return 128;
|
|
1130
1043
|
|
|
@@ -1145,7 +1058,7 @@ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alignment(
|
|
|
1145
1058
|
* @return The total allocation size in bytes required for the tensor in the
|
|
1146
1059
|
* CANN buffer.
|
|
1147
1060
|
*/
|
|
1148
|
-
|
|
1061
|
+
static size_t ggml_backend_cann_buffer_type_get_alloc_size(
|
|
1149
1062
|
ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) {
|
|
1150
1063
|
size_t size = ggml_nbytes(tensor);
|
|
1151
1064
|
int64_t ne0 = tensor->ne[0];
|
|
@@ -1171,19 +1084,25 @@ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alloc_size(
|
|
|
1171
1084
|
GGML_UNUSED(buft);
|
|
1172
1085
|
}
|
|
1173
1086
|
|
|
1087
|
+
static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
|
1088
|
+
return false;
|
|
1089
|
+
|
|
1090
|
+
GGML_UNUSED(buft);
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1174
1093
|
/**
|
|
1175
1094
|
* @brief Interface for managing CANN buffer types in the GGML backend.
|
|
1176
1095
|
*
|
|
1177
1096
|
* Provides function pointers for allocating, querying properties, and managing
|
|
1178
1097
|
* memory for CANN buffer types in the GGML backend.
|
|
1179
1098
|
*/
|
|
1180
|
-
static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
|
1099
|
+
static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
|
1181
1100
|
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
|
1182
1101
|
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
|
1183
1102
|
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
|
1184
1103
|
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
1185
1104
|
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
|
1186
|
-
/* .is_host = */
|
|
1105
|
+
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
|
|
1187
1106
|
};
|
|
1188
1107
|
|
|
1189
1108
|
/**
|
|
@@ -1196,7 +1115,7 @@ static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
|
|
1196
1115
|
* @return A pointer to the buffer type interface for the specified device, or
|
|
1197
1116
|
* nullptr if the device index is out of range.
|
|
1198
1117
|
*/
|
|
1199
|
-
|
|
1118
|
+
ggml_backend_buffer_type_t
|
|
1200
1119
|
ggml_backend_cann_buffer_type(int32_t device) {
|
|
1201
1120
|
static std::mutex mutex;
|
|
1202
1121
|
std::lock_guard<std::mutex> lock(mutex);
|
|
@@ -1214,6 +1133,7 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|
|
1214
1133
|
for (int32_t i = 0; i < GGML_CANN_MAX_DEVICES; i++) {
|
|
1215
1134
|
ggml_backend_cann_buffer_types[i] = {
|
|
1216
1135
|
/* .iface = */ ggml_backend_cann_buffer_type_interface,
|
|
1136
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
|
|
1217
1137
|
/* .context = */
|
|
1218
1138
|
new ggml_backend_cann_buffer_type_context{
|
|
1219
1139
|
i, "CANN" + std::to_string(i)},
|
|
@@ -1225,6 +1145,116 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|
|
1225
1145
|
return &ggml_backend_cann_buffer_types[device];
|
|
1226
1146
|
}
|
|
1227
1147
|
|
|
1148
|
+
/**
|
|
1149
|
+
* @brief Retrieves the name associated with a CANN host buffer type.
|
|
1150
|
+
*
|
|
1151
|
+
* This function returns the descriptive name associated with the specified
|
|
1152
|
+
* CANN host buffer type context.
|
|
1153
|
+
*
|
|
1154
|
+
* @param buft Pointer to the host buffer type context.
|
|
1155
|
+
* @return Const pointer to the C-style string containing the name.
|
|
1156
|
+
*/
|
|
1157
|
+
static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
|
1158
|
+
return "CANN_Host";
|
|
1159
|
+
|
|
1160
|
+
GGML_UNUSED(buft);
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
/**
|
|
1164
|
+
* @brief Retrieves the name associated with a CANN host buffer.
|
|
1165
|
+
*
|
|
1166
|
+
* This function returns the descriptive name associated with the specified
|
|
1167
|
+
* CANN host buffer context.
|
|
1168
|
+
*
|
|
1169
|
+
* @param buft Pointer to the host buffer context.
|
|
1170
|
+
* @return Const pointer to the C-style string containing the name.
|
|
1171
|
+
*/
|
|
1172
|
+
static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
|
|
1173
|
+
return "CANN_Host";
|
|
1174
|
+
|
|
1175
|
+
GGML_UNUSED(buffer);
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
/**
|
|
1179
|
+
* @brief Free resources associated with a CANN host buffer.
|
|
1180
|
+
*
|
|
1181
|
+
* This function frees the resources associated with a CANN host buffer, including
|
|
1182
|
+
* its context.
|
|
1183
|
+
*
|
|
1184
|
+
* @param buffer The CANN host buffer to free.
|
|
1185
|
+
*/
|
|
1186
|
+
static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
|
|
1187
|
+
ACL_CHECK(aclrtFreeHost(buffer->context));
|
|
1188
|
+
}
|
|
1189
|
+
|
|
1190
|
+
/**
|
|
1191
|
+
* @brief Allocates a new CANN host buffer of the specified size.
|
|
1192
|
+
*
|
|
1193
|
+
* This function allocates a new CANN host buffer with the given size.
|
|
1194
|
+
* @param size Size in bytes of the host buffer to allocate.
|
|
1195
|
+
* @return Pointer to the allocated host buffer, or nullptr if allocation fails.
|
|
1196
|
+
*/
|
|
1197
|
+
static void * ggml_cann_host_malloc(size_t size) {
|
|
1198
|
+
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
|
|
1199
|
+
return nullptr;
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
void * hostPtr = nullptr;
|
|
1203
|
+
aclError err = aclrtMallocHost((void **) &hostPtr, size);
|
|
1204
|
+
if (err != ACL_SUCCESS) {
|
|
1205
|
+
|
|
1206
|
+
GGML_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
|
|
1207
|
+
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
|
|
1208
|
+
return nullptr;
|
|
1209
|
+
}
|
|
1210
|
+
return hostPtr;
|
|
1211
|
+
}
|
|
1212
|
+
|
|
1213
|
+
/**
|
|
1214
|
+
* @brief Allocates a new CANN host buffer of the specified type and size.
|
|
1215
|
+
*
|
|
1216
|
+
* @param buft Pointer to the host buffer type context.
|
|
1217
|
+
* @param size Size in bytes of the host buffer to allocate.
|
|
1218
|
+
* @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
|
|
1219
|
+
*/
|
|
1220
|
+
static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
1221
|
+
void * hostPtr = ggml_cann_host_malloc(size);
|
|
1222
|
+
|
|
1223
|
+
if (hostPtr == nullptr) {
|
|
1224
|
+
// fallback to cpu buffer
|
|
1225
|
+
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
|
|
1229
|
+
buffer->buft = buft;
|
|
1230
|
+
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
|
|
1231
|
+
|
|
1232
|
+
return buffer;
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
/**
|
|
1236
|
+
* @brief Interface for managing CANN host buffer types in the GGML backend.
|
|
1237
|
+
*
|
|
1238
|
+
* Provides function pointers for allocating, querying properties, and managing
|
|
1239
|
+
* memory for CANN buffer types in the GGML backend.
|
|
1240
|
+
*/
|
|
1241
|
+
ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
|
1242
|
+
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
|
|
1243
|
+
/* .iface = */ {
|
|
1244
|
+
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
|
|
1245
|
+
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
|
|
1246
|
+
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
|
1247
|
+
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
1248
|
+
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
|
1249
|
+
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
|
1250
|
+
},
|
|
1251
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
|
|
1252
|
+
/* .context = */ nullptr,
|
|
1253
|
+
};
|
|
1254
|
+
|
|
1255
|
+
return &ggml_backend_cann_buffer_type_host;
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1228
1258
|
/**
|
|
1229
1259
|
* @brief Computes the forward operation for a given tensor using CANN
|
|
1230
1260
|
* operations.
|
|
@@ -1388,7 +1418,7 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
|
1388
1418
|
* @param backend Pointer to the CANN backend structure.
|
|
1389
1419
|
* @return A pointer to a constant string representing the backend name.
|
|
1390
1420
|
*/
|
|
1391
|
-
|
|
1421
|
+
static const char* ggml_backend_cann_name(ggml_backend_t backend) {
|
|
1392
1422
|
ggml_backend_cann_context* cann_ctx =
|
|
1393
1423
|
(ggml_backend_cann_context*)backend->context;
|
|
1394
1424
|
|
|
@@ -1403,7 +1433,7 @@ GGML_CALL static const char* ggml_backend_cann_name(ggml_backend_t backend) {
|
|
|
1403
1433
|
*
|
|
1404
1434
|
* @param backend Pointer to the CANN backend structure to be freed.
|
|
1405
1435
|
*/
|
|
1406
|
-
|
|
1436
|
+
static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
1407
1437
|
ggml_backend_cann_context* cann_ctx =
|
|
1408
1438
|
(ggml_backend_cann_context*)backend->context;
|
|
1409
1439
|
ACL_CHECK(aclrtSynchronizeDevice());
|
|
@@ -1418,24 +1448,6 @@ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
|
1418
1448
|
delete backend;
|
|
1419
1449
|
}
|
|
1420
1450
|
|
|
1421
|
-
/**
|
|
1422
|
-
* @brief Retrieves the default buffer type associated with the CANN backend.
|
|
1423
|
-
*
|
|
1424
|
-
* This function returns the buffer type specific to the device associated
|
|
1425
|
-
* with the CANN backend. It is used to allocate buffers for computations
|
|
1426
|
-
* performed by the backend.
|
|
1427
|
-
*
|
|
1428
|
-
* @param backend Pointer to the CANN backend structure.
|
|
1429
|
-
* @return Pointer to the buffer type structure for the CANN backend.
|
|
1430
|
-
*/
|
|
1431
|
-
GGML_CALL static ggml_backend_buffer_type_t
|
|
1432
|
-
ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
|
|
1433
|
-
ggml_backend_cann_context* cann_ctx =
|
|
1434
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1435
|
-
|
|
1436
|
-
return ggml_backend_cann_buffer_type(cann_ctx->device);
|
|
1437
|
-
}
|
|
1438
|
-
|
|
1439
1451
|
/**
|
|
1440
1452
|
* @brief Sets tensor data asynchronously in the CANN backend.
|
|
1441
1453
|
*
|
|
@@ -1449,43 +1461,35 @@ ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
|
|
|
1449
1461
|
* @param offset Offset in bytes within the host data.
|
|
1450
1462
|
* @param size Size of the data to copy in bytes.
|
|
1451
1463
|
*/
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
ggml_backend_cann_context*
|
|
1458
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1464
|
+
static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
|
|
1465
|
+
ggml_tensor *tensor,
|
|
1466
|
+
const void *data,
|
|
1467
|
+
size_t offset,
|
|
1468
|
+
size_t size) {
|
|
1469
|
+
ggml_backend_cann_context *cann_ctx =
|
|
1470
|
+
(ggml_backend_cann_context *)backend->context;
|
|
1459
1471
|
|
|
1460
1472
|
if (!need_transform(tensor->type)) {
|
|
1473
|
+
ACL_CHECK(aclrtMemcpyAsync((char *)tensor->data + offset, size, data,
|
|
1474
|
+
size, ACL_MEMCPY_HOST_TO_DEVICE,
|
|
1475
|
+
cann_ctx->stream()));
|
|
1476
|
+
} else {
|
|
1477
|
+
void *transform_buffer = malloc(size);
|
|
1478
|
+
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
|
1479
|
+
|
|
1461
1480
|
ACL_CHECK(aclrtMemcpyAsync(
|
|
1462
|
-
|
|
1481
|
+
(char *)tensor->data + offset, size, transform_buffer, size,
|
|
1463
1482
|
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
|
1464
|
-
} else {
|
|
1465
|
-
void* transform_buffer = malloc(size);
|
|
1466
|
-
ggml_backend_cann_transform(tensor, (const char*)data + offset,
|
|
1467
|
-
transform_buffer);
|
|
1468
|
-
|
|
1469
|
-
#ifndef NDEBUG
|
|
1470
|
-
void* check_buffer = malloc(size);
|
|
1471
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
1472
|
-
check_buffer);
|
|
1473
|
-
GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size));
|
|
1474
|
-
free(check_buffer);
|
|
1475
|
-
#endif
|
|
1476
|
-
ACL_CHECK(aclrtMemcpyAsync(tensor->data, size, transform_buffer, size,
|
|
1477
|
-
ACL_MEMCPY_HOST_TO_DEVICE,
|
|
1478
|
-
cann_ctx->stream()));
|
|
1479
1483
|
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
|
1480
1484
|
free(transform_buffer);
|
|
1481
1485
|
}
|
|
1482
1486
|
}
|
|
1483
1487
|
|
|
1484
|
-
|
|
1485
|
-
ggml_backend_t backend, const ggml_tensor*
|
|
1488
|
+
static void ggml_backend_cann_get_tensor_async(
|
|
1489
|
+
ggml_backend_t backend, const ggml_tensor *tensor, void *data,
|
|
1486
1490
|
size_t offset, size_t size) {
|
|
1487
|
-
ggml_backend_cann_context*
|
|
1488
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1491
|
+
ggml_backend_cann_context *cann_ctx =
|
|
1492
|
+
(ggml_backend_cann_context *)backend->context;
|
|
1489
1493
|
ggml_backend_buffer_t buf =
|
|
1490
1494
|
tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
|
1491
1495
|
|
|
@@ -1493,17 +1497,16 @@ GGML_CALL static void ggml_backend_cann_get_tensor_async(
|
|
|
1493
1497
|
"unsupported buffer type");
|
|
1494
1498
|
|
|
1495
1499
|
if (!need_transform(tensor->type)) {
|
|
1496
|
-
ACL_CHECK(aclrtMemcpyAsync((char*)data + offset,
|
|
1500
|
+
ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
|
|
1497
1501
|
size, ACL_MEMCPY_DEVICE_TO_HOST,
|
|
1498
1502
|
cann_ctx->stream()));
|
|
1499
1503
|
} else {
|
|
1500
|
-
void*
|
|
1501
|
-
ACL_CHECK(aclrtMemcpyAsync(
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
+
void *transform_buffer = malloc(size);
|
|
1505
|
+
ACL_CHECK(aclrtMemcpyAsync(
|
|
1506
|
+
transform_buffer, size, (char *)tensor->data + offset, size,
|
|
1507
|
+
ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
|
|
1504
1508
|
ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
|
|
1505
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
1506
|
-
(char*)data + offset);
|
|
1509
|
+
ggml_backend_cann_transform_back(tensor, transform_buffer, data);
|
|
1507
1510
|
free(transform_buffer);
|
|
1508
1511
|
}
|
|
1509
1512
|
}
|
|
@@ -1521,7 +1524,7 @@ GGML_CALL static void ggml_backend_cann_get_tensor_async(
|
|
|
1521
1524
|
* @param dst Pointer to the destination tensor to copy data to.
|
|
1522
1525
|
* @return true if the copy operation succeeds, false otherwise.
|
|
1523
1526
|
*/
|
|
1524
|
-
|
|
1527
|
+
static bool ggml_backend_cann_cpy_tensor_async(
|
|
1525
1528
|
ggml_backend_t backend_src, ggml_backend_t backend_dst,
|
|
1526
1529
|
const ggml_tensor* src, ggml_tensor* dst) {
|
|
1527
1530
|
GGML_ASSERT(ggml_backend_is_cann(backend_src) ||
|
|
@@ -1589,7 +1592,7 @@ GGML_CALL static bool ggml_backend_cann_cpy_tensor_async(
|
|
|
1589
1592
|
*
|
|
1590
1593
|
* @param backend Pointer to the CANN backend structure to synchronize.
|
|
1591
1594
|
*/
|
|
1592
|
-
|
|
1595
|
+
static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
|
|
1593
1596
|
ggml_backend_cann_context* cann_ctx =
|
|
1594
1597
|
(ggml_backend_cann_context*)backend->context;
|
|
1595
1598
|
|
|
@@ -1610,7 +1613,7 @@ GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
|
|
|
1610
1613
|
* @return enum ggml_status Returns GGML_STATUS_SUCCESS if computation
|
|
1611
1614
|
* completes successfully, otherwise an appropriate error status.
|
|
1612
1615
|
*/
|
|
1613
|
-
|
|
1616
|
+
static enum ggml_status ggml_backend_cann_graph_compute(
|
|
1614
1617
|
ggml_backend_t backend, ggml_cgraph* cgraph) {
|
|
1615
1618
|
ggml_backend_cann_context* cann_ctx =
|
|
1616
1619
|
(ggml_backend_cann_context*)backend->context;
|
|
@@ -1627,7 +1630,7 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
|
|
|
1627
1630
|
bool ok = ggml_cann_compute_forward(*cann_ctx, node);
|
|
1628
1631
|
|
|
1629
1632
|
if (!ok) {
|
|
1630
|
-
|
|
1633
|
+
GGML_LOG_ERROR("%s: error: op not supported %s (%s)\n", __func__,
|
|
1631
1634
|
node->name, ggml_op_name(node->op));
|
|
1632
1635
|
}
|
|
1633
1636
|
GGML_ASSERT(ok);
|
|
@@ -1648,7 +1651,7 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute(
|
|
|
1648
1651
|
* @return bool Returns true if the operation is supported by the backend,
|
|
1649
1652
|
* otherwise false.
|
|
1650
1653
|
*/
|
|
1651
|
-
|
|
1654
|
+
static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1652
1655
|
const ggml_tensor* op) {
|
|
1653
1656
|
switch (op->op) {
|
|
1654
1657
|
case GGML_OP_UNARY:
|
|
@@ -1666,10 +1669,13 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1666
1669
|
}
|
|
1667
1670
|
case GGML_OP_MUL_MAT: {
|
|
1668
1671
|
switch (op->src[0]->type) {
|
|
1669
|
-
// case GGML_TYPE_Q4_0:
|
|
1670
1672
|
case GGML_TYPE_F16:
|
|
1671
1673
|
case GGML_TYPE_F32:
|
|
1672
1674
|
case GGML_TYPE_Q8_0:
|
|
1675
|
+
// TODO: fix me
|
|
1676
|
+
// Current groupsize should not be greater than k-1 in
|
|
1677
|
+
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize().
|
|
1678
|
+
case GGML_TYPE_Q4_0:
|
|
1673
1679
|
return true;
|
|
1674
1680
|
default:
|
|
1675
1681
|
return false;
|
|
@@ -1694,6 +1700,7 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1694
1700
|
case GGML_TYPE_F32:
|
|
1695
1701
|
case GGML_TYPE_F16:
|
|
1696
1702
|
case GGML_TYPE_Q8_0:
|
|
1703
|
+
case GGML_TYPE_Q4_0:
|
|
1697
1704
|
return true;
|
|
1698
1705
|
default:
|
|
1699
1706
|
return false;
|
|
@@ -1735,7 +1742,7 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1735
1742
|
return false;
|
|
1736
1743
|
}
|
|
1737
1744
|
|
|
1738
|
-
GGML_UNUSED(
|
|
1745
|
+
GGML_UNUSED(dev);
|
|
1739
1746
|
}
|
|
1740
1747
|
|
|
1741
1748
|
/**
|
|
@@ -1753,31 +1760,6 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
|
|
|
1753
1760
|
return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
|
|
1754
1761
|
}
|
|
1755
1762
|
|
|
1756
|
-
/**
|
|
1757
|
-
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
|
1758
|
-
*
|
|
1759
|
-
* This function determines whether the CANN backend supports the given backend
|
|
1760
|
-
* buffer type by comparing the device context of the backend and buffer type.
|
|
1761
|
-
* It returns true if the devices are same between the backend context and
|
|
1762
|
-
* buffer type context.
|
|
1763
|
-
*
|
|
1764
|
-
* @param backend Pointer to the CANN backend.
|
|
1765
|
-
* @param buft Pointer to the backend buffer type to check.
|
|
1766
|
-
* @return bool Returns true if the CANN backend supports the buffer type,
|
|
1767
|
-
* otherwise false.
|
|
1768
|
-
*/
|
|
1769
|
-
GGML_CALL static bool ggml_backend_cann_supports_buft(
|
|
1770
|
-
ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
|
1771
|
-
if (ggml_backend_buft_is_cann(buft)) {
|
|
1772
|
-
ggml_backend_cann_context * cann_ctx =
|
|
1773
|
-
(ggml_backend_cann_context *)backend->context;
|
|
1774
|
-
ggml_backend_cann_buffer_type_context * buft_ctx =
|
|
1775
|
-
(ggml_backend_cann_buffer_type_context *)buft->context;
|
|
1776
|
-
return buft_ctx->device == cann_ctx->device;
|
|
1777
|
-
}
|
|
1778
|
-
return false;
|
|
1779
|
-
}
|
|
1780
|
-
|
|
1781
1763
|
/**
|
|
1782
1764
|
* @brief Determines if a tensor operation should be offloaded to the CANN
|
|
1783
1765
|
* backend.
|
|
@@ -1792,54 +1774,14 @@ GGML_CALL static bool ggml_backend_cann_supports_buft(
|
|
|
1792
1774
|
* @return bool Returns true if the operation should be offloaded, otherwise
|
|
1793
1775
|
* false.
|
|
1794
1776
|
*/
|
|
1795
|
-
|
|
1777
|
+
static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev,
|
|
1796
1778
|
const ggml_tensor* op) {
|
|
1797
1779
|
const int min_batch_size = 32;
|
|
1798
|
-
GGML_UNUSED(
|
|
1780
|
+
GGML_UNUSED(dev);
|
|
1799
1781
|
|
|
1800
1782
|
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
|
1801
1783
|
}
|
|
1802
1784
|
|
|
1803
|
-
/**
|
|
1804
|
-
* @brief Creates a new event for the CANN backend.
|
|
1805
|
-
*
|
|
1806
|
-
* This function initializes a new event for the CANN backend by setting the
|
|
1807
|
-
* device and creating an ACL runtime event. The created event is then wrapped
|
|
1808
|
-
* in a ggml_backend_event structure and returned.
|
|
1809
|
-
*
|
|
1810
|
-
* @param backend Pointer to the CANN backend.
|
|
1811
|
-
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
|
1812
|
-
*/
|
|
1813
|
-
static ggml_backend_event_t ggml_backend_cann_event_new(
|
|
1814
|
-
ggml_backend_t backend) {
|
|
1815
|
-
ggml_backend_cann_context* cann_ctx =
|
|
1816
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1817
|
-
|
|
1818
|
-
ggml_cann_set_device(cann_ctx->device);
|
|
1819
|
-
|
|
1820
|
-
aclrtEvent event;
|
|
1821
|
-
ACL_CHECK(aclrtCreateEvent(&event));
|
|
1822
|
-
|
|
1823
|
-
return new ggml_backend_event{
|
|
1824
|
-
/* .backend = */ backend,
|
|
1825
|
-
/* .context = */ event,
|
|
1826
|
-
};
|
|
1827
|
-
}
|
|
1828
|
-
|
|
1829
|
-
/**
|
|
1830
|
-
* @brief Frees a CANN backend event.
|
|
1831
|
-
*
|
|
1832
|
-
* This function destroys the ACL runtime event associated with the given CANN
|
|
1833
|
-
* backend event and then deletes the event structure itself.
|
|
1834
|
-
*
|
|
1835
|
-
* @param event Pointer to the event structure to be freed.
|
|
1836
|
-
*/
|
|
1837
|
-
static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
|
|
1838
|
-
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
|
1839
|
-
|
|
1840
|
-
delete event;
|
|
1841
|
-
}
|
|
1842
|
-
|
|
1843
1785
|
/**
|
|
1844
1786
|
* @brief Records an event on the CANN backend stream.
|
|
1845
1787
|
*
|
|
@@ -1848,10 +1790,9 @@ static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
|
|
|
1848
1790
|
*
|
|
1849
1791
|
* @param event Pointer to the event structure to be recorded.
|
|
1850
1792
|
*/
|
|
1851
|
-
static void ggml_backend_cann_event_record(ggml_backend_event_t event) {
|
|
1793
|
+
static void ggml_backend_cann_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
|
|
1852
1794
|
ggml_backend_cann_context* cann_ctx =
|
|
1853
|
-
(ggml_backend_cann_context*)
|
|
1854
|
-
|
|
1795
|
+
(ggml_backend_cann_context*)backend->context;
|
|
1855
1796
|
ACL_CHECK(aclrtRecordEvent((aclrtEvent)event->context, cann_ctx->stream()));
|
|
1856
1797
|
}
|
|
1857
1798
|
|
|
@@ -1869,8 +1810,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|
|
1869
1810
|
ggml_backend_event_t event) {
|
|
1870
1811
|
ggml_backend_cann_context* cann_ctx =
|
|
1871
1812
|
(ggml_backend_cann_context*)backend->context;
|
|
1872
|
-
|
|
1873
|
-
if (ggml_backend_is_cann(event->backend)) {
|
|
1813
|
+
if (ggml_backend_is_cann(backend)) {
|
|
1874
1814
|
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
|
|
1875
1815
|
(aclrtEvent)event->context));
|
|
1876
1816
|
} else {
|
|
@@ -1878,17 +1818,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|
|
1878
1818
|
}
|
|
1879
1819
|
}
|
|
1880
1820
|
|
|
1881
|
-
/**
|
|
1882
|
-
* @brief Synchronizes the given event on the CANN backend.
|
|
1883
|
-
*
|
|
1884
|
-
* This function waits for the specified event to complete on the ACL runtime.
|
|
1885
|
-
*
|
|
1886
|
-
* @param event Pointer to the event structure to be synchronized.
|
|
1887
|
-
*/
|
|
1888
|
-
static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
|
1889
|
-
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
|
1890
|
-
}
|
|
1891
|
-
|
|
1892
1821
|
/**
|
|
1893
1822
|
* @brief Structure defining the interface for the CANN backend.
|
|
1894
1823
|
*
|
|
@@ -1896,10 +1825,9 @@ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
|
|
1896
1825
|
* supported by the CANN backend, including name retrieval, memory
|
|
1897
1826
|
* management, tensor operations, synchronization, and event handling.
|
|
1898
1827
|
*/
|
|
1899
|
-
static ggml_backend_i ggml_backend_cann_interface = {
|
|
1828
|
+
static const ggml_backend_i ggml_backend_cann_interface = {
|
|
1900
1829
|
/* .get_name = */ ggml_backend_cann_name,
|
|
1901
1830
|
/* .free = */ ggml_backend_cann_free,
|
|
1902
|
-
/* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
|
|
1903
1831
|
/* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
|
|
1904
1832
|
/* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
|
|
1905
1833
|
/* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
|
|
@@ -1909,14 +1837,8 @@ static ggml_backend_i ggml_backend_cann_interface = {
|
|
|
1909
1837
|
/* .graph_plan_update = */ NULL,
|
|
1910
1838
|
/* .graph_plan_compute = */ NULL,
|
|
1911
1839
|
/* .graph_compute = */ ggml_backend_cann_graph_compute,
|
|
1912
|
-
/* .supports_op = */ ggml_backend_cann_supports_op,
|
|
1913
|
-
/* .supports_buft = */ ggml_backend_cann_supports_buft,
|
|
1914
|
-
/* .offload_op = */ ggml_backend_cann_offload_op,
|
|
1915
|
-
/* .event_new = */ ggml_backend_cann_event_new,
|
|
1916
|
-
/* .event_free = */ ggml_backend_cann_event_free,
|
|
1917
1840
|
/* .event_record = */ ggml_backend_cann_event_record,
|
|
1918
1841
|
/* .event_wait = */ ggml_backend_cann_event_wait,
|
|
1919
|
-
/* .event_synchronize = */ ggml_backend_cann_event_synchronize,
|
|
1920
1842
|
};
|
|
1921
1843
|
|
|
1922
1844
|
/**
|
|
@@ -1933,91 +1855,274 @@ static ggml_guid_t ggml_backend_cann_guid() {
|
|
|
1933
1855
|
return &guid;
|
|
1934
1856
|
}
|
|
1935
1857
|
|
|
1936
|
-
|
|
1858
|
+
// backend device
|
|
1859
|
+
struct ggml_backend_cann_device_context {
|
|
1860
|
+
int device;
|
|
1861
|
+
std::string name;
|
|
1862
|
+
std::string description;
|
|
1863
|
+
};
|
|
1864
|
+
|
|
1865
|
+
static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
|
|
1866
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1867
|
+
return ctx->name.c_str();
|
|
1868
|
+
}
|
|
1869
|
+
|
|
1870
|
+
static const char* ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
|
|
1871
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1872
|
+
return ctx->description.c_str();
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
|
1876
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1877
|
+
ggml_backend_cann_get_device_memory(ctx->device, free, total);
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
|
|
1881
|
+
GGML_UNUSED(dev);
|
|
1882
|
+
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
|
1883
|
+
}
|
|
1884
|
+
|
|
1885
|
+
static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
|
1886
|
+
props->name = ggml_backend_cann_device_get_name(dev);
|
|
1887
|
+
props->description = ggml_backend_cann_device_get_description(dev);
|
|
1888
|
+
props->type = ggml_backend_cann_device_get_type(dev);
|
|
1889
|
+
ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
1890
|
+
|
|
1891
|
+
bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
|
|
1892
|
+
|
|
1893
|
+
props->caps = {
|
|
1894
|
+
/* .async = */ false,
|
|
1895
|
+
/* .host_buffer = */ host_buffer,
|
|
1896
|
+
/* .buffer_from_host_ptr = */ false,
|
|
1897
|
+
/* .events = */ true,
|
|
1898
|
+
};
|
|
1899
|
+
}
|
|
1900
|
+
|
|
1901
|
+
static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
|
|
1902
|
+
GGML_UNUSED(params);
|
|
1903
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1904
|
+
return ggml_backend_cann_init(ctx->device);
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
/**
|
|
1908
|
+
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
|
1909
|
+
*
|
|
1910
|
+
* This function determines whether the CANN backend supports the given backend
|
|
1911
|
+
* buffer type by comparing the device context of the backend and buffer type.
|
|
1912
|
+
* It returns true if the devices are same between the backend context and
|
|
1913
|
+
* buffer type context.
|
|
1914
|
+
*
|
|
1915
|
+
* @param backend Pointer to the CANN backend.
|
|
1916
|
+
* @param buft Pointer to the backend buffer type to check.
|
|
1917
|
+
* @return bool Returns true if the CANN backend supports the buffer type,
|
|
1918
|
+
* otherwise false.
|
|
1919
|
+
*/
|
|
1920
|
+
static bool ggml_backend_cann_supports_buft(
|
|
1921
|
+
ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
|
1922
|
+
if (ggml_backend_buft_is_cann(buft)) {
|
|
1923
|
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1924
|
+
ggml_backend_cann_buffer_type_context * buft_ctx =
|
|
1925
|
+
(ggml_backend_cann_buffer_type_context *)buft->context;
|
|
1926
|
+
return buft_ctx->device == dev_ctx->device;
|
|
1927
|
+
}
|
|
1928
|
+
return false;
|
|
1929
|
+
}
|
|
1930
|
+
|
|
1931
|
+
/**
 * @brief Returns the device buffer type of a CANN device.
 *
 * @param dev Device whose context carries the CANN device index.
 * @return The result of ggml_backend_cann_buffer_type() for that index.
 */
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
    ggml_backend_cann_device_context * cann_dev = (ggml_backend_cann_device_context *)dev->context;
    const auto device_index = cann_dev->device;
    return ggml_backend_cann_buffer_type(device_index);
}
|
|
1935
|
+
|
|
1936
|
+
/**
 * @brief Returns the host buffer type used with CANN devices.
 *
 * @param dev Device handle; not consulted, the same host buffer type is
 *            returned for every device.
 * @return The result of ggml_backend_cann_host_buffer_type().
 */
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
    ggml_backend_buffer_type_t host_buft = ggml_backend_cann_host_buffer_type();
    GGML_UNUSED(dev);
    return host_buft;
}
|
|
1940
|
+
|
|
1941
|
+
/**
|
|
1942
|
+
* @brief Creates a new event for the CANN backend device.
|
|
1943
|
+
*
|
|
1944
|
+
* This function initializes a new event for the CANN backend by setting the
|
|
1945
|
+
* device and creating an ACL runtime event. The created event is then wrapped
|
|
1946
|
+
* in a ggml_backend_event structure and returned.
|
|
1947
|
+
*
|
|
1948
|
+
* @param backend Pointer to the CANN backend.
|
|
1949
|
+
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
|
1950
|
+
*/
|
|
1951
|
+
static ggml_backend_event_t ggml_backend_cann_device_event_new(
|
|
1952
|
+
ggml_backend_dev_t dev) {
|
|
1953
|
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1954
|
+
|
|
1955
|
+
ggml_cann_set_device(dev_ctx->device);
|
|
1956
|
+
|
|
1957
|
+
aclrtEvent event;
|
|
1958
|
+
ACL_CHECK(aclrtCreateEvent(&event));
|
|
1959
|
+
|
|
1960
|
+
return new ggml_backend_event{
|
|
1961
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
|
|
1962
|
+
/* .context = */ event,
|
|
1963
|
+
};
|
|
1964
|
+
}
|
|
1965
|
+
|
|
1966
|
+
/**
|
|
1967
|
+
* @brief Frees a CANN backend event.
|
|
1968
|
+
*
|
|
1969
|
+
* This function destroys the ACL runtime event associated with the given CANN
|
|
1970
|
+
* backend event and then deletes the event structure itself.
|
|
1971
|
+
*
|
|
1972
|
+
* @param event Pointer to the event structure to be freed.
|
|
1973
|
+
*/
|
|
1974
|
+
static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
1975
|
+
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
|
1976
|
+
|
|
1977
|
+
delete event;
|
|
1978
|
+
GGML_UNUSED(dev);
|
|
1979
|
+
}
|
|
1980
|
+
|
|
1981
|
+
/**
|
|
1982
|
+
* @brief Synchronizes the given event on the CANN backend.
|
|
1983
|
+
*
|
|
1984
|
+
* This function waits for the specified event to complete on the ACL runtime.
|
|
1985
|
+
*
|
|
1986
|
+
* @param event Pointer to the event structure to be synchronized.
|
|
1987
|
+
*/
|
|
1988
|
+
static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
1989
|
+
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
|
1990
|
+
|
|
1991
|
+
GGML_UNUSED(dev);
|
|
1992
|
+
}
|
|
1993
|
+
|
|
1994
|
+
// Function table shared by every CANN device object. Entries are positional,
// so their order has to follow the field order of ggml_backend_device_i.
static const ggml_backend_device_i ggml_backend_cann_device_interface = {
    /* .get_name             = */ ggml_backend_cann_device_get_name,
    /* .get_description      = */ ggml_backend_cann_device_get_description,
    /* .get_memory           = */ ggml_backend_cann_device_get_memory,
    /* .get_type             = */ ggml_backend_cann_device_get_type,
    /* .get_props            = */ ggml_backend_cann_device_get_props,
    /* .init_backend         = */ ggml_backend_cann_device_init,    // called for every card
    /* .get_buffer_type      = */ ggml_backend_cann_device_get_buffer_type,
    /* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
    /* .buffer_from_host_ptr = */ NULL, // not supported for CANN
    /* .supports_op          = */ ggml_backend_cann_supports_op,
    /* .supports_buft        = */ ggml_backend_cann_supports_buft,
    /* .offload_op           = */ ggml_backend_cann_offload_op,
    /* .event_new            = */ ggml_backend_cann_device_event_new,
    /* .event_free           = */ ggml_backend_cann_device_event_free,
    /* .event_synchronize    = */ ggml_backend_cann_device_event_synchronize,
};
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
// backend reg
|
|
2014
|
+
struct ggml_backend_cann_reg_context {
|
|
2015
|
+
std::vector<ggml_backend_dev_t> devices;
|
|
2016
|
+
};
|
|
2017
|
+
|
|
2018
|
+
/**
 * @brief Returns the name of the CANN backend registry.
 *
 * @param reg Registry handle; unused.
 * @return GGML_CANN_NAME.
 */
static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
    const char * registry_name = GGML_CANN_NAME;
    GGML_UNUSED(reg);
    return registry_name;
}
|
|
2022
|
+
|
|
2023
|
+
/**
 * @brief Returns how many devices the CANN registry holds.
 *
 * @param reg Registry whose context is a ggml_backend_cann_reg_context.
 * @return Number of entries in the registry's device list.
 */
static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
    const ggml_backend_cann_reg_context * registry =
        (ggml_backend_cann_reg_context *)reg->context;
    const size_t n_devices = registry->devices.size();
    return n_devices;
}
|
|
2027
|
+
|
|
2028
|
+
/**
 * @brief Returns the device stored at a given position in the CANN registry.
 *
 * @param reg   Registry whose context is a ggml_backend_cann_reg_context.
 * @param index Position in the registry's device list; an out-of-range value
 *              trips GGML_ASSERT.
 * @return The device handle stored at that position.
 */
static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    ggml_backend_cann_reg_context * registry = (ggml_backend_cann_reg_context *)reg->context;
    std::vector<ggml_backend_dev_t> & known = registry->devices;

    GGML_ASSERT(index < known.size());
    return known[index];
}
|
|
2033
|
+
|
|
2034
|
+
/**
 * @brief Looks up an optional entry point by name.
 *
 * No named entry points are provided; the hook is reserved for future use.
 *
 * @param reg  Registry handle; unused.
 * @param name Requested symbol name; unused.
 * @return Always nullptr.
 */
static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
    void * no_symbol = nullptr;  // reserved for future use
    GGML_UNUSED(reg);
    GGML_UNUSED(name);
    return no_symbol;
}
|
|
2040
|
+
|
|
2041
|
+
// Function table of the CANN registry object. Entries are positional, so
// their order has to follow the field order of ggml_backend_reg_i.
static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
    /* .get_name         = */ ggml_backend_cann_reg_get_name,
    /* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
    /* .get_device_get   = */ ggml_backend_cann_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
};
|
|
2047
|
+
|
|
2048
|
+
// backend registry, called only once for cann backend
|
|
2049
|
+
ggml_backend_reg_t ggml_backend_cann_reg() {
|
|
2050
|
+
static ggml_backend_reg reg;
|
|
2051
|
+
static bool initialized = false;
|
|
2052
|
+
|
|
2053
|
+
{
|
|
2054
|
+
static std::mutex mutex;
|
|
2055
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
2056
|
+
if (!initialized) {
|
|
2057
|
+
aclInit(nullptr);
|
|
2058
|
+
ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
|
|
2059
|
+
|
|
2060
|
+
for (int i = 0; i < ggml_cann_info().device_count; i++) {
|
|
2061
|
+
ggml_backend_cann_device_context* dev_ctx = new ggml_backend_cann_device_context();
|
|
2062
|
+
dev_ctx->description = aclrtGetSocName();
|
|
2063
|
+
dev_ctx->device = i;
|
|
2064
|
+
dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
|
|
2065
|
+
ggml_cann_set_device(i);
|
|
2066
|
+
ggml_backend_dev_t dev = new ggml_backend_device {
|
|
2067
|
+
/* .interface = */ ggml_backend_cann_device_interface,
|
|
2068
|
+
/* .reg = */ ®,
|
|
2069
|
+
/* .context = */ dev_ctx
|
|
2070
|
+
};
|
|
2071
|
+
ctx->devices.push_back(dev);
|
|
2072
|
+
}
|
|
2073
|
+
|
|
2074
|
+
reg = ggml_backend_reg {
|
|
2075
|
+
/* .interface = */ ggml_backend_cann_reg_interface,
|
|
2076
|
+
/* .context = */ ctx
|
|
2077
|
+
};
|
|
2078
|
+
}
|
|
2079
|
+
|
|
2080
|
+
initialized = true;
|
|
2081
|
+
}
|
|
2082
|
+
|
|
2083
|
+
return ®
|
|
2084
|
+
}
|
|
2085
|
+
|
|
2086
|
+
/**
 * @brief Creates a CANN backend instance bound to one device.
 *
 * Initializes ACL, validates the device index against
 * ggml_backend_cann_get_device_count(), allocates a backend context for the
 * device, selects that device, and returns a heap-allocated ggml_backend
 * whose device field is looked up in the CANN registry.
 *
 * @param device Index of the CANN device to use.
 * @return The new backend, or nullptr (after logging an error) when the
 *         device index is out of range.
 */
ggml_backend_t ggml_backend_cann_init(int32_t device) {
    aclInit(nullptr);
    if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
        GGML_LOG_ERROR("%s: error: invalid device %d\n", __func__, device);
        return nullptr;
    }

    // A plain new-expression reports failure by throwing, never by returning
    // nullptr, so no null check is needed on the result.
    ggml_backend_cann_context* ctx = new ggml_backend_cann_context(device);

    ggml_cann_set_device(ctx->device);
    ggml_backend_t cann_backend =
        new ggml_backend{/* .guid      = */ ggml_backend_cann_guid(),
                         /* .interface = */ ggml_backend_cann_interface,
                         /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
                         /* .context   = */ ctx};

    return cann_backend;
}
|
|
1956
2107
|
|
|
1957
|
-
|
|
2108
|
+
/**
 * @brief Tells whether a backend handle refers to a CANN backend.
 *
 * @param backend Backend to inspect; may be NULL.
 * @return true when the handle is non-NULL and its guid matches
 *         ggml_backend_cann_guid(), false otherwise.
 */
bool ggml_backend_is_cann(ggml_backend_t backend) {
    if (backend == NULL) {
        return false;
    }
    return ggml_guid_matches(backend->guid, ggml_backend_cann_guid());
}
|
|
1961
2112
|
|
|
1962
|
-
|
|
2113
|
+
int32_t ggml_backend_cann_get_device_count() {
|
|
1963
2114
|
return ggml_cann_info().device_count;
|
|
1964
2115
|
}
|
|
1965
2116
|
|
|
1966
|
-
|
|
2117
|
+
void ggml_backend_cann_get_device_description(
|
|
1967
2118
|
int32_t device, char* description, size_t description_size) {
|
|
1968
2119
|
ggml_cann_set_device(device);
|
|
1969
2120
|
const char* soc_name = aclrtGetSocName();
|
|
1970
2121
|
snprintf(description, description_size, "%s", soc_name);
|
|
1971
2122
|
}
|
|
1972
2123
|
|
|
1973
|
-
|
|
1974
|
-
|
|
2124
|
+
void ggml_backend_cann_get_device_memory(int32_t device, size_t* free,
|
|
2125
|
+
size_t* total) {
|
|
1975
2126
|
ggml_cann_set_device(device);
|
|
1976
2127
|
ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
|
|
1977
2128
|
}
|
|
1978
|
-
|
|
1979
|
-
// backend registry
|
|
1980
|
-
/**
|
|
1981
|
-
* @brief Initializes a CANN backend based on the provided parameters.
|
|
1982
|
-
*
|
|
1983
|
-
* This function initializes a CANN backend using the device index and then
|
|
1984
|
-
* initializes the backend using `ggml_backend_cann_init`.
|
|
1985
|
-
*
|
|
1986
|
-
* @param params Parameters for initialization (unused in this implementation).
|
|
1987
|
-
* @param user_data User data containing the device index to initialize the
|
|
1988
|
-
* backend.
|
|
1989
|
-
* @return ggml_backend_t The initialized CANN backend.
|
|
1990
|
-
*/
|
|
1991
|
-
GGML_CALL static ggml_backend_t ggml_backend_reg_cann_init(const char* params,
|
|
1992
|
-
void* user_data) {
|
|
1993
|
-
ggml_backend_t cann_backend =
|
|
1994
|
-
ggml_backend_cann_init((int)(intptr_t)user_data);
|
|
1995
|
-
return cann_backend;
|
|
1996
|
-
|
|
1997
|
-
GGML_UNUSED(params);
|
|
1998
|
-
}
|
|
1999
|
-
|
|
2000
|
-
extern "C" GGML_CALL int ggml_backend_cann_reg_devices();
|
|
2001
|
-
|
|
2002
|
-
/**
|
|
2003
|
-
* @brief Registers CANN (Ascend) devices as backend options.
|
|
2004
|
-
*
|
|
2005
|
-
* This function initializes ACL, retrieves the number of available CANN
|
|
2006
|
-
* devices, and registers each device as a backend option using
|
|
2007
|
-
* `ggml_backend_register`. Each device is given a unique name based on
|
|
2008
|
-
* `GGML_CANN_NAME` followed by its index.
|
|
2009
|
-
*
|
|
2010
|
-
* @return int The number of CANN devices registered.
|
|
2011
|
-
*/
|
|
2012
|
-
GGML_CALL int ggml_backend_cann_reg_devices() {
|
|
2013
|
-
uint32_t device_count = ggml_backend_cann_get_device_count();
|
|
2014
|
-
// initialization
|
|
2015
|
-
for (uint32_t i = 0; i < device_count; i++) {
|
|
2016
|
-
char name[128];
|
|
2017
|
-
snprintf(name, sizeof(name), "CANN%d", i);
|
|
2018
|
-
ggml_backend_register(name, ggml_backend_reg_cann_init,
|
|
2019
|
-
ggml_backend_cann_buffer_type(i),
|
|
2020
|
-
(void*)(intptr_t)i);
|
|
2021
|
-
}
|
|
2022
|
-
return device_count;
|
|
2023
|
-
}
|