@fugood/llama.node 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
|
@@ -39,6 +39,8 @@
|
|
|
39
39
|
|
|
40
40
|
#include "ggml-common.h"
|
|
41
41
|
|
|
42
|
+
#define GGML_CANN_NAME "CANN"
|
|
43
|
+
|
|
42
44
|
/**
|
|
43
45
|
* @brief Handles CANN errors by printing an error message and aborting.
|
|
44
46
|
*
|
|
@@ -120,6 +122,10 @@ static ggml_cann_device_info ggml_cann_init() {
|
|
|
120
122
|
ACL_CHECK(aclrtMemGetAllocationGranularity(
|
|
121
123
|
&prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED,
|
|
122
124
|
&info.devices[id].vmm_granularity));
|
|
125
|
+
|
|
126
|
+
size_t free, total;
|
|
127
|
+
ggml_backend_cann_get_device_memory(id, &free, &total);
|
|
128
|
+
info.devices[id].total_vram = free;
|
|
123
129
|
}
|
|
124
130
|
|
|
125
131
|
// TODO: add more device info later.
|
|
@@ -206,6 +212,11 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
|
206
212
|
* @return A pointer to the allocated buffer.
|
|
207
213
|
*/
|
|
208
214
|
void* alloc(size_t size, size_t* actual_size) override {
|
|
215
|
+
const size_t alignment = 128;
|
|
216
|
+
size = GGML_PAD(size, alignment);
|
|
217
|
+
if (size == 0) {
|
|
218
|
+
size = alignment;
|
|
219
|
+
}
|
|
209
220
|
#ifdef DEBUG_CANN_MALLOC
|
|
210
221
|
int nnz = 0;
|
|
211
222
|
size_t max_size = 0;
|
|
@@ -244,13 +255,11 @@ struct ggml_cann_pool_leg : public ggml_cann_pool {
|
|
|
244
255
|
return ptr;
|
|
245
256
|
}
|
|
246
257
|
void* ptr;
|
|
247
|
-
size_t look_ahead_size = (size_t)(1.05 * size);
|
|
248
|
-
look_ahead_size = 256 * ((look_ahead_size + 255) / 256);
|
|
249
258
|
ggml_cann_set_device(device);
|
|
250
259
|
ACL_CHECK(
|
|
251
|
-
aclrtMalloc(&ptr,
|
|
252
|
-
*actual_size =
|
|
253
|
-
pool_size +=
|
|
260
|
+
aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
|
|
261
|
+
*actual_size = size;
|
|
262
|
+
pool_size += size;
|
|
254
263
|
#ifdef DEBUG_CANN_MALLOC
|
|
255
264
|
GGML_LOG_INFO(
|
|
256
265
|
"%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, "
|
|
@@ -294,7 +303,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
294
303
|
/**
|
|
295
304
|
* @brief The maximum size of the virtual memory pool (32 GB).
|
|
296
305
|
*/
|
|
297
|
-
|
|
306
|
+
size_t max_size;
|
|
298
307
|
|
|
299
308
|
/**
|
|
300
309
|
* @brief The device ID associated with this buffer pool.
|
|
@@ -339,7 +348,11 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
339
348
|
*/
|
|
340
349
|
explicit ggml_cann_pool_vmm(int device)
|
|
341
350
|
: device(device),
|
|
342
|
-
granularity(ggml_cann_info().devices[device].vmm_granularity) {
|
|
351
|
+
granularity(ggml_cann_info().devices[device].vmm_granularity) {
|
|
352
|
+
auto dev = ggml_cann_info().devices[device];
|
|
353
|
+
granularity = dev.vmm_granularity;
|
|
354
|
+
max_size = dev.total_vram;
|
|
355
|
+
}
|
|
343
356
|
|
|
344
357
|
/**
|
|
345
358
|
* @brief Destructor to free all buffers in the virtual memory pool.
|
|
@@ -368,17 +381,19 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
368
381
|
// round up the allocation size to the alignment to ensure that all
|
|
369
382
|
// allocations are aligned for all data types
|
|
370
383
|
const size_t alignment = 128;
|
|
371
|
-
size =
|
|
384
|
+
size = GGML_PAD(size, alignment);
|
|
385
|
+
if (size == 0) {
|
|
386
|
+
size = alignment;
|
|
387
|
+
}
|
|
372
388
|
|
|
373
389
|
size_t avail = pool_size - pool_used;
|
|
374
390
|
|
|
375
391
|
if (size > avail) {
|
|
376
392
|
// round up to the next multiple of the granularity
|
|
377
393
|
size_t reserve_size = size - avail;
|
|
378
|
-
reserve_size =
|
|
379
|
-
granularity * ((reserve_size + granularity - 1) / granularity);
|
|
394
|
+
reserve_size = GGML_PAD(reserve_size, granularity);
|
|
380
395
|
|
|
381
|
-
GGML_ASSERT(pool_size + reserve_size <=
|
|
396
|
+
GGML_ASSERT(pool_size + reserve_size <= max_size);
|
|
382
397
|
|
|
383
398
|
// allocate more physical memory
|
|
384
399
|
aclrtPhysicalMemProp prop = {};
|
|
@@ -394,7 +409,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
394
409
|
// reserve virtual address space (if not already reserved)
|
|
395
410
|
if (pool_addr == 0) {
|
|
396
411
|
ACL_CHECK(aclrtReserveMemAddress(
|
|
397
|
-
&pool_addr,
|
|
412
|
+
&pool_addr, max_size, 0, NULL, 1));
|
|
398
413
|
}
|
|
399
414
|
|
|
400
415
|
// map at the end of the pool
|
|
@@ -407,10 +422,11 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
407
422
|
// add to the pool
|
|
408
423
|
pool_size += reserve_size;
|
|
409
424
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
425
|
+
#ifdef DEBUG_CANN_MALLOC
|
|
426
|
+
GGML_LOG_INFO("cann pool[%d]: size increased to %llu MB (reserved %llu MB)\n",
|
|
427
|
+
device, (unsigned long long) (pool_size/1024/1024),
|
|
428
|
+
(unsigned long long) (reserve_size/1024/1024));
|
|
429
|
+
#endif
|
|
414
430
|
}
|
|
415
431
|
|
|
416
432
|
GGML_ASSERT(pool_addr != 0);
|
|
@@ -455,7 +471,6 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
455
471
|
*/
|
|
456
472
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
|
457
473
|
int device) {
|
|
458
|
-
// return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_leg(device));
|
|
459
474
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
|
460
475
|
}
|
|
461
476
|
|
|
@@ -487,23 +502,6 @@ struct ggml_backend_cann_buffer_context {
|
|
|
487
502
|
~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); }
|
|
488
503
|
};
|
|
489
504
|
|
|
490
|
-
/**
|
|
491
|
-
* @brief Retrieve the name associated with a CANN buffer.
|
|
492
|
-
*
|
|
493
|
-
* This function returns the name of a CANN buffer, which is stored in the
|
|
494
|
-
* context of the buffer.
|
|
495
|
-
*
|
|
496
|
-
* @param buffer The CANN buffer whose name is to be retrieved.
|
|
497
|
-
* @return A pointer to a C-string containing the name of the buffer.
|
|
498
|
-
*/
|
|
499
|
-
|
|
500
|
-
static const char* ggml_backend_cann_buffer_get_name(
|
|
501
|
-
ggml_backend_buffer_t buffer) {
|
|
502
|
-
return "CANN";
|
|
503
|
-
|
|
504
|
-
GGML_UNUSED(buffer);
|
|
505
|
-
}
|
|
506
|
-
|
|
507
505
|
/**
|
|
508
506
|
* @brief Check if a buffer is a CANN buffer.
|
|
509
507
|
*
|
|
@@ -513,9 +511,10 @@ static const char* ggml_backend_cann_buffer_get_name(
|
|
|
513
511
|
* @param buffer The buffer to check.
|
|
514
512
|
* @return true if the buffer is a CANN buffer, false otherwise.
|
|
515
513
|
*/
|
|
514
|
+
static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft);
|
|
516
515
|
static bool ggml_backend_buffer_is_cann(
|
|
517
516
|
ggml_backend_buffer_t buffer) {
|
|
518
|
-
return buffer->
|
|
517
|
+
return ggml_backend_buft_is_cann(buffer->buft);
|
|
519
518
|
}
|
|
520
519
|
|
|
521
520
|
/**
|
|
@@ -851,13 +850,6 @@ static void ggml_backend_cann_buffer_set_tensor(
|
|
|
851
850
|
void *transform_buffer = malloc(size);
|
|
852
851
|
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
|
853
852
|
|
|
854
|
-
#ifndef NDEBUG
|
|
855
|
-
void *check_buffer = malloc(size);
|
|
856
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
857
|
-
check_buffer);
|
|
858
|
-
GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
|
|
859
|
-
free(check_buffer);
|
|
860
|
-
#endif
|
|
861
853
|
ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
|
|
862
854
|
transform_buffer, size,
|
|
863
855
|
ACL_MEMCPY_HOST_TO_DEVICE));
|
|
@@ -969,8 +961,7 @@ static void ggml_backend_cann_buffer_clear(
|
|
|
969
961
|
* This structure defines function pointers to operations that can be performed
|
|
970
962
|
* on a CANN buffer within the backend.
|
|
971
963
|
*/
|
|
972
|
-
static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
|
973
|
-
/* .get_name = */ ggml_backend_cann_buffer_get_name,
|
|
964
|
+
static const ggml_backend_buffer_i ggml_backend_cann_buffer_interface = {
|
|
974
965
|
/* .free_buffer = */ ggml_backend_cann_buffer_free_buffer,
|
|
975
966
|
/* .get_base = */ ggml_backend_cann_buffer_get_base,
|
|
976
967
|
/* .init_tensor = */ ggml_backend_cann_buffer_init_tensor,
|
|
@@ -1004,9 +995,10 @@ struct ggml_backend_cann_buffer_type_context {
|
|
|
1004
995
|
*/
|
|
1005
996
|
static const char* ggml_backend_cann_buffer_type_name(
|
|
1006
997
|
ggml_backend_buffer_type_t buft) {
|
|
1007
|
-
|
|
998
|
+
ggml_backend_cann_buffer_type_context* buft_ctx =
|
|
999
|
+
(ggml_backend_cann_buffer_type_context*)buft->context;
|
|
1008
1000
|
|
|
1009
|
-
|
|
1001
|
+
return buft_ctx->name.c_str();
|
|
1010
1002
|
}
|
|
1011
1003
|
|
|
1012
1004
|
/**
|
|
@@ -1105,19 +1097,25 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(
|
|
|
1105
1097
|
GGML_UNUSED(buft);
|
|
1106
1098
|
}
|
|
1107
1099
|
|
|
1100
|
+
static bool ggml_backend_cann_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
|
1101
|
+
return false;
|
|
1102
|
+
|
|
1103
|
+
GGML_UNUSED(buft);
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1108
1106
|
/**
|
|
1109
1107
|
* @brief Interface for managing CANN buffer types in the GGML backend.
|
|
1110
1108
|
*
|
|
1111
1109
|
* Provides function pointers for allocating, querying properties, and managing
|
|
1112
1110
|
* memory for CANN buffer types in the GGML backend.
|
|
1113
1111
|
*/
|
|
1114
|
-
static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
|
1112
|
+
static const ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = {
|
|
1115
1113
|
/* .get_name = */ ggml_backend_cann_buffer_type_name,
|
|
1116
1114
|
/* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer,
|
|
1117
1115
|
/* .get_alignment = */ ggml_backend_cann_buffer_type_get_alignment,
|
|
1118
1116
|
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
|
1119
1117
|
/* .get_alloc_size = */ ggml_backend_cann_buffer_type_get_alloc_size,
|
|
1120
|
-
/* .is_host = */
|
|
1118
|
+
/* .is_host = */ ggml_backend_cann_buffer_type_is_host,
|
|
1121
1119
|
};
|
|
1122
1120
|
|
|
1123
1121
|
/**
|
|
@@ -1145,9 +1143,10 @@ ggml_backend_cann_buffer_type(int32_t device) {
|
|
|
1145
1143
|
static bool ggml_backend_cann_buffer_type_initialized = false;
|
|
1146
1144
|
|
|
1147
1145
|
if (!ggml_backend_cann_buffer_type_initialized) {
|
|
1148
|
-
for (int32_t i = 0; i <
|
|
1146
|
+
for (int32_t i = 0; i < ggml_cann_info().device_count; i++) {
|
|
1149
1147
|
ggml_backend_cann_buffer_types[i] = {
|
|
1150
1148
|
/* .iface = */ ggml_backend_cann_buffer_type_interface,
|
|
1149
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), i),
|
|
1151
1150
|
/* .context = */
|
|
1152
1151
|
new ggml_backend_cann_buffer_type_context{
|
|
1153
1152
|
i, "CANN" + std::to_string(i)},
|
|
@@ -1213,10 +1212,15 @@ static void * ggml_cann_host_malloc(size_t size) {
|
|
|
1213
1212
|
return nullptr;
|
|
1214
1213
|
}
|
|
1215
1214
|
|
|
1215
|
+
const size_t alignment = 128;
|
|
1216
|
+
size = GGML_PAD(size, alignment);
|
|
1217
|
+
if (size == 0) {
|
|
1218
|
+
size = alignment;
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1216
1221
|
void * hostPtr = nullptr;
|
|
1217
1222
|
aclError err = aclrtMallocHost((void **) &hostPtr, size);
|
|
1218
1223
|
if (err != ACL_SUCCESS) {
|
|
1219
|
-
|
|
1220
1224
|
GGML_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
|
|
1221
1225
|
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
|
|
1222
1226
|
return nullptr;
|
|
@@ -1241,7 +1245,6 @@ static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggm
|
|
|
1241
1245
|
|
|
1242
1246
|
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
|
|
1243
1247
|
buffer->buft = buft;
|
|
1244
|
-
buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
|
|
1245
1248
|
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
|
|
1246
1249
|
|
|
1247
1250
|
return buffer;
|
|
@@ -1263,7 +1266,7 @@ ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
|
|
|
1263
1266
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
|
1264
1267
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
|
1265
1268
|
},
|
|
1266
|
-
/* .device = */
|
|
1269
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), 0),
|
|
1267
1270
|
/* .context = */ nullptr,
|
|
1268
1271
|
};
|
|
1269
1272
|
|
|
@@ -1463,24 +1466,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
|
1463
1466
|
delete backend;
|
|
1464
1467
|
}
|
|
1465
1468
|
|
|
1466
|
-
/**
|
|
1467
|
-
* @brief Retrieves the default buffer type associated with the CANN backend.
|
|
1468
|
-
*
|
|
1469
|
-
* This function returns the buffer type specific to the device associated
|
|
1470
|
-
* with the CANN backend. It is used to allocate buffers for computations
|
|
1471
|
-
* performed by the backend.
|
|
1472
|
-
*
|
|
1473
|
-
* @param backend Pointer to the CANN backend structure.
|
|
1474
|
-
* @return Pointer to the buffer type structure for the CANN backend.
|
|
1475
|
-
*/
|
|
1476
|
-
static ggml_backend_buffer_type_t
|
|
1477
|
-
ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
|
|
1478
|
-
ggml_backend_cann_context* cann_ctx =
|
|
1479
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1480
|
-
|
|
1481
|
-
return ggml_backend_cann_buffer_type(cann_ctx->device);
|
|
1482
|
-
}
|
|
1483
|
-
|
|
1484
1469
|
/**
|
|
1485
1470
|
* @brief Sets tensor data asynchronously in the CANN backend.
|
|
1486
1471
|
*
|
|
@@ -1510,13 +1495,6 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
|
|
|
1510
1495
|
void *transform_buffer = malloc(size);
|
|
1511
1496
|
ggml_backend_cann_transform(tensor, data, transform_buffer);
|
|
1512
1497
|
|
|
1513
|
-
#ifndef NDEBUG
|
|
1514
|
-
void *check_buffer = malloc(size);
|
|
1515
|
-
ggml_backend_cann_transform_back(tensor, transform_buffer,
|
|
1516
|
-
check_buffer);
|
|
1517
|
-
GGML_ASSERT(memcmp(data, check_buffer, size));
|
|
1518
|
-
free(check_buffer);
|
|
1519
|
-
#endif
|
|
1520
1498
|
ACL_CHECK(aclrtMemcpyAsync(
|
|
1521
1499
|
(char *)tensor->data + offset, size, transform_buffer, size,
|
|
1522
1500
|
ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
|
|
@@ -1691,7 +1669,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(
|
|
|
1691
1669
|
* @return bool Returns true if the operation is supported by the backend,
|
|
1692
1670
|
* otherwise false.
|
|
1693
1671
|
*/
|
|
1694
|
-
static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
1672
|
+
static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
1695
1673
|
const ggml_tensor* op) {
|
|
1696
1674
|
switch (op->op) {
|
|
1697
1675
|
case GGML_OP_UNARY:
|
|
@@ -1709,12 +1687,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1709
1687
|
}
|
|
1710
1688
|
case GGML_OP_MUL_MAT: {
|
|
1711
1689
|
switch (op->src[0]->type) {
|
|
1712
|
-
case GGML_TYPE_F16:
|
|
1713
|
-
case GGML_TYPE_F32:
|
|
1714
1690
|
case GGML_TYPE_Q8_0:
|
|
1715
|
-
// TODO: fix me
|
|
1716
1691
|
// Current groupsize should not be greater than k-1 in
|
|
1717
|
-
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
|
1692
|
+
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
|
1693
|
+
if (op->src[0]->ne[0] <= QK8_0) {
|
|
1694
|
+
return false;
|
|
1695
|
+
}
|
|
1696
|
+
case GGML_TYPE_F16:
|
|
1697
|
+
case GGML_TYPE_F32:
|
|
1718
1698
|
case GGML_TYPE_Q4_0:
|
|
1719
1699
|
return true;
|
|
1720
1700
|
default:
|
|
@@ -1746,9 +1726,50 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1746
1726
|
return false;
|
|
1747
1727
|
}
|
|
1748
1728
|
}
|
|
1729
|
+
case GGML_OP_CONT: {
|
|
1730
|
+
// TODO: support GGML_TYPE_BF16
|
|
1731
|
+
switch (op->src[0]->type) {
|
|
1732
|
+
case GGML_TYPE_F32:
|
|
1733
|
+
case GGML_TYPE_F16:
|
|
1734
|
+
return true;
|
|
1735
|
+
default:
|
|
1736
|
+
return false;
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
case GGML_OP_ROPE: {
|
|
1740
|
+
// TODO: with ops-test v == 1
|
|
1741
|
+
float * ext_factor = (float*)((int32_t*)op->op_params + 7);
|
|
1742
|
+
// TODO: n_dims <= ne0
|
|
1743
|
+
if (op->src[0]->ne[0] != op->op_params[1]) {
|
|
1744
|
+
return false;
|
|
1745
|
+
}
|
|
1746
|
+
// TODO: ext_factor != 0
|
|
1747
|
+
if (*ext_factor != 0) {
|
|
1748
|
+
return false;
|
|
1749
|
+
}
|
|
1750
|
+
|
|
1751
|
+
const int mode = ((const int32_t *) op->op_params)[2];
|
|
1752
|
+
if (mode & GGML_ROPE_TYPE_MROPE) {
|
|
1753
|
+
return false;
|
|
1754
|
+
}
|
|
1755
|
+
if (mode & GGML_ROPE_TYPE_VISION) {
|
|
1756
|
+
return false;
|
|
1757
|
+
}
|
|
1758
|
+
|
|
1759
|
+
return true;
|
|
1760
|
+
}
|
|
1761
|
+
case GGML_OP_UPSCALE: {
|
|
1762
|
+
// aclnnUpsampleNearest2dGetWorkspaceSize not support
|
|
1763
|
+
// selfDimN[2]/outDimN[2] or selfDimC[3]/outDimC[3] not equal
|
|
1764
|
+
if (op->src[0]->ne[2] * op->ne[3] != op->src[0]->ne[3] * op->ne[2]) {
|
|
1765
|
+
return false;
|
|
1766
|
+
}
|
|
1767
|
+
return true;
|
|
1768
|
+
}
|
|
1769
|
+
case GGML_OP_IM2COL:
|
|
1770
|
+
case GGML_OP_CONCAT:
|
|
1749
1771
|
case GGML_OP_DUP:
|
|
1750
1772
|
case GGML_OP_REPEAT:
|
|
1751
|
-
case GGML_OP_CONCAT:
|
|
1752
1773
|
case GGML_OP_NONE:
|
|
1753
1774
|
case GGML_OP_RESHAPE:
|
|
1754
1775
|
case GGML_OP_VIEW:
|
|
@@ -1762,17 +1783,13 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1762
1783
|
case GGML_OP_SCALE:
|
|
1763
1784
|
case GGML_OP_SQR:
|
|
1764
1785
|
case GGML_OP_CLAMP:
|
|
1765
|
-
case GGML_OP_CONT:
|
|
1766
1786
|
case GGML_OP_DIAG_MASK_INF:
|
|
1767
1787
|
case GGML_OP_SOFT_MAX:
|
|
1768
|
-
case GGML_OP_ROPE:
|
|
1769
|
-
case GGML_OP_IM2COL:
|
|
1770
1788
|
case GGML_OP_POOL_2D:
|
|
1771
1789
|
case GGML_OP_SUM_ROWS:
|
|
1772
1790
|
case GGML_OP_ARGSORT:
|
|
1773
1791
|
case GGML_OP_ACC:
|
|
1774
1792
|
case GGML_OP_GROUP_NORM:
|
|
1775
|
-
case GGML_OP_UPSCALE:
|
|
1776
1793
|
case GGML_OP_PAD:
|
|
1777
1794
|
case GGML_OP_ARANGE:
|
|
1778
1795
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
@@ -1782,7 +1799,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
|
|
|
1782
1799
|
return false;
|
|
1783
1800
|
}
|
|
1784
1801
|
|
|
1785
|
-
GGML_UNUSED(backend);
|
|
1802
|
+
GGML_UNUSED(dev);
|
|
1786
1803
|
}
|
|
1787
1804
|
|
|
1788
1805
|
/**
|
|
@@ -1800,31 +1817,6 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
|
|
|
1800
1817
|
return buft->iface.get_name == ggml_backend_cann_buffer_type_name;
|
|
1801
1818
|
}
|
|
1802
1819
|
|
|
1803
|
-
/**
|
|
1804
|
-
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
|
1805
|
-
*
|
|
1806
|
-
* This function determines whether the CANN backend supports the given backend
|
|
1807
|
-
* buffer type by comparing the device context of the backend and buffer type.
|
|
1808
|
-
* It returns true if the devices are same between the backend context and
|
|
1809
|
-
* buffer type context.
|
|
1810
|
-
*
|
|
1811
|
-
* @param backend Pointer to the CANN backend.
|
|
1812
|
-
* @param buft Pointer to the backend buffer type to check.
|
|
1813
|
-
* @return bool Returns true if the CANN backend supports the buffer type,
|
|
1814
|
-
* otherwise false.
|
|
1815
|
-
*/
|
|
1816
|
-
static bool ggml_backend_cann_supports_buft(
|
|
1817
|
-
ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
|
|
1818
|
-
if (ggml_backend_buft_is_cann(buft)) {
|
|
1819
|
-
ggml_backend_cann_context * cann_ctx =
|
|
1820
|
-
(ggml_backend_cann_context *)backend->context;
|
|
1821
|
-
ggml_backend_cann_buffer_type_context * buft_ctx =
|
|
1822
|
-
(ggml_backend_cann_buffer_type_context *)buft->context;
|
|
1823
|
-
return buft_ctx->device == cann_ctx->device;
|
|
1824
|
-
}
|
|
1825
|
-
return false;
|
|
1826
|
-
}
|
|
1827
|
-
|
|
1828
1820
|
/**
|
|
1829
1821
|
* @brief Determines if a tensor operation should be offloaded to the CANN
|
|
1830
1822
|
* backend.
|
|
@@ -1839,54 +1831,14 @@ static bool ggml_backend_cann_supports_buft(
|
|
|
1839
1831
|
* @return bool Returns true if the operation should be offloaded, otherwise
|
|
1840
1832
|
* false.
|
|
1841
1833
|
*/
|
|
1842
|
-
static bool ggml_backend_cann_offload_op(ggml_backend_t backend,
|
|
1834
|
+
static bool ggml_backend_cann_offload_op(ggml_backend_dev_t dev,
|
|
1843
1835
|
const ggml_tensor* op) {
|
|
1844
1836
|
const int min_batch_size = 32;
|
|
1845
|
-
GGML_UNUSED(backend);
|
|
1837
|
+
GGML_UNUSED(dev);
|
|
1846
1838
|
|
|
1847
1839
|
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
|
|
1848
1840
|
}
|
|
1849
1841
|
|
|
1850
|
-
/**
|
|
1851
|
-
* @brief Creates a new event for the CANN backend.
|
|
1852
|
-
*
|
|
1853
|
-
* This function initializes a new event for the CANN backend by setting the
|
|
1854
|
-
* device and creating an ACL runtime event. The created event is then wrapped
|
|
1855
|
-
* in a ggml_backend_event structure and returned.
|
|
1856
|
-
*
|
|
1857
|
-
* @param backend Pointer to the CANN backend.
|
|
1858
|
-
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
|
1859
|
-
*/
|
|
1860
|
-
static ggml_backend_event_t ggml_backend_cann_event_new(
|
|
1861
|
-
ggml_backend_t backend) {
|
|
1862
|
-
ggml_backend_cann_context* cann_ctx =
|
|
1863
|
-
(ggml_backend_cann_context*)backend->context;
|
|
1864
|
-
|
|
1865
|
-
ggml_cann_set_device(cann_ctx->device);
|
|
1866
|
-
|
|
1867
|
-
aclrtEvent event;
|
|
1868
|
-
ACL_CHECK(aclrtCreateEvent(&event));
|
|
1869
|
-
|
|
1870
|
-
return new ggml_backend_event{
|
|
1871
|
-
/* .backend = */ backend,
|
|
1872
|
-
/* .context = */ event,
|
|
1873
|
-
};
|
|
1874
|
-
}
|
|
1875
|
-
|
|
1876
|
-
/**
|
|
1877
|
-
* @brief Frees a CANN backend event.
|
|
1878
|
-
*
|
|
1879
|
-
* This function destroys the ACL runtime event associated with the given CANN
|
|
1880
|
-
* backend event and then deletes the event structure itself.
|
|
1881
|
-
*
|
|
1882
|
-
* @param event Pointer to the event structure to be freed.
|
|
1883
|
-
*/
|
|
1884
|
-
static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
|
|
1885
|
-
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
|
1886
|
-
|
|
1887
|
-
delete event;
|
|
1888
|
-
}
|
|
1889
|
-
|
|
1890
1842
|
/**
|
|
1891
1843
|
* @brief Records an event on the CANN backend stream.
|
|
1892
1844
|
*
|
|
@@ -1895,10 +1847,9 @@ static void ggml_backend_cann_event_free(ggml_backend_event_t event) {
|
|
|
1895
1847
|
*
|
|
1896
1848
|
* @param event Pointer to the event structure to be recorded.
|
|
1897
1849
|
*/
|
|
1898
|
-
static void ggml_backend_cann_event_record(ggml_backend_event_t event) {
|
|
1850
|
+
static void ggml_backend_cann_event_record(ggml_backend_t backend, ggml_backend_event_t event) {
|
|
1899
1851
|
ggml_backend_cann_context* cann_ctx =
|
|
1900
|
-
(ggml_backend_cann_context*)event->backend->context;
|
|
1901
|
-
|
|
1852
|
+
(ggml_backend_cann_context*)backend->context;
|
|
1902
1853
|
ACL_CHECK(aclrtRecordEvent((aclrtEvent)event->context, cann_ctx->stream()));
|
|
1903
1854
|
}
|
|
1904
1855
|
|
|
@@ -1916,8 +1867,7 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|
|
1916
1867
|
ggml_backend_event_t event) {
|
|
1917
1868
|
ggml_backend_cann_context* cann_ctx =
|
|
1918
1869
|
(ggml_backend_cann_context*)backend->context;
|
|
1919
|
-
|
|
1920
|
-
if (ggml_backend_is_cann(event->backend)) {
|
|
1870
|
+
if (ggml_backend_is_cann(backend)) {
|
|
1921
1871
|
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx->stream(),
|
|
1922
1872
|
(aclrtEvent)event->context));
|
|
1923
1873
|
} else {
|
|
@@ -1925,17 +1875,6 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend,
|
|
|
1925
1875
|
}
|
|
1926
1876
|
}
|
|
1927
1877
|
|
|
1928
|
-
/**
|
|
1929
|
-
* @brief Synchronizes the given event on the CANN backend.
|
|
1930
|
-
*
|
|
1931
|
-
* This function waits for the specified event to complete on the ACL runtime.
|
|
1932
|
-
*
|
|
1933
|
-
* @param event Pointer to the event structure to be synchronized.
|
|
1934
|
-
*/
|
|
1935
|
-
static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
|
1936
|
-
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
|
1937
|
-
}
|
|
1938
|
-
|
|
1939
1878
|
/**
|
|
1940
1879
|
* @brief Structure defining the interface for the CANN backend.
|
|
1941
1880
|
*
|
|
@@ -1943,10 +1882,9 @@ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) {
|
|
|
1943
1882
|
* supported by the CANN backend, including name retrieval, memory
|
|
1944
1883
|
* management, tensor operations, synchronization, and event handling.
|
|
1945
1884
|
*/
|
|
1946
|
-
static ggml_backend_i ggml_backend_cann_interface = {
|
|
1885
|
+
static const ggml_backend_i ggml_backend_cann_interface = {
|
|
1947
1886
|
/* .get_name = */ ggml_backend_cann_name,
|
|
1948
1887
|
/* .free = */ ggml_backend_cann_free,
|
|
1949
|
-
/* .get_default_buffer_type = */ ggml_backend_cann_get_default_buffer_type,
|
|
1950
1888
|
/* .set_tensor_async = */ ggml_backend_cann_set_tensor_async,
|
|
1951
1889
|
/* .get_tensor_async = */ ggml_backend_cann_get_tensor_async,
|
|
1952
1890
|
/* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async,
|
|
@@ -1956,9 +1894,6 @@ static ggml_backend_i ggml_backend_cann_interface = {
|
|
|
1956
1894
|
/* .graph_plan_update = */ NULL,
|
|
1957
1895
|
/* .graph_plan_compute = */ NULL,
|
|
1958
1896
|
/* .graph_compute = */ ggml_backend_cann_graph_compute,
|
|
1959
|
-
/* .supports_op = */ ggml_backend_cann_supports_op,
|
|
1960
|
-
/* .supports_buft = */ ggml_backend_cann_supports_buft,
|
|
1961
|
-
/* .offload_op = */ ggml_backend_cann_offload_op,
|
|
1962
1897
|
/* .event_record = */ ggml_backend_cann_event_record,
|
|
1963
1898
|
/* .event_wait = */ ggml_backend_cann_event_wait,
|
|
1964
1899
|
};
|
|
@@ -1977,6 +1912,235 @@ static ggml_guid_t ggml_backend_cann_guid() {
|
|
|
1977
1912
|
return &guid;
|
|
1978
1913
|
}
|
|
1979
1914
|
|
|
1915
|
+
// backend device
|
|
1916
|
+
struct ggml_backend_cann_device_context {
|
|
1917
|
+
int device;
|
|
1918
|
+
std::string name;
|
|
1919
|
+
std::string description;
|
|
1920
|
+
};
|
|
1921
|
+
|
|
1922
|
+
static const char * ggml_backend_cann_device_get_name(ggml_backend_dev_t dev) {
|
|
1923
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1924
|
+
return ctx->name.c_str();
|
|
1925
|
+
}
|
|
1926
|
+
|
|
1927
|
+
static const char* ggml_backend_cann_device_get_description(ggml_backend_dev_t dev) {
|
|
1928
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1929
|
+
return ctx->description.c_str();
|
|
1930
|
+
}
|
|
1931
|
+
|
|
1932
|
+
static void ggml_backend_cann_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
|
1933
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1934
|
+
ggml_backend_cann_get_device_memory(ctx->device, free, total);
|
|
1935
|
+
}
|
|
1936
|
+
|
|
1937
|
+
static enum ggml_backend_dev_type ggml_backend_cann_device_get_type(ggml_backend_dev_t dev) {
|
|
1938
|
+
GGML_UNUSED(dev);
|
|
1939
|
+
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
|
1940
|
+
}
|
|
1941
|
+
|
|
1942
|
+
static void ggml_backend_cann_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
|
1943
|
+
props->name = ggml_backend_cann_device_get_name(dev);
|
|
1944
|
+
props->description = ggml_backend_cann_device_get_description(dev);
|
|
1945
|
+
props->type = ggml_backend_cann_device_get_type(dev);
|
|
1946
|
+
ggml_backend_cann_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
|
1947
|
+
|
|
1948
|
+
bool host_buffer = getenv("GGML_CANN_NO_PINNED") == nullptr;
|
|
1949
|
+
|
|
1950
|
+
props->caps = {
|
|
1951
|
+
/* .async = */ false,
|
|
1952
|
+
/* .host_buffer = */ host_buffer,
|
|
1953
|
+
/* .buffer_from_host_ptr = */ false,
|
|
1954
|
+
/* .events = */ true,
|
|
1955
|
+
};
|
|
1956
|
+
}
|
|
1957
|
+
|
|
1958
|
+
static ggml_backend_t ggml_backend_cann_device_init(ggml_backend_dev_t dev, const char * params) {
|
|
1959
|
+
GGML_UNUSED(params);
|
|
1960
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1961
|
+
return ggml_backend_cann_init(ctx->device);
|
|
1962
|
+
}
|
|
1963
|
+
|
|
1964
|
+
/**
|
|
1965
|
+
* @brief Checks if the CANN backend supports a specific backend buffer type.
|
|
1966
|
+
*
|
|
1967
|
+
* This function determines whether the CANN backend supports the given backend
|
|
1968
|
+
* buffer type by comparing the device context of the backend and buffer type.
|
|
1969
|
+
* It returns true if the devices are same between the backend context and
|
|
1970
|
+
* buffer type context.
|
|
1971
|
+
*
|
|
1972
|
+
* @param backend Pointer to the CANN backend.
|
|
1973
|
+
* @param buft Pointer to the backend buffer type to check.
|
|
1974
|
+
* @return bool Returns true if the CANN backend supports the buffer type,
|
|
1975
|
+
* otherwise false.
|
|
1976
|
+
*/
|
|
1977
|
+
static bool ggml_backend_cann_supports_buft(
|
|
1978
|
+
ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
|
1979
|
+
if (ggml_backend_buft_is_cann(buft)) {
|
|
1980
|
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1981
|
+
ggml_backend_cann_buffer_type_context * buft_ctx =
|
|
1982
|
+
(ggml_backend_cann_buffer_type_context *)buft->context;
|
|
1983
|
+
return buft_ctx->device == dev_ctx->device;
|
|
1984
|
+
}
|
|
1985
|
+
return false;
|
|
1986
|
+
}
|
|
1987
|
+
|
|
1988
|
+
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_buffer_type(ggml_backend_dev_t dev) {
|
|
1989
|
+
ggml_backend_cann_device_context * ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
1990
|
+
return ggml_backend_cann_buffer_type(ctx->device);
|
|
1991
|
+
}
|
|
1992
|
+
|
|
1993
|
+
static ggml_backend_buffer_type_t ggml_backend_cann_device_get_host_buffer_type(ggml_backend_dev_t dev) {
|
|
1994
|
+
GGML_UNUSED(dev);
|
|
1995
|
+
return ggml_backend_cann_host_buffer_type();
|
|
1996
|
+
}
|
|
1997
|
+
|
|
1998
|
+
/**
|
|
1999
|
+
* @brief Creates a new event for the CANN backend device.
|
|
2000
|
+
*
|
|
2001
|
+
* This function initializes a new event for the CANN backend by setting the
|
|
2002
|
+
* device and creating an ACL runtime event. The created event is then wrapped
|
|
2003
|
+
* in a ggml_backend_event structure and returned.
|
|
2004
|
+
*
|
|
2005
|
+
* @param backend Pointer to the CANN backend.
|
|
2006
|
+
* @return ggml_backend_event_t Returns a pointer to the new event structure.
|
|
2007
|
+
*/
|
|
2008
|
+
static ggml_backend_event_t ggml_backend_cann_device_event_new(
|
|
2009
|
+
ggml_backend_dev_t dev) {
|
|
2010
|
+
ggml_backend_cann_device_context * dev_ctx = (ggml_backend_cann_device_context *)dev->context;
|
|
2011
|
+
|
|
2012
|
+
ggml_cann_set_device(dev_ctx->device);
|
|
2013
|
+
|
|
2014
|
+
aclrtEvent event;
|
|
2015
|
+
ACL_CHECK(aclrtCreateEvent(&event));
|
|
2016
|
+
|
|
2017
|
+
return new ggml_backend_event{
|
|
2018
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), dev_ctx->device),
|
|
2019
|
+
/* .context = */ event,
|
|
2020
|
+
};
|
|
2021
|
+
}
|
|
2022
|
+
|
|
2023
|
+
/**
|
|
2024
|
+
* @brief Frees a CANN backend event.
|
|
2025
|
+
*
|
|
2026
|
+
* This function destroys the ACL runtime event associated with the given CANN
|
|
2027
|
+
* backend event and then deletes the event structure itself.
|
|
2028
|
+
*
|
|
2029
|
+
* @param event Pointer to the event structure to be freed.
|
|
2030
|
+
*/
|
|
2031
|
+
static void ggml_backend_cann_device_event_free(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
2032
|
+
ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context));
|
|
2033
|
+
|
|
2034
|
+
delete event;
|
|
2035
|
+
GGML_UNUSED(dev);
|
|
2036
|
+
}
|
|
2037
|
+
|
|
2038
|
+
/**
|
|
2039
|
+
* @brief Synchronizes the given event on the CANN backend.
|
|
2040
|
+
*
|
|
2041
|
+
* This function waits for the specified event to complete on the ACL runtime.
|
|
2042
|
+
*
|
|
2043
|
+
* @param event Pointer to the event structure to be synchronized.
|
|
2044
|
+
*/
|
|
2045
|
+
static void ggml_backend_cann_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) {
|
|
2046
|
+
ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context));
|
|
2047
|
+
|
|
2048
|
+
GGML_UNUSED(dev);
|
|
2049
|
+
}
|
|
2050
|
+
|
|
2051
|
+
static const ggml_backend_device_i ggml_backend_cann_device_interface = {
|
|
2052
|
+
/* .get_name = */ ggml_backend_cann_device_get_name,
|
|
2053
|
+
/* .get_description = */ ggml_backend_cann_device_get_description,
|
|
2054
|
+
/* .get_memory = */ ggml_backend_cann_device_get_memory,
|
|
2055
|
+
/* .get_type = */ ggml_backend_cann_device_get_type,
|
|
2056
|
+
/* .get_props = */ ggml_backend_cann_device_get_props,
|
|
2057
|
+
/* .init_backend = */ ggml_backend_cann_device_init, // called for every card
|
|
2058
|
+
/* .get_buffer_type = */ ggml_backend_cann_device_get_buffer_type,
|
|
2059
|
+
/* .get_host_buffer_type = */ ggml_backend_cann_device_get_host_buffer_type,
|
|
2060
|
+
/* .buffer_from_host_ptr = */ NULL, // not supported for CANN
|
|
2061
|
+
/* .supports_op = */ ggml_backend_cann_supports_op,
|
|
2062
|
+
/* .supports_buft = */ ggml_backend_cann_supports_buft,
|
|
2063
|
+
/* .offload_op = */ ggml_backend_cann_offload_op,
|
|
2064
|
+
/* .event_new = */ ggml_backend_cann_device_event_new,
|
|
2065
|
+
/* .event_free = */ ggml_backend_cann_device_event_free,
|
|
2066
|
+
/* .event_synchronize = */ ggml_backend_cann_device_event_synchronize,
|
|
2067
|
+
};
|
|
2068
|
+
|
|
2069
|
+
|
|
2070
|
+
// backend reg
|
|
2071
|
+
struct ggml_backend_cann_reg_context {
|
|
2072
|
+
std::vector<ggml_backend_dev_t> devices;
|
|
2073
|
+
};
|
|
2074
|
+
|
|
2075
|
+
static const char * ggml_backend_cann_reg_get_name(ggml_backend_reg_t reg) {
|
|
2076
|
+
GGML_UNUSED(reg);
|
|
2077
|
+
return GGML_CANN_NAME;
|
|
2078
|
+
}
|
|
2079
|
+
|
|
2080
|
+
static size_t ggml_backend_cann_reg_get_device_count(ggml_backend_reg_t reg) {
|
|
2081
|
+
ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
|
|
2082
|
+
return ctx->devices.size();
|
|
2083
|
+
}
|
|
2084
|
+
|
|
2085
|
+
static ggml_backend_dev_t ggml_backend_cann_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
|
2086
|
+
ggml_backend_cann_reg_context * ctx = (ggml_backend_cann_reg_context *)reg->context;
|
|
2087
|
+
GGML_ASSERT(index < ctx->devices.size());
|
|
2088
|
+
return ctx->devices[index];
|
|
2089
|
+
}
|
|
2090
|
+
|
|
2091
|
+
static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
|
2092
|
+
GGML_UNUSED(reg);
|
|
2093
|
+
GGML_UNUSED(name);
|
|
2094
|
+
// reserved for future use
|
|
2095
|
+
return nullptr;
|
|
2096
|
+
}
|
|
2097
|
+
|
|
2098
|
+
static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
|
|
2099
|
+
/* .get_name = */ ggml_backend_cann_reg_get_name,
|
|
2100
|
+
/* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
|
|
2101
|
+
/* .get_device = */ ggml_backend_cann_reg_get_device,
|
|
2102
|
+
/* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
|
|
2103
|
+
};
|
|
2104
|
+
|
|
2105
|
+
// backend registry, called only once for cann backend
|
|
2106
|
+
ggml_backend_reg_t ggml_backend_cann_reg() {
|
|
2107
|
+
static ggml_backend_reg reg;
|
|
2108
|
+
static bool initialized = false;
|
|
2109
|
+
|
|
2110
|
+
{
|
|
2111
|
+
static std::mutex mutex;
|
|
2112
|
+
std::lock_guard<std::mutex> lock(mutex);
|
|
2113
|
+
if (!initialized) {
|
|
2114
|
+
aclInit(nullptr);
|
|
2115
|
+
ggml_backend_cann_reg_context * ctx = new ggml_backend_cann_reg_context;
|
|
2116
|
+
|
|
2117
|
+
for (int i = 0; i < ggml_cann_info().device_count; i++) {
|
|
2118
|
+
ggml_backend_cann_device_context* dev_ctx = new ggml_backend_cann_device_context();
|
|
2119
|
+
dev_ctx->description = aclrtGetSocName();
|
|
2120
|
+
dev_ctx->device = i;
|
|
2121
|
+
dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
|
|
2122
|
+
ggml_cann_set_device(i);
|
|
2123
|
+
ggml_backend_dev_t dev = new ggml_backend_device {
|
|
2124
|
+
/* .iface = */ ggml_backend_cann_device_interface,
|
|
2125
|
+
/* .reg = */ &reg,
|
|
2126
|
+
/* .context = */ dev_ctx
|
|
2127
|
+
};
|
|
2128
|
+
ctx->devices.push_back(dev);
|
|
2129
|
+
}
|
|
2130
|
+
|
|
2131
|
+
reg = ggml_backend_reg {
|
|
2132
|
+
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
|
2133
|
+
/* .iface = */ ggml_backend_cann_reg_interface,
|
|
2134
|
+
/* .context = */ ctx
|
|
2135
|
+
};
|
|
2136
|
+
}
|
|
2137
|
+
|
|
2138
|
+
initialized = true;
|
|
2139
|
+
}
|
|
2140
|
+
|
|
2141
|
+
return &reg;
|
|
2142
|
+
}
|
|
2143
|
+
|
|
1980
2144
|
ggml_backend_t ggml_backend_cann_init(int32_t device) {
|
|
1981
2145
|
aclInit(nullptr);
|
|
1982
2146
|
if (device < 0 || device >= ggml_backend_cann_get_device_count()) {
|
|
@@ -1993,7 +2157,7 @@ ggml_backend_t ggml_backend_cann_init(int32_t device) {
|
|
|
1993
2157
|
ggml_backend_t cann_backend =
|
|
1994
2158
|
new ggml_backend{/* .guid = */ ggml_backend_cann_guid(),
|
|
1995
2159
|
/* .interface = */ ggml_backend_cann_interface,
|
|
1996
|
-
/* .device = */
|
|
2160
|
+
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cann_reg(), device),
|
|
1997
2161
|
/* .context = */ ctx};
|
|
1998
2162
|
|
|
1999
2163
|
return cann_backend;
|
|
@@ -2020,3 +2184,5 @@ void ggml_backend_cann_get_device_memory(int32_t device, size_t* free,
|
|
|
2020
2184
|
ggml_cann_set_device(device);
|
|
2021
2185
|
ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
|
|
2022
2186
|
}
|
|
2187
|
+
|
|
2188
|
+
GGML_BACKEND_DL_IMPL(ggml_backend_cann_reg)
|