@novastera-oss/llamarn 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +3 -2
- package/android/CMakeLists.txt +6 -3
- package/android/src/main/cpp/include/llama.h +140 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +48 -67
- package/cpp/LlamaCppModel.h +8 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +33 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +15 -28
- package/cpp/llama.cpp/common/arg.cpp +38 -12
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +9 -3
- package/cpp/llama.cpp/common/chat-parser.h +4 -1
- package/cpp/llama.cpp/common/chat.cpp +16 -13
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +52 -40
- package/cpp/llama.cpp/common/common.h +5 -2
- package/cpp/llama.cpp/common/json-partial.cpp +5 -4
- package/cpp/llama.cpp/common/json-partial.h +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +128 -84
- package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +49 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +93 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2174 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +7 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +33 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1555 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +2 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +6 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +25 -16
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +33 -8
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +135 -100
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +908 -3
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +19 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +21 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +121 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +2 -96
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +164 -46
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +26 -29
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -248
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +9 -8
- package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +57 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +4 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +14 -3
- package/cpp/llama.cpp/include/llama.h +140 -38
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +4 -1
- package/cpp/llama.cpp/src/llama-arch.cpp +95 -3
- package/cpp/llama.cpp/src/llama-arch.h +7 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +289 -31
- package/cpp/llama.cpp/src/llama-batch.h +47 -17
- package/cpp/llama.cpp/src/llama-chat.cpp +19 -2
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +488 -313
- package/cpp/llama.cpp/src/llama-context.h +38 -17
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +275 -152
- package/cpp/llama.cpp/src/llama-graph.h +109 -52
- package/cpp/llama.cpp/src/llama-hparams.cpp +6 -2
- package/cpp/llama.cpp/src/llama-hparams.h +8 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +281 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +133 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1835 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +308 -0
- package/cpp/llama.cpp/src/llama-kv-cells.h +53 -17
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +247 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +143 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +1116 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +188 -0
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +89 -4
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model.cpp +735 -143
- package/cpp/llama.cpp/src/llama-model.h +4 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -1
- package/cpp/llama.cpp/src/llama-vocab.cpp +39 -25
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
- package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
- package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
- package/cpp/rn-completion.cpp +65 -10
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +8 -1
- package/ios/include/chat.h +1 -1
- package/ios/include/common/minja/chat-template.hpp +1 -1
- package/ios/include/common/minja/minja.hpp +1 -1
- package/ios/include/common.h +5 -2
- package/ios/include/json-schema-to-grammar.h +4 -4
- package/ios/include/llama.h +140 -38
- package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
- package/ios/libs/llama.xcframework/Info.plist +20 -20
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4617
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3557
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3744 -3559
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4616
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4637
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3556
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4900 -4653
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4871 -4674
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3773 -3587
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2747
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -502
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -1340,7 +1340,10 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
|
|
1340
1340
|
// allocate graph
|
|
1341
1341
|
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
|
1342
1342
|
// the re-allocation may cause the split inputs to be moved to a different address
|
|
1343
|
-
ggml_backend_sched_synchronize
|
|
1343
|
+
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
|
1344
|
+
for (int i = 0; i < sched->n_backends; i++) {
|
|
1345
|
+
ggml_backend_synchronize(sched->backends[i]);
|
|
1346
|
+
}
|
|
1344
1347
|
#ifndef NDEBUG
|
|
1345
1348
|
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
|
1346
1349
|
#endif
|
|
@@ -1564,7 +1567,6 @@ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgra
|
|
|
1564
1567
|
|
|
1565
1568
|
ggml_backend_sched_split_graph(sched, graph);
|
|
1566
1569
|
|
|
1567
|
-
|
|
1568
1570
|
if (!ggml_backend_sched_alloc_splits(sched)) {
|
|
1569
1571
|
return false;
|
|
1570
1572
|
}
|
|
@@ -1598,9 +1600,12 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
|
|
1598
1600
|
for (int i = 0; i < sched->n_backends; i++) {
|
|
1599
1601
|
ggml_backend_synchronize(sched->backends[i]);
|
|
1600
1602
|
}
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1603
|
+
if (!sched->is_alloc) {
|
|
1604
|
+
// if the graph is not already allocated, always use copy 0 after a synchronization
|
|
1605
|
+
// this ensures that during generation the same copy is used every time,
|
|
1606
|
+
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
|
1607
|
+
sched->cur_copy = 0;
|
|
1608
|
+
}
|
|
1604
1609
|
}
|
|
1605
1610
|
|
|
1606
1611
|
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
|
@@ -81,7 +81,7 @@ if (BLAS_FOUND)
|
|
|
81
81
|
target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
|
|
82
82
|
target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
|
|
83
83
|
else()
|
|
84
|
-
message(
|
|
85
|
-
|
|
86
|
-
|
|
84
|
+
message(FATAL_ERROR "BLAS not found, please refer to "
|
|
85
|
+
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
|
86
|
+
" to set correct GGML_BLAS_VENDOR")
|
|
87
87
|
endif()
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
#include <thread>
|
|
38
38
|
#include <unistd.h>
|
|
39
39
|
#include <functional>
|
|
40
|
+
#include <optional>
|
|
40
41
|
|
|
41
42
|
#include "../include/ggml-cann.h"
|
|
42
43
|
#include "../include/ggml.h"
|
|
@@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
|
|
|
103
104
|
void ggml_cann_set_device(int32_t device);
|
|
104
105
|
int32_t ggml_cann_get_device();
|
|
105
106
|
|
|
107
|
+
std::optional<std::string> get_env(const std::string& name);
|
|
108
|
+
bool parse_bool(const std::string& value);
|
|
109
|
+
|
|
106
110
|
/**
|
|
107
111
|
* @brief Abstract base class for memory pools used by CANN.
|
|
108
112
|
*/
|
|
@@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
|
|
|
354
358
|
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
|
|
355
359
|
ggml_cann_set_device(device);
|
|
356
360
|
description = aclrtGetSocName();
|
|
357
|
-
|
|
361
|
+
|
|
362
|
+
bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
|
|
358
363
|
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
|
359
364
|
device, async_mode ? "ON" : "OFF");
|
|
360
365
|
}
|
|
@@ -31,6 +31,8 @@
|
|
|
31
31
|
#include <mutex>
|
|
32
32
|
#include <queue>
|
|
33
33
|
#include <chrono>
|
|
34
|
+
#include <unordered_set>
|
|
35
|
+
#include <optional>
|
|
34
36
|
|
|
35
37
|
#include "ggml-impl.h"
|
|
36
38
|
#include "ggml-backend-impl.h"
|
|
@@ -93,6 +95,26 @@ int32_t ggml_cann_get_device() {
|
|
|
93
95
|
return id;
|
|
94
96
|
}
|
|
95
97
|
|
|
98
|
+
/**
|
|
99
|
+
* @brief Get the value of the specified environment variable (name).
|
|
100
|
+
* if not empty, return a std::string object
|
|
101
|
+
*/
|
|
102
|
+
std::optional<std::string> get_env(const std::string& name) {
|
|
103
|
+
const char* val = std::getenv(name.c_str());
|
|
104
|
+
if (!val) return std::nullopt;
|
|
105
|
+
std::string res = std::string(val);
|
|
106
|
+
std::transform(res.begin(), res.end(), res.begin(), ::tolower);
|
|
107
|
+
return res;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* @brief Verify whether the environment variable is a valid value.
|
|
112
|
+
*/
|
|
113
|
+
bool parse_bool(const std::string& value) {
|
|
114
|
+
std::unordered_set<std::string> valid_values = {"on", "1", "yes", "y", "enable", "true"};
|
|
115
|
+
return valid_values.find(value) != valid_values.end();
|
|
116
|
+
}
|
|
117
|
+
|
|
96
118
|
/**
|
|
97
119
|
* @brief Initialize the CANN device information.
|
|
98
120
|
*
|
|
@@ -214,7 +236,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
|
|
214
236
|
* @param device The device ID to associate with this buffer pool.
|
|
215
237
|
*/
|
|
216
238
|
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
|
217
|
-
disable_clean =
|
|
239
|
+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
218
240
|
}
|
|
219
241
|
|
|
220
242
|
/**
|
|
@@ -410,7 +432,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
|
|
|
410
432
|
* @param device The device ID to associate with this buffer pool.
|
|
411
433
|
*/
|
|
412
434
|
explicit ggml_cann_pool_buf(int device) : device(device) {
|
|
413
|
-
disable_clean =
|
|
435
|
+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
414
436
|
}
|
|
415
437
|
|
|
416
438
|
/**
|
|
@@ -731,16 +753,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
731
753
|
*/
|
|
732
754
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
|
733
755
|
int device) {
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
|
738
|
-
}
|
|
739
|
-
bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
|
|
740
|
-
if (enable_buf_prio) {
|
|
756
|
+
std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
|
|
757
|
+
|
|
758
|
+
if (mem_pool_type == "prio") {
|
|
741
759
|
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
|
742
760
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
|
|
743
761
|
}
|
|
762
|
+
|
|
763
|
+
if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
|
|
764
|
+
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
|
|
765
|
+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
|
766
|
+
}
|
|
767
|
+
|
|
744
768
|
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
|
|
745
769
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
|
|
746
770
|
}
|
|
@@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
|
|
|
1074
1074
|
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
|
1075
1075
|
GGML_TABLE_END()
|
|
1076
1076
|
|
|
1077
|
+
GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
|
|
1078
|
+
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
|
|
1079
|
+
GGML_TABLE_END()
|
|
1080
|
+
|
|
1077
1081
|
#define NGRID_IQ1S 2048
|
|
1078
1082
|
#define IQ1S_DELTA 0.125f
|
|
1079
1083
|
#define IQ1M_DELTA 0.125f
|
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
function(ggml_add_cpu_backend_features cpu_name arch)
|
|
2
|
+
# The feature detection code is compiled as a separate target so that
|
|
3
|
+
# it can be built without the architecture flags
|
|
4
|
+
# Since multiple variants of the CPU backend may be included in the same
|
|
5
|
+
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
6
|
+
set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
|
|
7
|
+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
|
|
8
|
+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
9
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
|
|
10
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
11
|
+
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
12
|
+
target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
13
|
+
endfunction()
|
|
14
|
+
|
|
1
15
|
function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
2
16
|
if (tag_name)
|
|
3
17
|
set(GGML_CPU_NAME ggml-cpu-${tag_name})
|
|
@@ -10,14 +24,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
10
24
|
list (APPEND GGML_CPU_SOURCES
|
|
11
25
|
ggml-cpu/ggml-cpu.c
|
|
12
26
|
ggml-cpu/ggml-cpu.cpp
|
|
13
|
-
ggml-cpu/
|
|
14
|
-
ggml-cpu/
|
|
15
|
-
ggml-cpu/
|
|
16
|
-
ggml-cpu/
|
|
17
|
-
ggml-cpu/
|
|
18
|
-
ggml-cpu/
|
|
19
|
-
ggml-cpu/
|
|
20
|
-
ggml-cpu/
|
|
27
|
+
ggml-cpu/repack.cpp
|
|
28
|
+
ggml-cpu/repack.h
|
|
29
|
+
ggml-cpu/hbm.cpp
|
|
30
|
+
ggml-cpu/hbm.h
|
|
31
|
+
ggml-cpu/quants.c
|
|
32
|
+
ggml-cpu/quants.h
|
|
33
|
+
ggml-cpu/traits.cpp
|
|
34
|
+
ggml-cpu/traits.h
|
|
21
35
|
ggml-cpu/amx/amx.cpp
|
|
22
36
|
ggml-cpu/amx/amx.h
|
|
23
37
|
ggml-cpu/amx/mmq.cpp
|
|
@@ -84,6 +98,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
84
98
|
|
|
85
99
|
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
|
|
86
100
|
message(STATUS "ARM detected")
|
|
101
|
+
list(APPEND GGML_CPU_SOURCES
|
|
102
|
+
ggml-cpu/arch/arm/quants.c
|
|
103
|
+
ggml-cpu/arch/arm/repack.cpp
|
|
104
|
+
)
|
|
105
|
+
|
|
87
106
|
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
88
107
|
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
|
|
89
108
|
else()
|
|
@@ -138,6 +157,49 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
138
157
|
else()
|
|
139
158
|
if (GGML_CPU_ARM_ARCH)
|
|
140
159
|
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
|
160
|
+
elseif(GGML_CPU_ALL_VARIANTS)
|
|
161
|
+
# Begin with the lowest baseline
|
|
162
|
+
set(ARM_MCPU "armv8-a")
|
|
163
|
+
set(ARCH_TAGS "")
|
|
164
|
+
set(ARCH_DEFINITIONS "")
|
|
165
|
+
|
|
166
|
+
# When a feature is selected, bump the MCPU to the first
|
|
167
|
+
# version that supported it
|
|
168
|
+
if (GGML_INTERNAL_DOTPROD)
|
|
169
|
+
set(ARM_MCPU "armv8.2-a")
|
|
170
|
+
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
|
|
171
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
|
|
172
|
+
endif()
|
|
173
|
+
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
|
|
174
|
+
set(ARM_MCPU "armv8.2-a")
|
|
175
|
+
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
|
|
176
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
|
|
177
|
+
endif()
|
|
178
|
+
if (GGML_INTERNAL_SVE)
|
|
179
|
+
set(ARM_MCPU "armv8.2-a")
|
|
180
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sve")
|
|
181
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
|
|
182
|
+
endif()
|
|
183
|
+
if (GGML_INTERNAL_MATMUL_INT8)
|
|
184
|
+
set(ARM_MCPU "armv8.6-a")
|
|
185
|
+
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
|
|
186
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
|
|
187
|
+
endif()
|
|
188
|
+
if (GGML_INTERNAL_SVE2)
|
|
189
|
+
set(ARM_MCPU "armv8.6-a")
|
|
190
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
|
|
191
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
|
|
192
|
+
endif()
|
|
193
|
+
if (GGML_INTERNAL_NOSVE)
|
|
194
|
+
set(ARCH_TAGS "${ARCH_TAGS}+nosve")
|
|
195
|
+
endif()
|
|
196
|
+
if (GGML_INTERNAL_SME)
|
|
197
|
+
set(ARM_MCPU "armv9.2-a")
|
|
198
|
+
set(ARCH_TAGS "${ARCH_TAGS}+sme")
|
|
199
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
|
|
200
|
+
endif()
|
|
201
|
+
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
|
|
202
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
|
|
141
203
|
endif()
|
|
142
204
|
endif()
|
|
143
205
|
|
|
@@ -167,6 +229,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
167
229
|
endif()
|
|
168
230
|
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
|
|
169
231
|
message(STATUS "x86 detected")
|
|
232
|
+
list(APPEND GGML_CPU_SOURCES
|
|
233
|
+
ggml-cpu/arch/x86/quants.c
|
|
234
|
+
ggml-cpu/arch/x86/repack.cpp
|
|
235
|
+
)
|
|
236
|
+
|
|
170
237
|
if (MSVC)
|
|
171
238
|
# instruction set detection for MSVC only
|
|
172
239
|
if (GGML_NATIVE)
|
|
@@ -296,21 +363,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
296
363
|
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
|
297
364
|
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
|
298
365
|
endif()
|
|
299
|
-
|
|
300
|
-
# The feature detection code is compiled as a separate target so that
|
|
301
|
-
# it can be built without the architecture flags
|
|
302
|
-
# Since multiple variants of the CPU backend may be included in the same
|
|
303
|
-
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
304
|
-
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
|
305
|
-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
|
306
|
-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
307
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
308
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
309
|
-
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
310
|
-
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
366
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
|
|
311
367
|
endif()
|
|
312
368
|
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
313
369
|
message(STATUS "PowerPC detected")
|
|
370
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
|
|
314
371
|
if (GGML_NATIVE)
|
|
315
372
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
316
373
|
file(READ "/proc/cpuinfo" POWER10_M)
|
|
@@ -318,7 +375,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
318
375
|
execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
|
|
319
376
|
endif()
|
|
320
377
|
|
|
321
|
-
string(
|
|
378
|
+
string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
|
|
379
|
+
string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
|
|
322
380
|
string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
|
|
323
381
|
|
|
324
382
|
if (EXTRACTED_NUMBER GREATER_EQUAL 10)
|
|
@@ -337,6 +395,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
337
395
|
endif()
|
|
338
396
|
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
|
|
339
397
|
message(STATUS "loongarch64 detected")
|
|
398
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
|
|
399
|
+
|
|
340
400
|
list(APPEND ARCH_FLAGS -march=loongarch64)
|
|
341
401
|
if (GGML_LASX)
|
|
342
402
|
list(APPEND ARCH_FLAGS -mlasx)
|
|
@@ -346,6 +406,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
346
406
|
endif()
|
|
347
407
|
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
348
408
|
message(STATUS "riscv64 detected")
|
|
409
|
+
list(APPEND GGML_CPU_SOURCES
|
|
410
|
+
ggml-cpu/arch/riscv/quants.c
|
|
411
|
+
ggml-cpu/arch/riscv/repack.cpp
|
|
412
|
+
)
|
|
349
413
|
if (GGML_RVV)
|
|
350
414
|
if (GGML_XTHEADVECTOR)
|
|
351
415
|
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
|
|
@@ -357,6 +421,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
357
421
|
endif()
|
|
358
422
|
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
359
423
|
message(STATUS "s390x detected")
|
|
424
|
+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
|
|
360
425
|
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
|
|
361
426
|
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
|
|
362
427
|
|
|
@@ -380,12 +445,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
380
445
|
if (GGML_VXE)
|
|
381
446
|
list(APPEND ARCH_FLAGS -mvx -mzvector)
|
|
382
447
|
endif()
|
|
448
|
+
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
|
|
449
|
+
message(STATUS "Wasm detected")
|
|
450
|
+
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
|
|
383
451
|
else()
|
|
384
|
-
message(
|
|
452
|
+
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
|
|
453
|
+
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
|
|
385
454
|
endif()
|
|
386
455
|
|
|
387
|
-
if (
|
|
388
|
-
target_compile_definitions(${GGML_CPU_NAME} PRIVATE
|
|
456
|
+
if (GGML_CPU_REPACK)
|
|
457
|
+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
|
|
389
458
|
endif()
|
|
390
459
|
|
|
391
460
|
if (GGML_CPU_KLEIDIAI)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#include "ggml-backend-impl.h"
|
|
2
|
+
|
|
3
|
+
#if defined(__aarch64__)
|
|
4
|
+
|
|
5
|
+
#if defined(__linux__)
|
|
6
|
+
#include <sys/auxv.h>
|
|
7
|
+
#elif defined(__APPLE__)
|
|
8
|
+
#include <sys/sysctl.h>
|
|
9
|
+
#endif
|
|
10
|
+
|
|
11
|
+
#if !defined(HWCAP2_I8MM)
|
|
12
|
+
#define HWCAP2_I8MM (1 << 13)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#if !defined(HWCAP2_SME)
|
|
16
|
+
#define HWCAP2_SME (1 << 23)
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
struct aarch64_features {
|
|
20
|
+
// has_neon not needed, aarch64 has NEON guaranteed
|
|
21
|
+
bool has_dotprod = false;
|
|
22
|
+
bool has_fp16_va = false;
|
|
23
|
+
bool has_sve = false;
|
|
24
|
+
bool has_sve2 = false;
|
|
25
|
+
bool has_i8mm = false;
|
|
26
|
+
bool has_sme = false;
|
|
27
|
+
|
|
28
|
+
aarch64_features() {
|
|
29
|
+
#if defined(__linux__)
|
|
30
|
+
uint32_t hwcap = getauxval(AT_HWCAP);
|
|
31
|
+
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
|
32
|
+
|
|
33
|
+
has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
|
34
|
+
has_fp16_va = !!(hwcap & HWCAP_FPHP);
|
|
35
|
+
has_sve = !!(hwcap & HWCAP_SVE);
|
|
36
|
+
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
|
|
37
|
+
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
|
38
|
+
has_sme = !!(hwcap2 & HWCAP2_SME);
|
|
39
|
+
#elif defined(__APPLE__)
|
|
40
|
+
int oldp = 0;
|
|
41
|
+
size_t size = sizeof(oldp);
|
|
42
|
+
|
|
43
|
+
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) == 0) {
|
|
44
|
+
has_dotprod = static_cast<bool>(oldp);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) == 0) {
|
|
48
|
+
has_i8mm = static_cast<bool>(oldp);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) == 0) {
|
|
52
|
+
has_sme = static_cast<bool>(oldp);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Apple apparently does not implement SVE yet
|
|
56
|
+
#endif
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
static int ggml_backend_cpu_aarch64_score() {
|
|
61
|
+
int score = 1;
|
|
62
|
+
aarch64_features af;
|
|
63
|
+
|
|
64
|
+
#ifdef GGML_USE_DOTPROD
|
|
65
|
+
if (!af.has_dotprod) { return 0; }
|
|
66
|
+
score += 1<<1;
|
|
67
|
+
#endif
|
|
68
|
+
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
|
|
69
|
+
if (!af.has_fp16_va) { return 0; }
|
|
70
|
+
score += 1<<2;
|
|
71
|
+
#endif
|
|
72
|
+
#ifdef GGML_USE_SVE
|
|
73
|
+
if (!af.has_sve) { return 0; }
|
|
74
|
+
score += 1<<3;
|
|
75
|
+
#endif
|
|
76
|
+
#ifdef GGML_USE_MATMUL_INT8
|
|
77
|
+
if (!af.has_i8mm) { return 0; }
|
|
78
|
+
score += 1<<4;
|
|
79
|
+
#endif
|
|
80
|
+
#ifdef GGML_USE_SVE2
|
|
81
|
+
if (!af.has_sve2) { return 0; }
|
|
82
|
+
score += 1<<5;
|
|
83
|
+
#endif
|
|
84
|
+
#ifdef GGML_USE_SME
|
|
85
|
+
if (!af.has_sme) { return 0; }
|
|
86
|
+
score += 1<<6;
|
|
87
|
+
#endif
|
|
88
|
+
|
|
89
|
+
return score;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_aarch64_score)
|
|
93
|
+
|
|
94
|
+
# endif // defined(__aarch64__)
|