@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -36,6 +36,7 @@
|
|
|
36
36
|
#include "ggml-backend-impl.h"
|
|
37
37
|
#include "ggml-cann/aclnn_ops.h"
|
|
38
38
|
#include "ggml-cann/common.h"
|
|
39
|
+
#include "ggml.h"
|
|
39
40
|
|
|
40
41
|
#define GGML_COMMON_DECL_C
|
|
41
42
|
|
|
@@ -1672,7 +1673,8 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
|
1672
1673
|
ggml_cann_mul_mat(ctx, dst);
|
|
1673
1674
|
break;
|
|
1674
1675
|
case GGML_OP_MUL_MAT_ID:
|
|
1675
|
-
|
|
1676
|
+
ggml_cann_mul_mat_id(ctx, dst);
|
|
1677
|
+
break;
|
|
1676
1678
|
case GGML_OP_SCALE:
|
|
1677
1679
|
ggml_cann_scale(ctx, dst);
|
|
1678
1680
|
break;
|
|
@@ -1747,6 +1749,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|
|
1747
1749
|
case GGML_OP_COUNT_EQUAL:
|
|
1748
1750
|
ggml_cann_count_equal(ctx, dst);
|
|
1749
1751
|
break;
|
|
1752
|
+
case GGML_OP_FLASH_ATTN_EXT:
|
|
1753
|
+
ggml_cann_flash_attn_ext(ctx, dst);
|
|
1754
|
+
break;
|
|
1750
1755
|
default:
|
|
1751
1756
|
return false;
|
|
1752
1757
|
}
|
|
@@ -2030,7 +2035,22 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2030
2035
|
}
|
|
2031
2036
|
}
|
|
2032
2037
|
case GGML_OP_MUL_MAT_ID:
|
|
2033
|
-
|
|
2038
|
+
switch (op->src[0]->type) {
|
|
2039
|
+
case GGML_TYPE_F16:
|
|
2040
|
+
case GGML_TYPE_F32:
|
|
2041
|
+
return true;
|
|
2042
|
+
case GGML_TYPE_Q8_0:
|
|
2043
|
+
case GGML_TYPE_Q4_0:
|
|
2044
|
+
#ifdef ASCEND_310P
|
|
2045
|
+
// Q4 && Q8 per group is not suppor on 310p device
|
|
2046
|
+
return false;
|
|
2047
|
+
#endif
|
|
2048
|
+
// only support contiguous for quantized types.
|
|
2049
|
+
return ggml_is_contiguous(op->src[0]) &&
|
|
2050
|
+
ggml_is_contiguous(op->src[1]);
|
|
2051
|
+
default:
|
|
2052
|
+
return false;
|
|
2053
|
+
}
|
|
2034
2054
|
// embedding
|
|
2035
2055
|
case GGML_OP_GET_ROWS: {
|
|
2036
2056
|
switch (op->src[0]->type) {
|
|
@@ -2161,6 +2181,38 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
|
2161
2181
|
case GGML_OP_PAD_REFLECT_1D:
|
|
2162
2182
|
case GGML_OP_COUNT_EQUAL:
|
|
2163
2183
|
return true;
|
|
2184
|
+
case GGML_OP_FLASH_ATTN_EXT:{
|
|
2185
|
+
// derived from [ggml-cuda.cu]
|
|
2186
|
+
if(op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16){
|
|
2187
|
+
return false;
|
|
2188
|
+
}
|
|
2189
|
+
if(op->src[1]->type != GGML_TYPE_F16 && op->src[1]->type != GGML_TYPE_F32 && op->src[1]->type != GGML_TYPE_BF16){
|
|
2190
|
+
return false;
|
|
2191
|
+
}
|
|
2192
|
+
if(op->type != GGML_TYPE_F16 && op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_BF16){
|
|
2193
|
+
return false;
|
|
2194
|
+
}
|
|
2195
|
+
if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
|
|
2196
|
+
// different head sizes of K and V are not supported yet
|
|
2197
|
+
return false;
|
|
2198
|
+
}
|
|
2199
|
+
if (op->src[0]->ne[0] == 192) {
|
|
2200
|
+
return false;
|
|
2201
|
+
}
|
|
2202
|
+
if (op->src[0]->ne[0] == 576) {
|
|
2203
|
+
// DeepSeek MLA
|
|
2204
|
+
return false;
|
|
2205
|
+
}
|
|
2206
|
+
if (op->src[0]->ne[3] != 1) {
|
|
2207
|
+
return false;
|
|
2208
|
+
}
|
|
2209
|
+
float logitSoftcap = 0.0f;
|
|
2210
|
+
memcpy(&logitSoftcap, (float*)op->op_params + 2, sizeof(float));
|
|
2211
|
+
if(logitSoftcap != 0.0f) {
|
|
2212
|
+
return false;
|
|
2213
|
+
}
|
|
2214
|
+
return true;
|
|
2215
|
+
}
|
|
2164
2216
|
default:
|
|
2165
2217
|
return false;
|
|
2166
2218
|
}
|
|
@@ -82,13 +82,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
82
82
|
target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
|
|
83
83
|
endif()
|
|
84
84
|
|
|
85
|
-
if (
|
|
86
|
-
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
|
87
|
-
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
88
|
-
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
|
89
|
-
|
|
85
|
+
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
|
|
90
86
|
message(STATUS "ARM detected")
|
|
91
|
-
|
|
92
87
|
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|
93
88
|
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
|
|
94
89
|
else()
|
|
@@ -170,12 +165,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
170
165
|
endforeach()
|
|
171
166
|
endif()
|
|
172
167
|
endif()
|
|
173
|
-
elseif (
|
|
174
|
-
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
175
|
-
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
|
176
|
-
|
|
168
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
|
|
177
169
|
message(STATUS "x86 detected")
|
|
178
|
-
|
|
179
170
|
if (MSVC)
|
|
180
171
|
# instruction set detection for MSVC only
|
|
181
172
|
if (GGML_NATIVE)
|
|
@@ -299,7 +290,26 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
299
290
|
endif()
|
|
300
291
|
endif()
|
|
301
292
|
endif()
|
|
302
|
-
|
|
293
|
+
|
|
294
|
+
if (GGML_BACKEND_DL)
|
|
295
|
+
if (GGML_NATIVE)
|
|
296
|
+
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
|
297
|
+
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
|
298
|
+
endif()
|
|
299
|
+
|
|
300
|
+
# The feature detection code is compiled as a separate target so that
|
|
301
|
+
# it can be built without the architecture flags
|
|
302
|
+
# Since multiple variants of the CPU backend may be included in the same
|
|
303
|
+
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
304
|
+
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
|
305
|
+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
|
306
|
+
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
307
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
308
|
+
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
309
|
+
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
310
|
+
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
311
|
+
endif()
|
|
312
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
|
|
303
313
|
message(STATUS "PowerPC detected")
|
|
304
314
|
if (GGML_NATIVE)
|
|
305
315
|
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
|
@@ -325,9 +335,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
325
335
|
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
|
|
326
336
|
endif()
|
|
327
337
|
endif()
|
|
328
|
-
elseif (
|
|
338
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
|
|
329
339
|
message(STATUS "loongarch64 detected")
|
|
330
|
-
|
|
331
340
|
list(APPEND ARCH_FLAGS -march=loongarch64)
|
|
332
341
|
if (GGML_LASX)
|
|
333
342
|
list(APPEND ARCH_FLAGS -mlasx)
|
|
@@ -335,16 +344,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
335
344
|
if (GGML_LSX)
|
|
336
345
|
list(APPEND ARCH_FLAGS -mlsx)
|
|
337
346
|
endif()
|
|
338
|
-
elseif (
|
|
339
|
-
message(STATUS "
|
|
347
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
348
|
+
message(STATUS "riscv64 detected")
|
|
340
349
|
if (GGML_RVV)
|
|
341
|
-
if (
|
|
342
|
-
list(APPEND ARCH_FLAGS -march=
|
|
350
|
+
if (GGML_XTHEADVECTOR)
|
|
351
|
+
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
|
|
352
|
+
elseif (GGML_RV_ZFH)
|
|
353
|
+
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -mabi=lp64d)
|
|
343
354
|
else()
|
|
344
355
|
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
|
345
356
|
endif()
|
|
346
357
|
endif()
|
|
347
|
-
elseif (
|
|
358
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
348
359
|
message(STATUS "s390x detected")
|
|
349
360
|
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
|
|
350
361
|
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
|
|
@@ -385,9 +396,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
385
396
|
|
|
386
397
|
# Fetch KleidiAI sources:
|
|
387
398
|
include(FetchContent)
|
|
388
|
-
set(KLEIDIAI_COMMIT_TAG "v1.
|
|
399
|
+
set(KLEIDIAI_COMMIT_TAG "v1.6.0")
|
|
389
400
|
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
|
|
390
|
-
set(KLEIDIAI_ARCHIVE_MD5 "
|
|
401
|
+
set(KLEIDIAI_ARCHIVE_MD5 "75b4ad68f25ab673dcc01065e5a0b05f")
|
|
391
402
|
|
|
392
403
|
if (POLICY CMP0135)
|
|
393
404
|
cmake_policy(SET CMP0135 NEW)
|
|
@@ -428,6 +439,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
428
439
|
${KLEIDIAI_SRC}/kai/ukernels/
|
|
429
440
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/
|
|
430
441
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
|
|
442
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
|
|
431
443
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
|
|
432
444
|
|
|
433
445
|
set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
|
|
@@ -438,17 +450,19 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
438
450
|
string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
|
|
439
451
|
string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)
|
|
440
452
|
|
|
441
|
-
set(PRIVATE_ARCH_FLAGS ${
|
|
453
|
+
set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})
|
|
442
454
|
|
|
443
|
-
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
455
|
+
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
456
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c
|
|
457
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
|
|
458
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
|
|
459
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)
|
|
447
460
|
|
|
448
461
|
if (NOT DOTPROD_ENABLED MATCHES -1)
|
|
449
|
-
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
450
|
-
|
|
451
|
-
|
|
462
|
+
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
463
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
|
|
464
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
|
|
465
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c)
|
|
452
466
|
endif()
|
|
453
467
|
|
|
454
468
|
if (NOT I8MM_ENABLED MATCHES -1)
|
|
@@ -456,9 +470,13 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
456
470
|
endif()
|
|
457
471
|
|
|
458
472
|
if (NOT SME_ENABLED MATCHES -1)
|
|
459
|
-
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
460
|
-
|
|
461
|
-
|
|
473
|
+
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
474
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
|
|
475
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
|
|
476
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
|
|
477
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
|
|
478
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c)
|
|
479
|
+
set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
|
|
462
480
|
endif()
|
|
463
481
|
|
|
464
482
|
set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
|
|
@@ -470,25 +488,6 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
470
488
|
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
|
|
471
489
|
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
472
490
|
|
|
473
|
-
if (GGML_BACKEND_DL)
|
|
474
|
-
if (GGML_NATIVE)
|
|
475
|
-
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
|
|
476
|
-
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
|
|
477
|
-
endif()
|
|
478
|
-
|
|
479
|
-
# The feature detection code is compiled as a separate target so that
|
|
480
|
-
# it can be built without the architecture flags
|
|
481
|
-
# Since multiple variants of the CPU backend may be included in the same
|
|
482
|
-
# build, using set_source_files_properties() to set the arch flags is not possible
|
|
483
|
-
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
|
|
484
|
-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
|
|
485
|
-
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
|
|
486
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
|
|
487
|
-
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
|
|
488
|
-
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
489
|
-
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
|
|
490
|
-
endif()
|
|
491
|
-
|
|
492
491
|
if (EMSCRIPTEN)
|
|
493
492
|
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
494
493
|
endif()
|
|
@@ -1191,7 +1191,7 @@ static void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c
|
|
|
1191
1191
|
}
|
|
1192
1192
|
}
|
|
1193
1193
|
return;
|
|
1194
|
-
#elif defined
|
|
1194
|
+
#elif defined __riscv_v
|
|
1195
1195
|
if (__riscv_vlenb() >= QK4_0) {
|
|
1196
1196
|
const size_t vl = QK4_0;
|
|
1197
1197
|
|
|
@@ -3783,7 +3783,7 @@ static void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, c
|
|
|
3783
3783
|
}
|
|
3784
3784
|
return;
|
|
3785
3785
|
}
|
|
3786
|
-
#elif defined
|
|
3786
|
+
#elif defined __riscv_v
|
|
3787
3787
|
if (__riscv_vlenb() >= QK4_0) {
|
|
3788
3788
|
const size_t vl = QK4_0;
|
|
3789
3789
|
|
|
@@ -320,21 +320,17 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
|
|
|
320
320
|
|
|
321
321
|
#ifdef __wasm_simd128__
|
|
322
322
|
#include <wasm_simd128.h>
|
|
323
|
-
#
|
|
323
|
+
#endif
|
|
324
|
+
|
|
324
325
|
#ifdef __POWER9_VECTOR__
|
|
325
326
|
#include <altivec.h>
|
|
326
|
-
#
|
|
327
|
+
#endif
|
|
328
|
+
|
|
327
329
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
328
330
|
#include <intrin.h>
|
|
329
|
-
#
|
|
330
|
-
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
|
|
331
|
-
#if !defined(__riscv)
|
|
331
|
+
#elif defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__) || defined(__SSE__)
|
|
332
332
|
#include <immintrin.h>
|
|
333
333
|
#endif
|
|
334
|
-
#endif
|
|
335
|
-
#endif
|
|
336
|
-
#endif
|
|
337
|
-
#endif
|
|
338
334
|
|
|
339
335
|
#ifdef __riscv_v_intrinsic
|
|
340
336
|
#include <riscv_vector.h>
|