@novastera-oss/llamarn 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/include/llama.h +141 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +58 -24
- package/cpp/LlamaCppModel.h +3 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +32 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
- package/cpp/llama.cpp/common/arg.cpp +37 -6
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
- package/cpp/llama.cpp/common/chat-parser.h +2 -0
- package/cpp/llama.cpp/common/chat.cpp +12 -9
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +53 -40
- package/cpp/llama.cpp/common/common.h +6 -2
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
- package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
- package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
- package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
- package/cpp/llama.cpp/include/llama.h +141 -38
- package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
- package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
- package/cpp/llama.cpp/src/llama-arch.h +25 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
- package/cpp/llama.cpp/src/llama-batch.h +110 -57
- package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +360 -266
- package/cpp/llama.cpp/src/llama-context.h +27 -23
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
- package/cpp/llama.cpp/src/llama-graph.h +126 -58
- package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
- package/cpp/llama.cpp/src/llama-hparams.h +16 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
- package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
- package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +73 -36
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
- package/cpp/llama.cpp/src/llama-model.h +26 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
- package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
- package/cpp/llama.cpp/src/llama-vocab.h +1 -0
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/rn-completion.cpp +2 -2
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
- package/ios/include/chat.h +1 -1
- package/ios/include/common.h +6 -2
- package/ios/include/llama.h +141 -38
- package/ios/libs/llama.xcframework/Info.plist +15 -15
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
// Rename `_generic` functions if no native implementation is available.
|
|
4
|
+
// This effectively selects the generic implementation.
|
|
5
|
+
|
|
6
|
+
#if defined(GGML_CPU_GENERIC)
|
|
7
|
+
// quants.c
|
|
8
|
+
#define quantize_row_q8_0_generic quantize_row_q8_0
|
|
9
|
+
#define quantize_row_q8_1_generic quantize_row_q8_1
|
|
10
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
11
|
+
#define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0
|
|
12
|
+
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
|
|
13
|
+
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
|
|
14
|
+
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
|
|
15
|
+
#define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
|
|
16
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
17
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
18
|
+
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
|
|
19
|
+
#define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K
|
|
20
|
+
#define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K
|
|
21
|
+
#define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K
|
|
22
|
+
#define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K
|
|
23
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
24
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
25
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
26
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
27
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
28
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
29
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
30
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
31
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
32
|
+
// repack.cpp
|
|
33
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
34
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
35
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
36
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
37
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
38
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
39
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
40
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
41
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
42
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
43
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
44
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
45
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
46
|
+
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
|
|
47
|
+
// repack.cpp
|
|
48
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
49
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
50
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
51
|
+
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
|
52
|
+
// repack.cpp
|
|
53
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
54
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
55
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
56
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
57
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
58
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
59
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
60
|
+
#elif defined(__POWERPC__) || defined(__powerpc__)
|
|
61
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
|
|
62
|
+
// quants.c
|
|
63
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
64
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
65
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
66
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
67
|
+
// repack.cpp
|
|
68
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
69
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
70
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
71
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
72
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
73
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
74
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
75
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
76
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
77
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
78
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
79
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
80
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
81
|
+
#elif defined(__loongarch64)
|
|
82
|
+
// quants.c
|
|
83
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
84
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
85
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
86
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
87
|
+
// repack.cpp
|
|
88
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
89
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
90
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
91
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
92
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
93
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
94
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
95
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
96
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
97
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
98
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
99
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
100
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
101
|
+
#elif defined(__riscv)
|
|
102
|
+
// quants.c
|
|
103
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
104
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
105
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
106
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
107
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
108
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
109
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
110
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
111
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
112
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
113
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
114
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
115
|
+
// repack.cpp
|
|
116
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
117
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
118
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
119
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
120
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
121
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
122
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
123
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
124
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
125
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
126
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
127
|
+
#elif defined(__s390x__)
|
|
128
|
+
// quants.c
|
|
129
|
+
#define quantize_row_q8_K_generic quantize_row_q8_K
|
|
130
|
+
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
|
|
131
|
+
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
|
|
132
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
133
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
134
|
+
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
|
|
135
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
136
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
137
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
138
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
139
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
140
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
141
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
142
|
+
// repack.cpp
|
|
143
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
144
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
145
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
146
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
147
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
148
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
149
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
150
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
151
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
152
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
153
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
154
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
155
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
156
|
+
#elif defined(__wasm__)
|
|
157
|
+
// quants.c
|
|
158
|
+
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
|
|
159
|
+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
|
160
|
+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
|
161
|
+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
|
162
|
+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
|
|
163
|
+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
|
|
164
|
+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
|
|
165
|
+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
|
|
166
|
+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
|
|
167
|
+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
|
|
168
|
+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
|
|
169
|
+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
|
|
170
|
+
// repack.cpp
|
|
171
|
+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
|
|
172
|
+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
|
|
173
|
+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
|
|
174
|
+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
|
|
175
|
+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
|
|
176
|
+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
|
|
177
|
+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
|
|
178
|
+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
|
|
179
|
+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
|
|
180
|
+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
|
|
181
|
+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
|
|
182
|
+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
|
|
183
|
+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
|
|
184
|
+
#endif
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "ggml.h"
|
|
4
|
-
#include "ggml-cpu-traits.h"
|
|
4
|
+
#include "traits.h"
|
|
5
5
|
#include "ggml-cpu-impl.h"
|
|
6
6
|
#include "ggml-impl.h"
|
|
7
|
+
#include "simd-mappings.h"
|
|
7
8
|
|
|
8
9
|
#ifdef __cplusplus
|
|
9
10
|
|
|
@@ -12,11 +13,11 @@
|
|
|
12
13
|
// convenience functions/macros for use in template calls
|
|
13
14
|
// note: these won't be required after the 'traits' lookup table is used.
|
|
14
15
|
static inline ggml_fp16_t f32_to_f16(float x) {
|
|
15
|
-
return GGML_FP32_TO_FP16(x);
|
|
16
|
+
return GGML_CPU_FP32_TO_FP16(x);
|
|
16
17
|
}
|
|
17
18
|
|
|
18
19
|
static inline float f16_to_f32(ggml_fp16_t x) {
|
|
19
|
-
return GGML_FP16_TO_FP32(x);
|
|
20
|
+
return GGML_CPU_FP16_TO_FP32(x);
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
static inline ggml_bf16_t f32_to_bf16(float x) {
|
|
@@ -62,11 +62,17 @@ struct ggml_compute_params {
|
|
|
62
62
|
#if defined(__s390x__) && defined(__VEC__)
|
|
63
63
|
#ifndef __VXE__
|
|
64
64
|
#define __VXE__
|
|
65
|
-
#endif
|
|
65
|
+
#endif // __VXE__
|
|
66
66
|
#ifndef __VXE2__
|
|
67
67
|
#define __VXE2__
|
|
68
|
-
#endif
|
|
69
|
-
#endif
|
|
68
|
+
#endif // __VXE2__
|
|
69
|
+
#endif // __s390x__ && __VEC__
|
|
70
|
+
|
|
71
|
+
#if defined(__s390x__) && defined(GGML_NNPA)
|
|
72
|
+
#ifndef __NNPA__
|
|
73
|
+
#define __NNPA__
|
|
74
|
+
#endif // __NNPA__
|
|
75
|
+
#endif // __s390x__ && GGML_NNPA
|
|
70
76
|
|
|
71
77
|
#if defined(__ARM_FEATURE_SVE)
|
|
72
78
|
#include <sys/prctl.h>
|
|
@@ -371,7 +377,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
|
|
|
371
377
|
#define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
|
|
372
378
|
#endif
|
|
373
379
|
|
|
374
|
-
typedef signed
|
|
380
|
+
typedef signed char char8x16_t __attribute__((vector_size(16)));
|
|
375
381
|
typedef unsigned char uchar8x16_t __attribute__((vector_size(16)));
|
|
376
382
|
|
|
377
383
|
typedef int8_t int8x16_t __attribute__((vector_size(16)));
|
|
@@ -382,10 +388,10 @@ typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
|
|
|
382
388
|
typedef uint16_t uint16x8_t __attribute__((vector_size(16)));
|
|
383
389
|
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
|
|
384
390
|
|
|
385
|
-
typedef float
|
|
386
|
-
typedef double double64x2_t
|
|
391
|
+
typedef float float32x4_t __attribute__((vector_size(16)));
|
|
392
|
+
typedef double double64x2_t __attribute__((vector_size(16)));
|
|
387
393
|
|
|
388
|
-
typedef signed
|
|
394
|
+
typedef signed long long long64x2_t __attribute__((vector_size(16)));
|
|
389
395
|
typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
|
|
390
396
|
|
|
391
397
|
typedef struct ggml_uint8x16x2_t {
|
|
@@ -503,6 +509,9 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) {
|
|
|
503
509
|
// TODO: move to ggml-threading
|
|
504
510
|
void ggml_barrier(struct ggml_threadpool * tp);
|
|
505
511
|
|
|
512
|
+
void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value);
|
|
513
|
+
int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value);
|
|
514
|
+
|
|
506
515
|
#ifdef __cplusplus
|
|
507
516
|
}
|
|
508
517
|
#endif
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
|
|
4
4
|
#include "ggml-backend-impl.h"
|
|
5
5
|
#include "ggml-backend.h"
|
|
6
|
-
#include "
|
|
6
|
+
#include "traits.h"
|
|
7
7
|
#include "ggml-cpu-impl.h"
|
|
8
8
|
#include "ggml-cpu.h"
|
|
9
9
|
#include "ggml-impl.h"
|
|
10
|
-
#include "
|
|
10
|
+
#include "quants.h"
|
|
11
11
|
#include "ggml-threading.h"
|
|
12
12
|
#include "unary-ops.h"
|
|
13
13
|
#include "binary-ops.h"
|
|
@@ -72,15 +72,13 @@
|
|
|
72
72
|
#define UNUSED GGML_UNUSED
|
|
73
73
|
#define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0)
|
|
74
74
|
|
|
75
|
+
// precomputed f32 table for f16 (256 KB) (simd-mappings.h)
|
|
76
|
+
float ggml_table_f32_f16[1 << 16];
|
|
77
|
+
|
|
75
78
|
#if defined(__ARM_ARCH)
|
|
76
79
|
struct ggml_arm_arch_features_type {
|
|
77
|
-
int has_neon;
|
|
78
|
-
int has_dotprod;
|
|
79
|
-
int has_i8mm;
|
|
80
|
-
int has_sve;
|
|
81
80
|
int sve_cnt;
|
|
82
|
-
|
|
83
|
-
} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1};
|
|
81
|
+
} ggml_arm_arch_features = { 0 };
|
|
84
82
|
#endif
|
|
85
83
|
|
|
86
84
|
|
|
@@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t;
|
|
|
197
195
|
|
|
198
196
|
static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|
199
197
|
[GGML_TYPE_F32] = {
|
|
198
|
+
.from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
|
|
200
199
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
|
|
201
200
|
.vec_dot_type = GGML_TYPE_F32,
|
|
202
201
|
.nrows = 1,
|
|
@@ -559,6 +558,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
|
|
|
559
558
|
#endif
|
|
560
559
|
}
|
|
561
560
|
|
|
561
|
+
void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) {
|
|
562
|
+
atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) {
|
|
566
|
+
return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed);
|
|
567
|
+
}
|
|
568
|
+
|
|
562
569
|
#if defined(__gnu_linux__)
|
|
563
570
|
static cpu_set_t ggml_get_numa_affinity(void) {
|
|
564
571
|
cpu_set_t cpuset;
|
|
@@ -670,87 +677,15 @@ bool ggml_is_numa(void) {
|
|
|
670
677
|
|
|
671
678
|
#if defined(__linux__) && defined(__aarch64__)
|
|
672
679
|
#include <sys/auxv.h>
|
|
673
|
-
#elif defined(__APPLE__)
|
|
674
|
-
#include <sys/sysctl.h>
|
|
675
|
-
#endif
|
|
676
|
-
|
|
677
|
-
#if !defined(HWCAP2_I8MM)
|
|
678
|
-
#define HWCAP2_I8MM (1 << 13)
|
|
679
|
-
#endif
|
|
680
|
-
|
|
681
|
-
#if !defined(HWCAP2_SME)
|
|
682
|
-
#define HWCAP2_SME (1 << 23)
|
|
683
680
|
#endif
|
|
684
681
|
|
|
685
682
|
static void ggml_init_arm_arch_features(void) {
|
|
686
|
-
#if defined(__linux__) && defined(__aarch64__)
|
|
687
|
-
uint32_t hwcap = getauxval(AT_HWCAP);
|
|
688
|
-
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
|
689
|
-
|
|
690
|
-
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
|
691
|
-
ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
|
692
|
-
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
|
693
|
-
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
|
694
|
-
ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME);
|
|
695
|
-
|
|
696
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
683
|
+
#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
|
|
697
684
|
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
|
698
685
|
#endif
|
|
699
|
-
#elif defined(__APPLE__)
|
|
700
|
-
int oldp = 0;
|
|
701
|
-
size_t size = sizeof(oldp);
|
|
702
|
-
if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
|
|
703
|
-
oldp = 0;
|
|
704
|
-
}
|
|
705
|
-
ggml_arm_arch_features.has_neon = oldp;
|
|
706
|
-
|
|
707
|
-
if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) {
|
|
708
|
-
oldp = 0;
|
|
709
|
-
}
|
|
710
|
-
ggml_arm_arch_features.has_dotprod = oldp;
|
|
711
|
-
|
|
712
|
-
if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
|
|
713
|
-
oldp = 0;
|
|
714
|
-
}
|
|
715
|
-
ggml_arm_arch_features.has_i8mm = oldp;
|
|
716
|
-
|
|
717
|
-
if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) {
|
|
718
|
-
oldp = 0;
|
|
719
|
-
}
|
|
720
|
-
ggml_arm_arch_features.has_sme = oldp;
|
|
721
|
-
|
|
722
|
-
ggml_arm_arch_features.has_sve = 0;
|
|
723
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
|
724
|
-
#else
|
|
725
|
-
// Run-time CPU feature detection not implemented for this platform, fallback to compile time
|
|
726
|
-
#if defined(__ARM_NEON)
|
|
727
|
-
ggml_arm_arch_features.has_neon = 1;
|
|
728
|
-
#else
|
|
729
|
-
ggml_arm_arch_features.has_neon = 0;
|
|
730
|
-
#endif
|
|
731
|
-
|
|
732
|
-
#if defined(__ARM_FEATURE_MATMUL_INT8)
|
|
733
|
-
ggml_arm_arch_features.has_i8mm = 1;
|
|
734
|
-
#else
|
|
735
|
-
ggml_arm_arch_features.has_i8mm = 0;
|
|
736
|
-
#endif
|
|
737
|
-
|
|
738
|
-
#if defined(__ARM_FEATURE_SVE)
|
|
739
|
-
ggml_arm_arch_features.has_sve = 1;
|
|
740
|
-
ggml_arm_arch_features.sve_cnt = 16;
|
|
741
|
-
#else
|
|
742
|
-
ggml_arm_arch_features.has_sve = 0;
|
|
743
|
-
ggml_arm_arch_features.sve_cnt = 0;
|
|
744
|
-
#endif
|
|
745
|
-
|
|
746
|
-
#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2)
|
|
747
|
-
ggml_arm_arch_features.has_sme = 1;
|
|
748
|
-
#else
|
|
749
|
-
ggml_arm_arch_features.has_sme = 0;
|
|
750
|
-
#endif
|
|
751
|
-
#endif
|
|
752
686
|
}
|
|
753
|
-
|
|
687
|
+
|
|
688
|
+
#endif // __ARM_ARCH
|
|
754
689
|
|
|
755
690
|
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
|
|
756
691
|
GGML_ASSERT(!ggml_get_no_alloc(ctx));
|
|
@@ -805,7 +740,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
|
|
|
805
740
|
{
|
|
806
741
|
assert(tensor->nb[0] == sizeof(ggml_fp16_t));
|
|
807
742
|
for (int i = 0; i < n; i++) {
|
|
808
|
-
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
|
|
743
|
+
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
|
|
809
744
|
}
|
|
810
745
|
} break;
|
|
811
746
|
case GGML_TYPE_BF16:
|
|
@@ -864,7 +799,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
|
|
|
864
799
|
{
|
|
865
800
|
assert(tensor->nb[0] == sizeof(ggml_fp16_t));
|
|
866
801
|
for (int i = 0; i < n; i++) {
|
|
867
|
-
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1),
|
|
802
|
+
ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value));
|
|
868
803
|
}
|
|
869
804
|
} break;
|
|
870
805
|
case GGML_TYPE_BF16:
|
|
@@ -915,7 +850,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
|
|
|
915
850
|
case GGML_TYPE_F16:
|
|
916
851
|
{
|
|
917
852
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
|
918
|
-
return
|
|
853
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
|
919
854
|
}
|
|
920
855
|
case GGML_TYPE_BF16:
|
|
921
856
|
{
|
|
@@ -960,7 +895,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
|
|
|
960
895
|
case GGML_TYPE_F16:
|
|
961
896
|
{
|
|
962
897
|
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
|
963
|
-
((ggml_fp16_t *)(tensor->data))[i] =
|
|
898
|
+
((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
|
|
964
899
|
} break;
|
|
965
900
|
case GGML_TYPE_BF16:
|
|
966
901
|
{
|
|
@@ -989,7 +924,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
|
|
|
989
924
|
case GGML_TYPE_I32:
|
|
990
925
|
return ((int32_t *) data)[0];
|
|
991
926
|
case GGML_TYPE_F16:
|
|
992
|
-
return
|
|
927
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
|
993
928
|
case GGML_TYPE_BF16:
|
|
994
929
|
return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
|
|
995
930
|
case GGML_TYPE_F32:
|
|
@@ -1016,7 +951,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
|
1016
951
|
} break;
|
|
1017
952
|
case GGML_TYPE_F16:
|
|
1018
953
|
{
|
|
1019
|
-
((ggml_fp16_t *)(data))[0] =
|
|
954
|
+
((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
|
|
1020
955
|
} break;
|
|
1021
956
|
case GGML_TYPE_BF16:
|
|
1022
957
|
{
|
|
@@ -1054,7 +989,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
|
|
|
1054
989
|
}
|
|
1055
990
|
case GGML_TYPE_F16:
|
|
1056
991
|
{
|
|
1057
|
-
return
|
|
992
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
|
1058
993
|
}
|
|
1059
994
|
case GGML_TYPE_BF16:
|
|
1060
995
|
{
|
|
@@ -1093,7 +1028,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
|
|
|
1093
1028
|
} break;
|
|
1094
1029
|
case GGML_TYPE_F16:
|
|
1095
1030
|
{
|
|
1096
|
-
((ggml_fp16_t *)(tensor->data))[i] =
|
|
1031
|
+
((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value);
|
|
1097
1032
|
} break;
|
|
1098
1033
|
case GGML_TYPE_BF16:
|
|
1099
1034
|
{
|
|
@@ -1120,7 +1055,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
|
1120
1055
|
case GGML_TYPE_I32:
|
|
1121
1056
|
return ((int32_t *) data)[0];
|
|
1122
1057
|
case GGML_TYPE_F16:
|
|
1123
|
-
return
|
|
1058
|
+
return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
|
1124
1059
|
case GGML_TYPE_BF16:
|
|
1125
1060
|
return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]);
|
|
1126
1061
|
case GGML_TYPE_F32:
|
|
@@ -1147,7 +1082,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
|
|
1147
1082
|
} break;
|
|
1148
1083
|
case GGML_TYPE_F16:
|
|
1149
1084
|
{
|
|
1150
|
-
((ggml_fp16_t *)(data))[0] =
|
|
1085
|
+
((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value);
|
|
1151
1086
|
} break;
|
|
1152
1087
|
case GGML_TYPE_BF16:
|
|
1153
1088
|
{
|
|
@@ -1883,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1883
1818
|
{
|
|
1884
1819
|
ggml_compute_forward_get_rows_back(params, tensor);
|
|
1885
1820
|
} break;
|
|
1821
|
+
case GGML_OP_SET_ROWS:
|
|
1822
|
+
{
|
|
1823
|
+
ggml_compute_forward_set_rows(params, tensor);
|
|
1824
|
+
} break;
|
|
1886
1825
|
case GGML_OP_DIAG:
|
|
1887
1826
|
{
|
|
1888
1827
|
ggml_compute_forward_diag(params, tensor);
|
|
@@ -1959,6 +1898,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
|
1959
1898
|
{
|
|
1960
1899
|
ggml_compute_forward_pad_reflect_1d(params, tensor);
|
|
1961
1900
|
} break;
|
|
1901
|
+
case GGML_OP_ROLL:
|
|
1902
|
+
{
|
|
1903
|
+
ggml_compute_forward_roll(params, tensor);
|
|
1904
|
+
} break;
|
|
1962
1905
|
case GGML_OP_ARANGE:
|
|
1963
1906
|
{
|
|
1964
1907
|
ggml_compute_forward_arange(params, tensor);
|
|
@@ -2232,6 +2175,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2232
2175
|
n_tasks = n_threads;
|
|
2233
2176
|
} break;
|
|
2234
2177
|
case GGML_OP_GET_ROWS:
|
|
2178
|
+
case GGML_OP_SET_ROWS:
|
|
2235
2179
|
{
|
|
2236
2180
|
// FIXME: get_rows can use additional threads, but the cost of launching additional threads
|
|
2237
2181
|
// decreases performance with GPU offloading
|
|
@@ -2283,6 +2227,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
|
2283
2227
|
case GGML_OP_UPSCALE:
|
|
2284
2228
|
case GGML_OP_PAD:
|
|
2285
2229
|
case GGML_OP_PAD_REFLECT_1D:
|
|
2230
|
+
case GGML_OP_ROLL:
|
|
2286
2231
|
case GGML_OP_ARANGE:
|
|
2287
2232
|
case GGML_OP_TIMESTEP_EMBEDDING:
|
|
2288
2233
|
case GGML_OP_ARGSORT:
|
|
@@ -3185,6 +3130,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
|
|
|
3185
3130
|
return ggml_graph_compute(cgraph, &cplan);
|
|
3186
3131
|
}
|
|
3187
3132
|
|
|
3133
|
+
void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
|
|
3134
|
+
memcpy(y, x, n * sizeof(float));
|
|
3135
|
+
}
|
|
3136
|
+
|
|
3188
3137
|
void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
|
|
3189
3138
|
int64_t i = 0;
|
|
3190
3139
|
#if defined(__F16C__)
|
|
@@ -3205,9 +3154,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
|
|
|
3205
3154
|
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
|
3206
3155
|
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
|
3207
3156
|
}
|
|
3157
|
+
#elif defined(__NNPA__)
|
|
3158
|
+
for (; i + 7 < n; i += 8) {
|
|
3159
|
+
float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
|
|
3160
|
+
float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
|
|
3161
|
+
uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
|
|
3162
|
+
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
|
|
3163
|
+
vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
|
|
3164
|
+
}
|
|
3165
|
+
for (; i + 3 < n; i += 4) {
|
|
3166
|
+
float32x4_t v_x = vec_xl(0, (const float *)(x + i));
|
|
3167
|
+
float32x4_t v_zero = vec_splats(0.0f);
|
|
3168
|
+
uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
|
|
3169
|
+
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
|
|
3170
|
+
vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
|
|
3171
|
+
}
|
|
3208
3172
|
#endif
|
|
3209
3173
|
for (; i < n; ++i) {
|
|
3210
|
-
y[i] =
|
|
3174
|
+
y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
|
|
3211
3175
|
}
|
|
3212
3176
|
}
|
|
3213
3177
|
|
|
@@ -3231,9 +3195,25 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
|
|
|
3231
3195
|
__m128 y_vec = _mm_cvtph_ps(x_vec);
|
|
3232
3196
|
_mm_storeu_ps(y + i, y_vec);
|
|
3233
3197
|
}
|
|
3198
|
+
#elif defined(__NNPA__)
|
|
3199
|
+
for (; i + 7 < n; i += 8) {
|
|
3200
|
+
uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
|
|
3201
|
+
uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
|
|
3202
|
+
float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
|
|
3203
|
+
float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0);
|
|
3204
|
+
vec_xst(v_yh, 0, (float *)(y + i + 0));
|
|
3205
|
+
vec_xst(v_yl, 0, (float *)(y + i + 4));
|
|
3206
|
+
}
|
|
3207
|
+
for (; i + 3 < n; i += 4) {
|
|
3208
|
+
uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i));
|
|
3209
|
+
uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0);
|
|
3210
|
+
float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
|
|
3211
|
+
vec_xst(v_yh, 0, (float *)(y + i));
|
|
3212
|
+
}
|
|
3234
3213
|
#endif
|
|
3214
|
+
|
|
3235
3215
|
for (; i < n; ++i) {
|
|
3236
|
-
y[i] =
|
|
3216
|
+
y[i] = GGML_CPU_FP16_TO_FP32(x[i]);
|
|
3237
3217
|
}
|
|
3238
3218
|
}
|
|
3239
3219
|
|
|
@@ -3433,9 +3413,17 @@ int ggml_cpu_has_vxe(void) {
|
|
|
3433
3413
|
#endif
|
|
3434
3414
|
}
|
|
3435
3415
|
|
|
3416
|
+
int ggml_cpu_has_nnpa(void) {
|
|
3417
|
+
#if defined(GGML_NNPA)
|
|
3418
|
+
return 1;
|
|
3419
|
+
#else
|
|
3420
|
+
return 0;
|
|
3421
|
+
#endif
|
|
3422
|
+
}
|
|
3423
|
+
|
|
3436
3424
|
int ggml_cpu_has_neon(void) {
|
|
3437
3425
|
#if defined(__ARM_ARCH) && defined(__ARM_NEON)
|
|
3438
|
-
return
|
|
3426
|
+
return 1;
|
|
3439
3427
|
#else
|
|
3440
3428
|
return 0;
|
|
3441
3429
|
#endif
|
|
@@ -3443,7 +3431,7 @@ int ggml_cpu_has_neon(void) {
|
|
|
3443
3431
|
|
|
3444
3432
|
int ggml_cpu_has_dotprod(void) {
|
|
3445
3433
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD)
|
|
3446
|
-
return
|
|
3434
|
+
return 1;
|
|
3447
3435
|
#else
|
|
3448
3436
|
return 0;
|
|
3449
3437
|
#endif
|
|
@@ -3451,7 +3439,7 @@ int ggml_cpu_has_dotprod(void) {
|
|
|
3451
3439
|
|
|
3452
3440
|
int ggml_cpu_has_sve(void) {
|
|
3453
3441
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
|
|
3454
|
-
return
|
|
3442
|
+
return 1;
|
|
3455
3443
|
#else
|
|
3456
3444
|
return 0;
|
|
3457
3445
|
#endif
|
|
@@ -3459,7 +3447,7 @@ int ggml_cpu_has_sve(void) {
|
|
|
3459
3447
|
|
|
3460
3448
|
int ggml_cpu_has_matmul_int8(void) {
|
|
3461
3449
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
|
|
3462
|
-
return
|
|
3450
|
+
return 1;
|
|
3463
3451
|
#else
|
|
3464
3452
|
return 0;
|
|
3465
3453
|
#endif
|
|
@@ -3475,14 +3463,14 @@ int ggml_cpu_get_sve_cnt(void) {
|
|
|
3475
3463
|
|
|
3476
3464
|
int ggml_cpu_has_sme(void) {
|
|
3477
3465
|
#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME)
|
|
3478
|
-
return
|
|
3466
|
+
return 1;
|
|
3479
3467
|
#else
|
|
3480
3468
|
return 0;
|
|
3481
3469
|
#endif
|
|
3482
3470
|
}
|
|
3483
3471
|
|
|
3484
3472
|
void ggml_cpu_init(void) {
|
|
3485
|
-
// needed to initialize
|
|
3473
|
+
// needed to initialize ggml_time
|
|
3486
3474
|
{
|
|
3487
3475
|
struct ggml_init_params params = { 0, NULL, false };
|
|
3488
3476
|
struct ggml_context * ctx = ggml_init(params);
|
|
@@ -3503,9 +3491,10 @@ void ggml_cpu_init(void) {
|
|
|
3503
3491
|
uint16_t u16;
|
|
3504
3492
|
ggml_fp16_t fp16;
|
|
3505
3493
|
} u = {i};
|
|
3506
|
-
float f =
|
|
3507
|
-
|
|
3508
|
-
|
|
3494
|
+
float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
|
3495
|
+
ggml_table_f32_f16[i] = f;
|
|
3496
|
+
ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
|
|
3497
|
+
ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
|
3509
3498
|
}
|
|
3510
3499
|
|
|
3511
3500
|
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|